# Preppin Data 2023 Week 11

#### Load data

In [30]:
import pandas as pd
import math as mt
import numpy as np

In [31]:
# Branch locations: one row per branch with its longitude/latitude columns.
branches = pd.read_csv(filepath_or_buffer='DSB Branches.csv')

In [32]:
# Customer locations: one row per customer with its address longitude/latitude.
customers = pd.read_csv(filepath_or_buffer='DSB Customer Locations.csv')

In [33]:
# Preview the first five branch rows.
branches.head(n=5)

Unnamed: 0,Branch,Branch Long,Branch Lat
0,London,-0.109863,51.481383
1,Oxford,-1.252441,51.727028
2,Nottingham,-1.142578,52.948638
3,Newcastle,-1.604004,54.965002


In [34]:
# Preview the first five customer rows.
customers.head(n=5)

Unnamed: 0,Customer,Address Long,Address Lat
0,1,0.74707,51.549751
1,2,0.406494,51.957807
2,3,-0.142822,50.833698
3,4,-4.64035,50.335819
4,5,-1.593018,51.289406


#### join tables together

In [35]:
# Pair every branch with every customer (Cartesian product).
# `how='cross'` (pandas >= 1.2) does this directly, so no throwaway 'join'
# key has to be written onto `branches` and `customers`; both source frames
# are left exactly as loaded.
df = branches.merge(customers, how='cross')

In [36]:
# Preview the first five branch/customer pairs.
df.head(n=5)

Unnamed: 0,Branch,Branch Long,Branch Lat,Customer,Address Long,Address Lat
0,London,-0.109863,51.481383,1,0.74707,51.549751
1,London,-0.109863,51.481383,2,0.406494,51.957807
2,London,-0.109863,51.481383,3,-0.142822,50.833698
3,London,-0.109863,51.481383,4,-4.64035,50.335819
4,London,-0.109863,51.481383,5,-1.593018,51.289406


In [37]:
# Number of branch/customer pairs produced by the join.
len(df)

500

#### Transform the latitudes and longitudes from decimal degrees to radians by dividing them by 180/pi (equivalently, multiplying by pi/180)

In [38]:
# Degrees per radian; dividing an angle in degrees by this yields radians.
rad = 180.0 / np.pi

In [39]:
def distance(x, radius=3963):
    """Great-circle distance between customer addresses and branches.

    Applies the spherical law of cosines to the coordinate columns of ``x``.

    Parameters
    ----------
    x : DataFrame, Series or mapping
        Must provide 'Address Lat', 'Address Long', 'Branch Lat' and
        'Branch Long', all in decimal degrees. Scalars and array-likes
        are both accepted; the computation is element-wise.
    radius : float, default 3963
        Radius of the sphere. The result is in the same unit as this value;
        the default is the constant this notebook uses for distances in miles.

    Returns
    -------
    Same shape as the coordinate inputs
        ``radius`` multiplied by the central angle between each pair of points.
    """
    c_lat = np.radians(x['Address Lat'])
    c_lon = np.radians(x['Address Long'])
    b_lat = np.radians(x['Branch Lat'])
    b_lon = np.radians(x['Branch Long'])

    cos_angle = (
        np.sin(c_lat) * np.sin(b_lat)
        + np.cos(c_lat) * np.cos(b_lat) * np.cos(b_lon - c_lon)
    )
    # Floating-point rounding can push the cosine marginally outside [-1, 1]
    # (e.g. when a customer sits exactly on a branch), which would make
    # arccos return NaN. Clamp to the valid domain first.
    cos_angle = np.clip(cos_angle, -1.0, 1.0)
    return radius * np.arccos(cos_angle)

#### Calculate distance in miles from branch

In [40]:
# Formula for the distance (all angles in radians):
# 3963 * (np.arccos((np.sin(c_lat) * np.sin(b_lat)) + np.cos(c_lat) * np.cos(b_lat) * np.cos(b_lon - c_lon)))

In [41]:
# Vectorised over the whole frame; distances are kept to two decimal places.
df['Distance'] = distance(df).round(2)

In [42]:
# Spot check: show every branch pairing for customer 99.
df.loc[df['Customer'].eq(99)]

Unnamed: 0,Branch,Branch Long,Branch Lat,Customer,Address Long,Address Lat,Distance
98,London,-0.109863,51.481383,99,-0.241699,52.842595,94.32
223,Oxford,-1.252441,51.727028,99,-0.241699,52.842595,88.22
348,Nottingham,-1.142578,52.948638,99,-0.241699,52.842595,38.3
473,Newcastle,-1.604004,54.965002,99,-0.241699,52.842595,156.94


#### Rank the branches for each customer by distance (1 = nearest)

In [43]:
# Within each customer, rank the candidate branches by distance (1 = nearest).
# method='first' breaks ties by row order so exactly one branch gets rank 1.
df['Customer Rank'] = (
    df.groupby(by='Customer')['Distance']
      .rank(method='first')
)

In [44]:
# Preview the first five rows with the new rank column.
df.head(n=5)

Unnamed: 0,Branch,Branch Long,Branch Lat,Customer,Address Long,Address Lat,Distance,Customer Rank
0,London,-0.109863,51.481383,1,0.74707,51.549751,37.19,1.0
1,London,-0.109863,51.481383,2,0.406494,51.957807,39.69,1.0
2,London,-0.109863,51.481383,3,-0.142822,50.833698,44.82,1.0
3,London,-0.109863,51.481383,4,-4.64035,50.335819,212.84,2.0
4,London,-0.109863,51.481383,5,-1.593018,51.289406,65.38,2.0


#### Keep only the row for each customer's closest branch (rank = 1)

In [45]:
# Keep only each customer's nearest branch.
# .copy() makes df2 an independent frame, so adding columns to it in later
# cells does not raise SettingWithCopyWarning or depend on view/copy behaviour.
df2 = df.loc[df['Customer Rank'] == 1].copy()

In [46]:
# Preview the first five nearest-branch rows.
df2.head(n=5)

Unnamed: 0,Branch,Branch Long,Branch Lat,Customer,Address Long,Address Lat,Distance,Customer Rank
0,London,-0.109863,51.481383,1,0.74707,51.549751,37.19,1.0
1,London,-0.109863,51.481383,2,0.406494,51.957807,39.69,1.0
2,London,-0.109863,51.481383,3,-0.142822,50.833698,44.82,1.0
7,London,-0.109863,51.481383,8,1.296387,52.61639,98.69,1.0
25,London,-0.109863,51.481383,26,1.494141,52.308479,89.22,1.0


#### Rank customers within each branch by distance to the branch to set their priority (1 = closest)

In [55]:
# Within each branch, rank its customers by distance (1 = closest).
# assign() returns a new frame rather than writing a column into df2, which
# was produced by filtering df; this avoids SettingWithCopyWarning.
df2 = df2.assign(**{
    'Customer Priority': df2.groupby('Branch')['Distance'].rank(method='first', ascending=True)
})

#### output

In [56]:
# Order the output by branch, then by priority within the branch (ascending).
df2 = df2.sort_values(by=['Branch', 'Customer Priority'])

In [57]:
# 'Customer Rank' was only needed to pick the nearest branch; remove it from
# the output. Rebinding instead of inplace=True avoids mutating a derived
# frame, and errors='ignore' keeps the cell safe to re-run once the column
# is already gone.
df2 = df2.drop(columns='Customer Rank', errors='ignore')

In [59]:
# Write the final table without the pandas index column.
df2.to_csv(path_or_buf='2023W11_output.csv', index=False)