In [None]:
import pandas as pd

# Load the workplace lookup table and the staff-in-post extract.
wloc = pd.read_csv("Workforce data/workplace location table.csv")
staff = pd.read_csv("Workforce data/LAS_Staff_in_Post_070319.csv")

# Pair every staff record with its workplace by matching the staff "Location"
# against the workplace table's "esrLocationFull" (inner join), keep only the
# home postcode sector and the workplace postcode, and suffix every home
# sector with 'AA'.
df = (
    staff.merge(wloc, left_on="Location", right_on="esrLocationFull")
    .loc[:, ['PostcodeSector', 'postcode']]
    .rename(columns={'PostcodeSector': 'home', 'postcode': 'work'})
    .assign(home=lambda pairs: pairs['home'] + 'AA')
)
df.head()

In [None]:
from functools import lru_cache

from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from geopy.extra.rate_limiter import RateLimiter

# Translate staff postcodes to longitude and latitude.
# NOTE(review): the user agent is sent to the Nominatim service; "GoogleV3" looks like a
# leftover name rather than an identifier for this project - confirm before a large run.
geolocator = Nominatim(user_agent="GoogleV3")
_rate_limited_geocode = RateLimiter(geolocator.geocode, min_delay_seconds = 1)


@lru_cache(maxsize=None)
def geocode(*args, **kwargs):
    """Geocode a query through the rate-limited Nominatim client, once per distinct query.

    Arguments are forwarded unchanged to the rate-limited ``geolocator.geocode``.
    Repeated calls with the same (hashable) arguments are answered from an in-memory
    cache, so rows that share a postcode cost a single request. The cache also keeps
    ``None`` results, so a failed lookup is not retried within the same kernel session.
    """
    return _rate_limited_geocode(*args, **kwargs)


# `dt` is the same object as `df`: everything added or dropped below also applies to `df`.
dt = df
dt['location'] = dt['home'].apply(geocode)

# Remove rows with any missing value, which includes postcodes that could not be geocoded.
dt.dropna(axis = 0, inplace = True)

# 1-based row id; the next cell uses it to collapse duplicates created by the merge.
dt['index'] = range(1, len(dt) + 1)

In [None]:
# Join with the same data from work places (see other Jupyter notebook).
# NOTE: unpickling can execute code embedded in the file - only load a "work.pkl" that you
# produced yourself.
temp = pd.read_pickle("work.pkl")

# Place the pickled columns next to the workplace table and keep exactly the workplace
# table's rows, in its order. `reindex` is the supported way to do this; the `join_axes`
# argument of `pd.concat` no longer exists in pandas >= 1.0.
work_pc = pd.concat([wloc, temp], axis=1).reindex(wloc.index)

# Drop four columns by position.
# NOTE(review): which columns these are depends on the column order of the CSV and the
# pickle; the code below relies on 'postcode' and 'Point' remaining - confirm.
work_pc = work_pc.drop(work_pc.columns[[0, 1, 3, 4]], axis=1)

# Attach the workplace data to every staff row, then keep one row per staff id
# (the merge can duplicate staff when a workplace postcode occurs more than once).
df2 = pd.merge(dt, work_pc, left_on = 'work', right_on = 'postcode', how = 'inner')
df2 = df2.drop_duplicates(['index'])

# Renumber rows 0..n-1 and use that numbering as the index.
df2['index'] = range(len(df2))
df2 = df2.set_index(['index'])

In [None]:
# Drop faulty geocoding results: keep only staff whose home location has a longitude
# strictly between -5 and 5 degrees (mis-resolved postcodes can end up on other continents).
longitudes = df2['location'].apply(lambda loc: loc.longitude)

# One boolean mask instead of dropping row by row; `.copy()` gives an independent frame
# so the column assignment below does not act on a slice of the previous one.
df2 = df2[(longitudes > -5) & (longitudes < 5)].copy()

# Renumber rows 0..n-1 and use that numbering as the index.
df2['index'] = range(len(df2))
df2 = df2.set_index(['index'])

In [None]:
import os

import openrouteservice as ors

# Calculate commute durations from home to work place for every employee using
# openrouteservice (car commute).
df2.dropna(axis = 0, inplace = True)

# The API key is read from the ORS_API_KEY environment variable when it is set, so the key
# does not have to be saved inside the notebook; otherwise the placeholder below is used.
client = ors.Client(key=os.environ.get('ORS_API_KEY', 'enter_your_openrouteservice_api_key_here'))
duration = []

# Walk the rows positionally so that `duration` ends up with exactly one entry per row of
# df2, regardless of gaps the dropna above may have left in the index labels.
# NOTE(review): the home coordinate is passed as (longitude, latitude) while `Point` is
# passed as stored; a later cell reads Point[0] as latitude and Point[1] as longitude -
# confirm that `Point` is in the order openrouteservice expects.
for home, work_point in zip(df2['location'], df2['Point']):
    coordinates = ((home.longitude, home.latitude), work_point)
    matrix = ors.distance_matrix.distance_matrix(client, coordinates, sources = [0],
                                                 destinations = [1])
    # Single source -> single destination; divide by 60 (presumably seconds to minutes).
    duration.append(round(matrix['durations'][0][0] / 60, 2))

df2['duration'] = duration

# Mean of the numeric columns per workplace postcode; `numeric_only` skips the object
# columns (locations, points, postcodes), which cannot be averaged.
wdur = df2.groupby(['work']).mean(numeric_only=True)

In [None]:
# Correct a faulty workplace postcode in df2, then collect the affected staff rows in
# `rep` with the corrected postcode geocoded into 'Point'.
df2["work"] = df2["work"].str.replace("SW1 4AT", "SW1P 4AT")

rep = df2[df2["work"] == 'SW1P 4AT'].assign(
    Point=lambda rows: rows['work'].apply(geocode)
)

# Renumber the selected rows 0..n-1 and use that numbering as the index.
rep = rep.assign(index=range(len(rep))).set_index(['index'])

In [None]:
# Recompute the commute duration for every staff row of the corrected postcode.
# Both ends of the trip are passed to openrouteservice as (longitude, latitude).
durrep = []

for home, workplace in zip(rep['location'], rep['Point']):
    coordinates = (
        (home.longitude, home.latitude),
        (workplace.longitude, workplace.latitude),
    )
    matrix = ors.distance_matrix.distance_matrix(
        client, coordinates, sources = [0], destinations = [1]
    )
    # Single source -> single destination; the value is divided by 60 and rounded to 2 dp.
    durrep.append(round(matrix['durations'][0][0] / 60, 2))

In [None]:
from statistics import mean

# Replace the mean commute of the group at row position 69 with the mean of the recomputed
# durations. Row and column are addressed in a single .iloc call so the value is written
# into `wdur` itself; assigning through `wdur.iloc[69].duration` can end up on a temporary
# copy of the row and leave `wdur` unchanged.
# NOTE(review): position 69 is presumably the group of the corrected postcode - confirm,
# since the position shifts whenever the set of workplaces changes.
wdur.iloc[69, wdur.columns.get_loc('duration')] = mean(durrep)

In [None]:
# Drop the other faulty postcode, then prepare two per-workplace tables:
#   wdur - mean commute time of all employees per workplace postcode, joined to the
#          workplace table;
#   wfte - sum of full-time equivalents (FTE) per staff location.
wdur = wdur.drop(['BR5']).rename_axis('postcode').reset_index()

wdur = pd.merge(wloc, wdur, on = "postcode")
wdur = wdur.drop_duplicates(['postcode'])

# Only FTE is aggregated: summing every staff column would also try to add up the text
# columns, which are not needed here.
wfte = (
    staff.groupby(['Location'])['FTE']
    .sum()
    .rename_axis('esrLocationFull')
    .reset_index()
)

In [None]:
# Map each staff location to its workplace postcode and total the FTE per postcode.
# Only FTE is aggregated; the text columns of the workplace table are not summed.
wfte = pd.merge(wloc, wfte, on = "esrLocationFull")
wfte = wfte.groupby(['postcode'])['FTE'].sum().reset_index()

# Add the FTE total to the per-postcode commute table.
wdur = pd.merge(wdur, wfte, on = "postcode")

In [None]:
# Attach the workplace coordinates to the per-postcode summary, keep one row per
# postcode, and renumber the rows 0..n-1 as the index.
temp = (
    pd.merge(wdur, work_pc, on = "postcode", how = "inner")
    .drop_duplicates(['postcode'])
    .assign(index=lambda rows: range(len(rows)))
    .set_index(['index'])
)

In [None]:
# Split each workplace 'Point' into separate columns: element 0 goes to 'lat',
# element 1 goes to 'long'.
lat = [point[0] for point in temp.Point]
long = [point[1] for point in temp.Point]

temp['lat'] = lat
temp['long'] = long