In [2]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import googlemaps

%matplotlib inline

# Load Dataset

In [6]:
address = pd.read_csv('./14.ShuttleStops/Employee_Addresses.csv')
address.head()

Unnamed: 0,address,employee_id
0,"98 Edinburgh St, San Francisco, CA 94112, USA",206
1,"237 Accacia St, Daly City, CA 94014, USA",2081
2,"1835 Folsom St, San Francisco, CA 94103, USA",178
3,"170 Cambridge St, San Francisco, CA 94134, USA",50
4,"16 Roanoke St, San Francisco, CA 94131, USA",1863


In [7]:
address.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2191 entries, 0 to 2190
Data columns (total 2 columns):
address        2191 non-null object
employee_id    2191 non-null int64
dtypes: int64(1), object(1)
memory usage: 34.3+ KB


In [8]:
stops = pd.read_csv('./14.ShuttleStops/Potentail_Bust_Stops.csv')
stops.head()

Unnamed: 0,Street_One,Street_Two
0,MISSION ST,ITALY AVE
1,MISSION ST,NEW MONTGOMERY ST
2,MISSION ST,01ST ST
3,MISSION ST,20TH ST
4,MISSION ST,FREMONT ST


In [9]:
stops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119 entries, 0 to 118
Data columns (total 2 columns):
Street_One    119 non-null object
Street_Two    119 non-null object
dtypes: object(2)
memory usage: 1.9+ KB


# Data Processing

In [10]:
# get unique address and stop information
unique_address = list(set(address['address']))

def merge_stop(x):
    return x[0] + ' & ' + x[1] + ' CA'

unique_stop = list(set(stops.apply(merge_stop, axis=1)))

In [11]:
unique_address[:10]

['1162 Geneva Ave, San Francisco, CA 94112, USA',
 '97 Sanchez St, San Francisco, CA 94114, USA',
 '1935 Silver Ave, San Francisco, CA 94124, USA',
 '16 Roanoke St, San Francisco, CA 94131, USA',
 '349 2nd St, San Francisco, CA 94107, USA',
 '114 Henry St, San Francisco, CA 94114, USA',
 '2685 Diamond St, San Francisco, CA 94131, USA',
 '147 29th St, San Francisco, CA 94110, USA',
 '501 Twin Peaks Blvd, San Francisco, CA 94131, USA',
 '799 Bayshore Blvd, San Francisco, CA 94124, USA']

In [12]:
unique_stop[:10]

['MISSION ST & ROLPH ST CA',
 'MISSION ST & MORSE ST CA',
 'MISSION ST & COLLEGE AVE CA',
 'MISSION ST & AMAZON AVE CA',
 'MISSION ST & APPLETON AVE CA',
 'MISSION ST & 01ST ST CA',
 'MISSION ST & WASHBURN ST CA',
 'MISSION ST & MAIN ST CA',
 'MISSION ST & GUTTENBERG ST CA',
 'MISSION ST & SHAW ALY CA']

# Distance Measure

Here, I am using the Google Distance Matrix API for origin and destination walking distance measure. The GitHub link for google-maps-services-python can be found at: [https://github.com/googlemaps/google-maps-services-python](https://github.com/googlemaps/google-maps-services-python).

Since this is a charged service, I only show one query for demo purpose. After query the distance, I believe the next step is pretty clear.

In [16]:
# start google API with your KEY (you can register at Google.com)
gmaps = googlemaps.Client(key='----------------------------------------')

In [17]:
# defind origin and destination
origin = unique_address[0]
destination = unique_stop[0]

print('Origin:\t\t', origin)
print('Destination:\t', destination)

Origin:		 1162 Geneva Ave, San Francisco, CA 94112, USA
Destination:	 MISSION ST & ROLPH ST CA


In [18]:
# query walking distance between origins and destinations
result = gmaps.distance_matrix(origins=origin, destinations=destination, 
                               mode='walking')
result

{'destination_addresses': ['Mission St & Rolph St, San Francisco, CA 94112, USA'],
 'origin_addresses': ['1162 Geneva Ave, San Francisco, CA 94112, USA'],
 'rows': [{'elements': [{'distance': {'text': '0.5 km', 'value': 509},
     'duration': {'text': '6 mins', 'value': 368},
     'status': 'OK'}]}],
 'status': 'OK'}

# Reference Solution

There is another solution online, you can refer to [Link](https://github.com/stasi009/TakeHomeDataChallenges/blob/master/14.ShuttleStops/shuttle_stops.py).