# Getting the scores from Walkscore's API

walkscore's API documentation:  
https://www.walkscore.com/professional/api.php

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
from urllib.parse import urlencode, quote_plus

In [2]:
# Read in the file from the previous notebook.
df=pd.read_csv('data/dc_mscores.csv', dtype={'ZIPCODE': object, 
                                             'LATITUDE': object,
                                            'LONGITUDE': object})
print(df.shape)

(2717, 11)


In [3]:
df.head()

Unnamed: 0,coordinates,LATITUDE,LONGITUDE,FULLADDRESS,ZIPCODE,ASSESSMENT_NBHD,CLUSTER_,WARD,Units,Residential,mobilityscore
0,"38.8688316,-77.01825188",38.8688316,-77.01825188,221 2ND AVENUE SW,20024,Ft. McNair,Cluster 9,Ward 6,59,0.20339,54.0
1,"38.8675634,-77.01823116",38.8675634,-77.01823116,241 2ND AVENUE SW,20024,Ft. McNair,Cluster 9,Ward 6,59,0.20339,49.0
2,"38.90749873,-76.94332398",38.90749873,-76.94332398,4300 ANACOSTIA AVENUE NE,20019,DC Stadium Area,Cluster 29,Ward 7,57,0.0,53.0
3,"38.87882265,-76.97541985",38.87882265,-76.97541985,1900 M STREET SE,20003,Anacostia Park,Cluster 26,Ward 6,16,0.0,49.0
4,"38.87269412,-76.99086222",38.87269412,-76.99086222,1105 O STREET SE,20003,Anacostia Park,Cluster 27,Ward 6,16,0.0,71.0


In [4]:
df.columns

Index(['coordinates', 'LATITUDE', 'LONGITUDE', 'FULLADDRESS', 'ZIPCODE',
       'ASSESSMENT_NBHD', 'CLUSTER_', 'WARD', 'Units', 'Residential',
       'mobilityscore'],
      dtype='object')

In [5]:
df.dtypes

coordinates         object
LATITUDE            object
LONGITUDE           object
FULLADDRESS         object
ZIPCODE             object
ASSESSMENT_NBHD     object
CLUSTER_            object
WARD                object
Units                int64
Residential        float64
mobilityscore      float64
dtype: object

In [25]:
# Remove rows with missing data.
print(df['address'].isnull().sum())
df.dropna(subset = ['address'], inplace=True) # Delete them.
print(df.shape)

183
(2534, 13)


In [27]:
# Confirm we have no other missing data.
print(df['LATITUDE'].isnull().sum())
print(df['LONGITUDE'].isnull().sum())

0
0


## Setting up the API call

This is what the call should look like:
<img src="data/walkscore_api.PNG" alt="Drawing" style="width: 600px;"/>

In [28]:
# Create a column with all the address information.
df['FULL'] = df['FULLADDRESS'].str.replace(' ','%')
df['address']=df['FULL']+'%20Washington%20DC%'+df['ZIPCODE']
df['address'].head()

0           221%2ND%AVENUE%SW%20Washington%20DC%20024
1           241%2ND%AVENUE%SW%20Washington%20DC%20024
2    4300%ANACOSTIA%AVENUE%NE%20Washington%20DC%20019
3            1900%M%STREET%SE%20Washington%20DC%20003
4            1105%O%STREET%SE%20Washington%20DC%20003
Name: address, dtype: object

In [29]:
# Read in the API key from another folder (saved locally, outside of the github repo)
key_path = "/Users/austinlasseter/keys/WAPI_key.txt"
key_file = open(key_path, 'r')
api_key = str(key_file.readline())[:32] # There's a hidden '/n' at the end of that string; slice at index 16 to remove.

In [30]:
# Set up the non-changing components of the call
str1='http://api.walkscore.com/score?format=json'
str2='&address='
str3='&lat='
str4='&lon='
str5='&transit=0&bike=0&wsapikey='

{'description': "Walker's Paradise",
 'help_link': 'https://www.redfin.com/how-walk-score-works',
 'logo_url': 'https://cdn.walk.sc/images/api-logo.png',
 'more_info_icon': 'https://cdn.walk.sc/images/api-more-info.gif',
 'more_info_link': 'https://www.redfin.com/how-walk-score-works',
 'snapped_lat': 47.6085,
 'snapped_lon': -122.3295,
 'status': 1,
 'updated': '2017-11-16 04:56:55.909740',
 'walkscore': 98,
 'ws_link': 'https://www.walkscore.com/score/loc/lat=47.6085/lng=-122.3295/?utm_source=aqueous-beyond-12197.herokuapp.com &utm_medium=ws_api&utm_campaign=ws_api'}

In [31]:
# Define a function that reads in the coordinates, then produces a new string from those 5 strings.
def api_string(address, lat, lon):
    call=str1+str2+address+str3+lat+str4+lon+str5+api_key
    return call

In [32]:
# Apply the function to our combined lat-long coordinates.
df['call'] = df[['address','LATITUDE', 'LONGITUDE']].apply(lambda x: api_string(*x), axis=1)
df.head()

Unnamed: 0,coordinates,LATITUDE,LONGITUDE,FULLADDRESS,ZIPCODE,ASSESSMENT_NBHD,CLUSTER_,WARD,Units,Residential,mobilityscore,FULL,address,call
0,"38.8688316,-77.01825188",38.8688316,-77.01825188,221 2ND AVENUE SW,20024,Ft. McNair,Cluster 9,Ward 6,59,0.20339,54.0,221%2ND%AVENUE%SW,221%2ND%AVENUE%SW%20Washington%20DC%20024,http://api.walkscore.com/score?format=json&add...
1,"38.8675634,-77.01823116",38.8675634,-77.01823116,241 2ND AVENUE SW,20024,Ft. McNair,Cluster 9,Ward 6,59,0.20339,49.0,241%2ND%AVENUE%SW,241%2ND%AVENUE%SW%20Washington%20DC%20024,http://api.walkscore.com/score?format=json&add...
2,"38.90749873,-76.94332398",38.90749873,-76.94332398,4300 ANACOSTIA AVENUE NE,20019,DC Stadium Area,Cluster 29,Ward 7,57,0.0,53.0,4300%ANACOSTIA%AVENUE%NE,4300%ANACOSTIA%AVENUE%NE%20Washington%20DC%20019,http://api.walkscore.com/score?format=json&add...
3,"38.87882265,-76.97541985",38.87882265,-76.97541985,1900 M STREET SE,20003,Anacostia Park,Cluster 26,Ward 6,16,0.0,49.0,1900%M%STREET%SE,1900%M%STREET%SE%20Washington%20DC%20003,http://api.walkscore.com/score?format=json&add...
4,"38.87269412,-76.99086222",38.87269412,-76.99086222,1105 O STREET SE,20003,Anacostia Park,Cluster 27,Ward 6,16,0.0,71.0,1105%O%STREET%SE,1105%O%STREET%SE%20Washington%20DC%20003,http://api.walkscore.com/score?format=json&add...


## Make requests to Walkscore API

In [33]:
# Extract the information frome the API.
for i in df.index:
    df.loc[i, 'json'] = [requests.get(df.loc[i,'call']).json()] 

In [34]:
# View the results for one address
df['json'][0]

[{'description': 'Car-Dependent',
  'help_link': 'https://www.redfin.com/how-walk-score-works',
  'logo_url': 'https://cdn.walk.sc/images/api-logo.png',
  'more_info_icon': 'https://cdn.walk.sc/images/api-more-info.gif',
  'more_info_link': 'https://www.redfin.com/how-walk-score-works',
  'snapped_lat': 38.8695,
  'snapped_lon': -77.019,
  'status': 1,
  'updated': '2017-12-03 07:05:27.109020',
  'walkscore': 40,
  'ws_link': 'https://www.walkscore.com/score/221-2ND-AVENUE-SW-20Washington-20DC-20024/lat=38.8688316/lng=-77.01825188/?utm_source=aqueous-beyond-12197.herokuapp.com &utm_medium=ws_api&utm_campaign=ws_api'}]

In [35]:
# View the walkscore for one address
df['json'][0][0]['walkscore']

40

In [37]:
# Apply to all rows.
for i in df.index:
    df.loc[(df.index==i) , 'walkscore']=df['json'][i][0]['walkscore']
df['walkscore'].head()

0    40.0
1    35.0
2    40.0
3    26.0
4    77.0
Name: walkscore, dtype: float64

In [38]:
# Remove unnecessary columns.
df.drop(['coordinates', 'FULL', 'address', 'call', 'json'], axis=1, inplace=True)
df.head()

Unnamed: 0,LATITUDE,LONGITUDE,FULLADDRESS,ZIPCODE,ASSESSMENT_NBHD,CLUSTER_,WARD,Units,Residential,mobilityscore,walkscore
0,38.8688316,-77.01825188,221 2ND AVENUE SW,20024,Ft. McNair,Cluster 9,Ward 6,59,0.20339,54.0,40.0
1,38.8675634,-77.01823116,241 2ND AVENUE SW,20024,Ft. McNair,Cluster 9,Ward 6,59,0.20339,49.0,35.0
2,38.90749873,-76.94332398,4300 ANACOSTIA AVENUE NE,20019,DC Stadium Area,Cluster 29,Ward 7,57,0.0,53.0,40.0
3,38.87882265,-76.97541985,1900 M STREET SE,20003,Anacostia Park,Cluster 26,Ward 6,16,0.0,49.0,26.0
4,38.87269412,-76.99086222,1105 O STREET SE,20003,Anacostia Park,Cluster 27,Ward 6,16,0.0,71.0,77.0


In [39]:
df.to_csv('data/dc_wscores.csv',index=False)