This notebook retrieves cycling directions between Citi Bike stations using the Mapbox Directions API. It takes a station network dataframe, which lists each source station and target station pair, and requests a route from the API for every pair.

In [5]:
import datetime
import json
import os
import time

import numpy as np
import pandas as pd
import requests

from mapbox import Directions

In [6]:
# Directory that holds both input files. Keeping it in one constant means the
# notebook can be pointed at another copy of the data by editing a single line.
DATA_DIR = '~/Desktop/Pratt/fall2018/ad_data_vis/analysis/station_network'

# Trip counts per station pair (columns used below: source, target, count).
station_network = pd.read_csv(DATA_DIR + '/dir_station_network_08-20-2018-10-28-2018.csv')
# Station lookup table; later cells join on its 'id' column and read 'lat'/'lon'.
station_nodes = pd.read_csv(DATA_DIR + '/station_nodes.csv')
station_network.head()

Unnamed: 0,source,target,count
0,0,0,2086
1,72,72,206
2,72,79,13
3,72,127,73
4,72,128,15


In [7]:
#get lat and lon for source and target stations

# station_nodes is joined twice: once on the trip's source id and once on its
# target id. Both are left joins, so every station pair is kept and an id with
# no match in station_nodes simply gets NaN coordinates.
station = (
    station_network
    .merge(station_nodes, how='left', left_on='source', right_on='id')
    [['source', 'target', 'count', 'lat', 'lon']]
    .rename(columns={'lat': 'source_lat', 'lon': 'source_lon'})
    .merge(station_nodes, how='left', left_on='target', right_on='id')
    [['source', 'target', 'count', 'source_lat', 'source_lon', 'lat', 'lon']]
    .rename(columns={'lat': 'target_lat', 'lon': 'target_lon'})
)

station.head()

Unnamed: 0,source,target,count,source_lat,source_lon,target_lat,target_lon
0,0,0,2086,,,,
1,72,72,206,40.767272,-73.993929,40.767272,-73.993929
2,72,79,13,40.767272,-73.993929,40.719116,-74.006667
3,72,127,73,40.767272,-73.993929,40.731724,-74.006744
4,72,128,15,40.767272,-73.993929,40.727103,-74.002971


In [8]:
#drop null values
# Keep only the rows in which every column holds a value; a row with a missing
# value in any column is discarded.
station = station[station.notna().all(axis=1)]
station.head()

Unnamed: 0,source,target,count,source_lat,source_lon,target_lat,target_lon
1,72,72,206,40.767272,-73.993929,40.767272,-73.993929
2,72,79,13,40.767272,-73.993929,40.719116,-74.006667
3,72,127,73,40.767272,-73.993929,40.731724,-74.006744
4,72,128,15,40.767272,-73.993929,40.727103,-74.002971
5,72,146,4,40.767272,-73.993929,40.71625,-74.009106


In [9]:
#get rid of trips that start and end at the same station
# Element-wise comparison of the two id columns: True where source and target differ.
station = station.loc[station['source'].ne(station['target'])]

In [10]:
#filter dataframe to pass through the api in chunks
# Half-open range [CHUNK_START, CHUNK_END) of rows, counted from the busiest
# station pair, that this run sends to the API. Edit these two values to
# process a different chunk instead of commenting filter lines in and out.
CHUNK_START = 0
CHUNK_END = 70000

# Order station pairs from most to fewest trips. reset_index() renumbers the
# rows 0..n-1 in that order and keeps the previous row label in an 'index' column.
# NOTE: this reassigns `station`, so re-running the cell on its own output adds
# a further index column; rebuild `station` first when re-running.
station = station.sort_values(by='count',ascending=False).reset_index()
# The index is now 0..n-1, so a positional slice picks exactly the rows whose
# new index lies inside the chunk.
station = station.iloc[CHUNK_START:CHUNK_END]
station.head()

Unnamed: 0,index,source,target,count,source_lat,source_lon,target_lat,target_lon
0,111682,2006,3282,1738,40.765909,-73.976342,40.78307,-73.95939
1,69311,432,3263,1437,40.726218,-73.983799,40.729515,-73.990753
2,78898,460,3093,1346,40.712859,-73.965903,40.717452,-73.958509
3,73425,445,293,1228,40.727408,-73.98142,40.730207,-73.991026
4,101038,519,492,1184,40.751873,-73.977706,40.7502,-73.990931


In [55]:
#call the api
MAPBOX_DIRECTIONS_URL = 'https://api.mapbox.com/directions/v5/mapbox/cycling/'
REQUEST_TIMEOUT = 30   # seconds to wait on one request before giving up
MAX_ATTEMPTS = 5       # tries per station pair while the API answers 429
RETRY_WAIT = 15        # seconds to pause after a 429 before trying again

# Use `token` when the kernel already defines it; otherwise read the token from
# the environment so the cell also works after Restart & Run All without a
# credential being written into the notebook.
mapbox_token = globals().get('token') or os.environ['MAPBOX_ACCESS_TOKEN']
query = {'geometries': 'geojson', 'access_token': mapbox_token}

my_dict = {}            # row index -> parsed JSON body of a successful response
failed_requests = {}    # row index -> HTTP status code or exception class name

# One Session reuses the underlying connection across all requests.
with requests.Session() as session:
    for index, row in station.iterrows():
        # The API expects "lon,lat;lon,lat" in the URL path.
        coordinates = (
            str(row['source_lon']) + ',' + str(row['source_lat']) + ';'
            + str(row['target_lon']) + ',' + str(row['target_lat'])
        )
        try:
            for attempt in range(MAX_ATTEMPTS):
                routes = session.get(
                    MAPBOX_DIRECTIONS_URL + coordinates,
                    params=query,
                    timeout=REQUEST_TIMEOUT,
                )
                # 429 means the rate limit was hit: wait and retry the same pair.
                if routes.status_code != 429:
                    break
                time.sleep(RETRY_WAIT)
            if routes.ok:
                my_dict[index] = routes.json()
            else:
                # Keep error bodies out of my_dict so the saved file only
                # contains responses that returned successfully.
                failed_requests[index] = routes.status_code
        except (requests.RequestException, ValueError) as error:
            # Network failure, timeout, or a body that is not valid JSON.
            # Only the class name is stored because the exception message can
            # contain the full request URL, including the access token.
            failed_requests[index] = type(error).__name__

# Number of stored routes and number of station pairs that need another attempt.
len(my_dict), len(failed_requests)

In [56]:
#save the api responses
# Serialise the whole response mapping as indented JSON with keys in sorted
# order and write it to a file in the current working directory.
with open('routes60k.json', 'w') as out_file:
    out_file.write(json.dumps(my_dict, indent=4, sort_keys=True))