In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

%matplotlib inline

In [3]:
# Load the 2018 trip records and preview the first rows.
df_chicago = pd.read_csv(filepath_or_buffer='../dataset/chicago_2018.csv')
df_chicago.head()

Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type
0,2018-04-01 00:04:44,2018-04-01 00:13:03,22,171,May St & Taylor St,May St & Cullerton St,3819,Subscriber
1,2018-04-01 00:06:42,2018-04-01 00:27:07,157,190,Lake Shore Dr & Wellington Ave,Southport Ave & Wrightwood Ave,5000,Subscriber
2,2018-04-01 00:07:19,2018-04-01 00:23:19,106,106,State St & Pearson St,State St & Pearson St,5165,Customer
3,2018-04-01 00:07:33,2018-04-01 00:14:47,241,171,Morgan St & Polk St,May St & Cullerton St,3851,Subscriber
4,2018-04-01 00:10:23,2018-04-01 00:22:12,228,219,Damen Ave & Melrose Ave,Damen Ave & Cortland St,5065,Subscriber


In [4]:
# Load the station table (names, dock counts, status, coordinates) and preview it.
df_chicago_stations = pd.read_csv(filepath_or_buffer='../dataset/Divvy_Bicycle_Stations.csv')
df_chicago_stations.head()

Unnamed: 0,ID,Station Name,Total Docks,Docks in Service,Status,Latitude,Longitude,Location
0,1594046383808271024,Troy St & Jackson Blvd,9,9,In Service,41.877505,-87.70485,"(41.877505, -87.70485)"
1,641,Central Park Ave & Bloomingdale Ave,11,11,In Service,41.914166,-87.716755,"(41.914166, -87.716755)"
2,1683527931525155814,Public Rack - Cornell Ave & 87th Pl,1,1,In Service,41.736881,-87.583146,"(41.73688127, -87.58314552)"
3,367,Racine Ave & 35th St,15,15,In Service,41.830689,-87.656211,"(41.83068856472101, -87.65621066093445)"
4,1673852313397164648,Public Rack - Francisco Ave & Touhy Ave,2,2,In Service,42.011865,-87.701317,"(42.011865, -87.701317)"


In [5]:
# Key the station table by its ID, discard the dock/status columns and
# switch the remaining columns to short snake_case names.
# NOTE(review): 'x' receives Latitude and 'y' receives Longitude, which is the
# reverse of the usual x=longitude / y=latitude plotting convention -- confirm
# that consumers of this table expect that orientation.
df_chicago_stations_indexed_by_id = (
    df_chicago_stations
    .set_index('ID')
    .drop(columns=['Total Docks', 'Docks in Service', 'Status'])
    .rename(columns={
        'Station Name': 'station_name',
        'Latitude': 'x',
        'Longitude': 'y',
        'Location': 'position',
    })
)
df_chicago_stations_indexed_by_id.head()

Unnamed: 0_level_0,station_name,x,y,position
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1594046383808271024,Troy St & Jackson Blvd,41.877505,-87.70485,"(41.877505, -87.70485)"
641,Central Park Ave & Bloomingdale Ave,41.914166,-87.716755,"(41.914166, -87.716755)"
1683527931525155814,Public Rack - Cornell Ave & 87th Pl,41.736881,-87.583146,"(41.73688127, -87.58314552)"
367,Racine Ave & 35th St,41.830689,-87.656211,"(41.83068856472101, -87.65621066093445)"
1673852313397164648,Public Rack - Francisco Ave & Touhy Ave,42.011865,-87.701317,"(42.011865, -87.701317)"


In [6]:
# Sorted set of every station id that occurs in the trips, either as the
# start or as the end of a ride.
relevant_station_ids = np.union1d(
    *(df_chicago[column].unique() for column in ('start_station_id', 'end_station_id'))
)

In [7]:
# Trip station ids that also have a row in the station table.
available_stations_ids = np.intersect1d(
    df_chicago_stations_indexed_by_id.index,
    relevant_station_ids,
)

In [8]:
# Trip station ids that have no row in the station table.
missing_stations_ids = np.setdiff1d(
    relevant_station_ids,
    available_stations_ids,
)
missing_stations_ids

array([360, 361, 363, 397, 459, 512, 606, 607, 608, 609, 610, 611, 612,
       613, 614, 615, 616, 617, 618, 651])

In [9]:
# Keep only the station-table rows whose id actually occurs in the trip data.
df_stations = df_chicago_stations_indexed_by_id.loc[available_stations_ids, :]
df_stations.head()

Unnamed: 0_level_0,station_name,x,y,position
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,Buckingham Fountain,41.876423,-87.620339,"(41.876423, -87.620339)"
3,Shedd Aquarium,41.867226,-87.615355,"(41.86722595682, -87.6153553902)"
4,Burnham Harbor,41.857412,-87.613792,"(41.85741178707404, -87.61379152536392)"
5,State St & Harrison St,41.874053,-87.627716,"(41.874053, -87.627716)"
6,Dusable Harbor,41.886976,-87.612813,"(41.886976, -87.612813)"


In [10]:
# Recover a name for every station id that is missing from the station table
# by looking it up in the trip records themselves.
#
# The lookups are built once instead of re-filtering the whole trip frame
# (up to four boolean masks) for every id. `drop_duplicates` keeps the first
# occurrence, so each lookup holds the name from the first trip row that
# mentions the station -- the same row the per-id `.iloc[0]` would pick.
first_start_names = (
    df_chicago
    .drop_duplicates(subset='start_station_id')
    .set_index('start_station_id')['start_station_name']
)
first_end_names = (
    df_chicago
    .drop_duplicates(subset='end_station_id')
    .set_index('end_station_id')['end_station_name']
)

# Prefer the name recorded when the station was a trip start; fall back to
# the name recorded when it was a trip end; NaN if it appears in neither.
missing_stations = {
    station_id: first_start_names.get(
        station_id, first_end_names.get(station_id, np.nan)
    )
    for station_id in missing_stations_ids
}

In [11]:
# Show the id -> name mapping recovered for the stations missing from the station table.
missing_stations

{360: 'DIVVY Map Frame B/C Station',
 361: 'DIVVY CASSETTE REPAIR MOBILE STATION',
 363: 'TS ~ DIVVY PARTS TESTING',
 397: 'Saginaw Ave & Exchange Ave',
 459: 'Lakefront Trail & Bryn Mawr Ave',
 512: 'BBB ~ Divvy Parts Testing',
 606: 'Forest Ave & Chicago Ave',
 607: 'Cuyler Ave & Augusta St',
 608: 'Humphrey Ave & Ontario St',
 609: 'Forest Ave & Lake St',
 610: 'Marion St & South Blvd',
 611: 'Oak Park Ave & South Blvd',
 612: 'Ridgeland Ave & Lake St',
 613: 'Wisconsin Ave & Madison St (Temp)',
 614: 'East Ave & Madison St',
 615: 'Lombard Ave & Madison St',
 616: 'Oak Park Ave & Harrison St',
 617: 'East Ave & Garfield St',
 618: 'Lombard Ave & Garfield St',
 651: 'Michigan Ave & 71st St'}

In [12]:
# One-column frame (station_name) for the recovered stations, indexed by
# station id under the index name 'ID'.
df_missing_stations = pd.DataFrame(
    data=missing_stations.values(),
    index=missing_stations.keys(),
    columns=['station_name'],
).rename_axis('ID')

In [13]:
# Combine the stations found in the station table with the stations whose
# names were recovered from the trips. The recovered rows only carry
# `station_name`, so their x / y / position values come out as NaN.
#
# The left-hand frame is rebuilt from `df_chicago_stations_indexed_by_id`
# rather than read back from `df_stations`, so re-running this cell (also
# after `df_stations` was reset/sorted further down) yields the same result
# instead of appending the recovered stations a second time.
df_stations = pd.concat([
    df_chicago_stations_indexed_by_id.loc[available_stations_ids],
    df_missing_stations,
])
df_stations

Unnamed: 0_level_0,station_name,x,y,position
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,Buckingham Fountain,41.876423,-87.620339,"(41.876423, -87.620339)"
3,Shedd Aquarium,41.867226,-87.615355,"(41.86722595682, -87.6153553902)"
4,Burnham Harbor,41.857412,-87.613792,"(41.85741178707404, -87.61379152536392)"
5,State St & Harrison St,41.874053,-87.627716,"(41.874053, -87.627716)"
6,Dusable Harbor,41.886976,-87.612813,"(41.886976, -87.612813)"
...,...,...,...,...
615,Lombard Ave & Madison St,,,
616,Oak Park Ave & Harrison St,,,
617,East Ave & Garfield St,,,
618,Lombard Ave & Garfield St,,,


In [14]:
# Turn the 'ID' index back into a regular column, then order the rows by it.
df_stations = df_stations.reset_index().sort_values(by='ID')

In [15]:
# Write the combined station table to disk; the positional row index is not exported.
df_stations.to_csv(
    path_or_buf=r'../dataset/chicago_stations.csv',
    index=False,
)