In [225]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
import pickle

import json
import plotly.plotly as py
from plotly.graph_objs import *


### Load Data

In [None]:
# Define trip data path
trip_data_path = '/home/cneiderer/Metis/boston_bike_data/trip_data'

In [None]:
# Build dataframes from old data (pre General Bikeshare Feed Specification (GBFS))
# and new data (postGeneral Bikeshare Feed Specification (GBFS))
old_df = pd.DataFrame()
new_df = pd.DataFrame()
for f in os.listdir(trip_data_path):
    tmp_df = pd.read_csv(os.path.join(trip_data_path, f))
    if f[:6] == 'hubway':
        old_df = pd.concat([old_df, tmp_df], axis=0, ignore_index=True)
    else:
        new_df = pd.concat([new_df, tmp_df], axis=0, ignore_index=True)

In [None]:
# Save old df
with open('old_raw.pkl', 'wb') as f:
    pickle.dump(old_df, f)

In [None]:
# Save new df    
with open('new_raw.pkl', 'wb') as f:
    pickle.dump(new_df, f)

### Clean-Up Old Data

In [50]:
# Load old df
with open('old_raw.pkl', 'rb') as f:
    old_df = pickle.load(f)

In [51]:
old_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2762938 entries, 0 to 2762937
Data columns (total 11 columns):
Duration                int64
Start date              object
End date                object
Start station number    object
Start station name      object
End station number      object
End station name        object
Bike number             object
Member type             object
Zip code                object
Gender                  object
dtypes: int64(1), object(10)
memory usage: 231.9+ MB


In [52]:
old_df.head()

Unnamed: 0,Duration,Start date,End date,Start station number,Start station name,End station number,End station name,Bike number,Member type,Zip code,Gender
0,397101,12/31/2014 23:58,1/1/2015 0:05,B32004,Aquarium Station - 200 Atlantic Ave.,D32022,TD Garden - Causeway at Portal Park #1,T01335,Member,2148,Male
1,542806,12/31/2014 23:51,1/1/2015 0:00,M32006,MIT at Mass Ave / Amherst St,M32007,Cambridge St - at Columbia St / Webster Ave,T01426,Member,2143,Male
2,927775,12/31/2014 23:30,12/31/2014 23:46,D32005,Boston Public Library - 700 Boylston St.,A32008,Buswell St. at Park Dr.,B01570,Member,2215,Male
3,269771,12/31/2014 23:26,12/31/2014 23:31,B32004,Aquarium Station - 200 Atlantic Ave.,A32010,South Station - 700 Atlantic Ave.,T01205,Member,2043,Male
4,959775,12/31/2014 23:07,12/31/2014 23:23,D32010,Cross St. at Hanover St.,B32004,Aquarium Station - 200 Atlantic Ave.,T01306,Member,1945,Male


In [54]:
# Rename columns
old_df.columns = ['Duration', 'StartTime', 'StopTime', 
                  'StartID', 'StartName', 
                  'StopID', 'StopName', 
                  'BikeID', 'UserType', 'ZipCode', 'Gender']

In [55]:
old_df.head()

Unnamed: 0,Duration,StartTime,StopTime,StartID,StartName,StopID,StopName,BikeID,UserType,ZipCode,Gender
0,397101,12/31/2014 23:58,1/1/2015 0:05,B32004,Aquarium Station - 200 Atlantic Ave.,D32022,TD Garden - Causeway at Portal Park #1,T01335,Member,2148,Male
1,542806,12/31/2014 23:51,1/1/2015 0:00,M32006,MIT at Mass Ave / Amherst St,M32007,Cambridge St - at Columbia St / Webster Ave,T01426,Member,2143,Male
2,927775,12/31/2014 23:30,12/31/2014 23:46,D32005,Boston Public Library - 700 Boylston St.,A32008,Buswell St. at Park Dr.,B01570,Member,2215,Male
3,269771,12/31/2014 23:26,12/31/2014 23:31,B32004,Aquarium Station - 200 Atlantic Ave.,A32010,South Station - 700 Atlantic Ave.,T01205,Member,2043,Male
4,959775,12/31/2014 23:07,12/31/2014 23:23,D32010,Cross St. at Hanover St.,B32004,Aquarium Station - 200 Atlantic Ave.,T01306,Member,1945,Male


In [81]:
# Convert timestamps to datetime objects
old_df.StartTime = pd.to_datetime(old_df.StartTime, format='%m/%d/%Y %H:%M')
old_df.StopTime = pd.to_datetime(old_df.StopTime, format='%m/%d/%Y %H:%M')

In [82]:
# Convert duration to time in seconds
old_df.Duration = np.round(old_df.Duration.astype(int) / 1000).astype(int)

In [83]:
# Remap user types
old_df.UserType = old_df.UserType.map({'Member': 'Subscriber', 'Casual': 'Customer'})
# Remap gender
old_df.Gender = old_df.Gender.fillna('Unknown')

In [84]:
# Zip code conversion helper function
def convert_zipcode(z):
    if isinstance(z, str):
        try:
            z = int(z)
        except:
            z = (z[:6].strip('-').replace(' ', ''), type(z))
            try:
                z = int(z)
            except:
                z = np.nan
    return z
# Convert zip code
old_df.ZipCode = old_df.ZipCode.apply(convert_zipcode)

In [85]:
old_df.head()

Unnamed: 0,Duration,StartTime,StopTime,StartID,StartName,StopID,StopName,BikeID,UserType,ZipCode,Gender
0,397,2014-12-31 23:58:00,2015-01-01 00:05:00,B32004,Aquarium Station - 200 Atlantic Ave.,D32022,TD Garden - Causeway at Portal Park #1,T01335,Subscriber,2148.0,Male
1,543,2014-12-31 23:51:00,2015-01-01 00:00:00,M32006,MIT at Mass Ave / Amherst St,M32007,Cambridge St - at Columbia St / Webster Ave,T01426,Subscriber,2143.0,Male
2,928,2014-12-31 23:30:00,2014-12-31 23:46:00,D32005,Boston Public Library - 700 Boylston St.,A32008,Buswell St. at Park Dr.,B01570,Subscriber,2215.0,Male
3,270,2014-12-31 23:26:00,2014-12-31 23:31:00,B32004,Aquarium Station - 200 Atlantic Ave.,A32010,South Station - 700 Atlantic Ave.,T01205,Subscriber,2043.0,Male
4,960,2014-12-31 23:07:00,2014-12-31 23:23:00,D32010,Cross St. at Hanover St.,B32004,Aquarium Station - 200 Atlantic Ave.,T01306,Subscriber,1945.0,Male


In [86]:
old_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2762938 entries, 0 to 2762937
Data columns (total 11 columns):
Duration     int64
StartTime    datetime64[ns]
StopTime     datetime64[ns]
StartID      object
StartName    object
StopID       object
StopName     object
BikeID       object
UserType     object
ZipCode      float64
Gender       object
dtypes: datetime64[ns](2), float64(1), int64(1), object(7)
memory usage: 231.9+ MB


In [None]:
# Save old df
with open('old_clean.pkl', 'wb') as f:
    pickle.dump(old_df, f)

### Clean-Up New Data

In [137]:
# Load new df
with open('new_raw.pkl', 'rb') as f:
    new_df = pickle.load(f)

In [138]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3731292 entries, 0 to 3731291
Data columns (total 15 columns):
tripduration               int64
starttime                  object
stoptime                   object
start station id           int64
start station name         object
start station latitude     float64
start station longitude    float64
end station id             object
end station name           object
end station latitude       object
end station longitude      object
bikeid                     int64
usertype                   object
birth year                 object
gender                     int64
dtypes: float64(2), int64(4), object(9)
memory usage: 427.0+ MB


In [139]:
new_df.head()

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender
0,196,2016-11-01 00:00:35,2016-11-01 00:03:52,95,Cambridge St - at Columbia St / Webster Ave,42.372969,-71.094445,78,Union Square - Somerville,42.3796,-71.0953,1380,Subscriber,1983,2
1,222,2016-11-01 00:01:23,2016-11-01 00:05:06,23,Mayor Martin J Walsh - 28 State St,42.35892,-71.057629,190,Nashua Street at Red Auerbach Way,42.3656,-71.0642,866,Subscriber,1980,1
2,483,2016-11-01 00:04:19,2016-11-01 00:12:22,178,MIT Pacific St at Purrington St,42.359573,-71.101295,184,Sidney Research Campus/ Erie Street at Waverly,42.3578,-71.1039,35,Subscriber,1990,1
3,141,2016-11-01 00:05:31,2016-11-01 00:07:53,25,Tremont St / W Newton St,42.341332,-71.076847,21,Prudential Center / Belvidere,42.346,-71.0826,1917,Customer,\N,0
4,154,2016-11-01 00:05:59,2016-11-01 00:08:34,76,Central Sq Post Office / Cambridge City Hall a...,42.366426,-71.105495,139,Dana Park,42.3616,-71.1074,603,Subscriber,1990,1


In [140]:
new_df.columns = ['Duration', 'StartTime', 'StopTime',
                  'StartID', 'StartName', 'StartLat', 'StartLon', 
                  'StopID', 'StopName', 'StopLat', 'StopLon',
                  'BikeID', 'UserType', 'BirthYear', 'Gender']

In [141]:
new_df.head()

Unnamed: 0,Duration,StartTime,StopTime,StartID,StartName,StartLat,StartLon,StopID,StopName,StopLat,StopLon,BikeID,UserType,BirthYear,Gender
0,196,2016-11-01 00:00:35,2016-11-01 00:03:52,95,Cambridge St - at Columbia St / Webster Ave,42.372969,-71.094445,78,Union Square - Somerville,42.3796,-71.0953,1380,Subscriber,1983,2
1,222,2016-11-01 00:01:23,2016-11-01 00:05:06,23,Mayor Martin J Walsh - 28 State St,42.35892,-71.057629,190,Nashua Street at Red Auerbach Way,42.3656,-71.0642,866,Subscriber,1980,1
2,483,2016-11-01 00:04:19,2016-11-01 00:12:22,178,MIT Pacific St at Purrington St,42.359573,-71.101295,184,Sidney Research Campus/ Erie Street at Waverly,42.3578,-71.1039,35,Subscriber,1990,1
3,141,2016-11-01 00:05:31,2016-11-01 00:07:53,25,Tremont St / W Newton St,42.341332,-71.076847,21,Prudential Center / Belvidere,42.346,-71.0826,1917,Customer,\N,0
4,154,2016-11-01 00:05:59,2016-11-01 00:08:34,76,Central Sq Post Office / Cambridge City Hall a...,42.366426,-71.105495,139,Dana Park,42.3616,-71.1074,603,Subscriber,1990,1


In [142]:
# Remove rows with missing data
try:
    new_df = new_df[~(new_df['StartID'] == '\\N')]
except:
    pass

try:
    new_df = new_df[~(new_df['StopID'] == '\\N')]
except:
    pass

  result = getattr(x, name)(y)


In [221]:
# timestamp conversion helper function
def convert_timestamp(t):
    try:
        t = pd.to_datetime(t, format='%Y/%m/%d %H:%M:%S')
    except:
        t = pd.to_datetime(t, format='%m/%d/%Y %H:%M:%S')
    return t

In [222]:
# Convert timestamps to datetime objects
new_df.StartTime = new_df.StartTime.apply(convert_timestamp)

In [223]:
new_df.StopTime = new_df.StopTime.apply(convert_timestamp)

In [134]:
# Convert duration to integer
new_df.Duration = new_df.Duration.astype(int)
# Convert start ID to integer
new_df.StartID = new_df.StartID.astype(int)
# Convert stop ID to integer
new_df.StopID = new_df.StopID.astype(int)

In [128]:
# Convert lat and lon to float
new_df.StartLat = new_df.StartLat.astype(float)
new_df.StartLon = new_df.StartLon.astype(float)
new_df.StopLat = new_df.StopLat.astype(float)
new_df.StopLon = new_df.StopLon.astype(float)

In [130]:
# Remap birth year
new_df.BirthYear = [np.nan if x == '\\N' else x for x in new_df.BirthYear]
# Remap gender
new_df.Gender = new_df.Gender.astype(int)
new_df.Gender = new_df.Gender.map({0: 'Unknown', 1: 'Male', 2:'Female'})

In [135]:
new_df.head()

Unnamed: 0,Duration,StartTime,StopTime,StartID,StartName,StartLat,StartLon,StopID,StopName,StopLat,StopLon,BikeID,UserType,BirthYear,Gender
0,196,2016-11-01 00:00:35,2016-11-01 00:03:52,196,Cambridge St - at Columbia St / Webster Ave,42.372969,-71.094445,196,Union Square - Somerville,42.379637,-71.095319,1380,Subscriber,1983.0,Female
1,222,2016-11-01 00:01:23,2016-11-01 00:05:06,222,Mayor Martin J Walsh - 28 State St,42.35892,-71.057629,222,Nashua Street at Red Auerbach Way,42.365598,-71.064248,866,Subscriber,1980.0,Male
2,483,2016-11-01 00:04:19,2016-11-01 00:12:22,483,MIT Pacific St at Purrington St,42.359573,-71.101295,483,Sidney Research Campus/ Erie Street at Waverly,42.357753,-71.103934,35,Subscriber,1990.0,Male
3,141,2016-11-01 00:05:31,2016-11-01 00:07:53,141,Tremont St / W Newton St,42.341332,-71.076847,141,Prudential Center / Belvidere,42.345959,-71.082578,1917,Customer,,Unknown
4,154,2016-11-01 00:05:59,2016-11-01 00:08:34,154,Central Sq Post Office / Cambridge City Hall a...,42.366426,-71.105495,154,Dana Park,42.361589,-71.107437,603,Subscriber,1990.0,Male


In [136]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3731291 entries, 0 to 3731291
Data columns (total 15 columns):
Duration     int64
StartTime    datetime64[ns]
StopTime     datetime64[ns]
StartID      int64
StartName    object
StartLat     float64
StartLon     float64
StopID       int64
StopName     object
StopLat      float64
StopLon      float64
BikeID       int64
UserType     object
BirthYear    object
Gender       object
dtypes: datetime64[ns](2), float64(4), int64(4), object(5)
memory usage: 455.5+ MB


In [145]:
station_info_path = '/home/cneiderer/Metis/boston_bike_data/station_data'
stations1_file = 'Hubway_Stations_2011_2016.csv'
stations2_file = 'Hubway_Stations_as_of_July_2017.csv'

In [146]:
stations1_df = pd.read_csv(os.path.join(station_info_path, stations1_file))
stations2_df = pd.read_csv(os.path.join(station_info_path, stations2_file))

In [152]:
stations1_df = stations1_df[stations2_df.columns]
stations1_df = stations1_df.sort_values('Station ID', axis=0)
stations1_df.head()

Unnamed: 0,Station ID,Station,Latitude,Longitude,Municipality,# of Docks
0,A32000,Fan Pier,42.353287,-71.044389,Boston,15
1,A32001,Union Square - Brighton Ave. at Cambridge St.,42.353334,-71.137313,Boston,15
2,A32002,Agganis Arena - 925 Comm Ave.,42.351246,-71.115639,Boston,19
3,A32003,B.U. Central - 725 Comm. Ave.,42.350406,-71.108279,Boston,11
4,A32004,Longwood Ave / Binney St,42.338629,-71.1065,Boston,15


In [175]:
stations1_df.shape

(187, 6)

In [176]:
len(set(stations1_df['Station ID']))

187

In [153]:
stations2_df = stations2_df.sort_values('Station ID', axis=0)
stations2_df.head()

Unnamed: 0,Station ID,Station,Latitude,Longitude,Municipality,# of Docks
3,A32000,Fan Pier,42.353287,-71.044389,Boston,15
4,A32001,Union Square - Brighton Ave at Cambridge St,42.353334,-71.137313,Boston,15
5,A32002,Commonwealth Ave at Buick St,42.351246,-71.115639,Boston,19
6,A32003,B.U. Central - 725 Comm. Ave.,42.350406,-71.108279,Boston,11
7,A32004,Longwood Ave at Binney St,42.338629,-71.1065,Boston,15


In [173]:
stations1_df.shape

(187, 6)

In [174]:
len(set(stations1_df['Station ID']))

187

In [177]:
test = stations1_df.merge(stations2_df, on='Station ID', how='outer')
test

Unnamed: 0,Station ID,Station_x,Latitude_x,Longitude_x,Municipality_x,# of Docks_x,Station_y,Latitude_y,Longitude_y,Municipality_y,# of Docks_y
0,A32000,Fan Pier,42.353287,-71.044389,Boston,15.0,Fan Pier,42.353287,-71.044389,Boston,15
1,A32001,Union Square - Brighton Ave. at Cambridge St.,42.353334,-71.137313,Boston,15.0,Union Square - Brighton Ave at Cambridge St,42.353334,-71.137313,Boston,15
2,A32002,Agganis Arena - 925 Comm Ave.,42.351246,-71.115639,Boston,19.0,Commonwealth Ave at Buick St,42.351246,-71.115639,Boston,19
3,A32003,B.U. Central - 725 Comm. Ave.,42.350406,-71.108279,Boston,11.0,B.U. Central - 725 Comm. Ave.,42.350406,-71.108279,Boston,11
4,A32004,Longwood Ave / Binney St,42.338629,-71.106500,Boston,15.0,Longwood Ave at Binney St,42.338629,-71.106500,Boston,15
5,A32005,Harvard Real Estate - Brighton Mills - 370 Wes...,42.361667,-71.138020,Boston,15.0,Brighton Mills - 370 Western Ave,42.361667,-71.138020,Boston,15
6,A32006,Harvard University Housing - 111 Western Ave. ...,42.365064,-71.119233,Boston,15.0,Soldiers Field Park - 111 Western Ave,42.365064,-71.119233,Boston,15
7,A32008,Buswell St. at Park Dr.,42.347241,-71.105301,Boston,15.0,Park Dr at Buswell St,42.347241,-71.105301,Boston,15
8,A32009,Tremont St / W Newton St,42.341332,-71.076847,Boston,15.0,South End Library - Tremont St at W Newton St,42.341332,-71.076847,Boston,15
9,A32010,South Station - 700 Atlantic Ave.,42.352175,-71.055547,Boston,46.0,South Station - 700 Atlantic Ave,42.352175,-71.055547,Boston,46


In [181]:
sum(test.Latitude_x == test.Latitude_y)

187

In [182]:
test[test.isnull().any(axis=1)]

Unnamed: 0,Station ID,Station_x,Latitude_x,Longitude_x,Municipality_x,# of Docks_x,Station_y,Latitude_y,Longitude_y,Municipality_y,# of Docks_y
187,K32006,,,,,,Commonwealth Ave At Babcock St,42.351547,-71.121263,Brookline,19
188,M32045,,,,,,Museum of Science,42.36769,-71.071163,Cambridge,19
189,M32050,,,,,,Verizon Innovation Hub 10 Ware Street,42.372509,-71.113054,Cambridge,19
190,M32051,,,,,,Fresh Pond Reservation,42.382678,-71.143479,Cambridge,19
191,M32052,,,,,,Cambridge Dept. of Public Works -147 Hampshire...,42.371197,-71.097599,Cambridge,19
192,Silber Way,,,,,,Silber Way,42.349496,-71.100576,Brookline,19


In [183]:
len(set(new_df.StartName))

320

In [160]:
stations_df = pd.concat([stations1_df, stations2_df], axis=0)
stations_df = stations_df[~stations_df.duplicated()]
stations_df.columns = ['StationID', 'StationName', 'Lat', 'Lon', 'Municipality', 'NumDocks']
stations_df.head()

Unnamed: 0,StationID,StationName,Lat,Lon,Municipality,NumDocks
0,A32000,Fan Pier,42.353287,-71.044389,Boston,15
1,A32001,Union Square - Brighton Ave. at Cambridge St.,42.353334,-71.137313,Boston,15
2,A32002,Agganis Arena - 925 Comm Ave.,42.351246,-71.115639,Boston,19
3,A32003,B.U. Central - 725 Comm. Ave.,42.350406,-71.108279,Boston,11
4,A32004,Longwood Ave / Binney St,42.338629,-71.1065,Boston,15


In [170]:
stations_df.shape

(300, 6)

In [172]:
len(set(stations_df.StationID))

193

In [162]:
tmp = old_df[['StartID']].merge(stations_df, how='left', left_on='StartID', right_on='StationID')

In [168]:
len(tmp[tmp.isnull().any(axis=1)]) / old_df.shape[0]

0.01607238381751599

In [169]:
tmp = tmp[~tmp.isnull().any(axis=1)]
tmp

Unnamed: 0,StartID,StationID,StationName,Lat,Lon,Municipality,NumDocks
0,B32004,B32004,Aquarium Station - 200 Atlantic Ave.,42.359770,-71.051601,Boston,19.0
1,B32004,B32004,Aquarium T Stop - 200 Atlantic Ave,42.359770,-71.051601,Boston,19.0
2,M32006,M32006,MIT at Mass Ave / Amherst St,42.358100,-71.093198,Cambridge,27.0
3,D32005,D32005,Boston Public Library - 700 Boylston St.,42.349673,-71.077303,Boston,25.0
4,D32005,D32005,Copley Square - Dartmouth St at Boylston St,42.349673,-71.077303,Boston,25.0
5,B32004,B32004,Aquarium Station - 200 Atlantic Ave.,42.359770,-71.051601,Boston,19.0
6,B32004,B32004,Aquarium T Stop - 200 Atlantic Ave,42.359770,-71.051601,Boston,19.0
7,D32010,D32010,Cross St. at Hanover St.,42.362811,-71.056067,Boston,18.0
8,D32010,D32010,Cross St at Hanover St,42.362811,-71.056067,Boston,18.0
9,M32006,M32006,MIT at Mass Ave / Amherst St,42.358100,-71.093198,Cambridge,27.0


In [190]:
station_info = {}
for stid in set(new_df.StartID):
    for st2 in set(new_df[new_df.StartID == stid].StartName):
        station_info[stid] = [st2, np.unique(new_df[(new_df.StartID == stid) & (new_df.StartName == st2)][['StartLat', 'StartLon']].as_matrix(), axis=0)]
    

In [191]:
station_info

{1: ['18 Dorrance Warehouse', array([[ 42.387151, -71.075978]])],
 3: ['Colleges of the Fenway', array([[ 42.340021, -71.100812]])],
 4: ['Tremont St. at Berkeley St.', array([[ 42.345392, -71.069616]])],
 5: ['Northeastern U / North Parking Lot', array([[ 42.341814, -71.090179]])],
 6: ['Cambridge St. at Joy St.', array([[ 42.361174, -71.065142]])],
 7: ['Fan Pier', array([[ 42.3529408 , -71.04388475],
         [ 42.3532874 , -71.044389  ],
         [ 42.35328743, -71.04438901],
         [ 42.35339051, -71.0445714 ]])],
 8: ['Union Square - Brighton Ave. at Cambridge St.',
  array([[ 42.353334, -71.137313]])],
 9: ['Agganis Arena - 925 Comm Ave.', array([[ 42.351246, -71.115639]])],
 10: ['B.U. Central - 725 Comm. Ave.', array([[ 42.350406, -71.108279]])],
 11: ['Longwood Ave / Binney St', array([[ 42.338629, -71.1065  ]])],
 12: ['Ruggles Station / Columbus Ave.', array([[ 42.335911, -71.088496]])],
 13: ['Boston Medical Center - E Concord St at Harrison Ave',
  array([[ 42.336437, -

In [192]:
len(station_info.keys())

206

In [199]:
from mpl_toolkits.basemap import Basemap

ModuleNotFoundError: No module named 'mpl_toolkits.basemap'

In [196]:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
 
# make sure the value of resolution is a lowercase L,
#  for 'low', not a numeral 1
map = Basemap(projection='ortho', lat_0=50, lon_0=-100,
              resolution='l', area_thresh=1000.0)
 
map.drawcoastlines()
 
plt.show()

ModuleNotFoundError: No module named 'mpl_toolkits.basemap'

In [197]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

ModuleNotFoundError: No module named 'mpl_toolkits.basemap'

In [200]:
stations2_df.head()

Unnamed: 0,Station ID,Station,Latitude,Longitude,Municipality,# of Docks
3,A32000,Fan Pier,42.353287,-71.044389,Boston,15
4,A32001,Union Square - Brighton Ave at Cambridge St,42.353334,-71.137313,Boston,15
5,A32002,Commonwealth Ave at Buick St,42.351246,-71.115639,Boston,19
6,A32003,B.U. Central - 725 Comm. Ave.,42.350406,-71.108279,Boston,11
7,A32004,Longwood Ave at Binney St,42.338629,-71.1065,Boston,15


In [226]:

credentials = json.load(open('/home/cneiderer/.mapbox/.credentials'))
mapbox_access_token = credentials['public_key']

data = Data([
    Scattermapbox(
        lat=stations2_df.Latitude,
        lon=stations2_df.Longitude,
        mode='markers',
        marker=Marker(
            color='red',
            size=9,
            opacity=0.4
        ),
        text=stations2_df.Station,
    )
])
layout = Layout(
    autosize=True,
    hovermode='closest',
    width=750,
    height=750,
    margin=dict(
        l=25,
        r=25,
        b=25,
        t=25,
        pad=4
    ),
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=42.3601,
            lon=-71.0752 #-71.0589
        ),
        pitch=0,
        zoom=11.5
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Multiple Mapbox')

JSONDecodeError: Expecting value: line 3 column 21 (char 51)

In [217]:
new_df.shape

(3731291, 15)

In [219]:
new_df.head()

Unnamed: 0,Duration,StartTime,StopTime,StartID,StartName,StartLat,StartLon,StopID,StopName,StopLat,StopLon,BikeID,UserType,BirthYear,Gender
0,196,2016-11-01 00:00:35,2016-11-01 00:03:52,95,Cambridge St - at Columbia St / Webster Ave,42.372969,-71.094445,78,Union Square - Somerville,42.3796,-71.0953,1380,Subscriber,1983,2
1,222,2016-11-01 00:01:23,2016-11-01 00:05:06,23,Mayor Martin J Walsh - 28 State St,42.35892,-71.057629,190,Nashua Street at Red Auerbach Way,42.3656,-71.0642,866,Subscriber,1980,1
2,483,2016-11-01 00:04:19,2016-11-01 00:12:22,178,MIT Pacific St at Purrington St,42.359573,-71.101295,184,Sidney Research Campus/ Erie Street at Waverly,42.3578,-71.1039,35,Subscriber,1990,1
3,141,2016-11-01 00:05:31,2016-11-01 00:07:53,25,Tremont St / W Newton St,42.341332,-71.076847,21,Prudential Center / Belvidere,42.346,-71.0826,1917,Customer,\N,0
4,154,2016-11-01 00:05:59,2016-11-01 00:08:34,76,Central Sq Post Office / Cambridge City Hall a...,42.366426,-71.105495,139,Dana Park,42.3616,-71.1074,603,Subscriber,1990,1


In [220]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3731291 entries, 0 to 3731291
Data columns (total 15 columns):
Duration     int64
StartTime    object
StopTime     object
StartID      int64
StartName    object
StartLat     float64
StartLon     float64
StopID       object
StopName     object
StopLat      object
StopLon      object
BikeID       int64
UserType     object
BirthYear    object
Gender       int64
dtypes: float64(2), int64(4), object(9)
memory usage: 455.5+ MB
