In [1]:
import pandas as pd
from ipyleaflet import Marker, Map, Popup, Polyline
from ipywidgets import HTML
import plotly.offline as py
# Initialise plotly's offline notebook mode (plotly.offline is used by the py.iplot call later in the notebook).
py.init_notebook_mode(connected=True)

In [2]:
# Load the two raw CSV tables: one of stations, one of trips.
stations_csv = "../data/hubway_stations.csv"
trips_csv = "../data/hubway_trips.csv"

hubway_stations = pd.read_csv(stations_csv)
hubway_trips = pd.read_csv(trips_csv)

In [3]:
# Extract the year (as a string) from 'start_date', which is formatted like
# "7/28/2011 10:12:00": keep the date part before the first space, then take
# the third "/"-separated field.
# `n` is passed by keyword because recent pandas versions only accept `pat`
# positionally in Series.str.split.
year_data = (
    hubway_trips['start_date']
    .str.split(" ", n=1, expand=True)[0]
    .str.split("/", expand=True)[2]
)

# Distinct years present in the data, in order of first appearance.
list_year = year_data.drop_duplicates().tolist()

# One trips table per year.
hubway_trips_2011 = hubway_trips[year_data.isin(['2011'])]
hubway_trips_2012 = hubway_trips[year_data.isin(['2012'])]
hubway_trips_2013 = hubway_trips[year_data.isin(['2013'])]

In [4]:
# Preview the first rows of the stations table.
hubway_stations.head()

Unnamed: 0,id,terminal,station,municipal,lat,lng,status
0,3,B32006,Colleges of the Fenway,Boston,42.340021,-71.100812,Existing
1,4,C32000,Tremont St. at Berkeley St.,Boston,42.345392,-71.069616,Existing
2,5,B32012,Northeastern U / North Parking Lot,Boston,42.341814,-71.090179,Existing
3,6,D32000,Cambridge St. at Joy St.,Boston,42.361285,-71.06514,Existing
4,7,A32000,Fan Pier,Boston,42.353412,-71.044624,Existing


In [5]:
# Preview the first rows of the trips table.
hubway_trips.head()

Unnamed: 0,seq_id,hubway_id,status,duration,start_date,strt_statn,end_date,end_statn,bike_nr,subsc_type,zip_code,birth_date,gender
0,1,8,Closed,9,7/28/2011 10:12:00,23.0,7/28/2011 10:12:00,23.0,B00468,Registered,'97217,1976.0,Male
1,2,9,Closed,220,7/28/2011 10:21:00,23.0,7/28/2011 10:25:00,23.0,B00554,Registered,'02215,1966.0,Male
2,3,10,Closed,56,7/28/2011 10:33:00,23.0,7/28/2011 10:34:00,23.0,B00456,Registered,'02108,1943.0,Male
3,4,11,Closed,64,7/28/2011 10:35:00,23.0,7/28/2011 10:36:00,23.0,B00554,Registered,'02116,1981.0,Female
4,5,12,Closed,12,7/28/2011 10:37:00,23.0,7/28/2011 10:37:00,23.0,B00554,Registered,'97214,1983.0,Female


In [6]:
## Merge trips and stations data
# Each trip is enriched with the terminal, name and coordinates of its start
# station and of its end station through two successive left joins on the
# station id.

# Start station: join on 'strt_statn', then prefix the joined columns with 'strt_'.
hubway_stations_start_merge = (
    hubway_stations[['id', 'terminal', 'station', 'lat', 'lng']]
    .rename(columns={'id': 'strt_statn'})
)
hubway_trips_start = (
    hubway_trips
    .merge(hubway_stations_start_merge, on='strt_statn', how='left')
    .rename(columns={
        'terminal': 'strt_terminal',
        'station': 'strt_station',
        'lat': 'strt_lat',
        'lng': 'strt_lng',
    })
)

# End station: same join on 'end_statn', with the joined columns prefixed 'end_'.
hubway_stations_end_merge = (
    hubway_stations[['id', 'terminal', 'station', 'lat', 'lng']]
    .rename(columns={'id': 'end_statn'})
)
hubway_trips_end = (
    hubway_trips_start
    .merge(hubway_stations_end_merge, on='end_statn', how='left')
    .rename(columns={
        'terminal': 'end_terminal',
        'station': 'end_station',
        'lat': 'end_lat',
        'lng': 'end_lng',
    })
)

hubway_trips_merged = hubway_trips_end

In [9]:
# Preview the merged table: trip columns plus the strt_* and end_* station columns.
hubway_trips_merged.head()

Unnamed: 0,seq_id,hubway_id,status,duration,start_date,strt_statn,end_date,end_statn,bike_nr,subsc_type,...,birth_date,gender,strt_terminal,strt_station,strt_lat,strt_lng,end_terminal,end_station,end_lat,end_lng
0,1,8,Closed,9,7/28/2011 10:12:00,23.0,7/28/2011 10:12:00,23.0,B00468,Registered,...,1976.0,Male,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364
1,2,9,Closed,220,7/28/2011 10:21:00,23.0,7/28/2011 10:25:00,23.0,B00554,Registered,...,1966.0,Male,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364
2,3,10,Closed,56,7/28/2011 10:33:00,23.0,7/28/2011 10:34:00,23.0,B00456,Registered,...,1943.0,Male,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364
3,4,11,Closed,64,7/28/2011 10:35:00,23.0,7/28/2011 10:36:00,23.0,B00554,Registered,...,1981.0,Female,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364
4,5,12,Closed,12,7/28/2011 10:37:00,23.0,7/28/2011 10:37:00,23.0,B00554,Registered,...,1983.0,Female,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364,B32008,Mayor Thomas M. Menino - Government Center,42.359677,-71.059364


## Display Stations on Map

In [7]:
# Create the map widget, centred on (lat 42.35, lng -71.1) at zoom level 12.
m = Map(center=(42.35, -71.1), zoom=12)

In [8]:
# One (lat, lng, station name) tuple per station.
hubway_stations_latlng = list(
    hubway_stations[['lat', 'lng', 'station']].itertuples(index=False, name=None)
)

# Add a fixed (non-draggable) marker per station; its popup shows the station name.
for lat, lng, station_name in hubway_stations_latlng:
    marker = Marker(location=(lat, lng), draggable=False)
    m.add_layer(marker)
    station_label = HTML()
    station_label.value = station_name
    marker.popup = station_label

In [None]:
# Draw the trips as straight green segments from start station to end station.
#
# Only the coordinate columns are needed. Rows with a missing coordinate
# (a trip whose start or end station found no match in the left joins) are
# dropped so no NaN reaches the map, and identical start/end pairs are drawn
# once: repeating the same segment adds nothing visually but would create one
# map layer per trip.
route_coords = (
    hubway_trips_merged[['strt_lat', 'strt_lng', 'end_lat', 'end_lng']]
    .dropna()
    .drop_duplicates()
)

# Each route is a two-point polyline: [[start_lat, start_lng], [end_lat, end_lng]].
route_segments = [
    [[strt_lat, strt_lng], [end_lat, end_lng]]
    for strt_lat, strt_lng, end_lat, end_lng
    in route_coords.itertuples(index=False, name=None)
]

# `locations` is given as a list of polylines, so all routes share one layer.
line = Polyline(
    locations=route_segments,
    color="green",
    fill_color="green"
)
m.add_layer(line)
    

In [8]:
# Display the map widget.
m

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

## Gender Data

In [None]:
# Split the full trips table by the 'gender' column; rows whose gender is
# neither 'Male' nor 'Female' end up in neither frame.
hubway_male = hubway_trips[hubway_trips['gender'].eq('Male')]
hubway_female = hubway_trips[hubway_trips['gender'].eq('Female')]

In [None]:
# Per-year gender subsets.
# Each boolean mask is built from the year subset's own 'gender' column, so the
# mask and the frame it filters share the same index. A mask taken from the
# full hubway_trips table would only work through implicit index alignment.
hubway_male_2011 = hubway_trips_2011.loc[hubway_trips_2011['gender'] == 'Male']
hubway_female_2011 = hubway_trips_2011.loc[hubway_trips_2011['gender'] == 'Female']
hubway_male_2012 = hubway_trips_2012.loc[hubway_trips_2012['gender'] == 'Male']
hubway_female_2012 = hubway_trips_2012.loc[hubway_trips_2012['gender'] == 'Female']
hubway_male_2013 = hubway_trips_2013.loc[hubway_trips_2013['gender'] == 'Male']
hubway_female_2013 = hubway_trips_2013.loc[hubway_trips_2013['gender'] == 'Female']

In [None]:
def _share(part, whole):
    """Return len(part) / len(whole): the fraction of rows of `whole` found in `part`."""
    return len(part) / len(whole)


def _unknown_gender_share(trips):
    """Return the fraction of rows of `trips` whose 'gender' value is missing."""
    return len(trips[trips['gender'].isna()].index) / len(trips.index)


# Note: despite the "percent_" prefix, every value below is a fraction between 0 and 1.

# All years together.
percent_male = _share(hubway_male, hubway_trips)
percent_female = _share(hubway_female, hubway_trips)
percent_unknown_genre = _unknown_gender_share(hubway_trips)

# 2011
percent_male_2011 = _share(hubway_male_2011, hubway_trips_2011)
percent_female_2011 = _share(hubway_female_2011, hubway_trips_2011)
percent_unknown_genre_2011 = _unknown_gender_share(hubway_trips_2011)

# 2012
percent_male_2012 = _share(hubway_male_2012, hubway_trips_2012)
percent_female_2012 = _share(hubway_female_2012, hubway_trips_2012)
percent_unknown_genre_2012 = _unknown_gender_share(hubway_trips_2012)

# 2013
percent_male_2013 = _share(hubway_male_2013, hubway_trips_2013)
percent_female_2013 = _share(hubway_female_2013, hubway_trips_2013)
percent_unknown_genre_2013 = _unknown_gender_share(hubway_trips_2013)

In [None]:
import plotly.graph_objs as go

# Grouped bar chart: for each year, the share of trips by male riders, by
# female riders, and by riders with no recorded gender.
x_year = ['2011', '2012', '2013']

# One bar series per gender category; every y value is a fraction (0-1) of
# that year's trips.
trace1 = go.Bar(
    x=x_year,
    y=[percent_male_2011, percent_male_2012, percent_male_2013],
    name='Male'
)
trace2 = go.Bar(
    x=x_year,
    y=[percent_female_2011, percent_female_2012, percent_female_2013],
    name='Female'
)
trace3 = go.Bar(
    x=x_year,
    y=[percent_unknown_genre_2011, percent_unknown_genre_2012, percent_unknown_genre_2013],
    name='Without Gender'
)

data = [trace1, trace2, trace3]
layout = go.Layout(
    barmode='group',
    title='Share of Hubway trips by rider gender and year',
    # Years are labels, not quantities: force a categorical axis so the
    # numeric-looking strings are not placed on a continuous scale.
    xaxis=dict(title='Year', type='category'),
    # Values are fractions; show them as percentages on the axis.
    yaxis=dict(title='Share of trips', tickformat='.0%')
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')

## Date Aggregation

In [None]:
import calendar
from datetime import date

In [None]:
# Preview the first rows of the 2011 trips subset.
hubway_trips_2011.head()

In [None]:
from datetime import datetime
# Parse the 'start_date' strings (format like "7/28/2011 10:12:00") into
# datetimes with a single vectorised call.
# A row-by-row loop writing through `.at['start_date', i]` would have row and
# column swapped (`.at` takes [row_label, column_label]) and would add a stray
# row/columns instead of updating the column. `assign` returns a new frame, so
# nothing is written into a slice of hubway_trips.
hubway_trips_2011 = hubway_trips_2011.assign(
    start_date=pd.to_datetime(
        hubway_trips_2011['start_date'], format='%m/%d/%Y %H:%M:%S'
    )
)

In [None]:
# Number of entries in the 2011 'start_date' column.
hubway_trips_2011['start_date'].size