In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [36]:
df = pd.read_csv('analysis_data/rides.csv')

In [37]:
df['yyyy'] = df['yyyy'].astype('object')

In [38]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   yyyymm     32 non-null     object
 1   yyyy       32 non-null     object
 2   mmm        32 non-null     object
 3   rides      32 non-null     int64 
 4   is_member  32 non-null     int64 
dtypes: int64(2), object(3)
memory usage: 1.4+ KB


In [39]:
df.head()

Unnamed: 0,yyyymm,yyyy,mmm,rides,is_member
0,2014-04,2014,Apr,10125,0
1,2014-04,2014,Apr,98139,1
2,2014-05,2014,May,54386,0
3,2014-05,2014,May,400875,1
4,2014-06,2014,Jun,70152,0


In [123]:
###########################
#PLOT 1: Annual Ridership #
###########################

## Create plotting dataframe from base dataframe
# Group by year
plot_df = df.groupby(
    by = ['yyyy','is_member'],
    as_index = False
).agg(
    rides = ('rides','sum')
)

# Pivot by is member or not
plot_df = plot_df.pivot(columns = 'is_member',
                  index = 'yyyy',
                  values = 'rides')

# Create new columns to calculate total rides and % membership
plot_df['total'] = plot_df[0] + plot_df[1]
plot_df['membership_pct'] = plot_df[1] / plot_df['total']


## Create plotly figure
# Initiate figure object
fig = go.Figure()

# Specify common x axis values for traces
x = plot_df.index.values

# Trace 1: Total rides, create hover template and bar chart
hovertemplate = "<br>".join(
    [
        "Total rides: %{y:.2f}",
        "<extra></extra>"
    ]
)

fig.add_trace(
        go.Bar(
            name = "Total rides",
            x = x,
            y = plot_df['total'] / 1_000_000, # Numbers in millions of rides
            hovertemplate = hovertemplate,
            marker_color = "cornflowerblue"
        )
)

# Trace 2: Member rides, create hover template and bar chart
# Create customdata to store % values for display
customdata = np.transpose([x,plot_df['membership_pct'].tolist()])
hovertemplate = "<br>".join(
    [
        "Member rides: %{y:.2f}",
        "% of total: %{customdata[1]:.2%}",
        "<extra></extra>"
    ]
)

fig.add_trace(
        go.Bar(
            name = "Member rides",
            x = x,
            y = plot_df[1]/ 1_000_000,
            customdata = customdata,
            hovertemplate = hovertemplate,
            marker_color = "cornflowerblue",
            marker_pattern_shape = "/"
        )
)

# Trace 3: Non-member rides, create hover template and bar chart
customdata = np.transpose([x,[1 - val for val in plot_df['membership_pct'].tolist()]])
hovertemplate = "<br>".join(
    [
        "Non-member rides: %{y:.2f}",
        "% of total: %{customdata[1]:.2%}",
        "<extra></extra>"
    ]
)

fig.add_trace(
        go.Bar(
            name = "Non-member rides",
            x = x,
            y = plot_df[0]/ 1_000_000,
            customdata = customdata,
            hovertemplate = hovertemplate,
            marker_color = "cornflowerblue",
            marker_pattern_shape = "+"
        )
)

## Format x, y axis titles, title
fig.update_layout(
    barmode = 'group',
    hovermode = "x unified",
    
    # Set uniform minimum textsize and force show
    # For max size, refer to textfont_size in update_traces
    uniformtext = dict(
        minsize = 9,
        mode = 'show'
    ),

    # Set title properties
     title = dict(
         text = f"Bixi Rides by Year",
         x = 0.5,
         y = 0.85,
         xanchor = 'center',
         yanchor = 'top'
     ),

    # Set xaxis properties
    xaxis = dict(
        title = "Year",
        #titlefont_size = 14,
        tickfont_size = 12,
        #tickangle = -90,
        tickvals = x
    ),
    # Set yaxis properties
    yaxis = dict(
        title = f"Number of Bixi Rides (millions)",
        titlefont_size = 14,
        tickfont_size = 12
    )
    
)

#################
# END OF PLOT 1 #
#################

fig.show()

## Plot 2

In [98]:
df.head()

Unnamed: 0,yyyymm,yyyy,mmm,rides,is_member
0,2014-04,2014,Apr,10125,0
1,2014-04,2014,Apr,98139,1
2,2014-05,2014,May,54386,0
3,2014-05,2014,May,400875,1
4,2014-06,2014,Jun,70152,0


In [186]:
df.head()

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,avg_dur_sec,rides,is_member,start_stn_code,stn_lat,stn_lon,stn_name
0,2014-04,2014,Apr,11-20 min,885.4242,33,0,6221,45.51941,-73.58685,du Mont-Royal / Clark
1,2014-04,2014,Apr,11-20 min,983.75,4,0,6225,45.520188,-73.590559,Villeneuve / St-Urbain
2,2014-04,2014,Apr,11-20 min,851.9091,11,0,6232,45.525021,-73.610737,Hutchison / Van Horne
3,2014-04,2014,Apr,11-20 min,844.4583,24,0,6233,45.524296,-73.604847,Bernard / Jeanne-Mance
4,2014-04,2014,Apr,11-20 min,842.8077,26,0,6235,45.526543,-73.598233,St-Dominique / St-Viateur


In [187]:
plot_df.head()

Unnamed: 0,yyyymm,yyyy,mmm,rides
0,2014-04,2014,Apr,108264
1,2014-05,2014,May,455261
2,2014-06,2014,Jun,525863
3,2014-07,2014,Jul,569352
4,2014-08,2014,Aug,556780


In [201]:
df = pd.read_csv(
    'analysis_data/location.csv',
    dtype = {"start_stn_code": "object"})

In [202]:
df.head()

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,avg_dur_sec,rides,is_member,start_stn_code,stn_lat,stn_lon,stn_name
0,2014-04,2014,Apr,11-20 min,885.4242,33,0,6221,45.51941,-73.58685,du Mont-Royal / Clark
1,2014-04,2014,Apr,11-20 min,983.75,4,0,6225,45.520188,-73.590559,Villeneuve / St-Urbain
2,2014-04,2014,Apr,11-20 min,851.9091,11,0,6232,45.525021,-73.610737,Hutchison / Van Horne
3,2014-04,2014,Apr,11-20 min,844.4583,24,0,6233,45.524296,-73.604847,Bernard / Jeanne-Mance
4,2014-04,2014,Apr,11-20 min,842.8077,26,0,6235,45.526543,-73.598233,St-Dominique / St-Viateur


In [197]:
############################
# PLOT 2 Monthly Ridership #
############################
df = pd.read_csv('analysis_data/location.csv')

plot_df = df.groupby(
    by = ['yyyymm','yyyy','mmm'],
    as_index = False).agg(
    rides = ('rides','sum'))

## Initiate plotly graph object
fig = go.Figure()

x = plot_df['mmm'].unique()

# Add a line for each year 
for year in plot_df['yyyy'].unique():

    x = plot_df['mmm'].unique()
    y = plot_df.loc[plot_df['yyyy'] == year, "rides"]/1_000
    pct_yr_total = [val/y.sum() for val in y]
    customdata = np.transpose([x,pct_yr_total])
    
    hovertemplate = "<br>".join(
        [
            "%{text}",
            "Rides: %{y:.0f} K",
            "% of year: %{customdata[1]:.2%}",
            "<extra></extra>"
        ]
    )

    fig.add_trace(
        go.Scatter(
            name = str(year), 
            x = x, 
            y = y,
            hovertemplate = hovertemplate,
            customdata = customdata,
            text = [year] * len(x)
        )
    )

fig.update_layout(
    barmode = 'group',
    hovermode = "x unified",
    
    # Set uniform minimum textsize and force show
    # For max size, refer to textfont_size in update_traces
    uniformtext = dict(
        minsize = 9,
        mode = 'show'
    ),

    # Set title properties
     title = dict(
         text = f"Bixi Rides by Month",
         x = 0.5,
         y = 0.85,
         xanchor = 'center',
         yanchor = 'top'
     ),

    # Set xaxis properties
    xaxis = dict(
        title = "Month",
        #titlefont_size = 14,
        tickfont_size = 12,
        #tickangle = -90,
        tickvals = x,
        side = 'bottom'
    ),
    # Set yaxis properties
    yaxis = dict(
        title = f"Number of Bixi Rides (thousands)",
        titlefont_size = 14,
        tickfont_size = 12
    )
)

#################
# END OF PLOT 2 #
#################

fig.show()


Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.



ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [2]:
df = pd.read_csv('analysis_data/ride_length.csv')

plot_df = df.groupby(
    by = ['yyyymm','yyyy', 'mmm','trip_length'],
    as_index = False).agg(
        rides = ('rides','sum')
)

In [3]:
df.head()

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,rides,is_member
0,2014-04,2014,Apr,00-10 min,2438,0
1,2014-04,2014,Apr,11-20 min,3683,0
2,2014-04,2014,Apr,21-30 min,2372,0
3,2014-04,2014,Apr,> 30 min,1632,0
4,2014-04,2014,Apr,00-10 min,55395,1


In [4]:
plot_df = df.groupby(
    by = ['yyyymm','yyyy', 'mmm','trip_length'],
    as_index = False).agg(
        rides = ('rides','sum')
)

In [174]:
plot_df = plot_df.pivot(
    index = ['mmm'],
    columns = ['trip_length'],
    values = ['rides']
)        

In [175]:
plot_df['total_rides'] = plot_df.sum(axis = 1)

In [176]:
plot_df.iloc[:,:-1].div(plot_df['total_rides'], axis = 0)

Unnamed: 0_level_0,rides,rides,rides,rides
trip_length,00-10 min,11-20 min,21-30 min,> 30 min
mmm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Apr,0.527956,0.305007,0.115017,0.05202
Aug,0.441992,0.33551,0.149405,0.073093
Jul,0.444682,0.338265,0.147966,0.069087
Jun,0.444652,0.335855,0.148415,0.071079
May,0.459611,0.329157,0.143077,0.068155
Nov,0.584912,0.29291,0.090631,0.031548
Oct,0.545198,0.306918,0.106694,0.04119
Sep,0.479113,0.329958,0.133781,0.057148


In [194]:
temp = plot_df.groupby(by = 'yyyymm')['rides'].transform('sum')
plot_df['rides_pct'] = plot_df['rides']/temp

# Plot 3

In [7]:
df = pd.read_csv('analysis_data/ride_length.csv')

plot_df = df.groupby(
    by = ['yyyymm','yyyy', 'mmm','trip_length'],
    as_index = False).agg(
        rides = ('rides','sum')
)

In [8]:
plot_df.head()

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,rides
0,2014-04,2014,Apr,00-10 min,57833
1,2014-04,2014,Apr,11-20 min,33170
2,2014-04,2014,Apr,21-30 min,12014
3,2014-04,2014,Apr,> 30 min,5247
4,2014-05,2014,May,00-10 min,215271


In [196]:
fig = go.Figure()

x = plot_df['mmm'].unique()

for duration_group in plot_df['trip_length'].unique():
    
    cond = plot_df['trip_length'] == duration_group
    fig.add_trace(
            go.Bar(
                name = duration_group,
                x = x,
                y = plot_df.loc[cond,'rides_pct'], # Numbers in millions of rides
                #hovertemplate = hovertemplate,
                #marker_color = "cornflowerblue"
            )
    )

fig.update_layout(
    barmode = 'stack',
    hovermode = 'x unified'
)

fig.show()

In [14]:
########################
# PLOT 3 Trip Duration #
########################

plot_df = df.groupby(
    by = ['yyyy','trip_length'],
    as_index = False).agg(
        rides = ('rides','sum')
)

# Initiate plotly graph object
fig = go.Figure()

# Define trip duration groups for x axis
x = plot_df['trip_length'].unique()

# Add a bar for each year
for year in plot_df['yyyy'].unique():
    
    cond = plot_df['yyyy'] == year
    
    fig.add_trace(
            go.Bar(
                name = str(year),
                x = x,
                y = plot_df.loc[cond,'rides']/1_000_000, # Numbers in millions of rides
            )
    )


fig.update_layout(
    barmode = 'group',
    hovermode = 'x unified',

    # Set uniform minimum textsize and force show
    # For max size, refer to textfont_size in update_traces
    uniformtext = dict(
        minsize = 9,
        mode = 'show'
    ),

    # Set title properties
     title = dict(
         text = f"Trip Duration",
         x = 0.5,
         y = 0.85,
         xanchor = 'center',
         yanchor = 'top'
     ),

    # Set xaxis properties
    xaxis = dict(
        title = "Trip duration (min)",
        #titlefont_size = 14,
        tickfont_size = 12,
        #tickangle = -90,
        tickvals = x,
        side = 'bottom'
    ),
    # Set yaxis properties
    yaxis = dict(
        title = f"Number of Bixi Rides (millions)",
        titlefont_size = 14,
        tickfont_size = 12
    )
)

#################
# END OF PLOT 3 #
#################

fig.show()

# Plot 4

In [22]:
import plotly.express as px

In [256]:
####################
# Plot 4: Location #
####################

df = pd.read_csv(
    'analysis_data/location.csv',
    dtype = {"start_stn_code": "object"}
)

# df['stn_lat'] = df['stn_lat'].round(3)
# df['stn_lon'] = df['stn_lon'].round(3)

groupby_clause = ['yyyy','start_stn_code','stn_lat','stn_lon']

plot_df = df.groupby(
    by = groupby_clause).agg(     
        rides = ('rides','sum'),
        # stn_lat = ('stn_lat','mean'),
        # stn_lon = ('stn_lon','mean'),
        stn_name = ('stn_name', lambda x: x.iloc[-1])
    )    

plot_df['avg_dur_sec'] = df.groupby(
    by = groupby_clause).apply(lambda x: np.average(x.avg_dur_sec, weights = x.rides))

plot_df.reset_index(inplace = True)

In [257]:
plot_df.head()

Unnamed: 0,yyyy,start_stn_code,stn_lat,stn_lon,rides,stn_name,avg_dur_sec
0,2014,10002,45.478228,-73.569651,7093,Métro Charlevoix (Centre / Charlevoix),691.009448
1,2014,5002,45.5332,-73.5156,254,St-Charles / Châteauguay,1164.047242
2,2014,5003,45.5294,-73.5178,202,Place Longueuil,1338.07921
3,2014,5004,45.539824,-73.508752,187,St-Charles / Charlotte,1118.262033
4,2014,5005,45.536408,-73.512776,475,St-Charles / St-Sylvestre,862.648417


In [262]:
####################
# Plot 4: Location #
####################

groupby_clause = ['yyyy','start_stn_code','stn_lat','stn_lon']

plot_df = df.groupby(
    by = groupby_clause).agg(     
        rides = ('rides','sum'),
        # stn_lat = ('stn_lat','mean'),
        # stn_lon = ('stn_lon','mean'),
        stn_name = ('stn_name', lambda x: x.iloc[-1])
    )    

plot_df['avg_dur_sec'] = df.groupby(
    by = groupby_clause).apply(lambda x: np.average(x.avg_dur_sec, weights = x.rides))

plot_df.reset_index(inplace = True)

plot_df.rename(
    columns = 
    {
        "yyyy"        : "Year",
        "stn_lat"     : "Lat",
        "stn_lon"     : "Lon",
        "rides"       : "Trips",
        "stn_name"    : "Station Name",
        "avg_dur_sec" : "Avg. Trip Duration"
    },
    inplace = True
)

px.set_mapbox_access_token(open(".mapbox_token").read())

fig = px.scatter_mapbox(
    plot_df,
    lat = "Lat",
    lon = "Lon",
    hover_name = "Station Name",
    hover_data = {
        "Trips"              : ":,",
        "Lat"                : ":.3f",
        "Lon"                : ":.3f",
        "Avg. Trip Duration" : ":,.0f"
    },
    size = "Trips",
    size_max = 15,
    color = "Avg. Trip Duration",
    zoom = 10,
    mapbox_style = 'light',
    animation_frame = 'Year')

fig.show()

In [81]:
fig.data[0].hovertemplate

'<b>%{hovertext}</b><br><br>yyyy=2014<br>rides=%{marker.size}<br>stn_lat=%{customdata[0]:.3f}<br>stn_lon=%{customdata[1]:.3f}<extra></extra>'

In [181]:
fig.data

(Scattermapbox({
     'customdata': array([[ 45.5332  , -73.5156  ],
                          [ 45.5294  , -73.5178  ],
                          [ 45.539824, -73.508752],
                          ...,
                          [ 45.500035, -73.618105],
                          [ 45.53133 , -73.59155 ],
                          [ 45.478228, -73.569651]]),
     'hovertemplate': ('<b>%{hovertext}</b><br><br>rid' ... '%{marker.color}<extra></extra>'),
     'hovertext': array(['St-Charles / Châteauguay', 'Place Longueuil', 'St-Charles / Charlotte',
                         ..., 'Louis-Colin / McKenna', 'St-André / St-Grégoire',
                         'Métro Charlevoix (Centre / Charlevoix)'], dtype=object),
     'lat': array([45.5332  , 45.5294  , 45.539824, ..., 45.500035, 45.53133 , 45.478228]),
     'legendgroup': '',
     'lon': array([-73.5156  , -73.5178  , -73.508752, ..., -73.618105, -73.59155 ,
                   -73.569651]),
     'marker': {'color': array([1164.04724213, 1

In [120]:
df = pd.read_csv('analysis_data/location.csv')

In [121]:
df.head()

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,avg_dur_sec,rides,is_member,start_stn_code,stn_lat,stn_lon,stn_name
0,2014-04,2014,Apr,00-10 min,340.0,1,0,6003,45.511119,-73.567974,Evans / Clark
1,2014-04,2014,Apr,00-10 min,465.3333,3,0,6008,45.512734,-73.561141,Sanguinet / Ste-Catherine
2,2014-04,2014,Apr,00-10 min,400.9565,23,0,6012,45.51066,-73.56497,Métro St-Laurent (de Maisonneuve / St-Laurent)
3,2014-04,2014,Apr,00-10 min,463.5,2,0,6025,45.507144,-73.555119,Notre-Dame / St-Gabriel
4,2014-04,2014,Apr,00-10 min,436.75,4,0,6026,45.507629,-73.551876,de la Commune / Place Jacques-Cartier


In [175]:
df = pd.read_csv('analysis_data/location.csv')



In [184]:
####################
# Plot 4: Location #
####################

df = pd.read_csv('analysis_data/location.csv')


groupby_clause = ['start_stn_code','stn_lat','stn_lon','stn_name']

plot_df = df.groupby(
    by = groupby_clause).agg(     
        rides = ('rides','sum')
    )    

plot_df['avg_dur_sec'] = df.groupby(
    by = groupby_clause).apply(lambda x: np.average(x.avg_dur_sec, weights = x.rides))

plot_df.reset_index(inplace = True)

fig = go.Figure()

mapbox_access_token = open(".mapbox_token").read()

hovertemplate = "<br>".join(
    [
        "<b>%{text}<b>",
        "Rides: %{marker.size:.0f} K",
        "Average Ride Duration: %{marker.color:.0f}",
        "Station Latitude: %{lat:.3f}",
        "Station Longitude: %{lon:.3f}",
        "<extra></extra>"
    ]
)

fig.add_trace(
    go.Scattermapbox(
        lat = plot_df['stn_lat'],
        lon = plot_df['stn_lon'],
        text = plot_df['stn_name'],
        mode = 'markers',
        marker = go.scattermapbox.Marker(
            size = plot_df['rides']/1_000,
            sizemode = 'area',
            sizeref = 203/1_000,
            color = plot_df['avg_dur_sec'],
            coloraxis = 'coloraxis',
        ),
        hovertemplate = hovertemplate
    ))

fig.update_layout(
    hovermode = 'closest',
    mapbox = dict(
        accesstoken = mapbox_access_token,
        bearing = 0,
        center = go.layout.mapbox.Center(
            lat = 45,
            lon = -73
        ),
        pitch = 0,
        zoom = 5
    ))

fig.show()
        

In [39]:
plot_df.head()

Unnamed: 0,yyyy,start_stn_code,stn_lat,stn_lon,stn_name,rides
0,2014,5002,45.5332,-73.5156,St-Charles / Châteauguay,254
1,2014,5003,45.5294,-73.5178,Place Longueuil,202
2,2014,5004,45.539824,-73.508752,St-Charles / Charlotte,187
3,2014,5005,45.536408,-73.512776,St-Charles / St-Sylvestre,475
4,2014,5006,45.537226,-73.495067,Collège Édouard-Montpetit,325


In [105]:
df

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,rides,is_member,start_stn_code,stn_lat,stn_lon,stn_name
0,2014-04,2014,Apr,00-10 min,1,0,6003,45.511119,-73.567974,Evans / Clark
1,2014-04,2014,Apr,00-10 min,3,0,6008,45.512734,-73.561141,Sanguinet / Ste-Catherine
2,2014-04,2014,Apr,00-10 min,23,0,6012,45.510660,-73.564970,Métro St-Laurent (de Maisonneuve / St-Laurent)
3,2014-04,2014,Apr,00-10 min,2,0,6025,45.507144,-73.555119,Notre-Dame / St-Gabriel
4,2014-04,2014,Apr,00-10 min,4,0,6026,45.507629,-73.551876,de la Commune / Place Jacques-Cartier
...,...,...,...,...,...,...,...,...,...,...
54965,2015-11,2015,Nov,> 30 min,2,1,6743,45.494514,-73.583368,St-Marc / Sherbrooke
54966,2015-11,2015,Nov,> 30 min,6,1,6744,45.456355,-73.597557,Hamilton / Jolicoeur
54967,2015-11,2015,Nov,> 30 min,11,1,6901,45.513830,-73.560432,Ste-Catherine / St-Denis
54968,2015-11,2015,Nov,> 30 min,2,1,6913,45.533348,-73.605834,Drolet / Beaubien


In [114]:
a = np.array([4,5,6])
b = np.array([17,18,20,25,48])
c = np.array([4,5,6,17,18,20,25,48])

In [112]:
a.mean()

5.0

In [113]:
b.mean()

25.6

In [115]:
c.mean()

17.875

In [119]:
a.shape[0]/(a.shape[0]+b.shape[0])*a.mean() + b.shape[0]/(a.shape[0]+b.shape[0])*b.mean()

17.875

In [203]:
df.head()

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,avg_dur_sec,rides,is_member,start_stn_code,stn_lat,stn_lon,stn_name
0,2014-04,2014,Apr,11-20 min,885.4242,33,0,6221,45.51941,-73.58685,du Mont-Royal / Clark
1,2014-04,2014,Apr,11-20 min,983.75,4,0,6225,45.520188,-73.590559,Villeneuve / St-Urbain
2,2014-04,2014,Apr,11-20 min,851.9091,11,0,6232,45.525021,-73.610737,Hutchison / Van Horne
3,2014-04,2014,Apr,11-20 min,844.4583,24,0,6233,45.524296,-73.604847,Bernard / Jeanne-Mance
4,2014-04,2014,Apr,11-20 min,842.8077,26,0,6235,45.526543,-73.598233,St-Dominique / St-Viateur


In [217]:
df = pd.read_csv(
    'analysis_data/location.csv',
    dtype = {"start_stn_code": "object"}
)

df['stn_name_x'] = df['stn_name'].str.strip().str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.replace(' ','')

groupby_clause = ['stn_name_x']

plot_df = df.groupby(
    by = groupby_clause).agg(     
        rides = ('rides','sum'),
        stn_lat = ('stn_lat','mean'),
        stn_lon = ('stn_lon','mean'),
        stn_name = ('stn_name', lambda x: x.iloc[-1])
    )    

plot_df['avg_dur_sec'] = df.groupby(
    by = groupby_clause).apply(lambda x: np.average(x.avg_dur_sec, weights = x.rides))

plot_df.reset_index(inplace = True)

In [223]:
df = pd.read_csv(
    'analysis_data/location.csv',
    dtype = {"start_stn_code": "object"}
)

df['stn_lat'] = df['stn_lat'].round(3)
df['stn_lon'] = df['stn_lon'].round(3)

groupby_clause = ['stn_lat', 'stn_lon']

plot_df = df.groupby(
    by = groupby_clause).agg(     
        rides = ('rides','sum'),
        # stn_lat = ('stn_lat','mean'),
        # stn_lon = ('stn_lon','mean'),
        stn_name = ('stn_name', lambda x: x.iloc[-1])
    )    

plot_df['avg_dur_sec'] = df.groupby(
    by = groupby_clause).apply(lambda x: np.average(x.avg_dur_sec, weights = x.rides))

plot_df.reset_index(inplace = True)

In [224]:
plot_df.head()

Unnamed: 0,stn_lat,stn_lon,rides,stn_name,avg_dur_sec
0,-1.0,-1.0,9749,Smith / Peel,967.284035
1,45.415,-73.627,1940,LaSalle / 67e avenue,1618.846914
2,45.416,-73.638,2139,LaSalle / 80e avenue,2666.817675
3,45.416,-73.613,4683,LaSalle / 37e avenue,1639.449708
4,45.417,-73.644,4371,LaSalle / 90e avenue,1650.571719


In [220]:
cond = plot_df['stn_name'].str.contains('Jeanne Mance') == True
plot_df.loc[cond]

Unnamed: 0,stn_name_x,rides,stn_lat,stn_lon,stn_name,avg_dur_sec
435,JeanneMance/duMont-Royal,36953,45.517643,-73.588928,Jeanne Mance / du Mont-Royal,782.502934
734,ParcJeanneMance(monumentGeorge-EtienneCartier),60620,45.51496,-73.58503,Parc Jeanne Mance (monument George-Étienne Car...,909.307732
735,ParcJeanneMance(monumentasirGeorge-EtienneCart...,55015,45.51496,-73.58503,Parc Jeanne Mance (monument à sir George-Étien...,907.210013
736,ParcJeanneMance(monumentsirGeorge-EtienneCartier),81856,45.51484,-73.584779,Parc Jeanne Mance (monument sir George-Etienne...,970.738552
1041,Viger/JeanneMance,14734,45.504795,-73.561197,Viger / Jeanne Mance,903.518728


In [263]:
df = pd.read_csv(
    'analysis_data/location.csv',
    dtype = {"start_stn_code": "object"}
)

In [264]:
df.head()

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,avg_dur_sec,rides,is_member,start_stn_code,stn_lat,stn_lon,stn_name
0,2014-04,2014,Apr,11-20 min,885.4242,33,0,6221,45.51941,-73.58685,du Mont-Royal / Clark
1,2014-04,2014,Apr,11-20 min,983.75,4,0,6225,45.520188,-73.590559,Villeneuve / St-Urbain
2,2014-04,2014,Apr,11-20 min,851.9091,11,0,6232,45.525021,-73.610737,Hutchison / Van Horne
3,2014-04,2014,Apr,11-20 min,844.4583,24,0,6233,45.524296,-73.604847,Bernard / Jeanne-Mance
4,2014-04,2014,Apr,11-20 min,842.8077,26,0,6235,45.526543,-73.598233,St-Dominique / St-Viateur


In [266]:
df['grouped_rides'] = df.groupby(
    by = ['yyyy','start_stn_code'])['rides'].transform('sum')

In [268]:
cond1 = df['yyyy'] == 2014
cond2 = df['start_stn_code'] == '6221'

df.loc[cond1 & cond2]

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,avg_dur_sec,rides,is_member,start_stn_code,stn_lat,stn_lon,stn_name,grouped_rides
0,2014-04,2014,Apr,11-20 min,885.4242,33,0,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448
91,2014-04,2014,Apr,21-30 min,1507.0000,14,0,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448
176,2014-04,2014,Apr,> 30 min,2291.4375,16,0,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448
262,2014-04,2014,Apr,00-10 min,336.5681,609,1,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448
367,2014-04,2014,Apr,11-20 min,808.2868,272,1,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448
...,...,...,...,...,...,...,...,...,...,...,...,...
25744,2014-11,2014,Nov,> 30 min,1838.0000,1,0,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448
25806,2014-11,2014,Nov,00-10 min,331.6567,603,1,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448
25910,2014-11,2014,Nov,11-20 min,820.2815,238,1,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448
26015,2014-11,2014,Nov,21-30 min,1353.6216,37,1,6221,45.51941,-73.58685,du Mont-Royal / Clark,24448


In [275]:
df['rank'] = df.groupby(
    by = ['yyyy'])['grouped_rides'].rank("dense", ascending = False)

In [282]:
cond1 = df['rank'] == 1
cond2 = df['yyyy'] == 2014
cond3 = df['mmm'] == 'Jun'
df.loc[cond1 & cond2]

Unnamed: 0,yyyymm,yyyy,mmm,trip_length,avg_dur_sec,rides,is_member,start_stn_code,stn_lat,stn_lon,stn_name,grouped_rides,rank
2324,2014-04,2014,Apr,00-10 min,426.6047,43,0,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0
2408,2014-04,2014,Apr,11-20 min,890.1250,40,0,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0
2498,2014-04,2014,Apr,21-30 min,1474.0870,23,0,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0
2581,2014-04,2014,Apr,> 30 min,3220.6154,13,0,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0
2672,2014-04,2014,Apr,00-10 min,362.2854,1188,1,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25081,2014-11,2014,Nov,> 30 min,1969.0000,1,0,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0
25146,2014-11,2014,Nov,00-10 min,350.6432,1121,1,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0
25256,2014-11,2014,Nov,11-20 min,793.5947,375,1,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0
25365,2014-11,2014,Nov,21-30 min,1399.5902,61,1,6184,45.524673,-73.58255,Métro Mont-Royal (Rivard / du Mont-Royal),45600,1.0


In [None]:
df['grouped_rides'] = df.groupby(
    by = ['yyyy','start_stn_code'])['rides'].transform('sum')

df['rank'] = df.groupby(
    by = ['yyyy'])['grouped_rides'].rank("dense", ascending = False)

