These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.

Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots.

In [None]:
# Notebook parameters; the monthly-statistics scripts overwrite these values.
# They are used below to build the suffix of the saved plot file names, and
# `program` is also passed to scaffolding.load_all_participant_trips.
# Note that the time query further down is hardcoded to (None, None), so
# `year`/`month` are not used there.
year = 2020
month = 11
program = "prepilot"

In [None]:
# IPython magic: install seaborn into the kernel's environment before it is imported below
%pip install seaborn

In [None]:
from collections import defaultdict
import datetime

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import emission.core.get_database as edb
import emission.core.wrapper.entry as ecwe
import emission.storage.decorations.analysis_timeseries_queries as esda
import emission.storage.decorations.trip_queries as esdt
import emission.storage.decorations.timeline as esdl
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.timequery as estt
from uuid import UUID

# NOTE(review): sns.set() applies seaborn's default theme (style="darkgrid"
# according to the seaborn docs), so it presumably overrides the "whitegrid"
# style selected on the line before it — confirm which look is intended.
sns.set_style("whitegrid")
sns.set()
# IPython magic: render matplotlib figures inline in the notebook
%matplotlib inline

In [None]:
import scaffolding
from plots import *

### Collect Data From Database

In [None]:
# Loading mapping dictionaries from mapping_dictionaries notebook
%store -r df_EI
%store -r dic_re
%store -r dic_pur
%store -r dic_fuel

# convert a dictionary to a defaultdict
dic_pur = defaultdict(lambda: 'Other',dic_pur)
dic_re = defaultdict(lambda: 'Other',dic_re)

In [None]:
# hardcoding this to None, None, since these are timeseries plots
# NOTE(review): presumably (None, None) disables the year/month filter so the
# whole history is loaded — confirm against scaffolding.get_time_query
tq = scaffolding.get_time_query(None, None)

In [None]:
# Load the trips of the selected program for the time query built above
participant_ct_df = scaffolding.load_all_participant_trips(program, tq)

In [None]:
# Keep only the labeled trips (see scaffolding.filter_labeled_trips)
labeled_ct = scaffolding.filter_labeled_trips(participant_ct_df)

In [None]:
# Expand the user inputs; the purpose_confirm / mode_confirm / replaced_mode
# columns read further down are presumably created here — TODO confirm
expanded_ct = scaffolding.expand_userinputs(labeled_ct)

In [None]:
# Run the scaffolding data quality check, then display the resulting (rows, columns)
expanded_ct = scaffolding.data_quality_check(expanded_ct)
expanded_ct.shape

In [None]:
# Derive the display-label columns from the raw user labels, then look up the
# fuel entry for each display label. The order of the rows matters: the two
# fuel columns are computed from the display-label columns created before them.
label_mappings = (
    ('Trip_purpose', 'purpose_confirm', dic_pur),
    ('Mode_confirm', 'mode_confirm', dic_re),
    ('Replaced_mode', 'replaced_mode', dic_re),
    ('Mode_confirm_fuel', 'Mode_confirm', dic_fuel),
    ('Replaced_mode_fuel', 'Replaced_mode', dic_fuel),
)
for target_col, source_col, lookup in label_mappings:
    expanded_ct[target_col] = expanded_ct[source_col].map(lookup)

In [None]:
# Change meters to miles
# NOTE(review): the return value is discarded, so this presumably adds the
# converted columns (e.g. distance_miles, used below) to expanded_ct in place — confirm
scaffolding.unit_conversions(expanded_ct)

In [None]:
# file_suffix is appended to every saved plot file name; quality_text is
# appended to every plot title
file_suffix = scaffolding.get_file_suffix(year, month, program)
quality_text = scaffolding.get_quality_text(participant_ct_df, expanded_ct)

In [None]:
# Calculate energy impact
# The Mode_confirm_EI(kWH) and Mode_confirm_lb_CO2 columns read in the
# preprocessing step are presumably added by these helpers — TODO confirm
expanded_ct = scaffolding.energy_intensity(expanded_ct, df_EI, 'distance_miles', 'Replaced_mode', 'Mode_confirm')
expanded_ct = scaffolding.energy_impact_kWH(expanded_ct, 'distance_miles', 'Replaced_mode', 'Mode_confirm')
expanded_ct = scaffolding.CO2_impact_lb(expanded_ct, 'distance_miles', 'Replaced_mode', 'Mode_confirm')

### Data Preprocessing

In [None]:
# Build the per-trip table used by all time series: keep only the needed
# columns and collapse the local start year/month/day parts into one date.
date_part_names = {
    'start_local_dt_year': 'year',
    'start_local_dt_month': 'month',
    'start_local_dt_day': 'day',
}
trip_columns = [
    'user_id',
    *date_part_names,
    'Mode_confirm',
    'Mode_confirm_EI(kWH)',
    'Mode_confirm_lb_CO2',
    'distance_miles',
]
data = expanded_ct[trip_columns].copy()
data = data.rename(columns=date_part_names)
data['date_time'] = pd.to_datetime(data[['year', 'month', 'day']])
data = data.drop(columns=list(date_part_names.values()))

# Making the grouping columns categorical means a groupby reports every
# day/mode combination, even the ones without any data
confirmed_mode_order = [
    'Car, drove alone',
    'Car, with others',
    'Taxi/Uber/Lyft',
    'Bus',
    'Free Shuttle',
    'Train',
    'Bikeshare',
    'Pilot ebike',
    'Regular Bike',
    'Scooter share',
    'Skate board',
    'Walk',
    'Other',
    'Not a Trip',
]
data['user_id'] = pd.Categorical(data['user_id'])
data['date_time'] = pd.Categorical(data['date_time'])
data['Mode_confirm'] = pd.Categorical(
    data['Mode_confirm'],
    categories=confirmed_mode_order,
    ordered=True,
)

data.head()

In [None]:
# Daily aggregates derived from `data`. Produces:
#   mode_counts   - trips per user/day/confirmed mode (+ trip_count_smooth)
#   mode_distance - miles per user/day/confirmed mode (+ distance_miles_smooth)
#   emissions     - lb CO2 and miles per user/day     (+ distance_miles_smooth)
#   energy        - kWH and miles per user/day        (+ distance_miles_smooth)
#   active_users  - number of distinct users per day


def _weekly_rolling_mean(values):
    """Return the trailing 7-row rolling mean, defined from the first row on."""
    return values.rolling(window=7, min_periods=1).mean()


# Count the number of trips for each confirmed mode
mode_counts = data.groupby(['user_id','date_time','Mode_confirm'], as_index=False).size()
mode_counts.rename(columns={'size':'trip_count'}, inplace=True)

# Sum daily distance traveled for each mode
mode_distance = data.groupby(['user_id','date_time','Mode_confirm'], as_index=False)[['distance_miles']].sum()
mode_distance['distance_miles'] = mode_distance['distance_miles'].fillna(0)

# Sum daily emissions for each user
emissions = data.groupby(['user_id','date_time'], as_index=False)[['Mode_confirm_lb_CO2', 'distance_miles']].sum()
emissions['Mode_confirm_lb_CO2'] = emissions['Mode_confirm_lb_CO2'].fillna(0)
# The distance column must be filled from itself; filling it from the CO2
# column would replace every mileage with an emissions value
emissions['distance_miles'] = emissions['distance_miles'].fillna(0)

# Sum daily energy for each user
energy = data.groupby(['user_id','date_time'], as_index=False)[['Mode_confirm_EI(kWH)', 'distance_miles']].sum()
energy['Mode_confirm_EI(kWH)'] = energy['Mode_confirm_EI(kWH)'].fillna(0)
# Same as above: keep the mileage, do not overwrite it with the kWH values
energy['distance_miles'] = energy['distance_miles'].fillna(0)

# Get the count of unique users that were active on each given date
active_users = pd.DataFrame(data.groupby(['date_time'], as_index=False)['user_id'].nunique())
active_users.rename(columns={'user_id':'active_users'}, inplace=True)

# Add 7-day rolling avg smoothing to better see trends.
# transform() always returns a result aligned to the original row index, so the
# assignment works regardless of how groupby.apply treats group keys.
mode_counts['trip_count_smooth'] = mode_counts.groupby(['user_id','Mode_confirm'])['trip_count'].transform(_weekly_rolling_mean)
mode_distance['distance_miles_smooth'] = mode_distance.groupby(['user_id','Mode_confirm'])['distance_miles'].transform(_weekly_rolling_mean)
emissions['distance_miles_smooth'] = emissions.groupby(['user_id'])['distance_miles'].transform(_weekly_rolling_mean)
energy['distance_miles_smooth'] = energy.groupby(['user_id'])['distance_miles'].transform(_weekly_rolling_mean)

### Generate Timeseries Plots

In [None]:
# Net daily emissions: the CO2 summed over all users for each day, divided by
# the number of users active on that day
daily_co2 = emissions.groupby(['date_time'], as_index=False)['Mode_confirm_lb_CO2'].agg(['sum'])
plot_data = daily_co2.merge(active_users, on='date_time')
plot_data['sum'] = plot_data['sum'].div(plot_data['active_users'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Net Daily Emissions (All Users, excluding air)\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='sum')
line_axes.set(title=plot_title, xlabel='Date', ylabel='Emissions (lb CO2/day/user)')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_emissions_user%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Net daily energy: the kWH summed over all users for each day, divided by
# the number of users active on that day
daily_kwh = energy.groupby(['date_time'], as_index=False)['Mode_confirm_EI(kWH)'].agg(['sum'])
plot_data = daily_kwh.merge(active_users, on='date_time')
plot_data['sum'] = plot_data['sum'].div(plot_data['active_users'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Net Daily Energy (All Users, excluding air)\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='sum')
line_axes.set(title=plot_title, xlabel='Date', ylabel='Energy (kWH/day/user)')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_energy_user%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Travel efficiency: each user's daily CO2 divided by that user's smoothed
# distance column, averaged over users for every day.
# The matching energy plot would look identical because the scale factor divides out.
co2_rate = emissions['Mode_confirm_lb_CO2'].div(emissions['distance_miles_smooth'])
# Rows where the ratio is undefined (NaN) count as zero
emissions['CO2_per_mile'] = co2_rate.fillna(0)
plot_data = emissions.groupby(['date_time'], as_index=False)['CO2_per_mile'].agg(['mean'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Average Daily Emission Rate (All Users, excluding air)\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='mean')
line_axes.set(title=plot_title, xlabel='Date', ylabel='Emissions (lb CO2/mile/day)')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_emissions_vmt%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Number of distinct users with data on each day
plot_data = active_users

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Number of Active Users\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='active_users')
line_axes.set(title=plot_title, xlabel='Date', ylabel='Unique IDs')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_users%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Smoothed pilot e-bike trip counts, summed over all users for each day
ebike_counts = mode_counts.loc[mode_counts['Mode_confirm'] == 'Pilot ebike']
plot_data = ebike_counts.groupby(['date_time'], as_index=False)['trip_count_smooth'].agg(['sum'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Daily E-Bike Trips\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='sum')
line_axes.set(title=plot_title, xlabel='Date', ylabel='Trip Count')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_ebike_share%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Pilot e-bike mileage (unsmoothed), summed over all users for each day
ebike_trips = data.loc[data['Mode_confirm'] == 'Pilot ebike']
plot_data = ebike_trips.groupby(['date_time'], as_index=False)['distance_miles'].agg(['sum'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Daily E-Bike Mileage\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='sum')
line_axes.set(title=plot_title, xlabel='Date', ylabel='Miles')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_ebike_miles%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Pilot e-bike mileage per day, divided by the number of users active that day
ebike_trips = data.loc[data['Mode_confirm'] == 'Pilot ebike']
daily_ebike_miles = ebike_trips.groupby(['date_time'], as_index=False)['distance_miles'].agg(['sum'])
plot_data = daily_ebike_miles.merge(active_users, on='date_time')
plot_data['mileage_per_user'] = plot_data['sum'].div(plot_data['active_users'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Daily E-Bike Mileage per Active User\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='mileage_per_user')
line_axes.set(title=plot_title, xlabel='Date', ylabel='miles/user')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_ebike_miles_user%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Share of the smoothed daily mileage (all users combined) ridden on the pilot e-bike
total_miles = mode_distance.groupby(['date_time'], as_index=False)['distance_miles_smooth'].sum()
ebike_distance = mode_distance.loc[mode_distance['Mode_confirm'] == 'Pilot ebike']
ebike_miles = ebike_distance.groupby(['date_time'], as_index=False)['distance_miles_smooth'].sum()
# After the merge, the _x column holds the e-bike miles and the _y column the total miles
plot_data = ebike_miles.merge(total_miles, on=['date_time'])
plot_data['miles_proportion'] = plot_data['distance_miles_smooth_x'].div(plot_data['distance_miles_smooth_y'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Daily Ebike Mileage Proportion (All Users, excluding air)\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='miles_proportion', legend=False, palette="Set1")
line_axes.set(title=plot_title, xlabel='Date', ylabel='Proportion of Daily Miles')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)

fig.savefig("/plots/ts_ebike_miles_proportion%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Share of the smoothed daily trips (all users combined) made on the pilot e-bike
plot_data = mode_counts.groupby(['date_time','Mode_confirm'], as_index=False)['trip_count_smooth'].sum()
# Select the numeric column explicitly: summing the whole frame would also try
# to sum the categorical Mode_confirm column, which only works on pandas
# versions that silently drop such columns
total_trips = plot_data.groupby(['date_time'], as_index=False)['trip_count_smooth'].sum()
# After the merge, the _x column holds the per-mode trips and the _y column the daily total
plot_data = plot_data.merge(total_trips, on='date_time')
plot_data['trip_proportion'] = plot_data['trip_count_smooth_x'] / plot_data['trip_count_smooth_y']
plot_data = plot_data[plot_data['Mode_confirm']=='Pilot ebike']

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='trip_proportion').set(title='Daily E-Bike Trip Proportion (All Users, excluding air)\n%s'%quality_text, xlabel='Date', ylabel='Proportion of Daily Trips')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig("/plots/ts_ebike_trips_proportion%s.png"%file_suffix, bbox_inches='tight')

In [None]:
# Plot of mode share proportions across all users
# Consolidate the detailed confirmed modes into broader groups. The
# replacements are applied one at a time, in this order.
mode_consolidation = {
    'Bikeshare': 'Shared Micromobility',
    'Scooter share': 'Shared Micromobility',
    'Regular Bike': 'Personal Micromobility',
    'Skate board': 'Personal Micromobility',
    'Train': 'Transit',
    'Free Shuttle': 'Transit',
    'Bus': 'Transit',
    'Taxi/Uber/Lyft': 'Ridehail',
    'Pilot ebike': 'E-Bike',
}
plot_data = mode_counts
for detailed_mode, consolidated_mode in mode_consolidation.items():
    # replace() returns a new frame, so mode_counts itself is never modified
    plot_data = plot_data.replace(detailed_mode, consolidated_mode)

plot_data = plot_data.groupby(['date_time','Mode_confirm'], as_index=False)['trip_count_smooth'].sum()
# Select the numeric column explicitly so the Mode_confirm column is not summed as well
total_trips = plot_data.groupby(['date_time'], as_index=False)['trip_count_smooth'].sum()
# After the merge, the _x column holds the per-mode trips and the _y column the daily total
plot_data = plot_data.merge(total_trips, on='date_time')
plot_data['trip_proportion'] = plot_data['trip_count_smooth_x'] / plot_data['trip_count_smooth_y']

# Re-establish categorical variable to not include Other and Non-trips.
# The proportions above were computed first, so they are still relative to
# all trips. Copy the filtered rows so the assignment below does not write
# into a slice of the merged frame.
plot_data = plot_data[~plot_data['Mode_confirm'].isin(['Not a Trip','Other'])].copy()
plot_data['Mode_confirm'] = pd.Categorical(plot_data['Mode_confirm'],
                                   categories=['Car, drove alone',
                                              'Car, with others',
                                              'Shared Micromobility',
                                              'Personal Micromobility',
                                              'Transit',
                                              'Walk',
                                              'Ridehail',
                                              'E-Bike'])

fig, ax = plt.subplots(figsize=(16,4))
# The title now closes the parenthesis it opens
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='trip_proportion', hue='Mode_confirm').set(title='Daily Aggregate Mode Share (excluding "Other" and "Not a trip")\n%s'%quality_text, xlabel='Date', ylabel='Proportion of All Trips')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
plt.legend(bbox_to_anchor=(1.02, 1), loc='best', borderaxespad=0, title='Confirmed Mode')
ax.figure.savefig("/plots/ts_all_modes%s.png"%file_suffix, bbox_inches='tight')

In [None]:
# E-bike share of each user's smoothed daily trips, drawn as a single line.
# There is one proportion per user for every day, so seaborn aggregates them:
# per the seaborn documentation, the line is an estimate of the central
# tendency (the mean by default) and the band a confidence interval for it.
# This shows the spread between users, but not which users reduced their ridership.

all_trips = mode_counts.groupby(['date_time','user_id'], as_index=False)['trip_count_smooth'].sum()
ebike_rows = mode_counts[mode_counts.Mode_confirm == "Pilot ebike"]
plot_data_ebike = ebike_rows.groupby(['date_time', 'user_id'], as_index=False)["trip_count_smooth"].sum()
# After the merge, the _x column holds all trips and the _y column the e-bike trips
plot_data = all_trips.merge(plot_data_ebike, on=['date_time', 'user_id'])
plot_data['bike_trip_proportion'] = plot_data['trip_count_smooth_y'].div(plot_data['trip_count_smooth_x'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Daily Ebike Trip Proportion for Individual users (Running average and variation)\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='bike_trip_proportion', legend=False)
line_axes.set(title=plot_title, xlabel='Date', ylabel='Proportion of Daily Trips')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_ebike_trip_individual_variation%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# E-bike share of each user's smoothed daily mileage, drawn as a single line.
# There is one proportion per user for every day, so seaborn aggregates them:
# per the seaborn documentation, the line is an estimate of the central
# tendency (the mean by default) and the band a confidence interval for it.
# This shows the spread between users, but not which users reduced their ridership.

all_miles = mode_distance.groupby(['date_time','user_id'], as_index=False)['distance_miles_smooth'].sum()
ebike_rows = mode_distance[mode_distance.Mode_confirm == "Pilot ebike"]
plot_data_ebike = ebike_rows.groupby(['date_time', 'user_id'], as_index=False)["distance_miles_smooth"].sum()
# After the merge, the _x column holds all miles and the _y column the e-bike miles
plot_data = all_miles.merge(plot_data_ebike, on=['date_time', 'user_id'])
plot_data['bike_miles_proportion'] = plot_data['distance_miles_smooth_y'].div(plot_data['distance_miles_smooth_x'])

fig, ax = plt.subplots(figsize=(16,4))
plot_title = 'Daily Ebike Mileage Proportion for Individual users (Running average and variation)\n%s' % quality_text
line_axes = sns.lineplot(ax=ax, data=plot_data, x='date_time', y='bike_miles_proportion', legend=False)
line_axes.set(title=plot_title, xlabel='Date', ylabel='Proportion of Daily Miles')
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_ebike_mile_individual_variation%s.png" % file_suffix, bbox_inches='tight')

In [None]:
# Plot of ebike mode share across many individual users, explored as a bar plot.
# Plotting this on a daily basis can get too fine-grained, so the smoothed
# daily counts are summed per calendar month before the proportion is taken.

# After the merge below, the _x column holds all trips and the _y column the e-bike trips
count_cols = ['trip_count_smooth_x', 'trip_count_smooth_y']

plot_data = mode_counts

plot_data_ebike = plot_data[plot_data.Mode_confirm == "Pilot ebike"].groupby(['date_time', 'user_id'], as_index=False)["trip_count_smooth"].sum()
plot_data = plot_data.groupby(['date_time','user_id'], as_index=False)['trip_count_smooth'].sum()
plot_data = plot_data.merge(plot_data_ebike, on=['date_time', 'user_id'])
# Label each user by the last four characters of the id to keep the legend short
plot_data['user_id'] = plot_data['user_id'].astype(str).str[-4:]
plot_data['date_time'] = pd.DatetimeIndex(plot_data.date_time)

monthly_frames = []
for k, month_df in plot_data.groupby(pd.Grouper(freq='M', key="date_time")):
    if month_df.empty:
        # A month without rows contributes nothing to the plot
        continue
    # Sum only the count columns; the datetime column cannot be summed
    month_user_counts = month_df.groupby("user_id")[count_cols].sum()
    month_user_counts["ebike_trip_proportion"] = month_user_counts.trip_count_smooth_y / month_user_counts.trip_count_smooth_x
    month_user_counts["date_month"] = k
    month_user_counts["user_id"] = month_user_counts.index
    monthly_frames.append(month_user_counts)

# DataFrame.append no longer exists in pandas 2.x; concatenate once instead,
# renumbering the rows in the same step
if monthly_frames:
    plot_data = pd.concat(monthly_frames, ignore_index=True)
else:
    plot_data = pd.DataFrame(columns=count_cols + ["ebike_trip_proportion", "date_month", "user_id"])

fig, ax = plt.subplots(figsize=(16,4))
sns.barplot(ax=ax, data=plot_data, x='date_month', y='ebike_trip_proportion', hue="user_id").set(title='Daily Ebike Trip Count Proportion (Individual)\n%s'%quality_text, xlabel='Date', ylabel='Proportion of User Monthly Trips')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
# ax.figure.savefig("/plots/ts_ebike_trip_individual%s.png"%file_suffix, bbox_inches='tight')

In [None]:
# This explores plotting the same per-user monthly data (the plot_data built
# for the bar plot) as a line plot, with one line per user.
# The axis labels describe what is plotted: x is the month, y is the e-bike
# share of each user's monthly trips.
fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_month', y='ebike_trip_proportion', hue="user_id").set(title='Daily Ebike Trip Count Proportion (Individual)\n%s'%quality_text, xlabel='Date', ylabel='Proportion of User Monthly Trips')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)

In [None]:
# Plot of ebike trip share for each individual user, one subplot per user.
# The smoothed daily counts are summed per calendar month to smooth out variation.
# Both the bar plot and the line plot above are confusing, so we try subplots instead.

# After the merge below, the _x column holds all trips and the _y column the e-bike trips
count_cols = ['trip_count_smooth_x', 'trip_count_smooth_y']

plot_data = mode_counts

plot_data_ebike = plot_data[plot_data.Mode_confirm == "Pilot ebike"].groupby(['date_time', 'user_id'], as_index=False)["trip_count_smooth"].sum()
plot_data = plot_data.groupby(['date_time','user_id'], as_index=False)['trip_count_smooth'].sum()
plot_data = plot_data.merge(plot_data_ebike, on=['date_time', 'user_id'])
# Label each user by the last four characters of the id
plot_data['user_id'] = plot_data['user_id'].astype(str).str[-4:]
plot_data['date_time'] = pd.DatetimeIndex(plot_data.date_time)
unique_user_ids = plot_data.user_id.unique()
# Five subplots per row, rounded up, and never fewer than one row
row_count = max(1, -(-len(unique_user_ids) // 5))
# squeeze=False keeps ax_arr two-dimensional even when there is a single row,
# so flattening works for any number of users
fig, ax_arr = plt.subplots(ncols=5, nrows=row_count, figsize=(16,4*row_count), sharex=True, sharey=True, squeeze=False)
flattened_ax_arr = list(ax_arr.flatten())
for i, (user_id, user_df) in enumerate(plot_data.groupby("user_id")):
    # Sum only the count columns for each month
    month_count_df = user_df.groupby(pd.Grouper(freq='M', key="date_time"))[count_cols].sum()
    month_count_df["ebike_trip_proportion"] = month_count_df.trip_count_smooth_y / month_count_df.trip_count_smooth_x
    sns.lineplot(ax=flattened_ax_arr[i],data=month_count_df.ebike_trip_proportion).set(title=user_id)
    flattened_ax_arr[i].tick_params(axis='x', rotation=45)

plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
fig.suptitle('Daily Ebike Trip Count Proportion (Individual)\n%s'%quality_text)
fig.savefig("/plots/ts_ebike_trip_proportion_individual%s.png"%file_suffix, bbox_inches='tight')

In [None]:
# Plot of ebike mileage share for each individual user, one subplot per user.
# The smoothed daily miles are summed per calendar month to smooth out variation.
# Both the bar plot and the line plot above are confusing, so we try subplots instead.

# After the merge below, the _x column holds all miles and the _y column the e-bike miles
distance_cols = ['distance_miles_smooth_x', 'distance_miles_smooth_y']

plot_data = mode_distance

plot_data_ebike = plot_data[plot_data.Mode_confirm == "Pilot ebike"].groupby(['date_time', 'user_id'], as_index=False)["distance_miles_smooth"].sum()
plot_data = plot_data.groupby(['date_time','user_id'], as_index=False)['distance_miles_smooth'].sum()
plot_data = plot_data.merge(plot_data_ebike, on=['date_time', 'user_id'])
# Label each user by the last four characters of the id
plot_data['user_id'] = plot_data['user_id'].astype(str).str[-4:]
plot_data['date_time'] = pd.DatetimeIndex(plot_data.date_time)
unique_user_ids = plot_data.user_id.unique()
# Five subplots per row, rounded up, and never fewer than one row
row_count = max(1, -(-len(unique_user_ids) // 5))
# squeeze=False keeps ax_arr two-dimensional even when there is a single row,
# so flattening works for any number of users
fig, ax_arr = plt.subplots(ncols=5, nrows=row_count, figsize=(16,4*row_count), sharex=True, sharey=True, squeeze=False)
flattened_ax_arr = list(ax_arr.flatten())
for i, (user_id, user_df) in enumerate(plot_data.groupby("user_id")):
    # Sum only the distance columns for each month
    month_count_df = user_df.groupby(pd.Grouper(freq='M', key="date_time"))[distance_cols].sum()
    month_count_df["ebike_miles_proportion"] = month_count_df.distance_miles_smooth_y / month_count_df.distance_miles_smooth_x
    sns.lineplot(ax=flattened_ax_arr[i],data=month_count_df.ebike_miles_proportion).set(title=user_id)
    flattened_ax_arr[i].tick_params(axis='x', rotation=45)

plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
fig.suptitle('Daily Ebike Mileage Proportion (Individual)\n%s'%quality_text)
fig.savefig("/plots/ts_ebike_mile_proportion_individual%s.png"%file_suffix, bbox_inches='tight')