In [None]:
# Notebook parameters.
# year / month are passed to scaffolding.get_time_query and scaffolding.get_file_suffix below.
# NOTE(review): None presumably means "no time restriction" -- confirm in scaffolding.
year = None
month = None
# Program name passed to scaffolding.load_all_participant_trips and scaffolding.get_file_suffix
program = "prepilot"
# UUID (as a string) of the single user shown in the individual mode-share plot
user_id_plot = '9910245f-ee4e-4cca-ab4c-dd2312eb0d5d'

In [None]:
from collections import defaultdict
import datetime

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import emission.core.get_database as edb
import emission.core.wrapper.entry as ecwe
import emission.storage.decorations.analysis_timeseries_queries as esda
import emission.storage.decorations.trip_queries as esdt
import emission.storage.decorations.timeline as esdl
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.timequery as estt
from uuid import UUID

# Plot styling.
# NOTE(review): sns.set() runs after set_style("whitegrid"); sns.set() applies seaborn's
# default theme, which presumably replaces the whitegrid style chosen on the line before.
# Confirm which look is intended and swap the two calls if whitegrid is wanted.
sns.set_style("whitegrid")
sns.set()
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
import scaffolding
from plots import *

### Collect Data From Database

In [None]:
# Loading mapping dictionaries from mapping_dictionaries notebook
%store -r df_EI
%store -r dic_re
%store -r dic_pur
%store -r dic_fuel

# convert a dictionary to a defaultdict
dic_pur = defaultdict(lambda: 'Other',dic_pur)
dic_re = defaultdict(lambda: 'Other',dic_re)

In [None]:
# Build the time query from the year/month parameters; it is passed to the trip loader below
tq = scaffolding.get_time_query(year, month)

In [None]:
# Load the trips for the configured program, restricted by the time query.
# NOTE(review): presumably one row per confirmed trip -- confirm in scaffolding.
participant_ct_df = scaffolding.load_all_participant_trips(program, tq)

In [None]:
# Filter the loaded trips with scaffolding.filter_labeled_trips
# (presumably keeps only trips that carry user labels -- confirm in scaffolding)
labeled_ct = scaffolding.filter_labeled_trips(participant_ct_df)

In [None]:
# Expand the user inputs of the labeled trips.
# NOTE(review): presumably this yields the purpose_confirm / mode_confirm / replaced_mode
# columns that the label-mapping cell reads -- confirm in scaffolding.
expanded_ct = scaffolding.expand_userinputs(labeled_ct)

In [None]:
# Run the scaffolding data quality check and keep its result,
# then display the (rows, columns) of the frame that remains
expanded_ct = scaffolding.data_quality_check(expanded_ct)
expanded_ct.shape

In [None]:
# Derive display-label columns from the raw user labels, then fuel columns from the
# display labels. Order matters: the fuel columns read the columns created just before them.
# dic_pur / dic_re are defaultdicts, so labels they do not contain become 'Other';
# dic_fuel is used as restored, so labels it does not contain map to NaN.
label_mappings = [
    ('Trip_purpose',       'purpose_confirm', dic_pur),
    ('Mode_confirm',       'mode_confirm',    dic_re),
    ('Replaced_mode',      'replaced_mode',   dic_re),
    ('Mode_confirm_fuel',  'Mode_confirm',    dic_fuel),
    ('Replaced_mode_fuel', 'Replaced_mode',   dic_fuel),
]
for target_col, source_col, lookup in label_mappings:
    expanded_ct[target_col] = expanded_ct[source_col].map(lookup)

In [None]:
# Change meters to miles.
# The return value is not used, so the helper presumably adds its columns to expanded_ct
# in place (the 'distance_miles' column is read by later cells) -- confirm in scaffolding.
scaffolding.unit_conversions(expanded_ct)

In [None]:
# Suffix appended to every saved plot filename below; built from year, month and program
file_suffix = scaffolding.get_file_suffix(year, month, program)
# Text derived from the loaded trips and the trips remaining after filtering.
# NOTE(review): quality_text is not used by any cell visible in this notebook.
quality_text = scaffolding.get_quality_text(participant_ct_df, expanded_ct)

In [None]:
# Calculate energy impact.
# Each helper takes the distance column plus the replaced-mode and confirmed-mode columns
# and returns the frame that is carried forward.
# NOTE(review): presumably these add the 'Mode_confirm_EI(kWH)' and 'Mode_confirm_lb_CO2'
# columns selected in the preprocessing cell -- confirm in scaffolding.
expanded_ct = scaffolding.energy_intensity(expanded_ct, df_EI, 'distance_miles', 'Replaced_mode', 'Mode_confirm')
expanded_ct = scaffolding.energy_impact_kWH(expanded_ct, 'distance_miles', 'Replaced_mode', 'Mode_confirm')
expanded_ct = scaffolding.CO2_impact_lb(expanded_ct, 'distance_miles', 'Replaced_mode', 'Mode_confirm')

### Data Preprocessing

In [None]:
# Build the per-trip working frame: one row per trip, stamped with its local calendar day
trip_columns = [
    'user_id',
    'start_local_dt_year',
    'start_local_dt_month',
    'start_local_dt_day',
    'Mode_confirm',
    'Mode_confirm_EI(kWH)',
    'Mode_confirm_lb_CO2',
    'distance_miles',
]
date_parts = {'start_local_dt_year':'year','start_local_dt_month':'month','start_local_dt_day':'day'}

data = expanded_ct[trip_columns].copy()
data = data.rename(columns=date_parts)
# pd.to_datetime assembles a timestamp from the year / month / day columns
data['date_time'] = pd.to_datetime(data[['year','month','day']])
data = data.drop(columns=['year','month','day'])

# Categorical columns make later groupbys tabulate every day/mode/user combination,
# including combinations that have no trips
mode_order = ['Car, drove alone',
              'Car, with others',
              'Taxi/Uber/Lyft',
              'Bus',
              'Free Shuttle',
              'Train',
              'Bikeshare',
              'Pilot ebike',
              'Regular Bike',
              'Scooter share',
              'Skate board',
              'Walk',
              'Other',
              'Not a Trip']
data['user_id'] = pd.Categorical(data['user_id'])
data['date_time'] = pd.Categorical(data['date_time'])
data['Mode_confirm'] = pd.Categorical(data['Mode_confirm'], categories=mode_order, ordered=True)

data.head()

In [None]:
# Aggregate the per-trip frame into daily tables.
# observed=False is passed explicitly on the aggregations so that every category combination
# (user x day x mode) gets a row even when it has no trips. That is what the categorical
# columns were set up for, and it no longer depends on the pandas default for `observed`.

# Count the number of trips for each confirmed mode
mode_counts = data.groupby(['user_id','date_time','Mode_confirm'], as_index=False, observed=False).size()
mode_counts = mode_counts.rename(columns={'size':'trip_count'})

# Sum daily distance traveled for each mode
# (the summed column keeps the name 'distance_miles', so no rename is needed)
mode_distance = data.groupby(['user_id','date_time','Mode_confirm'], as_index=False, observed=False)[['distance_miles']].sum()
mode_distance['distance_miles'] = mode_distance['distance_miles'].fillna(0)

# Sum daily emissions and distance for each user
emissions = data.groupby(['user_id','date_time'], as_index=False, observed=False)[['Mode_confirm_lb_CO2', 'distance_miles']].sum()
emissions['Mode_confirm_lb_CO2'] = emissions['Mode_confirm_lb_CO2'].fillna(0)
# Fill the distance from its own column; filling it from the CO2 column would replace the
# mileage with emissions and corrupt every per-mile figure derived from it
emissions['distance_miles'] = emissions['distance_miles'].fillna(0)

# Sum daily energy and distance for each user
energy = data.groupby(['user_id','date_time'], as_index=False, observed=False)[['Mode_confirm_EI(kWH)', 'distance_miles']].sum()
energy['Mode_confirm_EI(kWH)'] = energy['Mode_confirm_EI(kWH)'].fillna(0)
# Same as above: distance is filled from distance, not from the energy column
energy['distance_miles'] = energy['distance_miles'].fillna(0)

# Get the count of unique users that were active on each given date
active_users = pd.DataFrame(data.groupby(['date_time'], as_index=False, observed=False)['user_id'].nunique())
active_users = active_users.rename(columns={'user_id':'active_users'})


def _rolling_week_mean(daily_values):
    """Mean over a trailing window of 7 rows (min_periods=1, so early rows are averaged too)."""
    return daily_values.rolling(7, 1).mean()


# Add 7-day rolling avg smoothing to better see trends.
# transform() returns a result aligned to the rows of the frame, so it can be assigned
# straight back as a column; apply() may return a group-keyed index that cannot.
# Only rows that exist matter for an aligned result, hence observed=True here.
mode_counts['trip_count_smooth'] = mode_counts.groupby(['user_id','Mode_confirm'], observed=True)['trip_count'].transform(_rolling_week_mean)
mode_distance['distance_miles_smooth'] = mode_distance.groupby(['user_id','Mode_confirm'], observed=True)['distance_miles'].transform(_rolling_week_mean)
emissions['distance_miles_smooth'] = emissions.groupby(['user_id'], observed=True)['distance_miles'].transform(_rolling_week_mean)
energy['distance_miles_smooth'] = energy.groupby(['user_id'], observed=True)['distance_miles'].transform(_rolling_week_mean)

### Generate Timeseries Plots

In [None]:
# Net daily emissions across all users, normalized by the number of users active that day
plot_data = (
    emissions
    .groupby(['date_time'], as_index=False)['Mode_confirm_lb_CO2']
    .agg(['sum'])
    .merge(active_users, on='date_time')
)
plot_data['sum'] = plot_data['sum'].div(plot_data['active_users'])

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='sum')
ax.set(title='Net Daily Emissions (All Users)', xlabel='Date', ylabel='Emissions (lb CO2/day/user)')
plt.xticks(rotation=45)
# Leave room at the bottom for the rotated date labels
plt.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_emissions_user%s.png" % file_suffix)

In [None]:
# Net daily energy across all users, normalized by the number of users active that day
plot_data = (
    energy
    .groupby(['date_time'], as_index=False)['Mode_confirm_EI(kWH)']
    .agg(['sum'])
    .merge(active_users, on='date_time')
)
plot_data['sum'] = plot_data['sum'].div(plot_data['active_users'])

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='sum')
ax.set(title='Net Daily Energy (All Users)', xlabel='Date', ylabel='Energy (kWH/day/user)')
plt.xticks(rotation=45)
# Leave room at the bottom for the rotated date labels
plt.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_energy_user%s.png" % file_suffix)

In [None]:
# Daily emission rate (travel efficiency): each user's lb CO2 divided by that user's
# smoothed daily mileage, then averaged over users for each date.
# Original author's note: the energy plot would be identical since the scale factor divides out.
# NOTE(review): the numerator is the raw daily CO2 while the denominator is the 7-day
# smoothed distance -- confirm that mixing the two is intended.
# NOTE(review): the mean runs over every row of `emissions` for a date; if that frame holds
# rows for users with no trips that day, their 0 rates pull the average down -- confirm.
co2_rate = emissions['Mode_confirm_lb_CO2'] / emissions['distance_miles_smooth']
# NaN rates (e.g. 0 / 0 on days without travel) are reported as 0
emissions['CO2_per_mile'] = co2_rate.fillna(0)
plot_data = emissions.groupby(['date_time'], as_index=False)['CO2_per_mile'].agg(['mean'])

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='mean')
ax.set(title='Average Daily Emission Rate (All Users)', xlabel='Date', ylabel='Emissions (lb CO2/mile/day)')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_emissions_vmt%s.png" % file_suffix)

In [None]:
# Smoothed daily trip counts per confirmed mode for the single user named by user_id_plot
is_selected_user = mode_counts['user_id'] == UUID(user_id_plot)
plot_data = mode_counts[is_selected_user]

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(
    ax=ax,
    data=plot_data,
    x='date_time',
    y='trip_count_smooth',
    hue='Mode_confirm',
    style='Mode_confirm',
    palette='Set1',
)
ax.set(title=f'Mode Share (User {user_id_plot})', xlabel='Date', ylabel='Trip Count')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
plt.legend(loc='center left', title='Confirmed Mode')
fig.savefig("/plots/ts_mode_share%s.png" % file_suffix)

In [None]:
# Number of unique users with trips on each date
plot_data = active_users

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='active_users')
ax.set(title='Number of Active Users', xlabel='Date', ylabel='Unique IDs')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_users%s.png" % file_suffix)

In [None]:
# Smoothed daily 'Pilot ebike' trip counts, summed over all users
is_ebike = mode_counts['Mode_confirm'] == 'Pilot ebike'
plot_data = (
    mode_counts[is_ebike]
    .groupby(['date_time'], as_index=False)['trip_count_smooth']
    .agg(['sum'])
)

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='sum')
ax.set(title='Daily E-Bike Trips', xlabel='Date', ylabel='Trip Count')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_ebike_share%s.png" % file_suffix)

In [None]:
# Total daily 'Pilot ebike' mileage, summed over all users (taken from the per-trip frame)
is_ebike = data['Mode_confirm'] == 'Pilot ebike'
plot_data = (
    data[is_ebike]
    .groupby(['date_time'], as_index=False)['distance_miles']
    .agg(['sum'])
)

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='sum')
ax.set(title='Daily E-Bike Mileage', xlabel='Date', ylabel='Miles')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_ebike_miles%s.png" % file_suffix)

In [None]:
# Total daily 'Pilot ebike' mileage divided by the number of users active on that date
is_ebike = data['Mode_confirm'] == 'Pilot ebike'
plot_data = (
    data[is_ebike]
    .groupby(['date_time'], as_index=False)['distance_miles']
    .agg(['sum'])
    .merge(active_users, on='date_time')
)
plot_data['mileage_per_user'] = plot_data['sum'].div(plot_data['active_users'])

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='mileage_per_user')
ax.set(title='Daily E-Bike Mileage per Active User', xlabel='Date', ylabel='miles/user')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
fig.savefig("/plots/ts_ebike_miles_user%s.png" % file_suffix)

In [None]:
# Plot of mode share proportions across all users:
# each mode's smoothed trip count divided by the smoothed trip count of all modes that day
plot_data = mode_counts.groupby(['date_time','Mode_confirm'], as_index=False)['trip_count_smooth'].sum()
# Sum only the trip-count column. Summing every column would also try to sum the categorical
# 'Mode_confirm' column, which newer pandas versions reject instead of silently dropping.
total_trips = plot_data.groupby(['date_time'], as_index=False)['trip_count_smooth'].sum()
# Both frames carry 'trip_count_smooth', so the merge suffixes them:
# _x is the per-mode count, _y is the all-mode total for the date
plot_data = plot_data.merge(total_trips, on='date_time')
plot_data['trip_proportion'] = plot_data['trip_count_smooth_x'] / plot_data['trip_count_smooth_y']

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='trip_proportion', hue='Mode_confirm').set(title='Daily Aggregate Mode Share', xlabel='Date', ylabel='Proportion of All Trips')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
plt.legend(loc='center left', title='Confirmed Mode')
ax.figure.savefig("/plots/ts_all_modes%s.png" % file_suffix)

In [None]:
# Plot of ebike mode share across many individual users
# This plot is a little crazy if the number of users displayed is not limited,
# so only the first max_users_shown IDs are plotted
max_users_shown = 20
# Slice with the full count: [:19] would silently plot 19 users instead of the intended 20
test_uuids = pd.unique(mode_distance['user_id'])[:max_users_shown]
plot_data = mode_distance[mode_distance['user_id'].isin(test_uuids)]
plot_data = plot_data[plot_data['Mode_confirm']=='Pilot ebike']
plot_data = plot_data.groupby(['user_id','date_time'], as_index=False)['distance_miles_smooth'].sum()
# Per-user daily total over all modes; the merge below keeps only the selected users
total_miles = mode_distance.groupby(['user_id','date_time'], as_index=False)['distance_miles_smooth'].sum()
# Both frames carry 'distance_miles_smooth', so the merge suffixes them:
# _x is the ebike mileage, _y is the all-mode mileage for that user and date
plot_data = plot_data.merge(total_miles, on=['user_id','date_time'])
plot_data['miles_proportion'] = plot_data['distance_miles_smooth_x'] / plot_data['distance_miles_smooth_y']
# Shorten each ID to its last 4 characters for the hue grouping
plot_data['user_id'] = plot_data['user_id'].astype(str).str[-4:]

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(ax=ax, data=plot_data, x='date_time', y='miles_proportion', hue='user_id', legend=False, palette="Set1").set(title='Daily Ebike Mileage Proportion (Individual)', xlabel='Date', ylabel='Proportion of User Daily Miles')
plt.xticks(rotation=45)
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig("/plots/ts_ebike_mile_individual%s.png" % file_suffix)