## Generate Static Graphs

These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.

Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots.

In [None]:
# Notebook input parameters. The scripts that generate the monthly statistics
# overwrite these values; they can also be edited by hand for local runs.
# Setting a filter to None removes it so that all data is plotted.
year = 2020
month = 11
program = "default"
study_type = "study"
# NOTE(review): not referenced by any cell visible in this section — confirm it is still needed.
mode_of_interest = None

In [None]:
from collections import defaultdict
import datetime

import numpy as np
import pandas as pd

from plots import *
import scaffolding

sns.set_style("whitegrid")
sns.set()
%matplotlib inline

In [None]:
# Loading mapping dictionaries from mapping_dictionaries notebook
%store -r df_ei
%store -r dic_re
%store -r dic_fuel

# convert a dictionary to a defaultdict
dic_re = defaultdict(lambda: 'Other',dic_re)
dic_fuel = defaultdict(lambda: 'Other',dic_fuel)

## Collect Data From Database

In [None]:
# Fetch the trips for the configured year/month/program/study type, together
# with the file-name suffix and the data-quality caption reused by every plot.
loaded = scaffolding.load_viz_notebook_data(year, month, program, study_type, dic_re)
expanded_ct, file_suffix, quality_text = loaded

# Attach the energy/emission label columns derived from df_ei and dic_fuel
expanded_ct = scaffolding.add_energy_labels(expanded_ct, df_ei, dic_fuel)

## Data Preprocessing

In [None]:
# Build a day-resolution timestamp from the separate year/month/day columns
date_parts = {
    'start_local_dt_year': 'year',
    'start_local_dt_month': 'month',
    'start_local_dt_day': 'day',
}
wanted_columns = [
    'user_id',
    *date_parts,
    'Mode_confirm',
    'Mode_confirm_EI(kWH)',
    'Mode_confirm_lb_CO2',
    'distance_miles',
]
data = expanded_ct[wanted_columns].copy()
data = data.rename(columns=date_parts)
short_names = list(date_parts.values())
data['date_time'] = pd.to_datetime(data[short_names])
data = data.drop(columns=short_names)

# Categorical columns make groupby emit every user/day/mode combination,
# including the ones that have no rows in the data
data['user_id'] = pd.Categorical(data['user_id'])
data['date_time'] = pd.Categorical(data['date_time'])
mode_categories = np.unique(list(dic_re.values()))
data['Mode_confirm'] = pd.Categorical(data['Mode_confirm'], ordered=True, categories=mode_categories)

data.head()

In [None]:
# Build the per-user daily aggregates consumed by the timeseries plots:
#   mode_counts   - trips per user/day/mode
#   mode_distance - miles per user/day/mode
#   emissions     - lb CO2 and miles per user/day
#   energy        - kWH and miles per user/day
#   active_users  - number of distinct users per day


def _rolling_mean_7d(series):
    """Return the 7-row rolling mean of *series*; early rows average whatever is available."""
    return series.rolling(7, min_periods=1).mean()


# Count the number of trips for each confirmed mode
mode_counts = data.groupby(['user_id','date_time','Mode_confirm'], as_index=False).size()
mode_counts.rename(columns={'size':'trip_count'}, inplace=True)

# Sum daily distance traveled for each mode
# (the summed column keeps its 'distance_miles' name, so no rename is needed)
mode_distance = data.groupby(['user_id','date_time','Mode_confirm'], as_index=False)[['distance_miles']].sum()
mode_distance['distance_miles'] = mode_distance['distance_miles'].fillna(0)

# Sum daily emissions for each user
emissions = data.groupby(['user_id','date_time'], as_index=False)[['Mode_confirm_lb_CO2', 'distance_miles']].sum()
emissions['Mode_confirm_lb_CO2'] = emissions['Mode_confirm_lb_CO2'].fillna(0)
# Fill the distance column from itself; it must not be overwritten with the CO2 values
emissions['distance_miles'] = emissions['distance_miles'].fillna(0)

# Sum daily energy for each user
energy = data.groupby(['user_id','date_time'], as_index=False)[['Mode_confirm_EI(kWH)', 'distance_miles']].sum()
energy['Mode_confirm_EI(kWH)'] = energy['Mode_confirm_EI(kWH)'].fillna(0)
# Fill the distance column from itself; it must not be overwritten with the energy values
energy['distance_miles'] = energy['distance_miles'].fillna(0)

# Get the count of unique users that were active on each given date
active_users = pd.DataFrame(data.groupby(['date_time'], as_index=False)['user_id'].nunique())
active_users.rename(columns={'user_id':'active_users'}, inplace=True)

# Add 7-day rolling avg smoothing to better see trends.
# transform() returns a result indexed like the input frame, so the column
# assignment aligns row-for-row regardless of how the pandas version in use
# labels the output of groupby().apply().
mode_counts['trip_count_smooth'] = mode_counts.groupby(['user_id','Mode_confirm'])['trip_count'].transform(_rolling_mean_7d)
mode_distance['distance_miles_smooth'] = mode_distance.groupby(['user_id','Mode_confirm'])['distance_miles'].transform(_rolling_mean_7d)
emissions['distance_miles_smooth'] = emissions.groupby(['user_id'])['distance_miles'].transform(_rolling_mean_7d)
energy['distance_miles_smooth'] = energy.groupby(['user_id'])['distance_miles'].transform(_rolling_mean_7d)

## Generate Timeseries Plots

### Daily emissions per user

In [None]:
# Net impact: total CO2 emitted on each day across all users, normalised by
# the number of users that were active on that day
daily_co2 = emissions.groupby(['date_time'], as_index=False)['Mode_confirm_lb_CO2'].agg(['sum'])
plot_data = daily_co2.merge(active_users, on='date_time')
plot_data['sum'] = plot_data['sum'].div(plot_data['active_users'])

file_name = "ts_emissions_user%s"%file_suffix
ylab = 'Emissions (lb CO2/day/user)'
plot_title = 'Net Daily Emissions (All Users, excluding air)\n%s'%quality_text
timeseries_plot(plot_data['date_time'], plot_data['sum'], plot_title, ylab, file_name)
alt_text = store_alt_text_timeseries(plot_data, file_name, plot_title)

### Daily energy per user

In [None]:
# Net impact: total energy used on each day across all users, normalised by
# the number of users that were active on that day
daily_energy = energy.groupby(['date_time'], as_index=False)['Mode_confirm_EI(kWH)'].agg(['sum'])
plot_data = daily_energy.merge(active_users, on='date_time')
plot_data['sum'] = plot_data['sum'].div(plot_data['active_users'])

file_name = "ts_energy_user%s"%file_suffix
ylab = 'Energy (kWH/day/user)'
plot_title = 'Net Daily Energy (All Users, excluding air)\n%s'%quality_text
timeseries_plot(plot_data['date_time'], plot_data['sum'], plot_title, ylab, file_name)
alt_text = store_alt_text_timeseries(plot_data, file_name, plot_title)

### Emissions per mile per day

In [None]:
# Travel efficiency: each user's daily CO2 divided by their smoothed daily
# distance, then averaged over users for every date.
# NOTE(review): the original author states the energy version of this plot
# would be identical because the scale factor divides out — not verified here.
co2_rate = emissions['Mode_confirm_lb_CO2'] / emissions['distance_miles_smooth']
# 0/0 produces NaN for user-days without travel; report those as a rate of 0
emissions['CO2_per_mile'] = co2_rate.fillna(0)
plot_data = emissions.groupby(['date_time'])['CO2_per_mile'].agg(['mean']).reset_index()

file_name = "ts_emissions_vmt%s"%file_suffix
ylab = 'Emissions (lb CO2/mile/day)'
plot_title = 'Average Daily Emission Rate (All Users, excluding air)\n%s'%quality_text
timeseries_plot(plot_data['date_time'], plot_data['mean'], plot_title, ylab, file_name)
alt_text = store_alt_text_timeseries(plot_data, file_name, plot_title)

### Number of active users

In [None]:
# Daily count of distinct users; the aggregate computed earlier is plotted as-is
plot_data = active_users

file_name = "ts_users%s"%file_suffix
ylab = 'Unique IDs'
plot_title = 'Number of Active Users\n%s'%quality_text
timeseries_plot(plot_data['date_time'], plot_data['active_users'], plot_title, ylab, file_name)
alt_text = store_alt_text_timeseries(plot_data, file_name, plot_title)

### Daily Mode share

In [None]:
# Plot of mode share proportions across all users: for every day, each
# consolidated mode's share of the (7-day smoothed) trip count summed over users.

# Consolidate detailed modes into broader groups. The replacements are applied
# one at a time, in this order; 'Walk' needs no entry because it keeps its label.
mode_groups = {
    'Bikeshare': 'Shared Micromobility',
    'Scooter share': 'Shared Micromobility',
    'Regular Bike': 'Personal Micromobility',
    'Skate board': 'Personal Micromobility',
    'Train': 'Transit',
    'Free Shuttle': 'Transit',
    'Bus': 'Transit',
    'Taxi/Uber/Lyft': 'Ridehail',
    'Pilot ebike': 'E-Bike',
}
plot_data = mode_counts
for detailed_mode, grouped_mode in mode_groups.items():
    # replace() returns a new frame, so mode_counts itself is left untouched
    plot_data = plot_data.replace(detailed_mode, grouped_mode)

plot_data = plot_data.groupby(['date_time','Mode_confirm'], as_index=False)['trip_count_smooth'].sum()
# Sum only the trip counts: the mode label column is not numeric and must not
# take part in the daily total
total_trips = plot_data.groupby(['date_time'], as_index=False)['trip_count_smooth'].sum()
# Both frames carry 'trip_count_smooth', so the merge suffixes them with
# _x (per mode) and _y (daily total over all modes, including the excluded ones)
plot_data = plot_data.merge(total_trips, on='date_time')
plot_data['trip_proportion'] = plot_data['trip_count_smooth_x'] / plot_data['trip_count_smooth_y']

# Drop the Other and Non-trip rows, then re-establish the categorical variable.
# The copy makes the filtered frame independent, so the column assignment below
# does not write into a slice of the merged frame.
plot_data = plot_data[~plot_data['Mode_confirm'].isin(['Not a Trip','Other'])].copy()
# NOTE(review): labels that are not among dic_re's values become NaN here —
# confirm the consolidated group names are present in the mapping dictionary.
plot_data['Mode_confirm'] = pd.Categorical(plot_data['Mode_confirm'], ordered=True, categories=np.unique(list(dic_re.values())))

# The closing parenthesis was missing from the title text
plot_title = 'Daily Aggregate Mode Share (excluding "Other" and "Not a trip")\n%s'%quality_text
ylab = 'Proportion of All Trips'
legend_title = 'Confirmed Mode'
file_name = "ts_all_modes%s"%file_suffix
timeseries_multi_plot(plot_data, 'date_time','trip_proportion','Mode_confirm', plot_title, ylab, legend_title, file_name)
alt_text = store_alt_text_generic('multivariate timeseries', file_name, plot_title)