## Generate Static Graphs

These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.

Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots.

In [None]:
# Notebook input parameters. The scripts that generate the monthly statistics overwrite
# these values automatically; they can also be edited by hand for local runs.
# Passing None removes the corresponding filter so that all data is plotted.
year = 2020
month = 11
program = "default"
# Must be "program": a later cell aborts the notebook for any other value.
study_type = "program"
# Value matched against the `mode_confirm` column to select the trips that are plotted.
mode_of_interest = "e-bike"
include_test_users = False
# Forwarded to the scaffolding helpers that load the data and build the color/translation maps.
labels = { }
# Forwarded to scaffolding.get_units to pick imperial vs metric labels and columns.
use_imperial = True

In [None]:
from collections import defaultdict

import numpy as np
import pandas as pd

from plots import *
import scaffolding

# NOTE(review): `sns` is not imported in this cell; presumably it is re-exported by
# `from plots import *` — confirm.
# NOTE(review): seaborn documents `sns.set()` as an alias of `set_theme()`, whose default
# style is "darkgrid", so it likely overrides the "whitegrid" style selected on the line
# before it — confirm which style is intended.
sns.set_style("whitegrid")
sns.set()
%matplotlib inline

In [None]:
# Do not run this notebook at all unless it is for a program; nbclient will run up through this cell
if study_type != "program":
    ipython = get_ipython()
    # Swap in the scaffolding traceback handler before raising
    # (presumably it suppresses the traceback output — confirm in scaffolding).
    ipython._showtraceback = scaffolding.no_traceback_handler
    # Raising here is what stops the remaining cells from being executed.
    raise Exception("The plots in this notebook are only relevant to programs")

In [None]:
# get metric vs imperial vars
# The returned unit labels and distance column name are selected by `use_imperial`;
# `label_units`, `label_units_lower` and `distance_col` are used by the plotting cells below.
label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)

### Color Dictionary

In [None]:
# Per-category color lookups derived from `labels`; `colors_purpose` and `colors_replaced`
# are passed to the stacked bar chart helper below.
colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble  = scaffolding.mapping_color_labels(labels)
# Lookups that presumably map raw label values to display text (TODO confirm in scaffolding);
# the purpose and replaced-mode variants are passed to the stacked bar chart helper below.
values_to_translations, value_to_translations_purpose, value_to_translations_replaced = scaffolding.translate_values_to_labels(labels)

## Collect Data From Database

In [None]:
expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,
                                                                            month,
                                                                            program,
                                                                            study_type,
                                                                            labels,
                                                                            include_test_users=include_test_users)

## Collect Data From Database for Inferred Metrics

In [None]:
expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = await scaffolding.load_viz_notebook_inferred_data(year,
                                                                            month,
                                                                            program,
                                                                            study_type,
                                                                            labels,
                                                                            include_test_users=include_test_users)

In [None]:
# Record debug counts for the mode of interest: the number of trips confirmed as that mode,
# and the value reported by scaffolding.trip_label_count for "Replaced_mode" on those trips.
if 'mode_confirm' in expanded_ct.columns:
    # Compare against the value directly instead of interpolating it into a query
    # string, so a mode name containing a quote cannot break the expression.
    mode_of_interest_df = expanded_ct[expanded_ct["mode_confirm"] == mode_of_interest]
    debug_df.loc[f"{mode_of_interest}_trips"] = len(mode_of_interest_df)
    debug_df.loc[f"{mode_of_interest}_trips_with_replaced_mode"] = scaffolding.trip_label_count("Replaced_mode", mode_of_interest_df)

## Metrics for Specific Mode

In [None]:
# Trips confirmed as the mode of interest; when the `mode_confirm` column is absent,
# every loaded trip is kept instead.
# The filter is a boolean mask rather than an interpolated query string, so a mode name
# containing a quote cannot break the expression.
if "mode_confirm" in expanded_ct.columns:
    data_eb = expanded_ct[expanded_ct["mode_confirm"] == mode_of_interest]
else:
    data_eb = expanded_ct

In [None]:
# Recompute the quality text for the mode-specific subset; this replaces the
# `quality_text` value returned by the data-loading cell above.
quality_text = scaffolding.get_quality_text(expanded_ct, data_eb, mode_of_interest, include_test_users)

## Metrics for Specific Inferred Mode


In [None]:
# Inferred trips whose mode is the mode of interest; when the `mode_confirm` column is
# absent, every loaded inferred trip is kept instead.
# The filter is a boolean mask rather than an interpolated query string, so a mode name
# containing a quote cannot break the expression.
if "mode_confirm" in expanded_ct_inferred.columns:
    data_eb_inferred = expanded_ct_inferred[expanded_ct_inferred["mode_confirm"] == mode_of_interest]
else:
    data_eb_inferred = expanded_ct_inferred

In [None]:
# Recompute the quality text for the mode-specific inferred subset; this replaces the
# `quality_text_inferred` value returned by the inferred data-loading cell above.
quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, data_eb_inferred, mode_of_interest, include_test_users)

In [None]:
# Display both quality texts for inspection; the next cell parses them with a regex.
quality_text, quality_text_inferred

In [None]:
import re

# The full quality texts are too long for the per-row labels of the stacked bar charts.
# Extract the mode-specific trip count, the percentage and the user count from each text
# with a single shared pattern.
# `re.escape` keeps a mode name containing regex metacharacters (e.g. "+" or "(") from
# corrupting the pattern; `str()` preserves the original formatting of non-string values.
quality_text_pattern = re.compile(
    r'Based on ([0-9]+) confirmed {} trips from ([0-9]+) (users|testers and participants)\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\(([0-9.]+|nan)%\))'.format(re.escape(str(mode_of_interest)))
)


def _short_quality_text(match, full_text):
    """Build the compact stacked-bar label from a quality-text regex match.

    Parameters:
        match: result of matching `quality_text_pattern` against `full_text`, or None.
        full_text: the quality text that was matched.

    Returns:
        "<trips> trips (<pct>%)\\n from <users> <user kind>" when the text has the expected
        format; otherwise `full_text` unchanged, so that an unexpected format degrades the
        label instead of aborting the notebook before any plot is generated.
    """
    if match is None:
        return full_text
    return f"{match.group(1)} trips {match.group(7)}\n from {match.group(2)} {match.group(3)}"


labeled_match = quality_text_pattern.match(quality_text)
stacked_bar_quality_text = _short_quality_text(labeled_match, quality_text)
inferred_match = quality_text_pattern.match(quality_text_inferred)
stacked_bar_quality_text_inferred = _short_quality_text(inferred_match, quality_text_inferred)

stacked_bar_quality_text, stacked_bar_quality_text_inferred

### Trips by purpose for specified mode

In [None]:
plot_title_no_quality = f"Number of trips for each purpose for {mode_of_interest}"
file_name = f"ntrips_{mode_of_interest}_purpose{file_suffix}"


def count_trips_by_purpose(df):
    """Count trips per `purpose_confirm_w_other` value, largest count first."""
    trip_counts = df.groupby("purpose_confirm_w_other").agg({distance_col: 'count'})
    return trip_counts.sort_values(by=distance_col, ascending=False)


try:
    # Top row: trips labeled by the user; bottom row: trips inferred from prior labels.
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 4), sharex=True)
    # One [alt text, HTML] placeholder pair per row, handed to the chart helper.
    text_results = [["Unmodified Alt Text", "Unmodified HTML"] for _ in range(2)]
    plot_and_text_stacked_bar_chart(
        data_eb,
        count_trips_by_purpose,
        f"Labeled `{mode_of_interest}` by user\n" + stacked_bar_quality_text,
        ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)
    plot_and_text_stacked_bar_chart(
        data_eb_inferred,
        count_trips_by_purpose,
        f"Inferred `{mode_of_interest}` from prior labels\n" + stacked_bar_quality_text_inferred,
        ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)
    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)
except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:
    # These error types are treated as "data not available": emit the missing-plot
    # placeholder together with its alt text and alt HTML.
    plt.clf()
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)
    alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)
except Exception as e:
    # Any other failure is rendered as an error plot on a fresh figure.
    fig, ax = plt.subplots()
    plot_and_text_error(e, ax, file_name)

### Total Trip Length for each mode replaced by the specified mode

In [None]:
plot_title_no_quality = "Total trip length (" + label_units_lower + ") covered by replaced mode"
file_name = f"total_trip_length_{mode_of_interest}_replaced_mode{file_suffix}"


def sum_distance_by_replaced_mode(df):
    """Sum the distance column per `replaced_mode_w_other` value, largest total first."""
    distance_totals = df.groupby("replaced_mode_w_other").agg({distance_col: 'sum'})
    return distance_totals.sort_values(by=distance_col, ascending=False)


try:
    # Top row: trips labeled by the user; bottom row: trips inferred from prior labels.
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 4), sharex=True)
    # One [alt text, HTML] placeholder pair per row, handed to the chart helper.
    text_results = [["Unmodified Alt Text", "Unmodified HTML"] for _ in range(2)]
    plot_and_text_stacked_bar_chart(
        data_eb,
        sum_distance_by_replaced_mode,
        "Labeled by user\n (Trip distance)\n" + stacked_bar_quality_text,
        ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)
    plot_and_text_stacked_bar_chart(
        data_eb_inferred,
        sum_distance_by_replaced_mode,
        "Inferred from prior labels\n (Trip distance)\n" + stacked_bar_quality_text_inferred,
        ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)
    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)
except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:
    # These error types are treated as "data not available": emit the missing-plot
    # placeholder together with its alt text and alt HTML.
    plt.clf()
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)
    alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)
except Exception as e:
    # Any other failure is rendered as an error plot on a fresh figure.
    fig, ax = plt.subplots()
    plot_and_text_error(e, ax, file_name)

### Number of Trips for each mode replaced by specified mode

In [None]:
plot_title_no_quality = "Number of trips for replaced mode"
file_name = f'ntrips_{mode_of_interest}_total{file_suffix}'


def count_trips_by_replaced_mode(df):
    """Count trips per `replaced_mode_w_other` value, largest count first."""
    trip_counts = df.groupby("replaced_mode_w_other").agg({distance_col: 'count'})
    return trip_counts.sort_values(by=distance_col, ascending=False)


try:
    # Top row: trips labeled by the user; bottom row: trips inferred from prior labels.
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 4), sharex=True)
    # One [alt text, HTML] placeholder pair per row, handed to the chart helper.
    text_results = [["Unmodified Alt Text", "Unmodified HTML"] for _ in range(2)]
    plot_and_text_stacked_bar_chart(
        data_eb,
        count_trips_by_replaced_mode,
        f"Labeled `{mode_of_interest}` by user\n" + stacked_bar_quality_text,
        ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)
    plot_and_text_stacked_bar_chart(
        data_eb_inferred,
        count_trips_by_replaced_mode,
        f"Inferred `{mode_of_interest}` from prior labels\n" + stacked_bar_quality_text_inferred,
        ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)
    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)
except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:
    # These error types are treated as "data not available": emit the missing-plot
    # placeholder together with its alt text and alt HTML.
    plt.clf()
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)
    alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)
except Exception as e:
    # Any other failure is rendered as an error plot on a fresh figure.
    fig, ax = plt.subplots()
    plot_and_text_error(e, ax, file_name)

### Average distance per trip for each mode replaced by the specified mode

In [None]:
# Replaced modes backed by fewer trips than this are left out of the plot. The title is
# derived from the same value so that it always describes the filter actually applied
# (it previously read "> 3 entries" although modes with exactly 3 trips were kept).
min_trips_per_replaced_mode = 3
plot_title_no_quality = "Average " + label_units + f" for each replaced mode with >= {min_trips_per_replaced_mode} entries\n'Other' represents trips with a non-standard or missing replacement"
file_name = 'average_miles_replaced_mode%s' % file_suffix

try:
    # Total, count and mean of the distance column for each replaced mode.
    dg = data_eb.groupby('Replaced_mode').agg({distance_col: ['sum', 'count', 'mean']})
    dg.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']
    dg = dg.reset_index()
    dg = dg.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)
    # Drop sparsely represented modes, then order the remainder by average distance.
    data = dg.drop(dg.query(f"Count < {min_trips_per_replaced_mode}").index).sort_values(by=['Average ('+label_units_lower+')'], ascending=False)

    x = 'Replaced_mode'
    y = 'Average ('+label_units_lower+')'
    y2 = "Count"

    plot_title = plot_title_no_quality+"\n"+quality_text
    barplot_mode(data, x, y, plot_title, expanded_ct['Replaced_mode'].dropna().unique().tolist(), file_name)
    alt_text = store_alt_text_bar(pd.DataFrame(data['Average ('+label_units_lower+')'].values, data.Replaced_mode), file_name, plot_title)

# `Exception` instead of a bare `except:` so that KeyboardInterrupt / SystemExit still
# propagate; every ordinary failure keeps producing the missing-plot placeholder.
except Exception:
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)

### Number of trips by day for specified mode

In [None]:
plot_title_no_quality = f"Number of {mode_of_interest} trips by day"
file_name = f'ntrips_{mode_of_interest}_per_day%s' % file_suffix


try:
    # Aggregate per day of the month. Only the 'count' aggregate ('Number of Trips') is
    # plotted; 'Total' is the sum of the day values themselves and is not used for x or y.
    fq_days = data_eb.groupby(['start_local_dt_day']).agg({'start_local_dt_day': ['sum', 'count']})
    fq_days = fq_days.reset_index()
    fq_days.columns = ['Day of the Month', 'Total', 'Number of Trips']

    data = fq_days
    x = 'Day of the Month'
    y = 'Number of Trips'

    plot_title = plot_title_no_quality+"\n"+quality_text
    barplot_day(data, x, y, plot_title, file_name)
    alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Day of the Month'].values), file_name, plot_title)
# `Exception` instead of a bare `except:` so that KeyboardInterrupt / SystemExit still
# propagate; every ordinary failure keeps producing the missing-plot placeholder.
except Exception:
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)

### Number of trips by day of week

In [None]:
plot_title_no_quality = f"Number of {mode_of_interest} trips by weekday"
file_name = f'ntrips_{mode_of_interest}_per_weekday%s' % file_suffix

try:
    # Aggregate per weekday value. Only the 'count' aggregate ('Number of Trips') is
    # plotted; 'Total' is the sum of the weekday values themselves and is not used for x or y.
    fq_weekdays = data_eb.groupby(['start_local_dt_weekday']).agg({'start_local_dt_weekday': ['sum', 'count']})
    fq_weekdays = fq_weekdays.reset_index()
    fq_weekdays.columns = ['Weekday', 'Total', 'Number of Trips']
    weekday_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
    # int() lets the lookup work when the weekday column is float-typed (e.g. because the
    # source column contained missing values); a float cannot index a list directly.
    fq_weekdays["Weekday"] = fq_weekdays.Weekday.apply(lambda x: weekday_labels[int(x)])

    data = fq_weekdays
    x = 'Weekday'
    y = 'Number of Trips'

    plot_title = plot_title_no_quality+"\n"+quality_text
    barplot_day(data, x, y, plot_title, file_name)
    alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Weekday'].values), file_name, plot_title)
# `Exception` instead of a bare `except:` so that KeyboardInterrupt / SystemExit still
# propagate; every ordinary failure keeps producing the missing-plot placeholder.
except Exception:
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)