## Generate Static Graphs

These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.

Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots.

In [None]:
# Notebook input parameters. The scripts that generate the monthly statistics
# overwrite these values; edit them by hand only for local runs.
# year, month, program and study_type are forwarded to the scaffolding loaders
# to select which data is read.
year = 2020
month = 11
program = "default"
study_type = "study"
# Mode used to build the mode-specific subset of the labeled trips.
mode_of_interest = None
# Forwarded to the loaders and to the quality-text helpers.
include_test_users = False
# When non-empty, the commute chart translates the 'work' purpose through
# scaffolding.mapping_labels(dynamic_labels, "PURPOSE").
dynamic_labels = {}
# Passed to scaffolding.get_units() to pick the unit labels and distance column.
use_imperial = False
# Passed to the sensor-inference loader.
sensed_algo_prefix = "cleaned"

In [None]:
from collections import defaultdict

import numpy as np
import pandas as pd

from plots import *
import scaffolding

# Global plotting setup. `sns` is not imported explicitly in this cell;
# presumably it is re-exported by `from plots import *` -- verify.
# NOTE(review): sns.set() applies seaborn's default theme after the style has
# been set, which may reset the "whitegrid" style chosen on the line above.
# Confirm which look is intended before reordering, since that would change
# the appearance of every generated plot.
sns.set_style("whitegrid")
sns.set()
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Loading mapping dictionaries from mapping_dictionaries notebook
# (%store -r restores variables previously persisted with %store; that
# notebook must have been run first).
%store -r dic_re
%store -r dic_pur

# convert a dictionary to a defaultdict
# so that any key missing from the mapping resolves to 'Other' instead of
# raising KeyError.
dic_re = defaultdict(lambda: 'Other',dic_re)
dic_pur = defaultdict(lambda: 'Other',dic_pur)

# get metric vs imperial vars
# (unit labels, the name of the distance column, and a weight unit, selected
# by the use_imperial parameter)
label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)

## Collect Data From Database for Generic Metrics

In [None]:
# Load the trips used for the "labeled by user" metrics. The loader returns,
# in order: the expanded trip frame, the suffix appended to every output file
# name, the data-quality text shown in plot titles, and a debug frame that is
# handed to the missing-plot helpers when a chart cannot be drawn.
expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(
    year,
    month,
    program,
    study_type,
    dynamic_labels,
    dic_re,
    dic_pur=dic_pur,
    include_test_users=include_test_users,
)

## Collect Data from Database for Sensed Metrics

In [None]:
# Load the trips used for the "sensed" metrics. The return values mirror the
# generic loader: trip frame, file suffix, quality text and debug frame.
(
    expanded_ct_sensed,
    file_suffix_sensed,
    quality_text_sensed,
    debug_df_sensed,
) = scaffolding.load_viz_notebook_sensor_inference_data(
    year,
    month,
    program,
    include_test_users,
    sensed_algo_prefix,
)

## Metrics for Specific Mode

In [None]:
# Build the subset of labeled trips for the mode of interest.
# - mode_of_interest = None means "no filter": the previous f-string query
#   compared against the literal text 'None' and therefore selected nothing.
# - A boolean mask is used instead of a string-built query so that mode labels
#   containing quotes cannot break the expression.
# - When the frame has no "mode_confirm" column (e.g. no data was loaded) the
#   frame is passed through unchanged, as before.
if mode_of_interest is not None and "mode_confirm" in expanded_ct.columns:
    data_eb = expanded_ct[expanded_ct["mode_confirm"] == mode_of_interest]
else:
    data_eb = expanded_ct
quality_text_specific = scaffolding.get_quality_text(expanded_ct, data_eb, mode_of_interest, include_test_users)

### 1. 100% Stacked Bar Charts (Based on Number of Trips)

In [None]:
# Chart 1: share of trips per mode, user-labeled vs. sensed.
file_name = f'ntrips_mode_confirm{file_suffix}'
plot_title_no_quality= "Number of trips for each mode (selected by users)"

try:
    # Count once per frame; labels and values come from the same Series so
    # they are guaranteed to stay aligned.
    mode_confirm_counts = expanded_ct['Mode_confirm'].value_counts(dropna=True)
    labels_mc = mode_confirm_counts.keys().tolist()
    values_mc = mode_confirm_counts.tolist()
    df_confirmed_tc = process_trip_data(labels_mc, values_mc, "Labeled by user \n (Based on Confirmed Trips)")

    primary_mode_counts = expanded_ct_sensed['primary_mode'].value_counts(dropna=True)
    labels_pm_sensed = primary_mode_counts.keys().tolist()
    values_pm_sensed = primary_mode_counts.tolist()
    df_sensed_tc = process_trip_data(labels_pm_sensed, values_pm_sensed, "Sensed by OpenPATH \n (Based on Total Trips)")

    all_data_frames = [df_confirmed_tc,df_sensed_tc]
    result_df = merge_dataframes(all_data_frames)

    plot_title = plot_title_no_quality + "\n" + quality_text
    stacked_bar_chart_generic(plot_title, result_df, file_name, 2)
except Exception:
    # Best-effort: any failure (missing column, empty data, ...) produces a
    # placeholder plot. `except Exception` rather than a bare `except` so that
    # KeyboardInterrupt / SystemExit still stop the notebook.
    generate_missing_plot(plot_title_no_quality,debug_df,file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)

### 2. 100% Stacked Bar Charts (Represents Trips up to the 80th Percentile of Distance)

In [None]:
# Chart 2: share of trips per mode, restricted to trips whose distance is at
# or below the 80th percentile, user-labeled vs. sensed.
file_name = f'ntrips_mode_under80{file_suffix}'
plot_title_no_quality= "Number of trips for 80th percentile for each mode (selected by users):"

try:
    # The filter uses the raw `distance` column; the threshold shown in the
    # title is taken from `distance_col` so it matches the display units.
    cutoff = expanded_ct.distance.quantile(0.8)
    if pd.isna(cutoff):
        cutoff = 0
    dist_threshold = str(expanded_ct[distance_col].quantile(0.8).round(1))
    # Select the subset once and reuse it for the counts and the quality text.
    trips_u80 = expanded_ct.loc[expanded_ct['distance'] <= cutoff]
    counts_mc_u80 = trips_u80.Mode_confirm.value_counts(dropna=True)
    labels_mc_u80 = counts_mc_u80.keys().tolist()
    values_mc_u80 = counts_mc_u80.tolist()
    # "<=" (not "<") so the text matches the inclusive filter above and the
    # wording used for the sensed data below.
    u80_quality_text = scaffolding.get_quality_text(expanded_ct, trips_u80, "<= " + dist_threshold + " " + short_label, include_test_users)
    df_confirmed_tc_u80 = process_trip_data(labels_mc_u80, values_mc_u80, "Labeled by user \n (Based on Confirmed Trips)")

    cutoff_sensed = expanded_ct_sensed.distance.quantile(0.8)
    if pd.isna(cutoff_sensed):
        cutoff_sensed = 0
    dist_threshold_sensed = str(expanded_ct_sensed[distance_col].quantile(0.8).round(1))
    trips_sensed_u80 = expanded_ct_sensed.loc[expanded_ct_sensed['distance'] <= cutoff_sensed]
    counts_pm_u80 = trips_sensed_u80.primary_mode.value_counts(dropna=True)
    labels_pm_u80 = counts_pm_u80.keys().tolist()
    values_pm_u80 = counts_pm_u80.tolist()
    u80_quality_text_sensed = scaffolding.get_quality_text_sensed(trips_sensed_u80, "<= " + dist_threshold_sensed + " " + short_label , include_test_users)
    df_sensed_tc_u80 = process_trip_data(labels_pm_u80, values_pm_u80,"Sensed by OpenPATH \n (Based on Total Trips)")

    all_data_frames_u80 = [df_confirmed_tc_u80, df_sensed_tc_u80]
    result_df_u80 = merge_dataframes(all_data_frames_u80)

    plot_title = plot_title_no_quality + "\n" + "For Sensed: " + u80_quality_text_sensed + "\n" + "For Labeled: " + u80_quality_text
    stacked_bar_chart_generic(plot_title, result_df_u80, file_name, 2)
except Exception:
    # Best-effort: fall back to a placeholder plot when the chart cannot be built.
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)

### 3. 100% Stacked Bar Charts (Represents Commute Trips) 

In [None]:
# Chart 3: share of commute (work-purpose) trips per user-labeled mode.
# The f-prefix is required: without it the literal text "{file_suffix}" ended
# up in the output file name instead of the actual suffix.
file_name = f"ntrips_commute_mode_confirm{file_suffix}"
plot_title_no_quality= "Number of commute trips for each mode (selected by users)"

try:
    if (len(dynamic_labels)):
        # With dynamic labels the purpose column holds translated values, so
        # look up the translation of 'work' before filtering.
        purpose_map_label =  scaffolding.mapping_labels(dynamic_labels, "PURPOSE")
        translation_work = purpose_map_label['work']
        trip_purpose_query = f"Trip_purpose == '{translation_work}'"
    else:
        trip_purpose_query = "Trip_purpose == 'Work'"

    # Run the query once and reuse the result for counts and quality text.
    commute_trips = expanded_ct.query(trip_purpose_query)
    commute_mode_counts = commute_trips.Mode_confirm.value_counts(dropna=True)
    labels_mc_commute = commute_mode_counts.keys().tolist()
    values_mc_commute = commute_mode_counts.tolist()
    commute_quality_text = scaffolding.get_quality_text(expanded_ct, commute_trips, "commute", include_test_users)
    df_total_trip_commute = process_trip_data(labels_mc_commute, values_mc_commute, "Labeled by user \n (Based on Confirmed Trips)")

    plot_title = plot_title_no_quality + "\n" + commute_quality_text
    stacked_bar_chart_generic(plot_title, df_total_trip_commute, file_name, 1)
except Exception:
    # Best-effort: fall back to a placeholder plot when the chart cannot be built.
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)

### 4. 100% Stacked Bar Charts (Represents Distance by Mode)

In [None]:
# Chart 4: share of total distance per mode -- user-labeled, sensed (all
# modes) and sensed excluding "AIR_OR_HSR".
file_name ='distance_mode%s' % file_suffix
plot_title_no_quality = label_units + " for each mode"


def _distance_by_mode(trips, mode_col):
    """Summarise `distance_col` per value of `mode_col`.

    Returns a frame with the mode column plus 'Total (<units>)', 'Count' and
    'Average (<units>)', sorted by total distance in descending order.
    Raises whatever pandas raises when a required column is missing; the
    caller's try/except turns that into a placeholder plot.
    """
    total_col = 'Total ('+label_units_lower+')'
    summary = trips.groupby(mode_col).agg({distance_col: ['sum', 'count' , 'mean']})
    summary.columns = [total_col, 'Count', 'Average ('+label_units_lower+')']
    return summary.reset_index().sort_values(by=[total_col], ascending=False)


try:
    # `dist` is also read by the "average distance per mode" cell below, so
    # its name and column layout must stay as they are.
    dist = _distance_by_mode(expanded_ct, 'Mode_confirm')
    dist_dict = dict(zip(dist['Mode_confirm'], dist['Total ('+label_units_lower+')']))
    labels_mc_dist = list(dist_dict.keys())
    values_mc_dist = list(dist_dict.values())
    df_confirm_dist = process_trip_data(labels_mc_dist, values_mc_dist, "Labeled by user \n Trip distance")

    dist_sensed = _distance_by_mode(expanded_ct_sensed, 'primary_mode')
    dist_dict_sensed = dict(zip(dist_sensed['primary_mode'], dist_sensed['Total ('+label_units_lower+')']))
    labels_pm_dist = list(dist_dict_sensed.keys())
    values_pm_dist = list(dist_dict_sensed.values())
    df_sensed_dist = process_trip_data(labels_pm_dist, values_pm_dist, "Sensed by OpenPATH \n Trip distance (Overall)")

    # Same summary as above; only the series handed to the chart drops the
    # "AIR_OR_HSR" mode so that the remaining modes stay readable.
    dist_sensed_land = dist_sensed.copy()
    dist_dict_sensed_land = dict(zip(dist_sensed_land['primary_mode'], dist_sensed_land['Total ('+label_units_lower+')']))
    labels_pm_dist_land = [mode for mode in dist_dict_sensed_land if mode != "AIR_OR_HSR"]
    values_pm_dist_land = [dist_dict_sensed_land[mode] for mode in labels_pm_dist_land]
    df_sensed_dist_land = process_trip_data(labels_pm_dist_land, values_pm_dist_land, "Sensed by OpenPATH \n Trips distance (Land)")

    all_data_frames = [df_confirm_dist,df_sensed_dist, df_sensed_dist_land]
    result_df = merge_dataframes(all_data_frames)

    # Use the title that carries the data-quality text, consistent with the
    # other charts; previously it was built but never passed to the plot.
    plot_title = plot_title_no_quality + "\n" + quality_text
    stacked_bar_chart_generic(plot_title, result_df, file_name, 3)
except Exception:
    # Best-effort: fall back to a placeholder plot when the chart cannot be built.
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)

### Average distance per transport mode selected (Mode_confirm)

In [None]:
# Bar chart of the average trip distance per user-labeled mode.
# Depends on `dist`, the per-mode summary built by the "distance by mode" cell
# above; if that cell failed, the NameError raised here is caught below and a
# placeholder plot is produced.
file_name ='average_miles_mode_confirm%s' % file_suffix
plot_title_no_quality="Average "+ label_units+" for each mode with > 3 entries"

try:
    x='Mode_confirm'
    y='Average ('+label_units_lower+')'
    plot_title= plot_title_no_quality+"\n"+quality_text

    # NOTE(review): the title says "> 3 entries" but this keeps modes with
    # Count >= 3 (only Count < 3 is dropped) -- confirm which is intended.
    data = dist.drop((dist.query("Count < 3").index)).sort_values(by=[y], ascending=False)

    barplot_mode(data,x,y,plot_title, expanded_ct['Mode_confirm'].dropna().unique().tolist(), file_name)
    alt_text = store_alt_text_bar(pd.DataFrame(data[y].values, data['Mode_confirm']), file_name, plot_title)
except Exception:
    # `except Exception` rather than a bare `except` so that KeyboardInterrupt
    # / SystemExit still stop the notebook.
    generate_missing_plot(plot_title_no_quality,debug_df,file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)

### Number of trips by day

In [None]:
# Bar chart of the number of labeled trips per day of the month.
plot_title_no_quality="Number of trips by day"
file_name ='ntrips_per_day%s' % file_suffix

try:
    # 'count' gives the number of trips per day. 'sum' adds up the day numbers
    # themselves; that 'Total' column is kept for layout compatibility but is
    # not plotted.
    fq_days = expanded_ct.groupby(['start_local_dt_day']).agg({'start_local_dt_day': ['sum', 'count']})
    fq_days = fq_days.reset_index()
    fq_days.columns = ['Day of the Month', 'Total', 'Number of Trips']

    data = fq_days
    x = 'Day of the Month'
    y = 'Number of Trips'

    plot_title= plot_title_no_quality+"\n"+quality_text

    barplot_day(data,x,y,plot_title,file_name)
    alt_text = store_alt_text_bar(pd.DataFrame(data[y].values, data[x]), file_name, plot_title)
except Exception:
    # `except Exception` rather than a bare `except` so that KeyboardInterrupt
    # / SystemExit still stop the notebook.
    generate_missing_plot(plot_title_no_quality,debug_df,file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)

### Number of trips by day of week

In [None]:
# Bar chart of the number of labeled trips per day of the week.
plot_title_no_quality="Number of trips by weekday"
file_name ='ntrips_per_weekday%s' % file_suffix
try:
    # 'count' gives the number of trips per weekday; the 'sum' based 'Total'
    # column is kept for layout compatibility but is not plotted.
    fq_weekdays = expanded_ct.groupby(['start_local_dt_weekday']).agg({'start_local_dt_weekday': ['sum', 'count']})
    fq_weekdays = fq_weekdays.reset_index()
    fq_weekdays.columns = ['Weekday', 'Total', 'Number of Trips']
    # The weekday value is used as an index into this list, so it is expected
    # to be an integer with 0 = Monday ... 6 = Sunday.
    weekday_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
    fq_weekdays["Weekday"] = fq_weekdays.Weekday.apply(lambda x: weekday_labels[x])

    data = fq_weekdays
    x = 'Weekday'
    y = 'Number of Trips'

    plot_title= plot_title_no_quality+"\n"+quality_text

    barplot_day(data,x,y,plot_title,file_name)
    alt_text = store_alt_text_bar(pd.DataFrame(data[y].values, data[x]), file_name, plot_title)
except Exception:
    # `except Exception` rather than a bare `except` so that KeyboardInterrupt
    # / SystemExit still stop the notebook.
    generate_missing_plot(plot_title_no_quality,debug_df,file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)