## Generate Static Graphs -- Metrics for Survey Deployments

These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.

Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots.

In [None]:
# Notebook parameters. Per the notes above, the monthly-statistics scripts overwrite
# these values automatically; they can also be edited by hand for local runs.
year = 2024                     # forwarded to the data loader; None removes the filter (see notes above)
month = 11                      # forwarded to the data loader; None removes the filter (see notes above)
program = "default"             # forwarded to the data loader
study_type = "study"            # not referenced by the cells reviewed here
include_test_users = False      # forwarded to the data loader
labels = {}                     # forwarded to the data loader
use_imperial = True             # forwarded to scaffolding.get_units to choose unit labels and the distance column
sensed_algo_prefix = "cleaned"  # forwarded to the data loader
bluetooth_only = True #current proxy for fleet status
survey_info = {}                # the guard cell stops the notebook unless survey_info['trip-labels'] == 'ENKETO'

In [None]:
from collections import defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from plots import *
import scaffolding

# NOTE(review): `sns` is not imported explicitly in this notebook; presumably it is
# made available by `from plots import *` -- confirm.
# NOTE(review): seaborn's `set()` applies the default theme (style="darkgrid"), so calling
# it after `set_style("whitegrid")` likely overrides the whitegrid style. Swap the two
# calls if whitegrid is the intended look -- confirm against the other dashboards first.
sns.set_style("whitegrid")
sns.set()
%matplotlib inline

# get metric vs imperial vars
# (unit labels, the name of the distance column to aggregate, and the weight unit)
label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)

# get color mappings
# All five mappings are unpacked, but the cells below only read colors_sensed.
colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble  = scaffolding.mapping_color_labels() #just need sensed

In [None]:
# Guard cell: this notebook only applies to survey configurations. nbclient executes
# cells up to and including this one, and the exception below stops everything after it.
if survey_info.get('trip-labels') != 'ENKETO':
    # Swap in the scaffolding traceback handler before raising
    # (presumably so the abort shows only the message -- confirm in scaffolding).
    ipython = get_ipython()
    ipython._showtraceback = scaffolding.no_traceback_handler
    raise Exception("The plots in this notebook are only relevant to deployments with trip-level surveys")

## Collect Data From Database

In [None]:
expanded_ct_sensed, file_suffix_sensed, quality_text_sensed, debug_df_sensed = await scaffolding.load_viz_notebook_sensor_inference_data(year,
                                                                            month,
                                                                            program,
                                                                            labels,
                                                                            include_test_users,
                                                                            sensed_algo_prefix)

In [None]:
#if fleet, replace primary_mode with primary_ble_sensed mode
try:
    if bluetooth_only and 'ble_sensed_summary' in expanded_ct_sensed.columns:
        # Keep only the trips that carry a BLE summary. Take an explicit copy so the
        # column assignment below writes to an independent dataframe instead of a
        # slice of the loaded data (which triggers pandas' SettingWithCopyWarning and
        # leaves it ambiguous which object is actually modified).
        expanded_ct_sensed = expanded_ct_sensed[expanded_ct_sensed['ble_sensed_summary'].notna()].copy()
        # The primary mode of a trip is the key with the largest value in the
        # "distance" mapping of its BLE summary.
        expanded_ct_sensed["primary_mode"] = expanded_ct_sensed.ble_sensed_summary.apply(lambda md: max(md["distance"], key=md["distance"].get))
        # The distinct primary modes drive the color mapping for the BLE-based plots
        unique_keys = expanded_ct_sensed.groupby("primary_mode").agg({distance_col: "count"}).index
        print(unique_keys)
        colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = scaffolding.mapping_color_labels(unique_keys)
        # From here on the "sensed" plots use the BLE color mapping
        colors_sensed = colors_ble
except ValueError as e:
    # Best effort: report the problem and continue with whatever state was reached,
    # so the plot cells below can still run (or fall back to their placeholder plots).
    print("Got ValueError ", e)

In [None]:
import re

# Parse the quality text from the loader. It is expected to look like
# "Based on <N> trips from <M> users" (or "... testers and participants").
# group(1) = trip count, group(2) = user count, group(3) = user description.
sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)
if sensed_match is not None:
    stacked_bar_quality_text_sensed = f"{sensed_match.group(1)} trips (100%)\n from {sensed_match.group(2)} {sensed_match.group(3)}"
else:
    # re.match returns None when the text does not have the expected shape. Failing
    # here with an AttributeError would stop the whole notebook before any plot
    # (or placeholder plot) is produced, so fall back to the unparsed text instead.
    # The plot cells below that call sensed_match.group(...) do so inside try blocks
    # that catch AttributeError and generate their "missing" plots.
    stacked_bar_quality_text_sensed = quality_text_sensed
stacked_bar_quality_text_sensed

## Trips - count and distance

In [None]:
file_name = f'ntrips_total_survey{file_suffix_sensed}'
plot_title_no_quality= "Number of trips for each mode (selected by users)"

def _trips_by_mode(agg_name):
    """Return an aggregator that groups trips by primary mode, applies `agg_name`
    to the distance column, and sorts the result from largest to smallest."""
    def _aggregate(df):
        per_mode = df.groupby("primary_mode").agg({distance_col: agg_name})
        return per_mode.sort_values(by=distance_col, ascending=False)
    return _aggregate

try:
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)
    # One [alt text, HTML] pair per axis, so text results line up with the panels
    text_results = [["Unmodified Alt Text", "Unmodified HTML"], ["Unmodified Alt Text", "Unmodified HTML"]]
    # Top panel counts trips per mode, bottom panel sums their distance
    panels = (("count", "Count of Trips\n"), ("sum", "Distance of Trips\n"))
    for panel_idx, (agg_name, panel_label) in enumerate(panels):
        plot_and_text_stacked_bar_chart(expanded_ct_sensed, _trips_by_mode(agg_name),
                                        panel_label+stacked_bar_quality_text_sensed, ax[panel_idx],
                                        text_results[panel_idx], colors_sensed, debug_df_sensed)
    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)
except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:
    # Data is missing or incomplete: emit the placeholder plot plus matching alt text/HTML
    plt.clf()
    generate_missing_plot(plot_title_no_quality, debug_df_sensed, file_name)
    alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_no_quality)
    alt_html = store_alt_html_missing(debug_df_sensed, file_name, plot_title_no_quality)
except Exception as e:
    # Any other failure is rendered as an error plot on a fresh figure
    # TODO: Future cleanup can pass in just the figure and have the function choose the last axis
    fig, ax = plt.subplots()
    plot_and_text_error(e, ax, file_name)

## Trips under the 80th percentile of trip length

In [None]:
file_name = f'ntrips_under80_survey{file_suffix_sensed}'
# Title for the placeholder outputs. The real title embeds the computed threshold,
# which may not exist yet if pre-processing fails.
plot_title_default = "Number of trips below 80th percentile in each mode"

try:
    # Preprocess to find cutoff and filter below cutoff
    # For simplicity, and to aid in comparison, we have a single cutoff based on the total number of trips
    # The cutoff is computed on the raw `distance` column (used for filtering) ...
    cutoff = expanded_ct_sensed.distance.quantile(0.8)
    if pd.isna(cutoff):
        cutoff = 0
    # ... while the value shown in the title comes from the unit-converted distance column
    dist_threshold = expanded_ct_sensed[distance_col].quantile(0.8).round(1)
    dist_threshold = str(dist_threshold)

    # NOTE(review): "travel model" in the title below looks like a typo for "travel mode";
    # left unchanged here because the string is user-facing output -- confirm before editing.
    plot_title_no_quality="Number of trips per travel model under " + dist_threshold + " " + label_units_lower
    plot_title_no_quality=plot_title_no_quality+"\n["+dist_threshold + " " + label_units_lower+" represents 80th percentile of trip length]"

    expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]
    # Dividing by len(expanded_ct_sensed) raises ZeroDivisionError when there are no trips;
    # that is handled below as "missing data"
    sensed_u80_quality_text = f"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}"

    # Plot entries
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)
    text_results = ["Unmodified Alt Text", "Unmodified HTML"]
    plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby("primary_mode").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),
                                    "Sensed by OpenPATH\n"+sensed_u80_quality_text, ax, text_results, colors_sensed, debug_df_sensed)
    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)
except (AttributeError, KeyError, pd.errors.UndefinedVariableError, ZeroDivisionError) as e:
    # We can have a missing attribute error during the pre-processing, in which case we should show the missing plot
    # here, our pre-processing only relies on sensed data, so we use the debug_df_sensed
    plt.clf()
    generate_missing_plot(plot_title_default, debug_df_sensed, file_name)
    alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_default)
    # Use the default title here as well: if pre-processing failed before the title was
    # built, plot_title_no_quality still holds the previous cell's title (or is undefined),
    # which would make the HTML disagree with the placeholder plot and its alt text.
    alt_html = store_alt_html_missing(debug_df_sensed, file_name, plot_title_default)
except Exception as e:
    # Any other failure is rendered as an error plot on a fresh figure
    fig, ax = plt.subplots()
    plot_and_text_error(e, ax, file_name)

## Land mode distances

In [None]:
plot_title_no_quality= "Total trip length (" + label_units_lower + ") covered by each mode by land"
file_name =f"total_trip_length_land_survey{file_suffix_sensed}"

try:
    ## Only sensed data is plotted here, so there is no separate existence check:
    ## if the sensed data is unusable we fall through to the missing plot below
    # Drop air / high-speed-rail trips so that only land modes remain
    sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != "AIR_OR_HSR"]

    # Dividing by len(expanded_ct_sensed) raises ZeroDivisionError when there are no trips;
    # that is handled below as "missing data"
    sensed_land_quality_text = f"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}"

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)
    text_results = ["Unmodified Alt Text", "Unmodified HTML"]
    plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby("primary_mode").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False),
                                    "Sensed by OpenPATH\n"+sensed_land_quality_text, ax, text_results, colors_sensed, debug_df_sensed)
    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)
except (AttributeError, KeyError, pd.errors.UndefinedVariableError, ZeroDivisionError) as e:
    # Missing or empty data (including zero trips, consistent with the under-80th-percentile
    # cell): emit the placeholder plot plus matching alt text/HTML instead of an error plot
    plt.clf()
    generate_missing_plot(plot_title_no_quality, debug_df_sensed, file_name)
    alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_no_quality)
    alt_html = store_alt_html_missing(debug_df_sensed, file_name, plot_title_no_quality)
except Exception as e:
    # Any other failure is rendered as an error plot on a fresh figure
    fig, ax = plt.subplots()
    plot_and_text_error(e, ax, file_name)