In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from uuid import UUID

import matplotlib.pyplot as plt

import confusion_matrix_handling as cm_handling
from confusion_matrix_handling import MODE_MAPPING_DICT
import get_EC
import helper_functions as hf

import sklearn.model_selection as skm

from sklearn.model_selection import KFold
from sklearn import linear_model

# Unit conversion: 1 meter = 0.000621371 miles.
METERS_TO_MILES = 0.000621371

# Share of cars assumed to be electric. Currently switched off (0); the
# alternative value noted by the author was 0.01 (~1% of cars on the road).
ECAR_PROPORTION = 0

# Ratio of drove-alone to shared-ride travel; passed to
# hf.find_sensed_car_energy_intensity further down in this notebook.
DROVE_ALONE_TO_SHARED_RIDE_RATIO = 1

# Energy intensity table. The raw-string prefix only matters if the path is
# ever written with Windows-style backslashes.
df_EI = pd.read_csv(r"Public_Dashboard/auxiliary_files/energy_intensity.csv")

In [3]:
# All of the emission-server functions used by this notebook live in this module.
import database_related_functions as drf

user_list, os_map, uuid_program_map = drf.get_participants_programs_and_operating_systems()

# Timing notes from previous runs:
#   * 6 to 14 minutes for the full 1.5 year CEO study plus stage and prepilot.
#   * ~1 min 45 s to 2 min 45 s on a Macbook Pro for all CEO data up to May 2022.
expanded_labeled_trips = drf.get_expanded_labeled_trips(user_list)

# Attach each trip's operating system and program, both looked up by user id.
expanded_labeled_trips["os"] = expanded_labeled_trips["user_id"].map(os_map)
expanded_labeled_trips["program"] = expanded_labeled_trips["user_id"].map(uuid_program_map)

# Drop the columns this analysis does not use: a handful of bookkeeping columns
# plus every start_/end_ local-datetime component.
local_dt_components = ["year", "month", "day", "hour", "minute", "second", "weekday", "timezone"]
columns_to_drop = [
    "source", "end_fmt_time", "end_loc", "raw_trip", "start_fmt_time", "start_loc",
] + [
    f"{trip_edge}_local_dt_{component}"
    for trip_edge in ("start", "end")
    for component in local_dt_components
]
expanded_labeled_trips = expanded_labeled_trips.drop(columns=columns_to_drop)

expanded_labeled_trips["distance_miles"] = expanded_labeled_trips["distance"] * METERS_TO_MILES

# Group together the prepilot participants: every program value in this list
# is relabeled as the single program "prepilot".
prepilot_list = [
    '84Q9SsrH', 'cwZazZLJ', 'CudLAeg8', 'sxxcLqbK', 'Q8T7QTXK', '5KEGHHuf',
    'e9MaNVU7', '7c797MRD', 'rhBZukxY', 'k36cxmfA', 'FmxVf8u6', 'F3jxHLSW',
]
expanded_labeled_trips["program"] = expanded_labeled_trips["program"].replace(prepilot_list, "prepilot")

In [2]:
# Restore `expanded_labeled_trips` from IPython's %store persistence (it must
# have been saved earlier with `%store expanded_labeled_trips`).
# NOTE(review): presumably an alternative to re-running the database query cell — confirm.
%store -r expanded_labeled_trips

In [3]:
# Unit-distance MCS table, indexed by its "moment" column.
unit_dist_MCS_df = (
    pd.read_csv("unit_distance_MCS.csv")
    .set_index("moment")
)

# Energy lookup built from the energy intensity table loaded above.
energy_dict = cm_handling.get_energy_dict(df_EI)

In [4]:
# Filter the trips (keeping "not a trip" entries, since drop_not_a_trip=False)
# and then run the primary-mode helper on what remains.
expanded_labeled_trips = (
    expanded_labeled_trips
    .pipe(hf.drop_unwanted_trips, drop_not_a_trip=False)
    .pipe(hf.get_primary_modes, energy_dict, MODE_MAPPING_DICT)
)

print('Here are the number of labeled trips remaining in each program dataset:')
expanded_labeled_trips["program"].value_counts()

Dropping user labeled AIR trips and trips with no OS.
Dropped 93 trips with no sensed sections.
Here are the number of labeled trips remaining in each program dataset:


cc          28768
pc          17880
fc          11744
stage       10715
sc           9092
vail         6348
4c           5262
prepilot     2425
Name: program, dtype: int64

In [5]:
# Load the per-OS confusion matrices, indexed by ground-truth mode ("gt_mode").
# The EI moments are derived from these matrices later in the notebook.
android_confusion = pd.read_csv("android_confusion.csv").set_index("gt_mode")
ios_confusion = pd.read_csv("ios_confusion.csv").set_index("gt_mode")

# Apply the same row collapse ("Train" -> ["Train"]) to both matrices; no
# columns are collapsed.
android_confusion = cm_handling.collapse_confusion_matrix(
    android_confusion,
    rows_to_collapse={"Train": ["Train"]},
    columns_to_collapse={},
)
ios_confusion = cm_handling.collapse_confusion_matrix(
    ios_confusion,
    rows_to_collapse={"Train": ["Train"]},
    columns_to_collapse={},
)

# "car_load_factor" here means the number the drove-alone energy intensity is
# divided by; for a drove-alone to shared-ride ratio r = 1 it is 4/3.
sensed_car_EI = hf.find_sensed_car_energy_intensity(
    energy_dict, ECAR_PROPORTION, DROVE_ALONE_TO_SHARED_RIDE_RATIO
)
energy_dict.update({"Car, sensed": sensed_car_EI})

expanded_labeled_trips["distance_miles"] = expanded_labeled_trips["distance"] * METERS_TO_MILES

# Covariance between energy intensity and trip length used by the EC computations.
EI_length_cov = 0

In [6]:
# Candidate prior mode distributions for the sensitivity analysis.
#
# Note: approach this differently if the android ground truth modes are not the
# same set as the ios ground truth modes in the test dataset.
available_ground_truth_modes = android_confusion.index

# Each prior pins the probability of the listed modes.
# NOTE(review): hf.construct_prior_dict presumably distributes the remaining
# probability over the other available modes — confirm in helper_functions.
uniform_prior = hf.construct_prior_dict({}, available_ground_truth_modes)
mostly_car_prior = hf.construct_prior_dict(
    {"Car, sensed": 0.80, "Pilot ebike": 0.05}, available_ground_truth_modes
)
# Fixed: this literal used to repeat the "Pilot ebike" key (0.80, then 0.05).
# Python keeps the last value for a duplicated key, so the "80 Percent Ebike"
# prior was really a 5% e-bike prior. It now mirrors the car-heavy prior.
mostly_ebike_prior = hf.construct_prior_dict(
    {"Pilot ebike": 0.80, "Car, sensed": 0.05}, available_ground_truth_modes
)
mostly_train_prior = hf.construct_prior_dict({"Train": 0.30}, available_ground_truth_modes)
half_car = hf.construct_prior_dict({"Car, sensed": 0.50}, available_ground_truth_modes)
half_ebike = hf.construct_prior_dict({"Pilot ebike": 0.50}, available_ground_truth_modes)
close_to_CEO_true_car_and_ebike = hf.construct_prior_dict(
    {"Car, sensed": 0.7, "Pilot ebike": 0.13}, available_ground_truth_modes
)

# Display name -> prior passed to hf.prior_mode_distribution_sensitivity_analysis.
# "No Bayes Update" and "MobilityNet Specific to OS" both map to an empty dict;
# how the helper interprets that is decided inside helper_functions.
prior_mode_distributions_map = {
    "80 Percent Car": mostly_car_prior,
    "80 Percent Ebike": mostly_ebike_prior,
    "30 Percent Train": mostly_train_prior,
    "50 Percent Car": half_car,
    "50 Percent Ebike": half_ebike,
    "No Bayes Update": {},
    "MobilityNet Specific to OS": {},
    "Uniform Prior": uniform_prior,
    "Close to CEO Mode Distribution": close_to_CEO_true_car_and_ebike,
}
#prior_and_error_dataframe_vail,_  = hf.prior_mode_distribution_sensitivity_analysis(expanded_labeled_trips[expanded_labeled_trips.program == 'vail'], prior_mode_distributions_map, android_confusion, ios_confusion, unit_dist_MCS_df, energy_dict, EI_length_cov)

In [33]:
# Run the prior sensitivity analysis on every labeled trip, all programs
# together. Only the first returned value is kept.
prior_and_error_dataframe, _ = hf.prior_mode_distribution_sensitivity_analysis(
    expanded_labeled_trips,
    prior_mode_distributions_map,
    android_confusion,
    ios_confusion,
    unit_dist_MCS_df,
    energy_dict,
    EI_length_cov,
)

80 Percent Car
Computing energy consumption for each trip.
Using EI length covariance = 0.
80 Percent Ebike
Computing energy consumption for each trip.
Using EI length covariance = 0.
30 Percent Train
Computing energy consumption for each trip.
Using EI length covariance = 0.
50 Percent Car
Computing energy consumption for each trip.
Using EI length covariance = 0.
50 Percent Ebike
Computing energy consumption for each trip.
Using EI length covariance = 0.
No Bayes Update
Computing energy consumption for each trip.
Using EI length covariance = 0.
MobilityNet Specific to OS
Computing energy consumption for each trip.
Using EI length covariance = 0.
Uniform Prior
Computing energy consumption for each trip.
Using EI length covariance = 0.
Close to CEO Mode Distribution
Computing energy consumption for each trip.
Using EI length covariance = 0.


In [35]:
# Subset of priors reported in the paper, printed as a LaTeX table with values
# rounded to two decimals.
priors_for_paper = [
    "80 Percent Car",
    "No Bayes Update",
    "MobilityNet Specific to OS",
    "Uniform Prior",
]
paper_rows = prior_and_error_dataframe.query('`Prior Name` == @priors_for_paper')
print(paper_rows.round(2).to_latex(index=False))

\begin{tabular}{lrrr}
\toprule
                 Prior Name &  Percent Error &  Estimated Standard Deviation (SD) &  Number of Standard Deviations to Truth \\
\midrule
             80 Percent Car &          21.99 &                          136955.85 &                                    0.76 \\
            No Bayes Update &           9.17 &                          170977.64 &                                    0.25 \\
 MobilityNet Specific to OS &           9.17 &                          170977.64 &                                    0.25 \\
              Uniform Prior &          14.00 &                          158802.60 &                                    0.41 \\
\bottomrule
\end{tabular}



In [7]:
# Same sensitivity analysis, restricted to trips from the "cc" program.
cc_trips = expanded_labeled_trips[expanded_labeled_trips["program"] == "cc"]
prior_and_error_dataframe_cc, _ = hf.prior_mode_distribution_sensitivity_analysis(
    cc_trips,
    prior_mode_distributions_map,
    android_confusion,
    ios_confusion,
    unit_dist_MCS_df,
    energy_dict,
    EI_length_cov,
)

80 Percent Car
Computing energy consumption for each trip.
Using EI length covariance = 0.
80 Percent Ebike
Computing energy consumption for each trip.
Using EI length covariance = 0.
30 Percent Train
Computing energy consumption for each trip.
Using EI length covariance = 0.
50 Percent Car
Computing energy consumption for each trip.
Using EI length covariance = 0.
50 Percent Ebike
Computing energy consumption for each trip.
Using EI length covariance = 0.
No Bayes Update
Computing energy consumption for each trip.
Using EI length covariance = 0.
MobilityNet Specific to OS
Computing energy consumption for each trip.
Using EI length covariance = 0.
Uniform Prior
Computing energy consumption for each trip.
Using EI length covariance = 0.
Close to CEO Mode Distribution
Computing energy consumption for each trip.
Using EI length covariance = 0.


In [None]:
# Per-program sensitivity analysis using only the uniform prior.
# NOTE: this rebinds prior_mode_distributions_map to a single-entry dict, so any
# cell that is (re)run after this one sees only the "Uniform Prior" entry.
prior_mode_distributions_map = {"Uniform Prior": uniform_prior}

uniform_prior_dataframe_map = {}
for program in expanded_labeled_trips.program.unique():
    print(program)
    is_this_program = expanded_labeled_trips.program == program
    uniform_prior_df, _ = hf.prior_mode_distribution_sensitivity_analysis(
        expanded_labeled_trips[is_this_program],
        prior_mode_distributions_map,
        android_confusion,
        ios_confusion,
        unit_dist_MCS_df,
        energy_dict,
        EI_length_cov,
    )
    # Tag the result rows with the program they were computed for.
    uniform_prior_df["program"] = program
    uniform_prior_dataframe_map[program] = uniform_prior_df

# Stack the per-program results into one frame with a fresh integer index.
uniform_prior_df_all_programs = pd.concat(uniform_prior_dataframe_map, ignore_index=True)

In [13]:
# Observation: these percent errors look similar to the ones obtained with the
# predicted values and no uncertainty.
uniform_prior_df_all_programs.set_index("program")

Unnamed: 0_level_0,Prior Name,Percent Error,Estimated Standard Deviation (SD),Number of Standard Deviations to Truth
program,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
sc,Uniform Prior,8.680951,8180.883109,0.400043
fc,Uniform Prior,17.648382,9644.569281,0.70918
4c,Uniform Prior,2.143461,10143.262349,0.05023
stage,Uniform Prior,4.110845,22301.729757,0.137969
pc,Uniform Prior,28.87958,31489.776542,0.880324
vail,Uniform Prior,9.573135,14729.696118,0.211626
cc,Uniform Prior,12.217138,64144.146505,0.295193
prepilot,Uniform Prior,19.952889,3450.215716,0.698082


In [7]:
# Row totals of the android confusion matrix (one per ground-truth mode),
# normalised by the grand total of the matrix.
durations_in_modes = android_confusion.sum(axis=1)
prior_mode_probs = durations_in_modes.div(android_confusion.sum().sum())

# Conditional EI expectation/variance tables for each OS (no Bayes update),
# keyed by operating system name.
android_EI_moments_df = cm_handling.get_conditional_EI_expectation_and_variance(
    android_confusion, energy_dict
)
ios_EI_moments_df = cm_handling.get_conditional_EI_expectation_and_variance(
    ios_confusion, energy_dict
)
os_EI_moments_map = dict(ios=ios_EI_moments_df, android=android_EI_moments_df)

prior_mode_probs

gt_mode
Walk               0.204984
Regular Bike       0.063802
Scooter share      0.029817
Pilot ebike        0.029817
Car, sensed    0.049948
Bus                0.144903
no_gt              0.119467
Train              0.357264
dtype: float64

### Sensitivity Analysis: vary the prior mode distribution

In [None]:
# Build a prior by hand: fix the probabilities of the prespecified modes and
# split whatever probability is left evenly over the remaining ground-truth modes.
prior_probs_prespecified = {"Car, sensed": 0.85, "Pilot ebike": 0.05}
n_other_modes = len(android_confusion.index) - len(prior_probs_prespecified)
probability_remaining = 1 - sum(prior_probs_prespecified.values())

prior_probs = {
    **prior_probs_prespecified,
    **{
        mode: probability_remaining / n_other_modes
        for mode in android_confusion.index
        if mode not in prior_probs_prespecified
    },
}
# For a uniform prior instead:
#prior_probs = {x: 1/len(android_confusion.index) for x in android_confusion.index}

# EI moments after the Bayesian update with this prior, one table per OS.
android_EI_moments_with_Bayes_update_df = cm_handling.get_Bayesian_conditional_EI_expectation_and_variance(
    android_confusion, energy_dict, prior_probs
)
ios_EI_moments_with_Bayes_update_df = cm_handling.get_Bayesian_conditional_EI_expectation_and_variance(
    ios_confusion, energy_dict, prior_probs
)
os_EI_moments_with_Bayes_update_map = {
    'ios': ios_EI_moments_with_Bayes_update_df,
    'android': android_EI_moments_with_Bayes_update_df,
}

# Energy consumption for every trip using the Bayes-updated moments.
energy_consumption_with_Bayes_update_df = get_EC.compute_all_EC_values(
    expanded_labeled_trips,
    unit_dist_MCS_df,
    energy_dict,
    android_EI_moments_with_Bayes_update_df,
    ios_EI_moments_with_Bayes_update_df,
    EI_length_cov,
    print_info=False,
)