## Generate Static Graphs

These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.

Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots.

In [None]:
year = None
month = None
program = "prepilot"
study_type = "program"
mode_of_interest = "pilot_ebike"

In [None]:
SAVE_DIR = r'C:\Users\dakcicek\Documents\GitHub\ceo_analysis\viz_scripts\plots'

In [None]:
import sys
 
# adding emission folder to the system path
sys.path.insert(0, r"C:\Users\dakcicek\Documents\GitHub\e-mission-server")

In [None]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model

import emission.core.get_database as edb
from plots import *
import scaffolding

sns.set_style("whitegrid")
sns.set()
%matplotlib inline

In [None]:
import plots
import importlib
importlib.reload(plots)

In [None]:
# Do not run this notebook at all unless it is for a program; nbclient will run up through this cell
if study_type != "program":
    raise Exception("The plots in this notebook are only relevant to programs")

In [None]:
# Loading mapping dictionaries from mapping_dictionaries notebook
%store -r df_ei
%store -r dic_re
%store -r dic_pur
%store -r dic_fuel

# Wrap every label mapping in a defaultdict so that unknown keys resolve to 'Other'
dic_re, dic_pur, dic_fuel = (
    defaultdict(lambda: 'Other', mapping)
    for mapping in (dic_re, dic_pur, dic_fuel)
)

## Get UUIDs by Program

In [None]:
# Split UUIDs by program
# NOTE: the loop variable is deliberately not called `program`. That name is a
# notebook input parameter which is passed to the scaffolding loaders further
# down; reusing it here would replace the parameter with the program of the
# last user returned by the database.
program_uuid_map = {}
uuid_program_list = []
# A single pass over the UUID collection fills both structures
for ue in edb.get_uuid_db().find():
    # The program is the part of the opcode before the first underscore
    user_program = ue['user_email'].split("_")[0]
    user_id_str = str(ue['uuid'])
    if user_program not in program_uuid_map:
        print(f"Found new program {user_program}, creating new list")
        program_uuid_map[user_program] = []
    program_uuid_map[user_program].append(user_id_str)
    uuid_program_list.append({"program": user_program, "opcode": ue["user_email"], "user_id_str": user_id_str})

In [None]:
uuid_program_df = pd.DataFrame.from_dict(uuid_program_list)
uuid_program_df.head()

## Collect Data From Database

In [None]:
expanded_ct, file_suffix, quality_text = scaffolding.load_viz_notebook_data(year,
                                                                            month,
                                                                            program,
                                                                            study_type,
                                                                            dic_re,
                                                                            dic_pur=dic_pur)
expanded_ct = scaffolding.add_energy_labels(expanded_ct, df_ei, dic_fuel)

In [None]:
file_suffix = '_all'

In [None]:
# Join to the program df to get each user's program
expanded_ct = expanded_ct.merge(uuid_program_df, on='user_id_str')

In [None]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model

#import emission.core.get_database as edb
from plots import *
#import scaffolding

sns.set_style("whitegrid")
sns.set()
%matplotlib inline

import plots
import importlib
importlib.reload(plots)

In [None]:
# Load the label lookup tables from CSV files (paths are relative to the
# notebook's working directory)
df_pur = pd.read_csv(r'auxiliary_files/purpose_labels.csv')
df_re = pd.read_csv(r'auxiliary_files/mode_labels.csv')
df_ei = pd.read_csv(r'auxiliary_files/energy_intensity.csv')

# Plain dictionaries built from pairs of CSV columns. These rebind dic_pur,
# dic_re and dic_fuel as ordinary dicts, so a missing key raises KeyError here
# instead of falling back to a default value.
dic_pur = dict(zip(df_pur['purpose_confirm'],df_pur['bin_purpose'])) # purpose_confirm -> bin_purpose
dic_re  = dict(zip(df_re['replaced_mode'],df_re['mode_clean'])) # replaced_mode -> mode_clean
dic_fuel = dict(zip(df_ei['mode'],df_ei['fuel'])) # mode -> fuel

In [None]:
#reading the trip table
#expanded_ct = pd.read_csv("C:\\Users\\dakcicek\\Desktop\\CEO_dumps_final\\trip_program.csv")
expanded_ct=pd.read_csv("C:\\Users\\dakcicek\\Documents\\GitHub\\ceo_analysis\\viz_scripts\\filtered_merged_trips_2.csv")

In [None]:
trip_program=pd.read_csv("C:\\Users\\dakcicek\\Desktop\\CEO_dumps_final\\trip_program.csv")

In [None]:
trip_program['program'].unique()

In [None]:
trip_program['user_id'].nunique()

In [None]:
print(len(trip_program))

In [None]:
data_non_stage = trip_program[trip_program.program != "stage"]
print(len(data_non_stage))

In [None]:
data_non_stage['user_id'].nunique()

In [None]:
data_non_stage_non_mini = data_non_stage[data_non_stage.program != "prepilot"]
print(len(data_non_stage_non_mini))

In [None]:
data_non_stage_non_mini['user_id'].nunique()

In [None]:
expanded_ct=pd.read_csv("C:\\Users\\dakcicek\\Documents\\GitHub\\ceo_analysis\\viz_scripts\\expanded_ct.csv")

In [None]:
expanded_ct['user_id'].nunique()

In [None]:
data_non_stage = expanded_ct[expanded_ct.program != "stage"]
print(len(data_non_stage))

In [None]:
data_non_stage_non_mini = data_non_stage[data_non_stage.program != "prepilot"]
print(len(data_non_stage_non_mini))

In [None]:
expanded_ct = trip_program.copy()

In [None]:
# Attach the survey (socio-economic) answers to the trip table
survey_column_names = {
    'Unique User ID (auto-filled, do not edit)': 'user_id',
    'Please identify which category represents your total household income, before taxes, for last year.': 'HHINC',
    'How many motor vehicles are owned, leased, or available for regular use by the people who currently live in your household?': 'VEH',
    'In which year were you born?': 'AGE',
    'Including yourself, how many people live in your home?': 'HHSIZE',
    'How many children under age 18 live in your home?': 'CHILDREN',
    'What is your gender?': 'GENDER',
    'If you were unable to use your household vehicle(s), which of the following options would be available to you to get you from place to place?': 'available_modes',
    'Are you a student?': 'STUDENT',
}
socio_data = pd.read_csv("C:\\Users\\dakcicek\\Desktop\\CEO_dumps_final\\Can Do Colorado eBike Program - en.csv")
socio_data = socio_data.rename(columns=survey_column_names)
# Responses without a user id cannot be matched to trips
socio_data = socio_data[socio_data.user_id.notnull()]

# A user may have answered several times: after sorting by Timestamp the last row per user is kept
socio_data = (socio_data.sort_values(by=['user_id', 'Timestamp'])
                        .drop_duplicates(subset=['user_id'], keep='last'))
# Move the id into a dedicated join column so it does not collide with the trip table's user_id
socio_data = socio_data.assign(user_id_socio=socio_data.user_id).drop(columns='user_id')

# The join key on the trip side is the user id as text with all dashes removed.
# The merge is an inner join, so trips of users without a survey response are dropped.
expanded_ct['user_id_socio'] = expanded_ct.user_id.astype(str).str.replace('-', '', regex=False)
expanded_ct = expanded_ct.merge(socio_data, on='user_id_socio')
#expanded_ct.to_csv("expanded_ct.csv")

In [None]:
print(len(expanded_ct))

In [None]:
# Optionally initialize from saved dataset
#data = pd.read_csv("expanded_ct.csv")
# Or continue from data loaded from the database
data = expanded_ct.copy()

# # Add non-label category
# expanded_ct['replaced_mode'] = expanded_ct['replaced_mode'].fillna('Unlabeled')
# expanded_ct.loc[expanded_ct['replaced_mode'] == 'Unlabeled', 'Replaced_mode'] = "Unlabeled"

# # List of variables to keep in data but not turn into categorical number variables
# dont_categorize = ['user_id','_id','cleaned_trip']

# # Make copy of user_id to be categorized since both versions are needed
# data['user_id_int'] = data['user_id']

# Get timestamp from known year/month/day aggregated to days
data = data.rename(columns={'start_local_dt_year':'year','start_local_dt_month':'month','start_local_dt_day':'day'})
data['date_time'] = pd.to_datetime(data[['year','month','day']])

# Fix age: the AGE column is subtracted from 2022 (the survey asks for the year of birth)
data['AGE'] = 2022 - data['AGE']

# Number of workers, taken as household members that are not children
data['WORKERS'] = data['HHSIZE'] - data['CHILDREN']

# Duration in minutes
data['duration'] = data['duration'] / 60

# E-bike/not E-Bike variable
data['is_ebike'] = np.where(data['Mode_confirm'] == "E-bike", "E-Bike Trips", "Non E-Bike Trips")

# Drop income answers that cannot be placed in a bracket.
# The explicit copy lets the column assignments below write to an independent
# frame rather than to a slice of the previous one.
data = data[~data['HHINC'].isin(['Prefer not to say', '$150,000'])].copy() # Side note why is 150k (n=7) its own bin?
# Representative income for every bracket; answers outside this table become
# NaN so that the division below stays numeric
income_bracket_values = {
    'Less than $24,999': 12500,
    '$25,000-$49,999': 37500,
    '$50,000-$99,999': 75000,
    '$100,000 -$149,999': 125000,
    '$150,000-$199,999': 175000,
    '$200,000 or more': 250000,
}
data['HHINC_NUM'] = data['HHINC'].map(income_bracket_values)

# Calculate average income per adult in the household
data['PINC'] = data['HHINC_NUM'] / data['WORKERS']

# Combine variable categories (applied to every column of the frame in one pass).
# The e-bike label is spelled 'E-bike' to agree with the Mode categories and
# with the comparisons against Mode_confirm used elsewhere in this notebook.
mode_groups = {
    'Gas Car, drove alone': 'Car',
    'Gas Car, with others': 'Shared Car',
    'Bikeshare': 'Shared Micromobility',
    'Scooter share': 'Shared Micromobility',
    'Regular Bike': 'Personal Micromobility',
    'Skate board': 'Personal Micromobility',
    'Train': 'Transit',
    'Free Shuttle': 'Transit',
    'Bus': 'Transit',
    'Taxi/Uber/Lyft': 'Ridehail',
    'Pilot ebike': 'E-bike',
}
data = data.replace(mode_groups)

# Categorical type will include all days/modes in groupby even if there is no data for a particular tabulation
data.user_id = pd.Categorical(data.user_id)
data.date_time = pd.Categorical(data.date_time)
# The categories of the lowercase mode_confirm column are the sorted unique keys of dic_re
data.mode_confirm = pd.Categorical(data.mode_confirm, ordered=True, categories=np.unique(list(dic_re.keys())))

# Add order to categorical variables
# NOTE(review): only the three lowest income brackets are listed; pandas turns
# values outside the listed categories into NaN - confirm this is intended
data.HHINC = pd.Categorical(data.HHINC, ordered=True, categories=['Less than $24,999',
                                                                 '$25,000-$49,999',
                                                                 '$50,000-$99,999'])
# 'Mode' is an ordered copy of Mode_confirm; the category order is the one given here
data['Mode'] = pd.Categorical(data.Mode_confirm, ordered=True, categories=[
    'E-bike',
    'Car',
    'Shared Car',
    'Walk',
    'Transit',
    'Personal Micromobility',
    'Shared Micromobility',
    'Ridehail',
    'Other'])
data.VEH = pd.Categorical(data.VEH, ordered=True, categories=['0','1','2','3','4+'])
# Keep the numeric per-adult income before PINC is replaced by binned labels
data['PINC_NUM'] = data['PINC']
# Bin per-adult income; the labels are in thousands of dollars
data.PINC = pd.cut(data.PINC, bins=[0,10000,20000,30000,40000,50000,60000,70000,999999],
                  labels=["$0-9",
                         "$10-19",
                         "$20-29",
                         "$30-39",
                         "$40-49",
                         "$50-59",
                         "$60-69",
                         "$70+"])

# Vehicles per driver
# '4+' is counted as 4; the remaining labels are converted by astype(int)
data['VEH_num'] = data['VEH'].replace(['1','2','3','4+'],[1,2,3,4]).astype(int)
data['DRIVERS'] = data["Including yourself, how many people have a driver's license in your household?"]
# Numeric driver count; answers that cannot be parsed as a number become NaN.
# NOTE(review): assumes the survey answers are plain numbers - verify whether
# capped answers such as '4+' occur and need an explicit mapping
data['DRIVERS_num'] = pd.to_numeric(data['DRIVERS'], errors='coerce')
# Households with an unknown or zero driver count get a ratio of 0
data['veh_per_driver'] = (data['VEH_num'] / data['DRIVERS_num']).fillna(0)
data.loc[data['veh_per_driver']==np.inf, 'veh_per_driver'] = 0

In [None]:
# Data Filtering: report the row count before and after removing implausible records
print(len(data))
keep_row = (
    (data['AGE'] < 100)
    & (data['distance_miles'] < 50)
    & (data['duration'] < 480)
    & ~data['Mode_confirm'].isin(['Not a Trip'])
    & ~data['Replaced_mode'].isin(['Not a Trip'])
    & ~data['Trip_purpose'].isin(['not_a_trip'])
    & (data['HHSIZE'] < 10)
    & (data['HHSIZE'] > data['CHILDREN'])
)
data = data[keep_row]
# Filters that are currently switched off:
#data = data[~data['HHINC'].isin(['Prefer not to say','$100,000 -$149,999','$150,000','$150,000-$199,999','$200,000 or more'])] # Side note why is 150k (n=7) its own bin?
#data = data[~data['VEH'].isin(['Prefer not to say / Prefiero no decir.'])]
#data = data[~data['available_modes'].isin(['None', 'Prefer not to say'])]
print(len(data))

In [None]:
a = data[data['AGE']>100]
print(len(a))

## Data Overview

In [None]:
data_eb = data.query(f"mode_confirm == '{mode_of_interest}'")

In [None]:
data.columns

In [None]:
# Summary statistics table
print(len(pd.unique(data.user_id)))
stat_data = data[['distance_miles','duration']]
stat_data.describe()

## Figures

### Program Participation

In [None]:
# Access database
tq = scaffolding.get_time_query(year, month)
## I EDITED SCAFFOLDING TO REMOVE STAGE INSTEAD OF TEST
participant_ct_df = scaffolding.load_all_participant_trips(program, tq, load_test_users=False) 
participant_ct_df["user_id_str"] = participant_ct_df.user_id.apply(lambda u: str(u))

In [None]:
# Proportion of trips labeled by user
# NOTE: this rebinds `data` to the contents of trip_program.csv (read from the
# current working directory), replacing whatever `data` held before this cell
data = pd.read_csv("trip_program.csv") ##all the labeled trips
data = data[data.program != 'stage']
# The value of this expression is not displayed because it is not the last line of the cell
data.user_id_str.nunique()

# Rows per user in the full participant trip table (the count of the 'distance' column)
all_user_trips = participant_ct_df.groupby(['user_id_str'], as_index=False).count()[['user_id_str','distance']]
print(len(all_user_trips))
# Rows per user in the labeled trip table
labeled_user_trips = data.groupby(['user_id_str'], as_index=False).count()[['user_id_str','distance']]
print(len(labeled_user_trips))

# Left join keeps every participant; users without labeled trips get 0.
# After the merge distance_x is the total count and distance_y the labeled count.
plot_data = all_user_trips.merge(labeled_user_trips, how='left', on='user_id_str').fillna(0)
plot_data.head()

In [None]:
# Share of each user's trips that carry a label (distance_y = labeled count, distance_x = total count)
plot_data['proportion'] = plot_data['distance_y'] / plot_data['distance_x']
# Bars are drawn from the lowest to the highest response rate
data_order = plot_data.sort_values('proportion', ascending=True).user_id_str
print(len(plot_data))

plot_title='Distribution of User Response Rates'
ylab='Proportion of Trips Labeled'
# Fill in the suffix so that the saved file name does not contain a literal '%s'
file_name='CanBikeCO_report_user_participation%s'%file_suffix
# The user count in the axis label follows the data instead of being typed in by hand
xlab = f'Individual Users ({len(plot_data)})'
fig, ax = plt.subplots(figsize=(10,4))
sns.barplot(data=plot_data, x='user_id_str', y='proportion', order=data_order, color='blue', ax=ax).set(title=plot_title,xlabel=xlab,ylabel=ylab,xticklabels=[])
plt.subplots_adjust(bottom=0.25)
fig.savefig(file_name+".png", bbox_inches='tight')

## Labeling Rate Across Time (Exploratory)

In [None]:
def _monthly_trip_counts(trips):
    """Count the rows of `trips` per (year, month) and add a 'Month' label 'year-month'."""
    counts = (trips.groupby(['start_local_dt_month', 'start_local_dt_year'])
                   .size()
                   .reset_index(name='count')
                   # sort while year and month are still numeric
                   .sort_values(['start_local_dt_year', 'start_local_dt_month'])
                   .astype({'start_local_dt_month': 'str', 'start_local_dt_year': 'str'}))
    counts['Month'] = counts[['start_local_dt_year', 'start_local_dt_month']].agg('-'.join, axis=1)
    return counts


# monthly counts of all trips and of the labeled trips
all_data = _monthly_trip_counts(participant_ct_df)
labeled = _monthly_trip_counts(data)

# one row per month of the full table; months without labeled trips get 0
plot_data = all_data.merge(labeled, how='left', on='Month').fillna(0)

# labeled count (count_y) relative to the total count (count_x)
plot_data['proportion'] = plot_data['count_y'] / plot_data['count_x']

#drop data before 6/2022 (when the full pilot started)
plot_data = plot_data.iloc[5:]
plot_data

In [None]:
# Line chart of the monthly labeling rate. The line connects the monthly
# points ('connect-the-dots' style), which the bar chart version avoids.
file_name = "CanBikeCO_report_ts_labels"
ylab = 'Labeling Rate'
plot_title = 'Labeling Rates Over Time'
fig, ax = plt.subplots(figsize=(16,4))
rate_line = sns.lineplot(data=plot_data, x='Month', y='proportion', estimator=np.mean)
rate_line.set(title=plot_title, xlabel='Month', ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
fig.savefig(file_name + ".png", bbox_inches='tight')

In [None]:
# Bar chart of the monthly labeling rate: one bar per month, nothing is
# implied about the time between months

file_name='CanBikeCO_report_ts_labels'
ylab='Proportion of Trips Labeled'
plot_title='Response Rates Over Time'
fig, ax = plt.subplots(figsize=(8,4))
rate_bars = sns.barplot(data=plot_data, x='Month', y='proportion', color='blue')
rate_bars.set(title=plot_title, xlabel='Month', ylabel=ylab)
plt.xticks(rotation=35, ha='right', fontsize=10)
plt.subplots_adjust(bottom=0.25)
fig.savefig(file_name + ".jpeg", bbox_inches='tight')

# bar_data = plot_data[['Month', 'proportion']]

# ax = bar_data.plot.bar(title='Response Rates Over Time', 
#                         ylabel='Proportion of Trips Labeled',
#                         xlabel = 'Month')

# ax.set_xticklabels(plot_data.Month, rotation=45, ha='right')
# ax.get_legend().remove()

# plt.savefig("CanBikeCO_report_ts_labels.jpeg", bbox_inches='tight')

### Updates to the Pilot Study

In [None]:
#remove stage users
data_non_stage = data[data.program != "stage"]
print(len(data_non_stage))

In [None]:
#remove minipilot users
data_non_stage_non_mini = data_non_stage[data_non_stage.program != "prepilot"]
print(len(data_non_stage_non_mini))

In [None]:
#separating programs
def _program_trips(program_code):
    """Return an independent copy of the trips whose `program` equals `program_code`.

    A copy is returned because later cells overwrite columns (for example
    start_ts) on the per-program frames; writing to a slice of
    data_non_stage_non_mini would raise pandas' SettingWithCopyWarning.
    """
    return data_non_stage_non_mini[data_non_stage_non_mini.program == program_code].copy()


four_corners = _program_trips("4c")
community_cycles = _program_trips("cc")
fort_collins = _program_trips("fc")
pueblo = _program_trips("pc")
smart_commute = _program_trips("sc")
vail = _program_trips("vail")

In [None]:
four_corners

In [None]:
# Number of distinct users per program, in the order
# 4c, cc, fc, pc, sc, vail
for program_trips in (four_corners, community_cycles, fort_collins, pueblo, smart_commute, vail):
    print(program_trips['user_id'].nunique())

In [None]:
# Number of trips per program, in the order
# 4c, cc, fc, pc, sc, vail
for program_trips in (four_corners, community_cycles, fort_collins, pueblo, smart_commute, vail):
    print(len(program_trips))

# Filtering smart commute

In [None]:
from datetime import datetime
smart_commute['start_ts']= pd.to_datetime(smart_commute['start_ts'], utc=True, unit='s')

In [None]:
smart_commute['start_ts']

In [None]:
# For every (user_id, Mode_confirm) group keep only the row(s) whose start_ts
# equals the earliest start_ts of that group, i.e. the first trip of each
# user with each confirmed mode
trip_sep=smart_commute.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])

In [None]:
print(trip_sep['user_id'].nunique())

In [None]:
sc_unique=trip_sep['user_id'].unique()
print(*sc_unique)

In [None]:
smart_commute.to_csv("smart_commute.csv")

In [None]:
sc_ebike_first=trip_sep[trip_sep['Mode_confirm']=='E-bike']
sc_ebike_first

In [None]:
sc_ebike_user_list= sc_ebike_first['user_id'].tolist()
smart_commute_incl_ebike = smart_commute[smart_commute['user_id'].isin(sc_ebike_user_list)]
print(smart_commute_incl_ebike['user_id'].nunique())

In [None]:
# Keep, for every user, only the trips that start at or after that user's own
# first e-bike trip.
# The index of sc_ebike_first is dropped first because it may carry the
# groupby keys as index levels, which would make grouping on the 'user_id'
# column ambiguous.
sc_first_ebike_ts = sc_ebike_first.reset_index(drop=True).groupby('user_id')['start_ts'].min()
sc_user_first_ts = smart_commute_incl_ebike['user_id'].map(sc_first_ebike_ts)
smart_commute_ebike_first = smart_commute_incl_ebike[smart_commute_incl_ebike['start_ts'] >= sc_user_first_ts]

In [None]:
smart_commute_ebike_first

In [None]:
sc_unique_ebikefirst=smart_commute_ebike_first['user_id'].unique()
print(*sc_unique_ebikefirst)
print(smart_commute_ebike_first['user_id'].nunique())

# Filtering Four corners

In [None]:
four_corners['start_ts']= pd.to_datetime(four_corners['start_ts'], utc=True, unit='s')

In [None]:
trip_sep_fc=four_corners.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])
print(trip_sep_fc['user_id'].nunique())

In [None]:
fc_ebike_first=trip_sep_fc[trip_sep_fc['Mode_confirm']=='E-bike']
fc_ebike_first

In [None]:
fc_ebike_user_list= fc_ebike_first['user_id'].tolist()
four_corners_incl_ebike = four_corners[four_corners['user_id'].isin(fc_ebike_user_list)]
print(four_corners_incl_ebike['user_id'].nunique())

In [None]:
# Keep, for every user, only the trips that start at or after that user's own
# first e-bike trip.
# The index of fc_ebike_first is dropped first because it may carry the
# groupby keys as index levels, which would make grouping on the 'user_id'
# column ambiguous.
four_corners_first_ebike_ts = fc_ebike_first.reset_index(drop=True).groupby('user_id')['start_ts'].min()
four_corners_user_first_ts = four_corners_incl_ebike['user_id'].map(four_corners_first_ebike_ts)
four_corners_ebike_first = four_corners_incl_ebike[four_corners_incl_ebike['start_ts'] >= four_corners_user_first_ts]

In [None]:
four_corners_ebike_first

In [None]:
four_corners_ebike_first['Mode_confirm'].unique()

In [None]:
fc_unique_ebikefirst=four_corners_ebike_first['user_id'].unique()
print(*fc_unique_ebikefirst)
print(four_corners_ebike_first['user_id'].nunique())

# Filtering community cycles

In [None]:
community_cycles['start_ts']= pd.to_datetime(community_cycles['start_ts'], utc=True, unit='s')

In [None]:
trip_sep_cc=community_cycles.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])
print(trip_sep_cc['user_id'].nunique())

In [None]:
cc_ebike_first=trip_sep_cc[trip_sep_cc['Mode_confirm']=='E-bike']
cc_ebike_first

In [None]:
cc_ebike_user_list= cc_ebike_first['user_id'].tolist()
community_cycles_incl_ebike = community_cycles[community_cycles['user_id'].isin(cc_ebike_user_list)]
print(community_cycles_incl_ebike['user_id'].nunique())

In [None]:
# Keep, for every user, only the trips that start at or after that user's own
# first e-bike trip.
# The index of cc_ebike_first is dropped first because it may carry the
# groupby keys as index levels, which would make grouping on the 'user_id'
# column ambiguous.
cc_first_ebike_ts = cc_ebike_first.reset_index(drop=True).groupby('user_id')['start_ts'].min()
cc_user_first_ts = community_cycles_incl_ebike['user_id'].map(cc_first_ebike_ts)
community_cycles_ebike_first = community_cycles_incl_ebike[community_cycles_incl_ebike['start_ts'] >= cc_user_first_ts]

In [None]:
community_cycles_ebike_first

In [None]:
community_cycles_ebike_first['Mode_confirm'].unique()

In [None]:
cc_unique_ebikefirst=community_cycles_ebike_first['user_id'].unique()
print(*cc_unique_ebikefirst)
print(community_cycles_ebike_first['user_id'].nunique())

# Filtering Fort Collins

In [None]:
fort_collins['start_ts']= pd.to_datetime(fort_collins['start_ts'], utc=True, unit='s')

In [None]:
trip_sep_fc=fort_collins.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])
print(trip_sep_fc['user_id'].nunique())

In [None]:
fc_ebike_first=trip_sep_fc[trip_sep_fc['Mode_confirm']=='E-bike']
fc_ebike_first

In [None]:
fc_ebike_user_list= fc_ebike_first['user_id'].tolist()
fort_collins_incl_ebike = fort_collins[fort_collins['user_id'].isin(fc_ebike_user_list)]
print(fort_collins_incl_ebike['user_id'].nunique())

In [None]:
# Keep, for every user, only the trips that start at or after that user's own
# first e-bike trip.
# NOTE: fc_ebike_first is the name used by both the Four Corners and the
# Fort Collins sections; this cell must run after the Fort Collins cell that
# assigns it.
# The index is dropped first because it may carry the groupby keys as index
# levels, which would make grouping on the 'user_id' column ambiguous.
fort_collins_first_ebike_ts = fc_ebike_first.reset_index(drop=True).groupby('user_id')['start_ts'].min()
fort_collins_user_first_ts = fort_collins_incl_ebike['user_id'].map(fort_collins_first_ebike_ts)
fort_collins_ebike_first = fort_collins_incl_ebike[fort_collins_incl_ebike['start_ts'] >= fort_collins_user_first_ts]

In [None]:
fort_collins_ebike_first

In [None]:
fort_collins_ebike_first['Mode_confirm'].unique()

In [None]:
fc_unique_ebikefirst=fort_collins_ebike_first['user_id'].unique()
print(*fc_unique_ebikefirst)
print(fort_collins_ebike_first['user_id'].nunique())

# Filtering pueblo

In [None]:
pueblo['start_ts']= pd.to_datetime(pueblo['start_ts'], utc=True, unit='s')

In [None]:
trip_sep_pu=pueblo.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])
print(trip_sep_pu['user_id'].nunique())

In [None]:
pu_ebike_first=trip_sep_pu[trip_sep_pu['Mode_confirm']=='E-bike']
pu_ebike_first

In [None]:
pu_ebike_user_list= pu_ebike_first['user_id'].tolist()
pueblo_incl_ebike = pueblo[pueblo['user_id'].isin(pu_ebike_user_list)]
print(pueblo_incl_ebike['user_id'].nunique())

In [None]:
# Keep, for every user, only the trips that start at or after that user's own
# first e-bike trip.
# The index of pu_ebike_first is dropped first because it may carry the
# groupby keys as index levels, which would make grouping on the 'user_id'
# column ambiguous.
pu_first_ebike_ts = pu_ebike_first.reset_index(drop=True).groupby('user_id')['start_ts'].min()
pu_user_first_ts = pueblo_incl_ebike['user_id'].map(pu_first_ebike_ts)
pueblo_ebike_first = pueblo_incl_ebike[pueblo_incl_ebike['start_ts'] >= pu_user_first_ts]

In [None]:
pueblo_ebike_first

In [None]:
pueblo_ebike_first['Mode_confirm'].unique()

In [None]:
pu_unique_ebikefirst=pueblo_ebike_first['user_id'].unique()
print(*pu_unique_ebikefirst)
print(pueblo_ebike_first['user_id'].nunique())

# Filtering Vail

In [None]:
vail['start_ts']= pd.to_datetime(vail['start_ts'], utc=True, unit='s')

In [None]:
trip_sep_va=vail.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])
print(trip_sep_va['user_id'].nunique())

In [None]:
va_ebike_first=trip_sep_va[trip_sep_va['Mode_confirm']=='E-bike']
va_ebike_first

In [None]:
va_ebike_user_list= va_ebike_first['user_id'].tolist()
vail_incl_ebike = vail[vail['user_id'].isin(va_ebike_user_list)]
print(vail_incl_ebike['user_id'].nunique())

In [None]:
# Keep, for every user, only the trips that start at or after that user's own
# first e-bike trip.
# The index of va_ebike_first is dropped first because it may carry the
# groupby keys as index levels, which would make grouping on the 'user_id'
# column ambiguous.
va_first_ebike_ts = va_ebike_first.reset_index(drop=True).groupby('user_id')['start_ts'].min()
va_user_first_ts = vail_incl_ebike['user_id'].map(va_first_ebike_ts)
vail_ebike_first = vail_incl_ebike[vail_incl_ebike['start_ts'] >= va_user_first_ts]

In [None]:
vail_ebike_first

In [None]:
vail_ebike_first['Mode_confirm'].unique()

In [None]:
va_unique_ebikefirst=vail_ebike_first['user_id'].unique()
print(*va_unique_ebikefirst)
print(vail_ebike_first['user_id'].nunique())

# Combining the filtered datasets

In [None]:
#Combining the filtered datasets
filtered_by_program = (
    four_corners_ebike_first,
    community_cycles_ebike_first,
    fort_collins_ebike_first,
    pueblo_ebike_first,
    smart_commute_ebike_first,
    vail_ebike_first,
)

# distinct users per filtered program table
for program_trips in filtered_by_program:
    print(program_trips['user_id'].nunique())

# trips per filtered program table
for program_trips in filtered_by_program:
    print(len(program_trips))

In [None]:
filtered_merged = pd.concat([four_corners_ebike_first, community_cycles_ebike_first, fort_collins_ebike_first, 
                             pueblo_ebike_first, smart_commute_ebike_first,vail_ebike_first], axis=0)
print(len(filtered_merged))
print(filtered_merged['user_id'].nunique())

In [None]:
filtered_merged.to_csv("filtered_merged_trips.csv")

# Analysis

In [None]:
#data=pd.read_csv("C:\\Users\\dakcicek\\Documents\\GitHub\\ceo_analysis\\viz_scripts\\expanded_ct.csv")
data=pd.read_csv("C:\\Users\\dakcicek\\Documents\\GitHub\\ceo_analysis\\viz_scripts\\filtered_merged_trips_2.csv")

In [None]:
# Age, Income, Gender
# One row per user: the first trip row of every user supplies the survey
# answers. drop_duplicates is used instead of groupby().nth(0) because the
# index returned by nth changed in pandas 2.0, which broke the second
# groupby on 'user_id'. The result is indexed by user_id.
plot_data = (data.dropna(subset=['user_id'])
                 .drop_duplicates(subset='user_id', keep='first')
                 .set_index('user_id')
                 .sort_index()[['AGE','GENDER','VEH','HHINC']]
                 .dropna())
plot_data = plot_data[plot_data['GENDER'].isin(['Man','Woman'])]

plot_title='Participant Demographics'
ylab='Count'
file_name='CanBikeCO_report_demog%s'%file_suffix

fig, axs = plt.subplots(2,2,figsize=(10,6))
sns.histplot(data=plot_data, x='GENDER', ax=axs[0,0], color='purple', stat='probability').set(xlabel='Sex',ylabel='proportion')
sns.histplot(data=plot_data, x='AGE', ax=axs[0,1], color='red', stat='probability').set(xlabel='Age',ylabel='proportion')
sns.histplot(data=plot_data, x='VEH', ax=axs[1,0], color='blue', stat='probability').set(xlabel='Household Vehicles',ylabel='proportion')
sns.histplot(data=pd.DataFrame(plot_data['HHINC'].dropna()), x='HHINC', ax=axs[1,1], color='green', stat='probability').set(xlabel='Household Income',ylabel='proportion')
# plt.xticks acts on the current axes, i.e. the income panel created last
plt.xticks(rotation=35, ha='right')
plt.tight_layout()

fig.savefig(file_name+".png", bbox_inches='tight')

In [None]:
plot_data=plot_data.reset_index()

In [None]:
plot_data.columns

In [None]:
sns.set_palette('tab20', 9)

In [None]:
# Reference proportions of the minipilot, typed in by hand.
# Each table is built in long format: one row per (label, trip type).

# --- mode share ---
minipilot_mode_labels = ['Car', 'Shared Car', 'Walk', 'Transit', 'Ridehail', 'Other', 'E-bike']
minipilot_all_data = [.293, .209, .075, .054, .029, .029, .313]
minipilot_commute_data = [.270, .294, .050, .0, .0, .062, .324]
colnames = ['Mode_confirm','All Trips','Commute Trips']

minipilot_mode_data_1 = pd.DataFrame(list(zip(minipilot_mode_labels, minipilot_all_data))).assign(**{'Trip Type': 'All Trips'})
minipilot_mode_data_2 = pd.DataFrame(list(zip(minipilot_mode_labels, minipilot_commute_data))).assign(**{'Trip Type': 'Work Trips'})
minipilot_mode_data = pd.concat([minipilot_mode_data_1, minipilot_mode_data_2]).assign(Dataset='Minipilot')
minipilot_mode_data.columns = ['Mode','Proportion','Trip Type','Dataset']

# --- trip purpose share ---
minipilot_purp_labels = ['Home', 'Work', 'Pick-up/Drop off', 'Shopping', 'Recreation/Exercise',
                         'Meal', 'Entertainment/Social', 'Transit transfer', 'Other', 'Personal/Medical']
# the two value lists are reused for the purpose table
minipilot_all_data = [.171, .163, .138, .120, .058, .055, .045, .022, .051, .176]
minipilot_commute_data = [.162, .166, .190, .097, .086, .082, .061, 0, .052, .104]
colnames = ['Trip_purpose','All Trips','E-Bike Trips']

minipilot_purp_data_1 = pd.DataFrame(list(zip(minipilot_purp_labels, minipilot_all_data))).assign(**{'Trip Type': 'All Trips'})
minipilot_purp_data_2 = pd.DataFrame(list(zip(minipilot_purp_labels, minipilot_commute_data))).assign(**{'Trip Type': 'E-Bike Trips'})
minipilot_purp_data = pd.concat([minipilot_purp_data_1, minipilot_purp_data_2]).assign(Dataset='Minipilot')
minipilot_purp_data.columns = ['Purpose','Proportion','Trip Type','Dataset']

In [None]:
# Mode share of the current data set for all trips and for work trips,
# stacked on top of the minipilot reference table
plot_data = data.copy()
# both micromobility groups are counted as 'Other'
is_micromobility = plot_data['Mode_confirm'].isin(['Personal Micromobility', 'Shared Micromobility'])
plot_data.loc[is_micromobility, 'Mode_confirm'] = 'Other'

# trips per mode -> share of all trips; the count column is then reused as the trip type label
t1 = plot_data.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]
t1['proportion'] = t1['distance_miles'] / t1['distance_miles'].sum()
t1['distance_miles'] = 'All Trips'

# same tabulation restricted to trips with purpose 'Work'
work_trips = plot_data[plot_data['Trip_purpose']=='Work']
t2 = work_trips.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]
t2['proportion'] = t2['distance_miles'] / t2['distance_miles'].sum()
t2['distance_miles'] = 'Work Trips'

plot_data = pd.concat([t1,t2])
plot_data['Dataset'] = 'Long Term'
plot_data.columns = ['Mode','Trip Type','Proportion','Dataset']
plot_data = pd.concat([plot_data, minipilot_mode_data])

In [None]:
# Mode share for all trips and for work trips, labeled as the 'Vail' data set
# and stacked on top of the minipilot reference table
plot_data = data.copy()
# both micromobility groups are counted as 'Other'
is_micromobility = plot_data['Mode_confirm'].isin(['Personal Micromobility', 'Shared Micromobility'])
plot_data.loc[is_micromobility, 'Mode_confirm'] = 'Other'

# trips per mode -> share of all trips; the count column is then reused as the trip type label
t1 = plot_data.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]
t1['proportion'] = t1['distance_miles'] / t1['distance_miles'].sum()
t1['distance_miles'] = 'All Trips'

# same tabulation restricted to trips with purpose 'Work'
work_trips = plot_data[plot_data['Trip_purpose']=='Work']
t2 = work_trips.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]
t2['proportion'] = t2['distance_miles'] / t2['distance_miles'].sum()
t2['distance_miles'] = 'Work Trips'
# add an explicit zero row for Ridehail work trips
t2.loc[len(t2.index)] = ['Ridehail', 'Work Trips', 0]
#t2.loc[len(t2.index)] = ['Transit', 'Work Trips', 0] 

plot_data = pd.concat([t1,t2])
plot_data['Dataset'] = 'Vail'
plot_data.columns = ['Mode','Trip Type','Proportion','Dataset']
plot_data = pd.concat([plot_data, minipilot_mode_data])

In [None]:
# Display the current work-trip table for inspection.
t2

In [None]:
# Display the current plot_data table for inspection.
plot_data

In [None]:
# Stacked horizontal bar chart of mode share for the 'Long Term' rows of
# plot_data: one bar per trip type, one segment per mode.
width = 0.5
fig, ax = plt.subplots(1,1, figsize=(15,6))
running_total_mini = [0,0]
running_total_long = [0,0]
fig_data_mini = plot_data[plot_data['Dataset']=='Minipilot']
fig_data_long = plot_data[plot_data['Dataset']=='Long Term']

# The segment order follows the modes present in the minipilot rows.
# NOTE(review): the running totals assume every mode has exactly two
# 'Long Term' rows (one per trip type, in the same order); a mode with fewer
# rows fails at vals.iloc[...] -- confirm the input tables guarantee this.
for mode in pd.unique(fig_data_mini.Mode):
    mini = fig_data_mini[fig_data_mini['Mode']==mode]
    long = fig_data_long[fig_data_long['Mode']==mode]

    #labels = mini['Trip Type']
    #vals = mini['Proportion']*100
    #vals_str = [round(v,1) if v>1 else '' for v in vals]
    #bar = ax.barh(labels, vals, width, left=running_total_mini, label=mode)
    #ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90)
    #running_total_mini[0] = running_total_mini[0]+vals.iloc[0]
    #running_total_mini[1] = running_total_mini[1]+vals.iloc[1]

    labels = long['Trip Type']
    vals = long['Proportion']*100
    # Only label segments larger than 1%.
    vals_str = [round(v,1) if v>1 else '' for v in vals]
    bar = ax.barh(labels, vals, width, left=running_total_long, label=mode)
    ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90)
    # Advance the left edge of each bar by the segment just drawn.
    running_total_long[0] = running_total_long[0]+vals.iloc[0]
    running_total_long[1] = running_total_long[1]+vals.iloc[1]

# The original line ended in a dangling '%' (a SyntaxError); format the
# placeholder with file_suffix like the other report figures do.
file_name='CanBikeCO_report_mode_share%s' % file_suffix
ax.set_title('Mode Share')
ax.legend(bbox_to_anchor=(1,1), fancybox=True, shadow=True)
plt.subplots_adjust(bottom=0.25)
fig.tight_layout()
plt.show()
fig.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Switch the default seaborn palette to 12 colours from 'tab20'.
sns.set_palette('tab20', 12)

In [None]:
# Trip-purpose share: each purpose's share of all trips and of e-bike trips,
# stacked on top of the minipilot purpose table.
plot_data = data.copy()
# 'Religious' and 'School' purposes are reported as 'Other'.
plot_data.loc[plot_data['Trip_purpose'].isin(['Religious', 'School']), 'Trip_purpose'] = 'Other'
# Non-null distance_miles values per purpose; the count column is then
# reused to hold the trip-type label.
t1 = plot_data.groupby(['Trip_purpose'], as_index=False).count()[['Trip_purpose','distance_miles']]
t1['proportion'] = t1['distance_miles'] / t1['distance_miles'].sum()
t1['distance_miles'] = 'All Trips'

# Same computation restricted to rows whose Mode_confirm is 'E-bike'.
t2 = plot_data.loc[plot_data['Mode_confirm']=='E-bike'].groupby(['Trip_purpose'], as_index=False).count()[['Trip_purpose','distance_miles']]
t2['proportion'] = t2['distance_miles'] / t2['distance_miles'].sum()
t2['distance_miles'] = 'E-Bike Trips'

# Columns are renamed by position: purpose, trip-type label, proportion, dataset.
plot_data = pd.concat([t1,t2]).assign(Dataset='Long_Term')
plot_data.columns = ['Purpose','Trip Type','Proportion','Dataset']
plot_data = pd.concat([plot_data, minipilot_purp_data])

In [None]:
# Display the current plot_data table for inspection.
plot_data

In [None]:
# Stacked horizontal bar chart of trip-purpose share: one bar per trip type,
# one segment per purpose.
width = 0.5
fig, ax = plt.subplots(1,1, figsize=(15,6))
running_total_mini = [0,0]
running_total_long = [0,0]
# NOTE(review): both frames select the 'Minipilot' rows, so the bars drawn
# below are the minipilot shares -- confirm this is the intended dataset.
fig_data_mini = plot_data[plot_data['Dataset']=='Minipilot']
fig_data_long = plot_data[plot_data['Dataset']=='Minipilot']

# NOTE(review): the running totals assume every purpose has exactly two rows
# (one per trip type, in the same order) -- confirm.
for purp in pd.unique(fig_data_mini.Purpose):
    mini = fig_data_mini[fig_data_mini['Purpose']==purp]
    long = fig_data_long[fig_data_long['Purpose']==purp]
    
    #labels = mini['Trip Type']
    #vals = mini['Proportion']*100
    #vals_str = [round(v,1) if v>1 else '' for v in vals]
    #bar = ax.barh(labels, vals, width, left=running_total_mini, label=purp)
    #ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90)
    #running_total_mini[0] = running_total_mini[0]+vals.iloc[0]
    #running_total_mini[1] = running_total_mini[1]+vals.iloc[1]

    labels = long['Trip Type']
    vals = long['Proportion']*100
    # Only label segments larger than 1%.
    vals_str = [round(v,1) if v>1 else '' for v in vals]
    bar = ax.barh(labels, vals, width, left=running_total_long, label=purp)
    ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90)
    # Advance the left edge of each bar by the segment just drawn.
    running_total_long[0] = running_total_long[0]+vals.iloc[0]
    running_total_long[1] = running_total_long[1]+vals.iloc[1]

# Fill in the '%s' placeholder; it was previously left unformatted, which put
# a literal '%s' into the saved file name.
file_name='CanBikeCO_report_purp_share%s' % file_suffix
ax.set_title('Trip Purpose')
ax.legend(bbox_to_anchor=(1,1), fancybox=True, shadow=True)
plt.subplots_adjust(bottom=0.25)
fig.tight_layout()
plt.show()
fig.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Scratch comparison of two emission intensities. Both results are named and
# printed; as bare expressions only the last one was shown by the notebook.
# NOTE(review): the factors look like (energy per passenger-mile) x
# (lb CO2 per energy unit) x 453 g/lb x 0.62 mi/km, i.e. g CO2 per
# passenger-km -- confirm units against the df_ei source table.

# Gas car
gas_car_intensity = 5170 * 1 / 1000000 * 157.2 * 453 * .62

# Ebike
ebike_intensity = .022 * 1 / 1000 * 1166 * 453 * .62

print(f"Gas car: {gas_car_intensity}")
print(f"Ebike: {ebike_intensity}")

In [None]:
# Print the per-passenger-mile CO2 factor of every electric and gasoline row
# in df_ei; the two fuels differ only in the scale applied to the product.
for i in range(len(df_ei)):
    ei_row = df_ei.iloc[i]
    fuel = ei_row['fuel']
    if fuel == 'electric':
        z = ei_row['energy_intensity_factor'] * ei_row['CO2_factor'] * .001
    elif fuel == 'gasoline':
        z = ei_row['energy_intensity_factor'] * ei_row['CO2_factor'] * .000001
    else:
        # Rows with any other fuel are not reported.
        continue
    print(ei_row['mode'])
    print(f"{z} lb C02/pass-mi")
# Unscaled product for every row, shown as the cell output.
df_ei['energy_intensity_factor'] * df_ei['CO2_factor']

In [None]:
# Emissions: total and mean 'CO2_Impact(lb)' of the data_eb rows grouped by
# replaced mode, plotted with CO2_impact.
import os

plot_data = data_eb.copy()
co2rp = plot_data.groupby('Replaced_mode').agg({'CO2_Impact(lb)': ['sum','mean']})
co2rp.columns = ['total_lb_CO2_emissions', 'average_lb_CO2_emission']
co2rp = co2rp.reset_index()
co2rp = co2rp.sort_values(by=['total_lb_CO2_emissions'], ascending=False)
# True where the total is positive; passed to CO2_impact as the colour argument.
co2rp['boolean'] = co2rp['total_lb_CO2_emissions'] > 0

net_CO2_saved = round(sum(co2rp['total_lb_CO2_emissions']), 2)

x = co2rp['total_lb_CO2_emissions']
y = co2rp['Replaced_mode']
color = co2rp['boolean']

# The total is a sum of the 'CO2_Impact(lb)' column, so it is reported in
# lb CO2 (the title previously said kWH).
plot_title="Sketch of CO2 Emissions Impact for all confirmed trips \n Contribution by mode towards a total of %s (lb CO2) \n%s" % (net_CO2_saved, quality_text)
file_name ='CanBikeCO_report_sketch_all_mode_CO2impact%s' % file_suffix
CO2_impact(x,y,color,plot_title,file_name)

# NOTE(review): `fig` is not created in this cell, so this re-saves whatever
# figure an earlier cell left in `fig` under the purpose-share name --
# confirm this tail belongs here.
file_name='CanBikeCO_report_purp_share%s'%file_suffix
fig.tight_layout()
plt.show()
# Join with a path separator; SAVE_DIR+file_name glued the file name onto the
# last directory component.
fig.savefig(os.path.join(SAVE_DIR, file_name+".png"), bbox_inches='tight')

### Who Rides E-Bikes, and Why?

In [None]:
# Switch the default seaborn palette to 9 colours from 'Set1'.
sns.set_palette('Set1', 9)

In [None]:
# Number of distinct user_id values in each per-program frame, printed in
# the order: Four Corners, Community Cycles, Fort Collins, Pueblo,
# Smart Commute, Vail.
for program_trips in (four_corners, community_cycles, fort_collins, pueblo, smart_commute, vail):
    print(program_trips['user_id'].nunique())

In [None]:
# Box plot of AGE for each confirmed mode, saved as a PNG.
plot_data = data.copy()

plot_title = 'Distribution of CanBikeCO Trips by Age and Mode'
ylab = 'Age'
file_name = "CanBikeCO_report_age%s"%file_suffix

fig, ax = plt.subplots(figsize=(10,4))
sns.boxplot(ax=ax, data=plot_data, x='Mode_confirm', y='AGE', hue='Mode_confirm')
ax.set(title=plot_title, xlabel='', ylabel=ylab)
plt.subplots_adjust(bottom=0.25)
plt.xticks(rotation=35, ha='right')
# Replace the hue legend with an empty one.
plt.legend([])
fig.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Distribution of age over e-bike trips, split by program
plot_data = data.copy()
# .copy() so the 'Program' column below is added to an independent frame
# rather than to a filtered slice (avoids SettingWithCopyWarning).
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike'].copy()
# Map the short program codes to display names.
plot_data['Program'] = plot_data['program'].replace(['4c','cc','fc','pc','sc','vail'],['Four Corners','Community Cycles (Boulder)','Fort Collins','Pueblo County','Smart Commute (Northridge)','Vail'])

plot_title = 'Distribution of E-Bike Trips by Age and Program'
ylab = 'Age'
# Distinct file name: "CanBikeCO_report_age%s" is already used by the
# age-by-mode figure, which this cell used to overwrite.
file_name = "CanBikeCO_report_age_program%s"%file_suffix

fig, ax = plt.subplots(figsize=(10,4))
sns.boxplot(ax=ax, data=plot_data, x='Program', y='AGE', hue='Mode_confirm').set(title=plot_title, xlabel='', ylabel=ylab)
plt.subplots_adjust(bottom=0.25)
plt.xticks(rotation=35, ha='right')
# Replace the hue legend with an empty one.
plt.legend([])
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Box plot of e-bike trip distance (miles) for each program, outliers hidden.
plot_data = data_2.copy()
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']
# Map the short program codes to display names.
plot_data['Program'] = plot_data['program'].replace({
    '4c': 'Four Corners',
    'cc': 'Community Cycles (Boulder)',
    'fc': 'Fort Collins',
    'pc': 'Pueblo County',
    'sc': 'Smart Commute (Denver North)',
    'vail': 'Vail',
})

plot_title = 'Distribution of E-Bike Trip Distances by Program'
ylab = 'Distance (miles)'

fig, ax = plt.subplots(figsize=(10,8))
sns.boxplot(ax=ax, data=plot_data, x='Program', y='distance_miles', hue='Mode_confirm', showfliers=False)
ax.set(title=plot_title, xlabel='', ylabel=ylab)
plt.subplots_adjust(bottom=0.25)
plt.xticks(rotation=35, ha='right')
# Replace the hue legend with an empty one.
plt.legend([])

In [None]:
# Box plot of trip distance (miles) for each confirmed mode, outliers hidden.
plot_data = data_2.copy()

plot_title = 'Distribution of Distances by Mode'
ylab = 'Distance (miles)'

fig, ax = plt.subplots(figsize=(10,8))
sns.boxplot(ax=ax, data=plot_data, x='Mode_confirm', y='distance_miles', hue='Mode_confirm', showfliers=False)
ax.set(title=plot_title, xlabel='', ylabel=ylab)
plt.subplots_adjust(bottom=0.25)
plt.xticks(rotation=35, ha='right')
# Replace the hue legend with an empty one.
plt.legend([])

In [None]:
# Box plot of trip duration for each confirmed mode, outliers hidden.
plot_data = data_2.copy()

plot_title = 'Distribution of Trips Durations by Mode'
ylab = 'Duration (minutes)'

fig, ax = plt.subplots(figsize=(10,8))
sns.boxplot(ax=ax, data=plot_data, x='Mode_confirm', y='duration', hue='Mode_confirm', showfliers=False)
ax.set(title=plot_title, xlabel='', ylabel=ylab)
plt.subplots_adjust(bottom=0.25)
plt.xticks(rotation=35, ha='right')
# Replace the hue legend with an empty one.
plt.legend([])

In [None]:
# Switch the default seaborn palette to 6 colours from 'Set2'.
sns.set_palette('Set2', 6)

In [None]:
# Share of each user's trips made by e-bike, averaged within income group
plot_data = data.copy()

# Trips per (user, mode) and trips per user. The column is selected before
# aggregating so only distance_miles is counted.
t1 = plot_data.groupby(['user_id','Mode_confirm'], as_index=False)['distance_miles'].count()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['user_id'], as_index=False)['distance_miles'].count()
# After the merge, distance_miles_x is the per-mode count and
# distance_miles_y the per-user total.
plot_data = t1.merge(t2, on='user_id')
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)
# First row of every (HHINC, user_id) group, used to attach the income group.
t3 = data.copy().groupby(['HHINC','user_id'], as_index=False).nth(0)[['HHINC','user_id']]

plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']
plot_data = plot_data.merge(t3, on='user_id')

file_name = 'CanBikeCO_report_income_trip%s'%file_suffix
ylab='Proportion of Total Trips'
plot_title = 'E-bike Use (Trips) by Income'
fig, ax = plt.subplots(figsize=(6,4))
sns.barplot(data=plot_data, x='HHINC', y='proportion', estimator=np.mean).set(title=plot_title,xlabel='',ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Share of each user's mileage travelled by e-bike, averaged within income group
plot_data = data.copy()

# Miles per (user, mode) and miles per user. Summing only distance_miles
# avoids running .sum() over non-numeric columns, which newer pandas rejects.
t1 = plot_data.groupby(['user_id','Mode_confirm'], as_index=False)['distance_miles'].sum()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['user_id'], as_index=False)['distance_miles'].sum()
# After the merge, distance_miles_x is the per-mode mileage and
# distance_miles_y the per-user total.
plot_data = t1.merge(t2, on='user_id')
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)
# First row of every (HHINC, user_id) group, used to attach the income group.
t3 = data.copy().groupby(['HHINC','user_id'], as_index=False).nth(0)[['HHINC','user_id']]

plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']
plot_data = plot_data.merge(t3, on='user_id')

file_name = 'CanBikeCO_report_income_mileage%s'%file_suffix
ylab='Proportion of Total Mileage'
plot_title = 'E-bike Use (Miles) by Income'
fig, ax = plt.subplots(figsize=(6,4))
sns.barplot(data=plot_data, x='HHINC', y='proportion', estimator=np.mean).set(title=plot_title,xlabel='',ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Switch the default seaborn palette to 5 colours from 'Paired'.
sns.set_palette('Paired', 5)

In [None]:
# Share of each user's trips made by e-bike, averaged within VEH group
plot_data = data.copy()

# Trips per (user, mode) and trips per user. The column is selected before
# aggregating so only distance_miles is counted.
t1 = plot_data.groupby(['user_id','Mode_confirm'], as_index=False)['distance_miles'].count()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['user_id'], as_index=False)['distance_miles'].count()
# After the merge, distance_miles_x is the per-mode count and
# distance_miles_y the per-user total.
plot_data = t1.merge(t2, on='user_id')
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)
# First row of every (VEH, user_id) group, used to attach the VEH value.
t3 = data.copy().groupby(['VEH','user_id'], as_index=False).nth(0)[['VEH','user_id']]

plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']
plot_data = plot_data.merge(t3, on='user_id')

file_name = 'CanBikeCO_report_veh_trip%s'%file_suffix
plot_title = 'E-bike Use (Trips) by Available Vehicles'
ylab='Proportion of Total Trips'
fig, ax = plt.subplots(figsize=(6,4))
sns.barplot(data=plot_data, x='VEH', y='proportion', estimator=np.mean).set(title=plot_title,xlabel='',ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Share of each user's trips made by e-bike, averaged within binned
# vehicles-per-driver group
plot_data = data.copy()

# Trips per (user, mode) and trips per user. The column is selected before
# aggregating so only distance_miles is counted.
t1 = plot_data.groupby(['user_id','Mode_confirm'], as_index=False)['distance_miles'].count()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['user_id'], as_index=False)['distance_miles'].count()
# After the merge, distance_miles_x is the per-mode count and
# distance_miles_y the per-user total.
plot_data = t1.merge(t2, on='user_id')
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)
# First row of every (VEH, user_id) group, used to attach veh_per_driver.
t3 = data.copy().groupby(['VEH','user_id'], as_index=False).nth(0)[['veh_per_driver','user_id']]
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']
plot_data = plot_data.merge(t3, on='user_id')
#plot_data['veh_per_driver'] = pd.cut(plot_data['veh_per_driver'], bins=[.25,.5,.75,1,1.25,1.5,1.75,2])
# Bins are (0.5, 1], (1, 1.5], (1.5, 2]; values outside them become NaN.
# NOTE(review): that excludes users at exactly 0.5 or below -- confirm intended.
plot_data['veh_per_driver'] = pd.cut(plot_data['veh_per_driver'], bins=[.5,1,1.5,2])

file_name = 'CanBikeCO_report_vehpdriver_trip%s'%file_suffix
plot_title = 'E-bike Use (Trips) by Available Vehicles per Driver'
ylab='Proportion of Total Trips'
fig, ax = plt.subplots(figsize=(6,4))
# sns.boxplot(ax=ax, data=plot_data, x='veh_per_driver', y='proportion').set(title=plot_title, xlabel='', ylabel=ylab)
sns.barplot(data=plot_data, x='veh_per_driver', y='proportion', estimator=np.mean).set(title=plot_title,xlabel='',ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Switch the default seaborn palette to 3 colours from 'Set1'.
sns.set_palette('Set1', 3)

In [None]:
# How total mileage changes over time
plot_data = data.copy()

# Miles per (user, date_time). Summing only distance_miles avoids running
# .sum() over non-numeric columns, which newer pandas rejects.
t1 = plot_data.groupby(['user_id','date_time'], as_index=False)['distance_miles'].sum()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
plot_data = t1

plot_title = 'Total Mileage Over Time'
ylab = 'Daily Miles per User'
file_name = "CanBikeCO_report_ts_miles%s"%file_suffix
fig, ax = plt.subplots(figsize=(16,4))
# The line is the mean over users at each date_time value.
sns.lineplot(data=plot_data, x='date_time', y='distance_miles', estimator=np.mean).set(title=plot_title, xlabel='Date', ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# How the e-bike share of each user's mileage changes over time
plot_data = data.copy()

# Miles per (user, date_time, mode) and per (user, date_time). Summing only
# distance_miles avoids running .sum() over non-numeric columns, which newer
# pandas rejects.
t1 = plot_data.groupby(['user_id','date_time','Mode_confirm'], as_index=False)['distance_miles'].sum()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['user_id','date_time'], as_index=False)['distance_miles'].sum()
# After the merge, distance_miles_x is the per-mode mileage and
# distance_miles_y the total for that user and date_time.
plot_data = t1.merge(t2, on=['user_id','date_time'])
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']
plot_data = plot_data[plot_data['distance_miles_y'].notnull()]

plot_title = 'E-Bike Mileage Proportion Over Time'
ylab = 'Proportion of Daily Miles'
file_name = "CanBikeCO_report_ts_mileage_proportion%s"%file_suffix
fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(data=plot_data, x='date_time', y='proportion', estimator=np.mean).set(title=plot_title, xlabel='Date', ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Switch the default seaborn palette to 9 colours from 'Set1'.
sns.set_palette('Set1', 9)

In [None]:
# E-bike share of total mileage by season and program
plot_data = data.copy()

# The 'month' column ends up holding a season name derived from the month number.
plot_data['month'] = plot_data.date_time.dt.month
plot_data['month'] = plot_data['month'].replace([6,7,8,9,10,11,12,1,2,3,4,5], ['Summer','Summer','Summer','Fall','Fall','Fall','Winter','Winter','Winter','Spring','Spring','Spring'])
# Miles per (program, season, mode) and per (program, season). Summing only
# distance_miles avoids running .sum() over the datetime and other
# non-numeric columns, which newer pandas rejects.
t1 = plot_data.groupby(['program','month','Mode_confirm'], as_index=False)['distance_miles'].sum()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['program','month'], as_index=False)['distance_miles'].sum()
plot_data = t1.merge(t2, on=['program','month'])
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)
# .copy() so the 'Program' column below is added to an independent frame
# rather than to a filtered slice.
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike'].copy()
plot_data['Program'] = plot_data['program'].replace(['4c','cc','fc','pc','sc','vail'],['Four Corners','Community Cycles (Boulder)','Fort Collins','Pueblo County','Smart Commute (Northridge)','Vail'])

plot_title = 'E-Bike Mileage Proportion By Season And Program'
# The proportion is computed per program and season, not per day.
ylab = 'Proportion of Miles'
file_name = "CanBikeCO_report_ts_mileage_season%s"%file_suffix
fig, ax = plt.subplots(figsize=(16,4))
# The x axis carries seasons, so it is labelled 'Season' (was 'Date').
sns.barplot(data=plot_data, x='month', y='proportion', hue='Program', estimator=np.mean, ci=None).set(title=plot_title, xlabel='Season', ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Switch the default seaborn palette to 9 colours from 'Set1'.
sns.set_palette('Set1', 9)

In [None]:
# Mode share against trip distance for trips shorter than 6 miles.
# NOTE(review): the companion long-trip cell keeps distance_miles > 5, so
# trips between 5 and 6 miles appear in both plots -- confirm intended.
plot_data = data.copy()
plot_data = plot_data.loc[plot_data['distance_miles'] < 6]

plot_title = 'Mode Share by Trip Distance'
ylab = 'Total Trips'
file_name = "CanBikeCO_report_mode_share_distance_short%s"%file_suffix
proportion_hist_plot(plot_data, 'distance_miles', plot_title, ylab, file_name)

In [None]:
# Mode share against trip distance for trips longer than 5 miles.
plot_data = data.copy()
plot_data = plot_data.loc[plot_data['distance_miles'] > 5]

plot_title = 'Mode Share by Trip Distance'
ylab = 'Total Trips'
file_name = "CanBikeCO_report_mode_share_distance_long%s"%file_suffix
proportion_hist_plot(plot_data, 'distance_miles', plot_title, ylab, file_name)

### What Does an E-Bike Replace?

In [None]:
# Switch the default seaborn palette to 10 colours from 'Dark2'.
sns.set_palette('Dark2', 10)

In [None]:
# Substitution rate: share of each user's trips per stated replaced mode
# NOTE(review): this uses every row of `data`, while the title refers to
# e-bike trips -- confirm `data` is already restricted accordingly.
plot_data = data.copy()
# Trips per (user, replaced mode) and trips per user. The column is selected
# before aggregating so only distance_miles is counted.
t1 = plot_data.groupby(['user_id','Replaced_mode'], as_index=False)['distance_miles'].count()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['user_id'], as_index=False)['distance_miles'].count()
plot_data = t1.merge(t2, on='user_id')
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)

# Replaced modes ordered by mean proportion, largest first. Only 'proportion'
# is averaged; .mean() over the whole frame would also hit user_id.
data_order = plot_data.groupby(['Replaced_mode'], as_index=False)['proportion'].mean().sort_values('proportion', ascending=False).Replaced_mode
labels = data_order.copy()

plot_title='Stated Replacement for E-Bike Trips'
ylab='Proportion of Trips'
file_name='CanBikeCO_report_replacement%s'%file_suffix
fig, ax = plt.subplots(figsize=(10,4))
sns.barplot(data=plot_data, x='Replaced_mode', y='proportion', estimator=np.mean, order=data_order).set(title=plot_title,xlabel='',ylabel=ylab,ylim=(0,.5))
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.bar_label(ax.containers[0], fmt='%.2f', padding=30)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Substitution rate: share of each user's mileage per stated replaced mode
# NOTE(review): this uses every row of `data`, while the title refers to
# e-bike trips -- confirm `data` is already restricted accordingly.
plot_data = data.copy()
# Miles per (user, replaced mode) and miles per user. Summing only
# distance_miles avoids running .sum() over non-numeric columns, which newer
# pandas rejects.
t1 = plot_data.groupby(['user_id','Replaced_mode'], as_index=False)['distance_miles'].sum()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['user_id'], as_index=False)['distance_miles'].sum()
plot_data = t1.merge(t2, on='user_id')
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)

# Replaced modes ordered by mean proportion, largest first. Only 'proportion'
# is averaged; .mean() over the whole frame would also hit user_id.
data_order = plot_data.groupby(['Replaced_mode'], as_index=False)['proportion'].mean().sort_values('proportion', ascending=False).Replaced_mode
labels = data_order.copy()

plot_title='Stated Replacement for E-Bike Trips'
ylab='Proportion of Miles'
file_name='CanBikeCO_report_replacement_miles%s'%file_suffix
fig, ax = plt.subplots(figsize=(10,4))
sns.barplot(data=plot_data, x='Replaced_mode', y='proportion', estimator=np.mean, order=data_order).set(title=plot_title,xlabel='',ylabel=ylab,ylim=(0,.5))
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.bar_label(ax.containers[0], fmt='%.2f', padding=30)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Display the current t1 table for inspection.
t1

In [None]:
# Display the current plot_data table for inspection.
plot_data

In [None]:
# Emissions analysis: net CO2 reduction and program cost per metric ton
# Total 'distance' per replaced mode. The column is selected explicitly;
# .sum('distance') passed the name as the numeric_only flag instead.
z = data_eb.groupby('Replaced_mode', as_index=False)['distance'].sum()

# One factor per row of z, matched by position.
# NOTE(review): this requires exactly ten replaced modes, in the order the
# groupby emits them -- confirm the list matches z['Replaced_mode'].
co2_combo_factors = [5170*157.2*.000001,
                    .022*1166*.001,
                    0,
                    0,
                    0,
                    7214*157.2*.000001,
                    2585*157.2*.000001,
                    .027*1166*.001,
                    4560*161.3*.000001,
                    0]
co2_ebike_factor = .022*1166*.001
# Impact factor = replaced mode's factor minus the e-bike factor.
z['co2_impact_factor'] = [x-co2_ebike_factor for x in co2_combo_factors]
z['lb_co2'] = z['distance']*z['co2_impact_factor'] # Reduction=positive
# 2204 lb per metric ton.
# NOTE(review): 'distance' is divided by 1609 elsewhere in this notebook to
# get miles -- confirm its unit matches the per-mile factors used here.
metric_ton_co2_reduction = sum(z['lb_co2'])/2204
unique_users = len(pd.unique(data_eb['user_id']))
print(f"Net reduction: {metric_ton_co2_reduction} metric tons over {unique_users} users.")
# Program cost is taken as 2400 per user.
print(f"Net program costs: {2400*unique_users}")
print(f"Program $/mtCO2: {(2400*unique_users) / (metric_ton_co2_reduction)}")

In [None]:
# Income analysis: mean over users of each user's minimum HHINC_NUM, skipping
# users with no value. The column is selected explicitly; .min('HHINC_NUM')
# passed the name as the numeric_only flag instead.
mean_hhinc = data.groupby('user_id')['HHINC_NUM'].min().dropna().mean()
# Converted to an hourly figure assuming 52 weeks of 40 hours.
mean_hourly_wage = mean_hhinc / 52 / 40
# NOTE(review): unexplained scratch ratio shown as the cell output -- confirm
# what 9.70 and 16 stand for.
9.70 / 16

In [None]:
# Display the column names of data.
data.columns

In [None]:
# Demographic and trip summary table
# Per-user statistics use one row per user_id. The identifier itself is left
# out of the aggregation: mean/std are undefined for it and newer pandas
# raises on non-numeric columns.
print(data.drop_duplicates(['user_id'])[['AGE','HHINC_NUM','VEH_num']].agg(['mean','std','min','max']))
# Adds a distance_mi column to data (distance / 1609).
data['distance_mi'] = data['distance'] / 1609
print(data[['distance_mi','duration']].agg(['mean','std','min','max']))

In [None]:
# Demographic and trip summary table (raw counts)
# Person-level counts use one row per user_id; trip-level counts use every row.
persons = data.drop_duplicates('user_id')
print(persons['HHINC'].value_counts())
print(persons['GENDER'].value_counts())
print(pd.cut(persons['AGE'], bins=[0,18,24,40,65,75]).value_counts())
print(pd.cut(data['duration'], bins=[0,15,30,60,90,120,9999]).value_counts())
print(data['Mode_confirm'].value_counts())
print(persons['STUDENT'].value_counts())
print(persons['What is the highest grade or degree that you have completed?'].value_counts())

In [None]:
# Demographic and trip summary table (shares)
# Person-level counts are divided by the number of users, trip-level counts
# by the number of rows in data.
persons = data.drop_duplicates('user_id')
n_persons = len(persons)
n_trips = len(data)

print(persons['HHINC'].value_counts() / n_persons)
print(persons['GENDER'].value_counts() / n_persons)
print(pd.cut(persons['AGE'], bins=[0,18,24,40,65,75]).value_counts() / n_persons)
print(pd.cut(data['duration'], bins=[0,15,30,60,90,120,9999]).value_counts() / n_trips)
print(data['Mode_confirm'].value_counts() / n_trips)
print(persons['STUDENT'].value_counts() / n_persons)
print(persons['What is the highest grade or degree that you have completed?'].value_counts() / n_persons)

# Additional analysis

In [None]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model

#import emission.core.get_database as edb
from plots import *
#import scaffolding

sns.set_style("whitegrid")
sns.set()
%matplotlib inline

import plots
import importlib
importlib.reload(plots)

In [None]:
# Load the trip table from a CSV at a hard-coded local path.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
data=pd.read_csv("C:\\Users\\dakcicek\\Documents\\GitHub\\ceo_analysis\\viz_scripts\\filtered_merged_trips_2.csv")

In [None]:
# Add a 'Program' column holding the display name for each short program code.
data['Program'] = data['program'].replace(['4c','cc','fc','pc','sc','vail'],['Four Corners (Durango)','Community Cycles (Boulder)','Fort Collins','Pueblo County','Smart Commute (Northridge)','Vail'])

In [None]:
# One frame per program, selected from data by its 'Program' display name.
four_corners = data.loc[data['Program'] == "Four Corners (Durango)"]
community_cycles = data.loc[data['Program'] == "Community Cycles (Boulder)"]
fort_collins = data.loc[data['Program'] == "Fort Collins"]
pueblo = data.loc[data['Program'] == "Pueblo County"]
smart_commute = data.loc[data['Program'] == "Smart Commute (Northridge)"]
vail = data.loc[data['Program'] == "Vail"]

In [None]:
# Smart Commute mode share: each mode's share of all trips and of work
# trips, stacked on top of the minipilot mode table.
plot_data = smart_commute.copy()
# Both micromobility categories are reported as 'Other'.
plot_data.loc[plot_data['Mode_confirm'].isin(['Personal Micromobility', 'Shared Micromobility']), 'Mode_confirm'] = 'Other'

# Non-null distance_miles values per mode; the count column is then reused
# to hold the trip-type label.
t1 = plot_data.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]
t1['proportion'] = t1['distance_miles'] / t1['distance_miles'].sum()
t1['distance_miles'] = 'All Trips'

# Same computation restricted to rows whose Trip_purpose is 'Work'.
t2 = plot_data.loc[plot_data['Trip_purpose']=='Work'].groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]
t2['proportion'] = t2['distance_miles'] / t2['distance_miles'].sum()
t2['distance_miles'] = 'Work Trips'
#t2.loc[len(t2.index)] = ['Ridehail', 'Work Trips', 0]
#t2.loc[len(t2.index)] = ['Transit', 'Work Trips', 0] 

# Columns are renamed by position: mode, trip-type label, proportion, dataset.
plot_data = pd.concat([t1,t2]).assign(Dataset='smart_commute')
plot_data.columns = ['Mode','Trip Type','Proportion','Dataset']
plot_data = pd.concat([plot_data, minipilot_mode_data])

In [None]:
# Display the current plot_data table for inspection.
plot_data

In [None]:
# Stacked horizontal bar chart of mode share for the 'smart_commute' rows of
# plot_data: one bar per trip type, one segment per mode.
width = 0.5
fig, ax = plt.subplots(1,1, figsize=(15,6))
running_total_mini = [0,0]
running_total_long = [0,0]
# Both frames select the same 'smart_commute' rows.
fig_data_mini = plot_data[plot_data['Dataset']=='smart_commute']
fig_data_long = plot_data[plot_data['Dataset']=='smart_commute']

for mode in pd.unique(fig_data_mini.Mode):
    mini = fig_data_mini.loc[fig_data_mini['Mode']==mode]
    long = fig_data_long.loc[fig_data_long['Mode']==mode]

    labels = long['Trip Type']
    vals = long['Proportion']*100
    # Only label segments larger than 1%.
    vals_str = [round(v,1) if v>1 else '' for v in vals]
    bar = ax.barh(labels, vals, width, left=running_total_long, label=mode)
    ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90)
    # Advance the left edge of each of the two bars by the segment just drawn.
    running_total_long[0] += vals.iloc[0]
    running_total_long[1] += vals.iloc[1]

file_name='CanBikeCO_report_mode_share%s'
# The second call replaces the first title.
ax.set_title('Minipilot')
ax.set_title('Mode Share')
ax.legend(bbox_to_anchor=(1,1), fancybox=True, shadow=True)
plt.subplots_adjust(bottom=0.25)
fig.tight_layout()
plt.show()
fig.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# E-bike share of mileage for each trip purpose, by program
plot_data = data.copy()
# Miles per (program, purpose, mode) and per (program, purpose). Summing only
# distance_miles avoids running .sum() over non-numeric columns, which newer
# pandas rejects.
t1 = plot_data.groupby(['Program','Trip_purpose','Mode_confirm'], as_index=False)['distance_miles'].sum()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['Program','Trip_purpose'], as_index=False)['distance_miles'].sum()
plot_data = t1.merge(t2, on=['Program','Trip_purpose'])
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']

plot_title = 'E-Bike Mileage Proportion By Trip Purpose And Program'
# The proportion is computed per program and purpose, not per day.
ylab = 'Proportion of Miles'
# Own file name: the copy-pasted season name was shared with other figures
# and carried an unformatted '%s'.
file_name = "CanBikeCO_report_mileage_purpose_program"
fig, ax = plt.subplots(figsize=(20,10))
# The x axis carries trip purposes, so it is labelled accordingly (was 'Date').
sns.barplot(data=plot_data, x='Trip_purpose', y='proportion', hue='Program', estimator=np.mean, ci=None).set(title=plot_title, xlabel='Trip Purpose', ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# E-bike share of mileage for each replaced mode, by program
plot_data = data.copy()
# Miles per (program, replaced mode, mode) and per (program, replaced mode).
# Summing only distance_miles avoids running .sum() over non-numeric columns,
# which newer pandas rejects.
t1 = plot_data.groupby(['Program','Replaced_mode','Mode_confirm'], as_index=False)['distance_miles'].sum()
# Assign the result back: fillna(inplace=True) on a selected column is
# chained assignment and is not guaranteed to modify the frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['Program','Replaced_mode'], as_index=False)['distance_miles'].sum()
plot_data = t1.merge(t2, on=['Program','Replaced_mode'])
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']

plot_title = 'E-Bike Mileage Proportion By Replaced Mode And Program'
# The proportion is computed per program and replaced mode, not per day.
ylab = 'Proportion of Miles'
# Own file name: the copy-pasted season name was shared with other figures
# and carried an unformatted '%s'.
file_name = "CanBikeCO_report_mileage_replaced_program"
fig, ax = plt.subplots(figsize=(20,10))
# The x axis carries replaced modes, so it is labelled accordingly (was 'Date').
sns.barplot(data=plot_data, x='Replaced_mode', y='proportion', hue='Program', estimator=np.mean, ci=None).set(title=plot_title, xlabel='Replaced Mode', ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
ax.figure.savefig(file_name+".png", bbox_inches='tight')

In [None]:
# Load a second trip table from a CSV at a hard-coded local path.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
data_2=pd.read_csv("C:\\Users\\dakcicek\\Documents\\GitHub\\ceo_analysis\\viz_scripts\\filtered_merged_trips.csv")

In [None]:
# Add a 'Program' column holding the display name for each short program code.
data_2['Program'] = data_2['program'].replace(['4c','cc','fc','pc','sc','vail'],['Four Corners (Durango)','Community Cycles (Boulder)','Fort Collins','Pueblo County','Smart Commute (Northridge)','Vail'])

In [None]:
# (Re)compute the 'Program' display-name column on data from the short program codes.
data['Program'] = data['program'].replace(['4c','cc','fc','pc','sc','vail'],['Four Corners (Durango)','Community Cycles (Boulder)','Fort Collins','Pueblo County','Smart Commute (Northridge)','Vail'])

In [None]:
# Stacked histogram of e-bike trip distance by program, for trips shorter
# than 6 miles. The figure is shown but not saved; file_name is only assigned.
plot_data = data.copy()
plot_data = plot_data.loc[plot_data['distance_miles'] < 6]
plot_data = plot_data.loc[plot_data['Mode_confirm'] == 'E-bike']

plot_title = 'E-bike Trip Distance by Program'
ylab = 'Total Trips'
file_name = "CanBikeCO_report_mode_share_distance_short%s"
fig, ax = plt.subplots(figsize=(12,6))
sns.histplot(ax=ax, data=plot_data, x='distance_miles', hue='Program', element='poly', multiple='stack')
ax.set(title=plot_title, ylabel=ylab, xlabel='Trip Distance (mi)')
plt.subplots_adjust(bottom=0.25)

In [None]:
# E-bike trip distance by program, for trips longer than 5 miles
plot_data = data.copy()
plot_data = plot_data[plot_data['distance_miles']>5]
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']

plot_title = 'E-bike Trip Distance by Program'
ylab = 'Total Trips'
# This cell covers the long trips, so the name says 'long' (it was copied
# over as 'short'). The figure is not saved here; file_name is only assigned.
file_name = "CanBikeCO_report_mode_share_distance_long%s"
fig, ax = plt.subplots(figsize=(12,8))
sns.histplot(ax=ax, data=plot_data, x='distance_miles', hue='Program', element='poly', multiple='stack').set(title=plot_title, ylabel=ylab, xlabel='Trip Distance (mi)')
plt.subplots_adjust(bottom=0.25)

# Descriptive stats by program

In [None]:
# Load data_2 from a CSV at a hard-coded local path.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
data_2=pd.read_csv("C:\\Users\\dakcicek\\Documents\\GitHub\\ceo_analysis\\viz_scripts\\filtered_merged_trips.csv")

In [None]:
# Add a 'Program' column holding the display name for each short program code.
data_2['Program'] = data_2['program'].replace(['4c','cc','fc','pc','sc','vail'],['Four Corners (Durango)','Community Cycles (Boulder)','Fort Collins','Pueblo County','Smart Commute (Northridge)','Vail'])

In [None]:
# One trip subset per program, selected on the display name in `Program`
four_corners = data_2.loc[data_2['Program'] == "Four Corners (Durango)"]
community_cycles = data_2.loc[data_2['Program'] == "Community Cycles (Boulder)"]
fort_collins = data_2.loc[data_2['Program'] == "Fort Collins"]
pueblo = data_2.loc[data_2['Program'] == "Pueblo County"]
smart_commute = data_2.loc[data_2['Program'] == "Smart Commute (Northridge)"]
vail = data_2.loc[data_2['Program'] == "Vail"]

In [None]:
def _first_trip_demographics(program_trips):
    """Return one demographics row per participant of a single program.

    Takes the first row seen for every ``user_id``, keeps the ``AGE``,
    ``GENDER``, ``VEH`` and ``HHINC`` columns, drops participants with a
    missing value in any of them, and keeps only participants whose
    ``GENDER`` is ``'Man'`` or ``'Woman'``.

    The result is indexed by ``user_id`` and sorted by it.  This is the
    shape the former ``groupby('user_id').nth(0)`` chain produced on
    pandas < 2.0; that chain raises ``KeyError`` on pandas >= 2.0, where
    ``nth`` no longer moves the group key into the index.

    Parameters
    ----------
    program_trips : pandas.DataFrame
        Rows of one program; must contain ``user_id`` and the four
        demographic columns.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``program_trips`` is not modified.
    """
    demographic_cols = ['AGE', 'GENDER', 'VEH', 'HHINC']
    first_rows = (
        program_trips
        .dropna(subset=['user_id'])  # groupby ignored rows without a user_id
        .drop_duplicates(subset='user_id', keep='first')
        .set_index('user_id')
        .sort_index()  # groupby returned the groups in sorted key order
    )
    demographics = first_rows[demographic_cols].dropna()
    return demographics[demographics['GENDER'].isin(['Man', 'Woman'])].copy()


plot_data_fourc = _first_trip_demographics(four_corners)
plot_data_cc = _first_trip_demographics(community_cycles)
plot_data_fc = _first_trip_demographics(fort_collins)
plot_data_pue = _first_trip_demographics(pueblo)
plot_data_sc = _first_trip_demographics(smart_commute)
plot_data_vail = _first_trip_demographics(vail)

In [None]:
# Smallest and largest `start_ts` value among the Vail rows.
print(min(vail['start_ts']))
print(max(vail['start_ts']))

In [None]:
# Number of distinct user_id values in each program subset, one per line
print(
    *(
        subset['user_id'].nunique()
        for subset in (four_corners, community_cycles, fort_collins, pueblo, smart_commute, vail)
    ),
    sep="\n",
)

In [None]:
# Row count of each program subset, one per line
print(
    *(
        len(subset)
        for subset in (four_corners, community_cycles, fort_collins, pueblo, smart_commute, vail)
    ),
    sep="\n",
)

In [None]:
# Distinct values of the `Mode` column in the Four Corners subset.
four_corners['Mode'].unique()

In [None]:
# Number of rows whose Mode is 'E-bike' in each program subset, one per line
print(
    *(
        len(subset[subset['Mode'] == 'E-bike'])
        for subset in (four_corners, community_cycles, fort_collins, pueblo, smart_commute, vail)
    ),
    sep="\n",
)

In [None]:
# E-bike-only rows of each program subset (selected on the `Mode` column)
fcc = four_corners.loc[four_corners['Mode'] == 'E-bike']
cc = community_cycles.loc[community_cycles['Mode'] == 'E-bike']
fc = fort_collins.loc[fort_collins['Mode'] == 'E-bike']
p = pueblo.loc[pueblo['Mode'] == 'E-bike']
sc = smart_commute.loc[smart_commute['Mode'] == 'E-bike']
v = vail.loc[vail['Mode'] == 'E-bike']

In [None]:
# Summary statistics of `distance_miles` for the Vail e-bike rows (`v`).
v['distance_miles'].describe()

In [None]:
# Sum of distance_miles over all modes for each program subset, one per line
print(
    *(
        subset['distance_miles'].sum()
        for subset in (four_corners, community_cycles, fort_collins, pueblo, smart_commute, vail)
    ),
    sep="\n",
)

In [None]:
# Sum of distance_miles over the e-bike-only rows of each program
fcctotal, cctotal, fctotal, ptotal, sctotal, vtotal = (
    ebike_rows['distance_miles'].sum() for ebike_rows in (fcc, cc, fc, p, sc, v)
)
# one total per line, in the same program order as above
print(fcctotal, cctotal, fctotal, ptotal, sctotal, vtotal, sep="\n")

In [None]:
# Row count per answer to the primary-job question, Pueblo subset.
pueblo['Which best describes your primary job?'].value_counts()

In [None]:
# Row count per answer to the primary-job question, Fort Collins subset.
fort_collins['Which best describes your primary job?'].value_counts()

In [None]:
# Row count per answer to the primary-job question, Smart Commute subset.
smart_commute['Which best describes your primary job?'].value_counts()

In [None]:
# Row count per answer to the primary-job question, Vail subset.
vail['Which best describes your primary job?'].value_counts()

In [None]:
# Row count per answer to the primary-job question, Four Corners subset.
four_corners['Which best describes your primary job?'].value_counts()

In [None]:
# Row count per answer to the primary-job question, Community Cycles subset.
community_cycles['Which best describes your primary job?'].value_counts()

In [None]:
# Distinct values of the `Trip_purpose` column in `data`.
data['Trip_purpose'].unique()

In [None]:
# Copy of data_2 with an `induced` flag: 'Induced' where the stated replaced mode
# is 'No Travel', 'Non-induced' for every other row.
plot_data_3 = data_2.assign(
    induced=np.where(data_2['Replaced_mode'] == 'No Travel', 'Induced', 'Non-induced')
)

In [None]:
# Display the current contents of `plot_data` (whatever the most recently run cell left in it).
plot_data

In [None]:
# Substitution rate of e-bike trips: share of e-bike trips per stated replaced mode
plot_data = data_2.copy()
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']
# t1: trip count per (mode, replaced mode); t2: trip count per mode (the denominator)
t1 = plot_data.groupby(['Mode_confirm','Replaced_mode'], as_index=False).count()[['Mode_confirm','Replaced_mode','distance_miles']]
# Assign the result back instead of using inplace=True on a column selection, which is
# chained assignment and does not reliably update the parent frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]
plot_data = t1.merge(t2, on='Mode_confirm')
# after the merge, _x is the per-replaced-mode count and _y is the per-mode total
plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']
plot_data['proportion'] = plot_data['proportion'].fillna(0)

# Bar order: replaced modes by descending proportion. Only `proportion` is averaged;
# averaging the whole frame fails on pandas >= 2.0 because `Mode_confirm` holds strings.
data_order = (
    plot_data.groupby(['Replaced_mode'], as_index=False)[['proportion']]
    .mean()
    .sort_values('proportion', ascending=False)
    .Replaced_mode
)
# kept as a separate name for any later use; it is the same ordering
labels = data_order

plot_title='Stated Replacement for E-Bike Trips'
ylab='Proportion of Trips'

fig, ax = plt.subplots(figsize=(10,4))
sns.barplot(data=plot_data, x='Replaced_mode', y='proportion', estimator=np.mean, order=data_order).set(title=plot_title,xlabel='',ylabel=ylab,ylim=(0,.5))
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)
# annotate every bar with its value
ax.bar_label(ax.containers[0], fmt='%.2f', padding=30)

In [None]:
# Flag every trip as induced ('No Travel' would have happened otherwise) or not
plot_data_3 = data_2.assign(
    induced=np.where(data_2['Replaced_mode'] == 'No Travel', 'Induced', 'Non-induced')
)
# t1: e-bike rows only; t2: non-null distance_miles count per induced flag
t1 = plot_data_3.loc[plot_data_3['Mode_confirm'] == 'E-bike']
t2 = t1.groupby(['induced'], as_index=False).count()[['induced','distance_miles']]

In [None]:
# Display `t2` as left by the most recently run cell.
t2

In [None]:
# Pie chart of the row counts per `induced` value in `t1`, labelled with percentages.
t1.induced.value_counts().plot(kind='pie',autopct='%1.1f%%', figsize=(5, 5), label='')

In [None]:
# Share of induced trips made by e-bike, for each trip purpose
plot_data = plot_data_3.copy()
# t1: trip count per (induced, purpose, mode); t2: trip count per (induced, purpose), the denominator
t1 = plot_data.groupby(['induced','Trip_purpose','Mode_confirm'], as_index=False).count()[['induced','Trip_purpose','Mode_confirm','distance_miles']]
# Assign the result back instead of using inplace=True on a column selection, which is
# chained assignment and does not reliably update the parent frame.
t1['distance_miles'] = t1['distance_miles'].fillna(0)
t2 = plot_data.groupby(['induced','Trip_purpose'], as_index=False).count()[['induced','Trip_purpose','distance_miles']]
plot_data = t1.merge(t2, on=['induced','Trip_purpose'])
# after the merge, _x is the per-mode count and _y is the all-mode total; expressed in percent
plot_data['proportion'] = (plot_data['distance_miles_x'] / plot_data['distance_miles_y'])*100
plot_data['proportion'] = plot_data['proportion'].fillna(0)
plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']
plot_data = plot_data[plot_data['induced']=='Induced']

plot_title = 'Induced E-Bike Trip Proportion By Trip Purpose'
ylab = 'Proportion of Induced Trips'
fig, ax = plt.subplots(figsize=(20,10))
# The x axis carries trip purposes, so it is labelled as such (it previously read 'Date').
sns.barplot(data=plot_data, x='Trip_purpose', y='proportion', hue='induced',estimator=np.mean, ci=None).set(title=plot_title, xlabel='Trip Purpose', ylabel=ylab)
plt.xticks(rotation=35, ha='right')
plt.subplots_adjust(bottom=0.25)


In [None]:
# Display the current contents of `plot_data` (whatever the most recently run cell left in it).
plot_data

In [None]:
# Replace `data` with the trip table read from the same local CSV export that `data_2` is loaded from.
# NOTE(review): absolute, user-specific Windows path -- not portable to other machines.
data=pd.read_csv("C:\\Users\\dakcicek\\Documents\\GitHub\\ceo_analysis\\viz_scripts\\filtered_merged_trips.csv")

In [None]:
# Summary statistics of the `duration` column over all rows of `data`.
data['duration'].describe()

In [None]:
# Summary statistics of the `distance_miles` column over all rows of `data`.
data['distance_miles'].describe()

In [None]:
# Summary statistics of `VEH_num` over the Four Corners rows (one row per trip, not per user -- TODO confirm that is intended).
four_corners['VEH_num'].describe()

In [None]:
# Summary statistics of `VEH_num` over the Pueblo rows.
pueblo['VEH_num'].describe()

In [None]:
# Summary statistics of `VEH_num` over the Vail rows.
vail['VEH_num'].describe()

In [None]:
# Summary statistics of `VEH_num` over the Smart Commute rows.
smart_commute['VEH_num'].describe()

In [None]:
# Summary statistics of `VEH_num` over the Fort Collins rows.
fort_collins['VEH_num'].describe()

In [None]:
# Summary statistics of `VEH_num` over the Community Cycles rows.
community_cycles['VEH_num'].describe()

# Energy Impact

In [None]:
# Mode label for the energy-impact section; compared against the `Mode` column when building `data_eb`.
mode_of_interest = "E-bike"

In [None]:
# Rows of data_2 whose Mode equals mode_of_interest; if there is no `Mode`
# column the unfiltered frame is used as-is.
if "Mode" in data_2.columns:
    data_eb = data_2.query(f"Mode == '{mode_of_interest}'")
else:
    data_eb = data_2

In [None]:
# Display the filtered trips selected for the energy-impact analysis.
data_eb

In [None]:
# Emissions analysis: net CO2 impact of the selected trips, broken down by the mode each trip replaced.
# Total miles per stated replaced mode (groupby returns the modes in sorted order).
# The column is selected explicitly: a column name passed positionally to .sum() is
# taken as its `numeric_only` flag, not as a column selector.
z = data_eb.groupby('Replaced_mode', as_index=False)[['distance_miles']].sum()

# Per-mile CO2 factors, applied to the rows of `z` purely by position.
# NOTE(review): presumably one factor per replaced mode in the sorted order of `z`, in lb CO2
# per mile -- TODO confirm the mapping and units. The assignment below raises ValueError
# whenever the data does not contain exactly 10 distinct replaced modes.
co2_combo_factors = [5170*157.2*.000001,
                    .022*1166*.001,
                    0,
                    0,
                    0,
                    7214*157.2*.000001,
                    2585*157.2*.000001,
                    .027*1166*.001,
                    4560*161.3*.000001,
                    0]
co2_ebike_factor = .022*1166*.001
# impact factor = replaced mode's factor minus the e-bike's own factor
z['co2_impact_factor'] = [x-co2_ebike_factor for x in co2_combo_factors]
z['CO2_Impact(lb)'] = z['distance_miles']*z['co2_impact_factor'] # Reduction=positive
# 2204 is used as the pounds-per-metric-ton conversion
metric_ton_co2_reduction = sum(z['CO2_Impact(lb)'])/2204
# Named n_unique_users rather than unique_users so the count does not shadow the
# unique_users(...) callable that the summary-table cell further down invokes.
n_unique_users = len(pd.unique(data_eb['user_id']))
print(f"Net reduction: {metric_ton_co2_reduction} metric tons over {n_unique_users} users.")
# NOTE(review): 2400 is presumably the program cost per participant -- TODO confirm
print(f"Net program costs: {2400*n_unique_users}")
print(f"Program $/mtCO2: {(2400*n_unique_users) / (metric_ton_co2_reduction)}")

In [None]:
def CO2_impact_lb(df,distance):
    """Add a ``CO2_Impact(lb)`` column to ``df`` and return the frame.

    The impact is ``Replaced_mode_lb_CO2 - Mode_confirm_lb_CO2`` rounded to
    three decimals, so a positive value means the replaced mode has the
    larger footprint.

    ``CO2_footprint_lb`` (defined elsewhere) is always called for
    ``"Replaced_mode"``, and for ``"Mode_confirm"`` only when the frame has no
    ``Mode_confirm_lb_CO2`` column yet; it is presumably what adds the
    ``<column>_lb_CO2`` columns read here -- verify against its definition.

    :param df: trip DataFrame passed through to ``CO2_footprint_lb``
    :param distance: passed through unchanged to ``CO2_footprint_lb``
    :return: the frame returned by the last ``CO2_footprint_lb`` call, with
        ``CO2_Impact(lb)`` set
    """
    mode_footprint_missing = 'Mode_confirm_lb_CO2' not in df.columns
    if mode_footprint_missing:
        print("Mode confirm footprint not found, computing before impact")
        df = CO2_footprint_lb(df, distance, "Mode_confirm")

    df = CO2_footprint_lb(df, distance, "Replaced_mode")

    footprint_delta = df['Replaced_mode_lb_CO2'] - df['Mode_confirm_lb_CO2']
    df['CO2_Impact(lb)'] = round(footprint_delta, 3)
    return df

In [None]:
# Notebook parameters re-declared for the summary table and energy-impact plot cells that follow.
# NOTE(review): this rebinds mode_of_interest to lower-case "e-bike"; the value is interpolated into
# the plot title and file name below, and differs from the "E-bike" label used to filter `data_eb`.
year = None
month = None
program = "default"
study_type = "program"
mode_of_interest = "e-bike"
include_test_users = False

In [None]:
# Single-column ("value") summary table, one row per statistic; passed to the
# missing-plot fallback helpers in the energy-impact plot cell.
# NOTE(review): get_participant_uuids, unique_users, trip_label_count, participant_ct_df,
# labeled_ct and expanded_ct are not defined in the visible part of this notebook --
# confirm they are in scope (and that unique_users is still a callable) before running this cell.
debug_df = pd.DataFrame.from_dict({
            "year": year,
            "month": month,
            "Registered_participants": len(get_participant_uuids(program, include_test_users)),
            "Participants_with_at_least_one_trip": unique_users(participant_ct_df),
            "Participant_with_at_least_one_labeled_trip": unique_users(labeled_ct),
            "Trips_with_at_least_one_label": len(labeled_ct),
            "Trips_with_mode_confirm_label": trip_label_count("Mode_confirm", expanded_ct),
            "Trips_with_trip_purpose_label": trip_label_count("Trip_purpose", expanded_ct)
            },
        orient='index', columns=["value"])

In [None]:
# Energy-impact bar chart: total CO2 impact per replaced mode, taken from `z`.
# Any failure while building the chart is printed and a placeholder plot is generated instead.
plot_title_no_quality = f"Sketch of Total Pounds of CO2 Emissions of {mode_of_interest} trips"
file_name = f'sketch_energy_impact_{mode_of_interest}%s'
# NOTE(review): the counts in this text are hard-coded, not derived from the loaded data
quality_text = "Based on 17348 confirmed E-bike trips from 120 users of 61496 total confirmed trips from 122 users (28.21%)"

try:
    # total and mean impact per replaced mode, largest total first
    ebco2 = z.groupby('Replaced_mode').agg({'CO2_Impact(lb)': ['sum', 'mean']})
    ebco2.columns = ['total_lb_CO2_emissions', 'average_lb_CO2_emission']
    ebco2 = ebco2.reset_index().sort_values(by=['total_lb_CO2_emissions'], ascending=False)
    # True where the total is positive; handed to the plot helper as the colour key
    ebco2['boolean'] = ebco2['total_lb_CO2_emissions'] > 0
    net_CO2_emissions = round(sum(ebco2['total_lb_CO2_emissions']), 2)

    x = ebco2['total_lb_CO2_emissions']
    y = ebco2['Replaced_mode']
    color = ebco2['boolean']

    fig_size = (15,10)
    plot_title = (
        f"{plot_title_no_quality}"
        f"\n Contribution by replaced mode towards a total of {net_CO2_emissions}(lb CO2 Emissions )\n"
        f"{quality_text}"
    )
    CO2_impact(x, y, color, plot_title, file_name)
    alt_text = store_alt_text_bar(pd.DataFrame(x.values, y), file_name, plot_title)
except Exception as e:
    print(e)
    generate_missing_plot(plot_title_no_quality, debug_df, file_name)
    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)