In [42]:
import pandas as pd
import numpy as np
import geopandas as gpd

import plotly.graph_objects as go
import plotly.express as px

pd.set_option('display.max_columns', None)

In [43]:
def check(row):
    return (row.filter(regex="^rp_code_")!=row.filter(regex="^sp.*adj$").min()).all()

def filter_edf(edf):
    # Update vehicle ownership to re-code "other" to "large car" and "other powertrain" to "ICE"
    # Also, add a new column with the SP code for each vehicle
    df = pd.DataFrame(data={"veh_type" : [1,2,3,1,2,3,1,2,3,1,2,3], "veh_pt" : [1,1,1,2,2,2,3,3,3,4,4,4], 
                    "rp_code" : [1,3,5,2,4,6,2,4,6,2,4,6]})

    for i in range(1,7):
        edf.loc[edf["veh_pt_{0}".format(i)]==5,"veh_pt_{0}".format(i)] = 1
        edf.loc[edf["veh_type_{0}".format(i)]==4,"veh_type_{0}".format(i)] = 2
        edf = edf.merge(df, how="left", left_on=["veh_type_{0}".format(i),"veh_pt_{0}".format(i)], right_on=["veh_type","veh_pt"], suffixes=[None,"_{0}".format(i)])
    edf.rename(columns={"rp_code": "rp_code_1"}, inplace=True)

    edf.veh_type_1.head(25)
    # Check condition that min SP response = max SP response - i.e., that all responses are the same
    colmin = edf.loc[:,edf.filter(regex="^sp.*adj$").columns].min(axis=1)
    colmax = edf.loc[:,edf.filter(regex="^sp.*adj$").columns].max(axis=1)
    cond1 = (colmin==colmax)
    # Check that none of the current HH vehicles are the same as the min SP choice, conditional on only one SP choice


    cond2 = edf.apply(check, axis=1)

    return edf[~(cond1*cond2)]

In [44]:
edf = pd.read_csv("data/ev_survey_data.csv")
# filter out records where the respondent gave the same answer to all SP experiments AND they also don't own that vehicle type
print("edf rows before filter:", edf.shape[0])

filt_edf = filter_edf(edf)
print("edf rows after filter:", filt_edf.shape[0])

geo_edf = gpd.GeoDataFrame(filt_edf, 
    geometry = gpd.points_from_xy(filt_edf['longitude'], filt_edf['latitude']), 
    crs = 'EPSG:3857')

ctydf = gpd.read_file("data/county_shp/county_L48_only.shp")
# ctydf = ctydf.to_crs(3857)
# filter states to include only survey region
survey_states = ["19","20","27","29","31","38","46"]
ctydf = ctydf.loc[ctydf.STATEFP.isin(survey_states)]

edf rows before filter: 1954
edf rows after filter: 1954


##### Color and basic naming conventions
##### These standard colors are not working


In [45]:
hh_size_dict = {
    16.0: "1",
    17.0: "2",
    18.0: "3",
    19.0: "4",
    20.0: "5",
    21.0: "6 or more"
}

next_veh_dict = {
    1: "Small car",
    2: "Large car",
    3: "Pickup truck",
    4: "Other vehicles"
}

color_map_veh_choice = {
    1: 'black',   # Dark Blue
    2: '#ff7f0e',   # Dark Orange
    3: '#2ca02c',   # Dark Green
    4: '#d62728',   # Dark Red
}


occupation_dict = {
    1.0: "Management, professional, and related",
    2.0: "Service",
    3.0: "Sales and office",
    4.0: "Farming, fishing, and forestry",
    5.0: "Construction, extraction, and maintenance",
    6.0: "Production, transportation, and material moving",
    7.0: "Government",
    8.0: "Retired",
    9.0: "Unemployed"
}


off_road_freq_dict = {
    1.0: "Rarely \n (1-3 times per year)",
    2.0: "Sometimes \n (1-3 times per month)",
    3.0: "Seasonally \n (1 or more times per week \n for one season)",
    4.0: "Frequently \n (1 or more times per week \n throughout the year)"
}

# make this dict more hex
color_map_off_road_freq = {
    1.0: '#1f77b4',
    2.0: '#2ca02c',
    3.0: '#d62728',
    4.0: '#ff7f0e',
}

parking_dict = {
    "bev_dwell_1": "Private enclosed garage",
    "bev_dwell_2": "Private non-enclosed garage",
    "bev_dwell_3": "Dedicated parking in shared facility (accommodate)",
    "bev_dwell_4": "Dedicated parking in shared facility (not accommodate)",
    "bev_dwell_5": "Street parking (accommodate)",
    "bev_dwell_6": "Street parking (not accommodate)",
    "bev_dwell_7": "No dedicated parking facility"
}


geo_edf.loc[:, 'hh_size'] = geo_edf['hh_size'].map(hh_size_dict)
geo_edf.loc[:, 'next_veh_type_1'] = geo_edf['next_veh_type_1'].map(next_veh_dict)
geo_edf.loc[:, 'occupation'] = geo_edf['occupation'].map(occupation_dict)
geo_edf.loc[:, 'off_road_freq'] = geo_edf['off_road_freq'].map(off_road_freq_dict)

# keys are current column names values are new column names
geo_edf.rename(columns=parking_dict, inplace=True)

#### This is the columns which i grouped by question

In [46]:
cols = ['survey_duration', 'person_age', 'survey_state_code', 'state_name',
       'county_name', 'geoid', 'county_IA', 'county_KS', 'county_MN',
       'county_MO', 'county_NE', 'county_ND', 'county_SD',
       'definition_check', 'hh_veh_own', 'veh_pt_1', 'veh_pt_2',
       'veh_pt_3', 'veh_pt_4', 'veh_pt_5', 'veh_pt_6', 'veh_type_1',
       'veh_type_2', 'veh_type_3', 'veh_type_4', 'veh_type_5',
       'veh_type_6', 'veh_make_1', 'veh_make_2', 'veh_make_3',
       'veh_make_4', 'veh_make_5', 'veh_make_6', 'veh_year_1',
       'veh_year_2', 'veh_year_3', 'veh_year_4', 'veh_year_5',
       'veh_year_6', 'next_veh', 'next_veh_new', 'next_veh_pt_1',
       'next_veh_pt_2', 'next_veh_pt_3', 'next_veh_pt_4', 'next_veh_pt_5',
       'next_veh_pt_6', 'next_veh_type_1', 'next_veh_type_2',
       'next_veh_type_3', 'next_veh_type_4', 'next_veh_type_5',
       'next_veh_type_6', 'veh_tow', 'veh_off_road', 'tow_freq',
       'off_road_freq', 'ctrip_min', 'ctrip_mode', 'nc_min', 'trip_purp',
       'trip_mode', 'ld_trip_freq', 'ld_75_nb_mode', 'ld_b_75_mode',
       'ld_500_nb_mode', 'ld_b_500_mode', 'ld_rental',
       'pub_transit_veh_own', 'bev_concern', 'bev_concern_1',
       'bev_concern_2', 'bev_concern_3', 'bev_concern_7', 'bev_concern_6',
       'bev_concern_0', 'bev_factor', 'bev_factor_1', 'bev_factor_2',
       'bev_factor_3', 'bev_factor_4', 'bev_factor_5', 'bev_factor_6',
       'bev_factor_7',
       'Which of the following statements most closely reflects the potential for electric vehicle charging at your primary residence? - Selected Choice',
       'bev_dwell_1', 'bev_dwell_2', 'bev_dwell_3', 'bev_dwell_4',
       'bev_dwell_5', 'bev_dwell_6', 'bev_dwell_7', 'env_import',
       'air_rank', 'water_rank', 'soil_rank', 'xweather_rank',
       'biodiversity_rank', 'deforest_rank', 'plastic_rank', 'educ_rank',
       'health_rank', 'climate_rank', 'immigrate_rank', 'race_rank',
       'econ_rank', 'poverty_rank', 'gun_rank', 'poli_rank', 'crime_rank',
       'sp1', 'sp2', 'sp3', 'sp4', 'sp5', 'sp6', 'sp_set1', 'sp_set2',
       'sp_set3', 'sp_set4', 'sp_set5', 'sp_set6', 'spe1', 'spe3',
       'spe14', 'spe19', 'spe22', 'spe23', 'spe6', 'spe7', 'spe12',
       'spe16', 'spe20', 'spe24', 'spe4', 'spe5', 'spe8', 'spe10',
       'spe11', 'spe21', 'spe2', 'spe20.1', 'spe13', 'spe15', 'spe17',
       'spe18', 'gender', 'race', 'marital_status', 'license',
       'emp_status', 'occupation', 'educ', 'zipcode', 'latitude',
       'longitude', 'dwell_type', 'dwell_tenure', 'hh_size', 'hh_arrange',
       'hh_license', 'hh_ftw', 'hh_ptw', 'hh_students', 'hh_child',
       'hh_65plus', 'hh_income', 'CUID', 'person_weight', 'hh_weight',
       'comb_weight', 'veh_type', 'veh_pt', 'rp_code_1', 'veh_type_2',
       'veh_pt_2', 'rp_code_2', 'veh_type_3', 'veh_pt_3', 'rp_code_3',
       'veh_type_4', 'veh_pt_4', 'rp_code_4', 'veh_type_5', 'veh_pt_5',
       'rp_code_5', 'veh_type_6', 'veh_pt_6', 'rp_code_6', 'geometry']

questions_dict = {
    "person_age": "person_age",
    "survey_state_loc_details": {
        "survey_state_code": "survey_state_code",
        "state_name": "state_name",
        "county_name": "county_name",
        "geoid": "geoid",
        "county": ["county_IA", "county_KS", "county_MN", "county_MO", "county_NE", "county_ND", "county_SD"],
    },
    "definition_check": "definition_check",
    "curr_veh": {
        "curr_veh": "hh_veh_own", #does the household own a vehicle
        "curr_veh_det": {
            "veh_pt": ["veh_pt_1", "veh_pt_2", "veh_pt_3", "veh_pt_4", "veh_pt_5", "veh_pt_6"],
            "veh_type": ["veh_type_1", "veh_type_2", "veh_type_3", "veh_type_4", "veh_type_5", "veh_type_6"],
            "veh_make": ["veh_make_1", "veh_make_2", "veh_make_3", "veh_make_4", "veh_make_5", "veh_make_6"],
            "veh_year": ["veh_year_1", "veh_year_2", "veh_year_3", "veh_year_4", "veh_year_5", "veh_year_6"]
        },
    },
    "next_veh": {
        "next_veh_purchase_time": "next_veh", # when to get next vehicle
        "next_veh_new": "next_veh_new", # is it new/used/unsure
        "next_veh_det": {
            "veh_pt": ["next_veh_pt_1", "next_veh_pt_2", "next_veh_pt_3", "next_veh_pt_4", "next_veh_pt_5", "next_veh_pt_6"],
            "veh_type": ["next_veh_type_1", "next_veh_type_2", "next_veh_type_3", "next_veh_type_4", "next_veh_type_5", "next_veh_type_6"]
        },
    },
    "towing": {
        "veh_tow": "veh_tow", # tow other vehicle
        "tow_freq": "tow_freq", # how often tow other vehicle
    },
    "off_road": {
        "veh_off_road": "veh_off_road", # off road driving
        "off_road_freq": "off_road_freq", # how often off road driving
        "commute": {
            "ctrip_min": "ctrip_min", # commute trip minutes
            "ctrip_mode": "ctrip_mode", # commute trip mode
        },
    },
    "n-commute": {
        "nc_min": "nc_min", # non-commute trip minutes
        "trip_purp": "trip_purp", # non-commute trip purpose
        "trip_mode": "trip_mode" # non-commute trip mode
    },
    "long_drive": {
        "ld_trip_freq": "ld_trip_freq", # long distance trip frequency
        "ld_trip_mode": {
            "ld_75_nb_mode": "ld_75_nb_mode", # long distance trip mode for trips less than 75 miles
            "ld_b_75_mode": "ld_b_75_mode", # long distance trip mode for trips greater than 75 miles
            "ld_500_nb_mode": "ld_500_nb_mode", # long distance trip mode for trips less than 500 miles
            "ld_b_500_mode": "ld_b_500_mode" # long distance trip mode for trips greater than 500 miles
        },
        "ld_rental": "ld_rental" # long distance trip rental
    },
    "public_transit": "pub_transit_veh_own", # public transit usage if own vehicle
    "bev_concern": {
        "bev_concern": ["bev_concern_1", "bev_concern_2", "bev_concern_3", "bev_concern_7", "bev_concern_6", "bev_concern_0"], # concern about Battery electric vehicles
        "bev_factor": ["bev_factor_1", "bev_factor_2", "bev_factor_3", "bev_factor_4", "bev_factor_5", "bev_factor_6", "bev_factor_7"], # factor influencing bev purchase
        "bev_dwell": ["bev_dwell_1", "bev_dwell_2", "bev_dwell_3", "bev_dwell_4", "bev_dwell_5", "bev_dwell_6", "bev_dwell_7"] # dwelling type
    },
    "env_import": "env_import", # environmental import
    "env_rank": ["air_rank", "water_rank", "soil_rank", "xweather_rank", "biodiversity_rank", "deforest_rank", "plastic_rank"], # environmental rank
    "issue_rank": ['educ_rank', 'health_rank', 'climate_rank', 'immigrate_rank','race_rank','econ_rank', 'poverty_rank', 'gun_rank', 'poli_rank', 'crime_rank',], # issue rank
    'sp': ['sp1', 'sp2', 'sp3', 'sp4', 'sp5', 'sp6'], # stated preference
    'sp_set': ['sp_set1', 'sp_set2', 'sp_set3', 'sp_set4', 'sp_set5', 'sp_set6'], # stated preference set
    'spe': ['spe1', 'spe3', 'spe14', 'spe19', 'spe22', 'spe23', 'spe6', 'spe7', 'spe12', 'spe16', 'spe20', 'spe24', 'spe4', 'spe5', 'spe8', 'spe10', 'spe11', 'spe21', 'spe2', 'spe20.1', 'spe13', 'spe15', 'spe17', 'spe18'], # stated preference experiment
    'personal_details': ['gender', 'race', 'marital_status', 'license', 'emp_status', 'occupation', 'educ', 'zipcode','latitude', 'longitude',],
    'household_details': ['dwell_type', 'dwell_tenure', 'hh_size', 'hh_arrange'],
    'hh_people_details': {
        'hh_license': 'hh_license', # num people in household with license
        'hh_ftw': 'hh_ftw', # num people in household with full time work
        'hh_ptw': 'hh_ptw', # num people in household with part time work
        'hh_students': 'hh_students', # num people in household who are students
        'hh_child': 'hh_child', # num people in household who are children
        'hh_65plus': 'hh_65plus', # num people in household who are 65+
        'hh_income': 'hh_income', # household income
    },
    'additional_info': {
        'weights': {
            'person_weight': 'person_weight', # person weight
            'hh_weight': 'hh_weight', # household weight
            'comb_weight': 'comb_weight', # combined weight
        },
        'veh_type': 'veh_type', # vehicle type
        'veh_pt': 'veh_pt', # vehicle powertrain
        'rp_code': ['rp_code_1', 'rp_code_2', 'rp_code_3', 'rp_code_4', 'rp_code_5', 'rp_code_6'], # stated preference code
        'geometry': 'geometry' # geometry
    }
}

In [48]:
np.array(list(questions_dict.keys()))

array(['person_age', 'survey_state_loc_details', 'definition_check',
       'curr_veh', 'next_veh', 'towing', 'off_road', 'n-commute',
       'long_drive', 'public_transit', 'bev_concern', 'env_import',
       'env_rank', 'issue_rank', 'sp', 'sp_set', 'spe',
       'personal_details', 'household_details', 'hh_people_details',
       'additional_info'], dtype='<U24')

##### Next vehicle choice by household size - possibly as a stacked bar chart
- x axis = household size (describe the household size range 1-3, 3-6)
- y axis = type of vehicle (gas, hybrid, electric, etc.)

In [49]:
print(questions_dict['household_details'])

questions_dict['next_veh']

['dwell_type', 'dwell_tenure', 'hh_size', 'hh_arrange']


{'next_veh_purchase_time': 'next_veh',
 'next_veh_new': 'next_veh_new',
 'next_veh_det': {'veh_pt': ['next_veh_pt_1',
   'next_veh_pt_2',
   'next_veh_pt_3',
   'next_veh_pt_4',
   'next_veh_pt_5',
   'next_veh_pt_6'],
  'veh_type': ['next_veh_type_1',
   'next_veh_type_2',
   'next_veh_type_3',
   'next_veh_type_4',
   'next_veh_type_5',
   'next_veh_type_6']}}

In [50]:
# adhawk testing for next step
# for i, j in df_hh_size.groupby('hh_size'):
#     print(j['next_veh_pt_1'].value_counts())

In [51]:
df_hh_size = geo_edf[['next_veh_type_1', 'hh_size']].copy()


result = df_hh_size.groupby('hh_size')['next_veh_type_1'].value_counts().reset_index(name='count_each_veh_type')

result['percentage'] = (result['count_each_veh_type'] / result.groupby('hh_size')['count_each_veh_type'].transform('sum'))

# result['percentage'] = result['percentage'].apply(lambda x: round(x, 2))

# convert result next_veh_type_1 to string
result['next_veh_type_1'] = result['next_veh_type_1'].astype(str)

fig = px.bar(result, x="hh_size", y="percentage", color="next_veh_type_1", barmode="stack")
fig.update_layout(yaxis=dict(tickformat=".2%"))

fig.update_layout(
    xaxis=dict(
        tickmode='linear',
        dtick=1,
        tickangle=0,
        title_text=None
    ),
    yaxis=dict(
        title_text=None
    ),
    height=500,
    title_text="Off-road driving frequency by expected next vehicle type"
)

fig.show()


In [52]:
# next vehicle choice by household size
df_hh_size = geo_edf[['next_veh_type_1', 'hh_size']]

   
result = df_hh_size.groupby('hh_size')['next_veh_type_1'].value_counts().reset_index(name='count_each_veh_type')
result
# calcuate the percentage of each vehicle type
result['percentage'] = (result['count_each_veh_type'] / result.groupby('hh_size')['count_each_veh_type'].transform('sum'))

# plot this into stacked bar chart
# convert result next_veh_type_1 to string
result['next_veh_type_1'] = result['next_veh_type_1'].astype(str)

fig = px.bar(result, x="hh_size", y="percentage", color="next_veh_type_1", barmode="stack")
fig.update_layout(yaxis=dict(tickformat=".2%"))

fig.update_layout(
    xaxis=dict(
        tickmode='linear',
        dtick=1,
        tickangle=0,
        title_text=None
    ),
    yaxis=dict(
        title_text=None
    ),
    height=500,
    title_text="Expected next vehicle type by household size"
)

fig.show()


##### Off-road driving frequency by occupation?
Q 394
x axis = occupation types
y axis = frequency of off-road driving

In [53]:
questions_dict['off_road']

{'veh_off_road': 'veh_off_road',
 'off_road_freq': 'off_road_freq',
 'commute': {'ctrip_min': 'ctrip_min', 'ctrip_mode': 'ctrip_mode'}}

In [54]:
questions_dict['personal_details']

['gender',
 'race',
 'marital_status',
 'license',
 'emp_status',
 'occupation',
 'educ',
 'zipcode',
 'latitude',
 'longitude']

In [55]:
df_off_freq_by_occ = geo_edf[['occupation', 'off_road_freq']].copy()

# count the number of each off road frequency by occupation
result = df_off_freq_by_occ.groupby('occupation')['off_road_freq'].value_counts().reset_index(name='count_each_off_road_freq')

# calulate the percentage of each off road frequency
result['percentage'] = result['count_each_off_road_freq'] / result.groupby('occupation')['count_each_off_road_freq'].transform('sum')

# convert off_road_freq to string as it is categorical (We need to change this to proper categorical variable later)
result['off_road_freq'] = result['off_road_freq'].astype(str)


# plot it in stacked bar chart
fig = px.bar(result, x="occupation", y="percentage", color="off_road_freq", barmode="stack")

fig.update_layout(
    xaxis=dict(
        tickmode='linear',
        dtick=1,
        tickangle=45,
        title_text=None
    ),
    yaxis=dict(
        title_text=None
    ),
    height=800,
    title_text="Off-road driving frequency by occupation"


)
fig.for_each_trace(lambda t: t.update(name=t.name.replace("\n", "<br>")))
fig.update_layout(yaxis=dict(tickformat=".2%"))



fig.show()


# plot a heatmap of the result




##### Off-road driving frequency by next vehicle purchase type

In [56]:
questions_dict['next_veh']

{'next_veh_purchase_time': 'next_veh',
 'next_veh_new': 'next_veh_new',
 'next_veh_det': {'veh_pt': ['next_veh_pt_1',
   'next_veh_pt_2',
   'next_veh_pt_3',
   'next_veh_pt_4',
   'next_veh_pt_5',
   'next_veh_pt_6'],
  'veh_type': ['next_veh_type_1',
   'next_veh_type_2',
   'next_veh_type_3',
   'next_veh_type_4',
   'next_veh_type_5',
   'next_veh_type_6']}}

In [57]:
questions_dict['off_road']

{'veh_off_road': 'veh_off_road',
 'off_road_freq': 'off_road_freq',
 'commute': {'ctrip_min': 'ctrip_min', 'ctrip_mode': 'ctrip_mode'}}

In [58]:
df_off_road_next_veh = geo_edf[['next_veh_type_1', 'off_road_freq']].copy()

# Define the vehicle order
vehicle_order = ["Small car", "Large car", "Pickup truck", "Other vehicles"]

# sort the graph in order
df_off_road_next_veh_copy = df_off_road_next_veh.copy()
df_off_road_next_veh_copy['next_veh_type_1'] = pd.Categorical(df_off_road_next_veh_copy['next_veh_type_1'], categories=vehicle_order, ordered=True)
df_off_road_next_veh = df_off_road_next_veh_copy


# count the number of each off road frequency by next vehicle type
result = df_off_road_next_veh.groupby('next_veh_type_1')['off_road_freq'].value_counts().reset_index(name='count_each_off_road_freq')

# calulate the percentage of each off road frequency make it out of 100
result['percentage'] = result['count_each_off_road_freq'] / result.groupby('next_veh_type_1')['count_each_off_road_freq'].transform('sum')

# convert off_road_freq to string as it is categorical (We need to change this to proper categorical variable later)
result['off_road_freq'] = result['off_road_freq'].astype(str)

# result['off_road_freq'] = result['off_road_freq'].map(color_map_off_road_freq)

# plot it in stacked bar chart
fig = px.bar(result, x="next_veh_type_1", y="percentage", color="off_road_freq", barmode="stack", text_auto=True)

# change the percentage to 2 decimal places
fig.for_each_trace(lambda t: t.update(name=t.name.replace("\n", "<br>")))
fig.update_layout(yaxis=dict(tickformat=".2%"))




fig.update_layout(
    xaxis=dict(
        tickmode='linear',
        dtick=1,
        tickangle=0,
        title_text=None
    ),
    yaxis=dict(
        title_text=None
    ),
    height=500,
    title_text="Off-road driving frequency by expected next vehicle type"
)


fig.show()

##### Time to next vehicle purchase and next vehicle fuel type


In [59]:
questions_dict['next_veh']

{'next_veh_purchase_time': 'next_veh',
 'next_veh_new': 'next_veh_new',
 'next_veh_det': {'veh_pt': ['next_veh_pt_1',
   'next_veh_pt_2',
   'next_veh_pt_3',
   'next_veh_pt_4',
   'next_veh_pt_5',
   'next_veh_pt_6'],
  'veh_type': ['next_veh_type_1',
   'next_veh_type_2',
   'next_veh_type_3',
   'next_veh_type_4',
   'next_veh_type_5',
   'next_veh_type_6']}}

In [60]:
df_time_fuel_type = geo_edf[['next_veh', 'next_veh_type_1']]
df_time_fuel_type.columns = ['next_veh_purchase_time', 'next_veh_type']

result = df_time_fuel_type.groupby('next_veh_purchase_time')['next_veh_type'].value_counts().reset_index(name='count_each_fuel_type')

# convert count to percentage
result['percentage'] = result['count_each_fuel_type'] / result.groupby('next_veh_purchase_time')['count_each_fuel_type'].transform('sum')

# convert next_veh_type to string as it is categorical (We need to change this to proper categorical variable later)
result['next_veh_type'] = result['next_veh_type'].astype(str)
result['next_veh_purchase_time'] = result['next_veh_purchase_time'].astype(str)

#show points on line chart
fig = px.line(result, x="next_veh_purchase_time", y="percentage", color="next_veh_type", markers=True)

fig.update_layout(
    xaxis=dict(
        tickmode='linear',
        dtick=1,
        tickangle=0,
        title_text=None
    ),
    yaxis=dict(
        title_text=None
    ),
    height=500,
    title_text="Next vehicle type by next vehicle purchase time"
)
fig.update_layout(yaxis=dict(tickformat=".2%"))


fig.show()

##### charging station locations

In [61]:
questions_dict['bev_concern']

{'bev_concern': ['bev_concern_1',
  'bev_concern_2',
  'bev_concern_3',
  'bev_concern_7',
  'bev_concern_6',
  'bev_concern_0'],
 'bev_factor': ['bev_factor_1',
  'bev_factor_2',
  'bev_factor_3',
  'bev_factor_4',
  'bev_factor_5',
  'bev_factor_6',
  'bev_factor_7'],
 'bev_dwell': ['bev_dwell_1',
  'bev_dwell_2',
  'bev_dwell_3',
  'bev_dwell_4',
  'bev_dwell_5',
  'bev_dwell_6',
  'bev_dwell_7']}

In [62]:
df_charging_loc = geo_edf[parking_dict.values()].copy()

# df_charging_loc.astype(int)

result = pd.DataFrame(df_charging_loc.sum().reset_index(name='count'))

# change the column name
result.columns = ['charging_loc', 'count']

# make a pie chart
fig = px.pie(result, values='count', names='charging_loc', title='Charging location')

fig.update_layout(
    xaxis=dict(
        tickmode='linear',
        dtick=1,
        tickangle=0
    ),
    height=600,
    title_text="Charging Locations"
)
fig.show()



### Sankey Diagram

In [63]:
questions_dict.keys()

dict_keys(['person_age', 'survey_state_loc_details', 'definition_check', 'curr_veh', 'next_veh', 'towing', 'off_road', 'n-commute', 'long_drive', 'public_transit', 'bev_concern', 'env_import', 'env_rank', 'issue_rank', 'sp', 'sp_set', 'spe', 'personal_details', 'household_details', 'hh_people_details', 'additional_info'])

In [64]:
questions_dict['curr_veh']

{'curr_veh': 'hh_veh_own',
 'curr_veh_det': {'veh_pt': ['veh_pt_1',
   'veh_pt_2',
   'veh_pt_3',
   'veh_pt_4',
   'veh_pt_5',
   'veh_pt_6'],
  'veh_type': ['veh_type_1',
   'veh_type_2',
   'veh_type_3',
   'veh_type_4',
   'veh_type_5',
   'veh_type_6'],
  'veh_make': ['veh_make_1',
   'veh_make_2',
   'veh_make_3',
   'veh_make_4',
   'veh_make_5',
   'veh_make_6'],
  'veh_year': ['veh_year_1',
   'veh_year_2',
   'veh_year_3',
   'veh_year_4',
   'veh_year_5',
   'veh_year_6']}}

In [65]:
questions_dict['next_veh']

{'next_veh_purchase_time': 'next_veh',
 'next_veh_new': 'next_veh_new',
 'next_veh_det': {'veh_pt': ['next_veh_pt_1',
   'next_veh_pt_2',
   'next_veh_pt_3',
   'next_veh_pt_4',
   'next_veh_pt_5',
   'next_veh_pt_6'],
  'veh_type': ['next_veh_type_1',
   'next_veh_type_2',
   'next_veh_type_3',
   'next_veh_type_4',
   'next_veh_type_5',
   'next_veh_type_6']}}

In [66]:
questions_dict['sp']

['sp1', 'sp2', 'sp3', 'sp4', 'sp5', 'sp6']

In [67]:
edf = pd.read_csv("data/ev_survey_data.csv")
# filter out records where the respondent gave the same answer to all SP experiments AND they also don't own that vehicle type
print("edf rows before filter:", edf.shape[0])

filt_edf = filter_edf(edf)
print("edf rows after filter:", filt_edf.shape[0])

geo_edf = edf
# geo_edf = gpd.GeoDataFrame(filt_edf, 
#     geometry = gpd.points_from_xy(filt_edf['longitude'], filt_edf['latitude']), 
#     crs = 'EPSG:3857')

df_for_sankey = geo_edf[
    ['veh_type_1',  'sp1', 'next_veh_type_1']
]
df_for_sankey

sankey_conn_curr_next = df_for_sankey.groupby('veh_type_1').agg({'next_veh_type_1': 'value_counts'}).rename(columns={'next_veh_type_1': 'delta_curr_to_next'}).reset_index()
sankey_conn_curr_next['curr_veh_type_source'] = sankey_conn_curr_next['veh_type_1'].copy().apply(lambda x: float(x)-1)
sankey_conn_curr_next['next_veh_type_source'] = sankey_conn_curr_next['next_veh_type_1'].copy().apply(lambda x: 3+float(x)-1)



fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=['curr_veh_count_1', 'curr_veh_count_2', 'curr_veh_count_3', 'next_veh_count1', 'next_veh_count2', 'next_veh_count3', 'next_veh_count4'],
        # color="blue",
        x=[0, 0, 0, 1, 1, 1, 1]  # Adjust x-axis position for each node
    ),
    link=dict(
        source=sankey_conn_curr_next['curr_veh_type_source'].to_list(),
        target=sankey_conn_curr_next['next_veh_type_source'].to_list(),
        value=sankey_conn_curr_next['delta_curr_to_next'].to_list()
    )
)])

fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
fig.show()


edf rows before filter: 1954
edf rows after filter: 1954


In [68]:
sankey_conn_curr_next

Unnamed: 0,veh_type_1,next_veh_type_1,delta_curr_to_next,curr_veh_type_source,next_veh_type_source
0,1.0,1.0,406,0.0,3.0
1,1.0,2.0,185,0.0,4.0
2,1.0,3.0,52,0.0,5.0
3,1.0,4.0,27,0.0,6.0
4,2.0,2.0,389,1.0,4.0
5,2.0,4.0,125,1.0,6.0
6,2.0,1.0,106,1.0,3.0
7,2.0,3.0,104,1.0,5.0
8,3.0,3.0,117,2.0,5.0
9,3.0,2.0,64,2.0,4.0


In [69]:
edf = pd.read_csv("data/ev_survey_data.csv")
# filter out records where the respondent gave the same answer to all SP experiments AND they also don't own that vehicle type
print("edf rows before filter:", edf.shape[0])

filt_edf = filter_edf(edf)
print("edf rows after filter:", filt_edf.shape[0])

geo_edf = edf
# geo_edf = gpd.GeoDataFrame(filt_edf, 
#     geometry = gpd.points_from_xy(filt_edf['longitude'], filt_edf['latitude']), 
#     crs = 'EPSG:3857')

df_for_sankey = geo_edf[
    ['veh_type_1',  'sp1', 'next_veh_type_1']
]
df_for_sankey

edf rows before filter: 1954
edf rows after filter: 1954


Unnamed: 0,veh_type_1,sp1,next_veh_type_1
0,1.0,3,2.0
1,2.0,4,3.0
2,2.0,1,2.0
3,2.0,1,4.0
4,2.0,3,4.0
...,...,...,...
1949,2.0,3,2.0
1950,3.0,6,2.0
1951,2.0,1,2.0
1952,2.0,1,2.0


In [70]:
vehicles = {
    1: "Vehicle 1",
    2: "Vehicle 2",
    3: "Vehicle 3",
    4: "Vehicle 4",
    5: "Vehicle 5",
    6: "Vehicle 6"
}

In [71]:


sankey_conn_curr_next = df_for_sankey.groupby('veh_type_1').agg({'next_veh_type_1': 'value_counts'}).rename(columns={'next_veh_type_1': 'delta_curr_to_next'}).reset_index()
sankey_conn_curr_next['curr_veh_type_source'] = sankey_conn_curr_next['veh_type_1'].copy().apply(lambda x: float(x)-1)
sankey_conn_curr_next['next_veh_type_source'] = sankey_conn_curr_next['next_veh_type_1'].copy().apply(lambda x: 3+float(x)-1)

sankey_conn_curr_next

Unnamed: 0,veh_type_1,next_veh_type_1,delta_curr_to_next,curr_veh_type_source,next_veh_type_source
0,1.0,1.0,406,0.0,3.0
1,1.0,2.0,185,0.0,4.0
2,1.0,3.0,52,0.0,5.0
3,1.0,4.0,27,0.0,6.0
4,2.0,2.0,389,1.0,4.0
5,2.0,4.0,125,1.0,6.0
6,2.0,1.0,106,1.0,3.0
7,2.0,3.0,104,1.0,5.0
8,3.0,3.0,117,2.0,5.0
9,3.0,2.0,64,2.0,4.0


In [72]:
sankey_sp_next = df_for_sankey.groupby('sp1').agg({'next_veh_type_1': 'value_counts'}).rename(columns={'next_veh_type_1': 'delta_sp_to_next'}).reset_index()
sankey_sp_next['sp1'] = sankey_sp_next['sp1'].copy().map(vehicles)
sankey_sp_next['next_veh_type_1'] = sankey_sp_next['next_veh_type_1'].copy().astype(int).map(vehicles)

source = np.array(sankey_sp_next['sp1'].unique())
destination = np.array(sankey_sp_next['next_veh_type_1'].unique())

sankey_sp_next['sp_source'] = sankey_sp_next['sp1'].copy().apply(lambda x: float(x[-1])-1)
sankey_sp_next['next_veh_type_source'] = sankey_sp_next['next_veh_type_1'].copy().apply(lambda x: len(source)+float(x[-1])-1)


sankey_sp_next

Unnamed: 0,sp1,next_veh_type_1,delta_sp_to_next,sp_source,next_veh_type_source
0,Vehicle 1,Vehicle 1,296,0.0,6.0
1,Vehicle 1,Vehicle 2,132,0.0,7.0
2,Vehicle 1,Vehicle 4,49,0.0,9.0
3,Vehicle 1,Vehicle 3,40,0.0,8.0
4,Vehicle 2,Vehicle 1,144,1.0,6.0
5,Vehicle 2,Vehicle 2,91,1.0,7.0
6,Vehicle 2,Vehicle 4,33,1.0,9.0
7,Vehicle 2,Vehicle 3,17,1.0,8.0
8,Vehicle 3,Vehicle 2,244,2.0,7.0
9,Vehicle 3,Vehicle 1,57,2.0,6.0


In [73]:
# make a long combined list of source and destination
label = np.concatenate((source, destination), axis=None)
label


array(['Vehicle 1', 'Vehicle 2', 'Vehicle 3', 'Vehicle 4', 'Vehicle 5',
       'Vehicle 6', 'Vehicle 1', 'Vehicle 2', 'Vehicle 4', 'Vehicle 3'],
      dtype=object)

In [74]:

fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=label,
        # color="blue",
        x=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1]  # Adjust x-axis position for each node
    ),
    link=dict(
        source=sankey_sp_next['sp_source'].to_list(),
        target=sankey_sp_next['next_veh_type_source'].to_list(),
        value=sankey_sp_next['delta_sp_to_next'].to_list()
    )
)])

fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
fig.show()