In [2]:
import os, sys
import pandas as pd
import json

# Locate paths.json: cut the current directory string at the "travel_survey"
# folder name and look for the file in whatever precedes it.
# NOTE(review): sys.path[0] is assumed to be the notebook's directory; that
# depends on how the kernel was launched -- confirm, or consider os.getcwd().
curr_dir = sys.path[0]
path_file = os.path.join(curr_dir.split("travel_survey")[0], "paths.json")
assert os.path.exists(path_file), (
    f"Cannot find paths.json file at {path_file!r} (derived from sys.path[0]={curr_dir!r})"
)

with open(path_file, "r") as f:
    paths = json.load(f)

# The "ca_vehicle" entry is joined onto the "datasets" root to get the
# California vehicle survey directory used by the cells below.
datasets_dir = paths["datasets"]
ca_vehicle_dir = os.path.join(datasets_dir, paths["ca_vehicle"])
assert os.path.exists(ca_vehicle_dir), (
    f"Cannot find the CA vehicle data directory at {ca_vehicle_dir!r}; check paths.json"
)

## residential PEV

In [5]:
# File name of the residential PEV survey extract and the survey year to load.
res_fn = "survey_res_pev.csv"
year = 2019

# Each year's files sit in <ca_vehicle_dir>/ca_vehicle_<year>/.
pev_data_dir = os.path.join(ca_vehicle_dir, f"ca_vehicle_{year}")
res_path = os.path.join(pev_data_dir, res_fn)
assert os.path.exists(res_path)

# Read the CSV into a DataFrame and preview the first five rows.
res_df = pd.read_csv(res_path)
res_df.head(5)

Unnamed: 0,sampno,vehno,vehicle_num,veh_year,veh_make,veh_model,fuel,veh_type,pev_miles_week,pev_miles_month,...,pev_incentives_2,pev_incentives_3,pev_incentives_4,pev_incentives_5,pev_incentives_6,pev_incentives_7,pev_experience,pev_recommend,pev_confidence,pev_confidence_4_x
0,190107,190107.v01,1,2017,FORD,REDACTED,3,3,250,,...,5,4,4,1,1,4,5,5,,
1,190111,190111.v03,3,2018,VOLKSWAGEN,,5,1,175,,...,5,5,5,1,1,4,5,4,,
2,190120,190120.v02,2,2016,NISSAN,,5,2,160,,...,4,4,4,2,1,2,7,5,,
3,190127,190127.v01,1,2013,TOYOTA,,3,3,200,,...,7,7,7,2,7,5,7,4,,
4,190138,190138.v03,3,2017,CHEVROLET,,5,2,200,,...,5,5,5,1,1,1,7,5,2.0,


In [11]:
# Show the column labels of the residential table.
res_df.columns

Index(['sampno', 'vehno', 'vehicle_num', 'veh_year', 'veh_make', 'veh_model',
       'fuel', 'veh_type', 'pev_miles_week', 'pev_miles_month',
       ...
       'pev_incentives_2', 'pev_incentives_3', 'pev_incentives_4',
       'pev_incentives_5', 'pev_incentives_6', 'pev_incentives_7',
       'pev_experience', 'pev_recommend', 'pev_confidence',
       'pev_confidence_4_x'],
      dtype='object', length=114)

In [14]:
"fuel" in res_df.columns

True

In [31]:
# Row count of the residential table.
# NOTE(review): the original note equates rows with respondents, but the
# preview shows a per-vehicle key (vehno) -- confirm no respondent repeats.
n_obs = res_df.shape[0]
# One column is left out of the variable count; the original note calls it
# "the first column" (presumably the unnamed CSV index column -- confirm).
n_var = res_df.shape[1] - 1
print(f"Number of observations: {n_obs}, number of variables: {n_var}")

# "fuel" codes: 3 = PHEV, 5 = full EV. Summing a boolean mask counts matches.
n_phev = int((res_df["fuel"] == 3).sum())
n_ev = int((res_df["fuel"] == 5).sum())
print(f"Number of PHEV: {n_phev}, number of full EV: {n_ev}")

Number of observations: 451, number of variables: 113
Number of PHEV: 173, number of full EV: 278


## commercial PEV

In [4]:
# File name of the commercial PEV survey extract and the survey year to load.
com_fn = "survey_com_pev.csv"
year = 2019

# Each year's files sit in <ca_vehicle_dir>/ca_vehicle_<year>/.
pev_data_dir = os.path.join(ca_vehicle_dir, f"ca_vehicle_{year}")
com_path = os.path.join(pev_data_dir, com_fn)
assert os.path.exists(com_path)

# Read the CSV into a DataFrame and preview the first five rows.
com_df = pd.read_csv(com_path)
com_df.head(5)

Unnamed: 0,sampno,total_num_pev,total_num_bev,flag_company_charging,primary_chargeloc_14,primary_chargeloc_17,primary_chargeloc_24,primary_chargeloc_27,primary_chargeloc_34,primary_chargeloc_37,...,home_pay,incentive_importance_1,incentive_importance_2,incentive_importance_3,incentive_importance_4,incentive_importance_5,incentive_importance_6,incentive_importance_7,incentive_importance_8,pev_experience
0,192013,0,1,1,,3.0,,,,,...,2.0,2,5,5,2,2,2,2,2,5
1,190445,0,4,1,,1.0,,,,,...,,5,6,5,2,4,1,5,7,7
2,191137,0,2,0,,,,,,,...,2.0,5,5,5,4,2,2,2,1,7
3,192203,0,1,1,,1.0,,,,,...,,6,5,3,6,7,6,5,7,7
4,190029,3,0,1,,,,,1.0,,...,1.0,5,3,5,4,4,4,5,3,4


In [29]:
# Row count of the commercial table (the original note treats each row as one
# respondent) and its column count minus one (the original note says this
# excludes "the first column").
n_obs = com_df.shape[0]
n_var = com_df.shape[1] - 1
print(f"Number of observations: {n_obs}, number of variables: {n_var}")

# Vehicle totals summed over all rows.
# NOTE(review): the PHEV total is read from "total_num_pev" -- confirm against
# the survey codebook that this column counts PHEVs only.
tot_phev = com_df.loc[:, "total_num_pev"].sum()
tot_bev = com_df.loc[:, "total_num_bev"].sum()
print(f"Total number of PHEV: {tot_phev}, total number of BEV: {tot_bev}")

# Mean of the company-charging flag, printed as a percentage
# (presumably a 0/1 indicator -- confirm).
onsite_charging = com_df.loc[:, "flag_company_charging"].mean()
print(f"Percentage of companies with onsite charging: {onsite_charging:.2%}")

Number of observations: 267, number of variables: 123
Total number of PHEV: 911, total number of BEV: 506
Percentage of companies with onsite charging: 54.68%
