In [16]:
import pandas as pd

#### Load Data

In [17]:
def load_data():
    sales_url = 'https://api.iea.org/evs?parameters=EV%20sales&category=Historical&mode=Cars&csv=true'
    charging_points_url = 'https://api.iea.org/evs?parameters=EV%20charging%20points&category=Historical&mode=EV&csv=true'
    
    ev_sales_df = pd.read_csv(sales_url)
    ev_charging_points_df = pd.read_csv(charging_points_url)
    
    return ev_sales_df, ev_charging_points_df

ev_sales_df, ev_charging_points_df = load_data()

#### Classify Columns (Categorical vs Numerical)

In [18]:
def classify_columns(df):
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns
    numerical_cols = df.select_dtypes(include=['number']).columns
    
    categorical_info = {col: df[col].unique().tolist() for col in categorical_cols}
    
    return {
        'categorical': list(categorical_cols),
        'numerical': list(numerical_cols),
        'categorical_classes': categorical_info
    }

sales_classification = classify_columns(ev_sales_df)
charging_points_classification = classify_columns(ev_charging_points_df)

In [19]:
print('EV Sales DataFrame Column Classification:')
print(sales_classification)

EV Sales DataFrame Column Classification:
{'categorical': ['region', 'category', 'parameter', 'mode', 'powertrain', 'unit'], 'numerical': ['year', 'value'], 'categorical_classes': {'region': ['Australia', 'Austria', 'Belgium', 'Brazil', 'Bulgaria', 'Canada', 'Chile', 'China', 'Colombia', 'Costa Rica', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'EU27', 'Europe', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Iceland', 'India', 'Ireland', 'Israel', 'Italy', 'Japan', 'Korea', 'Latvia', 'Lithuania', 'Luxembourg', 'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Poland', 'Portugal', 'Rest of the world', 'Romania', 'Seychelles', 'Slovakia', 'Slovenia', 'South Africa', 'Spain', 'Sweden', 'Switzerland', 'Turkiye', 'United Arab Emirates', 'United Kingdom', 'USA', 'World'], 'category': ['Historical'], 'parameter': ['EV sales', 'EV stock share', 'EV sales share', 'EV stock', 'Electricity demand', 'Oil displacement Mbd', 'Oil displacement, million lge'], 'mode': ['Cars']

In [20]:
print('\nEV Charging Points DataFrame Column Classification:')
print(charging_points_classification)


EV Charging Points DataFrame Column Classification:
{'categorical': ['region', 'category', 'parameter', 'mode', 'powertrain', 'unit'], 'numerical': ['year', 'value'], 'categorical_classes': {'region': ['Australia', 'Austria', 'Belgium', 'Brazil', 'Canada', 'Chile', 'China', 'Denmark', 'EU27', 'Europe', 'Finland', 'France', 'Germany', 'Greece', 'Iceland', 'India', 'Indonesia', 'Israel', 'Italy', 'Japan', 'Korea', 'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Poland', 'Portugal', 'South Africa', 'Spain', 'Sweden', 'Switzerland', 'Thailand', 'Turkiye', 'United Kingdom', 'USA', 'World'], 'category': ['Historical'], 'parameter': ['EV charging points'], 'mode': ['EV'], 'powertrain': ['Publicly available fast', 'Publicly available slow'], 'unit': ['charging points']}}


#### Data Exploration

In [21]:
for col in ev_sales_df.columns:
    print(col)
    print(ev_sales_df[col].unique())
    print('')

region
['Australia' 'Austria' 'Belgium' 'Brazil' 'Bulgaria' 'Canada' 'Chile'
 'China' 'Colombia' 'Costa Rica' 'Croatia' 'Cyprus' 'Czech Republic'
 'Denmark' 'Estonia' 'EU27' 'Europe' 'Finland' 'France' 'Germany' 'Greece'
 'Hungary' 'Iceland' 'India' 'Ireland' 'Israel' 'Italy' 'Japan' 'Korea'
 'Latvia' 'Lithuania' 'Luxembourg' 'Mexico' 'Netherlands' 'New Zealand'
 'Norway' 'Poland' 'Portugal' 'Rest of the world' 'Romania' 'Seychelles'
 'Slovakia' 'Slovenia' 'South Africa' 'Spain' 'Sweden' 'Switzerland'
 'Turkiye' 'United Arab Emirates' 'United Kingdom' 'USA' 'World']

category
['Historical']

parameter
['EV sales' 'EV stock share' 'EV sales share' 'EV stock'
 'Electricity demand' 'Oil displacement Mbd'
 'Oil displacement, million lge']

mode
['Cars']

powertrain
['BEV' 'EV' 'PHEV' 'FCEV']

year
[2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2010]

unit
['Vehicles' 'percent' 'GWh' 'Milion barrels per day'
 'Oil displacement, million lge']

value
[4.90000000e+01 3.90000

#### Create clean df for sales data

In [22]:
def load_sales_data():
    sales_df = ev_sales_df[ev_sales_df['parameter'] == 'EV sales']    
    sales_df = sales_df[sales_df['powertrain'] == 'BEV']    
    cols_to_keep = ['region', 'year', 'value']
    sales_df = sales_df[cols_to_keep]
    
    return sales_df

sales_df = load_sales_data()
print(sales_df)


         region  year      value
0     Australia  2011       49.0
9     Australia  2012      170.0
10    Australia  2013      190.0
21    Australia  2014      370.0
22    Australia  2015      760.0
...         ...   ...        ...
3750      World  2019  1500000.0
3757      World  2020  2000000.0
3772      World  2021  4700000.0
3779      World  2022  7300000.0
3794      World  2023  9500000.0

[604 rows x 3 columns]


#### Summary Data

In [25]:
ev_sales_df.head(15)

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
0,Australia,Historical,EV sales,Cars,BEV,2011,Vehicles,49.0
1,Australia,Historical,EV stock share,Cars,EV,2011,percent,0.00039
2,Australia,Historical,EV sales share,Cars,EV,2011,percent,0.0065
3,Australia,Historical,EV stock,Cars,BEV,2011,Vehicles,49.0
4,Australia,Historical,EV stock,Cars,BEV,2012,Vehicles,220.0
5,Australia,Historical,EV stock,Cars,PHEV,2012,Vehicles,80.0
6,Australia,Historical,EV sales,Cars,PHEV,2012,Vehicles,80.0
7,Australia,Historical,EV sales share,Cars,EV,2012,percent,0.03
8,Australia,Historical,EV stock share,Cars,EV,2012,percent,0.0024
9,Australia,Historical,EV sales,Cars,BEV,2012,Vehicles,170.0


In [None]:
def load_summary_data(df):
    sales_df = df[df['parameter'] == 'EV sales'].copy()
    
    summary_df = sales_df[['region', 'year', 'powertrain', 'value']]
    
    summary_df.rename(columns={'value': 'units_sold'}, inplace=True)
    
    summary_df.loc[:, 'units_sold'] = summary_df['units_sold'].astype(int)
    
    return summary_df

summary_data = load_summary_data(ev_sales_df)
print(summary_data)

         region  year powertrain  units_sold
0     Australia  2011        BEV        49.0
6     Australia  2012       PHEV        80.0
9     Australia  2012        BEV       170.0
10    Australia  2013        BEV       190.0
13    Australia  2013       PHEV       100.0
...         ...   ...        ...         ...
3781      World  2022       FCEV     15000.0
3783      World  2022       PHEV   2900000.0
3788      World  2023       PHEV   4300000.0
3792      World  2023       FCEV      8900.0
3794      World  2023        BEV   9500000.0

[1342 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_df.rename(columns={'value': 'units_sold'}, inplace=True)
