In [2]:
import pandas as pd




In [3]:
# Import csv file and read it into a dataframe
csv_file_path = '../Resources/IEA Global EV Data 2024.csv'
df =pd.read_csv(csv_file_path)

# Inspect the first or last few rows of the dataset to see that the data loaded properly and what you're working with
df.head(40)

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
0,Australia,Historical,EV stock share,Cars,EV,2011,percent,0.00039
1,Australia,Historical,EV sales share,Cars,EV,2011,percent,0.0065
2,Australia,Historical,EV sales,Cars,BEV,2011,Vehicles,49.0
3,Australia,Historical,EV stock,Cars,BEV,2011,Vehicles,49.0
4,Australia,Historical,EV stock,Cars,BEV,2012,Vehicles,220.0
5,Australia,Historical,EV sales,Cars,BEV,2012,Vehicles,170.0
6,Australia,Historical,EV sales share,Cars,EV,2012,percent,0.03
7,Australia,Historical,EV stock share,Cars,EV,2012,percent,0.0024
8,Australia,Historical,EV stock,Cars,PHEV,2012,Vehicles,80.0
9,Australia,Historical,EV sales,Cars,PHEV,2012,Vehicles,80.0


In [4]:
# get a better idea of the dataset
df.describe()

Unnamed: 0,year,value
count,12654.0,12654.0
mean,2019.822112,427374.2
std,5.476494,6860498.0
min,2010.0,1.2e-06
25%,2016.0,2.0
50%,2020.0,130.0
75%,2022.0,5500.0
max,2035.0,440000000.0


In [5]:
# Check how many non null values each column has
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12654 entries, 0 to 12653
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   region      12654 non-null  object 
 1   category    12654 non-null  object 
 2   parameter   12654 non-null  object 
 3   mode        12654 non-null  object 
 4   powertrain  12654 non-null  object 
 5   year        12654 non-null  int64  
 6   unit        12654 non-null  object 
 7   value       12654 non-null  float64
dtypes: float64(1), int64(1), object(6)
memory usage: 791.0+ KB


In [6]:
# how many rows have data
df.count()

region        12654
category      12654
parameter     12654
mode          12654
powertrain    12654
year          12654
unit          12654
value         12654
dtype: int64

In [7]:
# find the datatypes
df.dtypes

region         object
category       object
parameter      object
mode           object
powertrain     object
year            int64
unit           object
value         float64
dtype: object

In [8]:
# Check for missing values in the dataset
print(df.isnull().sum())

region        0
category      0
parameter     0
mode          0
powertrain    0
year          0
unit          0
value         0
dtype: int64


In [9]:
# Remove duplicate rows
df = df.drop_duplicates()

In [10]:
#Check dataset to see if any duplicates were dropped
df.describe()

Unnamed: 0,year,value
count,12654.0,12654.0
mean,2019.822112,427374.2
std,5.476494,6860498.0
min,2010.0,1.2e-06
25%,2016.0,2.0
50%,2020.0,130.0
75%,2022.0,5500.0
max,2035.0,440000000.0


In [11]:
# list the  countries/regions where we have data
df['region'].unique()

array(['Australia', 'Austria', 'Belgium', 'Brazil', 'Bulgaria', 'Canada',
       'Chile', 'China', 'Colombia', 'Costa Rica', 'Croatia', 'Cyprus',
       'Czech Republic', 'Denmark', 'Estonia', 'EU27', 'Europe',
       'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Iceland',
       'India', 'Indonesia', 'Ireland', 'Israel', 'Italy', 'Japan',
       'Korea', 'Latvia', 'Lithuania', 'Luxembourg', 'Mexico',
       'Netherlands', 'New Zealand', 'Norway', 'Poland', 'Portugal',
       'Rest of the world', 'Romania', 'Seychelles', 'Slovakia',
       'Slovenia', 'South Africa', 'Spain', 'Sweden', 'Switzerland',
       'Thailand', 'Turkiye', 'United Arab Emirates', 'United Kingdom',
       'USA', 'World'], dtype=object)

In [None]:
# Make dataframe of ev adoption 2023 by ev sales share 
df_2023_evsales = df.loc[
    (df['year']==2023) &
    (df['parameter']== 'EV sales share')
]

In [14]:
df_2023_evsales.head()

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
90,Australia,Historical,EV sales share,Cars,EV,2023,percent,12.0
201,Austria,Historical,EV sales share,Cars,EV,2023,percent,26.0
493,Belgium,Historical,EV sales share,Trucks,EV,2023,percent,1.7
495,Belgium,Historical,EV sales share,Vans,EV,2023,percent,4.6
497,Belgium,Historical,EV sales share,Buses,EV,2023,percent,62.0


In [18]:
df_2023_evsales_ordered = df_2023_evsales.sort_values(by='value', ascending = False)
df_2023_evsales_ordered.head(25)

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
7922,Norway,Historical,EV sales share,Cars,EV,2023,percent,93.0
5392,Iceland,Historical,EV sales share,Cars,EV,2023,percent,71.0
10203,Switzerland,Historical,EV sales share,Buses,EV,2023,percent,65.0
497,Belgium,Historical,EV sales share,Buses,EV,2023,percent,62.0
9936,Sweden,Historical,EV sales share,Cars,EV,2023,percent,60.0
4496,Finland,Historical,EV sales share,Cars,EV,2023,percent,54.0
7928,Norway,Historical,EV sales share,Buses,EV,2023,percent,53.0
1859,China,Historical,EV sales share,Buses,EV,2023,percent,50.0
1924,China,Projection-APS,EV sales share,Buses,EV,2023,percent,50.0
1828,China,Projection-STEPS,EV sales share,Buses,EV,2023,percent,50.0
