In [89]:
import pandas as pd
import countryinfo as countries # Source: https://gist.github.com/canfixit/1662664


In [90]:
countries = countryinfo.countries
eu_codes = []
eu_names = []
iso_name = {}
exclusion_list = ['San Marino', 'Russia', 'Vatican City', 'Monaco']
for co in countries:
    
    # Excluding Russia, Monacco, San Marino and Vatican City bc. lack of data
    if co['name'] in exclusion_list:
        print(co['name'])
        continue
        
    if co['continent'].lower() == 'europe':
        iso = co['code']
        name = co['name']
        eu_codes.append(iso)
        eu_names.append(name)
        iso_name[iso] = name
        
tot_num_c = len(eu_codes)
print("Number of countries: ", tot_num_c)

Russia
San Marino
Vatican City
Monaco
Number of countries:  40


#### Source: https://gist.github.com/canfixit/1662664


#### Must rename some of the countries in some datasets in order to merge on the right values, as the source uses different names for the countries. Best method is to use ISO-codes, as these are generalistic but not all datasets might be provided with ISO, only country name. 

In [91]:
"""
Temperature x
Emissions, Population x
GDP x
Green bonds x
Environmental taxes
Environmental protection expenditure
Electric vehicles x
Greenhouse policies x
"""
# Main dataframe
df_main = None
# Time period from 2012 to 2022
time_period = list(range(2012, 2022))

In [92]:
df_temp_change = pd.read_csv("data/Annual_Surface_Temperature_Change.csv")

df_temp_change = df_temp_change[df_temp_change['ISO2'].isin(eu_codes)]

df_temp_change = df_temp_change.drop(['Unit', 'CTS_Code', 'Indicator', 'Source', 'CTS_Name', 'ObjectId', 'CTS_Full_Descriptor'], axis = 1, inplace=False)
df_temp_change.columns = [str(x).split("F")[-1] for x in df_temp_change.columns]

df_temp_change.columns= df_temp_change.columns.str.lower()
df_temp_change = pd.melt(df_temp_change, id_vars=["country", "iso2", 'iso3'], var_name = ['year'], value_name = 'temp_change C')
df_temp_change["year"] = pd.to_numeric(df_temp_change["year"])

df_temp_change.sort_values(by=["country", 'year'], inplace=True)
df_temp_change.columns = ['country', 'ISO2', 'ISO3', 'year', 'Temperature Change C']
df_temp_change.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 2440 entries, 0 to 2439
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   country               2440 non-null   object 
 1   ISO2                  2440 non-null   object 
 2   ISO3                  2440 non-null   object 
 3   year                  2440 non-null   int64  
 4   Temperature Change C  1898 non-null   float64
dtypes: float64(1), int64(1), object(3)
memory usage: 114.4+ KB


In [93]:
df_co2 = pd.read_csv("data/annual-co2-emissions-per-country.csv")
df_co2.columns = df_co2.columns.str.lower()
df_co2.columns = ['country', 'ISO3', 'year', 'annual co2 emmisions']
df_co2 = df_co2[df_co2['ISO3'].isin(set(df_temp_change_euro['ISO3']))]

df_co2["year"] = pd.to_numeric(df_co2["year"])
df_co2 = df_co2[['ISO3', 'year', 'annual co2 emmisions']]
df_co2.rename(columns={'annual co2 emmisions' : 'annual t co2 emmisions'}, inplace = True)
print("All countries: ", len((set(eu_codes) - set(df_co2['ISO3']))) == 0)
df_co2.info()

All countries:  False
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6556 entries, 517 to 29726
Data columns (total 3 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ISO3                    6556 non-null   object 
 1   year                    6556 non-null   int64  
 2   annual t co2 emmisions  6556 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 204.9+ KB


In [94]:
df_gdp = pd.read_csv("data/GDP.csv",delimiter=";")
#Drop last column, as it is no values
df_gdp = df_gdp.iloc[: , :-1]

df_gdp = df_gdp[df_gdp['Country Code'].isin(set(df_temp_change_euro['ISO3']))]
df_gdp.drop(['Country Name', 'Indicator Name', 'Indicator Code'], axis = 1, inplace = True)
df_gdp.columns = df_gdp.columns.str.lower()
df_gdp.rename(columns = {'country code' : "ISO3"}, inplace=True)

df_gdp = pd.melt(df_gdp, id_vars=["ISO3"], var_name = ['year'], value_name = 'GDP US$')
df_gdp["year"] = pd.to_numeric(df_gdp["year"])

print("All countries: ", len((set(eu_codes) - set(df_gdp['ISO3']))) == 0)

df_gdp.info()

All countries:  False
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2480 entries, 0 to 2479
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   ISO3     2480 non-null   object 
 1   year     2480 non-null   int64  
 2   GDP US$  1824 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 58.2+ KB


In [95]:
# Green Bonds (in Billion $) Probably not inflation adjusted

df_green_bonds = pd.read_csv("data/Green_Bonds.csv")

df_green_bonds = df_green_bonds[df_green_bonds['ISO3'].isin(set(df_temp_change_euro['ISO3']))]
df_green_bonds.tail(3)
df_green_bonds.drop(['ISO2', 'Indicator', 'Source', 'CTS_Code', 'CTS_Full_Descriptor', 'Type_of_Issuer', 'Use_of_Proceed', 'Principal_Currency'],axis = 1, inplace=True)
df_green_bonds.columns = df_green_bonds.columns.str.lower()
df_green_bonds.columns = [str(x).split("f")[-1] for x in df_green_bonds.columns]

# Only keep cts_name equals Green Bonds Issuances, as we can calculate total later (cts_name == Green Bonds)
df_green_bonds = df_green_bonds.loc[df_green_bonds['cts_name'] == 'Green Bonds Issuances'] 
df_green_bonds.drop(['cts_name', 'unit', 'objectid', 'country'],axis = 1, inplace=True)

df_green_bonds.rename(columns = {'iso3' : "ISO3"}, inplace=True)

df_green_bonds = pd.melt(df_green_bonds, id_vars=["ISO3"], var_name = ['year'], value_name = 'Green Bonds Issuance US$')
df_green_bonds["year"] = pd.to_numeric(df_green_bonds["year"])
df_green_bonds.sort_values(by=["ISO3", 'year'], inplace=True)

print("Missing countries: ", (set(eu_codes) - set(df_green_bonds['ISO3'])))

df_green_bonds.info()

Missing countries:  {'HR', 'BY', 'IE', 'ME', 'MD', 'ES', 'MT', 'CZ', 'BG', 'RO', 'BA', 'GB', 'AD', 'LV', 'RS', 'LU', 'DE', 'IS', 'MK', 'AT', 'BE', 'CH', 'AL', 'DK', 'IT', 'LI', 'EE', 'NL', 'FI', 'NO', 'GR', 'FR', 'SE', 'UA', 'LT', 'SK', 'SI', 'PT', 'HU', 'PL'}
<class 'pandas.core.frame.DataFrame'>
Int64Index: 840 entries, 0 to 838
Data columns (total 3 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   ISO3                      840 non-null    object 
 1   year                      840 non-null    int64  
 2   Green Bonds Issuance US$  158 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 26.2+ KB


As we observe from the output, we are missing substantial data from some countries, as well as from many years, as green bonds issuance is quite new

In [96]:
# Population df

df_pop = pd.read_csv("data/population.csv")
df_pop = df_pop[df_pop['Country Code'].isin(set(df_temp_change_euro['ISO3']))]
df_pop.columns = df_pop.columns.str.lower()
df_pop.columns = [str(x).split("[")[0] for x in df_pop.columns]
df_pop.drop(['series name', 'series code', 'country name'] ,axis=1, inplace = True)
df_pop.rename(columns = {'country code' : "ISO3"}, inplace=True)
df_pop = pd.melt(df_pop, id_vars=["ISO3"], var_name = ['year'], value_name = 'population')
df_pop["year"] = pd.to_numeric(df_pop["year"])
df_pop["population"] = pd.to_numeric(df_pop["population"])
df_pop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ISO3        2000 non-null   object
 1   year        2000 non-null   int64 
 2   population  2000 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 47.0+ KB


In [97]:
df_sales = pd.read_csv("data/IEA-EV-dataEV salesCarsHistorical.csv")
df_ev_chargingP = pd.read_csv("data/IEA-EV-dataEV charging pointsHistorical.csv")
#df_sales_share = pd.read_csv("data/")

Notes

BEVs are battery electric vehicles. 

PHEVs are plug-in hybrid electric vehicles. 

FCEVs are fuel cell electric vehicles. 

EVs refers to all electric vehicles (BEVs + PHEVs).

In [98]:
df_sales[df_sales['region'] == "Norway"].head()


Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
397,Norway,Historical,EV sales,Cars,BEV,2010,sales,360
398,Norway,Historical,EV sales,Cars,BEV,2011,sales,2000
399,Norway,Historical,EV sales,Cars,BEV,2012,sales,3900
400,Norway,Historical,EV sales,Cars,PHEV,2012,sales,320
401,Norway,Historical,EV sales,Cars,PHEV,2013,sales,340


In [99]:

# one for BEVs and one for PHEV ??
df_bev_sales = df_sales[df_sales['powertrain'] == "BEV"]
df_phev_sales = df_sales[df_sales['powertrain'] == "PHEV"]
df_bev_sales.head()

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
0,Australia,Historical,EV sales,Cars,BEV,2011,sales,49
1,Australia,Historical,EV sales,Cars,BEV,2012,sales,170
4,Australia,Historical,EV sales,Cars,BEV,2013,sales,190
5,Australia,Historical,EV sales,Cars,BEV,2014,sales,370
8,Australia,Historical,EV sales,Cars,BEV,2015,sales,760


In [100]:
df_ev_chargingP[df_ev_chargingP['region'] == "Italy"].head()

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
209,Italy,Historical,EV charging points,EV,Publicly available fast,2012,charging points,1.0
210,Italy,Historical,EV charging points,EV,Publicly available slow,2012,charging points,1400.0
211,Italy,Historical,EV charging points,EV,Publicly available fast,2013,charging points,6.0
212,Italy,Historical,EV charging points,EV,Publicly available slow,2013,charging points,1400.0
213,Italy,Historical,EV charging points,EV,Publicly available fast,2014,charging points,10.0


In [101]:
## EEA Greenhouse Policies by country

df_greenhouse_policies = pd.read_csv("data/EEA_greenhouse_policies.csv")
df_greenhouse_policies.head(1)

Unnamed: 0,Country,ID of policy or measure,Name of policy or measure,Description,Geographical_coverage,"Single policy or measure, or group of measures",Report_ID,Policies or measures included in the group,Type of policy instrument,Status of implementation,...,Year realised cost has been calculated for,Price reference year (realised costs),Realised benefits (EUR per tonne CO2eq reduced/ sequestered),Realised absolute benefit per year (EUR),Realised net costs (EUR per tonne CO2eq reduced/ sequestered),Realised net cost per year (EUR),Description of realised cost estimates,Description of non-GHG mitigation realized benefits,Reference for realised costs and benefits,Web link for realised costs and benefits
0,Austria,1,EU Emission Trading Scheme (ETS),The objective is to limit the CO2 emission fro...,National,Single,526,Single PaM,Economic; Regulatory,Implemented,...,,,,,,,,,,


In [102]:
set(eu_names) - set(df_greenhouse_policies['Country'].unique())

{'Albania',
 'Andorra',
 'Belarus',
 'Bosnia and Herzegovina',
 'Czech Republic',
 'Kingdom of the Netherlands',
 'Liechtenstein',
 'Macedonia',
 'Moldova',
 'Montenegro',
 'Republic of Ireland',
 'Serbia',
 'Ukraine',
 'United Kingdom'}

In [103]:
set(df_greenhouse_policies['Country'].unique()) - set(eu_names)

{'Cyprus', 'Czechia', 'Ireland', 'Netherlands'}

In [104]:
country_name_change = { 
    'Czechia' : 'Czech Republic', 
    'Ireland' : 'Republic of Ireland', 
    'Netherlands' : 'Kingdom of the Netherlands'}

# Merging into one main dataframe
- With average and total as two seperate datasheets
- Writing all final DataFrames into a complete Excel file

In [107]:
df_main = df_temp_done

# Check if all conseccutive years are present
## Check if all years still in the dataset for every country
for c in df_main['country'].unique():
    for_this_c = df_main[df_main['country'] == c]
    all_years = (for_this_c['year'] == list(range(min(for_this_c['year']), max(for_this_c['year']) + 1))).all()
    if not all_years:
        print("Not Consecutive years included for: ", c)
print("Rest have all years included")

# Check if all countries in dataset
print("Number of countries before: ",len(df_main['country'].unique()) == len(eu_codes))

# Merge instead of join, because join uses index, while merge could specify
# multiple columns
# Using left joint, since we want to hold all values already in the dataframe
# for instance the years, so all missing years in later merged dataframes
# will be set to Nans

# Merge all dataframes

dfs = [df_co2, df_gdp, df_green_bonds, df_pop]

for frame in dfs:
    
    df_main = df_main.merge(frame, left_on=['ISO3','year'], right_on = ['ISO3', 'year'], how = 'left')

# Insert country name
df_main['country'] = df_main['ISO2']
df_main['country'].replace(iso_name, inplace=True)

## Only keep relevant time period
df_main = df_main[df_main['year'].isin(set(time_period))]

# Check if all countries still in the dataset
print("Number of countries after: ",len(df_main['country'].unique()) == len(eu_codes))

## Check if all years still in the dataset for every country
for c in df_main['country'].unique():
    for_this_c = df_main[df_main['country'] == c]
    all_years = (for_this_c['year'] == time_period).all()
    if not all_years:
        print("Not Consecutive years included for: ", c)
print("Rest have all years included")

df_main


Rest have all years included
Number of countries before:  True
Number of countries after:  True
Rest have all years included


Unnamed: 0,country,ISO2,ISO3,year,Temperature Change C,annual t co2 emmisions,GDP US$,Green Bonds Issuance US$,population
51,Albania,AL,ALB,2012,1.568,4850060.0,1.231983e+10,,2900401.0
52,Albania,AL,ALB,2013,1.444,5287466.0,1.277622e+10,,2895092.0
53,Albania,AL,ALB,2014,1.322,5999658.0,1.322815e+10,,2889104.0
54,Albania,AL,ALB,2015,1.665,4712137.0,1.138685e+10,,2880703.0
55,Albania,AL,ALB,2016,1.601,4631977.0,1.186120e+10,,2876101.0
...,...,...,...,...,...,...,...,...,...
2435,United Kingdom,GB,GBR,2017,1.437,387367140.0,2.699017e+12,2.196610,66058859.0
2436,United Kingdom,GB,GBR,2018,1.012,379729760.0,2.900791e+12,8.175406,66460344.0
2437,United Kingdom,GB,GBR,2019,1.247,364753280.0,2.878674e+12,4.058670,66836327.0
2438,United Kingdom,GB,GBR,2020,1.383,326263200.0,2.756900e+12,4.891677,67081000.0


In [108]:
df_main.describe()

Unnamed: 0,year,Temperature Change C,annual t co2 emmisions,GDP US$,Green Bonds Issuance US$,population
count,400.0,400.0,400.0,398.0,136.0,400.0
mean,2016.5,1.72504,97049860.0,491000200000.0,6.36969,14920500.0
std,2.875878,0.552375,151478500.0,857111200000.0,10.864977,21091710.0
min,2012.0,0.373,141996.0,2789881000.0,0.022642,36615.0
25%,2014.0,1.3655,10418110.0,30529700000.0,0.824461,2457198.0
50%,2016.5,1.708,41947400.0,148509600000.0,2.199497,6350406.0
75%,2019.0,2.11275,90624150.0,492723900000.0,6.760721,11060490.0
max,2021.0,3.595,831207600.0,4223116000000.0,71.70096,83160870.0


In [109]:
df_main.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 400 entries, 51 to 2439
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   country                   400 non-null    object 
 1   ISO2                      400 non-null    object 
 2   ISO3                      400 non-null    object 
 3   year                      400 non-null    int64  
 4   Temperature Change C      400 non-null    float64
 5   annual t co2 emmisions    400 non-null    float64
 6   GDP US$                   398 non-null    float64
 7   Green Bonds Issuance US$  136 non-null    float64
 8   population                400 non-null    float64
dtypes: float64(5), int64(1), object(3)
memory usage: 31.2+ KB


## Average DataFrame

In [80]:
# Making a average DataFrame
df_average = df_main.groupby(["year"], as_index=False)
df_average = df_average.mean()
df_average


Unnamed: 0,year,Temperature Change C,annual t co2 emmisions,GDP US$,Green Bonds Issuance US$,population
0,2012,1.412825,107100900.0,472476600000.0,0.650395,14792550.0
1,2013,0.999375,104698300.0,492762300000.0,0.83007,14828570.0
2,2014,1.9823,98959790.0,507333600000.0,1.821085,14864130.0
3,2015,1.71095,99007670.0,445995000000.0,3.003919,14899090.0
4,2016,1.81825,98932650.0,448078200000.0,2.225136,14931980.0
5,2017,1.5063,99003480.0,471185600000.0,4.39836,14956010.0
6,2018,2.041925,97179360.0,509231300000.0,4.61024,14978020.0
7,2019,1.986375,92942010.0,501286100000.0,8.428476,14987610.0
8,2020,2.302875,84256040.0,500171600000.0,8.403601,14992380.0
9,2021,1.489225,88418430.0,563523700000.0,11.78355,14974620.0


## Total DataFrame

In [81]:
# Making total DataFrame
df_total = df_main.groupby(["year"], as_index=False)
df_total = df_total.sum()
df_total

Unnamed: 0,year,Temperature Change C,annual t co2 emmisions,GDP US$,Green Bonds Issuance US$,population
0,2012,56.513,4284036000.0,18899060000000.0,0.650395,591702029.0
1,2013,39.975,4187930000.0,19710490000000.0,4.150349,593142931.0
2,2014,79.292,3958392000.0,20293350000000.0,16.389768,594565062.0
3,2015,68.438,3960307000.0,17839800000000.0,27.035269,595963744.0
4,2016,72.73,3957306000.0,17923130000000.0,31.151905,597279081.0
5,2017,60.252,3960139000.0,18847420000000.0,65.975395,598240534.0
6,2018,81.677,3887175000.0,20369250000000.0,82.984313,599120957.0
7,2019,79.455,3717680000.0,20051440000000.0,151.712572,599504550.0
8,2020,92.115,3370242000.0,19506690000000.0,168.072022,599695156.0
9,2021,59.569,3536737000.0,21977430000000.0,318.155847,598984930.0


In [82]:
## Write all DataFrames to excel

writer = pd.ExcelWriter('data/main.xlsx', engine='xlsxwriter')

df_main.to_excel(writer, sheet_name = "main")
df_average.to_excel(writer, sheet_name = "average")
df_total.to_excel(writer, sheet_name = "total")
#critical last step
writer.save()

In [83]:
s = "country,year,iso_code,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,co2_including_luc,co2_including_luc_growth_abs,co2_including_luc_growth_prct,co2_including_luc_per_capita,co2_including_luc_per_gdp,co2_including_luc_per_unit_energy,co2_per_capita,co2_per_gdp,co2_per_unit_energy,coal_co2,coal_co2_per_capita,consumption_co2,consumption_co2_per_capita,consumption_co2_per_gdp,cumulative_cement_co2,cumulative_co2,cumulative_co2_including_luc,cumulative_coal_co2,cumulative_flaring_co2,cumulative_gas_co2,cumulative_luc_co2,cumulative_oil_co2,cumulative_other_co2,energy_per_capita,energy_per_gdp,flaring_co2,flaring_co2_per_capita,gas_co2,gas_co2_per_capita,ghg_excluding_lucf_per_capita,ghg_per_capita,land_use_change_co2,land_use_change_co2_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,oil_co2,oil_co2_per_capita,other_co2_per_capita,other_industry_co2,primary_energy_consumption,share_global_cement_co2,share_global_co2,share_global_co2_including_luc,share_global_coal_co2,share_global_cumulative_cement_co2,share_global_cumulative_co2,share_global_cumulative_co2_including_luc,share_global_cumulative_coal_co2,share_global_cumulative_flaring_co2,share_global_cumulative_gas_co2,share_global_cumulative_luc_co2,share_global_cumulative_oil_co2,share_global_cumulative_other_co2,share_global_flaring_co2,share_global_gas_co2,share_global_luc_co2,share_global_oil_co2,share_global_other_co2,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share"

for i in s.split(","):
    print(i)

country
year
iso_code
population
gdp
cement_co2
cement_co2_per_capita
co2
co2_growth_abs
co2_growth_prct
co2_including_luc
co2_including_luc_growth_abs
co2_including_luc_growth_prct
co2_including_luc_per_capita
co2_including_luc_per_gdp
co2_including_luc_per_unit_energy
co2_per_capita
co2_per_gdp
co2_per_unit_energy
coal_co2
coal_co2_per_capita
consumption_co2
consumption_co2_per_capita
consumption_co2_per_gdp
cumulative_cement_co2
cumulative_co2
cumulative_co2_including_luc
cumulative_coal_co2
cumulative_flaring_co2
cumulative_gas_co2
cumulative_luc_co2
cumulative_oil_co2
cumulative_other_co2
energy_per_capita
energy_per_gdp
flaring_co2
flaring_co2_per_capita
gas_co2
gas_co2_per_capita
ghg_excluding_lucf_per_capita
ghg_per_capita
land_use_change_co2
land_use_change_co2_per_capita
methane
methane_per_capita
nitrous_oxide
nitrous_oxide_per_capita
oil_co2
oil_co2_per_capita
other_co2_per_capita
other_industry_co2
primary_energy_consumption
share_global_cement_co2
share_global_co2
share_g