In [1]:
import pandas as pd
import countryinfo as cf # Source: https://gist.github.com/canfixit/1662664


In [2]:
countries = cf.countries
eu_codes = []
eu_names = []
iso_name = {}
name_iso = {}
exclusion_list = ['San Marino', 'Russia', 'Vatican City', 'Monaco']
for co in countries:
    
    # Excluding Russia, Monacco, San Marino and Vatican City bc. lack of data
    if co['name'] in exclusion_list:
        print(co['name'])
        continue
        
    if co['continent'].lower() == 'europe':
        name = co['name']
        if name == 'Kingdom of the Netherlands':
            name = "Netherlands"

        iso = co['code']
        eu_codes.append(iso)
        eu_names.append(name)
        iso_name[iso] = name
        name_iso[name] = iso
        
tot_num_c = len(eu_codes)

print("Number of countries: ", tot_num_c)

Russia
San Marino
Vatican City
Monaco
Number of countries:  40


#### Source: https://gist.github.com/canfixit/1662664


#### Must rename some of the countries in some datasets in order to merge on the right values, as the source uses different names for the countries. Best method is to use ISO-codes, as these are generalistic but not all datasets might be provided with ISO, only country name. 

In [3]:
"""
Temperature x
Emissions, Population x
GDP x
Green bonds x
Environmental taxes
Environmental protection expenditure
Electric vehicles x
Greenhouse policies x
"""
# Main dataframe
df_main = None
# Time period from 2012 to 2022
time_period = list(range(2011, 2022))

In [4]:
df_temp_change = pd.read_csv("data/Annual_Surface_Temperature_Change.csv")

df_temp_change = df_temp_change[df_temp_change['ISO2'].isin(eu_codes)]

df_temp_change = df_temp_change.drop(['Unit','Country', 'CTS_Code', 'Indicator', 'Source', 'CTS_Name', 'ObjectId', 'CTS_Full_Descriptor'], axis = 1, inplace=False)
df_temp_change.columns = [str(x).split("F")[-1] for x in df_temp_change.columns]

df_temp_change.columns= df_temp_change.columns.str.lower()
df_temp_change = pd.melt(df_temp_change, id_vars=["iso2", 'iso3'], var_name = ['year'], value_name = 'temp_change C')
df_temp_change["year"] = pd.to_numeric(df_temp_change["year"])

df_temp_change.sort_values(by=["iso3", 'year'], inplace=True)
df_temp_change.columns = ['ISO2', 'ISO3', 'year', 'Temperature Change C']
df_temp_change.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2440 entries, 0 to 2438
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   ISO2                  2440 non-null   object 
 1   ISO3                  2440 non-null   object 
 2   year                  2440 non-null   int64  
 3   Temperature Change C  1898 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 95.3+ KB


In [5]:
df_co2 = pd.read_csv("data/annual-co2-emissions-per-country.csv")
df_co2.columns = df_co2.columns.str.lower()
df_co2.columns = ['country', 'ISO3', 'year', 'annual co2 emmisions']
df_co2 = df_co2[df_co2['ISO3'].isin(set(df_temp_change['ISO3']))]

df_co2["year"] = pd.to_numeric(df_co2["year"])
df_co2 = df_co2[['ISO3', 'year', 'annual co2 emmisions']]
df_co2.rename(columns={'annual co2 emmisions' : 'annual t co2 emmisions'}, inplace = True)
print("All countries: ", len((set(eu_codes) - set(df_co2['ISO3']))) == 0)
df_co2.info()

All countries:  False
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6556 entries, 517 to 29726
Data columns (total 3 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ISO3                    6556 non-null   object 
 1   year                    6556 non-null   int64  
 2   annual t co2 emmisions  6556 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 204.9+ KB


In [6]:
df_gdp = pd.read_csv("data/GDP.csv",delimiter=";")
#Drop last column, as it is no values
df_gdp = df_gdp.iloc[: , :-1]

df_gdp = df_gdp[df_gdp['Country Code'].isin(set(df_temp_change['ISO3']))]
df_gdp.drop(['Country Name', 'Indicator Name', 'Indicator Code'], axis = 1, inplace = True)
df_gdp.columns = df_gdp.columns.str.lower()
df_gdp.rename(columns = {'country code' : "ISO3"}, inplace=True)

df_gdp = pd.melt(df_gdp, id_vars=["ISO3"], var_name = ['year'], value_name = 'GDP US$')
df_gdp["year"] = pd.to_numeric(df_gdp["year"])

print("All countries: ", len((set(eu_codes) - set(df_gdp['ISO3']))) == 0)

df_gdp.info()

All countries:  False
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2480 entries, 0 to 2479
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   ISO3     2480 non-null   object 
 1   year     2480 non-null   int64  
 2   GDP US$  1824 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 58.2+ KB


In [7]:
# Green Bonds (in Billion $) Probably not inflation adjusted

df_green_bonds = pd.read_csv("data/Green_Bonds.csv")

df_green_bonds = df_green_bonds[df_green_bonds['ISO3'].isin(set(df_temp_change['ISO3']))]
df_green_bonds.tail(3)
df_green_bonds.drop(['ISO2', 'Indicator', 'Source', 'CTS_Code', 'CTS_Full_Descriptor', 'Type_of_Issuer', 'Use_of_Proceed', 'Principal_Currency'],axis = 1, inplace=True)
df_green_bonds.columns = df_green_bonds.columns.str.lower()
df_green_bonds.columns = [str(x).split("f")[-1] for x in df_green_bonds.columns]

# Only keep cts_name equals Green Bonds Issuances, as we can calculate total later (cts_name == Green Bonds)
df_green_bonds = df_green_bonds.loc[df_green_bonds['cts_name'] == 'Green Bonds Issuances'] 
df_green_bonds.drop(['cts_name', 'unit', 'objectid', 'country'],axis = 1, inplace=True)

df_green_bonds.rename(columns = {'iso3' : "ISO3"}, inplace=True)

df_green_bonds = pd.melt(df_green_bonds, id_vars=["ISO3"], var_name = ['year'], value_name = 'Green Bonds Issuance (Billion US$)')
df_green_bonds["year"] = pd.to_numeric(df_green_bonds["year"])
df_green_bonds.sort_values(by=["ISO3", 'year'], inplace=True)

print("Missing countries: ", (set(eu_codes) - set(df_green_bonds['ISO3'])))

df_green_bonds.info()

Missing countries:  {'AL', 'MK', 'LU', 'HR', 'DK', 'BE', 'HU', 'UA', 'IE', 'AD', 'NL', 'CH', 'AT', 'IS', 'FR', 'BA', 'SE', 'GB', 'PT', 'BY', 'PL', 'RS', 'MD', 'ME', 'LV', 'CZ', 'DE', 'IT', 'EE', 'LT', 'RO', 'MT', 'FI', 'NO', 'GR', 'BG', 'LI', 'SK', 'SI', 'ES'}
<class 'pandas.core.frame.DataFrame'>
Int64Index: 840 entries, 0 to 838
Data columns (total 3 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   ISO3                                840 non-null    object 
 1   year                                840 non-null    int64  
 2   Green Bonds Issuance (Billion US$)  158 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 26.2+ KB


As we observe from the output, we are missing substantial data from some countries, as well as from many years, as green bonds issuance is quite new

In [8]:
# Population df

df_pop = pd.read_csv("data/population.csv")
df_pop = df_pop[df_pop['Country Code'].isin(set(df_temp_change['ISO3']))]
df_pop.columns = df_pop.columns.str.lower()
df_pop.columns = [str(x).split("[")[0] for x in df_pop.columns]
df_pop.drop(['series name', 'series code', 'country name'] ,axis=1, inplace = True)
df_pop.rename(columns = {'country code' : "ISO3"}, inplace=True)
df_pop = pd.melt(df_pop, id_vars=["ISO3"], var_name = ['year'], value_name = 'population')
df_pop["year"] = pd.to_numeric(df_pop["year"])
df_pop["population"] = pd.to_numeric(df_pop["population"])
df_pop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ISO3        2000 non-null   object
 1   year        2000 non-null   int64 
 2   population  2000 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 47.0+ KB


In [9]:
df_sales = pd.read_csv("data/IEA-EV-dataEV salesCarsHistorical.csv")


Notes

BEVs are battery electric vehicles. 

PHEVs are plug-in hybrid electric vehicles. 

FCEVs are fuel cell electric vehicles. 

EVs refers to all electric vehicles (BEVs + PHEVs).

In [10]:
df_sales[df_sales['region'] == "Norway"].head()


Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
397,Norway,Historical,EV sales,Cars,BEV,2010,sales,360
398,Norway,Historical,EV sales,Cars,BEV,2011,sales,2000
399,Norway,Historical,EV sales,Cars,BEV,2012,sales,3900
400,Norway,Historical,EV sales,Cars,PHEV,2012,sales,320
401,Norway,Historical,EV sales,Cars,PHEV,2013,sales,340


In [11]:

# one for BEVs and one for PHEV ??
df_bev_sales = df_sales[df_sales['powertrain'] == "BEV"]
df_phev_sales = df_sales[df_sales['powertrain'] == "PHEV"]

df_bev_sales = df_bev_sales[['region', 'year', 'value']]
df_phev_sales = df_bev_sales[['region', 'year', 'value']]

s = set(df_phev_sales['region'].unique())
s-set(eu_names)
before = set(eu_names) - s

df_bev_sales = df_bev_sales[df_bev_sales['region'].isin(eu_names)]
df_phev_sales = df_phev_sales[df_phev_sales['region'].isin(eu_names)]

s = set(df_phev_sales['region'].unique())
s-set(eu_names)
after = set(eu_names) - s
print(before - after)
df_bev_sales['region'].replace(name_iso, inplace = True)
df_phev_sales['region'].replace(name_iso, inplace = True)

df_bev_sales.rename(columns = {"region" : "ISO2", "value" : "Battery Electric Vehicles sales"}, inplace = True)
df_phev_sales.rename(columns = {"region" : "ISO2", "value" : "Plugin Hybrid Vehicles sales"}, inplace = True)

df_phev_sales

set()


Unnamed: 0,ISO2,year,Plugin Hybrid Vehicles sales
21,BE,2010,35
22,BE,2011,280
25,BE,2012,1000
26,BE,2013,610
29,BE,2014,1400
...,...,...,...
585,GB,2017,14000
586,GB,2018,16000
589,GB,2019,38000
590,GB,2020,110000


In [12]:
df_ev_chargingP = pd.read_csv("data/IEA-EV-dataEV charging pointsHistorical.csv")
df_ev_chargingP

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
0,Australia,Historical,EV charging points,EV,Publicly available fast,2017,charging points,40.0
1,Australia,Historical,EV charging points,EV,Publicly available slow,2017,charging points,440.0
2,Australia,Historical,EV charging points,EV,Publicly available fast,2018,charging points,61.0
3,Australia,Historical,EV charging points,EV,Publicly available slow,2018,charging points,670.0
4,Australia,Historical,EV charging points,EV,Publicly available fast,2019,charging points,250.0
...,...,...,...,...,...,...,...,...
522,World,Historical,EV charging points,EV,Publicly available slow,2019,charging points,630000.0
523,World,Historical,EV charging points,EV,Publicly available fast,2020,charging points,390000.0
524,World,Historical,EV charging points,EV,Publicly available slow,2020,charging points,910000.0
525,World,Historical,EV charging points,EV,Publicly available fast,2021,charging points,560000.0


In [13]:
df_ev_chargingP = pd.read_csv("data/IEA-EV-dataEV charging pointsHistorical.csv")

s = set(df_ev_chargingP['region'].unique())
s-set(eu_names)
before = set(eu_names) - s

df_ev_chargingP = df_ev_chargingP[df_ev_chargingP['region'].isin(eu_names)]
df_ev_chargingP = df_ev_chargingP[['region', 'year', 'powertrain', 'value']]
df_ev_chargingP_fast = df_ev_chargingP[df_ev_chargingP['powertrain'] == 'Publicly available fast']
df_ev_chargingP_slow = df_ev_chargingP[df_ev_chargingP['powertrain'] == 'Publicly available slow']

s = set(df_ev_chargingP['region'].unique())
s-set(eu_names)
after = set(eu_names) - s
print(before - after)

df_ev_chargingP = df_ev_chargingP_fast.merge(
    df_ev_chargingP_slow, 
    left_on = ['region', 'year'], 
    right_on = ['region', 'year'],
    how = 'inner',
    suffixes=('_fast', '_slow')
)
df_ev_chargingP['total number of chargingpoints'] = df_ev_chargingP['value_fast'] + df_ev_chargingP['value_slow']
df_ev_chargingP = df_ev_chargingP[['region', 'year', 'value_fast', 'value_slow', 'total number of chargingpoints']]
df_ev_chargingP['region'].replace(name_iso, inplace = True)
df_ev_chargingP.rename(
    columns = {'value_fast': 'number of fast EV charginpoints', 
               'value_slow' : 'number of slow EV charginpoints',
               'region' : 'ISO2'},
                inplace = True)
df_ev_chargingP


set()


Unnamed: 0,ISO2,year,number of fast EV charginpoints,number of slow EV charginpoints,total number of chargingpoints
0,BE,2013,47.0,330.0,377.0
1,BE,2014,55.0,560.0,615.0
2,BE,2015,77.0,1300.0,1377.0
3,BE,2016,110.0,1500.0,1610.0
4,BE,2017,220.0,1500.0,1720.0
...,...,...,...,...,...
142,GB,2017,2200.0,13000.0,15200.0
143,GB,2018,2700.0,15000.0,17700.0
144,GB,2019,4700.0,22000.0,26700.0
145,GB,2020,6200.0,27000.0,33200.0


In [14]:
## EEA Greenhouse Policies by country

df_greenhouse_policies = pd.read_csv("data/EEA_greenhouse_policies.csv")
df_greenhouse_policies.head(1)

Unnamed: 0,Country,ID of policy or measure,Name of policy or measure,Description,Geographical_coverage,"Single policy or measure, or group of measures",Report_ID,Policies or measures included in the group,Type of policy instrument,Status of implementation,...,Year realised cost has been calculated for,Price reference year (realised costs),Realised benefits (EUR per tonne CO2eq reduced/ sequestered),Realised absolute benefit per year (EUR),Realised net costs (EUR per tonne CO2eq reduced/ sequestered),Realised net cost per year (EUR),Description of realised cost estimates,Description of non-GHG mitigation realized benefits,Reference for realised costs and benefits,Web link for realised costs and benefits
0,Austria,1,EU Emission Trading Scheme (ETS),The objective is to limit the CO2 emission fro...,National,Single,526,Single PaM,Economic; Regulatory,Implemented,...,,,,,,,,,,


In [15]:
set(eu_names) - set(df_greenhouse_policies['Country'])

{'Albania',
 'Andorra',
 'Belarus',
 'Bosnia and Herzegovina',
 'Czech Republic',
 'Liechtenstein',
 'Macedonia',
 'Moldova',
 'Montenegro',
 'Republic of Ireland',
 'Serbia',
 'Ukraine',
 'United Kingdom'}

In [16]:
set(df_greenhouse_policies['Country']) - set(eu_names)

{'Cyprus', 'Czechia', 'Ireland'}

In [17]:
country_name_change = { 
    'Czechia' : 'Czech Republic', 
    'Ireland' : 'Republic of Ireland', 
    'Netherlands' : 'Kingdom of the Netherlands'}

# Merging into one main dataframe
- With average and total as two seperate datasheets
- Writing all final DataFrames into a complete Excel file

In [65]:
df_main = df_temp_change

# Check if all conseccutive years are present
# Check if all years still in the dataset for every country

for c in df_main['ISO3'].unique():
    for_this_c = df_main[df_main['ISO3'] == c]
    all_years = (for_this_c['year'] == list(range(min(for_this_c['year']), max(for_this_c['year']) + 1))).all()
    if not all_years:
        print("Not Consecutive years included for: ", c)
        
print("Rest have all years included")

# Check if all countries in dataset
print("Number of countries before: ",len(df_main['ISO2'].unique()) == len(eu_codes))

# Merge instead of join, because join uses index, while merge could specify
# multiple columns
# Using left joint, since we want to hold all values already in the dataframe
# for instance the years, so all missing years in later merged dataframes
# will be set to Nans

# Merge all dataframes

dfs = [df_co2, df_gdp, df_green_bonds, df_pop]

for frame in dfs:
    
    df_main = df_main.merge(frame, left_on=['ISO3','year'], right_on = ['ISO3', 'year'], how = 'left')

# Merge EV-files on ISO2
dfs = [df_phev_sales, df_bev_sales, df_ev_chargingP]
for frame in dfs:
    df_main = df_main.merge(frame, left_on=['ISO2','year'], right_on = ['ISO2', 'year'], how = 'left')
    
# Insert country name
df_main['country'] = df_main['ISO2']
df_main['country'].replace(iso_name, inplace=True)

## Only keep relevant time period
df_main = df_main[df_main['year'].isin(set(time_period))]

# Check if all countries still in the dataset
print("Number of countries after: ",len(df_main['country'].unique()) == len(eu_codes))

## Check if all years still in the dataset for every country
for c in df_main['country'].unique():
    for_this_c = df_main[df_main['country'] == c]
    all_years = (for_this_c['year'] == time_period).all()
    if not all_years:
        print("Not Consecutive years included for: ", c)
print("Rest have all years included")

df_main = df_main.astype({'year' : 'int32'}, copy = True)
df_main.year = pd.to_datetime(df_main.year, format='%Y')

#f = df_main[df_main['country'] == 'United Kingdom']["annual t co2 emmisions"]
#f
df_main

Rest have all years included
Number of countries before:  True
Number of countries after:  True
Rest have all years included


Unnamed: 0,ISO2,ISO3,year,Temperature Change C,annual t co2 emmisions,GDP US$,Green Bonds Issuance (Billion US$),population,Plugin Hybrid Vehicles sales,Battery Electric Vehicles sales,number of fast EV charginpoints,number of slow EV charginpoints,total number of chargingpoints,country
50,AL,ALB,2011-01-01,1.108,5314676.0,1.289076e+10,,2905195.0,,,,,,Albania
51,AL,ALB,2012-01-01,1.568,4850060.0,1.231983e+10,,2900401.0,,,,,,Albania
52,AL,ALB,2013-01-01,1.444,5287466.0,1.277622e+10,,2895092.0,,,,,,Albania
53,AL,ALB,2014-01-01,1.322,5999658.0,1.322815e+10,,2889104.0,,,,,,Albania
54,AL,ALB,2015-01-01,1.665,4712137.0,1.138685e+10,,2880703.0,,,,,,Albania
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2435,UA,UKR,2017-01-01,1.401,223087090.0,1.120905e+11,,44831135.0,,,,,,Ukraine
2436,UA,UKR,2018-01-01,2.222,231674700.0,1.308910e+11,,44622518.0,,,,,,Ukraine
2437,UA,UKR,2019-01-01,2.355,222056700.0,1.538830e+11,,44386203.0,,,,,,Ukraine
2438,UA,UKR,2020-01-01,3.014,206941170.0,1.566179e+11,,44132049.0,,,,,,Ukraine


In [66]:
f_list = f.tolist()
(f_list[-1]-f_list[-2])/f_list[-2]

0.06286335694617107

In [67]:
df_main.describe()

Unnamed: 0,Temperature Change C,annual t co2 emmisions,GDP US$,Green Bonds Issuance (Billion US$),population,Plugin Hybrid Vehicles sales,Battery Electric Vehicles sales,number of fast EV charginpoints,number of slow EV charginpoints,total number of chargingpoints
count,440.0,440.0,438.0,137.0,440.0,173.0,173.0,147.0,147.0,147.0
mean,1.665191,98110400.0,491792500000.0,6.323308,14906570.0,16269.427746,16269.427746,954.789116,8036.979592,8991.768707
std,0.56985,152888200.0,858085200000.0,10.838563,21055180.0,40159.809113,40159.809113,1605.803316,12906.046446,13922.754161
min,0.365,141996.0,2789881000.0,0.015355,36299.0,3.0,3.0,1.0,11.0,22.0
25%,1.2975,10762860.0,30485230000.0,0.754205,2457198.0,670.0,670.0,78.0,1100.0,1219.5
50%,1.6445,42254400.0,148509600000.0,2.19661,6350406.0,2700.0,2700.0,340.0,2700.0,3260.0
75%,2.05625,91779330.0,497750800000.0,6.752021,11059980.0,13000.0,13000.0,1000.0,8300.0,9260.0
max,3.595,831207600.0,4223116000000.0,71.70096,83160870.0,360000.0,360000.0,9200.0,83000.0,85600.0


In [68]:
df_main.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 440 entries, 50 to 2439
Data columns (total 14 columns):
 #   Column                              Non-Null Count  Dtype         
---  ------                              --------------  -----         
 0   ISO2                                440 non-null    object        
 1   ISO3                                440 non-null    object        
 2   year                                440 non-null    datetime64[ns]
 3   Temperature Change C                440 non-null    float64       
 4   annual t co2 emmisions              440 non-null    float64       
 5   GDP US$                             438 non-null    float64       
 6   Green Bonds Issuance (Billion US$)  137 non-null    float64       
 7   population                          440 non-null    float64       
 8   Plugin Hybrid Vehicles sales        173 non-null    float64       
 9   Battery Electric Vehicles sales     173 non-null    float64       
 10  number of fast EV chargi

## Average DataFrame

In [69]:
# Making a average DataFrame
df_average = df_main.groupby(["year"], as_index=False)
df_average = df_average.mean()
df_average


Unnamed: 0,year,Temperature Change C,annual t co2 emmisions,GDP US$,Green Bonds Issuance (Billion US$),population,Plugin Hybrid Vehicles sales,Battery Electric Vehicles sales,number of fast EV charginpoints,number of slow EV charginpoints,total number of chargingpoints
0,2011-01-01,1.0667,108715800.0,499676800000.0,0.015355,14767300.0,721.642857,721.642857,12.0,2100.0,2112.0
1,2012-01-01,1.412825,107100900.0,472476600000.0,0.650395,14792550.0,1162.933333,1162.933333,16.2,1545.0,1561.2
2,2013-01-01,0.999375,104698300.0,492762300000.0,0.83007,14828570.0,1978.625,1978.625,137.307692,1990.769231,2128.076923
3,2014-01-01,1.9823,98959790.0,507333600000.0,1.821085,14864130.0,3546.8125,3546.8125,180.071429,2596.428571,2776.5
4,2015-01-01,1.71095,99007670.0,445995000000.0,3.003919,14899090.0,5245.9375,5245.9375,335.866667,3880.733333,4216.6
5,2016-01-01,1.81825,98932650.0,448078200000.0,2.225136,14931980.0,5442.5625,5442.5625,563.533333,7180.733333,7744.266667
6,2017-01-01,1.5063,99003480.0,471185600000.0,4.39836,14956010.0,8138.3125,8138.3125,642.4,7690.133333,8332.533333
7,2018-01-01,2.041925,97179360.0,509231300000.0,4.61024,14978020.0,11815.5,11815.5,875.0,8596.0,9471.0
8,2019-01-01,1.986375,92942010.0,501286100000.0,8.428476,14987610.0,21412.5,21412.5,1392.0,11097.0,12489.0
9,2020-01-01,2.302875,84256040.0,500171600000.0,8.403601,14992380.0,44086.25,44086.25,2087.4375,13908.125,15995.5625


## Total DataFrame

In [70]:
# Making total DataFrame
df_total = df_main.groupby(["year"], as_index=False)
df_total = df_total.sum()
df_total

Unnamed: 0,year,Temperature Change C,annual t co2 emmisions,GDP US$,Green Bonds Issuance (Billion US$),population,Plugin Hybrid Vehicles sales,Battery Electric Vehicles sales,number of fast EV charginpoints,number of slow EV charginpoints,total number of chargingpoints
0,2011-01-01,42.668,4348631000.0,19987070000000.0,0.015355,590692156.0,10103.0,10103.0,24.0,4200.0,4224.0
1,2012-01-01,56.513,4284036000.0,18899060000000.0,0.650395,591702029.0,17444.0,17444.0,162.0,15450.0,15612.0
2,2013-01-01,39.975,4187930000.0,19710490000000.0,4.150349,593142931.0,31658.0,31658.0,1785.0,25880.0,27665.0
3,2014-01-01,79.292,3958392000.0,20293350000000.0,16.389768,594565062.0,56749.0,56749.0,2521.0,36350.0,38871.0
4,2015-01-01,68.438,3960307000.0,17839800000000.0,27.035269,595963744.0,83935.0,83935.0,5038.0,58211.0,63249.0
5,2016-01-01,72.73,3957306000.0,17923130000000.0,31.151905,597279081.0,87081.0,87081.0,8453.0,107711.0,116164.0
6,2017-01-01,60.252,3960139000.0,18847420000000.0,65.975395,598240534.0,130213.0,130213.0,9636.0,115352.0,124988.0
7,2018-01-01,81.677,3887175000.0,20369250000000.0,82.984313,599120957.0,189048.0,189048.0,13125.0,128940.0,142065.0
8,2019-01-01,79.455,3717680000.0,20051440000000.0,151.712572,599504550.0,342600.0,342600.0,22272.0,177552.0,199824.0
9,2020-01-01,92.115,3370242000.0,19506690000000.0,168.072022,599695156.0,705380.0,705380.0,33399.0,222530.0,255929.0


In [71]:
## Write all DataFrames to excel

writer = pd.ExcelWriter('data/main.xlsx', engine='xlsxwriter')

df_main.to_excel(writer, sheet_name = "main")
df_average.to_excel(writer, sheet_name = "average")
df_total.to_excel(writer, sheet_name = "total")
df_greenhouse_policies.to_excel(writer, sheet_name = "policies")

writer.save()

In [72]:
df_co2_sector = pd.read_csv("data/co-emissions-by-sector.csv")
df_methane_sector = pd.read_csv("data/methane-emissions-by-sector.csv")
df_nitrous_sector = pd.read_csv("data/nitrous-oxide-emissions-by-sector.csv")

df_co2_sector = df_co2_sector[df_co2_sector['Code'].isin(set(df_temp_change['ISO3']))]


In [73]:
df_co2_sector = pd.read_csv("data/co-emissions-by-sector.csv")
df_co2_sector = df_co2_sector[df_co2_sector['Code'].isin(set(df_temp_change['ISO3']))]
df_co2_sector.drop(columns=['Entity'], inplace = True)
df_co2_sector.rename(columns = {'Code' : "ISO3"}, inplace = True)
df = pd.melt(df_co2_sector, id_vars=["ISO3", 'Year'])#, var_name = ['year'])
df.sort_values(by=["ISO3", 'Year'], inplace=True)
df

Unnamed: 0,ISO3,Year,variable,value
0,ALB,1990,Buildings,3.100000e+05
1200,ALB,1990,Industry,3.100000e+05
2400,ALB,1990,Land-use change and forestry,1.900000e+05
3600,ALB,1990,Other fuel combustion,1.790000e+06
4800,ALB,1990,Transport,7.100000e+05
...,...,...,...,...
4769,UKR,2019,Other fuel combustion,4.110000e+06
5969,UKR,2019,Transport,2.667000e+07
7169,UKR,2019,Manufacturing and construction,3.535000e+07
8369,UKR,2019,Fugitive emissions,2.500000e+05


In [74]:
df_methane_sector

Unnamed: 0,Entity,Code,Year,Agriculture,Fugitive emissions,Waste,Land-use change and forestry,Industry,Other fuel combustion
0,Afghanistan,AFG,1990,5.360000e+06,280000.001192,1.090000e+06,0.000000,0.000000,0.000000
1,Afghanistan,AFG,1991,5.610000e+06,239999.994636,1.170000e+06,0.000000,0.000000,0.000000
2,Afghanistan,AFG,1992,5.670000e+06,200000.002980,1.250000e+06,0.000000,0.000000,0.000000
3,Afghanistan,AFG,1993,5.720000e+06,159999.996424,1.330000e+06,0.000000,0.000000,0.000000
4,Afghanistan,AFG,1994,5.930000e+06,119999.997318,1.410000e+06,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...
6145,Zimbabwe,ZWE,2015,6.650000e+06,660000.026226,2.270000e+06,140000.000596,479999.989271,119999.997318
6146,Zimbabwe,ZWE,2016,6.150000e+06,680000.007153,2.320000e+06,109999.999404,479999.989271,119999.997318
6147,Zimbabwe,ZWE,2017,6.250000e+06,699999.988079,2.370000e+06,59999.998659,479999.989271,119999.997318
6148,Zimbabwe,ZWE,2018,6.420000e+06,709999.978542,2.420000e+06,109999.999404,479999.989271,119999.997318


In [75]:
df_nitrous_sector

Unnamed: 0,Entity,Code,Year,Agriculture,Industry,Other fuel combustion,Waste,Land-use change and forestry,Fugitive emissions
0,Afghanistan,AFG,1990,2.710000e+06,0.0,0.000000,140000.000596,0.000000,0.0
1,Afghanistan,AFG,1991,2.790000e+06,0.0,0.000000,140000.000596,0.000000,0.0
2,Afghanistan,AFG,1992,2.740000e+06,0.0,0.000000,150000.005960,0.000000,0.0
3,Afghanistan,AFG,1993,2.770000e+06,0.0,0.000000,159999.996424,0.000000,0.0
4,Afghanistan,AFG,1994,2.600000e+06,0.0,0.000000,170000.001788,0.000000,0.0
...,...,...,...,...,...,...,...,...,...
6145,Zimbabwe,ZWE,2015,4.790000e+06,0.0,19999.999553,159999.996424,150000.005960,0.0
6146,Zimbabwe,ZWE,2016,4.360000e+06,0.0,19999.999553,159999.996424,119999.997318,0.0
6147,Zimbabwe,ZWE,2017,4.550000e+06,0.0,19999.999553,159999.996424,70000.000298,0.0
6148,Zimbabwe,ZWE,2018,4.660000e+06,0.0,19999.999553,170000.001788,119999.997318,0.0
