In [91]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re

# Data Description

* Population
* GDP
* DCT - $CO_{2}$ Emissions – Total - `CDI = CO2 Intensity Current-Year Score`
* DPT - $CO_{2}$ emissions/kWh elect. & heat - `CEH = CO2 Emissions per kWh`
* DMT - $CH_{4}$ emissions - `CHI = $CH_{4}$ Intensity Current-Year Score`
* DNT - $N_{2}O$ emissions - `NOI = $N_{2}O$ Intensity Current-Year Score`
* DBT - Black Carbon emissions - `BCI = Black Carbon Current-Year Score`

In [92]:
climate = pd.read_csv('data_2020.csv')

In [93]:
climate.drop(columns=['Unnamed: 0'],inplace=True)

In [94]:
climate.shape

(3094, 7)

In [95]:
climate.head(10)

Unnamed: 0,country,year,CDA,CHA,FGA,NDA,BCA
0,Albania,1995,100.0,73.07252,0.0,78.221486,100.0
1,Algeria,1995,40.836434,98.636534,96.334651,72.620891,70.996796
2,Angola,1995,12.888007,70.729875,0.0,100.0,43.865361
3,Antigua and Barbuda,1995,44.483927,80.575637,0.0,36.690524,68.683208
4,Argentina,1995,40.485271,100.0,100.0,63.435774,45.942816
5,Armenia,1995,72.726561,89.833401,0.0,97.707838,100.0
6,Australia,1995,42.566037,99.66689,100.0,76.458848,82.549405
7,Austria,1995,50.903881,100.0,92.800663,93.225015,100.0
8,Azerbaijan,1995,60.41653,86.983408,100.0,74.176342,88.589201
9,Bahrain,1995,25.727473,33.991729,100.0,43.459231,42.950239


In [96]:
climate = climate[climate['year']!=2020]

In [97]:
climate['year'].unique()

array([1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
       2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
       2017, 2018, 2019], dtype=int64)

# `Time Series` for 2021-2027 per country

In [98]:
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.ar_model import AR
import datetime

In [99]:
countries = list(climate['country'].unique())
len(countries)

119

In [100]:
climate.isna().sum() # checking missing values

country    0
year       0
CDA        0
CHA        0
FGA        0
NDA        0
BCA        0
dtype: int64

In [141]:
cda = []
cha = []
fga = []
nda = []
bca = []

In [142]:
# CDA time series

for country in countries:
    df = climate[climate['country']==str(country)].reset_index()
    #df.index = pd.to_datetime(df['year'],format='%Y')#.dt.year
    features = df['CDA']
    model = ARIMA(features1, order=(3,1,0))
    model_fit = model.fit(disp=0)
    model = AR(features)
    model_fit = model.fit(maxlag=3)
    d_f1 = model_fit.predict(start=len(features1), end=len(features1)+7, dynamic=False)
    cda += d_f1.to_list()

    #print('Country {0} completed'.format(country))

In [143]:
# CHA time series

for country in countries:
    df = climate[climate['country']==str(country)].reset_index()
    #df.index = pd.to_datetime(df['year'],format='%Y')#.dt.year
    features = df['CHA']
    if len((climate[climate['country']==str(country)]['CHA']).unique()) <=3:
        cha_ = features[:8].tolist()
        cha+=cha_
    else:
        model = AR(features)
        model_fit = model.fit(maxlag=3)
        d_f = model_fit.predict(start=len(features), end=len(features)+7, dynamic=False)
        cha+=d_f.to_list()
    #print('Country {0} completed'.format(country)) # problems with values repetition

In [144]:
# FGA time series

for country in countries:
    df = climate[climate['country']==str(country)].reset_index()
    #df.index = pd.to_datetime(df['year'],format='%Y')#.dt.year
    features = df['FGA']
    if len((climate[climate['country']==str(country)]['FGA']).unique()) <=3:
        fga_ = features[:8].tolist()
        fga+=fga_
    else:
        model = AR(features)
        model_fit = model.fit(maxlag=3)
        d_f = model_fit.predict(start=len(features), end=len(features)+7, dynamic=False)
        fga+=d_f.to_list()
    #print('Country {0} completed'.format(country))

In [145]:
# NDA time series

for country in countries:
    df = climate[climate['country']==str(country)].reset_index()
    #df.index = pd.to_datetime(df['year'],format='%Y')#.dt.year
    features = df['NDA']
    #if len((climate[climate['country']==str(country)]['NDA']).unique()) <=3:
        #nda_ = features[:8].tolist()
        #nda+=fga_
    #else:
    model = AR(features)
    model_fit = model.fit(maxlag=3)
    d_f = model_fit.predict(start=len(features), end=len(features)+7, dynamic=False)
    nda+=d_f.to_list()
    #print('Country {0} completed'.format(country))

In [146]:
# BCA time series

for country in countries:
    df = climate[climate['country']==str(country)].reset_index()
    #df.index = pd.to_datetime(df['year'],format='%Y')#.dt.year
    features = df['BCA']
    if len((climate[climate['country']==str(country)]['BCA']).unique()) <=3:
        bca_ = features[:8].tolist()
        bca+=bca_
    else:
        model = AR(features)
        model_fit = model.fit(maxlag=3)
        d_f = model_fit.predict(start=len(features), end=len(features)+7, dynamic=False)
        bca+=d_f.to_list()
    #print('Country {0} completed'.format(country))

## We have to generate total 952 predictions

In [147]:
print('The CDA columns has {0} predictions'.format(len(cda)))
print('The CHA columns has {0} predictions'.format(len(cha)))
print('The FGA columns has {0} predictions'.format(len(fga)))
print('The NDA columns has {0} predictions'.format(len(nda)))
print('The BCA columns has {0} predictions'.format(len(bca)))

The CDA columns has 952 predictions
The CHA columns has 952 predictions
The FGA columns has 952 predictions
The NDA columns has 952 predictions
The BCA columns has 952 predictions


In [148]:
df_cda = pd.DataFrame({'CDA':cda})
df_cha = pd.DataFrame({'CHA':cha})
df_fga = pd.DataFrame({'FGA':fga})
df_nda = pd.DataFrame({'NDA':nda})
df_bca = pd.DataFrame({'BCA':bca})

In [149]:
print('The CDA columns has {0} predictions'.format(df_cda.shape[0]))
print('The CHA columns has {0} predictions'.format(df_cha.shape[0]))
print('The FGA columns has {0} predictions'.format(df_fga.shape[0]))
print('The NDA columns has {0} predictions'.format(df_nda.shape[0]))
print('The BCA columns has {0} predictions'.format(df_bca.shape[0]))

The CDA columns has 952 predictions
The CHA columns has 952 predictions
The FGA columns has 952 predictions
The NDA columns has 952 predictions
The BCA columns has 952 predictions


In [150]:
years = [num for num in range(2020,2028)]
year = pd.DataFrame({'year':years*119})
year.shape

(952, 1)

In [151]:
countries_total = []
for i in countries:
    for j in range(0,8):
        countries_total.append(i)

In [152]:
len(countries_total)

952

In [153]:
data = pd.DataFrame(countries_total,columns=['country'])
climate_pred = data.join(year)

In [154]:
climate_pred = climate_pred.join(df_cda)
climate_pred = climate_pred.join(df_cha)
climate_pred = climate_pred.join(df_fga)
climate_pred = climate_pred.join(df_nda)
climate_pred = climate_pred.join(df_bca)

In [155]:
climate_pred.shape

(952, 7)

In [156]:
climate_pred.describe()

Unnamed: 0,year,CDA,CHA,FGA,NDA,BCA
count,952.0,952.0,952.0,952.0,952.0,952.0
mean,2023.5,46.395024,70.006197,89.913426,-868937.1,57.320049
std,2.292492,20.136789,24.309251,7.384151,17496650.0,28.120888
min,2020.0,-71.101826,0.408033,52.691352,-467833900.0,-1.940207
25%,2021.75,34.701094,54.000772,87.715299,43.50532,37.260919
50%,2023.5,47.377325,74.051693,90.851267,61.53819,56.165949
75%,2025.25,60.106498,91.670508,94.110473,76.59303,80.385092
max,2027.0,99.865077,108.513217,102.515922,99.98047,107.459585


In [157]:
climate_pred['CDA'] = climate_pred['CDA'].apply(lambda x: 0 if x<=0 else x)
climate_pred['CHA'] = climate_pred['CHA'].apply(lambda x: 0 if x<=0 else x)
climate_pred['FGA'] = climate_pred['FGA'].apply(lambda x: 0 if x<=0 else x)
climate_pred['NDA'] = climate_pred['NDA'].apply(lambda x: 0 if x<=0 else x)
climate_pred['BCA'] = climate_pred['BCA'].apply(lambda x: 0 if x<=0 else x)

climate_pred['CDA'] = climate_pred['CDA'].apply(lambda x: 100 if x>=100 else x)
climate_pred['CHA'] = climate_pred['CHA'].apply(lambda x: 100 if x>=100 else x)
climate_pred['FGA'] = climate_pred['FGA'].apply(lambda x: 100 if x>=100 else x)
climate_pred['NDA'] = climate_pred['NDA'].apply(lambda x: 100 if x>=100 else x)
climate_pred['BCA'] = climate_pred['BCA'].apply(lambda x: 100 if x>=100 else x)

In [158]:
climate_pred.describe()

Unnamed: 0,year,CDA,CHA,FGA,NDA,BCA
count,952.0,952.0,952.0,952.0,952.0,952.0
mean,2023.5,46.784059,69.949255,89.90037,58.340987,57.276219
std,2.292492,18.819844,24.233365,7.364784,23.911146,27.973173
min,2020.0,0.0,0.408033,52.691352,0.0,0.0
25%,2021.75,34.701094,54.000772,87.715299,43.505318,37.260919
50%,2023.5,47.377325,74.051693,90.851267,61.538187,56.165949
75%,2025.25,60.106498,91.670508,94.110473,76.593028,80.385092
max,2027.0,99.865077,100.0,100.0,99.980469,100.0


In [159]:
final_df = []
for country in countries:
    df1 = climate[climate['country']==str(country)].reset_index()
    df2 = climate_pred[climate_pred['country']==str(country)].reset_index()
    final = df1.append(df2)
    final = final.reset_index()
    final.drop(columns=['level_0','index'],inplace=True)
    final_df.append(final)

In [160]:
def list_to_df(lst):
    df = lst[0]
    for item in range(1,len(lst)):
        df = df.append(lst[item])
    return df

In [161]:
all_pred_2019 = list_to_df(final_df)
all_pred_2019 = all_pred_2019.reset_index()
all_pred_2019.drop(columns=['index'],inplace=True)
all_pred_2019.shape

(3927, 7)

In [127]:
len(countries)*len(all_pred_2019['year'].unique())

3927

In [162]:
all_pred_2019.describe()

Unnamed: 0,year,CDA,CHA,FGA,NDA,BCA
count,3927.0,3927.0,3927.0,3927.0,3927.0,3927.0
mean,2011.0,45.955022,69.909114,79.728208,59.985748,59.230412
std,9.523117,20.125432,27.476624,24.050253,27.730268,30.771665
min,1995.0,0.0,0.0,0.0,0.0,0.0
25%,2003.0,33.260775,52.18819,76.485167,40.925789,36.3788
50%,2011.0,46.116374,75.198841,88.707759,62.475991,58.80269
75%,2019.0,58.927134,96.015574,93.209392,80.133832,88.61631
max,2027.0,100.0,100.0,100.0,100.0,100.0


# Export xlsx and csv

In [163]:
climate_pred.to_csv('climate_pred_2019.csv')

In [164]:
all_pred_2019.to_csv('climate_all_pred_2019.csv')

# `Survival Analysis` to predict which variable represents the biggest change concern