In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re

# Data Description

* Population
* GDP
* DCT - $CO_{2}$ Emissions – Total - `CDI = CO2 Intensity Current-Year Score`
* DPT - $CO_{2}$ emissions/kWh elect. & heat - `CEH = CO2 Emissions per kWh`
* DMT - $CH_{4}$ emissions - `CHI = CH4 Intensity Current-Year Score`
* DNT - $N_{2}O$ emissions - `NOI = N2O Intensity Current-Year Score`
* DBT - Black Carbon emissions - `BCI = Black Carbon Current-Year Score`

In [2]:
# Load the raw dataset; the path is relative to the notebook's working directory.
climate = pd.read_csv('data_2020.csv')

In [3]:
# Remove the unnamed column that came in with the CSV. Rebinding (instead of
# `inplace=True`) together with `errors='ignore'` keeps this cell safe to
# re-run: a second execution no longer raises KeyError for the missing column.
climate = climate.drop(columns=['Unnamed: 0'], errors='ignore')

In [4]:
# (rows, columns) of the loaded frame.
climate.shape

(3094, 7)

In [5]:
# Preview the first ten rows.
climate.head(10)

Unnamed: 0,country,year,CDA,CHA,FGA,NDA,BCA
0,Albania,1995,100.0,73.07252,0.0,78.221486,100.0
1,Algeria,1995,40.836434,98.636534,96.334651,72.620891,70.996796
2,Angola,1995,12.888007,70.729875,0.0,100.0,43.865361
3,Antigua and Barbuda,1995,44.483927,80.575637,0.0,36.690524,68.683208
4,Argentina,1995,40.485271,100.0,100.0,63.435774,45.942816
5,Armenia,1995,72.726561,89.833401,0.0,97.707838,100.0
6,Australia,1995,42.566037,99.66689,100.0,76.458848,82.549405
7,Austria,1995,50.903881,100.0,92.800663,93.225015,100.0
8,Azerbaijan,1995,60.41653,86.983408,100.0,74.176342,88.589201
9,Bahrain,1995,25.727473,33.991729,100.0,43.459231,42.950239


In [6]:
# Keep every row whose year is neither 2020 nor 2019.
excluded_years = [2020, 2019]
climate = climate[~climate['year'].isin(excluded_years)]

In [7]:
# Distinct years currently present in `climate`.
climate['year'].unique()

array([1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
       2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
       2017, 2018], dtype=int64)

# `Time Series` forecasts for 2019-2027 per country

In [8]:
from statsmodels.tsa.ar_model import AR
import datetime

In [9]:
# Distinct country names, in order of first appearance in `climate`.
countries = climate['country'].unique().tolist()
len(countries)

119

In [10]:
# Number of missing values in each column of `climate`.
climate.isna().sum() # checking missing values

country    0
year       0
CDA        0
CHA        0
FGA        0
NDA        0
BCA        0
dtype: int64

In [18]:
# One independent, initially empty accumulator per indicator; the forecasting
# cells extend these lists country by country.
cda, cha, fga, nda, bca = ([] for _ in range(5))

In [19]:
# CDA time series
#
# For every country, fit an autoregressive model (up to 3 lags) on its CDA
# history and keep the next nine out-of-sample values. Forecasts are stored
# country by country, in the order of `countries`.
# NOTE(review): `AR` is deprecated in newer statsmodels releases in favour of
# `AutoReg` -- confirm against the installed version before upgrading.

n_steps = 9  # forecast horizon per country

# Start from an empty list so that re-running this cell replaces the forecasts
# instead of appending a second copy to whatever `cda` already holds.
cda = []
for country in countries:
    history = climate.loc[climate['country'] == country, 'CDA'].reset_index(drop=True)
    fitted = AR(history).fit(maxlag=3)
    # `start` is the first position past the observed series; `end` is inclusive.
    forecast = fitted.predict(start=len(history), end=len(history) + n_steps - 1, dynamic=False)
    cda += forecast.to_list()

In [20]:
# CHA time series
#
# For every country, forecast the next nine CHA values, stored country by
# country in the order of `countries`.
# NOTE(review): `AR` is deprecated in newer statsmodels releases in favour of
# `AutoReg` -- confirm against the installed version before upgrading.

n_steps = 9  # forecast horizon per country

# Start from an empty list so that re-running this cell replaces the forecasts
# instead of appending a second copy to whatever `cha` already holds.
cha = []
for country in countries:
    history = climate.loc[climate['country'] == country, 'CHA'].reset_index(drop=True)
    if history.nunique(dropna=False) <= 3:
        # The AR fit has problems with heavily repeated values (three or fewer
        # distinct observations). Carry the latest observation forward instead:
        # this always yields exactly `n_steps` values and reflects the most
        # recent state rather than replaying the earliest years of the series.
        forecast = [float(history.iloc[-1])] * n_steps
    else:
        fitted = AR(history).fit(maxlag=3)
        # `start` is the first position past the observed series; `end` is inclusive.
        forecast = fitted.predict(
            start=len(history), end=len(history) + n_steps - 1, dynamic=False
        ).to_list()
    cha += forecast

In [21]:
# FGA time series
#
# For every country, forecast the next nine FGA values, stored country by
# country in the order of `countries`.
# NOTE(review): `AR` is deprecated in newer statsmodels releases in favour of
# `AutoReg` -- confirm against the installed version before upgrading.

n_steps = 9  # forecast horizon per country

# Start from an empty list so that re-running this cell replaces the forecasts
# instead of appending a second copy to whatever `fga` already holds.
fga = []
for country in countries:
    history = climate.loc[climate['country'] == country, 'FGA'].reset_index(drop=True)
    if history.nunique(dropna=False) <= 3:
        # Three or fewer distinct observations: skip the AR fit and carry the
        # latest observation forward. This always yields exactly `n_steps`
        # values and reflects the most recent state rather than replaying the
        # earliest years of the series.
        forecast = [float(history.iloc[-1])] * n_steps
    else:
        fitted = AR(history).fit(maxlag=3)
        # `start` is the first position past the observed series; `end` is inclusive.
        forecast = fitted.predict(
            start=len(history), end=len(history) + n_steps - 1, dynamic=False
        ).to_list()
    fga += forecast

In [22]:
# NDA time series
#
# For every country, forecast the next nine NDA values, stored country by
# country in the order of `countries`.
# NOTE(review): `AR` is deprecated in newer statsmodels releases in favour of
# `AutoReg` -- confirm against the installed version before upgrading.

n_steps = 9  # forecast horizon per country

# Start from an empty list so that re-running this cell replaces the forecasts
# instead of appending a second copy to whatever `nda` already holds.
nda = []
for country in countries:
    history = climate.loc[climate['country'] == country, 'NDA'].reset_index(drop=True)
    if history.nunique(dropna=False) <= 3:
        # Three or fewer distinct observations: skip the AR fit and carry the
        # latest observation forward. This always yields exactly `n_steps`
        # values and reflects the most recent state rather than replaying the
        # earliest years of the series.
        forecast = [float(history.iloc[-1])] * n_steps
    else:
        fitted = AR(history).fit(maxlag=3)
        # `start` is the first position past the observed series; `end` is inclusive.
        forecast = fitted.predict(
            start=len(history), end=len(history) + n_steps - 1, dynamic=False
        ).to_list()
    nda += forecast

In [23]:
# BCA time series
#
# For every country, forecast the next nine BCA values, stored country by
# country in the order of `countries`.
# NOTE(review): `AR` is deprecated in newer statsmodels releases in favour of
# `AutoReg` -- confirm against the installed version before upgrading.

n_steps = 9  # forecast horizon per country

# Start from an empty list so that re-running this cell replaces the forecasts
# instead of appending a second copy to whatever `bca` already holds.
bca = []
for country in countries:
    history = climate.loc[climate['country'] == country, 'BCA'].reset_index(drop=True)
    if history.nunique(dropna=False) <= 3:
        # Three or fewer distinct observations: skip the AR fit and carry the
        # latest observation forward. This always yields exactly `n_steps`
        # values and reflects the most recent state rather than replaying the
        # earliest years of the series.
        forecast = [float(history.iloc[-1])] * n_steps
    else:
        fitted = AR(history).fit(maxlag=3)
        # `start` is the first position past the observed series; `end` is inclusive.
        forecast = fitted.predict(
            start=len(history), end=len(history) + n_steps - 1, dynamic=False
        ).to_list()
    bca += forecast

## We have to generate a total of 1071 predictions per variable (119 countries × 9 forecast years)

In [24]:
# Sanity check: report how many forecasts were collected for each indicator.
for template, values in (
    ('The CDA columns has {0} predictions', cda),
    ('The CHA columns has {0} predictions', cha),
    ('The FGA columns has {0} predictions', fga),
    ('The NDA columns has {0} predictions', nda),
    ('The BCA columns has {0} predictions', bca),
):
    print(template.format(len(values)))

The CDA columns has 1071 predictions
The CHA columns has 1071 predictions
The FGA columns has 1071 predictions
The NDA columns has 1071 predictions
The BCA columns has 1071 predictions


In [25]:
# Wrap each forecast list in a single-column frame so the columns can be
# joined onto the country/year frame by row position.
df_cda = pd.DataFrame({'CDA': cda})
df_cha = pd.DataFrame({'CHA': cha})
df_fga = pd.DataFrame({'FGA': fga})
# The NDA column must be built from `nda`; building it from `fga` silently
# exported a copy of the FGA forecasts under the NDA name.
df_nda = pd.DataFrame({'NDA': nda})
df_bca = pd.DataFrame({'BCA': bca})

In [26]:
# Sanity check: report the number of rows in each single-indicator frame.
for template, frame in (
    ('The CDA columns has {0} predictions', df_cda),
    ('The CHA columns has {0} predictions', df_cha),
    ('The FGA columns has {0} predictions', df_fga),
    ('The NDA columns has {0} predictions', df_nda),
    ('The BCA columns has {0} predictions', df_bca),
):
    print(template.format(frame.shape[0]))

The CDA columns has 1071 predictions
The CHA columns has 1071 predictions
The FGA columns has 1071 predictions
The NDA columns has 1071 predictions
The BCA columns has 1071 predictions


In [27]:
# Forecast years, repeated once per country so the rows line up with the
# country-by-country forecast lists. The repeat count is taken from
# `countries` rather than a hard-coded 119, so the frame stays aligned if the
# set of countries in the data changes.
years = list(range(2019, 2028))
year = pd.DataFrame({'year': years * len(countries)})
year.shape

(1071, 1)

In [28]:
# Repeat every country name nine times in a row (one entry per forecast step),
# keeping the order of `countries`.
countries_total = [name for name in countries for _ in range(9)]

In [29]:
# Number of country labels built above: nine per country.
len(countries_total)

1071

In [30]:
# Country labels form the first column; the forecast years are attached by
# matching row index.
data = pd.DataFrame({'country': countries_total})
climate_pred = data.join(year)

In [31]:
# Attach the five indicator columns one after another, matching on row index.
for indicator_frame in (df_cda, df_cha, df_fga, df_nda, df_bca):
    climate_pred = climate_pred.join(indicator_frame)

In [32]:
# (rows, columns) of the assembled forecast frame: country, year and the five
# indicator columns.
climate_pred.shape

(1071, 7)

# Export xlsx and csv

In [33]:
# Persist the forecasts as CSV and Excel, relative to the working directory.
# `index=False` is not passed, so the row index is written as well.
climate_pred.to_csv('climate_pred_2018.csv')
climate_pred.to_excel('climate_pred_2018.xlsx')

# `Survival Analysis` to predict which variable represents the biggest change concern