# Carbon Prediction Time Series Analysis

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from pandas_datareader import wb

In [2]:
# Data source: World Bank DataBank, World Development Indicators
# https://databank.worldbank.org/source/world-development-indicators

In [3]:
# World Bank series identifier; passed as `indicator=` to wb.download.
code = "EN.CO2.BLDG.ZS"

# Three-letter codes of the economies to fetch: Brazil, India, China,
# South Africa, United States, United Kingdom, World, European Union.
regions = [
    "BRA",
    "IND",
    "CHN",
    "ZAF",
    "USA",
    "GBR",
    "WLD",
    "EUU",
]

In [4]:
# Fetch the series named by `code` for every entry in `regions`, 1971 through 2014.
# The result is indexed by (country, year) and has a single column named after `code`.
df_indicator = wb.download(indicator=code, country=regions, start=1971, end=2014)



In [5]:
# df_indicator['Country'].unique()

In [6]:
# Structural summary of the download: index type, row count, dtype and non-null count.
df_indicator.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 352 entries, ('Brazil', '2014') to ('South Africa', '1971')
Data columns (total 1 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   EN.CO2.BLDG.ZS  352 non-null    float64
dtypes: float64(1)
memory usage: 5.3+ KB


In [7]:
# Preview the raw download (pandas truncates the middle rows at its default display limit).
df_indicator

Unnamed: 0_level_0,Unnamed: 1_level_0,EN.CO2.BLDG.ZS
country,year,Unnamed: 2_level_1
Brazil,2014,4.289736
Brazil,2013,4.508896
Brazil,2012,4.746116
Brazil,2011,5.159270
Brazil,2010,5.346396
...,...,...
South Africa,1975,6.572400
South Africa,1974,6.815974
South Africa,1973,6.949115
South Africa,1972,7.742371


In [8]:
# Save a copy of the raw download as an Excel workbook (path is relative to the working directory).
df_indicator.to_excel('indicator_world_bank.xlsx')

In [9]:
# Lift pandas' row-display limit so frames render in full.
# NOTE(review): this is a global, session-wide option, so every later frame display
# is untruncated as well; pd.option_context("display.max_rows", None) would scope it
# to a single cell.
pd.set_option("display.max_rows", None)

In [10]:
# Show the frame again, now without row truncation.
df_indicator

Unnamed: 0_level_0,Unnamed: 1_level_0,EN.CO2.BLDG.ZS
country,year,Unnamed: 2_level_1
Brazil,2014,4.289736
Brazil,2013,4.508896
Brazil,2012,4.746116
Brazil,2011,5.15927
Brazil,2010,5.346396
Brazil,2009,6.042296
Brazil,2008,5.728822
Brazil,2007,6.021721
Brazil,2006,6.104512
Brazil,2005,6.177134


In [11]:
# Count missing values per column.
df_indicator.isna().sum()

EN.CO2.BLDG.ZS    0
dtype: int64

In [12]:
# Move the (country, year) index levels into ordinary columns.
# NOTE(review): not idempotent — re-running this cell on the already-reset frame
# adds a spurious 'index' column.
df_indicator = df_indicator.reset_index()

In [13]:
# Confirm that country and year are now regular columns.
df_indicator.head()

Unnamed: 0,country,year,EN.CO2.BLDG.ZS
0,Brazil,2014,4.289736
1,Brazil,2013,4.508896
2,Brazil,2012,4.746116
3,Brazil,2011,5.15927
4,Brazil,2010,5.346396


In [14]:
# Check the Python type of the first 'year' value (the download delivers years as strings).
type(df_indicator['year'][0])

str

In [15]:
# Convert the year labels from strings to integers.
# `int` resolves to the platform's default integer width
# (numpy.int32 in the recorded output).
df_indicator = df_indicator.astype({'year': int})
# Re-check the element type after the conversion.
type(df_indicator['year'].iloc[0])

numpy.int32

In [16]:
# Use year as the row index; set_index returns a new frame that replaces the old binding.
df_indicator = df_indicator.set_index('year')
df_indicator.head()

Unnamed: 0_level_0,country,EN.CO2.BLDG.ZS
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,Brazil,4.289736
2013,Brazil,4.508896
2012,Brazil,4.746116
2011,Brazil,5.15927
2010,Brazil,5.346396


In [17]:
# Order rows chronologically by the year index.
# Countries within the same year come out in no particular order (see the preview).
df_indicator = df_indicator.sort_index()
df_indicator.head()

Unnamed: 0_level_0,country,EN.CO2.BLDG.ZS
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1971,United Kingdom,17.230274
1971,World,18.615556
1971,United States,19.0614
1971,India,15.087564
1971,European Union,21.950546


In [18]:
# Give the value column a readable name. Renaming by label (the series id held in
# `code`) rather than overwriting `.columns` positionally means a change in column
# order or count cannot silently mislabel the data.
df_indicator = df_indicator.rename(columns={code: "Indicator"})
# Fail loudly if the frame does not have exactly the expected layout.
assert list(df_indicator.columns) == ["country", "Indicator"], list(df_indicator.columns)
df_indicator.head()

Unnamed: 0_level_0,country,Indicator
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1971,United Kingdom,17.230274
1971,World,18.615556
1971,United States,19.0614
1971,India,15.087564
1971,European Union,21.950546


In [19]:
# Wider preview: the first ten rows cover all eight regions for 1971 plus the start of 1972.
df_indicator.head(10)

Unnamed: 0_level_0,country,Indicator
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1971,United Kingdom,17.230274
1971,World,18.615556
1971,United States,19.0614
1971,India,15.087564
1971,European Union,21.950546
1971,China,21.596575
1971,Brazil,7.044025
1971,South Africa,7.925393
1972,United States,18.390586
1972,Brazil,7.056473


In [20]:
# df_indicator.info

In [21]:
# Reshape long -> wide: one row per year, one column per country, cells hold the indicator.
# pivot_table applies its default aggregation (mean) if a year/country pair repeats.
df_indicator = df_indicator.pivot_table(
    values='Indicator',
    index='year',
    columns='country',
)

In [22]:
# Inspect the wide table: years as rows, one column per country name.
df_indicator

country,Brazil,China,European Union,India,South Africa,United Kingdom,United States,World
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1971,7.044025,21.596575,21.950546,15.087564,7.925393,17.230274,19.0614,18.615556
1972,7.056473,21.329853,22.129084,14.035274,7.742371,18.141505,18.390586,18.432397
1973,6.568496,20.831115,21.894976,14.183907,6.949115,17.421817,17.048219,17.565715
1974,6.133508,20.318703,20.240601,12.503688,6.815974,18.435426,16.687413,16.947899
1975,5.927298,20.465564,20.72844,12.209276,6.5724,17.964301,17.031575,17.173455
1976,5.767214,19.931858,20.164812,12.336263,6.429296,17.870337,17.26468,17.137039
1977,5.670138,19.983016,19.668188,12.602554,5.527939,18.214177,16.128702,16.512286
1978,5.635035,18.922859,19.879768,13.587028,6.764253,17.977528,16.105992,16.480944
1979,5.783516,19.026029,19.61691,13.698245,6.058166,18.140961,14.846802,15.885459
1980,6.153479,18.139023,18.604842,12.617357,4.856512,18.444117,13.948417,15.118228


In [26]:
# Recall the region codes and the order they are stored in.
regions

['BRA', 'IND', 'CHN', 'ZAF', 'USA', 'GBR', 'WLD', 'EUU']

In [27]:
# Relabel the country-name columns with their three-letter region codes.
# An explicit name -> code mapping is used so the result depends neither on the
# alphabetical column order produced by the pivot nor on the ordering of `regions`.
country_to_code = {
    'Brazil': 'BRA',
    'China': 'CHN',
    'European Union': 'EUU',
    'India': 'IND',
    'South Africa': 'ZAF',
    'United Kingdom': 'GBR',
    'United States': 'USA',
    'World': 'WLD',
}
# rename_axis(columns=None) drops the leftover 'country' label on the column axis.
df_indicator = df_indicator.rename(columns=country_to_code).rename_axis(columns=None)
# Every column must now be one of the requested codes; anything else means the
# mapping above is missing a country name.
assert set(df_indicator.columns) == set(regions), list(df_indicator.columns)

In [28]:
# Confirm the columns now carry region codes instead of country names.
df_indicator

Unnamed: 0_level_0,BRA,CHN,EUU,IND,ZAF,GBR,USA,WLD
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1971,7.044025,21.596575,21.950546,15.087564,7.925393,17.230274,19.0614,18.615556
1972,7.056473,21.329853,22.129084,14.035274,7.742371,18.141505,18.390586,18.432397
1973,6.568496,20.831115,21.894976,14.183907,6.949115,17.421817,17.048219,17.565715
1974,6.133508,20.318703,20.240601,12.503688,6.815974,18.435426,16.687413,16.947899
1975,5.927298,20.465564,20.72844,12.209276,6.5724,17.964301,17.031575,17.173455
1976,5.767214,19.931858,20.164812,12.336263,6.429296,17.870337,17.26468,17.137039
1977,5.670138,19.983016,19.668188,12.602554,5.527939,18.214177,16.128702,16.512286
1978,5.635035,18.922859,19.879768,13.587028,6.764253,17.977528,16.105992,16.480944
1979,5.783516,19.026029,19.61691,13.698245,6.058166,18.140961,14.846802,15.885459
1980,6.153479,18.139023,18.604842,12.617357,4.856512,18.444117,13.948417,15.118228
