# Analyzing the Time Series

In [1]:
from statsmodels.tsa.ar_model import AR

In [2]:
from random import random

In [3]:
%matplotlib notebook
import matplotlib.pyplot as plt

In [4]:
import csv
import pandas as pd
import numpy as np
import pickle

In [5]:
# Root directory of the WDI Indonesia files. The trailing slash matters: file
# names are appended to this string with plain `+` concatenation.
WDI_path="/home/hendra/Datasets/WDI_Indonesia/"

### Constructing a Dictionary for Converting Indicator Names into Codes

In [16]:
# Load the indicator conversion table (a semicolon-separated CSV).
Tbl_konversi_indikator_names = pd.read_csv(
    WDI_path + "WDI_Tabel_Konversi_Indikator.csv",
    sep=";",
)

In [20]:
# Indicator names, in table order; these become the dictionary keys.
keys = Tbl_konversi_indikator_names["Series Name"].tolist()

In [22]:
# Indicator codes, in the same row order as the names.
values = Tbl_konversi_indikator_names["Series Code"].tolist()

In [23]:
# Name -> code lookup built by pairing the two lists position by position.
the_dictionary = {
    series_name: series_code for series_name, series_code in zip(keys, values)
}

In [24]:
the_dictionary

{'Agricultural machinery, tractors': 'AG.AGR.TRAC.NO',
 'Fertilizer consumption (% of fertilizer production)': 'AG.CON.FERT.PT.ZS',
 'Fertilizer consumption (kilograms per hectare of arable land)': 'AG.CON.FERT.ZS',
 'Agricultural land (sq. km)': 'AG.LND.AGRI.K2',
 'Agricultural land (% of land area)': 'AG.LND.AGRI.ZS',
 'Arable land (hectares)': 'AG.LND.ARBL.HA',
 'Arable land (hectares per person)': 'AG.LND.ARBL.HA.PC',
 'Arable land (% of land area)': 'AG.LND.ARBL.ZS',
 'Land under cereal production (hectares)': 'AG.LND.CREL.HA',
 'Permanent cropland (% of land area)': 'AG.LND.CROP.ZS',
 'Rural land area where elevation is below 5 meters (sq. km)': 'AG.LND.EL5M.RU.K2',
 'Rural land area where elevation is below 5 meters (% of total land area)': 'AG.LND.EL5M.RU.ZS',
 'Urban land area where elevation is below 5 meters (sq. km)': 'AG.LND.EL5M.UR.K2',
 'Urban land area where elevation is below 5 meters (% of total land area)': 'AG.LND.EL5M.UR.ZS',
 'Land area where elevation is below 5 

We add a key that is not included in the dictionary:

In [35]:
# Register one extra name -> code pair by hand.
the_dictionary.update(
    {'Net official development assistance received (constant 2013 US$)': 'DT.ODA.ODAT.KD'}
)

In [72]:
# Empty mapping from indicator code to indicator name; a loop in a later cell
# populates it.
code_to_name_dict = {}

In [77]:
len(list_of_indicator_codes)

172

In [80]:
# Drop the "Year" entry so the code list pairs one-to-one with
# list_of_indicator_names. Not idempotent: once "Year" is gone, running this
# cell again raises ValueError.
list_of_indicator_codes.remove('Year')

In [82]:
len(list_of_indicator_codes)

171

In [81]:
len(list_of_indicator_names)

171

In [83]:
len(list_of_indicator_codes)==len(list_of_indicator_names)

True

In [86]:
# Reset the code -> name mapping before the loop in the next cell fills it.
code_to_name_dict = {}

In [87]:
# Map every indicator code to the indicator name at the same position.
# The two lists are parallel. If they differ in length (for example because
# "Year" is still present in the code list) every pair would be shifted or
# trailing entries dropped, so fail loudly instead of building a wrong mapping.
if len(list_of_indicator_codes) != len(list_of_indicator_names):
    raise ValueError(
        "list_of_indicator_codes and list_of_indicator_names differ in length"
    )
code_to_name_dict.update(zip(list_of_indicator_codes, list_of_indicator_names))

In [88]:
code_to_name_dict

{'EN.ATM.CO2E.KD.GD': 'CO2 emissions (kg per 2010 US$ of GDP)',
 'NY.GNP.MKTP.KN': 'GNI (constant LCU)',
 'NY.GNP.MKTP.CN': 'GNI (current LCU)',
 'NY.GNP.PCAP.KD.ZG': 'GNI per capita growth (annual %)',
 'NY.GNP.MKTP.KD': 'GNI (constant 2010 US$)',
 'NE.CON.GOVT.CN': 'General government final consumption expenditure (current LCU)',
 'DC.DAC.GBRL.CD': 'Net bilateral aid flows from DAC donors, United Kingdom (current US$)',
 'AG.PRD.FOOD.XD': 'Food production index (2004-2006 = 100)',
 'NE.CON.PRVT.KD.ZG': 'Household final consumption expenditure (annual % growth)',
 'AG.LND.CROP.ZS': 'Permanent cropland (% of land area)',
 'NY.GNP.PCAP.KD': 'GNI per capita (constant 2010 US$)',
 'SP.POP.DPND.YG': 'Age dependency ratio, young (% of working-age population)',
 'EN.POP.DNST': 'Population density (people per sq. km of land area)',
 'NV.IND.TOTL.KD.ZG': 'Industry, value added (annual % growth)',
 'SP.POP.DPND.OL': 'Age dependency ratio, old (% of working-age population)',
 'NY.GNP.PCAP.KN': '

In [91]:
code_to_name_dict['NY.GNP.MKTP.KN']

'GNI (constant LCU)'

In [90]:
# Persist the code -> name mapping so later sessions can reload it without
# rebuilding it. The `with` block guarantees the file is flushed and closed
# even if pickling fails; the bare open() call left the handle open.
with open(WDI_path+"code_to_name_dict_of_indicator_names.p", "wb") as pickle_file:
    pickle.dump(code_to_name_dict, pickle_file)

In [36]:
# Load the design matrix; its column labels are still indicator names here.
WDI_design_matrix_df = pd.read_csv(
    WDI_path + "completed_values_1962_2013_design_matrix.csv"
)

Let us load the **code-to-name dictionary**.

In [6]:
# Reload the code -> name mapping saved by the pickle.dump cell. The `with`
# block closes the file handle as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this notebook.
with open(WDI_path+"code_to_name_dict_of_indicator_names.p", "rb") as pickle_file:
    code_to_name_dict = pickle.load(pickle_file)

In [7]:
code_to_name_dict.keys()

dict_keys(['EN.ATM.CO2E.KD.GD', 'NY.GNP.MKTP.KN', 'NY.GNP.MKTP.CN', 'NY.GNP.PCAP.KD.ZG', 'NY.GNP.MKTP.KD', 'NE.CON.GOVT.CN', 'DC.DAC.GBRL.CD', 'AG.PRD.FOOD.XD', 'NE.CON.PRVT.KD.ZG', 'AG.LND.CROP.ZS', 'NY.GNP.PCAP.KD', 'SP.POP.DPND.YG', 'EN.POP.DNST', 'NV.IND.TOTL.KD.ZG', 'SP.POP.DPND.OL', 'NY.GNP.PCAP.KN', 'AG.LND.CREL.HA', 'AG.LND.AGRI.K2', 'AG.YLD.CREL.KG', 'AG.PRD.CREL.MT', 'NV.AGR.TOTL.KN', 'NE.CON.PETC.KD.ZG', 'NE.GDI.TOTL.KD.ZG', 'ER.FSH.AQUA.MT', 'NE.CON.TETC.CN', 'NE.CON.TETC.KN', 'NE.CON.TETC.KD', 'MS.MIL.MPRT.KD', 'NE.CON.TETC.ZS', 'NE.CON.TOTL.CN', 'NE.CON.TOTL.KN', 'NE.CON.TOTL.KD', 'NV.AGR.TOTL.CN', 'NE.CON.GOVT.KD.ZG', 'NV.AGR.TOTL.KD', 'NV.AGR.TOTL.ZS', 'AG.SRF.TOTL.K2', 'AG.PRD.CROP.XD', 'NE.CON.GOVT.KD', 'NE.EXP.GNFS.KD.ZG', 'NE.CON.GOVT.KN', 'NE.CON.GOVT.ZS', 'NE.CON.PRVT.PC.KD.ZG', 'AG.LND.AGRI.ZS', 'NY.GNP.MKTP.KD.ZG', 'EN.URB.LCTY.UR.ZS', 'NE.EXP.GNFS.CN', 'EN.URB.LCTY', 'TM.VAL.MRCH.CD.WT', 'SP.POP.GROW', 'NE.EXP.GNFS.KN', 'NV.IND.TOTL.ZS', 'ER.FSH.CAPT.MT', 'NE.R

In [37]:
# Indicator names are all column labels after the first column.
list_of_indicator_names = WDI_design_matrix_df.columns[1:].tolist()

In [38]:
# Start the code list with "Year" so that, once the indicator codes are
# appended, it has one label per design-matrix column (the first column is
# skipped when the indicator names are extracted).
list_of_indicator_codes = ["Year"]

In [39]:
# Translate each indicator name into its code and append the codes in column
# order. A name missing from the_dictionary raises KeyError.
list_of_indicator_codes.extend(
    the_dictionary[indicator_name] for indicator_name in list_of_indicator_names
)

In [43]:
list_of_indicator_codes

['Year',
 'EN.ATM.CO2E.KD.GD',
 'NY.GNP.MKTP.KN',
 'NY.GNP.MKTP.CN',
 'NY.GNP.PCAP.KD.ZG',
 'NY.GNP.MKTP.KD',
 'NE.CON.GOVT.CN',
 'DC.DAC.GBRL.CD',
 'AG.PRD.FOOD.XD',
 'NE.CON.PRVT.KD.ZG',
 'AG.LND.CROP.ZS',
 'NY.GNP.PCAP.KD',
 'SP.POP.DPND.YG',
 'EN.POP.DNST',
 'NV.IND.TOTL.KD.ZG',
 'SP.POP.DPND.OL',
 'NY.GNP.PCAP.KN',
 'AG.LND.CREL.HA',
 'AG.LND.AGRI.K2',
 'AG.YLD.CREL.KG',
 'AG.PRD.CREL.MT',
 'NV.AGR.TOTL.KN',
 'NE.CON.PETC.KD.ZG',
 'NE.GDI.TOTL.KD.ZG',
 'ER.FSH.AQUA.MT',
 'NE.CON.TETC.CN',
 'NE.CON.TETC.KN',
 'NE.CON.TETC.KD',
 'MS.MIL.MPRT.KD',
 'NE.CON.TETC.ZS',
 'NE.CON.TOTL.CN',
 'NE.CON.TOTL.KN',
 'NE.CON.TOTL.KD',
 'NV.AGR.TOTL.CN',
 'NE.CON.GOVT.KD.ZG',
 'NV.AGR.TOTL.KD',
 'NV.AGR.TOTL.ZS',
 'AG.SRF.TOTL.K2',
 'AG.PRD.CROP.XD',
 'NE.CON.GOVT.KD',
 'NE.EXP.GNFS.KD.ZG',
 'NE.CON.GOVT.KN',
 'NE.CON.GOVT.ZS',
 'NE.CON.PRVT.PC.KD.ZG',
 'AG.LND.AGRI.ZS',
 'NY.GNP.MKTP.KD.ZG',
 'EN.URB.LCTY.UR.ZS',
 'NE.EXP.GNFS.CN',
 'EN.URB.LCTY',
 'TM.VAL.MRCH.CD.WT',
 'SP.POP.GROW',
 'NE.EXP.GN

In [44]:
# Relabel the design-matrix columns in place with the code list
# ("Year" followed by one code per indicator).
WDI_design_matrix_df.columns = list_of_indicator_codes

In [63]:
# Use the "Year" column as the row index; the result is bound to a new name.
WDI_design_matrix_df_by_year = WDI_design_matrix_df.set_index(keys="Year")

In [65]:
# Write the year-indexed, code-labelled design matrix into the WDI directory.
WDI_design_matrix_df_by_year.to_csv(
    WDI_path + "completed_values_1962_2013_design_matrix_with_codes_by_year.csv"
)

### Constructing a Dictionary for Y

In [41]:
# Load the Y design matrix from the WDI directory.
# NOTE(review): this file name starts with "complete_" while the other files
# in this notebook start with "completed_" — confirm the name on disk.
WDI_design_matrix_Y_df = pd.read_csv(WDI_path+"complete_values_Y_1962_2013_design_matrix.csv")

In [49]:
# Y indicator names are all column labels after the first column.
list_of_Y_indicator_names = WDI_design_matrix_Y_df.columns[1:].tolist()

In [50]:
list_of_Y_indicator_names

['GDP (constant LCU)',
 'GDP growth (annual %)',
 'GDP per capita growth (annual %)',
 'GDP per capita (constant 2010 US$)',
 'GDP at market prices (constant 2010 US$)',
 'GDP per capita (constant LCU)',
 'GDP deflator (base year varies by country)',
 'GDP (current LCU)',
 'GDP per capita (current LCU)']

In [53]:
# Start the Y code list with "Year"; the generated Y codes are appended after
# it, giving one label per column of the Y design matrix.
list_of_Y_indicator_codes = ["Year"]

In [54]:
# Numbering for the generated Y codes starts at 1 (the first code is "Y1").
counter = 1

In [55]:
# Give every Y indicator a positional label "Y<n>", continuing from the current
# value of `counter` and advancing it once per indicator.
for _ in list_of_Y_indicator_names:
    list_of_Y_indicator_codes.append(f"Y{counter}")
    counter += 1

In [56]:
list_of_Y_indicator_codes

['Year', 'Y1', 'Y2', 'Y3', 'Y4', 'Y5', 'Y6', 'Y7', 'Y8', 'Y9']

In [57]:
# Relabel the Y design-matrix columns in place with "Year" plus the generated
# Y codes.
WDI_design_matrix_Y_df.columns = list_of_Y_indicator_codes

In [69]:
# Use the "Year" column as the row index; the result is bound to a new name.
WDI_design_matrix_Y_df_by_year = WDI_design_matrix_Y_df.set_index(keys="Year")

In [71]:
# Write the year-indexed, code-labelled Y design matrix into the WDI directory.
WDI_design_matrix_Y_df_by_year.to_csv(
    WDI_path + "completed_values_Y_1962_2013_design_matrix_with_codes_by_year.csv"
)

These are examples from [Time Series Forecasting Methods](https://machinelearningmastery.com/time-series-forecasting-methods-in-python-cheat-sheet/).     
Please use the `wdi-project` conda virtual environment.

In [17]:
import csv
import pandas as pd
import numpy as np
import pickle

In [5]:
# contrived dataset: the integers 1..99, each shifted by uniform noise in [0, 1)
data = [step + random() for step in range(1, 100)]

In [13]:
data[98]

99.56526230271257

In [6]:
# fit model: wrap the contrived series in statsmodels' AR model class
# NOTE(review): newer statsmodels releases offer AutoReg as the replacement for
# AR — confirm which version the wdi-project environment pins.
model = AR(data)

In [7]:
# Estimate the model using fit()'s default settings (no arguments are passed).
model_fit = model.fit()

In [14]:
# make prediction: forecast the two positions that follow the last observation
# (indices len(data) and len(data) + 1)
forecast_start = len(data)
yhat = model_fit.predict(forecast_start, forecast_start + 1)

In [15]:
print(yhat)

[100.39176043 101.42197356]


The WDI path is shown below.

In [18]:
# Root directory of the WDI Indonesia files (same value as the definition near
# the top of the notebook). The trailing slash matters because file names are
# appended with plain `+` concatenation.
WDI_path="/home/hendra/Datasets/WDI_Indonesia/"