In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from seaborn import color_palette,  lineplot, heatmap, scatterplot
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from numpy.fft import fft
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.stattools import durbin_watson
from scipy.stats import jarque_bera
import functions as f
import warnings
warnings.filterwarnings("ignore")


# Four-colour seaborn 'muted' palette, kept both as a palette object and as hex strings.
palette = color_palette('muted', n_colors = 4)
mut_pal = palette.as_hex()

# Notebook-wide matplotlib defaults, applied in a single update.
plt.rcParams.update({
    'figure.figsize' : [8, 6],
    'figure.dpi' : 140,
    'lines.linewidth' : 2.5,
    'axes.facecolor' : 'w',
    'figure.facecolor' : 'f7ead4',
})

In [63]:
# Load the raw CSV, using its 'CountryID' column as the row index.
df_iniziale = pd.read_csv('GDPconstant-USD-countries.csv', index_col = 'CountryID')

# Project helper from the local `functions` module; it returns the table used by the
# following cells together with `valid_nations`.
# NOTE(review): presumably `valid_nations` holds the nations that pass the
# preprocessing filters — confirm in functions.py.
df, valid_nations = f.preprocess_df(df_iniziale)

In [64]:
# Distinct indicator names available in the preprocessed table.
df['IndicatorName'].unique()

array(['Final consumption expenditure',
       'Household consumption expenditure (including Non-profit institutions serving households)',
       'General government final consumption expenditure',
       'Gross capital formation',
       'Gross fixed capital formation (including Acquisitions less disposals of valuables)',
       'Changes in inventories', 'Exports of goods and services',
       'Imports of goods and services', 'Gross Domestic Product (GDP)',
       'Agriculture, hunting, forestry, fishing (ISIC A-B)',
       'Mining, Manufacturing, Utilities (ISIC C-E)',
       'Manufacturing (ISIC D)', 'Construction (ISIC F)',
       'Wholesale, retail trade, restaurants and hotels (ISIC G-H)',
       'Transport, storage and communication (ISIC I)',
       'Other Activities (ISIC J-P)', 'Total Value Added'], dtype=object)

In [65]:
# Build the list of nations analysed in the rest of the notebook via a project helper.
# NOTE(review): the selection rule lives in functions.create_nation_list; if it is not
# deterministic, cells that hard-code a nation name may break on re-run — confirm.
nation_list = f.create_nation_list(valid_nations, df)
nation_list

['Finland', 'Sweden', 'Portugal', 'Algeria', 'Germany']

In [66]:
def create_df(df : pd.DataFrame):
    """
    Reshape the indicator table of a single nation into a time-indexed dataframe.

    The input has one row per indicator and one column per year; the output has one
    row per year and five indicator columns, in this order:

        'Construction'   <- 'Construction (ISIC F)'
        'Consumption'    <- 'Final consumption expenditure'
        'Gov_exp'        <- 'General government final consumption expenditure'
        'Gross_capital'  <- 'Gross capital formation'
        'GDP'            <- 'Gross Domestic Product (GDP)'

    Parameters:
        df (pandas DataFrame): rows of exactly one nation. After `reset_index()` it
            must contain the columns 'Country' and 'CountryID'; the first remaining
            column must hold the indicator names and the other 51 columns the yearly
            values for 1970-2020.

    Returns:
        DataFrame: one row per year, indexed by year-start dates from 1970-01-01 to
        2020-01-01. Values are not converted here (the transpose of a mixed
        text/number table leaves them as generic objects), so cast with
        `.astype(float)` before numeric use.

    Raises:
        ValueError: if the selection does not yield exactly five columns (e.g. the
            input holds more than one nation, so indicator names are duplicated), or
            if the number of yearly columns is not 51.
        KeyError: if one of the five indicators is missing from the input.
    """
    # Source indicator name -> short column name, listed in the output column order.
    indicator_names = {
        'Construction (ISIC F)': 'Construction',
        'Final consumption expenditure': 'Consumption',
        'General government final consumption expenditure': 'Gov_exp',
        'Gross capital formation': 'Gross_capital',
        'Gross Domestic Product (GDP)': 'GDP',
    }

    # Keep only the indicator-name column and the yearly columns.
    df = df.reset_index().drop(['Country', 'CountryID'], axis = 1)
    # Years become rows; row 0 now holds the indicator names and is promoted to header.
    df = df.T.reset_index(drop = True)
    df.columns = df.loc[0]
    df = df.drop(0)
    # The table is expected to span 1970-2020 with one column per year (51 rows here);
    # pandas raises ValueError on a length mismatch.
    df.index = pd.date_range(start="1970-01-01", end="2020-12-31", freq="YS")

    df = df[list(indicator_names)]
    # Duplicate indicator names (several nations in the input) would otherwise be
    # mislabelled silently by the rename below.
    if df.shape[1] != len(indicator_names):
        raise ValueError(
            'create_df expects the rows of exactly one nation: found '
            f'{df.shape[1]} matching indicator columns instead of {len(indicator_names)}'
        )
    df.columns = list(indicator_names.values())
    return df

In [67]:
# One reshaped frame per selected nation, following the order of nation_list.
df_1, df_2, df_3, df_4, df_5 = (
    create_df(df[df['Country'] == nation_list[position]])
    for position in range(5)
)

df_list = [df_1, df_2, df_3, df_4, df_5]

In [68]:
# Cast every per-nation frame to float and rebuild the list from the converted frames.
df_1, df_2, df_3, df_4, df_5 = (
    frame.astype(float) for frame in (df_1, df_2, df_3, df_4, df_5)
)

df_list = [df_1, df_2, df_3, df_4, df_5]

In [69]:
# Nation name -> result of f.train_test_split for that nation's frame.
# NOTE(review): a later cell indexes the result with [0] and reads `.columns`, so the
# helper presumably returns a (train, test) pair of DataFrames — confirm in functions.py.
df_train_test = {}

# The loop variable is deliberately NOT called `df`: that name holds the full
# preprocessed table, and rebinding it here would break re-running the earlier cells
# that filter on df['Country'].
for idx, nation_df in enumerate(df_list):
    df_train_test[nation_list[idx]] = f.train_test_split(nation_df)

In [70]:
# Sanity check: column names of the first element of Finland's split.
# NOTE(review): 'Finland' is hard-coded; this raises KeyError if nation_list ever
# stops containing it.
df_train_test['Finland'][0].columns

Index(['Construction', 'Consumption', 'Gov_exp', 'Gross_capital', 'GDP'], dtype='object')

In [71]:
# Nation name -> output of f.difference for that nation.
df_train_test_log_dif = {}

# The second argument (1) is presumably the differencing order, matching the message
# printed below.
# NOTE(review): the `_log_dif` suffix suggests f.difference also log-transforms the
# series — confirm in functions.py.
for nation in nation_list:
    df_train_test_log_dif[nation] = f.difference(df_train_test, 1, nation)

print('Stationarity check after first order differencing')
# Stationarity summary for the 'GDP' column, one row per nation (ADF and KPSS columns
# in the recorded output).
stationarity_df =  f.check_stationarity(df_train_test_log_dif, nation_list, 'GDP')
stationarity_df

Stationarity check after first order differencing


Unnamed: 0,ADF,P-value for ADF,ADF stationarity,KPSS,P-value for KPSS,KPSS stationarity
Finland,-3.358618,0.012453,Yes,0.076583,0.1,Yes
Sweden,-2.878037,0.047962,Yes,0.171736,0.1,Yes
Portugal,-3.433653,0.009864,Yes,0.112797,0.1,Yes
Algeria,-3.59146,0.005926,Yes,0.224629,0.1,Yes
Germany,-3.56472,0.006472,Yes,0.153294,0.1,Yes


In [72]:
# Disabled: second differencing pass (would apply f.difference to the already-differenced data).
#df_train_test_2dif = {}
#for idx, nation in enumerate(nation_list):
#    df_train_test_2dif[nation] = f.difference(df_train_test_log_dif, 1, nation)

In [73]:
# Repeat the stationarity check on the differenced data for every non-GDP column,
# printing the column name above each result table.
# NOTE(review): in the recorded output, Algeria's 'Construction' series is flagged
# non-stationary by both ADF and KPSS after one difference — it may need further
# treatment before modelling.
for col in ['Construction', 'Consumption', 'Gov_exp', 'Gross_capital']:
    print(col)
    display(f.check_stationarity(df_train_test_log_dif, nation_list, col))

Construction


Unnamed: 0,ADF,P-value for ADF,ADF stationarity,KPSS,P-value for KPSS,KPSS stationarity
Finland,-3.714901,0.003908373,Yes,0.07955,0.1,Yes
Sweden,-6.022868,1.479411e-07,Yes,0.123063,0.1,Yes
Portugal,-4.090776,0.00100309,Yes,0.226677,0.1,Yes
Algeria,0.485445,0.9844239,No,0.58112,0.024353,No
Germany,-4.552743,0.0001577108,Yes,0.244817,0.1,Yes


Consumption


Unnamed: 0,ADF,P-value for ADF,ADF stationarity,KPSS,P-value for KPSS,KPSS stationarity
Finland,-3.681809,0.004376,Yes,0.057796,0.1,Yes
Sweden,-4.139858,0.000832,Yes,0.131851,0.1,Yes
Portugal,-3.51113,0.007706,Yes,0.089327,0.1,Yes
Algeria,-3.540152,0.007014,Yes,0.208094,0.1,Yes
Germany,-3.60863,0.005598,Yes,0.347697,0.0997,Yes


Gov_exp


Unnamed: 0,ADF,P-value for ADF,ADF stationarity,KPSS,P-value for KPSS,KPSS stationarity
Finland,-4.547552,0.0001611896,Yes,0.244043,0.1,Yes
Sweden,-4.116962,0.0009078527,Yes,0.372798,0.08888,Yes
Portugal,-5.753258,5.899234e-07,Yes,0.080402,0.1,Yes
Algeria,-5.069864,1.611283e-05,Yes,0.218127,0.1,Yes
Germany,-3.170661,0.02174697,Yes,0.145837,0.1,Yes


Gross_capital


Unnamed: 0,ADF,P-value for ADF,ADF stationarity,KPSS,P-value for KPSS,KPSS stationarity
Finland,-3.924825,0.00186,Yes,0.060923,0.1,Yes
Sweden,-3.997835,0.001422,Yes,0.0703,0.1,Yes
Portugal,-4.15474,0.000785,Yes,0.115169,0.1,Yes
Algeria,-4.075376,0.001063,Yes,0.303988,0.1,Yes
Germany,-4.552078,0.000158,Yes,0.116979,0.1,Yes
