# Тесты на стационарность и кросс-корреляции 


In [1]:
import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from utils import dickey_fuller_test, bkxg_filter, hodrick_prescott_filter



import warnings
warnings.filterwarnings('ignore')
mpl.rcParams['figure.figsize'] = (10, 6)
mpl.rcParams['figure.dpi'] = 150

## Предобработка данных

In [2]:
df = pd.read_excel('data/Macro_data.xlsx', index_col=0).reset_index()
df = df.rename(columns = {"TIME": "date"})
df['date'] = pd.to_datetime(df['date'])
df = df[
    (df["date"] >= "1970-01-01") & 
    (df["date"] <= "2001-01-01")
]

In [3]:
df.head(5)

Unnamed: 0,date,GDP Nominal,GDP Real,Private Consumtion,Investments,Residential,Non-Residential,Changes in inventories,Cumulated Chnge in iventories,Total export,Total import,Government Consumption Expenditures,GDP Deflator,Consumption Deflator,"CPI, Index",Industrial production Index,Share prices,Unemployment,Personal Consumprion Expenditure
0,1970-01-01,1051200000000.0,4939759000000.0,157921200000.0,575953000000.0,42493000000.0,123796000000.0,1823000000.0,1823000000,57031000000.0,53517000000.0,247889000000.0,0.212804,17.416415,38.1,37.9469,6.34526,0.044,1895000000000.0
1,1970-04-01,1067375000000.0,4946770000000.0,160392500000.0,577205000000.0,41365000000.0,125008000000.0,5081000000.0,6904000000,60411000000.0,55204000000.0,249143000000.0,0.215772,17.629837,38.633333,37.683,5.56439,0.049,1924700000000.0
2,1970-07-01,1086059000000.0,4992357000000.0,163370500000.0,586598000000.0,42566000000.0,126255000000.0,5083000000.0,11987000000,60519000000.0,56431000000.0,254584000000.0,0.217544,17.806946,39.033333,37.4482,5.48283,0.054,1960500000000.0
3,1970-10-01,1088608000000.0,4938857000000.0,165040200000.0,555454000000.0,47218000000.0,123520000000.0,-3984000000.0,8003000000,60873000000.0,57888000000.0,258708000000.0,0.220417,18.027467,39.6,37.3145,6.01062,0.061,1980500000000.0
4,1971-01-01,1135156000000.0,5072996000000.0,169796500000.0,620212000000.0,50961000000.0,126281000000.0,12254000000.0,20257000000,63221000000.0,58651000000.0,261905000000.0,0.223764,18.182825,39.933333,37.4892,6.80859,0.06,2037500000000.0


## Логарифмы

In [4]:
df["CPI, Index_ln"], df["GDP Real_ln"], df["Personal Consumprion Expenditure_ln"] = np.log([df["CPI, Index"], 
                                                 df["GDP Real"], df["Personal Consumprion Expenditure "]]) 

## Тест Дики-Фуллера

In [5]:
dickey_fuller_test(df["CPI, Index"])

Dickey-Fuller test for CPI, Index
------------------------------
adf: -0.556
p-value: 0.881
1%: -3.489
5%: -2.887
10%: -2.58
Есть единичные корни, ряд не стационарен




In [6]:
dickey_fuller_test(df["GDP Real"])

Dickey-Fuller test for GDP Real
------------------------------
adf: 3.018
p-value: 1.0
1%: -3.491
5%: -2.888
10%: -2.581
Есть единичные корни, ряд не стационарен




In [7]:
dickey_fuller_test(df["Personal Consumprion Expenditure "])

Dickey-Fuller test for Personal Consumprion Expenditure 
------------------------------
adf: 3.791
p-value: 1.0
1%: -3.487
5%: -2.886
10%: -2.58
Есть единичные корни, ряд не стационарен




### BXKG

In [8]:
for bxkg_params in [(6, 40, 12), (6, 40, 8), (6, 32, 8)]:
    df[
        [f"GDP Real_ln_BXKG {bxkg_params}", 
         f"CPI, Index_ln_BXKG {bxkg_params}",
        f"Personal Consumprion Expenditure_ln_BXKG {bxkg_params}"]
    ] = bkxg_filter(df[['GDP Real_ln', 'CPI, Index_ln', 'Personal Consumprion Expenditure_ln']], bxkg_params)

### Тесты после фильтра

In [9]:
for col in ['GDP Real_ln', 'Personal Consumprion Expenditure_ln',
       'GDP Real_ln_BXKG (6, 40, 12)', 'CPI, Index_ln_BXKG (6, 40, 12)',
       'Personal Consumprion Expenditure_ln_BXKG (6, 40, 12)',
       'GDP Real_ln_BXKG (6, 40, 8)', 'CPI, Index_ln_BXKG (6, 40, 8)',
       'Personal Consumprion Expenditure_ln_BXKG (6, 40, 8)',
       'GDP Real_ln_BXKG (6, 32, 8)', 'CPI, Index_ln_BXKG (6, 32, 8)',
       'Personal Consumprion Expenditure_ln_BXKG (6, 32, 8)']:
    dickey_fuller_test(df[col])

Dickey-Fuller test for GDP Real_ln
------------------------------
adf: 1.385
p-value: 0.997
1%: -3.491
5%: -2.888
10%: -2.581
Есть единичные корни, ряд не стационарен


Dickey-Fuller test for Personal Consumprion Expenditure_ln
------------------------------
adf: -6.285
p-value: 0.0
1%: -3.485
5%: -2.885
10%: -2.579
Единичных корней нет, ряд стационарен


Dickey-Fuller test for GDP Real_ln_BXKG (6, 40, 12)
------------------------------
adf: -4.471
p-value: 0.0
1%: -3.502
5%: -2.893
10%: -2.583
Единичных корней нет, ряд стационарен


Dickey-Fuller test for CPI, Index_ln_BXKG (6, 40, 12)
------------------------------
adf: -2.359
p-value: 0.154
1%: -3.505
5%: -2.894
10%: -2.584
Есть единичные корни, ряд не стационарен


Dickey-Fuller test for Personal Consumprion Expenditure_ln_BXKG (6, 40, 12)
------------------------------
adf: -1.907
p-value: 0.329
1%: -3.505
5%: -2.894
10%: -2.584
Есть единичные корни, ряд не стационарен


Dickey-Fuller test for GDP Real_ln_BXKG (6, 40, 8)
---------

### Hodrick-Prescott Filter

In [10]:
for col in ["GDP Real_ln", "CPI, Index_ln", "Personal Consumprion Expenditure_ln"]:
    cycle, trend = hodrick_prescott_filter(df[col])
    df[f'{col}_HP'] = cycle


## Кросcкорреляции

In [11]:
def get_corr_cols_list(df, value):
    return [f"{value}_HP", f"{value}_BXKG (6, 40, 8)", 
            f"{value}_BXKG (6, 32, 8)", f"{value}_BXKG (6, 40, 12)"]

def get_corrs_and_stds(df, value):
    columns = get_corr_cols_list(df, value)
    stds = df[columns].std().reset_index()
    corrs = df[columns].corr().reset_index()
    res = stds.merge(corrs)
    res = res.rename(columns = {0: "std"})
    return res


### GDP

In [12]:
get_corrs_and_stds(df, "GDP Real_ln")

Unnamed: 0,index,std,GDP Real_ln_HP,"GDP Real_ln_BXKG (6, 40, 8)","GDP Real_ln_BXKG (6, 32, 8)","GDP Real_ln_BXKG (6, 40, 12)"
0,GDP Real_ln_HP,0.015946,1.0,0.920389,0.914373,0.967897
1,"GDP Real_ln_BXKG (6, 40, 8)",0.012695,0.920389,1.0,0.999761,0.940263
2,"GDP Real_ln_BXKG (6, 32, 8)",0.01222,0.914373,0.999761,1.0,0.933799
3,"GDP Real_ln_BXKG (6, 40, 12)",0.017552,0.967897,0.940263,0.933799,1.0


### CPI

In [13]:
get_corrs_and_stds(df, "CPI, Index_ln")

Unnamed: 0,index,std,"CPI, Index_ln_HP","CPI, Index_ln_BXKG (6, 40, 8)","CPI, Index_ln_BXKG (6, 32, 8)","CPI, Index_ln_BXKG (6, 40, 12)"
0,"CPI, Index_ln_HP",0.015256,1.0,0.956726,0.954414,0.972024
1,"CPI, Index_ln_BXKG (6, 40, 8)",0.01045,0.956726,1.0,0.999898,0.968745
2,"CPI, Index_ln_BXKG (6, 32, 8)",0.009901,0.954414,0.999898,1.0,0.965601
3,"CPI, Index_ln_BXKG (6, 40, 12)",0.017444,0.972024,0.968745,0.965601,1.0


## PCE

In [14]:
get_corrs_and_stds(df, "Personal Consumprion Expenditure_ln")

Unnamed: 0,index,std,Personal Consumprion Expenditure_ln_HP,"Personal Consumprion Expenditure_ln_BXKG (6, 40, 8)","Personal Consumprion Expenditure_ln_BXKG (6, 32, 8)","Personal Consumprion Expenditure_ln_BXKG (6, 40, 12)"
0,Personal Consumprion Expenditure_ln_HP,0.007731,1.0,0.882034,0.879205,0.851156
1,"Personal Consumprion Expenditure_ln_BXKG (6, 4...",0.0062,0.882034,1.0,0.999665,0.936815
2,"Personal Consumprion Expenditure_ln_BXKG (6, 3...",0.005984,0.879205,0.999665,1.0,0.930285
3,"Personal Consumprion Expenditure_ln_BXKG (6, 4...",0.008402,0.851156,0.936815,0.930285,1.0
