# Тесты на стационарность и кросс-корреляции 


In [1]:
import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import statsmodels.api as sm
import statsmodels.tsa.stattools as tsa
from zipfile import ZipFile
import os
import sys
from statsmodels.tsa.stattools import adfuller

from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.diagnostic import acorr_ljungbox 
from utils import dickey_fuller_test, bkxg_filter, hodrick_prescott_filter
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.stattools import coint


import warnings
# Suppress every warning for the rest of the session and set the default
# figure geometry (10x6 inches at 150 dpi) in a single rcParams update.
warnings.filterwarnings('ignore')
mpl.rcParams.update({
    'figure.figsize': (10, 6),
    'figure.dpi': 150,
})

## Предобработка данных

In [2]:
# Load the macro dataset. read_excel() uses the first sheet column as the
# index and reset_index() turns it back into a regular column; the "TIME"
# column is then renamed to "date" and parsed as datetimes.
df = pd.read_excel('data/Macro_data.xlsx', index_col=0).reset_index()
df = df.rename(columns={"TIME": "date"})
df['date'] = pd.to_datetime(df['date'])

# Keep observations from 1970-01-01 through 2001-01-01, both bounds inclusive.
in_sample = (df["date"] >= "1970-01-01") & (df["date"] <= "2001-01-01")
# .copy() detaches the subset from the full frame: new columns are assigned
# to df further down, and doing that on a boolean-mask selection is the
# pattern that raises SettingWithCopyWarning (silenced here by the blanket
# warnings filter, so the problem would otherwise go unnoticed).
df = df[in_sample].copy()

## Логарифмы

In [4]:
# Add natural-log versions of the CPI index and real GDP as "<name>_ln" columns.
for level_col in ("CPI, Index", "GDP Real"):
    df[f"{level_col}_ln"] = np.log(df[level_col])

## Тест Дики-Фуллера

In [5]:
# Unit-root check on the raw CPI index level. The helper from utils prints
# the ADF statistic, p-value, critical values and a stationarity verdict.
dickey_fuller_test(df["CPI, Index"])

Dickey-Fuller test for CPI, Index
------------------------------
adf: -0.556
p-value: 0.881
1%: -3.489
5%: -2.887
10%: -2.58
Есть единичные корни, ряд не стационарен




In [6]:
# Unit-root check on the raw real GDP level. The helper from utils prints
# the ADF statistic, p-value, critical values and a stationarity verdict.
dickey_fuller_test(df["GDP Real"])

Dickey-Fuller test for GDP Real
------------------------------
adf: 3.018
p-value: 1.0
1%: -3.491
5%: -2.888
10%: -2.581
Есть единичные корни, ряд не стационарен




### BXKG

In [7]:
# Run the band-pass helper on both log series for each parameter set and store
# the results as "<series>_BXKG <params>" columns (GDP first, then CPI).
# NOTE(review): the meaning of each tuple element is defined by bkxg_filter in
# utils -- presumably (low period, high period, lead/lag length); confirm there.
bxkg_inputs = ['GDP Real_ln', 'CPI, Index_ln']
bxkg_param_sets = [(6, 40, 12), (6, 40, 8), (6, 32, 8)]

for bxkg_params in bxkg_param_sets:
    bxkg_outputs = [f"{name}_BXKG {bxkg_params}" for name in bxkg_inputs]
    df[bxkg_outputs] = bkxg_filter(df[bxkg_inputs], bxkg_params)

### Тесты после фильтра

In [8]:
# Dickey-Fuller test on the two log series first, then on every band-pass
# filtered series (GDP before CPI within each parameter set).
adf_columns = ['CPI, Index_ln', 'GDP Real_ln']
for tested_params in [(6, 40, 12), (6, 40, 8), (6, 32, 8)]:
    adf_columns.append(f'GDP Real_ln_BXKG {tested_params}')
    adf_columns.append(f'CPI, Index_ln_BXKG {tested_params}')

for col in adf_columns:
    dickey_fuller_test(df[col])

Dickey-Fuller test for CPI, Index_ln
------------------------------
adf: -2.596
p-value: 0.094
1%: -3.487
5%: -2.886
10%: -2.58
Есть единичные корни, ряд не стационарен


Dickey-Fuller test for GDP Real_ln
------------------------------
adf: 1.385
p-value: 0.997
1%: -3.491
5%: -2.888
10%: -2.581
Есть единичные корни, ряд не стационарен


Dickey-Fuller test for GDP Real_ln_BXKG (6, 40, 12)
------------------------------
adf: -4.471
p-value: 0.0
1%: -3.502
5%: -2.893
10%: -2.583
Единичных корней нет, ряд стационарен


Dickey-Fuller test for CPI, Index_ln_BXKG (6, 40, 12)
------------------------------
adf: -2.359
p-value: 0.154
1%: -3.505
5%: -2.894
10%: -2.584
Есть единичные корни, ряд не стационарен


Dickey-Fuller test for GDP Real_ln_BXKG (6, 40, 8)
------------------------------
adf: -4.464
p-value: 0.0
1%: -3.5
5%: -2.892
10%: -2.583
Единичных корней нет, ряд стационарен


Dickey-Fuller test for CPI, Index_ln_BXKG (6, 40, 8)
------------------------------
adf: -2.915
p-value: 0.044

### Hodrick-Prescott Filter

In [9]:
# Store the first component returned by the HP helper as "<series>_HP";
# the second component is unpacked but not kept in df.
# NOTE(review): the (cycle, trend) order is taken from the unpacking names;
# confirm against hodrick_prescott_filter in utils.
hp_inputs = ("GDP Real_ln", "CPI, Index_ln")
for col in hp_inputs:
    hp_column = f'{col}_HP'
    cycle, trend = hodrick_prescott_filter(df[col])
    df[hp_column] = cycle


## Кросс-корреляции

In [10]:
def get_corr_cols_list(df, value):
    """Return the names of the detrended columns derived from ``value``.

    ``df`` is accepted but not used; the names are built from ``value`` alone.
    The order is: HP, BXKG (6, 40, 8), BXKG (6, 32, 8), BXKG (6, 40, 12).
    """
    suffixes = (
        "HP",
        "BXKG (6, 40, 8)",
        "BXKG (6, 32, 8)",
        "BXKG (6, 40, 12)",
    )
    return [f"{value}_{suffix}" for suffix in suffixes]

def get_corrs_and_stds(df, value):
    """Build one table of standard deviations and pairwise correlations.

    The columns are those named by ``get_corr_cols_list(df, value)``.
    ``std()`` and ``corr()`` are each turned into a frame keyed by an
    ``"index"`` column holding the series name, then merged on the columns
    they share. The unnamed ``0`` column produced by ``std().reset_index()``
    is renamed to ``"std"``.

    Returns a DataFrame with one row per series: ``index``, ``std`` and one
    correlation column per series.
    """
    selected = df[get_corr_cols_list(df, value)]
    std_table = selected.std().reset_index()
    corr_table = selected.corr().reset_index()
    return std_table.merge(corr_table).rename(columns={0: "std"})


### GDP

In [12]:
# Std and cross-correlation table for the detrended log real GDP series.
get_corrs_and_stds(df, "GDP Real_ln")

Unnamed: 0,index,std,GDP Real_ln_HP,"GDP Real_ln_BXKG (6, 40, 8)","GDP Real_ln_BXKG (6, 32, 8)","GDP Real_ln_BXKG (6, 40, 12)"
0,GDP Real_ln_HP,0.015946,1.0,0.920389,0.914373,0.967897
1,"GDP Real_ln_BXKG (6, 40, 8)",0.012695,0.920389,1.0,0.999761,0.940263
2,"GDP Real_ln_BXKG (6, 32, 8)",0.01222,0.914373,0.999761,1.0,0.933799
3,"GDP Real_ln_BXKG (6, 40, 12)",0.017552,0.967897,0.940263,0.933799,1.0


### CPI

In [13]:
# Std and cross-correlation table for the detrended log CPI series.
get_corrs_and_stds(df, "CPI, Index_ln")

Unnamed: 0,index,std,"CPI, Index_ln_HP","CPI, Index_ln_BXKG (6, 40, 8)","CPI, Index_ln_BXKG (6, 32, 8)","CPI, Index_ln_BXKG (6, 40, 12)"
0,"CPI, Index_ln_HP",0.015256,1.0,0.956726,0.954414,0.972024
1,"CPI, Index_ln_BXKG (6, 40, 8)",0.01045,0.956726,1.0,0.999898,0.968745
2,"CPI, Index_ln_BXKG (6, 32, 8)",0.009901,0.954414,0.999898,1.0,0.965601
3,"CPI, Index_ln_BXKG (6, 40, 12)",0.017444,0.972024,0.968745,0.965601,1.0
