# Тесты на стационарность и кросс-корреляции 


In [1]:
import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import statsmodels.api as sm
import statsmodels.tsa.stattools as tsa
from zipfile import ZipFile
import os
import sys

from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.diagnostic import acorr_ljungbox 
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.stattools import coint


import warnings
# Silence every warning for the rest of the session (this also hides
# warnings raised by the statistical tests below).
warnings.filterwarnings('ignore')

# Default figure geometry for all plots: 10x6 inches at 150 dpi.
mpl.rcParams.update({
    'figure.figsize': (10, 6),
    'figure.dpi': 150,
})

## Предобработка данных

In [2]:
# Load the raw series; the first spreadsheet column becomes the index.
df = pd.read_excel('data/GDP_PCE_CPI.xlsx', index_col=0)
df['date'] = pd.to_datetime(df['date'])

# Keep observations dated 1970-01-01 through 2001-01-01 (both bounds inclusive).
df = df.loc[df['date'].between('1970-01-01', '2001-01-01')]

# Replace the source series codes with readable column names.
df = df.rename(
    columns={
        'CORESTICKM159SFRBATL': 'CPI',
        'PCE': 'Consumption',
    }
)

# Natural logarithms of the three series, stored next to the levels.
df[['ln_GDP', 'ln_CPI', 'ln_Consumption']] = np.log(
    df[['GDP', 'CPI', 'Consumption']]
)

## Тест Дики-Фуллера

In [3]:
def dickey_fuller(series, significance="5%"):
    """Run the augmented Dickey-Fuller test on ``series`` and print a report.

    Missing values are dropped before the series is passed to ``adfuller``.
    The test statistic, the p-value and the critical values are printed,
    followed by a verdict obtained by comparing the test statistic with the
    critical value at the ``significance`` level.

    Parameters
    ----------
    series : object with ``dropna()`` and ``name`` (e.g. a pandas Series)
        Series to test; ``series.name`` is shown in the report header.
    significance : str, optional
        Key of the critical value used for the verdict. It must be one of
        the keys of the critical-value dict returned by ``adfuller``
        (shown in the printed report, e.g. ``'1%'``, ``'5%'``, ``'10%'``).
        Defaults to ``'5%'``.

    Raises
    ------
    ValueError
        If ``significance`` is not among the available critical values.
    """
    result = adfuller(series.dropna())
    statistic, p_value, critical_values = result[0], result[1], result[4]

    # Fail before printing anything so a bad level never yields a half report.
    if significance not in critical_values:
        raise ValueError(
            f"unknown significance level {significance!r}; "
            f"expected one of {sorted(critical_values)}"
        )

    print("\n")
    print(f"Dickey-Fuller test for {series.name}")
    print("---------------------")
    print('adf:', statistic)
    print('p-value:', p_value)
    print('critical value:', critical_values)
    print("---------------------")

    # A statistic above the critical value means the unit-root null
    # hypothesis is not rejected at the chosen level.
    if statistic > critical_values[significance]:
        print('Есть единичные корни, ряд не стационарен')
    else:
        print('Единичных корней нет, ряд стационарен')

In [4]:
# Dickey-Fuller test on the log of CPI.
dickey_fuller(df["ln_CPI"])



Dickey-Fuller test for ln_CPI
---------------------
adf: -0.5014141553650601
p-value: 0.8917402380586106
critical value: {'1%': -3.4901313156261384, '5%': -2.8877122815688776, '10%': -2.5807296460459184}
---------------------
Есть единичные корни, ряд не стационарен


In [5]:
# Dickey-Fuller test on the log of Consumption.
dickey_fuller(df["ln_Consumption"])



Dickey-Fuller test for ln_Consumption
---------------------
adf: -5.342023982272903
p-value: 4.478461551034813e-06
critical value: {'1%': -3.4846672514209773, '5%': -2.8853397507076006, '10%': -2.5794629869786503}
---------------------
Единичных корней нет, ряд стационарен


In [6]:
# Dickey-Fuller test on the log of GDP.
dickey_fuller(df["ln_GDP"])



Dickey-Fuller test for ln_GDP
---------------------
adf: -2.4903991860875783
p-value: 0.11782781647339768
critical value: {'1%': -3.4901313156261384, '5%': -2.8877122815688776, '10%': -2.5807296460459184}
---------------------
Есть единичные корни, ряд не стационарен


### Фильтр Бакстера-Кинга (BXKG)

In [7]:
# Apply the band-pass filter to the three log series once per setting and
# store the results as "<series>_BXKG <setting>" columns. Each setting tuple
# is passed positionally to bkfilter after the data.
for bxkg in [(6, 40, 12), (6, 40, 8), (6, 32, 8)]:
    df[[f"{name}_BXKG {bxkg}" for name in ("GDP", "CPI", "Consumption")]] = (
        sm.tsa.filters.bkfilter(df[["ln_GDP", "ln_CPI", "ln_Consumption"]], *bxkg)
    )

In [8]:
# Preview the first ten rows, including the newly added filter columns.
df.head(10)

Unnamed: 0,date,GDP,CPI,Consumption,ln_GDP,ln_CPI,ln_Consumption,"GDP_BXKG (6, 40, 12)","CPI_BXKG (6, 40, 12)","Consumption_BXKG (6, 40, 12)","GDP_BXKG (6, 40, 8)","CPI_BXKG (6, 40, 8)","Consumption_BXKG (6, 40, 8)","GDP_BXKG (6, 32, 8)","CPI_BXKG (6, 32, 8)","Consumption_BXKG (6, 32, 8)"
96,1970-01-01,1051.2,6.628279,628.7,6.957688,1.891345,6.443654,,,,,,,,,
97,1970-04-01,1067.375,6.855423,636.0,6.972958,1.92504,6.455199,,,,,,,,,
98,1970-07-01,1086.059,7.04714,648.5,6.990311,1.952622,6.474662,,,,,,,,,
99,1970-10-01,1088.608,7.097175,658.3,6.992655,1.959697,6.489661,,,,,,,,,
100,1971-01-01,1135.156,7.042661,676.1,7.034525,1.951986,6.516341,,,,,,,,,
101,1971-04-01,1156.271,5.338291,688.8,7.052955,1.674906,6.534951,,,,,,,,,
102,1971-07-01,1177.675,5.199543,698.9,7.071297,1.648571,6.549508,,,,,,,,,
103,1971-10-01,1190.297,4.411237,715.8,7.081958,1.484155,6.573401,,,,,,,,,
104,1972-01-01,1230.609,3.755946,731.5,7.115264,1.32334,6.595097,,,,-0.010911,-0.108157,-0.010531,-0.010524,-0.099833,-0.010206
105,1972-04-01,1266.369,3.991016,752.5,7.143909,1.384046,6.623401,,,,-0.00873,-0.163503,-0.007025,-0.008528,-0.149369,-0.006831


### Тесты после фильтра

In [9]:
# Re-run the Dickey-Fuller test on every BXKG-filtered column: grouped by
# filter setting and, within a setting, ordered GDP, CPI, Consumption.
for col in [
    f"{name}_BXKG {setting}"
    for setting in [(6, 40, 12), (6, 40, 8), (6, 32, 8)]
    for name in ("GDP", "CPI", "Consumption")
]:
    dickey_fuller(df[col])



Dickey-Fuller test for GDP_BXKG (6, 40, 12)
---------------------
adf: -4.043484055400652
p-value: 0.0011991734656525256
critical value: {'1%': -3.5011373281819504, '5%': -2.8924800524857854, '10%': -2.5832749307479226}
---------------------
Единичных корней нет, ряд стационарен


Dickey-Fuller test for CPI_BXKG (6, 40, 12)
---------------------
adf: -2.6945998005631604
p-value: 0.07497741022510426
critical value: {'1%': -3.506944401824286, '5%': -2.894989819214876, '10%': -2.584614550619835}
---------------------
Есть единичные корни, ряд не стационарен


Dickey-Fuller test for Consumption_BXKG (6, 40, 12)
---------------------
adf: -1.949698722948869
p-value: 0.3090542469943206
critical value: {'1%': -3.50434289821397, '5%': -2.8938659630479413, '10%': -2.5840147047458037}
---------------------
Есть единичные корни, ряд не стационарен


Dickey-Fuller test for GDP_BXKG (6, 40, 8)
---------------------
adf: -3.9755701270564017
p-value: 0.001543976477632673
critical value: {'1%': -3.4

### Hodrick-Prescott Filter

In [10]:
# Hodrick-Prescott decomposition of each log series; only the cyclical
# component is stored, as "<series>_HP". The smoothing parameter is spelled
# out explicitly (1600 is the library default).
for col in ("GDP", "CPI", "Consumption"):
    cycle, trend = sm.tsa.filters.hpfilter(df[f"ln_{col}"], lamb=1600)
    df[f"{col}_HP"] = cycle


## Кросс-корреляции

In [11]:
def get_corr_cols_list(df, value):
    """Return the names of the cycle columns for the series ``value``.

    The list starts with the HP column and continues with the three BXKG
    columns in the order (6, 40, 8), (6, 32, 8), (6, 40, 12).

    ``df`` is accepted for signature compatibility but is not used.
    """
    bk_settings = ((6, 40, 8), (6, 32, 8), (6, 40, 12))
    names = [f"{value}_HP"]
    names.extend(f"{value}_BXKG {setting}" for setting in bk_settings)
    return names

def get_corrs_and_stds(df, value):
    """Build a table of standard deviations and correlations for ``value``.

    The columns named by ``get_corr_cols_list(df, value)`` are selected from
    ``df``. The result has one row per selected column: its name, its
    standard deviation (column ``std``) and its correlation with each of the
    selected columns.
    """
    cycles = df[get_corr_cols_list(df, value)]

    # Name the std column up front; the merge keys on the column-name column
    # that reset_index() creates in both tables.
    std_table = cycles.std().rename("std").reset_index()
    corr_table = cycles.corr().reset_index()
    return std_table.merge(corr_table)


### GDP

In [12]:
# Standard deviations and pairwise correlations of the GDP cycle columns.
get_corrs_and_stds(df, "GDP")

Unnamed: 0,index,std,GDP_HP,"GDP_BXKG (6, 40, 8)","GDP_BXKG (6, 32, 8)","GDP_BXKG (6, 40, 12)"
0,GDP_HP,0.011525,1.0,0.90554,0.90125,0.917623
1,"GDP_BXKG (6, 40, 8)",0.010403,0.90554,1.0,0.999767,0.932531
2,"GDP_BXKG (6, 32, 8)",0.010083,0.90125,0.999767,1.0,0.926298
3,"GDP_BXKG (6, 40, 12)",0.012611,0.917623,0.932531,0.926298,1.0


### Consumption

In [13]:
# Standard deviations and pairwise correlations of the Consumption cycle columns.
get_corrs_and_stds(df, "Consumption")

Unnamed: 0,index,std,Consumption_HP,"Consumption_BXKG (6, 40, 8)","Consumption_BXKG (6, 32, 8)","Consumption_BXKG (6, 40, 12)"
0,Consumption_HP,0.008377,1.0,0.847852,0.844801,0.813061
1,"Consumption_BXKG (6, 40, 8)",0.006638,0.847852,1.0,0.999684,0.936374
2,"Consumption_BXKG (6, 32, 8)",0.006411,0.844801,0.999684,1.0,0.930022
3,"Consumption_BXKG (6, 40, 12)",0.008746,0.813061,0.936374,0.930022,1.0


### CPI

In [14]:
# Standard deviations and pairwise correlations of the CPI cycle columns.
get_corrs_and_stds(df, "CPI")

Unnamed: 0,index,std,CPI_HP,"CPI_BXKG (6, 40, 8)","CPI_BXKG (6, 32, 8)","CPI_BXKG (6, 40, 12)"
0,CPI_HP,0.222739,1.0,0.951294,0.947226,0.974845
1,"CPI_BXKG (6, 40, 8)",0.181282,0.951294,1.0,0.999753,0.964537
2,"CPI_BXKG (6, 32, 8)",0.174059,0.947226,0.999753,1.0,0.960257
3,"CPI_BXKG (6, 40, 12)",0.242695,0.974845,0.964537,0.960257,1.0
