Testing each column of the DataFrame for a unit root using the Augmented Dickey-Fuller (ADF) test

In [5]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller

In [15]:
# Location of the macro-variable feature table.
# NOTE(review): this is an absolute, machine-specific path — consider deriving it
# from a configurable data directory so the notebook runs on other machines.
file_path = '/Users/asger/Documents/GitHub/Deep_Learning_Techniques/Master/Data/macrovariables_featured.csv'

# Read the CSV and leave out the date column in one step, so that only the
# series to be tested remain as columns of df
df = pd.read_csv(file_path).drop(columns=['date'])

In [19]:
# Let's set up a for loop that goes through each remaining column (the date column was dropped above), testing for a unit root using the statsmodels adfuller test

def check_stationarity(df, significance=0.05):
    """Run the Augmented Dickey-Fuller test on every column of ``df`` and print a report.

    For each column this prints the ADF statistic, the p-value, the critical
    values, the number of lags used, and a Yes/No verdict naming the column.
    The lag length is selected by ``adfuller`` with ``autolag='AIC'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are the series to test. Every column is passed to
        ``adfuller``, so the frame should contain only the series of interest.
    significance : float, default 0.05
        p-value threshold. A column is reported as stationary when its
        p-value is strictly below this value.

    Returns
    -------
    None
        The results are printed, not returned.
    """
    for column in df.columns:
        # adfuller does not accept missing values; drop them per column so a
        # single gap does not abort the whole loop.
        series = df[column].dropna()
        result = adfuller(series, autolag='AIC')

        # Positions in the adfuller result used below:
        # 0 = statistic, 1 = p-value, 2 = lags used, 4 = critical values.
        adf_statistic, p_value, lags_used = result[0], result[1], result[2]
        critical_values = result[4]

        print(f'ADF Statistic: {adf_statistic}')
        print(f'p-value: {p_value}')
        print('Critical Values:')
        for key, value in critical_values.items():
            print(f'\t{key}: {value}')
        print(f'Number of lags used: {lags_used}')
        print(f'Is {column} stationary? {"Yes" if p_value < significance else "No"}')
        print('\n')

# Run the unit-root check on the undifferenced series (the date column was
# already dropped from df when the data was loaded)
check_stationarity(df)

ADF Statistic: -67.98816439717052
p-value: 0.0
Critical Values:
	1%: -3.43172512220716
	5%: -2.862147648515838
	10%: -2.567093453210195
Number of lags used: 0
Is usd_eur_exchange stationary? Yes


ADF Statistic: -2.947210643661061
p-value: 0.04012705981785254
Critical Values:
	1%: -3.431733559555839
	5%: -2.862151376002777
	10%: -2.567095437512054
Number of lags used: 29
Is brent stationary? Yes


ADF Statistic: -1.8159882305751163
p-value: 0.3725533670968354
Critical Values:
	1%: -3.43173238963315
	5%: -2.8621508591501033
	10%: -2.5670951623690046
Number of lags used: 25
Is eu_cpi stationary? No


ADF Statistic: -0.02607212902285502
p-value: 0.9563220914475979
Critical Values:
	1%: -3.4317344382982022
	5%: -2.8621517642166197
	10%: -2.567095644175108
Number of lags used: 32
Is eu_mro_rate stationary? No


ADF Statistic: 0.005565309046809921
p-value: 0.9589899204178115
Critical Values:
	1%: -3.4317318054134476
	5%: -2.8621506010513356
	10%: -2.5670950249718727
Number of lags used: 23
I

In [20]:
# First-difference every series to remove unit roots. The date column is no
# longer part of df at this point, so all columns are differenced. Rows left
# with missing values (at least the first row, which has no predecessor) are
# dropped.
df_diff = df.diff(periods=1).dropna(axis=0, how='any')

# Repeat the unit-root check on the differenced series
check_stationarity(df_diff)


ADF Statistic: -20.758923019697367
p-value: 0.0
Critical Values:
	1%: -3.4317344382982022
	5%: -2.8621517642166197
	10%: -2.567095644175108
Number of lags used: 31
Is usd_eur_exchange stationary? Yes


ADF Statistic: -11.169972420635851
p-value: 2.6555068632988896e-20
Critical Values:
	1%: -3.431733559555839
	5%: -2.862151376002777
	10%: -2.567095437512054
Number of lags used: 28
Is brent stationary? Yes


ADF Statistic: -10.445161926785861
p-value: 1.486677801770791e-18
Critical Values:
	1%: -3.43173238963315
	5%: -2.8621508591501033
	10%: -2.5670951623690046
Number of lags used: 24
Is eu_cpi stationary? Yes


ADF Statistic: -8.233248404923431
p-value: 5.983948418622549e-13
Critical Values:
	1%: -3.431734731460428
	5%: -2.8621518937308172
	10%: -2.5670957131211334
Number of lags used: 32
Is eu_mro_rate stationary? Yes


ADF Statistic: -10.60185406394553
p-value: 6.142086416256715e-19
Critical Values:
	1%: -3.4317318054134476
	5%: -2.8621506010513356
	10%: -2.5670950249718727
Number of