In [4]:
import backtrader as bt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import io

from statsmodels.tsa.stattools import adfuller

# Apply matplotlib's 'ggplot' style sheet to every figure drawn after this point.
plt.style.use('ggplot')

In [2]:
# Load the imputed bar data from the local HDF5 store (opened read-only; the
# context manager closes the file once the frame has been read).
with pd.HDFStore('./large_files/data2.h5', mode='r') as store:
    df_ = store.get('data/data_imputed_20240226')

# `verbose` is a boolean flag: True forces the full per-column summary.
df_.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2522881 entries, 525 to 138563
Data columns (total 10 columns):
 #   Column    Dtype                           
---  ------    -----                           
 0   date      object                          
 1   time      object                          
 2   open      float64                         
 3   high      float64                         
 4   low       float64                         
 5   close     float64                         
 6   volume    int64                           
 7   datetime  datetime64[ns, America/New_York]
 8   day       object                          
 9   month     int64                           
dtypes: datetime64[ns, America/New_York](1), float64(4), int64(2), object(3)
memory usage: 211.7+ MB


In [3]:
# Index the bars by timestamp (the 'datetime' column is kept as well), then
# keep only rows from 2021-2022 that fall on a Wednesday
# (pandas day_of_week: Monday=0, so 2 is Wednesday).
df = df_.set_index('datetime', drop=False)
in_target_years = df.index.year.isin([2021, 2022])
is_wednesday = df.index.day_of_week == 2
df = df.loc[in_target_years & is_wednesday]

In [7]:
def adf_test(series):
    """Run an Augmented Dickey-Fuller test on ``series`` and summarise it.

    The lag length is selected by ``adfuller`` with ``autolag='AIC'``.

    Returns:
        dict: the test statistic under ``'adf'``, the p-value under
        ``'p-value'``, and one ``'critical <level>'`` entry for every
        critical value reported by ``adfuller``.
    """
    # adfuller returns (statistic, p-value, used lags, n obs, critical values, ...).
    statistic, p_value, _used_lag, _n_obs, critical_values, *_ = adfuller(
        series, autolag='AIC'
    )

    summary = {'adf': statistic, 'p-value': p_value}
    summary.update(
        {f'critical {level}': threshold for level, threshold in critical_values.items()}
    )
    return summary

In [14]:
# Run the ADF test on each calendar day's closing prices between 10:00 and
# 11:59, collecting one result record per day.
results = []
for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
    # Restrict to the morning window *before* checking for emptiness.
    in_window = (df_day.index.hour >= 10) & (df_day.index.hour < 12)
    close = df_day.loc[in_window, 'close']

    # The daily Grouper also yields calendar days that have no rows at all, and
    # a day may only have rows outside the window; in both cases there is
    # nothing to test, so skip instead of handing adfuller an empty series.
    if close.empty:
        continue

    adf_result = adf_test(close)
    adf_result['day'] = day.strftime('%Y%m%d')
    results.append(adf_result)

# Build the frame once from the list of records.
df_result = pd.DataFrame(results)

In [16]:
# Days on which the unit-root null hypothesis is rejected at each level.
# The ADF decision rule compares the *test statistic* with the critical value
# (reject when the statistic is more negative than the threshold). The p-value
# lies in [0, 1] and is not on the same scale as the critical values, so it
# must not be compared with them.
df_1 = df_result[df_result['adf'] < df_result['critical 1%']]
df_5 = df_result[df_result['adf'] < df_result['critical 5%']]
df_10 = df_result[df_result['adf'] < df_result['critical 10%']]