In [None]:
import pandas as pd
import pytimetk as tk
from openbb import obb

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score

In [None]:
SYMBOL = 'SPY'
START_DATE = '2015-01-01'
END_DATE = '2023-11-30'
TRAIN_UNTIL = '2022-12-31'

In [None]:
#load data
data = obb.equity.price.historical(SYMBOL, adjusted=True, start_date=START_DATE, end_date=END_DATE, provider='yfinance')

In [None]:
df = data.to_df()

In [11]:
df = df.reset_index()[['date','open','high','low','adj_close']]

In [12]:
df.plot_timeseries(date_column='date', value_column='adj_close',title='SPY Close', x_lab='Date', y_lab='Close')

## Feature Engineering

In [13]:
# distance from moving averages
for m in [10,20,30,50,100]:
    df[f'f_dist_from_ma_{m}'] = df['adj_close']/df['adj_close'].rolling(m).mean() - 1

In [15]:
# distance from n day max/min
for m in [6, 10, 20, 30, 50, 100]:
    df[f'f_dist_from_high_{m}'] = df['adj_close'] / df['high'].rolling(m).max() -1
    df[f'f_dist_from_low{m}'] = df['adj_close'] / df['low'].rolling(m).min() -1

In [16]:
# price distance
for m in [6, 10, 20, 30, 50, 100]:
    df[f'f_price_dist_{m}'] = df['adj_close'] / df['adj_close'].shift(m) -1

In [17]:
df.glimpse()

<class 'pandas.core.frame.DataFrame'>: 2244 rows of 28 columns
date:                  datetime64[ns]    [Timestamp('2015-01-02 00:00:00 ...
open:                  float64           [206.38, 204.17, 202.09, 201.42 ...
high:                  float64           [206.88, 204.37, 202.72, 202.72 ...
low:                   float64           [204.18, 201.35, 198.86, 200.88 ...
adj_close:             float64           [175.52, 172.35, 170.72, 172.85 ...
f_dist_from_ma_10:     float64           [nan, nan, nan, nan, nan, nan,  ...
f_dist_from_ma_20:     float64           [nan, nan, nan, nan, nan, nan,  ...
f_dist_from_ma_30:     float64           [nan, nan, nan, nan, nan, nan,  ...
f_dist_from_ma_50:     float64           [nan, nan, nan, nan, nan, nan,  ...
f_dist_from_ma_100:    float64           [nan, nan, nan, nan, nan, nan,  ...
f_dist_from_high_6:    float64           [nan, nan, nan, nan, nan, -0.15 ...
f_dist_from_low6:      float64           [nan, nan, nan, nan, nan, -0.12 ...
f_dist_from_h