In [6]:
import sys

import pathlib

sys.path.append(str(pathlib.Path('.').resolve().parent.parent))



from app.parser.yahoo_parser import SP500Parser
import pandas as pd

from ta.trend import MACD
from typing import List, Optional
from datetime import datetime

In [7]:
# Parser instance shared by the cells below: it downloads price data
# (download_custom_data) and builds features (apply_features_to_stocks).
parser = SP500Parser()


def apply_features(group):
    """Build lag, rolling, MACD and calendar features for one price history.

    Parameters
    ----------
    group : pandas.DataFrame
        Price rows containing a ``Close`` column, indexed by something
        ``pd.to_datetime`` can parse. The caller's frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus ``lag_1``..``lag_3``,
        ``5_day_MA``, ``20_day_MA``, ``5_day_volatility``, ``momentum``,
        ``MACD``, ``MACD_signal``, ``MACD_histogram``, ``week_of_year`` and
        ``month``. Every row that has a NaN in any column is dropped, which
        removes the warm-up rows of the lag/rolling/MACD windows.
    """
    # Work on a copy: assigning the index and new columns directly on `group`
    # would silently alter the caller's frame (and makes re-running the cell,
    # or using this function inside groupby.apply, non-idempotent).
    features = group.copy()
    features.index = pd.to_datetime(features.index)
    close = features["Close"]

    # Previous 1-3 closes.
    for lag in range(1, 4):
        features[f"lag_{lag}"] = close.shift(lag)

    # Rolling statistics; the default min_periods equals the window, so the
    # first window-1 rows are NaN.
    features["5_day_MA"] = close.rolling(window=5).mean()
    features["20_day_MA"] = close.rolling(window=20).mean()
    features["5_day_volatility"] = close.rolling(window=5).std()

    # Change in close versus the previous row.
    features["momentum"] = close - close.shift(1)

    macd = MACD(close=close, window_slow=26, window_fast=12, window_sign=9)
    features["MACD"] = macd.macd()
    features["MACD_signal"] = macd.macd_signal()
    features["MACD_histogram"] = macd.macd_diff()

    # Calendar features derived from the datetime index.
    features["week_of_year"] = features.index.isocalendar().week
    features["month"] = features.index.month

    return features.dropna()

In [8]:
from datetime import datetime, timedelta

# Length of the download window, in calendar days.
LOOKBACK_DAYS = 60

today = datetime.now().date()

start_date = today - timedelta(days=LOOKBACK_DAYS)
# This date used to be called `one_month_ago` even though it lies 60 days back;
# the old name is kept as an alias in case another cell still references it.
one_month_ago = start_date

# Download the window for AAPL and display the engineered features.
df = parser.download_custom_data(['AAPL'], start_date, today)
apply_features(df)

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Symbol,lag_1,lag_2,lag_3,5_day_MA,20_day_MA,5_day_volatility,momentum,MACD,MACD_signal,MACD_histogram,week_of_year,month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-03-12,173.149994,174.029999,171.009995,173.229996,173.229996,59825400,AAPL,172.75,170.729996,169.0,170.965997,178.589999,1.977177,0.479996,-4.550251,-4.0963,-0.45395,11,3
2024-03-13,172.770004,173.190002,170.759995,171.130005,171.130005,52488700,AAPL,173.229996,172.75,170.729996,171.367999,177.8945,1.69175,-2.099991,-4.519194,-4.180879,-0.338315,11,3
2024-03-14,172.910004,174.309998,172.050003,173.0,173.0,72913500,AAPL,171.130005,173.229996,172.75,172.167999,177.337,1.151529,1.869995,-4.294188,-4.203541,-0.090647,11,3
2024-03-15,171.169998,172.619995,170.289993,172.619995,172.619995,121664700,AAPL,173.0,171.130005,173.229996,172.545999,176.775,0.825545,-0.380005,-4.099278,-4.182688,0.08341,11,3
2024-03-18,175.570007,177.710007,173.520004,173.720001,173.720001,75604200,AAPL,172.619995,173.0,171.130005,172.739999,176.3455,0.984198,1.100006,-3.812106,-4.108572,0.296466,12,3
2024-03-19,174.339996,176.610001,173.029999,176.080002,176.080002,55215200,AAPL,173.720001,172.619995,173.0,173.310001,176.0715,1.814221,2.360001,-3.355409,-3.957939,0.602531,12,3
2024-03-20,175.720001,178.669998,175.089996,178.669998,178.669998,53423100,AAPL,176.080002,173.720001,172.619995,174.817999,175.889,2.538331,2.589996,-2.75275,-3.716901,0.964152,12,3
2024-03-21,177.050003,177.490005,170.839996,171.369995,171.369995,106181300,AAPL,178.669998,176.080002,173.720001,174.491998,175.239,2.907951,-7.300003,-2.831547,-3.53983,0.708284,12,3
2024-03-22,171.759995,173.050003,170.059998,172.279999,172.279999,71106600,AAPL,171.369995,178.669998,176.080002,174.423999,174.727,2.966064,0.910004,-2.788421,-3.389549,0.601127,12,3


In [19]:


from predictor import FinancialPredictor


# Locations of the two artefacts handed to FinancialPredictor, relative to
# the notebook's working directory.
MODEL_PATH = "../models/financial_data_only.cbm"
PREPROCESSOR_PATH = "../models/preprocessor_pipeline.pkl"

regressor = FinancialPredictor(MODEL_PATH, PREPROCESSOR_PATH)


In [17]:
# Earlier alternative, kept for reference: featurise with the notebook-local
# apply_features helper and keep only the most recent row.
# data = apply_features(df).iloc[[-1]]

# Build the feature frame through the parser instead.
# NOTE(review): presumably this applies the same feature engineering per
# symbol — confirm against SP500Parser.apply_features_to_stocks.
data = parser.apply_features_to_stocks(df)