In [7]:
import yfinance as yf
import pandas as pd
from statsforecast import StatsForecast
import numpy as np
from statsforecast.models import Naive, ARCH
from abc import ABC, abstractmethod
from functools import wraps

from lib.utils import log_returns, get_hourly_market_freq

In [3]:
# Daily MSFT bars. auto_adjust=False is pinned because the log-return cell reads
# the 'Adj Close' column; newer yfinance releases default to auto_adjust=True,
# which omits that column from the result.
df = yf.download('MSFT', start='2018-09-27', end='2023-09-27', interval='1d', auto_adjust=False)

[*********************100%%**********************]  1 of 1 completed


In [4]:
# Store the log returns of the adjusted close as the `log_rt` column.
df.loc[:, 'log_rt'] = log_returns(df.loc[:, 'Adj Close'])

In [5]:
# Display the price frame, now including the log_rt column.
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_rt
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-09-27,114.779999,114.910004,114.199997,114.410004,108.395653,19091300,
2018-09-28,114.190002,114.570000,113.680000,114.370003,108.357750,21647800,-0.000350
2018-10-01,114.750000,115.680000,114.730003,115.610001,109.532547,18883100,0.010783
2018-10-02,115.300003,115.839996,114.440002,115.150002,109.096733,20787200,-0.003987
2018-10-03,115.419998,116.180000,114.930000,115.169998,109.115707,16648000,0.000174
...,...,...,...,...,...,...,...
2023-09-20,329.510010,329.589996,320.510010,320.769989,320.769989,21436500,-0.024269
2023-09-21,319.260010,325.350006,315.000000,319.529999,319.529999,35529500,-0.003873
2023-09-22,321.320007,321.450012,316.149994,317.010010,317.010010,21429700,-0.007918
2023-09-25,316.589996,317.670013,315.000000,317.540009,317.540009,17836000,0.001670


In [None]:
class Model(ABC):
    """Abstract interface for forecasting models: ``fit`` first, then ``predict``."""

    @abstractmethod
    def fit(self, df: pd.DataFrame) -> None:
        """Train the model on the data held in ``df``."""
        ...

    @abstractmethod
    def predict(self, steps: int) -> pd.DataFrame:
        """Return predictions for ``steps`` steps using the fitted model."""
        ...

class Forecast():
    """Bundle a long-format series, a model and a frequency behind fit/predict.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose columns are exactly ``ds``, ``y`` and ``unique_id``.
    model : Model
        Object exposing ``fit(df)`` and ``predict(steps)``, or a key of the
        (currently empty) model registry in ``_init_model``.
    freq : str or pandas offset
        Anything ``pandas.tseries.frequencies.to_offset`` accepts, e.g. ``'D'``.

    Raises
    ------
    TypeError
        If ``df`` is not a DataFrame.
    ValueError
        If the columns, the model or the frequency are invalid.
    """

    # Columns the input frame must consist of.
    _REQUIRED_COLUMNS = ('ds', 'y', 'unique_id')

    def __init__(self, df: pd.DataFrame, model: "Model", freq):
        self._init_df(df)
        self.model = self._init_model(model)
        self.freq = self._init_freq(freq)

    def _init_df(self, df):
        """Validate the column set and cache the frame plus its three columns."""
        if not isinstance(df, pd.DataFrame):
            raise TypeError("df must be a DataFrame")

        # set() accepts a single iterable, so build the sets from a tuple / the index.
        required = set(self._REQUIRED_COLUMNS)
        present = set(df.columns)
        if present != required:
            missing = sorted(required - present)
            unexpected = sorted(present - required, key=str)
            raise ValueError(
                'df must have columns ds, y, unique_id. '
                f'missing {missing}, unexpected {unexpected}'
            )

        self.df = df
        self.ds = pd.to_datetime(df['ds'])
        self.y = df['y']
        self.unique_id = df['unique_id']

    def _init_model(self, model) -> "Model":
        """Return a usable model for ``model``.

        An instance that exposes callable ``fit`` and ``predict`` is returned
        unchanged. The check is duck-typed rather than ``isinstance``-based so
        that instances created before the ``Model`` cell was re-run in the
        notebook are still accepted. Anything else is looked up in the registry.
        """
        implements_interface = all(
            callable(getattr(model, method, None)) for method in ('fit', 'predict')
        )
        if implements_interface and not isinstance(model, type):
            return model

        valid_models = {}  # TODO add models
        try:
            return valid_models[model]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments that cannot be registry keys.
            raise ValueError(
                'model must be an instance implementing fit/predict '
                f'or one of {list(valid_models)}'
            ) from None

    def _init_freq(self, freq: str):
        """Check that pandas can interpret ``freq`` and return it unchanged."""
        try:
            offset = pd.tseries.frequencies.to_offset(freq)
        except (ValueError, TypeError) as exc:
            raise ValueError(f'freq must be a valid pandas frequency, got {freq!r}') from exc
        if offset is None:
            # to_offset(None) returns None instead of raising.
            raise ValueError('freq must be a valid pandas frequency, got None')
        return freq

    def fit(self):
        """Fit the wrapped model on the stored frame."""
        self.model.fit(self.df)

    def predict(self, steps: int):
        """Return the wrapped model's prediction for ``steps`` steps."""
        # Model.predict takes only the horizon; the data was supplied in fit().
        return self.model.predict(steps)

In [12]:
class Naive(Model):
    """Naive forecaster: every predicted step equals the last observed value.

    Note that this definition shadows the ``Naive`` imported from
    ``statsforecast.models`` at the top of the notebook.
    """

    def __init__(self):
        # Set by fit(); None means the model has not been fitted yet.
        self.last_value = None

    def fit(self, df: pd.DataFrame, y_col: str = None) -> None:
        """Remember the last non-null observation of the target column.

        Parameters
        ----------
        df : pd.DataFrame
            Observations in chronological order.
        y_col : str, optional
            Target column. May be omitted only when ``df`` has exactly one column.

        Raises
        ------
        TypeError
            If ``df`` is not a DataFrame.
        ValueError
            If ``y_col`` is required but missing, is not a column of ``df``,
            or the target column holds no non-null values.
        """
        if not isinstance(df, pd.DataFrame):
            raise TypeError("df must be a DataFrame")
        if y_col is None and df.shape[1] != 1:
            raise ValueError("y_col must be specified if df has more than one column")
        if y_col is not None and y_col not in df.columns:
            raise ValueError(f"y_col '{y_col}' not found in df columns: {df.columns.values}")

        target = df.columns[0] if y_col is None else y_col
        # Skip missing values so the forecast is never NaN.
        observed = df[target].dropna()
        if observed.empty:
            raise ValueError(f"column '{target}' has no non-null observations to fit on")

        self.df = df
        self.y_col = y_col
        self._target = target
        self.last_value = observed.iloc[-1]

    def predict(self, steps: int) -> pd.DataFrame:
        """Return a ``steps``-row frame repeating the last fitted observation.

        The result has a single column named after the target column and a
        default integer index.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``steps`` is negative.
        """
        if self.last_value is None:
            raise RuntimeError("fit must be called before predict")
        if steps < 0:
            raise ValueError("steps must be non-negative")
        return pd.DataFrame({self._target: np.repeat(self.last_value, steps)})

    def forecast(self, horizon: int) -> pd.DataFrame:
        """Alias of ``predict`` kept for callers using the original method name."""
        return self.predict(horizon)

In [13]:
# Instantiate the Naive class defined in this notebook; it shadows the
# Naive imported from statsforecast.models.
model = Naive()

In [14]:
# df holds several price columns, so the target column must be named explicitly;
# fit() raises ValueError when y_col is omitted for a multi-column frame.
model.fit(df, y_col='log_rt')

In [None]:
# Call predict rather than merely referencing the bound method;
# the 5-step horizon is an illustrative choice.
model.predict(steps=5)