In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import yfinance as yf
from statsmodels.tsa.arima.model import ARIMA
from abc import ABC, abstractmethod
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)

NameError: name 'warnings' is not defined

In [17]:

# Strategy Pattern: Define an abstract base class for forecasting models
class ForecasterModel(ABC):
    @abstractmethod
    def train(self, data):
        pass

    @abstractmethod
    def predict(self, data):
        pass


# Concrete Strategy: ARIMA Model
class ARIMAForecaster(ForecasterModel):
    def train(self, data):
        # Define the order (p, d, q) manually
        p, d, q = 1, 1, 1  # Example values, adjust as needed
        self.model = ARIMA(data["Returns"], order=(p, d, q))
        self.model_fit = self.model.fit()

    def predict(self, data):
        return self.model_fit.forecast(steps=1)[0]


# Concrete Strategy: Random Forest Model
class RandomForestForecaster(ForecasterModel):
    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=100, random_state=42)
        self.scaler = StandardScaler()

    def train(self, data):
        X = data.drop("Target", axis=1)
        y = data["Target"]
        X_scaled = self.scaler.fit_transform(X)
        self.model.fit(X_scaled, y)

    def predict(self, data):
        X_scaled = self.scaler.transform(data.reshape(1, -1))
        return self.model.predict(X_scaled)[0]


# Factory Pattern: Model Factory for creating different forecaster models
class ForecasterFactory:
    @staticmethod
    def get_forecaster(model_type):
        if model_type == "ARIMA":
            return ARIMAForecaster()
        elif model_type == "RandomForest":
            return RandomForestForecaster()
        else:
            raise ValueError(f"Unknown model type: {model_type}")


# Main Forecaster class using Composition
class StockForecaster:
    def __init__(self, symbol, start_date, end_date, model_type="ARIMA"):
        self.symbol = symbol
        self.start_date = start_date
        self.end_date = end_date
        self.data = None
        self.model = ForecasterFactory.get_forecaster(model_type)

    def fetch_data(self):
        print(
            f"Fetching data for {self.symbol} from {self.start_date} to {self.end_date}"
        )
        self.data = yf.download(self.symbol, start=self.start_date, end=self.end_date)
        print(f"Data fetched for {self.symbol}")
        self.data["Returns"] = self.data["Close"].pct_change()
        self.data["Target"] = self.data["Returns"].shift(-1)
        self.data = self.data.dropna()

    def add_features(self):
        # Add lag features
        for i in range(1, 6):
            self.data[f"Lag_{i}"] = self.data["Returns"].shift(i)

        # Add rolling mean and standard deviation
        self.data["Rolling_Mean_5"] = self.data["Returns"].rolling(window=5).mean()
        self.data["Rolling_Std_5"] = self.data["Returns"].rolling(window=5).std()

        # Add day of week and month
        self.data["Day_of_Week"] = self.data.index.dayofweek
        self.data["Month"] = self.data.index.month

        self.data = self.data.dropna()

    def train_model(self):
        self.model.train(self.data)

    def backtest(self, lookback_period=30):
        returns = []
        for i in range(lookback_period, len(self.data)):
            train_data = self.data.iloc[:i]
            test_data = self.data.iloc[i : i + 1]

            self.model.train(train_data)

            if isinstance(self.model, RandomForestForecaster):
                features = test_data.drop(["Target", "Returns"], axis=1).values[0]
            else:  # ARIMA
                features = train_data

            prediction = self.model.predict(features)
            returns.append(prediction)

        self.data["Predicted_Returns"] = pd.Series(
            returns, index=self.data.index[lookback_period:]
        )
        self.data["Strategy_Returns"] = self.data["Predicted_Returns"] * self.data[
            "Returns"
        ].shift(-1)

        cumulative_returns = (1 + self.data["Strategy_Returns"].fillna(0)).cumprod()
        sharpe_ratio = (
            np.sqrt(252)
            * self.data["Strategy_Returns"].mean()
            / self.data["Strategy_Returns"].std()
        )

        print(f"Cumulative Returns: {cumulative_returns.iloc[-1]}")
        print(f"Sharpe Ratio: {sharpe_ratio}")


def test_forecaster(model_type):
    forecaster = StockForecaster("AAPL", "2020-01-01", "2023-12-31", model_type)
    forecaster.fetch_data()
    forecaster.add_features()
    forecaster.train_model()
    # forecaster.backtest()


if __name__ == "__main__":
    print("Testing ARIMA Forecaster:")
    test_forecaster("ARIMA")
    print("\nTesting Random Forest Forecaster:")
    test_forecaster("RandomForest")


[*********************100%%**********************]  1 of 1 completed

Testing ARIMA Forecaster:
Fetching data for AAPL from 2020-01-01 to 2023-12-31
Data fetched for AAPL




A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.

[*********************100%%**********************]  1 of 1 completed


Testing Random Forest Forecaster:
Fetching data for AAPL from 2020-01-01 to 2023-12-31
Data fetched for AAPL





In [18]:
# Test ARIMA model
model_type = "ARIMA"
forecaster = StockForecaster("AAPL", "2020-01-01", "2023-12-31", model_type)
forecaster.fetch_data()

[*********************100%%**********************]  1 of 1 completed

Fetching data for AAPL from 2020-01-01 to 2023-12-31
Data fetched for AAPL





In [19]:
forecaster.data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-03,74.287498,75.144997,74.125000,74.357498,72.251137,146322800,-0.009722,0.007968
2020-01-06,73.447502,74.989998,73.187500,74.949997,72.826851,118387200,0.007968,-0.004703
2020-01-07,74.959999,75.224998,74.370003,74.597504,72.484337,108872000,-0.004703,0.016086
2020-01-08,74.290001,76.110001,74.290001,75.797501,73.650345,132079200,0.016086,0.021241
2020-01-09,76.809998,77.607498,76.550003,77.407501,75.214745,170108400,0.021241,0.002261
...,...,...,...,...,...,...,...,...
2023-12-21,196.100006,197.080002,193.500000,194.679993,194.168518,46482500,-0.000770,-0.005547
2023-12-22,195.179993,195.410004,192.970001,193.600006,193.091385,37122800,-0.005547,-0.002841
2023-12-26,193.610001,193.889999,192.830002,193.050003,192.542816,28919300,-0.002841,0.000518
2023-12-27,192.490005,193.500000,191.089996,193.149994,192.642548,48087700,0.000518,0.002226


In [20]:
import plotly.express as px
fig = px.line(forecaster.data, x=forecaster.data.index, y=["Open", "High", "Low", "Close"], title='AAPL Stock Price')
fig.show()


In [21]:
forecaster.add_features()

In [22]:
forecaster.data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Target,Lag_1,Lag_2,Lag_3,Lag_4,Lag_5,Rolling_Mean_5,Rolling_Std_5,Day_of_Week,Month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-01-10,77.650002,78.167503,77.062500,77.582497,75.384773,140644800,0.002261,0.021364,0.021241,0.016086,-0.004703,0.007968,-0.009722,0.008571,0.010409,4,1
2020-01-13,77.910004,79.267502,77.787498,79.239998,76.995331,121532000,0.021364,-0.013503,0.002261,0.021241,0.016086,-0.004703,0.007968,0.011250,0.011841,0,1
2020-01-14,79.175003,79.392502,78.042503,78.169998,75.955643,161954400,-0.013503,-0.004286,0.021364,0.002261,0.021241,0.016086,-0.004703,0.009490,0.015030,1,1
2020-01-15,77.962502,78.875000,77.387497,77.834999,75.630119,121923600,-0.004286,0.012526,-0.013503,0.021364,0.002261,0.021241,0.016086,0.005415,0.015547,2,1
2020-01-16,78.397499,78.925003,78.022499,78.809998,76.577507,108829200,0.012526,0.011071,-0.004286,-0.013503,0.021364,0.002261,0.021241,0.003673,0.013709,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-21,196.100006,197.080002,193.500000,194.679993,194.168518,46482500,-0.000770,-0.005547,-0.010714,0.005360,-0.008503,-0.002726,0.000758,-0.003471,0.006396,3,12
2023-12-22,195.179993,195.410004,192.970001,193.600006,193.091385,37122800,-0.005547,-0.002841,-0.000770,-0.010714,0.005360,-0.008503,-0.002726,-0.004035,0.006438,4,12
2023-12-26,193.610001,193.889999,192.830002,193.050003,192.542816,28919300,-0.002841,0.000518,-0.005547,-0.000770,-0.010714,0.005360,-0.008503,-0.002902,0.005934,1,12
2023-12-27,192.490005,193.500000,191.089996,193.149994,192.642548,48087700,0.000518,0.002226,-0.002841,-0.005547,-0.000770,-0.010714,0.005360,-0.003871,0.004460,2,12
