In [75]:
# importing relevant packages

import yfinance as yf
import pandas as pd


In [76]:
# getting ticker data from yfinance

ticker = 'MSFT'
df = yf.download(ticker)
df

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1986-03-13,0.088542,0.101563,0.088542,0.097222,0.060274,1031788800
1986-03-14,0.097222,0.102431,0.097222,0.100694,0.062427,308160000
1986-03-17,0.100694,0.103299,0.100694,0.102431,0.063504,133171200
1986-03-18,0.102431,0.103299,0.098958,0.099826,0.061889,67766400
1986-03-19,0.099826,0.100694,0.097222,0.098090,0.060812,47894400
...,...,...,...,...,...,...
2023-12-11,368.480011,371.600006,366.100006,371.299988,371.299988,27708800
2023-12-12,370.850006,374.420013,370.459991,374.380005,374.380005,24838300
2023-12-13,376.019989,377.640015,370.769989,374.369995,374.369995,30955500
2023-12-14,373.309998,373.760010,364.130005,365.929993,365.929993,43277500


In [77]:
# data preprocessing. Filtering out rows before 2017, and creating a column for % change from today to tomorrow

df = df.drop(columns = 'Adj Close')
df = df.loc['2019-01-01':,]

df['daily_change'] = df['Close'].pct_change()*100
df.loc[:, 'daily_change'] = df.daily_change.shift(-1)
df = df.dropna()
df


Unnamed: 0_level_0,Open,High,Low,Close,Volume,daily_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-02,99.550003,101.750000,98.940002,101.120003,35329300,-3.678799
2019-01-03,100.099998,100.190002,97.199997,97.400002,42579100,4.650923
2019-01-04,99.720001,102.510002,98.930000,101.930000,44060600,0.127536
2019-01-07,101.639999,103.269997,100.980003,102.059998,35656100,0.725069
2019-01-08,103.040001,103.970001,101.709999,102.800003,31514400,1.429955
...,...,...,...,...,...,...
2023-12-08,369.200012,374.459991,368.230011,374.230011,20144800,-0.782947
2023-12-11,368.480011,371.600006,366.100006,371.299988,27708800,0.829523
2023-12-12,370.850006,374.420013,370.459991,374.380005,24838300,-0.002674
2023-12-13,376.019989,377.640015,370.769989,374.369995,30955500,-2.254455


In [78]:
#  import files for the backtesting and machine learning

from backtesting import Backtest, Strategy
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error 

In [79]:
# our strategy class that inherits strategy from backtesting. Effectively, if the ML algorithm predicts the stock will go up, we buy, if not, we sell
trainingDays = 300

class Regression(Strategy):
    limit_buy = 1
    limit_sell = -5
    n_train = trainingDays
    coef_retrain = 50
    
    def init(self):
        self.model = DecisionTreeRegressor(max_depth=15)
        self.already_bought = False
        
        X_train = self.data.df.iloc[:self.n_train, :-1]
        y_train = self.data.df.iloc[:self.n_train, -1]
        
        self.model.fit(X=X_train, y=y_train)

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]
        
        if forecast_tomorrow > self.limit_buy and self.already_bought == False:
            self.buy()
            self.already_bought = True
        elif forecast_tomorrow < self.limit_sell and self.already_bought == True:
            self.sell()
            self.already_bought = False
        else:
            pass

In [80]:
class WalkForwardAnchored(Regression):
    def next(self):
        
        # we don't take any action and move on to the following day
        if len(self.data) < self.n_train:
            return
        
        # we retrain the model each x days
        if len(self.data) % self.coef_retrain == 0:
            X_train = self.data.df.iloc[:, :-1]
            y_train = self.data.df.iloc[:, -1]

            self.model.fit(X_train, y_train)

            super().next()
            
        else:
            
            super().next()

In [81]:
startingCap = 10000
bt = Backtest(df, WalkForwardAnchored, cash=startingCap, commission=.002, exclusive_orders=True)
results = bt.run()

years = ((results['Duration']).days-trainingDays)/365
returns = round(results['Return [%]'], 3)
yearly_returns = round(((1+returns/100)**(1/years)-1)*100, 3)

print("Results:\n\nStarting Capital: $" + str(startingCap) + "\nFinal Capital: $" + str(round(results['Equity Final [$]'], 3)) + "\nReturn: " + str(returns) + "%")
print("Time Taken: " + str(round(years, 3)) + " years")
print("Average yearly returns: " + str(yearly_returns) + "%")

Results:

Starting Capital: $10000
Final Capital: $20701.786
Return: 107.018%
Time Taken: 4.129 years
Average yearly returns: 19.272%
