In [41]:
#Initialize variables
#all imports first, then the shared notebook-level objects
import pandas as pd #python statistic toolkit
import yfinance as yf #where the data is grabbed
from sklearn.ensemble import RandomForestClassifier #import RFC as model
from sklearn.metrics import precision_score #import to test the accuracy of our predictions

#single random forest reused by every backtest: 2000 trees, a node needs at least 500 samples to be split, fixed random_state
model = RandomForestClassifier(
        n_estimators=2000,
        min_samples_split=500,
        random_state=1,
)
ETFS = [] #store all etfs used as predictors

In [36]:
#initialize TMF
tmf = yf.Ticker("TMF").history(period="max") #get all time data on TMF
#remove dividends, stock splits and capital gains, unneeded (ignored if yfinance does not return one of them)
tmf = tmf.drop(columns=["Dividends", "Stock Splits", "Capital Gains"], errors="ignore")
tmf = tmf.tz_localize(None) #drop the timezone so the dates can be matched against the dates parsed from the CSV
tmf["Tomorrow"] = tmf["Close"].shift(-1) #create a "tomorrow" column by taking the close value and shifting it back 1 row
#create "Target", that is either 0 or 1 depending on whether Tomorrow's price is higher than Today's close
#(the last row has no "Tomorrow", so its Target is 0)
tmf["Target"] = (tmf["Tomorrow"] > tmf["Close"]).astype(int)


#adding federal reserve rates to predict TMF
interest_rates = pd.read_csv('../FEDFUNDS.csv', parse_dates=["DATE"], index_col=["DATE"])
monthly_rates = interest_rates["FEDFUNDS"].astype("float64")
monthly_rates.loc[pd.Timestamp("2024-06-01")] = 5.33 #June 2024 is always taken as 5.33, exactly as the original per-row lookup did
monthly_rates = monthly_rates.sort_index() #reindex(method="ffill") needs a sorted index
#every trading day gets the rate stored under the first day of its month;
#a month with no entry in the CSV reuses the most recent earlier rate instead of raising a KeyError
month_starts = tmf.index.to_period("M").to_timestamp()
tmf["interest"] = monthly_rates.reindex(month_starts, method="ffill").to_numpy()
tmf["Interest_Tomorrow"] = tmf["interest"].shift(-1) #the rate attached to the next trading day
tmf["Interest_Target"] = (tmf["Interest_Tomorrow"] > tmf["interest"]).astype(int) #1 when the next trading day's rate is higher than today's, otherwise 0


horizons = [2, 5, 60, 250, 1000] #adding rolling mean close for the previous 2 days, week, three months, year, and four years
#use the ratio between the closing price now and the closing price then to make predictions
new_predictors = [] #to store the names of the new predictor columns
prior_up_days = tmf["Target"].shift(1) #shifted one row so a window never contains the current day's own Target
prior_rate_rises = tmf["Interest_Target"].shift(1)
for horizon in horizons: #calculate the rolling features for each time window
        rolling_close = tmf["Close"].rolling(horizon).mean()
        rolling_interest = tmf["interest"].rolling(horizon).mean()

        ratio_column = f"Close_Ratio_{horizon}"
        tmf[ratio_column] = tmf["Close"] / rolling_close #close price of the day / rolling average of "horizon" days

        trend_column = f"Trend_{horizon}" #the number of days in the past "horizon" days that the stock went up
        tmf[trend_column] = prior_up_days.rolling(horizon).sum()

        interest_ratio = f"Interest_Ratio_{horizon}"
        #NOTE(review): this divides the closing PRICE by the rolling mean rate; the column name suggests
        #interest / rolling interest may have been intended - left unchanged so the features stay the same, confirm
        tmf[interest_ratio] = tmf["Close"] / rolling_interest

        interest_trend = f"Interest_Trend_{horizon}" #the number of days in the past "horizon" days on which Interest_Target was 1
        tmf[interest_trend] = prior_rate_rises.rolling(horizon).sum()

        new_predictors += [ratio_column, trend_column, interest_ratio, interest_trend]
tmf

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Tomorrow,Target,interest,Interest_Tomorrow,Interest_Target,...,Interest_Ratio_60,Interest_Trend_60,Close_Ratio_250,Trend_250,Interest_Ratio_250,Interest_Trend_250,Close_Ratio_1000,Trend_1000,Interest_Ratio_1000,Interest_Trend_1000
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-04-16,106.956214,108.144211,105.950987,106.261696,4560,101.509743,0,0.15,0.15,0,...,,,,,,,,,,
2009-04-17,104.543699,104.726467,100.906597,101.509743,12040,107.669029,1,0.15,0.15,0,...,,,,,,,,,,
2009-04-20,104.470577,107.669029,104.470577,107.669029,2720,104.415741,0,0.15,0.15,0,...,,,,,,,,,,
2009-04-21,110.410551,111.434062,104.379185,104.415741,6240,100.851776,0,0.15,0.15,0,...,,,,,,,,,,
2009-04-22,104.360938,104.360938,99.608951,100.851776,10120,101.381775,1,0.15,0.15,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-14,53.570000,54.279999,53.290001,53.980000,6863100,52.299999,0,5.33,5.33,0,...,10.127580,0.0,0.988618,121.0,10.176841,2.0,0.320184,478.0,23.170465,22.0
2024-06-17,52.049999,52.520000,51.599998,52.299999,6033600,53.750000,1,5.33,5.33,0,...,9.812383,0.0,0.959591,120.0,9.858252,2.0,0.310800,477.0,22.398862,22.0
2024-06-18,52.630001,53.799999,52.340000,53.750000,5541300,52.660000,0,5.33,5.33,0,...,10.084428,0.0,0.987942,120.0,10.129660,2.0,0.320036,477.0,22.968221,22.0
2024-06-20,51.770000,52.750000,51.630001,52.660000,5372500,52.630001,0,5.33,5.33,0,...,9.879925,0.0,0.969494,120.0,9.922370,2.0,0.314149,477.0,22.452077,22.0


In [37]:
#Function to setup new ETF predictors, pass in a yfinance Ticker + the name you wish to give
def configureETF(etf, etf_name, horizons=(2, 5, 60, 250, 1000), n_rows=3822):
        """Download an ETF's full price history and derive its ratio/trend predictor columns.

        Parameters
        ----------
        etf : object exposing ``history(period=...)`` that returns a price DataFrame
                with at least a "Close" column (the notebook passes a ``yf.Ticker``).
        etf_name : str
                Prefix used for every generated column (``{etf_name}_Target``,
                ``{etf_name}_Close_Ratio_{h}``, ``{etf_name}_Trend_{h}``).
        horizons : iterable of int, optional
                Rolling window lengths in rows. Defaults to the windows used for TMF.
        n_rows : int or None, optional
                Keep only the last ``n_rows`` rows of the result. When the history is
                shorter than ``n_rows`` every row is kept. ``None`` disables trimming.

        Returns
        -------
        pandas.DataFrame
                The timezone-naive history with "Tomorrow", the target column and one
                ratio column plus one trend column per horizon.
        """
        etf = etf.history(period="max")
        etf = etf.tz_localize(None)
        #not every ticker reports all three of these columns, so missing ones are ignored
        etf = etf.drop(columns=["Dividends", "Capital Gains", "Stock Splits"], errors="ignore")

        close = etf["Close"]
        target_column = f"{etf_name}_Target"
        etf["Tomorrow"] = close.shift(-1)
        etf[target_column] = (etf["Tomorrow"] > close).astype('int') #1 when the next close is higher; the last row is 0

        prior_up_days = etf[target_column].shift(1) #shifted so a window never includes the current day's own target
        for horizon in horizons:
                #close price of the day / rolling average of "horizon" days
                etf[f"{etf_name}_Close_Ratio_{horizon}"] = close / close.rolling(horizon).mean()
                #the number of days in the past "horizon" days that the etf went up
                etf[f"{etf_name}_Trend_{horizon}"] = prior_up_days.rolling(horizon).sum()

        if n_rows is None:
                return etf
        #tail() keeps everything when the history is shorter than n_rows; the previous
        #iloc[len - 3822:] start index went negative in that case and dropped valid rows
        return etf.tail(n_rows)

        

In [38]:
#setup other ETF predictors

#BND: fetch the ticker and build its BND_* target/ratio/trend columns in one step
bnd = configureETF(yf.Ticker("BND"), "BND")
bnd

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Tomorrow,BND_Target,BND_Close_Ratio_2,BND_Trend_2,BND_Close_Ratio_5,BND_Trend_5,BND_Close_Ratio_60,BND_Trend_60,BND_Close_Ratio_250,BND_Trend_250,BND_Close_Ratio_1000,BND_Trend_1000
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2009-04-16,49.807404,49.943089,49.807404,49.910782,436800,49.645874,0,0.999030,1.0,1.000647,3.0,1.009750,31.0,1.035498,131.0,,
2009-04-17,49.859086,49.917237,49.548964,49.645874,609300,49.891380,1,0.997339,0.0,0.995672,3.0,1.004359,31.0,1.029824,130.0,,
2009-04-20,49.723394,49.943069,49.723394,49.891380,247700,49.794521,0,1.002466,1.0,1.000259,3.0,1.009247,31.0,1.034727,130.0,,
2009-04-21,49.930201,49.930201,49.697606,49.794521,408000,49.884972,1,0.999028,1.0,0.998886,2.0,1.007210,31.0,1.032533,130.0,,
2009-04-22,49.865590,49.891434,49.710528,49.884972,379400,49.923695,1,1.000907,1.0,1.001193,2.0,1.008874,32.0,1.034197,131.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-14,72.660004,72.730003,72.599998,72.660004,3527000,72.389999,0,1.000758,2.0,1.006511,4.0,1.019370,35.0,1.033227,129.0,0.980777,492.0
2024-06-17,72.370003,72.410004,72.290001,72.389999,4019000,72.639999,1,0.998139,1.0,1.000608,4.0,1.015408,34.0,1.029270,128.0,0.977222,491.0
2024-06-18,72.480003,72.739998,72.470001,72.639999,8491900,72.500000,0,1.001724,1.0,1.002014,4.0,1.018737,34.0,1.032699,128.0,0.980686,491.0
2024-06-20,72.400002,72.529999,72.349998,72.500000,4227800,72.519997,1,0.999035,1.0,0.999338,3.0,1.016597,34.0,1.030572,128.0,0.978887,490.0


In [None]:
#function to append an etf's predictors to the main TMF dataframe
def appendETF(etf, etf_name, base=None):
        """Return ``base`` with the ETF's derived predictor columns added side by side.

        Parameters
        ----------
        etf : pandas.DataFrame
                Frame shaped like the output of ``configureETF``; it is not modified.
        etf_name : str
                Prefix that was used when the ETF columns were generated.
        base : pandas.DataFrame, optional
                Frame to extend. Defaults to the notebook-level ``tmf`` frame.

        Returns
        -------
        pandas.DataFrame
                ``base`` and the remaining ETF columns concatenated along the columns axis.

        Raises
        ------
        KeyError
                If ``etf`` lacks one of the columns that are removed.
        """
        if base is None:
                base = tmf #fall back to the notebook's main TMF dataframe
        #remove all columns that are not used in predictions
        unused_columns = ["Open", "High", "Low", "Close", "Volume", "Tomorrow", f"{etf_name}_Target"]
        etf_predictors = etf.drop(columns=unused_columns)
        return pd.concat([base, etf_predictors], axis=1)

In [42]:
#predict & backtest functions
#prediction and test function
def predict(test, predictors, train, model, threshold=0.6): #build an function for the testing of the model
        """Fit ``model`` on ``train`` and return thresholded up/down predictions for ``test``.

        Parameters
        ----------
        test, train : pandas.DataFrame
                Must contain every column in ``predictors`` plus a "Target" column.
        predictors : list of str
                Names of the feature columns.
        model : estimator with ``fit(X, y)`` and ``predict_proba(X)``
                ``predict_proba`` must return a 2-D array whose second column is the
                probability of the positive ("up") class.
        threshold : float, optional
                Minimum "up" probability required to predict 1. Defaults to 0.6.

        Returns
        -------
        pandas.DataFrame
                Indexed like ``test`` with columns "Target" and "Predictions"
                (predictions are the floats 0.0 / 1.0).
        """
        model.fit(train[predictors], train["Target"])
        #instead of a hard 0/1 answer, take the probability of the stock going up (second column of predict_proba)
        up_probability = model.predict_proba(test[predictors])[:, 1]
        #only predict that the stock goes up if that probability reaches the threshold, otherwise predict down
        preds = pd.Series((up_probability >= threshold).astype(float), index=test.index, name="Predictions")
        return pd.concat([test["Target"], preds], axis=1)

#backtesting algorithm
def backtest(data, model, predictors, start=2500, step=250): #backtesting: start at ten years time seeing ten years of historical data; step one year at a time
        """Walk forward through ``data``, retraining before each block of ``step`` rows.

        For each i in ``range(start, len(data), step)`` the model is trained on rows
        ``[0, i)`` and used to predict rows ``[i, i + step)``.

        Parameters
        ----------
        data : pandas.DataFrame
                Rows in chronological order, containing ``predictors`` and "Target".
        model : estimator forwarded to ``predict``.
        predictors : list of str
                Names of the feature columns.
        start : int, optional
                Number of leading rows used only for training. Defaults to 2500.
        step : int, optional
                Number of rows predicted per retraining round. Defaults to 250.

        Returns
        -------
        pandas.DataFrame
                The per-round outputs of ``predict`` concatenated in order.

        Raises
        ------
        ValueError
                If ``step`` is not positive or ``data`` has no rows beyond ``start``.
        """
        if step <= 0:
                raise ValueError(f"step must be a positive number of rows, got {step}")
        total_rows = data.shape[0]
        if total_rows <= start:
                #without this check the loop never runs and pd.concat fails with an unhelpful message
                raise ValueError(
                        f"backtest needs more than start={start} rows to make any prediction, got {total_rows}"
                )

        all_predictions = [] #predictions for each year (step) is stored here
        for i in range(start, total_rows, step): #loop from start to the end of the dataset, increasing i by step each time
                train = data.iloc[0:i].copy() #train with every row before the current block
                test = data.iloc[i:(i+step)].copy() #predict the upcoming block (the last one may be shorter than step)
                all_predictions.append(predict(test, predictors, train, model))
        return pd.concat(all_predictions) #takes the list of all predictions and combines them into one


In [28]:
#learn and test
#rows with any missing value (rolling-window warm-up rows, and the last row which has no "Tomorrow") cannot be used by the model
#NOTE(review): droppedna_tmf was used here without being defined anywhere visible in this notebook;
#tmf.dropna() is assumed to be how it was built - confirm against the original session
droppedna_tmf = tmf.dropna()
predictions = backtest(droppedna_tmf, model, new_predictors) #only the derived ratio/trend predictors are used, not the raw open/close/volume/high/low values, because an absolute value does little to predict trends

In [30]:
#count how many backtested days were predicted as up (1.0) versus not up (0.0)
predictions["Predictions"].value_counts()

Predictions
0.0    295
1.0     26
Name: count, dtype: int64

In [31]:
#precision of the "up" calls: of the days predicted 1, the fraction whose Target was really 1
precision_score(y_true=predictions["Target"], y_pred=predictions["Predictions"])

np.float64(0.5769230769230769)

In [237]:
#price-only predictors: the close ratio and trend column for each horizon, without the Interest_* columns
no_interest_predictors = [
        f"{feature}_{horizon}"
        for horizon in (2, 5, 60, 250, 1000)
        for feature in ("Close_Ratio", "Trend")
]

In [238]:
#rerun the same backtest with only the price-derived predictors (no interest-rate columns) for comparison
#note: this overwrites the `predictions` produced by the earlier backtest cell
predictions = backtest(droppedna_tmf, model, no_interest_predictors)

In [239]:
#count how many backtested days were predicted as up (1.0) versus not up (0.0) for the price-only run
predictions["Predictions"].value_counts()

Predictions
0.0    310
1.0     11
Name: count, dtype: int64

In [240]:
#precision of the "up" calls for the price-only run: of the days predicted 1, the fraction whose Target was really 1
precision_score(y_true=predictions["Target"], y_pred=predictions["Predictions"])

np.float64(0.5454545454545454)

In [243]:
#display the TMF feature frame
#NOTE(review): the recorded output below lacks the Interest_Tomorrow / Interest_Target / Interest_Trend_* columns
#that the TMF cell creates, so it appears to come from an earlier version of that cell - re-run to refresh
tmf

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Tomorrow,Target,interest,Close_Ratio_2,Trend_2,...,Interest_Ratio_5,Close_Ratio_60,Trend_60,Interest_Ratio_60,Close_Ratio_250,Trend_250,Interest_Ratio_250,Close_Ratio_1000,Trend_1000,Interest_Ratio_1000
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-04-16,106.956261,108.144257,105.951033,106.261742,4560,101.509743,0,0.15,,,...,,,,,,,,,,
2009-04-17,104.543699,104.726467,100.906597,101.509743,12040,107.669037,1,0.15,0.977129,,...,,,,,,,,,,
2009-04-20,104.470585,107.669037,104.470585,107.669037,2720,104.415741,0,0.15,1.029445,1.0,...,,,,,,,,,,
2009-04-21,110.410551,111.434062,104.379185,104.415741,6240,100.851768,0,0.15,0.984660,1.0,...,,,,,,,,,,
2009-04-22,104.360930,104.360930,99.608944,100.851768,10120,101.381783,1,0.15,0.982637,0.0,...,672.345123,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-14,53.570000,54.279999,53.290001,53.980000,6863100,52.299999,0,5.33,1.012188,2.0,...,10.127580,1.113671,32.0,10.127580,0.988618,121.0,10.176841,0.320184,478.0,23.170465
2024-06-17,52.049999,52.520000,51.599998,52.299999,6033600,53.750000,1,5.33,0.984193,1.0,...,9.812383,1.078936,31.0,9.812383,0.959591,120.0,9.858252,0.310800,477.0,22.398862
2024-06-18,52.630001,53.799999,52.340000,53.750000,5541300,52.660000,0,5.33,1.013673,1.0,...,10.084428,1.108777,31.0,10.084428,0.987942,120.0,10.129660,0.320036,477.0,22.968221
2024-06-20,51.770000,52.750000,51.630001,52.660000,5372500,52.630001,0,5.33,0.989757,1.0,...,9.879925,1.086344,31.0,9.879925,0.969494,120.0,9.922370,0.314149,477.0,22.452077
