In [35]:
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score
import pandas as pd

In [36]:
stock = yf.Ticker("^GSPC")  # Change Ticker Here

In [37]:
# Retrieving Complete History
stock = stock.history(period="max")

In [38]:
# Removing Unnecessary Data
# drop() builds a new frame instead of mutating in place, and errors="ignore"
# lets this cell be re-run (or be used on a history that lacks these columns)
# without raising KeyError.
stock = stock.drop(columns=["Dividends", "Stock Splits"], errors="ignore")

In [39]:
# Creating Target Column
# Tomorrow = next row's close; Target = 1 when tomorrow closes above today.
stock["Tomorrow"] = stock["Close"].shift(-1)

stock["Target"] = (stock["Tomorrow"] > stock["Close"]).astype(int)

# The newest row has no next-day close, so Tomorrow is NaN there and the
# comparison above silently evaluates to False (Target = 0). That label is
# unknown, not "down", so the row is removed rather than scored as a real outcome.
# NOTE: re-running this cell on the already-trimmed frame trims one more row;
# re-run from the download cell instead.
stock = stock.dropna(subset=["Tomorrow"])

In [51]:
# Specifying Time Range
# Keep rows labelled 2000-01-01 or later. The explicit copy gives an
# independent frame for the feature columns that are assigned onto it later.
stock = stock.loc[slice("2000-01-01", None)].copy()

In [52]:
# Model Creating and Fitting
# Baseline inputs: the raw daily price and volume columns.
features = ["Open", "High", "Low", "Close", "Volume"]

# Positional split: the last 100 rows are held out, all earlier rows train.
train, test = stock.iloc[:-100], stock.iloc[-100:]

# random_state is pinned so repeated runs build the same forest.
model = RandomForestClassifier(
    n_estimators=200,
    min_samples_split=50,
    random_state=1,
)
model.fit(X=train[features], y=train["Target"])

In [53]:
# Testing the Model
# Predict the held-out rows and re-attach the test index so the predictions
# line up row-for-row with test["Target"].
preds = pd.Series(model.predict(test[features]), index=test.index)

# Precision: of the rows predicted as 1, the fraction whose Target is 1.
precision_score(y_true=test["Target"], y_pred=preds)

1.0

In [54]:
# Creating Predict Function
def predict(train, test, features, model, threshold=0.5):
    """Fit ``model`` on ``train`` and label every row of ``test``.

    Parameters
    ----------
    train, test : DataFrame
        Both must contain the ``features`` columns and a ``"Target"`` column.
    features : list of str
        Column names used as model inputs.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict_proba(X)``. It is re-fitted
        in place on every call.
    threshold : float, default 0.5
        Minimum probability in column 1 of ``predict_proba`` for a row to be
        labelled 1. Raising it trades the number of positive calls for
        confidence in each call.

    Returns
    -------
    DataFrame
        Indexed like ``test`` with two columns: ``"Target"`` (copied from
        ``test``) and ``"Predictions"`` (1.0 or 0.0).
    """
    model.fit(train[features], train["Target"])

    # Column 1 of predict_proba is taken as the probability of the positive class.
    up_probability = model.predict_proba(test[features])[:, 1]

    # One vectorised comparison replaces the two in-place mask assignments.
    # The result is kept as float so the column dtype matches earlier outputs.
    labels = (up_probability >= threshold).astype(float)

    preds = pd.Series(labels, index=test.index, name="Predictions")
    return pd.concat([test["Target"], preds], axis=1)

In [55]:
# Creating Backtest Function
def backtest(data, model, features, start=2500, step=250):
    """Walk-forward evaluation of ``model`` over ``data``.

    Starting at row ``start``, the model is trained on every row before the
    current position and asked to predict the next ``step`` rows. The position
    then advances by ``step`` and the process repeats until the data runs out.

    Parameters
    ----------
    data : DataFrame
        Rows in the order they should be walked; must contain the ``features``
        columns and ``"Target"``.
    model : estimator
        Passed through to ``predict`` and re-fitted on each fold.
    features : list of str
        Column names used as model inputs.
    start : int, default 2500
        Number of leading rows reserved as the first training window.
    step : int, default 250
        Number of rows predicted per fold.

    Returns
    -------
    DataFrame
        The per-fold results of ``predict`` concatenated in order.

    Raises
    ------
    ValueError
        If ``step`` is not positive, or ``data`` has no rows beyond ``start``
        (previously this surfaced as pandas' opaque "No objects to concatenate").
    """
    n_rows = data.shape[0]
    if step < 1:
        raise ValueError(f"step must be a positive integer, got step={step}")
    if n_rows <= start:
        raise ValueError(
            f"backtest needs more rows than start={start} to form a test window, "
            f"but data has only {n_rows} rows"
        )

    all_predictions = []
    for i in range(start, n_rows, step):
        # Expanding window: train on everything before row i, test on the next `step` rows.
        train = data.iloc[0:i].copy()
        test = data.iloc[i:(i + step)].copy()
        all_predictions.append(predict(train, test, features, model))
    return pd.concat(all_predictions)

In [56]:
# Creating Moving Average Data
# For each horizon (in rows) two features are added:
#   Close_Ratio_<h>: today's close divided by the mean close of the last h rows
#   Trend_<h>:       number of Target == 1 rows among the h rows before today
horizons = [2, 5, 60, 250, 1000]
new_features = []

for horizon in horizons:
    # Roll only the column that is needed. Rolling the whole frame recomputed
    # every column (including features added by earlier iterations) just to
    # pick out one of them; the resulting values are identical.
    rolling_close = stock["Close"].rolling(horizon).mean()

    ratio_column = f"Close_Ratio_{horizon}"
    stock[ratio_column] = stock["Close"] / rolling_close

    # shift(1) excludes the current row's own Target from its trend count.
    trend_column = f"Trend_{horizon}"
    stock[trend_column] = stock["Target"].shift(1).rolling(horizon).sum()

    new_features.extend([ratio_column, trend_column])

In [57]:
# Discard every row that has a missing value in any column, e.g. the leading
# rows for which a rolling window is not yet full.
stock = stock.dropna(axis=0, how="any")

In [58]:
# Walk-forward backtest using only the engineered ratio/trend features,
# then count how often each class was predicted.
predictions = backtest(data=stock, model=model, features=new_features)
predictions.loc[:, "Predictions"].value_counts()

Predictions
1.0    1714
0.0     924
Name: count, dtype: int64

In [59]:
# Evaluating Model Precision
# (share of rows predicted as 1 whose Target is 1; this is precision, not overall accuracy)
precision_score(y_true=predictions["Target"], y_pred=predictions["Predictions"])

0.5326721120186698

In [50]:
# True Stock Evaluation
# Share of Target == 1 vs. Target == 0 rows over the backtested rows; this is
# the base rate the precision figure should be compared against.
predictions["Target"].value_counts().div(predictions.shape[0])

Target
1    0.529011
0    0.470989
Name: count, dtype: float64