In [None]:
import yfinance as yf

In [None]:
# Prompt the user for a stock ticker in Yahoo Finance format (e.g. 'AAPL' or 'TSLA').
# Surrounding whitespace is stripped and an empty entry is rejected here, so a
# missing ticker fails immediately instead of in a later cell.
ticker_symbol = input("Enter In a Stock using the yahoo name:").strip()
if not ticker_symbol:
    raise ValueError("No ticker symbol entered; expected a Yahoo Finance ticker such as 'AAPL'.")
stock = yf.Ticker(ticker_symbol)

In [None]:
# Retrieve the full available history for that stock (period="max").
# NOTE(review): this rebinds `stock` from the Ticker object to the result of
# .history(), so re-running this cell without first re-running the ticker
# prompt will presumably fail — confirm.
stock = stock.history(period ="max")

In [None]:
# Display the full dataset (a cell renders the value of its last expression)
stock

In [None]:
# Display the DataFrame index (presumably the trading dates — verify)
stock.index

In [None]:
# Line chart of the "Close" column, with the DataFrame index on the x-axis
stock.plot(kind="line", y="Close", use_index=True)

In [None]:
# Remove columns that are not used as features.
# drop(..., errors="ignore") keeps this cell re-runnable: the previous `del`
# statements raised KeyError on a second run or when a column was absent.
stock = stock.drop(columns=["Dividends", "Stock Splits"], errors="ignore")

In [None]:
# Add a "Tomorrow" column holding the next row's closing price
# (the "Close" column shifted up by one row; the last row becomes NaN).
stock["Tomorrow"] = stock["Close"].shift(periods=-1)

In [None]:
# Inspect the updated DataFrame (a cell renders the value of its last expression)
stock

In [None]:
# Create the "Actual" target column:
# 1 if tomorrow's close is higher than today's close, else 0.
stock["Actual"] = (stock["Tomorrow"] > stock["Close"]).astype(int)
# The newest row has no "Tomorrow" value (NaN from the shift), and a comparison
# against NaN is False, so it would be silently labelled 0. Drop rows without a
# known next-day close so they are never trained on or scored as "down" days.
stock = stock.dropna(subset=["Tomorrow"]).copy()

In [None]:
# Keep only rows from 1988-01-01 onward. The copy gives later column
# assignments an independent frame to work on; the final line displays it.
stock = stock.loc["1988-01-01":, :].copy()
stock

In [None]:

from sklearn.ensemble import RandomForestClassifier

In [None]:
# Feature columns the classifier learns from
predictors = ["Close", "Volume", "Open", "High" , "Low"]

# Hold out the final 100 rows for testing; everything before them is training data
test = stock.tail(100)
training = stock.head(-100)

# Build the random forest (fixed seed for repeatable results) and fit it on the training rows
model = RandomForestClassifier(random_state=1, n_estimators=200, min_samples_split=25)
model.fit(training[predictors], training["Actual"])

In [None]:
# Import the precision metric, then predict labels for the test rows
# using the same feature columns the model was fitted on.
from sklearn.metrics import precision_score
predicts = model.predict(test[predictors])

In [None]:
# Wrap the predictions in a pandas Series that shares the test set's index,
# so they line up row-for-row with test["Actual"].
import pandas as pd
predicts = pd.Series(predicts, index= test.index)

In [None]:
# Precision of the test-set predictions against the true labels
precision_score(y_true=test["Actual"], y_pred=predicts)

In [None]:
# Place actual labels and predictions side by side (one column each) and plot both
combined = pd.concat(objs=[test["Actual"], predicts], axis="columns")
combined.plot()

In [None]:

# Define a helper function to predict and combine results
def predict(training, test, predictors, model):
    """Fit ``model`` on the training rows and pair its test predictions with the true labels.

    Args:
        training: DataFrame containing the ``predictors`` columns and an "Actual" column.
        test: DataFrame containing the same columns; its rows are predicted.
        predictors: list of feature column names.
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns:
        DataFrame indexed like ``test`` with two columns: "Actual" (the labels
        from ``test``) and "Predictions" (the model output).
    """
    features, target = training[predictors], training["Actual"]
    model.fit(features, target)
    predicted = pd.Series(
        model.predict(test[predictors]),
        index=test.index,
        name="Predictions",
    )
    return pd.concat([test["Actual"], predicted], axis=1)

In [None]:

# Define a backtesting function to simulate rolling training and testing
def backtest(data, model, predictors, start = 2500, step =250):
    """Walk forward through ``data``, refitting and predicting one window at a time.

    For each ``i`` in ``range(start, len(data), step)`` the model is fitted on
    rows ``[0, i)`` and used to predict rows ``[i, i + step)``. The work is done
    by the notebook-level ``predict`` function, whichever definition is bound
    when this runs.

    Args:
        data: DataFrame holding the ``predictors`` columns and an "Actual" column.
        model: estimator handed through to ``predict``.
        predictors: list of feature column names.
        start: number of leading rows used for the first training window.
        step: number of rows predicted per window.

    Returns:
        The per-window frames returned by ``predict``, concatenated in order.

    Raises:
        ValueError: if ``data`` has ``start`` rows or fewer, so no test window
            exists (previously surfaced as pandas' "No objects to concatenate").
    """
    n_rows = data.shape[0]
    if n_rows <= start:
        raise ValueError(
            f"backtest needs more than start={start} rows of data, got {n_rows}; "
            "lower `start` or use a ticker with a longer history."
        )

    all_predictions = []
    for i in range(start, n_rows, step):
        # Copies keep each window independent of the source frame.
        train = data.iloc[0:i].copy()
        test = data.iloc[i:(i+step)].copy()
        all_predictions.append(predict(train, test, predictors, model))

    return pd.concat(all_predictions)

In [None]:
# Walk-forward evaluation of the model over the dataset using the `predictors` feature list
predictions = backtest(data=stock, model=model, predictors=predictors)

In [None]:
# Evaluate the backtest: how often each class was predicted, and the precision
# of the predictions. Only a cell's last expression is rendered automatically,
# so the counts are printed explicitly instead of being silently discarded.
print(predictions["Predictions"].value_counts())
precision_score(predictions["Actual"], predictions["Predictions"])


In [None]:
# Baseline: fraction of backtested rows whose actual label falls in each class
predictions["Actual"].value_counts().div(len(predictions))


In [None]:

# Create new technical indicators over multiple time horizons
newPredictor = []
horizons = [2, 5, 20, 60, 250, 1000]
for horizon in horizons:
    # Ratio of the current close to its rolling mean over the horizon.
    # Only the "Close" column is averaged: rolling over the whole frame did the
    # same work for every column (including indicator columns added by earlier
    # iterations) and then threw all but one away.
    ratioColumn = f"Close_Ration_{horizon}"
    stock[ratioColumn] = stock["Close"] / stock["Close"].rolling(horizon).mean()

    # Trend: sum of "Actual" over the `horizon` rows before the current one.
    # shift(1) keeps the current row's own label out of its feature.
    trendColumn = f"Trend_{horizon}"
    stock[trendColumn] = stock["Actual"].shift(1).rolling(horizon).sum()

    newPredictor += [ratioColumn, trendColumn]

# Drop rows with NaN values created by the rolling calculations.
# NOTE: re-running this cell recomputes the windows on the already-trimmed
# frame and trims it again.
stock = stock.dropna()


In [None]:
# Display the frame with NaN rows dropped; the result is not assigned, so
# `stock` itself is left unchanged by this cell.
stock.dropna()

In [None]:
# Fresh random forest for the next backtest: 200 trees, a fixed seed, and
# min_samples_split=50.
model = RandomForestClassifier(random_state=1, min_samples_split=50, n_estimators=200)

In [None]:
# Define a new prediction function using probability thresholds
def predict(train, test, predictors, model, threshold=.65):
    """Fit ``model`` and label a test row 1 only when its class-1 probability reaches ``threshold``.

    Args:
        train: DataFrame containing the ``predictors`` columns and an "Actual" column.
        test: DataFrame containing the same columns; its rows are predicted.
        predictors: list of feature column names.
        model: estimator exposing ``fit(X, y)`` and ``predict_proba(X)``; column 1
            of the probability output is used.
        threshold: minimum probability required to predict 1. Defaults to 0.65,
            the cut-off that was previously hard-coded.

    Returns:
        DataFrame indexed like ``test`` with columns "Actual" and "Predictions";
        predictions are floats, 1.0 or 0.0.
    """
    model.fit(train[predictors], train["Actual"])
    probabilities = model.predict_proba(test[predictors])[:, 1]
    # Vectorised cut-off; the float cast keeps the 1.0 / 0.0 values that the
    # earlier in-place thresholding produced.
    labels = (probabilities >= threshold).astype(float)
    predicts = pd.Series(labels, index = test.index, name="Predictions")
    return pd.concat([test["Actual"], predicts], axis = 1)

In [None]:
# Repeat the walk-forward backtest with the current model and the `newPredictor` feature columns
predictions = backtest(data=stock, model=model, predictors=newPredictor)

In [None]:
# Share of each predicted class, in percent. value_counts() alone returns raw
# counts, so multiplying those by 100 did not produce percentages;
# normalize=True converts the counts to fractions first.
predictions["Predictions"].value_counts(normalize=True) * 100

In [None]:
# Display prediction counts and precision. Only a cell's last expression is
# rendered automatically, so the counts are printed explicitly; they are raw
# counts (the earlier "* 100" scaled them without turning them into percentages).
print(predictions["Predictions"].value_counts())
precision_score(predictions["Actual"], predictions["Predictions"])