In [1]:
## STEP 1: PRE-PROCESSING

## IMPORTING PACKAGES
import pandas as pd
import numpy as np
import matplotlib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score
import Project_Library as pl

print('packages imported')


## IMPORTING DATASETS - FROM st1 to st10
# Each CSV is read with its 'Date' column parsed as datetimes and used as the
# index, then published as a notebook-level variable named df1 ... df10.
for i in range(1, 11):
    csv_path = f'Data/st{i}.csv'
    frame = pd.read_csv(csv_path, index_col='Date', parse_dates=['Date'])
    globals()[f'df{i}'] = frame

print('datasets imported')


## CREATING TARGET COLUMN
# Collect the ten frames in file order and hand them to the project helper;
# whatever it returns becomes the working list used by the later cells.
# NOTE(review): presumably the helper adds a 'Target' column to each frame
# (the later cells read df["Target"]) - confirm in Project_Library.
stocks_list = pl.create_target_column(
    [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10]
)



Predict function created.
Backtest function created.
Function to add more predictors has been created.
packages imported
datasets imported
Target columns created for all stocks


In [2]:
## STEP 2: BUILDING PREDICTION MACHINE LEARNING MODEL.

## BUILDING MACHINE LEARNING MODEL USING RANDOM FOREST - WITH BASIC PREDICTORS
from sklearn.ensemble import RandomForestClassifier

# Baseline classifier: 200 trees, a node needs at least 100 samples before it
# may be split, and a fixed seed so repeated runs give the same forest.
model = RandomForestClassifier(
    n_estimators=200,
    min_samples_split=100,
    random_state=1,
)
print('Model created')

# DEFINE THE BASIC PREDICTORS
# Only the raw price/volume columns are used as features at this stage.
predictors = ['Close', 'Volume', 'Open', 'High', 'Low']
print('Basic Predictors:', predictors)

# Evaluate every stock with the project helper. The loop below unpacks each
# returned entry as a (stock label, accuracy) pair.
top_5_stocks = pl.run_model_for_all_stocks(stocks_list, model, predictors)

# Report the ranking, accuracy shown to four decimal places.
print("Top 5 Stocks by Accuracy:")
for stock, accuracy in top_5_stocks:
    print(f"{stock}: {accuracy:.4f}")

Model created
Basic Predictors: ['Close', 'Volume', 'Open', 'High', 'Low']
Running model for stock 1
Running model for stock 2
Running model for stock 3
Running model for stock 4
Running model for stock 5
Running model for stock 6
Running model for stock 7
Running model for stock 8
Running model for stock 9
Running model for stock 10
Top 5 Stocks by Accuracy:
Stock_10: 0.5331
Stock_9: 0.5257
Stock_2: 0.5242
Stock_7: 0.5196
Stock_6: 0.5188


In [4]:
## STEP 3: BUILDING MACHINE LEARNING MODEL USING RANDOM FOREST - WITH DERIVED PREDICTORS

# ADDING THE ROLLING AVERAGES AND TRENDS.
# Rolling-window lengths, in rows, used to build the derived predictors.
horizons = [2, 5, 60, 250, 1000]
# Maps "Stock_<n>" -> backtest accuracy for that stock.
accuracy_scores = {}
# Initialised here so the summary print after the loop never reports a stale
# list left over from an earlier cell if stocks_list happens to be empty.
predictors = []

## LOOPING THROUGH EACH STOCK
for idx, df in enumerate(stocks_list):
    print(f"\nRunning model for stock {idx + 1}...")

    # Work on a copy so the frames held in stocks_list are not modified and
    # this cell can be re-run safely.
    df_stock = df.copy()

    ## ROLLING AVERAGE PREDICTORS
    new_predictors = []
    for horizon in horizons:
        # Roll over the single column that is needed instead of the whole
        # frame: the frame grows by two columns per horizon, so frame-wide
        # rolling repeats work on columns that are never read and fails on
        # any non-numeric column.
        ratio_column = f"Close_Ratio_{horizon}"
        close_rolling_mean = df_stock["Close"].rolling(horizon).mean()
        df_stock[ratio_column] = df_stock["Close"] / close_rolling_mean

        # shift(1) moves Target down one row first, so each window sums only
        # Target values from earlier rows, never the current row's own value.
        trend_column = f"Trend_{horizon}"
        df_stock[trend_column] = df_stock["Target"].shift(1).rolling(horizon).sum()

        new_predictors += [ratio_column, trend_column]

    ## ADDING ADDITIONAL PREDICTORS
    # The project helper returns the updated frame and the names of the
    # columns it contributes.
    df_stock, additional_predictors = pl.add_more_predictors(df_stock)

    ## DEFINING THE PREDICTORS
    predictors = new_predictors + additional_predictors

    ## DROPPING NA AGAIN (JUST IN CASE)
    # Rolling windows are NaN until they are full, so the leading rows of the
    # longest horizon are removed here. Assignment is used instead of
    # inplace=True so the step does not mutate a frame that may be shared.
    df_stock = df_stock.dropna()

    # MODELLING WITH RF.
    # A fresh model per stock, with the same hyper-parameters and seed.
    model = RandomForestClassifier(n_estimators=200, min_samples_split=100, random_state=1)
    predictions = pl.backtest(df_stock, model, predictors)

    ## CALCULATING THE ACCURACY
    acc = accuracy_score(predictions["Target"], predictions["Predictions"])
    accuracy_scores[f"Stock_{idx + 1}"] = acc

## PICK TOP 5 BASED ON ACCURACY
# `predictors` holds the feature list of the last stock processed.
print('Predictors:', predictors)
print('')
top_5 = sorted(accuracy_scores.items(), key=lambda x: x[1], reverse=True)[:5]

print("\nTop 5 Stocks with Highest Accuracy (with additional predictors):")
for stock, acc in top_5:
    print(f"{stock}: Accuracy = {acc:.4f}")




Running model for stock 1...

Running model for stock 2...

Running model for stock 3...


  df["Prev_Close_Return"] = df["Close"].pct_change()



Running model for stock 4...

Running model for stock 5...

Running model for stock 6...

Running model for stock 7...

Running model for stock 8...

Running model for stock 9...

Running model for stock 10...
Predictors: ['Close_Ratio_2', 'Trend_2', 'Close_Ratio_5', 'Trend_5', 'Close_Ratio_60', 'Trend_60', 'Close_Ratio_250', 'Trend_250', 'Close_Ratio_1000', 'Trend_1000', 'Prev_Close_Return', 'Intraday_Volatility', 'Volume_Spike_Ratio_5']


Top 5 Stocks with Highest Accuracy (with additional predictors):
Stock_9: Accuracy = 0.5367
Stock_6: Accuracy = 0.5266
Stock_2: Accuracy = 0.5222
Stock_10: Accuracy = 0.5214
Stock_8: Accuracy = 0.5179
