In [1]:
import pandas as pd


# List of stock symbols
stock_symbols = ["AMZN", "AAPL", "NVDA", "MSFT", "GOOG", "META", "TSLA", "WMT", "JPM", "NFLX"]  # Extend this list as needed
stock = {}

# Dynamically construct file path and load the data for each stock symbol
for symbol in stock_symbols:
    file_path = f"../processed_data/data{symbol}.csv"  # Dynamically generate file path
    stock[symbol] = pd.read_csv(file_path)

df = {}
for i in range(len(stock_symbols)):
    df[stock_symbols[i]] = stock[stock_symbols[i]][['today', 'previous1', 'previous2', 'previous3', 'previous4', 'previous5', 'Volume',
                                                'ema50', 'ema21', 'ema14', 'ema5', 'rsi', 'macd', 'roc', 'atr', 'obv', 'cmf', 'emv', 'stoch', 'mfi', 'cci', 'trend']]



In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from keras.src.models import Sequential
from keras.src.layers import LSTM, Dense, Dropout
from scipy.stats import loguniform

stock_symbols = ["AMZN", "AAPL", "NVDA", "MSFT", "GOOG", "META", "TSLA", "WMT", "JPM", "NFLX"]

for stock in stock_symbols:
    print(f"Processing {stock}...")

    # # LOGISTIC REGRESSION
    X = df[stock].loc[:, df[stock].columns != 'trend']
    y = df[stock]['trend']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

    scaler = MinMaxScaler()

    X_train_scaled = scaler.fit_transform(X_train.values)
    X_test_scaled = scaler.transform(X_test.values)

    lr = LogisticRegression(penalty='l2', C=0.1, random_state=42)

    lr.fit(X_train_scaled, y_train.values)

    predictions = lr.predict(X_test_scaled)
    accuracy = accuracy_score(y_test.values, predictions)
    print(f"{stock} Logistic Regression Accuracy: {accuracy:.2f}")
    classification_rep = classification_report(y_test.values, predictions)
    print(f"{stock} Logistic Regression Classification Report:\n", classification_rep)


    #RANDOM FOREST
    X = df[stock][['Volume', 'ema50', 'ema21',
                          'ema14', 'ema5', 'rsi',
                           'macd', 'roc', 'obv',
                            'atr', 'cmf', 'emv',
                             'stoch', 'cci', 'mfi']]
    y = df[stock]['trend']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    rf = RandomForestClassifier(n_estimators=110, random_state=21)
    rf.fit(X_train.values, y_train.values)

    y_pred = rf.predict(X_test.values)
    accuracy = accuracy_score(y_test.values, y_pred)
    print(f"{stock} Random Forest Accuracy: {accuracy:.2f}")
    classification_rep = classification_report(y_test.values, y_pred)
    print(f"{stock} Random Forest Classification Report:\n", classification_rep)


    # LSTM
    X = df[stock].loc[:, df[stock].columns != 'trend']
    y = df[stock]['trend']
    scale = MinMaxScaler(feature_range=(0,1))
    X_scaled = scale.fit_transform(X)

    timesteps = 10
    feature = X_scaled.shape[1]
    
    test_len = int(len(X_scaled)*0.25)
    train_len = X_scaled.shape[0] - test_len
    
    X_train = []
    y_train = []

    for i in range(timesteps, train_len):
        X_train.append(X_scaled[i-timesteps:i])
        y_train.append(y.values[i])

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    X_test = []
    y_test = []
    for i in range(train_len-1, len(X_scaled)):
        X_test.append(X_scaled[i-timesteps: i])
        y_test.append(y.values[i])

    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)

    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=(timesteps, feature)))
    model.add(Dropout(0.1))
    model.add(LSTM(64))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='mse')

    model.fit(X_train, y_train, epochs=15, batch_size=8, validation_split=0.1, verbose=0)

    y_pred = (model.predict(X_test) > 0.5).astype("int32")
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{stock} LSTM Model Accuracy: {accuracy:.2f}")
    classification_rep = classification_report(y_test, y_pred)
    print(f"{stock} LSTM Classification Report:\n", classification_rep)


Processing AMZN...
AMZN Logistic Regression Accuracy: 0.78
AMZN Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.63      0.71       241
           1       0.76      0.89      0.82       315

    accuracy                           0.78       556
   macro avg       0.79      0.76      0.77       556
weighted avg       0.78      0.78      0.77       556

AMZN Random Forest Accuracy: 0.80
AMZN Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.78      0.78       255
           1       0.81      0.81      0.81       301

    accuracy                           0.80       556
   macro avg       0.80      0.80      0.80       556
weighted avg       0.80      0.80      0.80       556



  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
AMZN LSTM Model Accuracy: 0.59
AMZN LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.47      0.51       255
           1       0.60      0.69      0.64       301

    accuracy                           0.59       556
   macro avg       0.58      0.58      0.58       556
weighted avg       0.58      0.59      0.58       556

Processing AAPL...
AAPL Logistic Regression Accuracy: 0.76
AAPL Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.62      0.70       252
           1       0.73      0.88      0.80       304

    accuracy                           0.76       556
   macro avg       0.77      0.75      0.75       556
weighted avg       0.77      0.76      0.76       556

AAPL Random Forest Accuracy: 0.84
AAPL Random Forest Classification Report:
               precisio

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
AAPL LSTM Model Accuracy: 0.64
AAPL LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.58      0.59       248
           1       0.67      0.70      0.68       308

    accuracy                           0.64       556
   macro avg       0.64      0.64      0.64       556
weighted avg       0.64      0.64      0.64       556

Processing NVDA...
NVDA Logistic Regression Accuracy: 0.72
NVDA Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.55      0.64       249
           1       0.70      0.86      0.77       307

    accuracy                           0.72       556
   macro avg       0.73      0.70      0.70       556
weighted avg       0.73      0.72      0.71       556

NVDA Random Forest Accuracy: 0.83
NVDA Random Forest Classification Report:
               precisio

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
NVDA LSTM Model Accuracy: 0.61
NVDA LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.47      0.50       234
           1       0.65      0.71      0.68       322

    accuracy                           0.61       556
   macro avg       0.59      0.59      0.59       556
weighted avg       0.60      0.61      0.60       556

Processing MSFT...
MSFT Logistic Regression Accuracy: 0.74
MSFT Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.55      0.67       264
           1       0.69      0.90      0.78       292

    accuracy                           0.74       556
   macro avg       0.77      0.73      0.73       556
weighted avg       0.76      0.74      0.73       556

MSFT Random Forest Accuracy: 0.83
MSFT Random Forest Classification Report:
               precisio

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
MSFT LSTM Model Accuracy: 0.60
MSFT LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.35      0.44       252
           1       0.60      0.80      0.69       304

    accuracy                           0.60       556
   macro avg       0.60      0.58      0.56       556
weighted avg       0.60      0.60      0.57       556

Processing GOOG...
GOOG Logistic Regression Accuracy: 0.77
GOOG Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.60      0.71       266
           1       0.72      0.92      0.80       290

    accuracy                           0.77       556
   macro avg       0.79      0.76      0.76       556
weighted avg       0.79      0.77      0.76       556

GOOG Random Forest Accuracy: 0.82
GOOG Random Forest Classification Report:
               precisio

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
GOOG LSTM Model Accuracy: 0.63
GOOG LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.43      0.51       248
           1       0.63      0.80      0.71       308

    accuracy                           0.63       556
   macro avg       0.63      0.61      0.61       556
weighted avg       0.63      0.63      0.62       556

Processing META...
META Logistic Regression Accuracy: 0.72
META Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.54      0.64       261
           1       0.68      0.87      0.76       295

    accuracy                           0.72       556
   macro avg       0.73      0.71      0.70       556
weighted avg       0.73      0.72      0.71       556

META Random Forest Accuracy: 0.83
META Random Forest Classification Report:
               precisio

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
META LSTM Model Accuracy: 0.59
META LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.11      0.19       239
           1       0.59      0.96      0.73       317

    accuracy                           0.59       556
   macro avg       0.63      0.53      0.46       556
weighted avg       0.62      0.59      0.50       556

Processing TSLA...
TSLA Logistic Regression Accuracy: 0.77
TSLA Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.70      0.73       254
           1       0.77      0.82      0.79       302

    accuracy                           0.77       556
   macro avg       0.77      0.76      0.76       556
weighted avg       0.77      0.77      0.77       556

TSLA Random Forest Accuracy: 0.83
TSLA Random Forest Classification Report:
               precisio

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
TSLA LSTM Model Accuracy: 0.63
TSLA LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.50      0.57       277
           1       0.60      0.76      0.67       279

    accuracy                           0.63       556
   macro avg       0.64      0.63      0.62       556
weighted avg       0.64      0.63      0.62       556

Processing WMT...
WMT Logistic Regression Accuracy: 0.71
WMT Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.55      0.64       260
           1       0.68      0.85      0.76       296

    accuracy                           0.71       556
   macro avg       0.72      0.70      0.70       556
weighted avg       0.72      0.71      0.70       556

WMT Random Forest Accuracy: 0.83
WMT Random Forest Classification Report:
               precision    

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
WMT LSTM Model Accuracy: 0.61
WMT LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.48      0.50       231
           1       0.65      0.70      0.67       325

    accuracy                           0.61       556
   macro avg       0.59      0.59      0.59       556
weighted avg       0.60      0.61      0.60       556

Processing JPM...
JPM Logistic Regression Accuracy: 0.69
JPM Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.68      0.55      0.61       242
           1       0.70      0.81      0.75       314

    accuracy                           0.69       556
   macro avg       0.69      0.68      0.68       556
weighted avg       0.69      0.69      0.69       556

JPM Random Forest Accuracy: 0.84
JPM Random Forest Classification Report:
               precision    re

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
JPM LSTM Model Accuracy: 0.66
JPM LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.34      0.45       228
           1       0.66      0.88      0.75       328

    accuracy                           0.66       556
   macro avg       0.66      0.61      0.60       556
weighted avg       0.66      0.66      0.63       556

Processing NFLX...
NFLX Logistic Regression Accuracy: 0.71
NFLX Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.61      0.65       248
           1       0.72      0.79      0.75       307

    accuracy                           0.71       555
   macro avg       0.71      0.70      0.70       555
weighted avg       0.71      0.71      0.71       555

NFLX Random Forest Accuracy: 0.85
NFLX Random Forest Classification Report:
               precision 

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
NFLX LSTM Model Accuracy: 0.59
NFLX LSTM Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.31      0.40       248
           1       0.59      0.81      0.69       307

    accuracy                           0.59       555
   macro avg       0.58      0.56      0.54       555
weighted avg       0.58      0.59      0.56       555

