In [None]:
import os

import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import seaborn as sns
import tensorflow as tf
import yfinance as yf
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras.layers import LSTM,Dense,Dropout
from tensorflow.keras.models import Sequential


# Loading CSV data


In [None]:
# Number of consecutive rows in each input window handed to sliding_data.
time_steps = 60

In [None]:
# Location of the merged multi-stock CSV. Set the MERGED_DATA_CSV environment
# variable to point somewhere else; the original path stays as the default so
# existing setups keep working.
csv_path = os.environ.get(
    "MERGED_DATA_CSV",
    "/Users/jibanchaudhary/Documents/Projects/trading_project/merged_data.csv",
)

print("Loading csv data....")

# Order rows by stock and then by date so the per-stock rolling indicators
# computed later see each series in sequence, and give the frame a fresh
# 0..n-1 index.
# NOTE(review): 'Date' is sorted exactly as read; if it is a string column this
# is only chronological for ISO-style dates -- confirm the CSV format.
df = (
    pd.read_csv(csv_path)
    .sort_values(['Stock', 'Date'])
    .reset_index(drop=True)
)
df.head(10)

# Encoding stock identity

In [None]:
# One-hot encode the ticker into Stock_<name> columns.
# Columns with the same names left over from an earlier run of this cell are
# dropped first, so re-running the cell does not append duplicate columns.
stock_dummies = pd.get_dummies(df['Stock'], prefix='Stock')
df = pd.concat(
    [df.drop(columns=stock_dummies.columns, errors='ignore'), stock_dummies],
    axis=1,
)



# Technical indicators

In [None]:
# Compute the technical indicators separately for every stock and derive the
# label the model is trained on.
df_list = []
for stock, group in df.groupby('Stock'):
    group = group.copy()
    # pandas_ta appends the indicator columns to `group` in place; the column
    # names it produces are the ones listed in feature_cols further down.
    group.ta.sma(length=20, append=True)
    group.ta.ema(length=50, append=True)
    group.ta.rsi(length=14, append=True)
    group.ta.macd(fast=12, slow=26, signal=9, append=True)
    group.ta.bbands(length=20, append=True)

    # Signal is 2 while SMA_20 is above EMA_50 and 1 otherwise (including the
    # warm-up rows where either average is still NaN).
    # This is exactly what the previous shift/replace(method='ffill') chain
    # evaluated to, written without the deprecated `method` keyword of
    # Series.replace. Class 0 is never produced.
    group['Signal'] = np.where(group['SMA_20'] > group['EMA_50'], 2, 1)

    # Target for a row is the Signal of the following row (NaN on the last row).
    group['Target'] = group['Signal'].shift(-1)
    df_list.append(group)

# Drops the indicator warm-up rows and each stock's final row (NaN Target).
df = pd.concat(df_list).dropna()

In [None]:
# Summarise the per-stock frames as {stock: (rows, columns)} instead of
# printing every DataFrame in full.
{frame['Stock'].iloc[0]: frame.shape for frame in df_list}

In [None]:
# Model inputs, in order: raw price columns, the one-hot ticker columns, then
# the indicator columns appended by pandas_ta.
price_cols = ['Close', 'High', 'Low', 'Open']
indicator_cols = [
    'SMA_20', 'EMA_50', 'RSI_14',
    'MACD_12_26_9', 'MACDs_12_26_9', 'MACDh_12_26_9',
    'BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0',
]
stock_one_hot_cols = [name for name in df.columns if name.startswith('Stock_')]
feature_cols = [*price_cols, *stock_one_hot_cols, *indicator_cols]

# Scaler and per-stock accumulators used by the windowing loop below.
x_all, y_all = [], []
scaler = MinMaxScaler()

# Sliding window for 60 days


In [None]:
def sliding_data(data, target, time_steps=60):
    """Cut a sequence into overlapping fixed-length windows with one label each.

    Window ``i`` is ``data[i:i + time_steps]`` and its label is
    ``target[i + time_steps]``, i.e. the target entry at the position right
    after the window's last row.

    Args:
        data: Row-indexed sequence of feature rows (e.g. a 2-D array).
        target: Sequence of labels indexed like ``data``.
        time_steps: Number of rows per window.

    Returns:
        ``(x, y)`` as NumPy arrays. ``x`` has shape
        ``(n_windows, time_steps, *data.shape[1:])`` and ``y`` has shape
        ``(n_windows,)`` with ``n_windows = len(data) - time_steps``. When the
        input is too short for a single window, correctly shaped empty arrays
        are returned so the result can still be concatenated with the windows
        of other series.
    """
    data = np.asarray(data)
    n_windows = len(data) - time_steps
    if n_windows <= 0:
        # np.array([]) would be 1-D and could not be concatenated with the
        # 3-D window arrays of other series, so keep the trailing dimensions.
        empty_x = np.empty((0, time_steps) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=np.asarray(target).dtype)
        return empty_x, empty_y

    x = [data[i:i + time_steps] for i in range(n_windows)]
    y = [target[i + time_steps] for i in range(n_windows)]
    return np.array(x), np.array(y)

In [None]:
# Build scaled sliding windows for every stock and collect them in
# x_all / y_all.
# The accumulators are reset here so that re-running this cell does not append
# the same windows a second time.
x_all, y_all = [], []

# Positions of the one-hot ticker columns inside feature_cols.
one_hot_idx = [i for i, col in enumerate(feature_cols) if col.startswith('Stock_')]

for stock_name, group in df.groupby('Stock'):
    group = group.sort_values('Date').dropna(subset=feature_cols + ['Target'])
    if len(group) <= time_steps:
        # Not enough rows for even one window; such a stock would yield an
        # empty array that cannot be concatenated with the others.
        print(f"Skipping {stock_name}: only {len(group)} usable rows (need more than {time_steps})")
        continue

    # The scaler is refit on each stock, so after the loop it holds the
    # statistics of the last stock processed.
    features = scaler.fit_transform(group[feature_cols])
    # Inside a single stock the one-hot columns are constant, and MinMaxScaler
    # maps a constant column to 0 -- which would wipe out the stock identity
    # for every stock. Put the unscaled 0/1 values back.
    features[:, one_hot_idx] = group[feature_cols].iloc[:, one_hot_idx].to_numpy(dtype=float)

    target = group['Target'].values
    x, y = sliding_data(features, target, time_steps)
    x_all.append(x)
    y_all.append(y)

In [None]:
# Join the per-stock window arrays (and their labels) along the first axis,
# which is np.concatenate's default.
x_final = np.concatenate(x_all)
y_final = np.concatenate(y_all)

In [None]:
# Shuffle windows and labels with the same permutation. A fixed seed makes the
# ordering (and therefore the train/test split taken from it) reproducible
# after Restart & Run All.
# NOTE(review): consecutive windows overlap in all but one row, so shuffling
# before the split puts near-identical windows on both sides of it -- consider
# splitting chronologically per stock before shuffling the training part.
shuffle_rng = np.random.default_rng(42)
idx = shuffle_rng.permutation(len(x_final))
x_final, y_final = x_final[idx], y_final[idx]
x_final.shape

In [None]:
# First 80% of the samples go to training, the remainder to testing.
split = int(0.8 * len(x_final))
x_train, x_test = np.split(x_final, [split])
y_train, y_test = np.split(y_final, [split])

In [None]:
def build_model(hp):
    """Build and compile the stacked-LSTM classifier for one tuner trial.

    Architecture: LSTM -> Dropout -> LSTM -> Dropout -> Dense(25) ->
    Dense(3, softmax). The input shape (time steps, features) is taken from
    the module-level ``x_train``.

    Args:
        hp: keras-tuner ``HyperParameters`` object. Tuned values are
            ``units_1`` / ``units_2`` (32..128, step 32),
            ``dropout_1`` / ``dropout_2`` (0.1..0.5, step 0.1) and
            ``learning_rate`` (one of 1e-2, 1e-3, 1e-4).

    Returns:
        A compiled ``Sequential`` model using Adam, sparse categorical
        cross-entropy and the accuracy metric.
    """
    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # input_shape= to the first layer, which current Keras warns about.
    model.add(keras.Input(shape=(x_train.shape[1], x_train.shape[2])))
    model.add(LSTM(
        units=hp.Int('units_1', 32, 128, step=32),
        return_sequences=True  # the second LSTM needs the full sequence
    ))
    model.add(Dropout(hp.Float('dropout_1', 0.1, 0.5, step=0.1)))
    model.add(LSTM(units=hp.Int('units_2', 32, 128, step=32)))
    model.add(Dropout(hp.Float('dropout_2', 0.1, 0.5, step=0.1)))
    model.add(Dense(25))
    model.add(Dense(3, activation='softmax'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
        ),
        # Labels are integer class ids, hence the sparse loss.
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [None]:
# Step 1: Perform hyperparameter search
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=3,
    directory='multi_stock_tuning',
    project_name='multi_stock_lstm'
)

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
tuner.search(x_train, y_train, epochs=30, validation_split=0.2, callbacks=[stop_early])

# Step 2: Retrieve the best hyperparameters
best_hps = tuner.get_best_hyperparameters(1)[0]

# Step 3: Build the model using the best hyperparameters
model = tuner.hypermodel.build(best_hps)

# Step 4: Train the final model
history = model.fit(x_train, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Step 5: Save the trained model
model.save("final_multi_stock_lstm_model.h5")


In [None]:
# Save the trained model again, this time to a .keras file.
model.save("final_multi_stock_lstm_model.keras")

In [None]:
# Evaluate on the held-out windows.
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}, Loss: {test_loss:.4f}")

y_pred_probs = model.predict(x_test)
y_pred = np.argmax(y_pred_probs, axis=1)
# Target comes from a shift() and is therefore float; cast so the true labels
# compare cleanly with the integer argmax predictions.
y_true = np.asarray(y_test).astype(int)

# The Signal column is built so that only the values 1 (SMA_20 not above
# EMA_50) and 2 (SMA_20 above EMA_50) occur, while the softmax layer has three
# outputs. Report exactly the classes that appear in the labels or the
# predictions, each under its own name, instead of assuming two classes.
# NOTE(review): the names below follow that encoding -- update them if the
# label encoding changes.
class_names = {0: 'Unused (0)', 1: 'Sell (1)', 2: 'Buy (2)'}
labels = np.union1d(y_true, y_pred)
tick_names = [class_names.get(int(label), f'Class {int(label)}') for label in labels]

print("\nClassification Report:")
print(classification_report(
    y_true, y_pred, labels=labels, target_names=tick_names, zero_division=0
))

cm = confusion_matrix(y_true, y_pred, labels=labels)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=tick_names, yticklabels=tick_names, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

# ---------------------- 8. Training Curve ----------------------
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 5))

ax_acc.plot(history.history['accuracy'], label='Train Acc')
ax_acc.plot(history.history['val_accuracy'], label='Val Acc')
ax_acc.set_xlabel("Epoch")
ax_acc.legend()
ax_acc.set_title("Accuracy")

ax_loss.plot(history.history['loss'], label='Train Loss')
ax_loss.plot(history.history['val_loss'], label='Val Loss')
ax_loss.set_xlabel("Epoch")
ax_loss.legend()
ax_loss.set_title("Loss")

fig.tight_layout()
plt.show()
