In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import pandas_ta as ta
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Dropout
import matplotlib.pyplot as plt
import keras_tuner as kt
import tensorflow as tf
import seaborn as sns


  from pkg_resources import get_distribution, DistributionNotFound


# Loading CSV data


In [2]:
time_steps = 60

In [3]:
csv_path = "/Users/jibanchaudhary/Documents/Projects/trading_bot/merged_data.csv"

print("Loading csv data....")

df = pd.read_csv(csv_path)
df.sort_values(['Stock','Date'], inplace=True)
df.reset_index(drop=True, inplace=True)
df[:10]

Loading csv data....


Unnamed: 0,Date,Close,% Change,High,Low,Open,Qty.,Turnover,Stock
0,2020/08/31,429.0,1.18,431.0,423.0,429.0,40437.0,17259504.0,ADBL
1,2020/09/02,436.0,1.63,440.0,432.0,436.0,108553.0,47330139.0,ADBL
2,2020/09/03,432.0,-0.92,438.0,430.0,436.0,54999.0,23870727.0,ADBL
3,2020/09/06,429.0,-0.69,447.0,429.0,431.0,51535.0,22416879.0,ADBL
4,2020/09/07,424.0,-1.16,429.0,422.0,426.0,30894.0,13097834.0,ADBL
5,2020/09/08,422.0,-0.47,424.0,417.0,423.0,18591.0,7821382.0,ADBL
6,2020/09/09,423.0,0.24,443.0,418.0,443.0,30486.0,12838279.0,ADBL
7,2020/09/10,425.0,0.47,444.0,422.0,444.0,30353.0,12915245.0,ADBL
8,2020/09/13,429.0,0.94,429.0,425.0,425.0,40426.0,17276363.0,ADBL
9,2020/09/14,435.0,1.4,439.0,429.0,429.0,64709.0,28833016.0,ADBL


#Encoding stock identity

In [4]:
# df['stock_id'] = df['Stock'].astype('category').cat.codes
stock_dummies = pd.get_dummies(df['Stock'], prefix='Stock')
df = pd.concat([df, stock_dummies],axis=1)
df


Unnamed: 0,Date,Close,% Change,High,Low,Open,Qty.,Turnover,Stock,Stock_ADBL,...,Stock_SAPDBL,Stock_SBI,Stock_SBL,Stock_SCB,Stock_SFCL,Stock_SHINE,Stock_SHL,Stock_SIFC,Stock_SINDU,Stock_merged_data
0,2020/08/31,429.0,1.18,431.0,423.0,429.0,40437.0,17259504.0,ADBL,True,...,False,False,False,False,False,False,False,False,False,False
1,2020/09/02,436.0,1.63,440.0,432.0,436.0,108553.0,47330139.0,ADBL,True,...,False,False,False,False,False,False,False,False,False,False
2,2020/09/03,432.0,-0.92,438.0,430.0,436.0,54999.0,23870727.0,ADBL,True,...,False,False,False,False,False,False,False,False,False,False
3,2020/09/06,429.0,-0.69,447.0,429.0,431.0,51535.0,22416879.0,ADBL,True,...,False,False,False,False,False,False,False,False,False,False
4,2020/09/07,424.0,-1.16,429.0,422.0,426.0,30894.0,13097834.0,ADBL,True,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117147,2024/11/26,969.0,2.87,980.0,942.0,942.0,38119.0,36661015.5,merged_data,False,...,False,False,False,False,False,False,False,False,False,True
117148,2024/11/26,681.9,1.62,688.9,670.0,680.0,32532.0,22126472.0,merged_data,False,...,False,False,False,False,False,False,False,False,False,True
117149,2024/11/26,932.0,-0.48,947.0,930.5,937.0,2991.0,2793351.1,merged_data,False,...,False,False,False,False,False,False,False,False,False,True
117150,2024/11/26,702.9,1.87,712.0,685.0,685.0,56792.0,39726033.6,merged_data,False,...,False,False,False,False,False,False,False,False,False,True


# Technical indicators

In [5]:
df_list = []
for stock, group in df.groupby('Stock'):
    group = group.copy()
    group.ta.sma(length=20, append=True)
    group.ta.ema(length=50, append=True)
    group.ta.rsi(length=14, append=True)
    group.ta.macd(fast=12, slow=26, signal=9, append=True)
    group.ta.bbands(length=20, append=True)

    group['Signal'] = 0
    group.loc[group['SMA_20'] > group['EMA_50'], 'Signal'] = 2
    group.loc[group['SMA_20'] < group['EMA_50'], 'Signal'] = 0
    group['Signal'] = np.where((group['Signal'] == 0) & (group['Signal'].shift(1) != 0), 1, group['Signal'])
    group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
    group.loc[group['Signal'] == 0, 'Signal'] = 1

    group['Target'] = group['Signal'].shift(-1)
    df_list.append(group)

df = pd.concat(df_list)
df.dropna(inplace=True)
df

  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].replace(to_replace=0, method='ffill')
  group['Signal'] = group['Signal'].re

Unnamed: 0,Date,Close,% Change,High,Low,Open,Qty.,Turnover,Stock,Stock_ADBL,...,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,BBL_20_2.0,BBM_20_2.0,BBU_20_2.0,BBB_20_2.0,BBP_20_2.0,Signal,Target
49,2020/11/12,484.0,1.04,484.0,475.0,475.0,79435.0,41531034.0,ADBL,True,...,10.096023,1.167743,8.928279,430.158854,460.650,491.141146,13.238314,0.882898,2,2.0
50,2020/11/18,487.0,0.62,495.0,470.0,475.0,143816.0,69516294.0,ADBL,True,...,10.711870,1.426872,9.284997,432.198906,463.050,493.901094,13.325167,0.888155,2,2.0
51,2020/11/19,491.0,0.82,511.0,489.0,511.0,71429.0,35073486.0,ADBL,True,...,11.391387,1.685111,9.706275,434.580562,465.650,496.719438,13.344546,0.907957,2,2.0
52,2020/11/22,505.0,2.85,512.0,495.0,495.0,152285.0,77002047.0,ADBL,True,...,12.910765,2.563591,10.347173,435.282244,468.800,502.317756,14.299384,1.040012,2,2.0
53,2020/11/23,512.0,1.39,515.0,506.0,510.0,196565.0,100303247.0,ADBL,True,...,14.512434,3.332208,11.180225,436.872057,472.450,508.027943,15.061040,1.055822,2,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117146,2024/11/26,562.0,0.18,570.0,555.0,570.0,89930.0,50701412.7,merged_data,False,...,54.386511,-2.787559,57.174070,102.568141,647.305,1192.041859,168.309177,0.421701,2,2.0
117147,2024/11/26,969.0,2.87,980.0,942.0,942.0,38119.0,36661015.5,merged_data,False,...,71.255264,11.264956,59.990309,140.418818,679.985,1219.551182,158.699437,0.767822,2,2.0
117148,2024/11/26,681.9,1.62,688.9,670.0,680.0,32532.0,22126472.0,merged_data,False,...,60.756934,0.613301,60.143634,150.559276,686.700,1222.840724,156.149912,0.495524,2,2.0
117149,2024/11/26,932.0,-0.48,947.0,930.5,937.0,2991.0,2793351.1,merged_data,False,...,71.790358,9.317379,62.472979,150.750179,686.475,1222.199821,156.079922,0.729152,2,2.0


In [6]:
df_list

[            Date  Close  % Change   High    Low   Open      Qty.    Turnover  \
 0     2020/08/31  429.0      1.18  431.0  423.0  429.0   40437.0  17259504.0   
 1     2020/09/02  436.0      1.63  440.0  432.0  436.0  108553.0  47330139.0   
 2     2020/09/03  432.0     -0.92  438.0  430.0  436.0   54999.0  23870727.0   
 3     2020/09/06  429.0     -0.69  447.0  429.0  431.0   51535.0  22416879.0   
 4     2020/09/07  424.0     -1.16  429.0  422.0  426.0   30894.0  13097834.0   
 ...          ...    ...       ...    ...    ...    ...       ...         ...   
 998   2024/11/19  359.1     -1.07  365.0  359.0  360.0   65017.0  23467311.9   
 999   2024/11/20  358.9     -0.06  365.0  353.0  365.0   26760.0   9557312.8   
 1000  2024/11/21  353.0     -1.64  359.0  352.7  357.0   55719.0  19742115.6   
 1001  2024/11/24  350.0     -0.85  359.0  350.0  359.0   41256.0  14533107.2   
 1002  2024/11/25  351.9      0.54  357.0  350.0  357.0   32139.0  11298876.7   
 
      Stock  Stock_ADBL  .

In [8]:
stock_one_hot_cols = [col for col in df.columns if col.startswith('Stock_')]
import json
with open("stock_one_hot_cols.json", "w") as f:
    json.dump(stock_one_hot_cols, f)
feature_cols = ['Close', 'High', 'Low', 'Open'] + stock_one_hot_cols + [
    'SMA_20', 'EMA_50', 'RSI_14',
    'MACD_12_26_9', 'MACDs_12_26_9', 'MACDh_12_26_9',
    'BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0']

scaler = MinMaxScaler()
x_all,y_all = [],[]

feature_cols

['Close',
 'High',
 'Low',
 'Open',
 'Stock_ADBL',
 'Stock_BFC',
 'Stock_CFCL',
 'Stock_CORBL',
 'Stock_CZBIL',
 'Stock_EBL',
 'Stock_EDBL',
 'Stock_GBBL',
 'Stock_GBIME',
 'Stock_GFCL',
 'Stock_GMFIL',
 'Stock_GRDBL',
 'Stock_GUFL',
 'Stock_HBL',
 'Stock_ICFC',
 'Stock_JBBL',
 'Stock_JBLB',
 'Stock_KBL',
 'Stock_KRBL',
 'Stock_KSBBL',
 'Stock_LBBL',
 'Stock_LSL',
 'Stock_MBL',
 'Stock_MDB',
 'Stock_MLBL',
 'Stock_MNBBL',
 'Stock_NABBC',
 'Stock_NABIL',
 'Stock_NBL',
 'Stock_NICA',
 'Stock_NIMB',
 'Stock_NMB',
 'Stock_OHL',
 'Stock_PCBL',
 'Stock_PFL',
 'Stock_PROFL',
 'Stock_PRVU',
 'Stock_RLFL',
 'Stock_SADBL',
 'Stock_SANIMA',
 'Stock_SAPDBL',
 'Stock_SBI',
 'Stock_SBL',
 'Stock_SCB',
 'Stock_SFCL',
 'Stock_SHINE',
 'Stock_SHL',
 'Stock_SIFC',
 'Stock_SINDU',
 'Stock_merged_data',
 'SMA_20',
 'EMA_50',
 'RSI_14',
 'MACD_12_26_9',
 'MACDs_12_26_9',
 'MACDh_12_26_9',
 'BBL_20_2.0',
 'BBM_20_2.0',
 'BBU_20_2.0',
 'BBB_20_2.0',
 'BBP_20_2.0']

# Sliding window for 60 days


In [None]:
def sliding_data(data, target, time_steps=60):
    x,y = [],[]
    for i in range(len(data)-time_steps):
        x.append(data[i:i+time_steps])
        y.append(target[i+time_steps])
    return np.array(x), np.array(y)

In [None]:
for _, group in df.groupby('Stock'):
    group= group.sort_values('Date')
    group = group.dropna(subset = feature_cols+['Target'])
    features = scaler.fit_transform(group[feature_cols])
    target = group['Target'].values
    x,y = sliding_data(features, target, time_steps)
    x_all.append(x)
    y_all.append(y)

In [None]:
x_final = np.concatenate(x_all,axis=0)
y_final = np.concatenate(y_all,axis=0)

In [None]:
idx = np.random.permutation(len(x_final))
x_final,y_final = x_final[idx],y_final[idx]
x_final

In [None]:
split = int(0.8 * len(x_final))
x_train,x_test = x_final[:split],x_final[split:]
y_train,y_test = y_final[:split],y_final[split:]

In [None]:
def build_model(hp):
    model = Sequential()
    model.add(LSTM(
        units=hp.Int('units_1', 32, 128, step=32),
        return_sequences=True,
        input_shape=(x_train.shape[1], x_train.shape[2])
    ))
    model.add(Dropout(hp.Float('dropout_1', 0.1, 0.5, step=0.1)))
    model.add(LSTM(units=hp.Int('units_2', 32, 128, step=32)))
    model.add(Dropout(hp.Float('dropout_2', 0.1, 0.5, step=0.1)))
    model.add(Dense(25))
    model.add(Dense(3, activation='softmax'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
        ),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [None]:
# Step 1: Perform hyperparameter search
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=3,
    directory='multi_stock_tuning',
    project_name='multi_stock_lstm'
)

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
tuner.search(x_train, y_train, epochs=30, validation_split=0.2, callbacks=[stop_early])

# Step 2: Retrieve the best hyperparameters
best_hps = tuner.get_best_hyperparameters(1)[0]

# Step 3: Build the model using the best hyperparameters
model = tuner.hypermodel.build(best_hps)

# Step 4: Train the final model
history = model.fit(x_train, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Step 5: Save the trained model
model.save("final_multi_stock_lstm_model.h5")


In [None]:
model.save("final_multi_stock_lstm_model.keras")

In [None]:
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}, Loss: {test_loss:.4f}")

y_pred_probs = model.predict(x_test)
y_pred = np.argmax(y_pred_probs, axis=1)

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Sell (0)', 'Buy (2)']))

cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Sell', 'Buy'], yticklabels=['Sell', 'Buy'])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

# ---------------------- 8. Training Curve ----------------------
plt.figure(figsize=(14, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Acc')
plt.plot(history.history['val_accuracy'], label='Val Acc')
plt.legend()
plt.title("Accuracy")

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.legend()
plt.title("Loss")
plt.tight_layout()
plt.show()
