In [10]:
import pandas as pd
import numpy as np

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, GRU
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### IDEAs
- give the entire stock set and trade whether entire market is up or down
- give entire stock set and pick stock that increases the most

# PREP DATA

In [11]:
# Load data
df = pd.read_csv("Historical_Data.csv")
df["gmtTime"] = pd.to_datetime(df["gmtTime"])

# Dictionary to store processed data for each stock
stock_dfs = {}

print('unique stocks=',df["symbol"].unique())

# Feature engineering for each stock
for symbol in df["symbol"].unique():
    df_stock = df[df["symbol"] == symbol].copy()

    # Round numerical columns
    cols_to_round = [col for col in df_stock.columns if col not in ["gmtTime", "symbol"]]
    df_stock[cols_to_round] = df_stock[cols_to_round].round(2)

    # Time-based features
    df_stock['hour'] = df_stock['gmtTime'].dt.hour
    df_stock['day_of_week'] = df_stock['gmtTime'].dt.dayofweek

    # Rolling statistics
    df_stock['askMedian_rolling_mean_3h'] = df_stock['askMedian'].rolling(window=3, min_periods=1).mean()
    df_stock['bidMedian_rolling_mean_3h'] = df_stock['bidMedian'].rolling(window=3, min_periods=1).mean()
    df_stock['askMedian_rolling_std_3h'] = df_stock['askMedian'].rolling(window=3, min_periods=1).std()
    df_stock['bidMedian_rolling_std_3h'] = df_stock['bidMedian'].rolling(window=3, min_periods=1).std()

    # Percentage changes
    df_stock['askMedian_pct_change'] = df_stock['askMedian'].pct_change()
    df_stock['bidMedian_pct_change'] = df_stock['bidMedian'].pct_change()

    # Spread-related features
    df_stock['spread_ratio'] = df_stock['spreadMedian'] / (df_stock['askMedian'] + df_stock['bidMedian'])
    # df_stock['spread_pct_change'] = df_stock['spreadMedian'].pct_change()

    # Volume-related features
    df_stock['askVolume_relative'] = df_stock['askVolume'] / df_stock['askVolume'].rolling(window=5, min_periods=1).mean()
    df_stock['bidVolume_relative'] = df_stock['bidVolume'] / df_stock['bidVolume'].rolling(window=5, min_periods=1).mean()
    df_stock['volume_imbalance'] = (df_stock['askVolume'] - df_stock['bidVolume']) / (df_stock['askVolume'] + df_stock['bidVolume'])

    # Lagged features (e.g., previous hour's values)
    for lag in range(1, 25):  # Add lags for the last 3 hours
        df_stock[f'askMedian_lag_{lag}'] = df_stock['askMedian'].shift(lag)
        df_stock[f'bidMedian_lag_{lag}'] = df_stock['bidMedian'].shift(lag)
        df_stock[f'spreadMedian_lag_{lag}'] = df_stock['spreadMedian'].shift(lag)

    # Target variable: Direction of price movement (1 if bidMedian increases next hour, 0 otherwise)
    df_stock['target'] = (df_stock['bidMedian'].shift(-20) > df_stock['bidMedian']).astype(int)

    # Drop rows with missing values (due to lags and rolling features)
    df_stock = df_stock.dropna()

    # Store processed dataframe
    stock_dfs[symbol] = df_stock

# Example: View processed data for one stock
print(stock_dfs['STOCK1'].head())

# Save processed data to CSV (optional)
for symbol, df_stock in stock_dfs.items():
    df_stock.to_csv(f"{symbol}_processed.csv", index=False)

unique stocks= ['STOCK20' 'STOCK1' 'STOCK18' 'STOCK21' 'STOCK16' 'STOCK11' 'STOCK12'
 'STOCK17' 'STOCK6' 'STOCK9' 'STOCK2' 'STOCK3' 'STOCK14' 'STOCK10'
 'STOCK8' 'STOCK13' 'STOCK5' 'STOCK4' 'STOCK19' 'INDEX1']
                      gmtTime  askMedian  bidMedian  askVolume  bidVolume  \
501 1999-02-10 12:00:00+00:00     160.35     160.18    13868.0     3971.0   
521 1999-02-10 13:00:00+00:00     159.13     158.97     4390.0     6125.0   
541 1999-02-10 14:00:00+00:00     158.97     158.89     4650.0     4152.0   
561 1999-02-10 15:00:00+00:00     159.05     158.89    10550.0     2410.0   
581 1999-02-10 16:00:00+00:00     158.00     157.92     3542.0     5576.0   

     spreadMedian  symbol  hour  day_of_week  askMedian_rolling_mean_3h  ...  \
501          0.16  STOCK1    12            2                 159.943333  ...   
521          0.16  STOCK1    13            2                 159.780000  ...   
541          0.08  STOCK1    14            2                 159.483333  ...   
561    

## TRAIN FOR EVERY MODEL INDIVIDUALLY AND SAVE MODELS

In [12]:
import joblib 

features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
# features = [col for col in df.columns if col not in ['gmtTime', 'target']]


target = 'target'


stock_files = ['STOCK20', 'STOCK1', 'STOCK18', 'STOCK21', 'STOCK16', 'STOCK11', 'STOCK12',
 'STOCK17', 'STOCK6', 'STOCK9', 'STOCK2', 'STOCK3', 'STOCK14', 'STOCK10',
 'STOCK8', 'STOCK13', 'STOCK5', 'STOCK4', 'STOCK19', 'INDEX1']

# Load and combine all stock data
for i, stock_file in enumerate(stock_files):
    print("TRAINING MODEL FOR STOCK ", i)
    dfs = pd.read_csv(f"{stock_file}_processed.csv")
    scaler = StandardScaler()
    X = scaler.fit_transform(dfs[features])
    y = dfs[target].values

    # Save the scaler for later use during inference
    scaler_filename = f"{stock_file}_scaler.pkl"
    joblib.dump(scaler, scaler_filename)
    print(f"Saved scaler for {stock_file} to {scaler_filename}")


    # Reshape for LSTM (samples, timesteps, features)
    sequence_length = 10  # Use last 10 hours as input
    X_seq = []
    y_seq = []

    for i in range(len(X) - sequence_length):
        X_seq.append(X[i:i+sequence_length])
        y_seq.append(y[i+sequence_length])

    X_seq = np.array(X_seq)
    y_seq = np.array(y_seq)

    X_test = X_seq
    y_test = y_seq

    # Use the last 50% of the data as test set, maintaining chronological order
    split_idx = int(len(X_seq) * 0.5)
    X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
    y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]
    
    #X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)

    # Build LSTM mode


    model_2 = Sequential()
    model_2.add(
        LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
    )
    model_2.add(Dropout(0.2))
    model_2.add(LSTM(100, return_sequences=True))
    model_2.add(Dropout(0.2))
    model_2.add(LSTM(100))
    model_2.add(Dropout(0.2))
    model_2.add(Dense(25, activation='relu'))
    model_2.add(Dense(1, activation='sigmoid'))

    # Compile model
    model_2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    # Train model
    model_2.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    # Evaluate model
    loss, accuracy = model_2.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy:.4f}")


    '''
    SAVE MODEL FOR EACH STOCK INDIVIDUALLY
    '''
    model_save_path = f"{stock_file}_lstm_model.keras"
    model_2.save(model_save_path)
    print(f"Saved model for {stock_file} to {model_save_path}")

TRAINING MODEL FOR STOCK  0
Saved scaler for STOCK20 to STOCK20_scaler.pkl
Epoch 1/10


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.5424 - loss: 0.6879 - val_accuracy: 0.5203 - val_loss: 0.7675
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5673 - loss: 0.6798 - val_accuracy: 0.5081 - val_loss: 0.8186
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5839 - loss: 0.6720 - val_accuracy: 0.5258 - val_loss: 0.9105
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5780 - loss: 0.6683 - val_accuracy: 0.5305 - val_loss: 1.3371
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5918 - loss: 0.6607 - val_accuracy: 0.5272 - val_loss: 1.8745
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5995 - loss: 0.6599 - val_accuracy: 0.5276 - val_loss: 1.6160
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5566 - loss: 0.6870 - val_accuracy: 0.4927 - val_loss: 0.7055
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5701 - loss: 0.6791 - val_accuracy: 0.5000 - val_loss: 1.2123
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5722 - loss: 0.6747 - val_accuracy: 0.5115 - val_loss: 0.9308
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5777 - loss: 0.6699 - val_accuracy: 0.4967 - val_loss: 1.1080
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5898 - loss: 0.6625 - val_accuracy: 0.5037 - val_loss: 1.2331
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5976 - loss: 0.6608 - val_accuracy: 0.5046 - val_loss: 1.3023
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5541 - loss: 0.6860 - val_accuracy: 0.5108 - val_loss: 0.8770
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 21ms/step - accuracy: 0.5886 - loss: 0.6743 - val_accuracy: 0.5074 - val_loss: 1.2158
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5805 - loss: 0.6740 - val_accuracy: 0.5000 - val_loss: 0.8904
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5874 - loss: 0.6658 - val_accuracy: 0.5182 - val_loss: 0.9781
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5984 - loss: 0.6641 - val_accuracy: 0.5076 - val_loss: 1.1158
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.6074 - loss: 0.6602 - val_accuracy: 0.5082 - val_loss: 1.6243
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.5066 - loss: 0.6932 - val_accuracy: 0.4772 - val_loss: 1.3577
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5200 - loss: 0.6864 - val_accuracy: 0.5047 - val_loss: 1.0272
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5264 - loss: 0.6840 - val_accuracy: 0.5144 - val_loss: 1.6353
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5680 - loss: 0.6749 - val_accuracy: 0.5239 - val_loss: 1.4722
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5744 - loss: 0.6688 - val_accuracy: 0.5286 - val_loss: 0.8937
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5815 - loss: 0.6653 - val_accuracy: 0.5272 - val_loss: 1.5997
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.5226 - loss: 0.6912 - val_accuracy: 0.5600 - val_loss: 0.6815
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5572 - loss: 0.6817 - val_accuracy: 0.5678 - val_loss: 0.6763
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5566 - loss: 0.6780 - val_accuracy: 0.5797 - val_loss: 0.6738
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5741 - loss: 0.6762 - val_accuracy: 0.5753 - val_loss: 0.6721
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5754 - loss: 0.6732 - val_accuracy: 0.5714 - val_loss: 0.6972
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5816 - loss: 0.6692 - val_accuracy: 0.5779 - val_loss: 0.6707
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 22ms/step - accuracy: 0.5291 - loss: 0.6915 - val_accuracy: 0.5199 - val_loss: 0.6897
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.5508 - loss: 0.6851 - val_accuracy: 0.5462 - val_loss: 0.6953
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5493 - loss: 0.6877 - val_accuracy: 0.5528 - val_loss: 0.6860
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5593 - loss: 0.6791 - val_accuracy: 0.5520 - val_loss: 0.6852
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5562 - loss: 0.6798 - val_accuracy: 0.5561 - val_loss: 0.6845
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5596 - loss: 0.6793 - val_accuracy: 0.5425 - val_loss: 0.6897
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.5146 - loss: 0.6929 - val_accuracy: 0.5301 - val_loss: 0.6919
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5297 - loss: 0.6902 - val_accuracy: 0.5293 - val_loss: 0.6914
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5385 - loss: 0.6874 - val_accuracy: 0.5107 - val_loss: 0.6933
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5520 - loss: 0.6867 - val_accuracy: 0.5330 - val_loss: 0.6908
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5373 - loss: 0.6867 - val_accuracy: 0.5319 - val_loss: 0.6949
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5581 - loss: 0.6840 - val_accuracy: 0.5251 - val_loss: 0.6962
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5165 - loss: 0.6929 - val_accuracy: 0.4620 - val_loss: 0.7044
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5368 - loss: 0.6903 - val_accuracy: 0.5103 - val_loss: 0.7198
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5646 - loss: 0.6818 - val_accuracy: 0.4512 - val_loss: 0.8328
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5604 - loss: 0.6818 - val_accuracy: 0.4634 - val_loss: 0.7322
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5741 - loss: 0.6773 - val_accuracy: 0.4636 - val_loss: 1.0917
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5746 - loss: 0.6767 - val_accuracy: 0.4457 - val_loss: 1.2884
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5107 - loss: 0.6909 - val_accuracy: 0.4771 - val_loss: 0.7003
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5620 - loss: 0.6875 - val_accuracy: 0.4747 - val_loss: 0.8011
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5691 - loss: 0.6795 - val_accuracy: 0.4744 - val_loss: 0.7744
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5834 - loss: 0.6747 - val_accuracy: 0.4740 - val_loss: 0.7594
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5956 - loss: 0.6686 - val_accuracy: 0.4741 - val_loss: 0.7660
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5905 - loss: 0.6676 - val_accuracy: 0.4730 - val_loss: 0.7719
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5404 - loss: 0.6892 - val_accuracy: 0.4992 - val_loss: 0.9609
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5512 - loss: 0.6822 - val_accuracy: 0.4898 - val_loss: 1.6309
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5519 - loss: 0.6788 - val_accuracy: 0.4853 - val_loss: 1.7508
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5720 - loss: 0.6762 - val_accuracy: 0.4942 - val_loss: 1.2286
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5925 - loss: 0.6717 - val_accuracy: 0.4906 - val_loss: 1.5746
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5832 - loss: 0.6678 - val_accuracy: 0.5012 - val_loss: 1.3355
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5499 - loss: 0.6843 - val_accuracy: 0.5965 - val_loss: 0.6609
Epoch 2/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6194 - loss: 0.6480 - val_accuracy: 0.6058 - val_loss: 0.6565
Epoch 3/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6267 - loss: 0.6427 - val_accuracy: 0.6169 - val_loss: 0.6432
Epoch 4/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6471 - loss: 0.6330 - val_accuracy: 0.6129 - val_loss: 0.6541
Epoch 5/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6530 - loss: 0.6224 - val_accuracy: 0.5963 - val_loss: 0.6700
Epoch 6/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6473 - loss: 0.6184 - val_accuracy: 0.6136 - val_loss: 0.6591
Epoch 7/10
[1m242/242[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5458 - loss: 0.6861 - val_accuracy: 0.5231 - val_loss: 0.6975
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5474 - loss: 0.6822 - val_accuracy: 0.5274 - val_loss: 0.7096
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5567 - loss: 0.6824 - val_accuracy: 0.5348 - val_loss: 0.7325
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5683 - loss: 0.6759 - val_accuracy: 0.5306 - val_loss: 0.7163
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5729 - loss: 0.6765 - val_accuracy: 0.5388 - val_loss: 0.7002
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5724 - loss: 0.6708 - val_accuracy: 0.5351 - val_loss: 0.7000
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5151 - loss: 0.6939 - val_accuracy: 0.4583 - val_loss: 0.7406
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5465 - loss: 0.6890 - val_accuracy: 0.5002 - val_loss: 0.7313
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5647 - loss: 0.6809 - val_accuracy: 0.4915 - val_loss: 0.8306
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5643 - loss: 0.6798 - val_accuracy: 0.4996 - val_loss: 0.7394
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5706 - loss: 0.6793 - val_accuracy: 0.4833 - val_loss: 0.8624
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5823 - loss: 0.6735 - val_accuracy: 0.4838 - val_loss: 0.8032
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5291 - loss: 0.6869 - val_accuracy: 0.5847 - val_loss: 0.6777
Epoch 2/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5922 - loss: 0.6623 - val_accuracy: 0.5980 - val_loss: 0.6648
Epoch 3/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.6021 - loss: 0.6546 - val_accuracy: 0.5804 - val_loss: 0.6650
Epoch 4/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6038 - loss: 0.6575 - val_accuracy: 0.5947 - val_loss: 0.6610
Epoch 5/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.6024 - loss: 0.6510 - val_accuracy: 0.6043 - val_loss: 0.6589
Epoch 6/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.6044 - loss: 0.6445 - val_accuracy: 0.6097 - val_loss: 0.6504
Epoch 7/10
[1m243/243[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5309 - loss: 0.6906 - val_accuracy: 0.5770 - val_loss: 0.6803
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5700 - loss: 0.6808 - val_accuracy: 0.5805 - val_loss: 0.6828
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5640 - loss: 0.6796 - val_accuracy: 0.5661 - val_loss: 0.7016
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5836 - loss: 0.6724 - val_accuracy: 0.5641 - val_loss: 0.6811
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5898 - loss: 0.6718 - val_accuracy: 0.5578 - val_loss: 0.6811
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5911 - loss: 0.6700 - val_accuracy: 0.5698 - val_loss: 0.6994
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.5555 - loss: 0.6810 - val_accuracy: 0.5521 - val_loss: 0.7031
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5859 - loss: 0.6684 - val_accuracy: 0.5617 - val_loss: 0.6952
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5772 - loss: 0.6701 - val_accuracy: 0.5559 - val_loss: 0.6895
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5973 - loss: 0.6671 - val_accuracy: 0.5538 - val_loss: 0.6890
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6082 - loss: 0.6598 - val_accuracy: 0.5588 - val_loss: 0.6885
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6110 - loss: 0.6570 - val_accuracy: 0.5428 - val_loss: 0.7144
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5531 - loss: 0.6877 - val_accuracy: 0.4841 - val_loss: 0.7303
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5690 - loss: 0.6811 - val_accuracy: 0.4772 - val_loss: 0.7470
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5735 - loss: 0.6767 - val_accuracy: 0.4763 - val_loss: 0.8027
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5826 - loss: 0.6717 - val_accuracy: 0.5365 - val_loss: 0.7045
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5816 - loss: 0.6731 - val_accuracy: 0.5030 - val_loss: 0.7185
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5955 - loss: 0.6716 - val_accuracy: 0.4942 - val_loss: 0.8694
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5174 - loss: 0.6907 - val_accuracy: 0.5352 - val_loss: 0.6904
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5116 - loss: 0.6908 - val_accuracy: 0.5135 - val_loss: 0.6906
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5230 - loss: 0.6906 - val_accuracy: 0.5123 - val_loss: 0.6912
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5485 - loss: 0.6883 - val_accuracy: 0.5176 - val_loss: 0.6898
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5402 - loss: 0.6844 - val_accuracy: 0.5017 - val_loss: 0.6938
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5252 - loss: 0.6862 - val_accuracy: 0.5428 - val_loss: 0.6924
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


Epoch 1/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.5356 - loss: 0.6893 - val_accuracy: 0.5334 - val_loss: 0.6922
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5527 - loss: 0.6846 - val_accuracy: 0.5413 - val_loss: 0.6886
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5591 - loss: 0.6839 - val_accuracy: 0.5421 - val_loss: 0.7029
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5570 - loss: 0.6846 - val_accuracy: 0.5462 - val_loss: 0.6915
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5608 - loss: 0.6819 - val_accuracy: 0.5297 - val_loss: 0.7018
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.5596 - loss: 0.6803 - val_accuracy: 0.5342 - val_loss: 0.7046
Epoch 7/10
[1m245/245

  super().__init__(**kwargs)


[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.5832 - loss: 0.6716 - val_accuracy: 0.6357 - val_loss: 0.6357
Epoch 2/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6328 - loss: 0.6385 - val_accuracy: 0.6415 - val_loss: 0.6367
Epoch 3/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6436 - loss: 0.6266 - val_accuracy: 0.6324 - val_loss: 0.6659
Epoch 4/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.6586 - loss: 0.6078 - val_accuracy: 0.6285 - val_loss: 0.7582
Epoch 5/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6766 - loss: 0.5963 - val_accuracy: 0.6563 - val_loss: 0.6354
Epoch 6/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6612 - loss: 0.5992 - val_accuracy: 0.6420 - val_loss: 0.7334
Epoch 7/10
[1m236/236[0m [32m━

In [None]:
# Load processed data (Example for one stock, modify as needed)
df = pd.read_csv("STOCK2_processed.csv")

# Select features and target
features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
target = 'target'

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(df[features])
y = df[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_train = X_seq
y_train = y_seq

df_test = pd.read_csv("STOCK3_processed.csv")

scaler = StandardScaler()
X = scaler.fit_transform(df_test[features])
y = df_test[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_test = X_seq
y_test = y_seq

# Split data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)


# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

model_alt = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(sequence_length,  X_train.shape[2])),
    MaxPooling1D(pool_size=2),
    GRU(64, return_sequences=True),
    Dropout(0.2),
    GRU(64),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')
])

model = model_2

# Compile model
model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model.save("lstm_trading_model_previous_year.keras")


2025-02-27 16:54:09.670688: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-27 16:54:09.679708: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/20
Epoch 2/20

KeyboardInterrupt: 

### entire dataset trading

In [19]:
stock_files = ["STOCK1_processed.csv", "STOCK2_processed.csv", "STOCK3_processed.csv", "STOCK4_processed.csv", "STOCK5_processed.csv", "STOCK6_processed.csv", "STOCK7_processed.csv","STOCK8_processed.csv","STOCK9_processed.csv","STOCK10_processed.csv"]  # Add more stock files as needed

stock_files = [f"STOCK1_processed.csv" for i in range(22)]
# Load and combine all stock data
dfs = []
for file in stock_files:
    df = pd.read_csv(file)
    dfs.append(df)

dfs = pd.concat(dfs, ignore_index=True)
scaler = StandardScaler()
X = scaler.fit_transform(dfs[features])
y = dfs[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_test = X_seq
y_test = y_seq

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)

# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model.save("lstm_trading_model_entire_set.h5")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20

KeyboardInterrupt: 

In [None]:
stock_files = ["STOCK1_processed.csv", "STOCK2_processed.csv", "STOCK3_processed.csv", "STOCK4_processed.csv", "STOCK5_processed.csv", "STOCK6_processed.csv", "STOCK7_processed.csv","STOCK8_processed.csv","STOCK9_processed.csv","STOCK10_processed.csv"]  # Add more stock files as needed
stock_files = ['STOCK20', 'STOCK1', 'STOCK18', 'STOCK21', 'STOCK16', 'STOCK11', 'STOCK12',
 'STOCK17', 'STOCK6', 'STOCK9', 'STOCK2', 'STOCK3', 'STOCK14', 'STOCK10',
 'STOCK8', 'STOCK13', 'STOCK5', 'STOCK4', 'STOCK19', 'INDEX1']

stock_files = {f"{i}_processed.csv" for i in stock_files}

# stock_files = [f"STOCK{i}_processed.csv" for i in range(1,22)]


features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
# features = [col for col in df.columns if col not in ['gmtTime', 'target']]


target = 'target'

# Load and combine all stock data
dfs_train = []
dfs_test = []
for file in stock_files:
    df = pd.read_csv(file)
    dfs_train.append(df[:int(len(df)*0.6)])
    dfs_test.append(df[int(len(df)*0.6):])

dfs_train = pd.concat(dfs_train, ignore_index=True)
dfs_test = pd.concat(dfs_test, ignore_index=True)


# Encode 'symbol' column
symbol_encoder = {symbol: idx for idx, symbol in enumerate(dfs_train['symbol'].unique())}
dfs_train['symbol'] = dfs_train['symbol'].map(symbol_encoder)
dfs_test['symbol'] = dfs_test['symbol'].map(symbol_encoder)


scaler = StandardScaler()

X_train = dfs_train[features] #scaler.fit_transform(dfs_train[features])
y_train = dfs_train[target].values

X_test = dfs_test[features] #scaler.transform(dfs_test[features])
y_test = dfs_test[target].values

X_train_seq = []
y_train_seq = []
for i in range(len(X_train) - sequence_length):
    X_train_seq.append(X_train[i:i+sequence_length])
    y_train_seq.append(y_train[i+sequence_length])

X_train_seq = np.array(X_train_seq)
y_train_seq = np.array(y_train_seq)

X_test_seq = []
y_test_seq = []
for i in range(len(X_test) - sequence_length):
    X_test_seq.append(X_test[i:i+sequence_length])
    y_test_seq.append(y_test[i+sequence_length])

X_test_seq = np.array(X_test_seq)
y_test_seq = np.array(y_test_seq)


# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train_seq.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


# BEST MODEL INVIDIDUAL STOCKS

model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train_seq.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

# Compile model
model_2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model_2.fit(X_train_seq, y_train_seq, epochs=20, batch_size=32, validation_data=(X_test_seq, y_test_seq))

# Evaluate model
loss, accuracy = model_2.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model_2.save("lstm_trading_model_entire_set.h5")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

KeyboardInterrupt: 

In [21]:
model_2.save("lstm_trading_model_entire_set_new_data.keras")