In [1]:
import pandas as pd
import numpy as np

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, GRU
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### IDEAs
- give the entire stock set and trade whether entire market is up or down
- give entire stock set and pick stock that increases the most

# PREP DATA

In [4]:
# Load data
df = pd.read_csv("Historical_Data.csv")
df["gmtTime"] = pd.to_datetime(df["gmtTime"])

# Dictionary to store processed data for each stock
stock_dfs = {}

print('unique stocks=',df["symbol"].unique())

# Feature engineering for each stock
for symbol in df["symbol"].unique():
    df_stock = df[df["symbol"] == symbol].copy()

    # Round numerical columns
    cols_to_round = [col for col in df_stock.columns if col not in ["gmtTime", "symbol"]]
    df_stock[cols_to_round] = df_stock[cols_to_round].round(2)

    # Time-based features
    df_stock['hour'] = df_stock['gmtTime'].dt.hour
    df_stock['day_of_week'] = df_stock['gmtTime'].dt.dayofweek

    # Rolling statistics
    df_stock['askMedian_rolling_mean_3h'] = df_stock['askMedian'].rolling(window=3, min_periods=1).mean()
    df_stock['bidMedian_rolling_mean_3h'] = df_stock['bidMedian'].rolling(window=3, min_periods=1).mean()
    df_stock['askMedian_rolling_std_3h'] = df_stock['askMedian'].rolling(window=3, min_periods=1).std()
    df_stock['bidMedian_rolling_std_3h'] = df_stock['bidMedian'].rolling(window=3, min_periods=1).std()

    # Percentage changes
    df_stock['askMedian_pct_change'] = df_stock['askMedian'].pct_change()
    df_stock['bidMedian_pct_change'] = df_stock['bidMedian'].pct_change()

    # Spread-related features
    df_stock['spread_ratio'] = df_stock['spreadMedian'] / (df_stock['askMedian'] + df_stock['bidMedian'])
    # df_stock['spread_pct_change'] = df_stock['spreadMedian'].pct_change()

    # Volume-related features
    df_stock['askVolume_relative'] = df_stock['askVolume'] / df_stock['askVolume'].rolling(window=5, min_periods=1).mean()
    df_stock['bidVolume_relative'] = df_stock['bidVolume'] / df_stock['bidVolume'].rolling(window=5, min_periods=1).mean()
    df_stock['volume_imbalance'] = (df_stock['askVolume'] - df_stock['bidVolume']) / (df_stock['askVolume'] + df_stock['bidVolume'])

    # Lagged features (e.g., previous hour's values)
    for lag in range(1, 25):  # Add lags for the last 3 hours
        df_stock[f'askMedian_lag_{lag}'] = df_stock['askMedian'].shift(lag)
        df_stock[f'bidMedian_lag_{lag}'] = df_stock['bidMedian'].shift(lag)
        df_stock[f'spreadMedian_lag_{lag}'] = df_stock['spreadMedian'].shift(lag)

    # Target variable: Direction of price movement (1 if bidMedian increases next hour, 0 otherwise)
    df_stock['target'] = (df_stock['bidMedian'].shift(-20) > df_stock['bidMedian']).astype(int)

    # Drop rows with missing values (due to lags and rolling features)
    df_stock = df_stock.dropna()

    # Store processed dataframe
    stock_dfs[symbol] = df_stock

# Example: View processed data for one stock
print(stock_dfs['STOCK1'].head())

# Save processed data to CSV (optional)
for symbol, df_stock in stock_dfs.items():
    df_stock.to_csv(f"{symbol}_processed.csv", index=False)

unique stocks= ['STOCK20' 'STOCK1' 'STOCK18' 'STOCK21' 'STOCK16' 'STOCK11' 'STOCK12'
 'STOCK17' 'STOCK6' 'STOCK9' 'STOCK2' 'STOCK3' 'STOCK14' 'STOCK10'
 'STOCK8' 'STOCK13' 'STOCK5' 'STOCK4' 'STOCK19' 'INDEX1']
                      gmtTime  askMedian  bidMedian  askVolume  bidVolume  \
501 1999-02-10 12:00:00+00:00     160.35     160.18    13868.0     3971.0   
521 1999-02-10 13:00:00+00:00     159.13     158.97     4390.0     6125.0   
541 1999-02-10 14:00:00+00:00     158.97     158.89     4650.0     4152.0   
561 1999-02-10 15:00:00+00:00     159.05     158.89    10550.0     2410.0   
581 1999-02-10 16:00:00+00:00     158.00     157.92     3542.0     5576.0   

     spreadMedian  symbol  hour  day_of_week  askMedian_rolling_mean_3h  ...  \
501          0.16  STOCK1    12            2                 159.943333  ...   
521          0.16  STOCK1    13            2                 159.780000  ...   
541          0.08  STOCK1    14            2                 159.483333  ...   
561    

## TRAIN FOR EVERY MODEL INDIVIDUALLY AND SAVE MODELS

In [None]:
features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
# features = [col for col in df.columns if col not in ['gmtTime', 'target']]


target = 'target'


stock_files = ['STOCK20', 'STOCK1', 'STOCK18', 'STOCK21', 'STOCK16', 'STOCK11', 'STOCK12',
 'STOCK17', 'STOCK6', 'STOCK9', 'STOCK2', 'STOCK3', 'STOCK14', 'STOCK10',
 'STOCK8', 'STOCK13', 'STOCK5', 'STOCK4', 'STOCK19', 'INDEX1']

# Load and combine all stock data
for i, stock_file in enumerate(stock_files):
    print("TRAINING MODEL FOR STOCK ", i)
    dfs = pd.read_csv(f"{stock_file}_processed.csv")
    scaler = StandardScaler()
    X = scaler.fit_transform(dfs[features])
    y = dfs[target].values

    # Reshape for LSTM (samples, timesteps, features)
    sequence_length = 10  # Use last 10 hours as input
    X_seq = []
    y_seq = []

    for i in range(len(X) - sequence_length):
        X_seq.append(X[i:i+sequence_length])
        y_seq.append(y[i+sequence_length])

    X_seq = np.array(X_seq)
    y_seq = np.array(y_seq)

    X_test = X_seq
    y_test = y_seq

    # Use the last 50% of the data as test set, maintaining chronological order
    split_idx = int(len(X_seq) * 0.5)
    X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
    y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]
    
    #X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)

    # Build LSTM mode


    model_2 = Sequential()
    model_2.add(
        LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
    )
    model_2.add(Dropout(0.2))
    model_2.add(LSTM(100, return_sequences=True))
    model_2.add(Dropout(0.2))
    model_2.add(LSTM(100))
    model_2.add(Dropout(0.2))
    model_2.add(Dense(25, activation='relu'))
    model_2.add(Dense(1, activation='sigmoid'))

    # Compile model
    model_2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    # Train model
    model_2.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

    # Evaluate model
    loss, accuracy = model_2.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy:.4f}")


    '''
    SAVE MODEL FOR EACH STOCK INDIVIDUALLY
    '''
    model_save_path = f"{stock_file}_lstm_model.keras"
    model_2.save(model_save_path)
    print(f"Saved model for {stock_file} to {model_save_path}")

Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.5397 - loss: 0.6903 - val_accuracy: 0.5227 - val_loss: 0.7556
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5562 - loss: 0.6821 - val_accuracy: 0.5317 - val_loss: 0.7270
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5843 - loss: 0.6737 - val_accuracy: 0.5313 - val_loss: 1.0064
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5770 - loss: 0.6666 - val_accuracy: 0.5301 - val_loss: 1.3118
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6048 - loss: 0.6536 - val_accuracy: 0.5300 - val_loss: 1.7617
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5380 - loss: 1.3405
Test Accuracy: 0.5300
Saved model for STOCK20 to STOCK20_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.5461 - loss: 0.6860 - val_accuracy: 0.5078 - val_loss: 1.0196
Epoch 2/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5780 - loss: 0.6765 - val_accuracy: 0.4912 - val_loss: 1.3157
Epoch 3/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5744 - loss: 0.6744 - val_accuracy: 0.5012 - val_loss: 1.8037
Epoch 4/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5970 - loss: 0.6615 - val_accuracy: 0.4987 - val_loss: 1.0620
Epoch 5/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5947 - loss: 0.6647 - val_accuracy: 0.5014 - val_loss: 1.4417
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5133 - loss: 1.0376
Test Accuracy: 0.5014
Saved model for STOCK1 to STOCK1_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5420 - loss: 0.6875 - val_accuracy: 0.5029 - val_loss: 0.7340
Epoch 2/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5809 - loss: 0.6745 - val_accuracy: 0.5242 - val_loss: 0.7465
Epoch 3/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5785 - loss: 0.6751 - val_accuracy: 0.5164 - val_loss: 0.7943
Epoch 4/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5957 - loss: 0.6671 - val_accuracy: 0.5117 - val_loss: 0.8211
Epoch 5/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5939 - loss: 0.6658 - val_accuracy: 0.4921 - val_loss: 1.2516
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4904 - loss: 0.9879
Test Accuracy: 0.4921
Saved model for STOCK18 to STOCK18_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.5078 - loss: 0.6929 - val_accuracy: 0.4942 - val_loss: 0.7561
Epoch 2/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5527 - loss: 0.6818 - val_accuracy: 0.5134 - val_loss: 0.8199
Epoch 3/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5687 - loss: 0.6751 - val_accuracy: 0.5180 - val_loss: 0.7235
Epoch 4/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5807 - loss: 0.6701 - val_accuracy: 0.5316 - val_loss: 0.8704
Epoch 5/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5972 - loss: 0.6548 - val_accuracy: 0.5249 - val_loss: 1.0253
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5738 - loss: 0.7349
Test Accuracy: 0.5249
Saved model for STOCK21 to STOCK21_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.5375 - loss: 0.6902 - val_accuracy: 0.5515 - val_loss: 0.6842
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5572 - loss: 0.6820 - val_accuracy: 0.5737 - val_loss: 0.6732
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5596 - loss: 0.6811 - val_accuracy: 0.5784 - val_loss: 0.6735
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5795 - loss: 0.6733 - val_accuracy: 0.5759 - val_loss: 0.6767
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5824 - loss: 0.6707 - val_accuracy: 0.5829 - val_loss: 0.6810
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5968 - loss: 0.6648
Test Accuracy: 0.5829
Saved model for STOCK16 to STOCK16_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5123 - loss: 0.6912 - val_accuracy: 0.5480 - val_loss: 0.6890
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5536 - loss: 0.6860 - val_accuracy: 0.5399 - val_loss: 0.6877
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5562 - loss: 0.6841 - val_accuracy: 0.5377 - val_loss: 0.6861
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5579 - loss: 0.6825 - val_accuracy: 0.5606 - val_loss: 0.6837
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5557 - loss: 0.6816 - val_accuracy: 0.5705 - val_loss: 0.6851
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5646 - loss: 0.6829
Test Accuracy: 0.5705
Saved model for STOCK11 to STOCK11_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.4946 - loss: 0.6935 - val_accuracy: 0.5223 - val_loss: 0.6915
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5391 - loss: 0.6895 - val_accuracy: 0.5269 - val_loss: 0.6916
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5211 - loss: 0.6901 - val_accuracy: 0.4942 - val_loss: 0.6977
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5390 - loss: 0.6858 - val_accuracy: 0.4962 - val_loss: 0.6982
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5483 - loss: 0.6858 - val_accuracy: 0.5280 - val_loss: 0.6913
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5387 - loss: 0.6897
Test Accuracy: 0.5280
Saved model for STOCK12 to STOCK12_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5171 - loss: 0.6917 - val_accuracy: 0.4532 - val_loss: 0.7179
Epoch 2/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5368 - loss: 0.6901 - val_accuracy: 0.4747 - val_loss: 0.7194
Epoch 3/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5653 - loss: 0.6827 - val_accuracy: 0.4446 - val_loss: 0.7361
Epoch 4/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5714 - loss: 0.6806 - val_accuracy: 0.4968 - val_loss: 0.7024
Epoch 5/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5672 - loss: 0.6794 - val_accuracy: 0.4488 - val_loss: 0.8689
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4705 - loss: 0.7738
Test Accuracy: 0.4488
Saved model for STOCK17 to STOCK17_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.5240 - loss: 0.6899 - val_accuracy: 0.4777 - val_loss: 0.7178
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5447 - loss: 0.6862 - val_accuracy: 0.4750 - val_loss: 0.8205
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5812 - loss: 0.6780 - val_accuracy: 0.4745 - val_loss: 0.9232
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5948 - loss: 0.6692 - val_accuracy: 0.4739 - val_loss: 0.7953
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5861 - loss: 0.6705 - val_accuracy: 0.4770 - val_loss: 0.8051
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4996 - loss: 0.7590
Test Accuracy: 0.4770
Saved model for STOCK6 to STOCK6_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5350 - loss: 0.6904 - val_accuracy: 0.4871 - val_loss: 1.7599
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5503 - loss: 0.6837 - val_accuracy: 0.4939 - val_loss: 1.1483
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5698 - loss: 0.6760 - val_accuracy: 0.4888 - val_loss: 1.8449
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5689 - loss: 0.6734 - val_accuracy: 0.4976 - val_loss: 1.5442
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5834 - loss: 0.6694 - val_accuracy: 0.4984 - val_loss: 1.9105
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5054 - loss: 1.1775
Test Accuracy: 0.4984
Saved model for STOCK9 to STOCK9_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5431 - loss: 0.6851 - val_accuracy: 0.5706 - val_loss: 0.6846
Epoch 2/5
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.6046 - loss: 0.6582 - val_accuracy: 0.5665 - val_loss: 0.6857
Epoch 3/5
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6387 - loss: 0.6311 - val_accuracy: 0.5984 - val_loss: 0.6904
Epoch 4/5
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6474 - loss: 0.6279 - val_accuracy: 0.5992 - val_loss: 0.6793
Epoch 5/5
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6515 - loss: 0.6224 - val_accuracy: 0.6120 - val_loss: 0.6682
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6326 - loss: 0.6383
Test Accuracy: 0.6120
Saved model for STOCK2 to STOCK2_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5442 - loss: 0.6877 - val_accuracy: 0.5215 - val_loss: 0.7087
Epoch 2/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5608 - loss: 0.6815 - val_accuracy: 0.5172 - val_loss: 0.7111
Epoch 3/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5468 - loss: 0.6826 - val_accuracy: 0.5269 - val_loss: 0.7100
Epoch 4/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.5637 - loss: 0.6781 - val_accuracy: 0.5199 - val_loss: 0.7376
Epoch 5/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5627 - loss: 0.6772 - val_accuracy: 0.5263 - val_loss: 0.7275
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5320 - loss: 0.7023
Test Accuracy: 0.5263
Saved model for STOCK3 to STOCK3_lstm_model.keras


  super().__init__(**kwargs)


Epoch 1/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5066 - loss: 0.6940 - val_accuracy: 0.4780 - val_loss: 0.7332
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5532 - loss: 0.6859 - val_accuracy: 0.4841 - val_loss: 0.7817
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.5604 - loss: 0.6816 - val_accuracy: 0.4860 - val_loss: 0.7081
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5737 - loss: 0.6809 - val_accuracy: 0.4939 - val_loss: 0.7568
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.5764 - loss: 0.6776 - val_accuracy: 0.4721 - val_loss: 0.7677
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4775 - loss: 0.7415
Test Accuracy: 0.4721
Saved model for STOCK14 to STOCK14_lstm_model.keras
Epoch 1

  super().__init__(**kwargs)


[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.5451 - loss: 0.6861 - val_accuracy: 0.5544 - val_loss: 0.6814
Epoch 2/5
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.6033 - loss: 0.6586 - val_accuracy: 0.5779 - val_loss: 0.6663
Epoch 3/5
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.6043 - loss: 0.6501 - val_accuracy: 0.5927 - val_loss: 0.6625
Epoch 4/5
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.6043 - loss: 0.6480 - val_accuracy: 0.5869 - val_loss: 0.6632
Epoch 5/5
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.6144 - loss: 0.6494 - val_accuracy: 0.5649 - val_loss: 0.6764
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5793 - loss: 0.6744
Test Accuracy: 0.5649
Saved model for STOCK10 to STOCK10_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 22ms/step - accuracy: 0.5216 - loss: 0.6908 - val_accuracy: 0.5299 - val_loss: 0.7048
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.5699 - loss: 0.6794 - val_accuracy: 0.5615 - val_loss: 0.6842
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - accuracy: 0.5712 - loss: 0.6783 - val_accuracy: 0.5688 - val_loss: 0.6759
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.5811 - loss: 0.6724 - val_accuracy: 0.5402 - val_loss: 0.6922
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.5888 - loss: 0.6729 - val_accuracy: 0.5501 - val_loss: 0.6892
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5885 - loss: 0.6643
Test Accuracy: 0.5501
Saved model for STOCK8 to STOCK8_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.5619 - loss: 0.6815 - val_accuracy: 0.5663 - val_loss: 0.6877
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5836 - loss: 0.6718 - val_accuracy: 0.5524 - val_loss: 0.6942
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5904 - loss: 0.6659 - val_accuracy: 0.5589 - val_loss: 0.6875
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5887 - loss: 0.6679 - val_accuracy: 0.5628 - val_loss: 0.7229
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.6120 - loss: 0.6548 - val_accuracy: 0.5465 - val_loss: 0.6983
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5514 - loss: 0.6863
Test Accuracy: 0.5465
Saved model for STOCK13 to STOCK13_lstm_model.keras
Epoch 1/5


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.5326 - loss: 0.6896 - val_accuracy: 0.4649 - val_loss: 0.7111
Epoch 2/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5649 - loss: 0.6818 - val_accuracy: 0.5446 - val_loss: 0.7009
Epoch 3/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5694 - loss: 0.6802 - val_accuracy: 0.5564 - val_loss: 0.7027
Epoch 4/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5847 - loss: 0.6761 - val_accuracy: 0.5257 - val_loss: 0.7527
Epoch 5/5
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5893 - loss: 0.6704 - val_accuracy: 0.5372 - val_loss: 0.7848
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5615 - loss: 0.7232
Test Accuracy: 0.5372
Saved model for STOCK5 to STOCK5_lstm_model.keras


  super().__init__(**kwargs)


Epoch 1/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.5288 - loss: 0.6918 - val_accuracy: 0.5179 - val_loss: 0.6906
Epoch 2/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5321 - loss: 0.6902 - val_accuracy: 0.5135 - val_loss: 0.6903
Epoch 3/5
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5377 - loss: 0.6873 - val_accuracy: 0.5377 - val_loss: 0.6909
Epoch 4/5
[1m243/244[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 14ms/step - accuracy: 0.5456 - loss: 0.6866

In [None]:
# Load processed data (Example for one stock, modify as needed)
df = pd.read_csv("STOCK2_processed.csv")

# Select features and target
features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
target = 'target'

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(df[features])
y = df[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_train = X_seq
y_train = y_seq

df_test = pd.read_csv("STOCK3_processed.csv")

scaler = StandardScaler()
X = scaler.fit_transform(df_test[features])
y = df_test[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_test = X_seq
y_test = y_seq

# Split data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)


# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

model_alt = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(sequence_length,  X_train.shape[2])),
    MaxPooling1D(pool_size=2),
    GRU(64, return_sequences=True),
    Dropout(0.2),
    GRU(64),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')
])

model = model_2

# Compile model
model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model.save("lstm_trading_model_previous_year.keras")


2025-02-27 16:54:09.670688: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-27 16:54:09.679708: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/20
Epoch 2/20

KeyboardInterrupt: 

### entire dataset trading

In [19]:
stock_files = ["STOCK1_processed.csv", "STOCK2_processed.csv", "STOCK3_processed.csv", "STOCK4_processed.csv", "STOCK5_processed.csv", "STOCK6_processed.csv", "STOCK7_processed.csv","STOCK8_processed.csv","STOCK9_processed.csv","STOCK10_processed.csv"]  # Add more stock files as needed

stock_files = [f"STOCK1_processed.csv" for i in range(22)]
# Load and combine all stock data
dfs = []
for file in stock_files:
    df = pd.read_csv(file)
    dfs.append(df)

dfs = pd.concat(dfs, ignore_index=True)
scaler = StandardScaler()
X = scaler.fit_transform(dfs[features])
y = dfs[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_test = X_seq
y_test = y_seq

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)

# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model.save("lstm_trading_model_entire_set.h5")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20

KeyboardInterrupt: 

In [None]:
stock_files = ["STOCK1_processed.csv", "STOCK2_processed.csv", "STOCK3_processed.csv", "STOCK4_processed.csv", "STOCK5_processed.csv", "STOCK6_processed.csv", "STOCK7_processed.csv","STOCK8_processed.csv","STOCK9_processed.csv","STOCK10_processed.csv"]  # Add more stock files as needed
stock_files = ['STOCK20', 'STOCK1', 'STOCK18', 'STOCK21', 'STOCK16', 'STOCK11', 'STOCK12',
 'STOCK17', 'STOCK6', 'STOCK9', 'STOCK2', 'STOCK3', 'STOCK14', 'STOCK10',
 'STOCK8', 'STOCK13', 'STOCK5', 'STOCK4', 'STOCK19', 'INDEX1']

stock_files = {f"{i}_processed.csv" for i in stock_files}

# stock_files = [f"STOCK{i}_processed.csv" for i in range(1,22)]


features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
# features = [col for col in df.columns if col not in ['gmtTime', 'target']]


target = 'target'

# Load and combine all stock data
dfs_train = []
dfs_test = []
for file in stock_files:
    df = pd.read_csv(file)
    dfs_train.append(df[:int(len(df)*0.6)])
    dfs_test.append(df[int(len(df)*0.6):])

dfs_train = pd.concat(dfs_train, ignore_index=True)
dfs_test = pd.concat(dfs_test, ignore_index=True)


# Encode 'symbol' column
symbol_encoder = {symbol: idx for idx, symbol in enumerate(dfs_train['symbol'].unique())}
dfs_train['symbol'] = dfs_train['symbol'].map(symbol_encoder)
dfs_test['symbol'] = dfs_test['symbol'].map(symbol_encoder)


scaler = StandardScaler()

X_train = dfs_train[features] #scaler.fit_transform(dfs_train[features])
y_train = dfs_train[target].values

X_test = dfs_test[features] #scaler.transform(dfs_test[features])
y_test = dfs_test[target].values

X_train_seq = []
y_train_seq = []
for i in range(len(X_train) - sequence_length):
    X_train_seq.append(X_train[i:i+sequence_length])
    y_train_seq.append(y_train[i+sequence_length])

X_train_seq = np.array(X_train_seq)
y_train_seq = np.array(y_train_seq)

X_test_seq = []
y_test_seq = []
for i in range(len(X_test) - sequence_length):
    X_test_seq.append(X_test[i:i+sequence_length])
    y_test_seq.append(y_test[i+sequence_length])

X_test_seq = np.array(X_test_seq)
y_test_seq = np.array(y_test_seq)


# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train_seq.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


# BEST MODEL INVIDIDUAL STOCKS

model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train_seq.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

# Compile model
model_2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model_2.fit(X_train_seq, y_train_seq, epochs=20, batch_size=32, validation_data=(X_test_seq, y_test_seq))

# Evaluate model
loss, accuracy = model_2.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model_2.save("lstm_trading_model_entire_set.h5")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

KeyboardInterrupt: 

In [21]:
model_2.save("lstm_trading_model_entire_set_new_data.keras")