In [7]:
import pandas as pd
import numpy as np

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, GRU
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### IDEAs
- give the entire stock set and trade whether entire market is up or down
- give entire stock set and pick stock that increases the most

# PREP DATA

In [8]:
# Load data
df = pd.read_csv("Historical_Data.csv")
df["gmtTime"] = pd.to_datetime(df["gmtTime"])

# Dictionary to store processed data for each stock
stock_dfs = {}

print('unique stocks=',df["symbol"].unique())

# Feature engineering for each stock
for symbol in df["symbol"].unique():
    df_stock = df[df["symbol"] == symbol].copy()

    # Round numerical columns
    cols_to_round = [col for col in df_stock.columns if col not in ["gmtTime", "symbol"]]
    df_stock[cols_to_round] = df_stock[cols_to_round].round(2)

    # Time-based features
    df_stock['hour'] = df_stock['gmtTime'].dt.hour
    df_stock['day_of_week'] = df_stock['gmtTime'].dt.dayofweek

    # Rolling statistics
    df_stock['askMedian_rolling_mean_3h'] = df_stock['askMedian'].rolling(window=3, min_periods=1).mean()
    df_stock['bidMedian_rolling_mean_3h'] = df_stock['bidMedian'].rolling(window=3, min_periods=1).mean()
    df_stock['askMedian_rolling_std_3h'] = df_stock['askMedian'].rolling(window=3, min_periods=1).std()
    df_stock['bidMedian_rolling_std_3h'] = df_stock['bidMedian'].rolling(window=3, min_periods=1).std()

    # Percentage changes
    df_stock['askMedian_pct_change'] = df_stock['askMedian'].pct_change()
    df_stock['bidMedian_pct_change'] = df_stock['bidMedian'].pct_change()

    # Spread-related features
    df_stock['spread_ratio'] = df_stock['spreadMedian'] / (df_stock['askMedian'] + df_stock['bidMedian'])
    # df_stock['spread_pct_change'] = df_stock['spreadMedian'].pct_change()

    # Volume-related features
    df_stock['askVolume_relative'] = df_stock['askVolume'] / df_stock['askVolume'].rolling(window=5, min_periods=1).mean()
    df_stock['bidVolume_relative'] = df_stock['bidVolume'] / df_stock['bidVolume'].rolling(window=5, min_periods=1).mean()
    df_stock['volume_imbalance'] = (df_stock['askVolume'] - df_stock['bidVolume']) / (df_stock['askVolume'] + df_stock['bidVolume'])

    # Lagged features (e.g., previous hour's values)
    for lag in range(1, 25):  # Add lags for the last 3 hours
        df_stock[f'askMedian_lag_{lag}'] = df_stock['askMedian'].shift(lag)
        df_stock[f'bidMedian_lag_{lag}'] = df_stock['bidMedian'].shift(lag)
        df_stock[f'spreadMedian_lag_{lag}'] = df_stock['spreadMedian'].shift(lag)

    # Target variable: Direction of price movement (1 if bidMedian increases next hour, 0 otherwise)
    df_stock['target'] = (df_stock['bidMedian'].shift(-20) > df_stock['bidMedian']).astype(int)

    # Drop rows with missing values (due to lags and rolling features)
    df_stock = df_stock.dropna()

    # Store processed dataframe
    stock_dfs[symbol] = df_stock

# Example: View processed data for one stock
print(stock_dfs['STOCK1'].head())

# Save processed data to CSV (optional)
for symbol, df_stock in stock_dfs.items():
    df_stock.to_csv(f"{symbol}_processed.csv", index=False)

unique stocks= ['STOCK20' 'STOCK1' 'STOCK18' 'STOCK21' 'STOCK16' 'STOCK11' 'STOCK12'
 'STOCK17' 'STOCK6' 'STOCK9' 'STOCK2' 'STOCK3' 'STOCK14' 'STOCK10'
 'STOCK8' 'STOCK13' 'STOCK5' 'STOCK4' 'STOCK19' 'INDEX1']
                      gmtTime  askMedian  bidMedian  askVolume  bidVolume  \
501 1999-02-10 12:00:00+00:00     160.35     160.18    13868.0     3971.0   
521 1999-02-10 13:00:00+00:00     159.13     158.97     4390.0     6125.0   
541 1999-02-10 14:00:00+00:00     158.97     158.89     4650.0     4152.0   
561 1999-02-10 15:00:00+00:00     159.05     158.89    10550.0     2410.0   
581 1999-02-10 16:00:00+00:00     158.00     157.92     3542.0     5576.0   

     spreadMedian  symbol  hour  day_of_week  askMedian_rolling_mean_3h  ...  \
501          0.16  STOCK1    12            2                 159.943333  ...   
521          0.16  STOCK1    13            2                 159.780000  ...   
541          0.08  STOCK1    14            2                 159.483333  ...   
561    

## TRAIN FOR EVERY MODEL INDIVIDUALLY AND SAVE MODELS

In [9]:
import joblib 

features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
# features = [col for col in df.columns if col not in ['gmtTime', 'target']]


target = 'target'


stock_files = ['STOCK20', 'STOCK1', 'STOCK18', 'STOCK21', 'STOCK16', 'STOCK11', 'STOCK12',
 'STOCK17', 'STOCK6', 'STOCK9', 'STOCK2', 'STOCK3', 'STOCK14', 'STOCK10',
 'STOCK8', 'STOCK13', 'STOCK5', 'STOCK4', 'STOCK19', 'INDEX1']

# Load and combine all stock data
for i, stock_file in enumerate(stock_files):
    print("TRAINING MODEL FOR STOCK ", i)
    dfs = pd.read_csv(f"{stock_file}_processed.csv")
    scaler = StandardScaler()
    X = scaler.fit_transform(dfs[features])
    y = dfs[target].values

    # Save the scaler for later use during inference
    scaler_filename = f"{stock_file}_scaler.pkl"
    joblib.dump(scaler, scaler_filename)
    print(f"Saved scaler for {stock_file} to {scaler_filename}")


    # Reshape for LSTM (samples, timesteps, features)
    sequence_length = 10  # Use last 10 hours as input
    X_seq = []
    y_seq = []

    for i in range(len(X) - sequence_length):
        X_seq.append(X[i:i+sequence_length])
        y_seq.append(y[i+sequence_length])

    X_seq = np.array(X_seq)
    y_seq = np.array(y_seq)

    X_test = X_seq
    y_test = y_seq

    # Use the last 50% of the data as test set, maintaining chronological order
    split_idx = int(len(X_seq) * 0.5)
    X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
    y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]
    
    #X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)

    # Build LSTM mode


    model_2 = Sequential()
    model_2.add(
        LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
    )
    model_2.add(Dropout(0.2))
    model_2.add(LSTM(100, return_sequences=True))
    model_2.add(Dropout(0.2))
    model_2.add(LSTM(100))
    model_2.add(Dropout(0.2))
    model_2.add(Dense(25, activation='relu'))
    model_2.add(Dense(1, activation='sigmoid'))

    # Compile model
    model_2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    # Train model
    model_2.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    # Evaluate model
    loss, accuracy = model_2.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy:.4f}")


    '''
    SAVE MODEL FOR EACH STOCK INDIVIDUALLY
    '''
    model_save_path = f"{stock_file}_lstm_model.keras"
    model_2.save(model_save_path)
    print(f"Saved model for {stock_file} to {model_save_path}")

TRAINING MODEL FOR STOCK  0
Epoch 1/10


  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5356 - loss: 0.6889 - val_accuracy: 0.5252 - val_loss: 0.8339
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5715 - loss: 0.6792 - val_accuracy: 0.5225 - val_loss: 1.3116
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5848 - loss: 0.6721 - val_accuracy: 0.5332 - val_loss: 0.9371
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5878 - loss: 0.6640 - val_accuracy: 0.5269 - val_loss: 0.8243
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5885 - loss: 0.6636 - val_accuracy: 0.5243 - val_loss: 1.4956
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6020 - loss: 0.6531 - val_accuracy: 0.5252 - val_loss: 1.5183
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.5459 - loss: 0.6890 - val_accuracy: 0.5218 - val_loss: 0.8041
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5619 - loss: 0.6764 - val_accuracy: 0.5038 - val_loss: 0.8591
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5741 - loss: 0.6729 - val_accuracy: 0.4967 - val_loss: 1.1068
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5961 - loss: 0.6630 - val_accuracy: 0.5106 - val_loss: 0.9830
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5740 - loss: 0.6700 - val_accuracy: 0.5010 - val_loss: 1.4383
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5968 - loss: 0.6606 - val_accuracy: 0.5074 - val_loss: 1.4413
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 17ms/step - accuracy: 0.5429 - loss: 0.6866 - val_accuracy: 0.5054 - val_loss: 0.7480
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5727 - loss: 0.6760 - val_accuracy: 0.5167 - val_loss: 0.8662
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5826 - loss: 0.6719 - val_accuracy: 0.4980 - val_loss: 1.2445
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5935 - loss: 0.6670 - val_accuracy: 0.5270 - val_loss: 0.9237
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5931 - loss: 0.6631 - val_accuracy: 0.5045 - val_loss: 1.2270
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5970 - loss: 0.6647 - val_accuracy: 0.4990 - val_loss: 2.1861
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5073 - loss: 0.6936 - val_accuracy: 0.4982 - val_loss: 0.9350
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5510 - loss: 0.6862 - val_accuracy: 0.5035 - val_loss: 0.8393
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5523 - loss: 0.6840 - val_accuracy: 0.5195 - val_loss: 1.1238
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5786 - loss: 0.6696 - val_accuracy: 0.5356 - val_loss: 1.2660
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5857 - loss: 0.6602 - val_accuracy: 0.5235 - val_loss: 1.0270
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5887 - loss: 0.6628 - val_accuracy: 0.5430 - val_loss: 1.8069
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5372 - loss: 0.6893 - val_accuracy: 0.5683 - val_loss: 0.6801
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5665 - loss: 0.6808 - val_accuracy: 0.5805 - val_loss: 0.6761
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5629 - loss: 0.6782 - val_accuracy: 0.5762 - val_loss: 0.6974
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5672 - loss: 0.6761 - val_accuracy: 0.5682 - val_loss: 0.6925
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5812 - loss: 0.6733 - val_accuracy: 0.5743 - val_loss: 0.7395
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5772 - loss: 0.6725 - val_accuracy: 0.5755 - val_loss: 0.6982
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5351 - loss: 0.6899 - val_accuracy: 0.5114 - val_loss: 0.6921
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5479 - loss: 0.6848 - val_accuracy: 0.5370 - val_loss: 0.6842
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5715 - loss: 0.6799 - val_accuracy: 0.5326 - val_loss: 0.6908
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5484 - loss: 0.6853 - val_accuracy: 0.5480 - val_loss: 0.6938
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5710 - loss: 0.6771 - val_accuracy: 0.5304 - val_loss: 0.6874
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5567 - loss: 0.6780 - val_accuracy: 0.5271 - val_loss: 0.6932
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5057 - loss: 0.6935 - val_accuracy: 0.5189 - val_loss: 0.6921
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5323 - loss: 0.6892 - val_accuracy: 0.5278 - val_loss: 0.6903
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5559 - loss: 0.6872 - val_accuracy: 0.5320 - val_loss: 0.6934
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5417 - loss: 0.6890 - val_accuracy: 0.5224 - val_loss: 0.6932
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5505 - loss: 0.6855 - val_accuracy: 0.5333 - val_loss: 0.6918
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5565 - loss: 0.6835 - val_accuracy: 0.5420 - val_loss: 0.6911
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.5198 - loss: 0.6925 - val_accuracy: 0.5054 - val_loss: 0.6953
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5358 - loss: 0.6896 - val_accuracy: 0.4588 - val_loss: 0.8726
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5648 - loss: 0.6839 - val_accuracy: 0.4856 - val_loss: 0.7082
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5745 - loss: 0.6778 - val_accuracy: 0.4661 - val_loss: 0.8505
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5663 - loss: 0.6799 - val_accuracy: 0.4442 - val_loss: 1.4977
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5767 - loss: 0.6753 - val_accuracy: 0.4488 - val_loss: 1.5938
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5211 - loss: 0.6907 - val_accuracy: 0.4658 - val_loss: 0.7227
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5626 - loss: 0.6811 - val_accuracy: 0.4693 - val_loss: 0.8299
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5770 - loss: 0.6769 - val_accuracy: 0.4747 - val_loss: 0.8412
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5951 - loss: 0.6720 - val_accuracy: 0.4749 - val_loss: 0.7730
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5954 - loss: 0.6650 - val_accuracy: 0.4754 - val_loss: 0.8058
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5975 - loss: 0.6605 - val_accuracy: 0.4768 - val_loss: 0.7231
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5223 - loss: 0.6902 - val_accuracy: 0.4893 - val_loss: 0.9641
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5497 - loss: 0.6823 - val_accuracy: 0.4806 - val_loss: 1.8152
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5639 - loss: 0.6740 - val_accuracy: 0.4894 - val_loss: 1.5709
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5800 - loss: 0.6727 - val_accuracy: 0.4952 - val_loss: 1.9422
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5669 - loss: 0.6742 - val_accuracy: 0.4896 - val_loss: 1.7621
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5762 - loss: 0.6690 - val_accuracy: 0.5054 - val_loss: 1.1725
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.5411 - loss: 0.6867 - val_accuracy: 0.5740 - val_loss: 0.6736
Epoch 2/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6076 - loss: 0.6520 - val_accuracy: 0.6124 - val_loss: 0.6492
Epoch 3/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.6293 - loss: 0.6382 - val_accuracy: 0.6122 - val_loss: 0.6543
Epoch 4/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.6411 - loss: 0.6294 - val_accuracy: 0.5711 - val_loss: 0.7411
Epoch 5/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.6459 - loss: 0.6265 - val_accuracy: 0.6117 - val_loss: 0.6657
Epoch 6/10
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.6578 - loss: 0.6186 - val_accuracy: 0.5869 - val_loss: 0.7324
Epoch 7/10
[1m242/242[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.5359 - loss: 0.6889 - val_accuracy: 0.5209 - val_loss: 0.7084
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5699 - loss: 0.6793 - val_accuracy: 0.5120 - val_loss: 0.6994
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5502 - loss: 0.6837 - val_accuracy: 0.5213 - val_loss: 0.7514
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5672 - loss: 0.6783 - val_accuracy: 0.5272 - val_loss: 0.7120
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5588 - loss: 0.6789 - val_accuracy: 0.5387 - val_loss: 0.7026
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5685 - loss: 0.6759 - val_accuracy: 0.5336 - val_loss: 0.7201
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5192 - loss: 0.6922 - val_accuracy: 0.4893 - val_loss: 0.7227
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5461 - loss: 0.6852 - val_accuracy: 0.4794 - val_loss: 0.7308
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5606 - loss: 0.6822 - val_accuracy: 0.4914 - val_loss: 0.8173
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5638 - loss: 0.6779 - val_accuracy: 0.4915 - val_loss: 0.7558
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5654 - loss: 0.6768 - val_accuracy: 0.4797 - val_loss: 0.9039
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5747 - loss: 0.6743 - val_accuracy: 0.4950 - val_loss: 0.9167
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5385 - loss: 0.6869 - val_accuracy: 0.5432 - val_loss: 0.6960
Epoch 2/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5934 - loss: 0.6647 - val_accuracy: 0.5692 - val_loss: 0.6744
Epoch 3/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5957 - loss: 0.6577 - val_accuracy: 0.5908 - val_loss: 0.6588
Epoch 4/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6061 - loss: 0.6500 - val_accuracy: 0.5977 - val_loss: 0.6639
Epoch 5/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6168 - loss: 0.6439 - val_accuracy: 0.5580 - val_loss: 0.7257
Epoch 6/10
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6060 - loss: 0.6425 - val_accuracy: 0.5821 - val_loss: 0.6608
Epoch 7/10
[1m243/243[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5232 - loss: 0.6915 - val_accuracy: 0.5680 - val_loss: 0.6856
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5682 - loss: 0.6810 - val_accuracy: 0.5269 - val_loss: 0.6838
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5747 - loss: 0.6763 - val_accuracy: 0.5611 - val_loss: 0.7121
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5875 - loss: 0.6682 - val_accuracy: 0.5453 - val_loss: 0.6807
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5856 - loss: 0.6682 - val_accuracy: 0.5535 - val_loss: 0.7003
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5858 - loss: 0.6725 - val_accuracy: 0.5732 - val_loss: 0.6865
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.5552 - loss: 0.6831 - val_accuracy: 0.5656 - val_loss: 0.6799
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5869 - loss: 0.6719 - val_accuracy: 0.5602 - val_loss: 0.6970
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5894 - loss: 0.6686 - val_accuracy: 0.5552 - val_loss: 0.7031
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6009 - loss: 0.6632 - val_accuracy: 0.5567 - val_loss: 0.6919
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6057 - loss: 0.6574 - val_accuracy: 0.5603 - val_loss: 0.6841
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6184 - loss: 0.6560 - val_accuracy: 0.5579 - val_loss: 0.6995
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.5500 - loss: 0.6881 - val_accuracy: 0.4713 - val_loss: 0.7671
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5668 - loss: 0.6803 - val_accuracy: 0.4688 - val_loss: 0.8560
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5704 - loss: 0.6791 - val_accuracy: 0.4739 - val_loss: 0.8167
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5744 - loss: 0.6774 - val_accuracy: 0.4791 - val_loss: 0.7635
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5813 - loss: 0.6751 - val_accuracy: 0.5003 - val_loss: 0.8522
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5917 - loss: 0.6734 - val_accuracy: 0.4718 - val_loss: 1.0891
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.5306 - loss: 0.6923 - val_accuracy: 0.5093 - val_loss: 0.6951
Epoch 2/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5259 - loss: 0.6889 - val_accuracy: 0.5150 - val_loss: 0.6900
Epoch 3/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5349 - loss: 0.6868 - val_accuracy: 0.5201 - val_loss: 0.6906
Epoch 4/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5397 - loss: 0.6864 - val_accuracy: 0.5119 - val_loss: 0.6942
Epoch 5/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5385 - loss: 0.6859 - val_accuracy: 0.5179 - val_loss: 0.6925
Epoch 6/10
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5390 - loss: 0.6862 - val_accuracy: 0.5049 - val_loss: 0.6978
Epoch 7/10
[1m244/244[0m [32m━

  super().__init__(**kwargs)


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.5409 - loss: 0.6892 - val_accuracy: 0.5356 - val_loss: 0.6924
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5378 - loss: 0.6887 - val_accuracy: 0.5412 - val_loss: 0.6907
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5420 - loss: 0.6871 - val_accuracy: 0.5491 - val_loss: 0.6906
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.5562 - loss: 0.6815 - val_accuracy: 0.5370 - val_loss: 0.6924
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5494 - loss: 0.6831 - val_accuracy: 0.5285 - val_loss: 0.6929
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5692 - loss: 0.6811 - val_accuracy: 0.5450 - val_loss: 0.6957
Epoch 7/10
[1m245/245[0m [32m━

  super().__init__(**kwargs)


[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.5731 - loss: 0.6732 - val_accuracy: 0.5997 - val_loss: 0.6639
Epoch 2/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6424 - loss: 0.6334 - val_accuracy: 0.6203 - val_loss: 0.6506
Epoch 3/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6561 - loss: 0.6153 - val_accuracy: 0.6361 - val_loss: 0.6717
Epoch 4/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.6520 - loss: 0.6065 - val_accuracy: 0.6428 - val_loss: 0.6679
Epoch 5/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6672 - loss: 0.6003 - val_accuracy: 0.6265 - val_loss: 0.6516
Epoch 6/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6649 - loss: 0.6028 - val_accuracy: 0.6154 - val_loss: 0.6719
Epoch 7/10
[1m236/236[0m [32m━

In [None]:
# Load processed data (Example for one stock, modify as needed)
df = pd.read_csv("STOCK2_processed.csv")

# Select features and target
features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
target = 'target'

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(df[features])
y = df[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_train = X_seq
y_train = y_seq

df_test = pd.read_csv("STOCK3_processed.csv")

scaler = StandardScaler()
X = scaler.fit_transform(df_test[features])
y = df_test[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_test = X_seq
y_test = y_seq

# Split data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)


# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

model_alt = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(sequence_length,  X_train.shape[2])),
    MaxPooling1D(pool_size=2),
    GRU(64, return_sequences=True),
    Dropout(0.2),
    GRU(64),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')
])

model = model_2

# Compile model
model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model.save("lstm_trading_model_previous_year.keras")


2025-02-27 16:54:09.670688: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-27 16:54:09.679708: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/20
Epoch 2/20

KeyboardInterrupt: 

### entire dataset trading

In [19]:
stock_files = ["STOCK1_processed.csv", "STOCK2_processed.csv", "STOCK3_processed.csv", "STOCK4_processed.csv", "STOCK5_processed.csv", "STOCK6_processed.csv", "STOCK7_processed.csv","STOCK8_processed.csv","STOCK9_processed.csv","STOCK10_processed.csv"]  # Add more stock files as needed

stock_files = [f"STOCK1_processed.csv" for i in range(22)]
# Load and combine all stock data
dfs = []
for file in stock_files:
    df = pd.read_csv(file)
    dfs.append(df)

dfs = pd.concat(dfs, ignore_index=True)
scaler = StandardScaler()
X = scaler.fit_transform(dfs[features])
y = dfs[target].values

# Reshape for LSTM (samples, timesteps, features)
sequence_length = 10  # Use last 10 hours as input
X_seq = []
y_seq = []

for i in range(len(X) - sequence_length):
    X_seq.append(X[i:i+sequence_length])
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

X_test = X_seq
y_test = y_seq

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.5, random_state=42, shuffle=True)

# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model.save("lstm_trading_model_entire_set.h5")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20

KeyboardInterrupt: 

In [None]:
stock_files = ["STOCK1_processed.csv", "STOCK2_processed.csv", "STOCK3_processed.csv", "STOCK4_processed.csv", "STOCK5_processed.csv", "STOCK6_processed.csv", "STOCK7_processed.csv","STOCK8_processed.csv","STOCK9_processed.csv","STOCK10_processed.csv"]  # Add more stock files as needed
stock_files = ['STOCK20', 'STOCK1', 'STOCK18', 'STOCK21', 'STOCK16', 'STOCK11', 'STOCK12',
 'STOCK17', 'STOCK6', 'STOCK9', 'STOCK2', 'STOCK3', 'STOCK14', 'STOCK10',
 'STOCK8', 'STOCK13', 'STOCK5', 'STOCK4', 'STOCK19', 'INDEX1']

stock_files = {f"{i}_processed.csv" for i in stock_files}

# stock_files = [f"STOCK{i}_processed.csv" for i in range(1,22)]


features = [col for col in df.columns if col not in ['gmtTime', 'symbol', 'target']]
# features = [col for col in df.columns if col not in ['gmtTime', 'target']]


target = 'target'

# Load and combine all stock data
dfs_train = []
dfs_test = []
for file in stock_files:
    df = pd.read_csv(file)
    dfs_train.append(df[:int(len(df)*0.6)])
    dfs_test.append(df[int(len(df)*0.6):])

dfs_train = pd.concat(dfs_train, ignore_index=True)
dfs_test = pd.concat(dfs_test, ignore_index=True)


# Encode 'symbol' column
symbol_encoder = {symbol: idx for idx, symbol in enumerate(dfs_train['symbol'].unique())}
dfs_train['symbol'] = dfs_train['symbol'].map(symbol_encoder)
dfs_test['symbol'] = dfs_test['symbol'].map(symbol_encoder)


scaler = StandardScaler()

X_train = dfs_train[features] #scaler.fit_transform(dfs_train[features])
y_train = dfs_train[target].values

X_test = dfs_test[features] #scaler.transform(dfs_test[features])
y_test = dfs_test[target].values

X_train_seq = []
y_train_seq = []
for i in range(len(X_train) - sequence_length):
    X_train_seq.append(X_train[i:i+sequence_length])
    y_train_seq.append(y_train[i+sequence_length])

X_train_seq = np.array(X_train_seq)
y_train_seq = np.array(y_train_seq)

X_test_seq = []
y_test_seq = []
for i in range(len(X_test) - sequence_length):
    X_test_seq.append(X_test[i:i+sequence_length])
    y_test_seq.append(y_test[i+sequence_length])

X_test_seq = np.array(X_test_seq)
y_test_seq = np.array(y_test_seq)


# Build LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X_train_seq.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])


# BEST MODEL INVIDIDUAL STOCKS

model_2 = Sequential()
model_2.add(
    LSTM(100, return_sequences=True, input_shape=(sequence_length, X_train_seq.shape[2]))
)
model_2.add(Dropout(0.2))
model_2.add(LSTM(100, return_sequences=True))
model_2.add(Dropout(0.2))
model_2.add(LSTM(100))
model_2.add(Dropout(0.2))
model_2.add(Dense(25, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

# Compile model
model_2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model_2.fit(X_train_seq, y_train_seq, epochs=20, batch_size=32, validation_data=(X_test_seq, y_test_seq))

# Evaluate model
loss, accuracy = model_2.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Save model
model_2.save("lstm_trading_model_entire_set.h5")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

KeyboardInterrupt: 

In [21]:
model_2.save("lstm_trading_model_entire_set_new_data.keras")