### Import Libraries

In [6]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import keras_tuner as kt
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
import tensorflow as tf
import joblib
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import os
import shutil

### Load and Normalize the Dataset

In [7]:
# Load the dataset
file_path = 'D:\\Github anyud\\final\\Data_stock\\processed_stock_data.csv'
merged_df = pd.read_csv(file_path)

# Normalize data
price_scaler = MinMaxScaler()
merged_df[['Price']] = price_scaler.fit_transform(merged_df[['Price']])

feature_scaler = MinMaxScaler()
merged_df[['MA30', 'MA90']] = feature_scaler.fit_transform(merged_df[['MA30', 'MA90']])


### Prepare Sequences for LSTM

In [8]:
# Prepare sequences for LSTM
def create_sequences(df, time_steps=30):
    sequences = []
    labels = []
    for i in range(len(df) - time_steps):
        sequence = df[['Price', 'MA30', 'MA90']].iloc[i:i+time_steps].values
        label = df['Price'].iloc[i+time_steps]
        sequences.append(sequence)
        labels.append(label)
    return np.array(sequences), np.array(labels)


### Define the LSTM HyperModel

In [4]:
# Define the LSTM HyperModel
class LSTMHyperModel(kt.HyperModel):
    def build(self, hp):
        model = Sequential()
        for i in range(hp.Int('num_layers', 1, 3)):
            model.add(LSTM(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32), 
                           return_sequences=(i != hp.Int('num_layers', 1, 3) - 1), input_shape=(time_steps, 3)))
        model.add(Dense(1))  # Output layer should match the number of features
        model.compile(optimizer=tf.keras.optimizers.Adam(
            hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)),
            loss='mean_squared_error')
        return model


### Train and Evaluate the Model for Each Ticker

In [10]:
# Train and evaluate the model for each ticker
tickers = merged_df['Stock_Name'].unique()

for ticker in tickers:
    ticker_df = merged_df[merged_df['Stock_Name'] == ticker].dropna()
    
    # Create sequences and labels
    time_steps = 30
    X, y = create_sequences(ticker_df, time_steps)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Initialize RandomSearch Tuner
    tuner = kt.RandomSearch(
        LSTMHyperModel(),
        objective='val_loss',
        max_trials=20,
        executions_per_trial=1,
        directory='my_dir',
        project_name=f'lstm_stock_model_{ticker}'
    )

    # Perform hyperparameter search
    tuner.search(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

    # Retrieve the best hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(f"Ticker: {ticker}")
    print(f"The hyperparameter search is complete. The optimal number of layers is {best_hps.get('num_layers')}.")
    for i in range(best_hps.get('num_layers')):
        print(f"Layer {i + 1}: {best_hps.get(f'units_{i}')} units")
    print(f"The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.")

    # Define additional hyperparameters or settings
    num_epochs = 50
    batch_size = 32

    # Create directory for ticker if it doesn't exist
    os.makedirs(f'models/{ticker}', exist_ok=True)

    # Save hyperparameters to a text file
    with open(f'models/{ticker}/best_hyperparameters_{ticker}.txt', 'w') as f:
        f.write(f"Ticker: {ticker}\n")
        f.write(f"The hyperparameter search is complete. The optimal number of layers is {best_hps.get('num_layers')}.\n")
        for i in range(best_hps.get('num_layers')):
            f.write(f"Layer {i + 1}: {best_hps.get(f'units_{i}')} units\n")
        f.write(f"The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.\n")
        f.write(f"Epochs: {num_epochs}\n")
        f.write(f"Batch size: {batch_size}\n")

    # Build the model with the optimal hyperparameters
    model = tuner.hypermodel.build(best_hps)

    # Train the model
    history = model.fit(X_train, y_train, epochs=num_epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

    # Save the model
    model.save(f'models/{ticker}/lstm_stock_model_best_{ticker}.h5')

    # Save the scalers
    joblib.dump(price_scaler, f'models/{ticker}/{ticker}_price_scaler.pkl')
    joblib.dump(feature_scaler, f'models/{ticker}/{ticker}_feature_scaler.pkl')

    # Save the history for plotting
    with open(f'models/{ticker}/history_{ticker}.pkl', 'wb') as file:
        joblib.dump(history.history, file)

    # Make predictions
    y_pred = model.predict(X_test)

    # Inverse transform the predictions and the actual values
    y_pred = price_scaler.inverse_transform(y_pred)
    y_test = price_scaler.inverse_transform(y_test.reshape(-1, 1))

    # Save the predictions for plotting
    np.save(f'models/{ticker}/y_test_{ticker}.npy', y_test)
    np.save(f'models/{ticker}/y_pred_{ticker}.npy', y_pred)

Reloading Tuner from my_dir\lstm_stock_model_AAA\tuner0.json
Ticker: AAA
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 224 units
The optimal learning rate for the optimizer is 0.000699204056440982.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - loss: 5.9492e-04 - val_loss: 3.6349e-05
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - loss: 3.1165e-05 - val_loss: 1.6093e-05
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4071e-05 - val_loss: 1.0486e-05
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - loss: 1.6842e-05 - val_loss: 1.0739e-05
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - loss: 1.5713e-05 - val_loss: 6.6312e-06
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - loss: 1.5546e-05 - val_loss: 6.9151e-06
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - loss: 1.6617e-05 - val_loss: 6.3224e-06
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 62ms/step - loss: 1.3105e-05 - val_loss: 5.5572



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Reloading Tuner from my_dir\lstm_stock_model_AAPL\tuner0.json
Ticker: AAPL
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 64 units
The optimal learning rate for the optimizer is 0.0010817749459071228.
Epoch 1/50


  super().__init__(**kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 1.8538e-05 - val_loss: 3.8952e-07
Epoch 2/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 1.0997e-07 - val_loss: 2.1915e-07
Epoch 3/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 6.0347e-08 - val_loss: 1.9225e-07
Epoch 4/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 4.9296e-08 - val_loss: 1.8777e-07
Epoch 5/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 4.4372e-08 - val_loss: 1.5301e-07
Epoch 6/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 3.5528e-08 - val_loss: 2.6643e-07
Epoch 7/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 3.2581e-08 - val_loss: 1.0000e-07
Epoch 8/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 2.1950e-08 - val_loss: 6.6338e-08




[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Reloading Tuner from my_dir\lstm_stock_model_ACB\tuner0.json
Ticker: ACB
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 288 units
The optimal learning rate for the optimizer is 0.001945510412913914.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 51ms/step - loss: 6.2973e-04 - val_loss: 3.3165e-05
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 47ms/step - loss: 2.3628e-05 - val_loss: 2.0039e-05
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 48ms/step - loss: 1.8964e-05 - val_loss: 1.7029e-05
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 49ms/step - loss: 1.3691e-05 - val_loss: 1.6247e-05
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 1.2981e-05 - val_loss: 3.2031e-05
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step - loss: 1.3931e-05 - val_loss: 2.7599e-05
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step - loss: 1.2244e-05 - val_loss: 2.5245e-05
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 49ms/step - loss: 1.0967e-05 - val_loss: 1.8312



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
Reloading Tuner from my_dir\lstm_stock_model_BID\tuner0.json
Ticker: BID
The hyperparameter search is complete. The optimal number of layers is 3.
Layer 1: 320 units
Layer 2: 96 units
Layer 3: 192 units
The optimal learning rate for the optimizer is 0.0023311662519474776.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 111ms/step - loss: 0.0047 - val_loss: 1.1429e-04
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - loss: 1.1942e-04 - val_loss: 7.6238e-05
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 1.2335e-04 - val_loss: 1.1140e-04
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 7.5028e-05 - val_loss: 7.7457e-05
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - loss: 7.8460e-05 - val_loss: 2.1053e-04
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - loss: 6.1580e-05 - val_loss: 9.3644e-05
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - loss: 6.2079e-05 - val_loss: 5.3298e-05
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 6.4365e-05 - val_loss: 1.2581e-



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step
Reloading Tuner from my_dir\lstm_stock_model_CTG\tuner0.json
Ticker: CTG
The hyperparameter search is complete. The optimal number of layers is 3.
Layer 1: 384 units
Layer 2: 352 units
Layer 3: 32 units
The optimal learning rate for the optimizer is 0.0008569717606115053.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 64ms/step - loss: 0.0017 - val_loss: 9.4055e-05
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 58ms/step - loss: 8.5115e-05 - val_loss: 6.1013e-05
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 56ms/step - loss: 5.7390e-05 - val_loss: 7.4032e-05
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 53ms/step - loss: 7.1131e-05 - val_loss: 9.1872e-05
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 53ms/step - loss: 4.9257e-05 - val_loss: 5.7317e-05
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 53ms/step - loss: 5.2498e-05 - val_loss: 3.3304e-05
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 58ms/step - loss: 4.2499e-05 - val_loss: 8.8374e-05
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 63ms/step - loss: 5.3393e-05 - val_loss: 4.4258e-05



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step
Reloading Tuner from my_dir\lstm_stock_model_FPT\tuner0.json
Ticker: FPT
The hyperparameter search is complete. The optimal number of layers is 3.
Layer 1: 160 units
Layer 2: 256 units
Layer 3: 96 units
The optimal learning rate for the optimizer is 0.0012564412521369585.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 95ms/step - loss: 0.0051 - val_loss: 0.0019
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 85ms/step - loss: 3.2414e-04 - val_loss: 0.0027
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 87ms/step - loss: 2.5606e-04 - val_loss: 0.0011
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 83ms/step - loss: 1.8933e-04 - val_loss: 0.0012
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 85ms/step - loss: 1.6350e-04 - val_loss: 0.0013
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 85ms/step - loss: 1.3016e-04 - val_loss: 3.7718e-04
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 79ms/step - loss: 9.8044e-05 - val_loss: 8.4953e-04
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 83ms/step - loss: 9.1449e-05 - val_loss: 3.1723e-04
Epoch 9/50
[1m56/5



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step
Reloading Tuner from my_dir\lstm_stock_model_GAS\tuner0.json
Ticker: GAS
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 288 units
The optimal learning rate for the optimizer is 0.003907263893779795.
Epoch 1/50


  super().__init__(**kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 56ms/step - loss: 5.3662e-04 - val_loss: 1.3211e-08
Epoch 2/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 2.1943e-07 - val_loss: 3.7250e-07
Epoch 3/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step - loss: 8.5855e-08 - val_loss: 2.6750e-07
Epoch 4/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 44ms/step - loss: 7.7609e-08 - val_loss: 3.2278e-07
Epoch 5/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - loss: 7.1004e-08 - val_loss: 2.6480e-07
Epoch 6/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step - loss: 7.0329e-08 - val_loss: 2.3213e-07




[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step
Reloading Tuner from my_dir\lstm_stock_model_NVDA\tuner0.json
Ticker: NVDA
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 512 units
The optimal learning rate for the optimizer is 0.0005794155666227713.


  super().__init__(**kwargs)


Epoch 1/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 94ms/step - loss: 3.5482e-05 - val_loss: 5.5737e-06
Epoch 2/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 95ms/step - loss: 2.1404e-07 - val_loss: 5.2733e-06
Epoch 3/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 91ms/step - loss: 1.5102e-07 - val_loss: 4.6023e-06
Epoch 4/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 85ms/step - loss: 1.2923e-07 - val_loss: 3.7389e-06
Epoch 5/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 100ms/step - loss: 1.1236e-07 - val_loss: 3.0760e-06
Epoch 6/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 100ms/step - loss: 9.4836e-08 - val_loss: 2.9226e-06
Epoch 7/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 100ms/step - loss: 6.6279e-08 - val_loss: 2.0214e-06
Epoch 8/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 99ms/step - loss: 5.7294e-08 - va



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 56ms/step
Reloading Tuner from my_dir\lstm_stock_model_VCB\tuner0.json
Ticker: VCB
The hyperparameter search is complete. The optimal number of layers is 2.
Layer 1: 160 units
Layer 2: 320 units
The optimal learning rate for the optimizer is 0.0011109201683730445.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 77ms/step - loss: 0.0096 - val_loss: 3.7480e-04
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 79ms/step - loss: 2.7652e-04 - val_loss: 3.6426e-04
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 80ms/step - loss: 1.8191e-04 - val_loss: 2.9954e-04
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 78ms/step - loss: 1.7183e-04 - val_loss: 4.3848e-04
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 74ms/step - loss: 1.5588e-04 - val_loss: 1.4570e-04
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 78ms/step - loss: 1.3965e-04 - val_loss: 2.0712e-04
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 78ms/step - loss: 1.3949e-04 - val_loss: 1.2399e-04
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 83ms/step - loss: 1.3624e-04 - val_loss: 1.1857e-04



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 61ms/step
Reloading Tuner from my_dir\lstm_stock_model_VNM\tuner0.json
Ticker: VNM
The hyperparameter search is complete. The optimal number of layers is 2.
Layer 1: 256 units
Layer 2: 384 units
The optimal learning rate for the optimizer is 0.001266095545138909.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 126ms/step - loss: 0.0428 - val_loss: 6.5142e-04
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 113ms/step - loss: 0.0010 - val_loss: 1.4314e-04
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 112ms/step - loss: 6.3521e-04 - val_loss: 1.2669e-04
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 105ms/step - loss: 5.4593e-04 - val_loss: 2.7484e-04
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 107ms/step - loss: 6.4204e-04 - val_loss: 5.1607e-04
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 109ms/step - loss: 6.4827e-04 - val_loss: 1.5632e-04
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 104ms/step - loss: 3.5248e-04 - val_loss: 2.7443e-04
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 114ms/step - loss: 7.1000e-04 - val_loss: 2.1650



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step


### Plotting the Results

In [9]:
# Plotting the training and validation loss
def plot_training_validation_loss(ticker):
    with open(f'models/{ticker}/history_{ticker}.pkl', 'rb') as file:
        history = joblib.load(file)

    plt.figure(figsize=(14, 5))
    plt.plot(history['loss'], label='Training Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss for {ticker}')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(f'models/{ticker}/training_validation_loss_{ticker}.png')  # Save the plot
    plt.close()

# Plotting the stock price prediction
def plot_stock_price_prediction(ticker):
    y_test = np.load(f'models/{ticker}/y_test_{ticker}.npy')
    y_pred = np.load(f'models/{ticker}/y_pred_{ticker}.npy')

    plt.figure(figsize=(14, 5))
    plt.plot(y_test, color='blue', label='Actual Stock Price')
    plt.plot(y_pred, color='red', label='Predicted Stock Price')
    plt.title(f'Stock Price Prediction for {ticker}')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.savefig(f'models/{ticker}/stock_price_prediction_{ticker}.png')  # Save the plot
    plt.close()

# Plot results for each ticker
for ticker in tickers:
    plot_training_validation_loss(ticker)
    plot_stock_price_prediction(ticker)
    


In [10]:
# Retrieve the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Ensure the directory exists
os.makedirs(f'my_dir/lstm_stock_model_{ticker}', exist_ok=True)

# Print and save hyperparameters
with open(f'my_dir/lstm_stock_model_{ticker}/best_hyperparameters_{ticker}.txt', 'w') as f:
    f.write(f"Stock: {ticker}\n")
    f.write(f"The hyperparameter search is complete. The optimal number of layers is {best_hps.get('num_layers')}.\n")
    for i in range(best_hps.get('num_layers')):
        f.write(f"Layer {i + 1}: {best_hps.get(f'units_{i}')} units\n")
    f.write(f"The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.\n")

print(f"Stock: {ticker}")
print(f"The hyperparameter search is complete. The optimal number of layers is {best_hps.get('num_layers')}.")
for i in range(best_hps.get('num_layers')):
    print(f"Layer {i + 1}: {best_hps.get(f'units_{i}')} units")
print(f"The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.")


Stock: VNM
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 288 units
The optimal learning rate for the optimizer is 0.001945510412913914.


In [13]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np
import matplotlib.pyplot as plt
import joblib

# Function to calculate and save metrics
def calculate_metrics(y_true, y_pred, ticker):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    
    accuracy = np.mean(np.abs((y_true - y_pred) / y_true) < 0.05) * 100  # Within 5% tolerance
    
    metrics = (
        f"Metrics for {ticker}:\n"
        f"Mean Squared Error (MSE): {mse}\n"
        f"Root Mean Squared Error (RMSE): {rmse}\n"
        f"R-squared (R2): {r2}\n"
        f"Mean Absolute Error (MAE): {mae}\n"
        f"Accuracy (within 5% tolerance): {accuracy}%\n"
        "--------------------------\n"
    )
    
    # Print metrics to console
    print(metrics)
    
    # Save metrics to a file
    with open(f'models/{ticker}/metrics_{ticker}.txt', 'w') as file:
        file.write(metrics)

# Plotting the training and validation loss
def plot_training_validation_loss(ticker):
    with open(f'models/{ticker}/history_{ticker}.pkl', 'rb') as file:
        history = joblib.load(file)

    plt.figure(figsize=(14, 5))
    plt.plot(history['loss'], label='Training Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss for {ticker}')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(f'models/{ticker}/training_validation_loss_{ticker}.png')  # Save the plot
    plt.close()

# Plotting the stock price prediction
def plot_stock_price_prediction(ticker):
    y_test = np.load(f'models/{ticker}/y_test_{ticker}.npy')
    y_pred = np.load(f'models/{ticker}/y_pred_{ticker}.npy')

    # Calculate and save metrics
    calculate_metrics(y_test, y_pred, ticker)

    plt.figure(figsize=(14, 5))
    plt.plot(y_test, color='blue', label='Actual Stock Price')
    plt.plot(y_pred, color='red', label='Predicted Stock Price')
    plt.title(f'Stock Price Prediction for {ticker}')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.savefig(f'models/{ticker}/stock_price_prediction_{ticker}.png')  # Save the plot
    plt.close()


# Plot results for each ticker
for ticker in tickers:
    plot_training_validation_loss(ticker)
    plot_stock_price_prediction(ticker)


Metrics for AAA:
Mean Squared Error (MSE): 156778.47056998318
Root Mean Squared Error (RMSE): 395.9526115206
R-squared (R2): 0.9310251627274403
Mean Absolute Error (MAE): 329.5222014696609
Accuracy (within 5% tolerance): 79.5964125560538%
--------------------------

Metrics for AAPL:
Mean Squared Error (MSE): 61.29782492088633
Root Mean Squared Error (RMSE): 7.829292747169844
R-squared (R2): 0.8196670829334662
Mean Absolute Error (MAE): 6.852946345631668
Accuracy (within 5% tolerance): 65.18847006651885%
--------------------------

Metrics for ACB:
Mean Squared Error (MSE): 447523.6327764048
Root Mean Squared Error (RMSE): 668.9720717462013
R-squared (R2): 0.9543389790711115
Mean Absolute Error (MAE): 507.6501217418723
Accuracy (within 5% tolerance): 91.03139013452915%
--------------------------

Metrics for BID:
Mean Squared Error (MSE): 1897392.3296908601
Root Mean Squared Error (RMSE): 1377.4586489949018
R-squared (R2): 0.9528539294064157
Mean Absolute Error (MAE): 1110.490985913031

# Load the dataset
file_path = 'D:\\Github Mikezxc\\Big-data-stock-real-time-platform\\merged_data_with_ma.csv'
merged_df = pd.read_csv(file_path)

# Normalize data
price_scaler = MinMaxScaler()
merged_df[['close']] = price_scaler.fit_transform(merged_df[['close']])

feature_scaler = MinMaxScaler()
merged_df[['MA30', 'MA90']] = feature_scaler.fit_transform(merged_df[['MA30', 'MA90']])

# Prepare sequences for LSTM
def create_sequences(df, time_steps=30):
    sequences = []
    labels = []
    for i in range(len(df) - time_steps):
        sequence = df[['close', 'MA30', 'MA90']].iloc[i:i+time_steps].values
        label = df['close'].iloc[i+time_steps]
        sequences.append(sequence)
        labels.append(label)
    return np.array(sequences), np.array(labels)

# Define the LSTM HyperModel
class LSTMHyperModel(kt.HyperModel):
    def build(self, hp):
        model = Sequential()
        for i in range(hp.Int('num_layers', 1, 3)):
            model.add(LSTM(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32), 
                           return_sequences=(i != hp.Int('num_layers', 1, 3) - 1), input_shape=(time_steps, 3)))
        model.add(Dense(1))  # Output layer should match the number of features
        model.compile(optimizer=tf.keras.optimizers.Adam(
            hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)),
            loss='mean_squared_error')
        return model

# Train and evaluate the model for each ticker
tickers = merged_df['ticker'].unique()

for ticker in tickers:
    ticker_df = merged_df[merged_df['ticker'] == ticker].dropna()
    
    # Create sequences and labels
    time_steps = 30
    X, y = create_sequences(ticker_df, time_steps)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Initialize RandomSearch Tuner
    tuner = kt.RandomSearch(
        LSTMHyperModel(),
        objective='val_loss',
        max_trials=20,
        executions_per_trial=1,
        directory='my_dir',
        project_name=f'lstm_stock_model_{ticker}'
    )

    # Perform hyperparameter search
    tuner.search(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

    # Retrieve the best hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(f"Ticker: {ticker}")
    print(f"The hyperparameter search is complete. The optimal number of layers is {best_hps.get('num_layers')}.")
    for i in range(best_hps.get('num_layers')):
        print(f"Layer {i + 1}: {best_hps.get(f'units_{i}')} units")
    print(f"The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.")

    # Build the model with the optimal hyperparameters
    model = tuner.hypermodel.build(best_hps)

    # Train the model
    history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

    # Save the model
    model.save(f'/mnt/data/lstm_stock_model_best_{ticker}.h5')

    # Save the scalers
    joblib.dump(price_scaler, f'/mnt/data/{ticker}_price_scaler.pkl')
    joblib.dump(feature_scaler, f'/mnt/data/{ticker}_feature_scaler.pkl')

    # Plot the training and validation loss
    plt.figure(figsize=(14, 5))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss for {ticker}')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # Make predictions
    y_pred = model.predict(X_test)

    # Inverse transform the predictions and the actual values
    y_pred = price_scaler.inverse_transform(y_pred)
    y_test = price_scaler.inverse_transform(y_test.reshape(-1, 1))

    # Plot the results
    plt.figure(figsize=(14, 5))
    plt.plot(y_test, color='blue', label='Actual Stock Price')
    plt.plot(y_pred, color='red', label='Predicted Stock Price')
    plt.title(f'Stock Price Prediction for {ticker}')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.show()