### Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import keras_tuner as kt
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
import tensorflow as tf
import joblib
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import os
import shutil

### Load and Normalize the Dataset

In [2]:
# Load the dataset
file_path = 'D:\\Github anyud\\final\\Data_stock\\processed_stock_data.csv'
merged_df = pd.read_csv(file_path)

# Normalize data
price_scaler = MinMaxScaler()
merged_df[['Price']] = price_scaler.fit_transform(merged_df[['Price']])

feature_scaler = MinMaxScaler()
merged_df[['MA30', 'MA90']] = feature_scaler.fit_transform(merged_df[['MA30', 'MA90']])


### Prepare Sequences for LSTM

In [3]:
# Prepare sequences for LSTM
def create_sequences(df, time_steps=30):
    sequences = []
    labels = []
    for i in range(len(df) - time_steps):
        sequence = df[['Price', 'MA30', 'MA90']].iloc[i:i+time_steps].values
        label = df['Price'].iloc[i+time_steps]
        sequences.append(sequence)
        labels.append(label)
    return np.array(sequences), np.array(labels)


### Define the LSTM HyperModel

In [4]:
# Define the LSTM HyperModel
class LSTMHyperModel(kt.HyperModel):
    def build(self, hp):
        model = Sequential()
        for i in range(hp.Int('num_layers', 1, 3)):
            model.add(LSTM(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32), 
                           return_sequences=(i != hp.Int('num_layers', 1, 3) - 1), input_shape=(time_steps, 3)))
        model.add(Dense(1))  # Output layer should match the number of features
        model.compile(optimizer=tf.keras.optimizers.Adam(
            hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)),
            loss='mean_squared_error')
        return model


### Train and Evaluate the Model for Each Ticker

In [5]:
# Train and evaluate the model for each ticker
tickers = merged_df['Stock_Name'].unique()

for ticker in tickers:
    ticker_df = merged_df[merged_df['Stock_Name'] == ticker].dropna()
    
    # Create sequences and labels
    time_steps = 30
    X, y = create_sequences(ticker_df, time_steps)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Initialize RandomSearch Tuner
    tuner = kt.RandomSearch(
        LSTMHyperModel(),
        objective='val_loss',
        max_trials=20,
        executions_per_trial=1,
        directory='my_dir',
        project_name=f'lstm_stock_model_{ticker}'
    )

    # Perform hyperparameter search
    tuner.search(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

    # Retrieve the best hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(f"Ticker: {ticker}")
    print(f"The hyperparameter search is complete. The optimal number of layers is {best_hps.get('num_layers')}.")
    for i in range(best_hps.get('num_layers')):
        print(f"Layer {i + 1}: {best_hps.get(f'units_{i}')} units")
    print(f"The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.")

    # Build the model with the optimal hyperparameters
    model = tuner.hypermodel.build(best_hps)

    # Train the model
    history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

    # Create directory for ticker if it doesn't exist
    os.makedirs(f'models/{ticker}', exist_ok=True)

    # Save the model
    model.save(f'models/{ticker}/lstm_stock_model_best_{ticker}.h5')

    # Save the scalers
    joblib.dump(price_scaler, f'models/{ticker}/{ticker}_price_scaler.pkl')
    joblib.dump(feature_scaler, f'models/{ticker}/{ticker}_feature_scaler.pkl')

    # Save the history for plotting
    with open(f'models/{ticker}/history_{ticker}.pkl', 'wb') as file:
        joblib.dump(history.history, file)

    # Make predictions
    y_pred = model.predict(X_test)

    # Inverse transform the predictions and the actual values
    y_pred = price_scaler.inverse_transform(y_pred)
    y_test = price_scaler.inverse_transform(y_test.reshape(-1, 1))

    # Save the predictions for plotting
    np.save(f'models/{ticker}/y_test_{ticker}.npy', y_test)
    np.save(f'models/{ticker}/y_pred_{ticker}.npy', y_pred)


Reloading Tuner from my_dir\lstm_stock_model_AAA\tuner0.json
Ticker: AAA
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 224 units
The optimal learning rate for the optimizer is 0.000699204056440982.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 9.5637e-04 - val_loss: 3.4114e-05
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 4.5015e-05 - val_loss: 1.7771e-05
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 2.5967e-05 - val_loss: 1.5391e-05
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 2.2769e-05 - val_loss: 1.4081e-05
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 1.9318e-05 - val_loss: 1.1567e-05
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 1.9765e-05 - val_loss: 9.6867e-06
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 2.0694e-05 - val_loss: 1.7250e-05
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 1.8790e-05 - val_loss: 7.2645



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step
Reloading Tuner from my_dir\lstm_stock_model_AAPL\tuner0.json
Ticker: AAPL
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 64 units
The optimal learning rate for the optimizer is 0.0010817749459071228.
Epoch 1/50


  super().__init__(**kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 1.5205e-05 - val_loss: 6.5374e-07
Epoch 2/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 8.7958e-08 - val_loss: 2.6241e-07
Epoch 3/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 4.9261e-08 - val_loss: 1.4686e-07
Epoch 4/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 4.5407e-08 - val_loss: 1.7639e-07
Epoch 5/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 3.8246e-08 - val_loss: 8.1708e-08
Epoch 6/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 3.3643e-08 - val_loss: 4.6361e-08
Epoch 7/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 3.0853e-08 - val_loss: 1.2432e-07
Epoch 8/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 2.2270e-08 - val_loss: 4.7986



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Reloading Tuner from my_dir\lstm_stock_model_ACB\tuner0.json
Ticker: ACB
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 288 units
The optimal learning rate for the optimizer is 0.001945510412913914.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 0.0010 - val_loss: 4.8352e-05
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 2.3574e-05 - val_loss: 2.7435e-05
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 1.7644e-05 - val_loss: 1.7788e-05
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 1.4764e-05 - val_loss: 2.3685e-05
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 1.1671e-05 - val_loss: 1.5362e-05
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 1.1995e-05 - val_loss: 1.4884e-05
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 1.2148e-05 - val_loss: 1.6839e-05
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 1.0043e-05 - val_loss: 1.8066e-05



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Reloading Tuner from my_dir\lstm_stock_model_BID\tuner0.json
Ticker: BID
The hyperparameter search is complete. The optimal number of layers is 3.
Layer 1: 320 units
Layer 2: 96 units
Layer 3: 192 units
The optimal learning rate for the optimizer is 0.0023311662519474776.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - loss: 0.0061 - val_loss: 3.4793e-04
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 2.5606e-04 - val_loss: 2.3876e-04
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 1.1639e-04 - val_loss: 2.2084e-04
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - loss: 9.0447e-05 - val_loss: 6.1407e-05
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - loss: 8.8563e-05 - val_loss: 6.1444e-05
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - loss: 8.2530e-05 - val_loss: 2.8368e-04
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 8.0199e-05 - val_loss: 9.2182e-05
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - loss: 6.5002e-05 - val_loss: 1.1950e-04



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
Reloading Tuner from my_dir\lstm_stock_model_CTG\tuner0.json
Ticker: CTG
The hyperparameter search is complete. The optimal number of layers is 3.
Layer 1: 384 units
Layer 2: 352 units
Layer 3: 32 units
The optimal learning rate for the optimizer is 0.0008569717606115053.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 0.0015 - val_loss: 7.2777e-05
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 64ms/step - loss: 8.4917e-05 - val_loss: 6.0308e-05
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 67ms/step - loss: 7.3552e-05 - val_loss: 4.7835e-05
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 67ms/step - loss: 6.6136e-05 - val_loss: 4.0249e-05
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 66ms/step - loss: 5.3000e-05 - val_loss: 4.0316e-05
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 66ms/step - loss: 4.6865e-05 - val_loss: 3.8866e-05
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 65ms/step - loss: 4.6098e-05 - val_loss: 8.0336e-05
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 65ms/step - loss: 5.9685e-05 - val_loss: 3.0848e-05



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
Reloading Tuner from my_dir\lstm_stock_model_FPT\tuner0.json
Ticker: FPT
The hyperparameter search is complete. The optimal number of layers is 3.
Layer 1: 160 units
Layer 2: 256 units
Layer 3: 96 units
The optimal learning rate for the optimizer is 0.0012564412521369585.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 45ms/step - loss: 0.0055 - val_loss: 0.0012
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 2.4646e-04 - val_loss: 0.0011
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 2.6596e-04 - val_loss: 9.2697e-04
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 1.8380e-04 - val_loss: 0.0016
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 1.9386e-04 - val_loss: 5.9568e-04
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - loss: 1.3359e-04 - val_loss: 6.7459e-04
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - loss: 2.0651e-04 - val_loss: 4.6427e-04
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 1.1548e-04 - val_loss: 4.0553e-04
Epoch 9/50




[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
Reloading Tuner from my_dir\lstm_stock_model_GAS\tuner0.json
Ticker: GAS
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 288 units
The optimal learning rate for the optimizer is 0.003907263893779795.
Epoch 1/50


  super().__init__(**kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 4.1179e-04 - val_loss: 2.1154e-06
Epoch 2/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 2.8741e-07 - val_loss: 3.1615e-07
Epoch 3/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 7.8105e-08 - val_loss: 3.6796e-07
Epoch 4/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 6.9148e-08 - val_loss: 1.5808e-07
Epoch 5/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 7.6036e-08 - val_loss: 3.8365e-07
Epoch 6/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 6.5324e-08 - val_loss: 3.5085e-07
Epoch 7/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 6.4634e-08 - val_loss: 3.5252e-07
Epoch 8/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 6.5754e-08 - val_loss: 2.5401



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Reloading Tuner from my_dir\lstm_stock_model_NVDA\tuner0.json
Ticker: NVDA
The hyperparameter search is complete. The optimal number of layers is 1.
Layer 1: 512 units
The optimal learning rate for the optimizer is 0.0005794155666227713.
Epoch 1/50


  super().__init__(**kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 3.1923e-05 - val_loss: 3.9968e-06
Epoch 2/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 2.2630e-07 - val_loss: 4.9566e-06
Epoch 3/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 1.4420e-07 - val_loss: 4.8364e-06
Epoch 4/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - loss: 1.3396e-07 - val_loss: 4.1868e-06
Epoch 5/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - loss: 1.0570e-07 - val_loss: 2.8445e-06
Epoch 6/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - loss: 8.5703e-08 - val_loss: 2.2169e-06
Epoch 7/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - loss: 6.9403e-08 - val_loss: 2.0336e-06
Epoch 8/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - loss: 4.8948e-08 - val_loss: 1.4360



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step
Reloading Tuner from my_dir\lstm_stock_model_VCB\tuner0.json
Ticker: VCB
The hyperparameter search is complete. The optimal number of layers is 2.
Layer 1: 160 units
Layer 2: 320 units
The optimal learning rate for the optimizer is 0.0011109201683730445.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - loss: 0.0156 - val_loss: 3.3475e-04
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 2.6436e-04 - val_loss: 2.3341e-04
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - loss: 2.6214e-04 - val_loss: 2.9117e-04
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 2.0395e-04 - val_loss: 1.8627e-04
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 1.8334e-04 - val_loss: 1.7854e-04
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - loss: 1.5497e-04 - val_loss: 1.4668e-04
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - loss: 1.7148e-04 - val_loss: 1.3940e-04
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 1.3312e-04 - val_loss: 1.3227e-04



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
Reloading Tuner from my_dir\lstm_stock_model_VNM\tuner0.json
Ticker: VNM
The hyperparameter search is complete. The optimal number of layers is 2.
Layer 1: 256 units
Layer 2: 384 units
The optimal learning rate for the optimizer is 0.001266095545138909.
Epoch 1/50


  super().__init__(**kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 114ms/step - loss: 0.0386 - val_loss: 4.5637e-04
Epoch 2/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 104ms/step - loss: 8.1236e-04 - val_loss: 1.5343e-04
Epoch 3/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 103ms/step - loss: 6.6681e-04 - val_loss: 1.6548e-04
Epoch 4/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 107ms/step - loss: 5.6819e-04 - val_loss: 1.0099e-04
Epoch 5/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 109ms/step - loss: 4.3616e-04 - val_loss: 9.9717e-05
Epoch 6/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 106ms/step - loss: 6.3156e-04 - val_loss: 8.6167e-05
Epoch 7/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 107ms/step - loss: 4.8385e-04 - val_loss: 8.3663e-05
Epoch 8/50
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 126ms/step - loss: 3.7264e-04 - val_loss: 3.



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step


### Plotting the Results

In [6]:
# Plotting the training and validation loss
def plot_training_validation_loss(ticker):
    with open(f'models/{ticker}/history_{ticker}.pkl', 'rb') as file:
        history = joblib.load(file)

    plt.figure(figsize=(14, 5))
    plt.plot(history['loss'], label='Training Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss for {ticker}')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(f'models/{ticker}/training_validation_loss_{ticker}.png')  # Save the plot
    plt.close()

# Plotting the stock price prediction
def plot_stock_price_prediction(ticker):
    y_test = np.load(f'models/{ticker}/y_test_{ticker}.npy')
    y_pred = np.load(f'models/{ticker}/y_pred_{ticker}.npy')

    plt.figure(figsize=(14, 5))
    plt.plot(y_test, color='blue', label='Actual Stock Price')
    plt.plot(y_pred, color='red', label='Predicted Stock Price')
    plt.title(f'Stock Price Prediction for {ticker}')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.savefig(f'models/{ticker}/stock_price_prediction_{ticker}.png')  # Save the plot
    plt.close()

# Plot results for each ticker
for ticker in tickers:
    plot_training_validation_loss(ticker)
    plot_stock_price_prediction(ticker)


# Load the dataset
file_path = 'D:\\Github Mikezxc\\Big-data-stock-real-time-platform\\merged_data_with_ma.csv'
merged_df = pd.read_csv(file_path)

# Normalize data
price_scaler = MinMaxScaler()
merged_df[['close']] = price_scaler.fit_transform(merged_df[['close']])

feature_scaler = MinMaxScaler()
merged_df[['MA30', 'MA90']] = feature_scaler.fit_transform(merged_df[['MA30', 'MA90']])

# Prepare sequences for LSTM
def create_sequences(df, time_steps=30):
    sequences = []
    labels = []
    for i in range(len(df) - time_steps):
        sequence = df[['close', 'MA30', 'MA90']].iloc[i:i+time_steps].values
        label = df['close'].iloc[i+time_steps]
        sequences.append(sequence)
        labels.append(label)
    return np.array(sequences), np.array(labels)

# Define the LSTM HyperModel
class LSTMHyperModel(kt.HyperModel):
    def build(self, hp):
        model = Sequential()
        for i in range(hp.Int('num_layers', 1, 3)):
            model.add(LSTM(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32), 
                           return_sequences=(i != hp.Int('num_layers', 1, 3) - 1), input_shape=(time_steps, 3)))
        model.add(Dense(1))  # Output layer should match the number of features
        model.compile(optimizer=tf.keras.optimizers.Adam(
            hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)),
            loss='mean_squared_error')
        return model

# Train and evaluate the model for each ticker
tickers = merged_df['ticker'].unique()

for ticker in tickers:
    ticker_df = merged_df[merged_df['ticker'] == ticker].dropna()
    
    # Create sequences and labels
    time_steps = 30
    X, y = create_sequences(ticker_df, time_steps)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Initialize RandomSearch Tuner
    tuner = kt.RandomSearch(
        LSTMHyperModel(),
        objective='val_loss',
        max_trials=20,
        executions_per_trial=1,
        directory='my_dir',
        project_name=f'lstm_stock_model_{ticker}'
    )

    # Perform hyperparameter search
    tuner.search(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

    # Retrieve the best hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(f"Ticker: {ticker}")
    print(f"The hyperparameter search is complete. The optimal number of layers is {best_hps.get('num_layers')}.")
    for i in range(best_hps.get('num_layers')):
        print(f"Layer {i + 1}: {best_hps.get(f'units_{i}')} units")
    print(f"The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.")

    # Build the model with the optimal hyperparameters
    model = tuner.hypermodel.build(best_hps)

    # Train the model
    history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

    # Save the model
    model.save(f'/mnt/data/lstm_stock_model_best_{ticker}.h5')

    # Save the scalers
    joblib.dump(price_scaler, f'/mnt/data/{ticker}_price_scaler.pkl')
    joblib.dump(feature_scaler, f'/mnt/data/{ticker}_feature_scaler.pkl')

    # Plot the training and validation loss
    plt.figure(figsize=(14, 5))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss for {ticker}')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # Make predictions
    y_pred = model.predict(X_test)

    # Inverse transform the predictions and the actual values
    y_pred = price_scaler.inverse_transform(y_pred)
    y_test = price_scaler.inverse_transform(y_test.reshape(-1, 1))

    # Plot the results
    plt.figure(figsize=(14, 5))
    plt.plot(y_test, color='blue', label='Actual Stock Price')
    plt.plot(y_pred, color='red', label='Predicted Stock Price')
    plt.title(f'Stock Price Prediction for {ticker}')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.show()