In [8]:
import numpy as np
import pandas as pd
import optuna
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Step 1: Read the three cleaned close-price files, using the parsed 'Date'
# column as the index of each frame.
tesla_data = pd.read_csv(
    'Clean_data/Cleaned_Tesla_Close.csv',
    index_col='Date',
    parse_dates=['Date'],
)
sp500_data = pd.read_csv(
    'Clean_data/Cleaned_SP500_Close.csv',
    index_col='Date',
    parse_dates=['Date'],
)
ixic_data = pd.read_csv(
    'Clean_data/Cleaned_IXIC_Close.csv',
    index_col='Date',
    parse_dates=['Date'],
)

# Join the frames on their date index. Column names that collide keep the
# Tesla name unchanged and get '_sp500' / '_ixic' appended on the right side.
merged_data = (
    tesla_data
    .merge(sp500_data, left_index=True, right_index=True, suffixes=('', '_sp500'))
    .merge(ixic_data, left_index=True, right_index=True, suffixes=('', '_ixic'))
)

# Step 2: Keep the three model features; the Tesla column comes first because
# column 0 is used as the prediction target further down.
data = merged_data[
    ['Close_predicted', 'Close_predicted_sp500', 'Close_predicted_ixic']
]

# Min-max scale each feature after dropping rows with missing values.
# NOTE(review): the scaler is fit on the whole series before the train/test
# split below, so test-period extremes influence the scaling - confirm this
# is intended.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.dropna())
# Prepare the dataset for LSTM
def create_dataset(dataset, time_step=60):
    """Build sliding-window samples for next-step prediction.

    Each sample is ``time_step`` consecutive rows of ``dataset``; its target
    is column 0 of the row that immediately follows the window.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_features)``.
        time_step: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape
        ``(n_rows - time_step, time_step, n_features)`` and ``y`` has shape
        ``(n_rows - time_step,)``. If there are too few rows for a single
        window, both are empty but ``X`` keeps its 3-D shape so callers that
        read ``X.shape[2]`` still work.

    Raises:
        ValueError: If ``time_step`` is negative.
    """
    dataset = np.asarray(dataset)
    if time_step < 0:
        raise ValueError(f'time_step must be non-negative, got {time_step}')

    n_windows = len(dataset) - time_step
    if n_windows <= 0:
        # Not enough history for even one window: return well-shaped empties
        # instead of the shape-(0,) arrays a bare np.array([]) would give.
        empty_X = np.empty((0, time_step) + dataset.shape[1:], dtype=dataset.dtype)
        return empty_X, np.empty((0,), dtype=dataset.dtype)

    # Row indices of every window at once: window k covers rows
    # k .. k + time_step - 1, so one fancy-index builds all samples.
    window_rows = np.arange(n_windows)[:, None] + np.arange(time_step)[None, :]
    X = dataset[window_rows]
    # Target is the first column of the row right after each window; copy so
    # the result does not alias the input array.
    y = dataset[time_step:, 0].copy()
    return X, y

# Number of past rows fed to the model for each sample
time_step = 60
X, y = create_dataset(scaled_data, time_step)

# Order-preserving 80/20 split (no shuffling): the first 80% of the windows
# are used for training, the remainder for testing.
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Function to create and train the LSTM model with Optuna
def objective(trial):
    """Train one LSTM configuration and return its RMSE in unscaled units.

    Samples hyperparameters from ``trial``, builds a model of two stacked
    LSTM layers (each followed by dropout) with a single-unit dense output,
    fits it on the module-level ``X_train`` / ``y_train``, and scores it on
    ``X_test`` / ``y_test`` after undoing the min-max scaling with the
    module-level ``scaler``.

    NOTE(review): the score is computed on the test split, so the chosen
    hyperparameters are tuned against the same data later used for
    evaluation - consider carving out a separate validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Root-mean-squared error between the inverse-scaled predictions and
        targets (column 0 of the scaled data) on the test windows.
    """
    # Function-scope import keeps this block self-contained.
    from tensorflow.keras.backend import clear_session

    # Release the Keras global state left behind by earlier trials so memory
    # does not accumulate over the course of the study.
    clear_session()

    # Hyperparameters to optimize
    lstm_units = trial.suggest_int('lstm_units', 50, 100)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    epochs = trial.suggest_int('epochs', 100, 200)

    # Build the LSTM model
    model = Sequential([
        Input(shape=(time_step, X_train.shape[2])),
        LSTM(lstm_units, return_sequences=True),
        Dropout(dropout_rate),
        LSTM(lstm_units),
        Dropout(dropout_rate),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # Evaluate the model (verbose=0 to match fit and keep the trial log clean)
    test_predictions = model.predict(X_test, verbose=0)

    # The scaler was fit on every feature column, so inverse_transform needs
    # the same number of columns. The non-target columns of each window's
    # last row act as padding; only column 0 of the result is kept.
    padding_columns = X_test[:, -1, 1:]
    test_predictions_rescaled = scaler.inverse_transform(
        np.hstack([test_predictions, padding_columns])
    )[:, 0]
    y_test_rescaled = scaler.inverse_transform(
        np.hstack([y_test.reshape(-1, 1), padding_columns])
    )[:, 0]

    rmse = np.sqrt(mean_squared_error(y_test_rescaled, test_predictions_rescaled))
    return rmse

# Launch the hyperparameter search; the objective returns an RMSE, so the
# study minimizes it.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Report the winning trial: its score first, then every sampled parameter
print('Best trial:')
trial = study.best_trial
print(f'  Value (RMSE): {trial.value}')
print('  Params:')
for param_name, param_value in trial.params.items():
    print(f'    {param_name}: {param_value}')


[I 2024-11-16 22:32:52,148] A new study created in memory with name: no-name-59e92c55-1572-4da3-9dab-bdc2a5f614df


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step


[I 2024-11-16 22:34:56,930] Trial 0 finished with value: 17.729190960506404 and parameters: {'lstm_units': 76, 'dropout_rate': 0.27746673263988897, 'learning_rate': 7.249980148144902e-05, 'batch_size': 16, 'epochs': 105}. Best is trial 0 with value: 17.729190960506404.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step


[I 2024-11-16 22:37:24,928] Trial 1 finished with value: 16.64092801459754 and parameters: {'lstm_units': 78, 'dropout_rate': 0.3962576263628458, 'learning_rate': 0.002115787647684285, 'batch_size': 16, 'epochs': 124}. Best is trial 1 with value: 16.64092801459754.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step


[I 2024-11-16 22:40:20,278] Trial 2 finished with value: 17.236933363166877 and parameters: {'lstm_units': 95, 'dropout_rate': 0.49370145152526534, 'learning_rate': 0.0015630020757446857, 'batch_size': 32, 'epochs': 185}. Best is trial 1 with value: 16.64092801459754.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


[I 2024-11-16 22:42:43,313] Trial 3 finished with value: 15.43608080839731 and parameters: {'lstm_units': 76, 'dropout_rate': 0.28019359564168866, 'learning_rate': 0.0006594266278697577, 'batch_size': 16, 'epochs': 121}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step


[I 2024-11-16 22:45:29,028] Trial 4 finished with value: 18.7767975329634 and parameters: {'lstm_units': 62, 'dropout_rate': 0.4538896114679084, 'learning_rate': 0.003233092115862095, 'batch_size': 16, 'epochs': 153}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step


[I 2024-11-16 22:47:54,598] Trial 5 finished with value: 20.736906353451978 and parameters: {'lstm_units': 97, 'dropout_rate': 0.3374688414192749, 'learning_rate': 0.00012128409853328045, 'batch_size': 32, 'epochs': 140}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


[I 2024-11-16 22:49:20,720] Trial 6 finished with value: 18.263627209265273 and parameters: {'lstm_units': 50, 'dropout_rate': 0.20440285720994306, 'learning_rate': 0.0006672346608908831, 'batch_size': 64, 'epochs': 177}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step


[I 2024-11-16 22:51:30,394] Trial 7 finished with value: 15.653109474375078 and parameters: {'lstm_units': 71, 'dropout_rate': 0.3281644141456055, 'learning_rate': 0.0007314238871882003, 'batch_size': 32, 'epochs': 160}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


[I 2024-11-16 22:52:52,578] Trial 8 finished with value: 31.069991198810794 and parameters: {'lstm_units': 71, 'dropout_rate': 0.48549126482507055, 'learning_rate': 3.5193179048468373e-05, 'batch_size': 64, 'epochs': 146}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step


[I 2024-11-16 22:57:03,173] Trial 9 finished with value: 21.906267700047692 and parameters: {'lstm_units': 81, 'dropout_rate': 0.2232210733574089, 'learning_rate': 2.490012918738419e-05, 'batch_size': 16, 'epochs': 180}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


[I 2024-11-16 22:59:22,259] Trial 10 finished with value: 19.475633459210847 and parameters: {'lstm_units': 89, 'dropout_rate': 0.10956878757658667, 'learning_rate': 0.008847661714108179, 'batch_size': 16, 'epochs': 101}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step


[I 2024-11-16 23:01:40,486] Trial 11 finished with value: 16.688912891941822 and parameters: {'lstm_units': 68, 'dropout_rate': 0.31808404349416436, 'learning_rate': 0.00031932460810764953, 'batch_size': 32, 'epochs': 163}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


[I 2024-11-16 23:03:12,204] Trial 12 finished with value: 16.101721754890058 and parameters: {'lstm_units': 59, 'dropout_rate': 0.371168034221084, 'learning_rate': 0.0004807927441395839, 'batch_size': 32, 'epochs': 122}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step


[I 2024-11-16 23:06:16,395] Trial 13 finished with value: 19.613388609103787 and parameters: {'lstm_units': 85, 'dropout_rate': 0.24615826181253908, 'learning_rate': 0.000170764623286707, 'batch_size': 32, 'epochs': 200}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


[I 2024-11-16 23:07:32,289] Trial 14 finished with value: 23.18443302186334 and parameters: {'lstm_units': 66, 'dropout_rate': 0.15623238452191374, 'learning_rate': 0.0006872128237243367, 'batch_size': 64, 'epochs': 137}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


[I 2024-11-16 23:10:51,629] Trial 15 finished with value: 15.449935594293072 and parameters: {'lstm_units': 55, 'dropout_rate': 0.4088031042410164, 'learning_rate': 0.0011185761719412714, 'batch_size': 16, 'epochs': 164}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


[I 2024-11-16 23:13:22,198] Trial 16 finished with value: 28.73604645839647 and parameters: {'lstm_units': 50, 'dropout_rate': 0.42165583505980675, 'learning_rate': 1.1035077207163089e-05, 'batch_size': 16, 'epochs': 123}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


[I 2024-11-16 23:16:43,129] Trial 17 finished with value: 15.993967416968088 and parameters: {'lstm_units': 58, 'dropout_rate': 0.2711282553126839, 'learning_rate': 0.007068819040646621, 'batch_size': 16, 'epochs': 168}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


[I 2024-11-16 23:19:21,997] Trial 18 finished with value: 17.31435329517041 and parameters: {'lstm_units': 89, 'dropout_rate': 0.3707970107027405, 'learning_rate': 0.001593466441236781, 'batch_size': 16, 'epochs': 112}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step


[I 2024-11-16 23:22:01,383] Trial 19 finished with value: 16.049498813959065 and parameters: {'lstm_units': 56, 'dropout_rate': 0.41709548205244074, 'learning_rate': 0.0002123307500055575, 'batch_size': 16, 'epochs': 138}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


[I 2024-11-16 23:26:05,923] Trial 20 finished with value: 16.13023902484361 and parameters: {'lstm_units': 64, 'dropout_rate': 0.18141755372530727, 'learning_rate': 0.003099458643212913, 'batch_size': 16, 'epochs': 193}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


[I 2024-11-16 23:28:11,045] Trial 21 finished with value: 17.40040664118709 and parameters: {'lstm_units': 72, 'dropout_rate': 0.32539589727956775, 'learning_rate': 0.0008941481634386924, 'batch_size': 32, 'epochs': 159}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step


[I 2024-11-16 23:30:39,103] Trial 22 finished with value: 16.654246602953684 and parameters: {'lstm_units': 82, 'dropout_rate': 0.3650059210276574, 'learning_rate': 0.0010954042643537287, 'batch_size': 32, 'epochs': 171}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


[I 2024-11-16 23:32:05,211] Trial 23 finished with value: 17.190208034434146 and parameters: {'lstm_units': 72, 'dropout_rate': 0.27960900039919756, 'learning_rate': 0.00042648063148538614, 'batch_size': 64, 'epochs': 155}. Best is trial 3 with value: 15.43608080839731.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step


[I 2024-11-16 23:35:07,264] Trial 24 finished with value: 15.417022194115535 and parameters: {'lstm_units': 76, 'dropout_rate': 0.3470811306466361, 'learning_rate': 0.004432628015988793, 'batch_size': 16, 'epochs': 148}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


[I 2024-11-16 23:37:49,213] Trial 25 finished with value: 20.492325443018327 and parameters: {'lstm_units': 79, 'dropout_rate': 0.45166662927871604, 'learning_rate': 0.004250212103889071, 'batch_size': 16, 'epochs': 131}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


[I 2024-11-16 23:41:05,220] Trial 26 finished with value: 25.745802415114166 and parameters: {'lstm_units': 85, 'dropout_rate': 0.30363656158898583, 'learning_rate': 0.005646918300557258, 'batch_size': 16, 'epochs': 148}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


[I 2024-11-16 23:43:29,312] Trial 27 finished with value: 18.10173332697782 and parameters: {'lstm_units': 75, 'dropout_rate': 0.39396435196251167, 'learning_rate': 0.0023659923886588923, 'batch_size': 16, 'epochs': 111}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


[I 2024-11-16 23:46:27,967] Trial 28 finished with value: 20.16419071457458 and parameters: {'lstm_units': 68, 'dropout_rate': 0.3526737926565971, 'learning_rate': 0.001281422432246782, 'batch_size': 16, 'epochs': 145}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


[I 2024-11-16 23:49:13,952] Trial 29 finished with value: 18.84284662278786 and parameters: {'lstm_units': 75, 'dropout_rate': 0.279013997727173, 'learning_rate': 8.208152152096892e-05, 'batch_size': 16, 'epochs': 130}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 397ms/step


[I 2024-11-16 23:51:19,016] Trial 30 finished with value: 19.123266695886805 and parameters: {'lstm_units': 55, 'dropout_rate': 0.25159966574179005, 'learning_rate': 0.0046286835752826026, 'batch_size': 16, 'epochs': 111}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


[I 2024-11-16 23:53:31,112] Trial 31 finished with value: 18.310567644987586 and parameters: {'lstm_units': 76, 'dropout_rate': 0.30667730648802083, 'learning_rate': 0.00026730801272263524, 'batch_size': 32, 'epochs': 164}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


[I 2024-11-16 23:56:50,231] Trial 32 finished with value: 17.113027171696483 and parameters: {'lstm_units': 70, 'dropout_rate': 0.3438772095571848, 'learning_rate': 0.000601760251121303, 'batch_size': 16, 'epochs': 158}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


[I 2024-11-16 23:59:14,380] Trial 33 finished with value: 23.86330634760582 and parameters: {'lstm_units': 78, 'dropout_rate': 0.4036200351916452, 'learning_rate': 0.0018406966024988054, 'batch_size': 32, 'epochs': 175}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


[I 2024-11-17 00:02:06,841] Trial 34 finished with value: 19.265478771958378 and parameters: {'lstm_units': 61, 'dropout_rate': 0.4440754017859553, 'learning_rate': 0.0009557697208030222, 'batch_size': 16, 'epochs': 151}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


[I 2024-11-17 00:03:28,716] Trial 35 finished with value: 23.11594868257136 and parameters: {'lstm_units': 53, 'dropout_rate': 0.38840005966164626, 'learning_rate': 0.002411716773776433, 'batch_size': 64, 'epochs': 184}. Best is trial 24 with value: 15.417022194115535.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


[I 2024-11-17 00:07:17,932] Trial 36 finished with value: 15.389339236158593 and parameters: {'lstm_units': 93, 'dropout_rate': 0.3296526821355444, 'learning_rate': 0.00035327026589794, 'batch_size': 16, 'epochs': 168}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


[I 2024-11-17 00:11:15,533] Trial 37 finished with value: 17.891773941433346 and parameters: {'lstm_units': 99, 'dropout_rate': 0.4729295705155495, 'learning_rate': 0.0001248396382026221, 'batch_size': 16, 'epochs': 169}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


[I 2024-11-17 00:15:37,469] Trial 38 finished with value: 17.182836196067978 and parameters: {'lstm_units': 91, 'dropout_rate': 0.29704577312813035, 'learning_rate': 0.00036333547111691794, 'batch_size': 16, 'epochs': 185}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


[I 2024-11-17 00:18:21,920] Trial 39 finished with value: 17.548902394221795 and parameters: {'lstm_units': 94, 'dropout_rate': 0.42797166764604005, 'learning_rate': 0.009873107509126218, 'batch_size': 16, 'epochs': 116}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


[I 2024-11-17 00:21:24,660] Trial 40 finished with value: 15.699371536319703 and parameters: {'lstm_units': 82, 'dropout_rate': 0.24040439262514346, 'learning_rate': 0.0030328942155193185, 'batch_size': 16, 'epochs': 132}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


[I 2024-11-17 00:23:57,951] Trial 41 finished with value: 19.870381976907588 and parameters: {'lstm_units': 100, 'dropout_rate': 0.3328548210643842, 'learning_rate': 0.0007364324029222779, 'batch_size': 32, 'epochs': 160}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


[I 2024-11-17 00:27:42,610] Trial 42 finished with value: 20.12002750822087 and parameters: {'lstm_units': 85, 'dropout_rate': 0.35398885689877885, 'learning_rate': 0.0005635547550991814, 'batch_size': 16, 'epochs': 173}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


[I 2024-11-17 00:28:59,925] Trial 43 finished with value: 15.873768754758832 and parameters: {'lstm_units': 64, 'dropout_rate': 0.29453351375798975, 'learning_rate': 0.0013669145569291138, 'batch_size': 64, 'epochs': 166}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


[I 2024-11-17 00:31:40,916] Trial 44 finished with value: 15.755741321969664 and parameters: {'lstm_units': 94, 'dropout_rate': 0.32321340377578955, 'learning_rate': 0.00026274929192944085, 'batch_size': 32, 'epochs': 179}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


[I 2024-11-17 00:34:40,265] Trial 45 finished with value: 16.03256595452235 and parameters: {'lstm_units': 79, 'dropout_rate': 0.38720782122102176, 'learning_rate': 0.0001600978217144689, 'batch_size': 16, 'epochs': 144}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


[I 2024-11-17 00:38:06,895] Trial 46 finished with value: 15.723820044862636 and parameters: {'lstm_units': 88, 'dropout_rate': 0.26710658086248584, 'learning_rate': 0.00038765492511179357, 'batch_size': 16, 'epochs': 157}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


[I 2024-11-17 00:39:23,727] Trial 47 finished with value: 24.49555217232659 and parameters: {'lstm_units': 69, 'dropout_rate': 0.21449186794987382, 'learning_rate': 4.313897697949727e-05, 'batch_size': 32, 'epochs': 101}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


[I 2024-11-17 00:42:32,908] Trial 48 finished with value: 23.418652233521716 and parameters: {'lstm_units': 73, 'dropout_rate': 0.37134918434171726, 'learning_rate': 0.0007564831485472273, 'batch_size': 16, 'epochs': 152}. Best is trial 36 with value: 15.389339236158593.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


[I 2024-11-17 00:43:56,099] Trial 49 finished with value: 16.082128400629472 and parameters: {'lstm_units': 65, 'dropout_rate': 0.3161620354323499, 'learning_rate': 0.00046138384728697615, 'batch_size': 64, 'epochs': 163}. Best is trial 36 with value: 15.389339236158593.


Best trial:
  Value (RMSE): 15.389339236158593
  Params:
    lstm_units: 93
    dropout_rate: 0.3296526821355444
    learning_rate: 0.00035327026589794
    batch_size: 16
    epochs: 168


In [None]:
import numpy as np
import pandas as pd
import optuna
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Step 1: Read the three cleaned close-price files, using the parsed 'Date'
# column as the index of each frame.
tesla_data = pd.read_csv(
    'Clean_data/Cleaned_Tesla_Close.csv',
    index_col='Date',
    parse_dates=['Date'],
)
sp500_data = pd.read_csv(
    'Clean_data/Cleaned_SP500_Close.csv',
    index_col='Date',
    parse_dates=['Date'],
)
ixic_data = pd.read_csv(
    'Clean_data/Cleaned_IXIC_Close.csv',
    index_col='Date',
    parse_dates=['Date'],
)

# Join the frames on their date index. Column names that collide keep the
# Tesla name unchanged and get '_sp500' / '_ixic' appended on the right side.
merged_data = (
    tesla_data
    .merge(sp500_data, left_index=True, right_index=True, suffixes=('', '_sp500'))
    .merge(ixic_data, left_index=True, right_index=True, suffixes=('', '_ixic'))
)

# Step 2: Keep the three model features; the Tesla column comes first because
# column 0 is used as the prediction target further down.
data = merged_data[
    ['Close_predicted', 'Close_predicted_sp500', 'Close_predicted_ixic']
]

# Min-max scale each feature after dropping rows with missing values.
# NOTE(review): the scaler is fit on the whole series before the train/test
# split below, so test-period extremes influence the scaling - confirm this
# is intended.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.dropna())

# Prepare the dataset for LSTM
def create_dataset(dataset, time_step=60):
    """Build sliding-window samples for next-step prediction.

    Each sample is ``time_step`` consecutive rows of ``dataset``; its target
    is column 0 of the row that immediately follows the window.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_features)``.
        time_step: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape
        ``(n_rows - time_step, time_step, n_features)`` and ``y`` has shape
        ``(n_rows - time_step,)``. If there are too few rows for a single
        window, both are empty but ``X`` keeps its 3-D shape so callers that
        read ``X.shape[2]`` still work.

    Raises:
        ValueError: If ``time_step`` is negative.
    """
    dataset = np.asarray(dataset)
    if time_step < 0:
        raise ValueError(f'time_step must be non-negative, got {time_step}')

    n_windows = len(dataset) - time_step
    if n_windows <= 0:
        # Not enough history for even one window: return well-shaped empties
        # instead of the shape-(0,) arrays a bare np.array([]) would give.
        empty_X = np.empty((0, time_step) + dataset.shape[1:], dtype=dataset.dtype)
        return empty_X, np.empty((0,), dtype=dataset.dtype)

    # Row indices of every window at once: window k covers rows
    # k .. k + time_step - 1, so one fancy-index builds all samples.
    window_rows = np.arange(n_windows)[:, None] + np.arange(time_step)[None, :]
    X = dataset[window_rows]
    # Target is the first column of the row right after each window; copy so
    # the result does not alias the input array.
    y = dataset[time_step:, 0].copy()
    return X, y

# Number of past rows fed to the model for each sample
time_step = 60
X, y = create_dataset(scaled_data, time_step)

# Order-preserving 80/20 split (no shuffling): the first 80% of the windows
# are used for training, the remainder for testing.
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Function to create and train the LSTM model with Optuna
def objective(trial):
    """Train one LSTM configuration and return its RMSE in unscaled units.

    Samples hyperparameters from ``trial`` (narrow ranges, fixed batch size
    of 16), builds a model of two stacked LSTM layers (each followed by
    dropout) with a single-unit dense output, fits it on the module-level
    ``X_train`` / ``y_train``, and scores it on ``X_test`` / ``y_test`` after
    undoing the min-max scaling with the module-level ``scaler``.

    NOTE(review): the score is computed on the test split, so the chosen
    hyperparameters are tuned against the same data later used for
    evaluation - consider carving out a separate validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Root-mean-squared error between the inverse-scaled predictions and
        targets (column 0 of the scaled data) on the test windows.
    """
    # Function-scope import keeps this block self-contained.
    from tensorflow.keras.backend import clear_session

    # Release the Keras global state left behind by earlier trials so memory
    # does not accumulate over the course of the study.
    clear_session()

    # Hyperparameters to optimize
    lstm_units = trial.suggest_int('lstm_units', 55, 60)
    dropout_rate = trial.suggest_float('dropout_rate', 0.265, 0.275)
    learning_rate = trial.suggest_float('learning_rate', 0.00095, 0.0011, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16])
    epochs = trial.suggest_int('epochs', 100, 200)

    # Build the LSTM model
    model = Sequential([
        Input(shape=(time_step, X_train.shape[2])),
        LSTM(lstm_units, return_sequences=True),
        Dropout(dropout_rate),
        LSTM(lstm_units),
        Dropout(dropout_rate),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # Evaluate the model (verbose=0 to match fit and keep the trial log clean)
    test_predictions = model.predict(X_test, verbose=0)

    # The scaler was fit on every feature column, so inverse_transform needs
    # the same number of columns. The non-target columns of each window's
    # last row act as padding; only column 0 of the result is kept.
    padding_columns = X_test[:, -1, 1:]
    test_predictions_rescaled = scaler.inverse_transform(
        np.hstack([test_predictions, padding_columns])
    )[:, 0]
    y_test_rescaled = scaler.inverse_transform(
        np.hstack([y_test.reshape(-1, 1), padding_columns])
    )[:, 0]

    rmse = np.sqrt(mean_squared_error(y_test_rescaled, test_predictions_rescaled))
    return rmse

# Launch the narrowed hyperparameter search; the objective returns an RMSE,
# so the study minimizes it.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

# Report the winning trial: its score first, then every sampled parameter
print('Best trial:')
trial = study.best_trial
print(f'  Value (RMSE): {trial.value}')
print('  Params:')
for param_name, param_value in trial.params.items():
    print(f'    {param_name}: {param_value}')
