In [1]:
import numpy as np
import pandas as pd
import optuna
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_percentage_error

# Step 1: Load the data
tesla_data = pd.read_csv('Clean_data/Cleaned_Tesla_Close.csv', parse_dates=['Date'], index_col='Date')
sp500_data = pd.read_csv('Clean_data/Cleaned_SP500_Close.csv', parse_dates=['Date'], index_col='Date')
ixic_data = pd.read_csv('Clean_data/Cleaned_IXIC_Close.csv', parse_dates=['Date'], index_col='Date')

# Merge datasets on 'Date'
merged_data = tesla_data.merge(sp500_data, left_index=True, right_index=True, suffixes=('', '_sp500'))
merged_data = merged_data.merge(ixic_data, left_index=True, right_index=True, suffixes=('', '_ixic'))

# Step 2: Preprocess the data
data = merged_data[['Close', 'Close_sp500', 'Close_ixic']]

# Scale the data after making it stationary
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.dropna())  # Ensure NaN values are removed after differencing
# Prepare the dataset for LSTM
def create_dataset(dataset, time_step=60):
    X, y = [], []
    for i in range(time_step, len(dataset)):
        X.append(dataset[i - time_step:i])
        y.append(dataset[i, 0])  # Predict the 'Close' price of Tesla
    return np.array(X), np.array(y)

time_step = 60
X, y = create_dataset(scaled_data, time_step)

# Split data into training and test sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


# Function to create and train the LSTM model with Optuna
def objective(trial):
    # Hyperparameters to optimize
    lstm_units = trial.suggest_int('lstm_units', 50, 75)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.005, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16, 32])
    epochs = trial.suggest_int('epochs', 100, 200)

    # Build the LSTM model
    model = Sequential([
        Input(shape=(time_step, X_train.shape[2])),
        LSTM(lstm_units, return_sequences=True),
        Dropout(dropout_rate),
        LSTM(lstm_units),
        Dropout(dropout_rate),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # Evaluate the model
    test_predictions = model.predict(X_test)
    test_predictions_rescaled = scaler.inverse_transform(
        np.hstack([test_predictions, X_test[:, -1, 1:]])
    )[:, 0]
    y_test_rescaled = scaler.inverse_transform(
        np.hstack([y_test.reshape(-1, 1), X_test[:, -1, 1:]])
    )[:, 0]

    # Calculate MAPE
    mape = np.mean(np.abs((y_test_rescaled - test_predictions_rescaled) / y_test_rescaled)) * 100
    return mape

# Run the optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

# Print the best hyperparameters
print('Best trial:')
trial = study.best_trial
print(f'  Value (MAPE): {trial.value}')
print('  Params:')
for key, value in trial.params.items():
    print(f'    {key}: {value}')


[I 2024-11-18 14:35:22,125] A new study created in memory with name: no-name-e0f9c61e-6d54-4fff-979b-27c2e3c7516f


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step


[I 2024-11-18 14:39:32,932] Trial 0 finished with value: 2.880342983047965 and parameters: {'lstm_units': 60, 'dropout_rate': 0.2504247801101781, 'learning_rate': 0.0012358741506022068, 'batch_size': 32, 'epochs': 132}. Best is trial 0 with value: 2.880342983047965.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 258ms/step


[I 2024-11-18 14:51:56,369] Trial 1 finished with value: 4.197712738216658 and parameters: {'lstm_units': 58, 'dropout_rate': 0.22219864608320902, 'learning_rate': 0.0010035458720074018, 'batch_size': 16, 'epochs': 193}. Best is trial 0 with value: 2.880342983047965.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 84ms/step


[I 2024-11-18 15:02:41,178] Trial 2 finished with value: 5.388560979472249 and parameters: {'lstm_units': 51, 'dropout_rate': 0.2573541860778178, 'learning_rate': 0.00204711347197719, 'batch_size': 16, 'epochs': 200}. Best is trial 0 with value: 2.880342983047965.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step


[I 2024-11-18 15:08:35,515] Trial 3 finished with value: 6.210939293871229 and parameters: {'lstm_units': 59, 'dropout_rate': 0.306219909394503, 'learning_rate': 0.004664427585551044, 'batch_size': 32, 'epochs': 176}. Best is trial 0 with value: 2.880342983047965.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 136ms/step


[I 2024-11-18 15:14:55,975] Trial 4 finished with value: 3.7134772585381457 and parameters: {'lstm_units': 64, 'dropout_rate': 0.40001893359452856, 'learning_rate': 0.0018458160856224474, 'batch_size': 32, 'epochs': 184}. Best is trial 0 with value: 2.880342983047965.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step


[I 2024-11-18 15:19:46,234] Trial 5 finished with value: 4.180293655244498 and parameters: {'lstm_units': 74, 'dropout_rate': 0.39774920663331276, 'learning_rate': 0.0010486888556180836, 'batch_size': 32, 'epochs': 124}. Best is trial 0 with value: 2.880342983047965.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 118ms/step


[I 2024-11-18 15:25:00,564] Trial 6 finished with value: 3.380954102262125 and parameters: {'lstm_units': 63, 'dropout_rate': 0.25637059779547644, 'learning_rate': 0.0026958818339753534, 'batch_size': 32, 'epochs': 106}. Best is trial 0 with value: 2.880342983047965.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 100ms/step


[I 2024-11-18 15:36:30,209] Trial 7 finished with value: 2.569363845301048 and parameters: {'lstm_units': 71, 'dropout_rate': 0.21109991008384008, 'learning_rate': 0.0018233013350920971, 'batch_size': 16, 'epochs': 163}. Best is trial 7 with value: 2.569363845301048.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 61ms/step


[I 2024-11-18 15:42:09,665] Trial 8 finished with value: 3.1377945675689816 and parameters: {'lstm_units': 58, 'dropout_rate': 0.394593432415259, 'learning_rate': 0.0014187296748802945, 'batch_size': 16, 'epochs': 108}. Best is trial 7 with value: 2.569363845301048.


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step


[I 2024-11-18 15:45:54,016] Trial 9 finished with value: 4.8297536813456 and parameters: {'lstm_units': 65, 'dropout_rate': 0.20687595623779817, 'learning_rate': 0.0015955896761625397, 'batch_size': 32, 'epochs': 139}. Best is trial 7 with value: 2.569363845301048.


Best trial:
  Value (MAPE): 2.569363845301048
  Params:
    lstm_units: 71
    dropout_rate: 0.21109991008384008
    learning_rate: 0.0018233013350920971
    batch_size: 16
    epochs: 163
