## Setup (for Google Colab)

In [None]:
# !pip install optuna

# Import libraries

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np

# Keras is a high-level neural networks API, written in Python and capable of running on top of TensorFlow.
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

import optuna

import matplotlib.pyplot as plt

# Fetch data

In [None]:
# Ticker and date range for the experiment.
stock_symbol = 'GOOGL'
start_date = '2020-01-01'
end_date = '2023-01-01'

# Fetch stock data using yfinance
stock_data = yf.download(stock_symbol, start=start_date, end=end_date, progress=False)

# A failed download (bad ticker, network problem) can come back as an empty
# frame; stop here with a clear message instead of failing later inside the
# scaler with an unrelated-looking error.
if stock_data.empty:
    raise ValueError(
        f"No price data returned for {stock_symbol!r} "
        f"between {start_date} and {end_date}"
    )

# Bare expression so the notebook renders the downloaded frame.
stock_data

In [None]:
# Display the last rows of the frame to check how far the downloaded range extends.
stock_data.tail()

# Preprocess data

In [None]:
# Use 'Close' prices for prediction, shaped as a single-feature column
# because scikit-learn scalers expect a 2-D (n_samples, n_features) array.
data = stock_data['Close'].values.reshape(-1, 1)

# Scale using statistics from the leading 80% of rows only. Fitting on the
# whole series would leak the min/max of the held-out test period into the
# training inputs. The 0.8 mirrors the train fraction used when the windows
# are split; with a lookback of at least 5 rows these leading rows all fall
# inside the training portion.
# Training-period values map to [0, 1]; later values may fall slightly
# outside that range, which is expected.
n_fit_rows = max(1, int(0.8 * len(data)))
scaler = MinMaxScaler()
scaler.fit(data[:n_fit_rows])
data = scaler.transform(data)

# Create time series train and test sets

In [None]:
lookback = 60  # Number of previous days to consider

# Each sample needs `lookback` inputs plus one target, so anything shorter
# cannot produce a single training example.
if len(data) <= lookback:
    raise ValueError(
        f"Need more than {lookback} rows to build a window, got {len(data)}"
    )

series = data[:, 0]

# Row i of `windows` is series[i:i+lookback]. The final window has no
# following value to predict, so it is dropped.
windows = np.lib.stride_tricks.sliding_window_view(series, lookback)

# Copy out of the read-only strided view and add an explicit feature axis so
# X has the (samples, timesteps, features) layout that matches the model's
# input_shape=(lookback, 1).
X = windows[:-1].copy()[..., np.newaxis]
# Target for window i is the value immediately after it.
y = series[lookback:].copy()

# Chronological 80/20 split: no shuffling, so the test set is strictly later
# in time than the training set.
train_size = int(0.8 * len(X))

X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

## Hyperparameter tuning

### Model recipe

In [None]:
def create_lstm_model(trial):
    """Build and compile a one-layer LSTM regressor for the given Optuna trial.

    Three hyperparameters are requested from ``trial``:

    * ``units`` - integer size of the LSTM layer, between 50 and 128.
    * ``dropout_rate`` - float between 0.0 and 0.5, applied after the LSTM.
    * ``learning_rate`` - float between 1e-5 and 1e-1, sampled on a log scale.

    The network consumes sequences of ``lookback`` steps with one feature
    each and emits a single value. It is compiled with Adam and a
    mean-squared-error loss.

    Args:
        trial: Object exposing ``suggest_int`` / ``suggest_float`` (an Optuna
            trial, or a ``FixedTrial`` replaying stored values).

    Returns:
        The compiled Keras ``Sequential`` model (not yet trained).
    """
    n_units = trial.suggest_int('units', 50, 128)
    drop_fraction = trial.suggest_float('dropout_rate', 0.0, 0.5)
    step_size = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    adam = Adam(learning_rate=step_size)

    network = Sequential([
        LSTM(units=n_units, input_shape=(lookback, 1)),
        Dropout(drop_fraction),
        Dense(units=1),
    ])
    network.compile(optimizer=adam, loss='mean_squared_error')
    return network

### Objective function definition

In [None]:
def objective(trial):
    """Train one candidate model and return its validation MSE in price units.

    The most recent 20% of the training windows are held out as a validation
    set and the model is fit on the remainder. Scoring trials on the test set
    would let hyperparameter selection see the data later used for the final
    evaluation, making that evaluation optimistically biased. The test set is
    therefore never touched here.

    Args:
        trial: Optuna trial used to sample the model's hyperparameters.

    Returns:
        Mean squared error between actual and predicted prices on the
        validation tail, after undoing the scaling.
    """
    model = create_lstm_model(trial)

    # Chronological hold-out: validation windows come strictly after the
    # windows used for fitting.
    n_val = max(1, int(0.2 * len(X_train)))
    X_fit, X_val = X_train[:-n_val], X_train[-n_val:]
    y_fit, y_val = y_train[:-n_val], y_train[-n_val:]

    model.fit(X_fit, y_fit, epochs=50, batch_size=32, verbose=0)

    # Predict once, then map both predictions and targets back to the
    # original price scale so the score is interpretable.
    val_pred = scaler.inverse_transform(model.predict(X_val))
    val_true = scaler.inverse_transform(y_val.reshape(-1, 1))
    return mean_squared_error(val_true, val_pred)

### Optimization Process

In [None]:
# Run the search: each trial calls `objective`, and a lower returned value is
# better, hence direction='minimize'.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

# Grab the winning trial and its hyperparameters up front; the parameters
# are reused afterwards to rebuild the final model.
trial = study.best_trial
best_params = study.best_params

# Summary of the search.
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
print("Value: ", trial.value)
print("Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

### Use the 'best model'

In [None]:
# Rebuild the architecture from the best hyperparameters: FixedTrial hands
# the stored values back to the same suggest_* calls used during the search.
final_model = create_lstm_model(optuna.trial.FixedTrial(best_params))
final_model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

# Bring targets and predictions back to the original price scale before
# scoring, so the error is expressed in price units.
y_test_orig = scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred = scaler.inverse_transform(final_model.predict(X_test))

mse = mean_squared_error(y_test_orig, y_pred)

In [None]:
# Target i is the price at row i + lookback, so the first test target sits at
# row train_size + lookback; slicing the index from there lines the dates up
# with y_test.
test_dates = stock_data.index[train_size + lookback:].to_list()

## Show results

In [None]:
# Overlay actual and predicted test-period prices on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, y_test_orig, label='Actual', color='black')
ax.plot(test_dates, y_pred, label='Predicted', color='red')
ax.legend()
ax.set_title(f'Predictions vs Actual\nLSTM MSE: {mse:.4f}')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Price')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.xticks(rotation=45)
plt.show()