In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, Dropout, MultiHeadAttention, LayerNormalization
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, MultiHeadAttention, LayerNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, MultiHeadAttention, LayerNormalization


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Generate synthetic data (e.g., exponential decay)
def simulate_ode(k=0.1, y0=1.0, t_max=10, num_points=1000):
    t = np.linspace(0, t_max, num_points)
    y = y0 * np.exp(-k * t)
    return t, y


In [3]:


def preprocess_stock_data(csv_path, date_column='Date', close_column='Close', test_size=0.2, val_size=0.1, time_steps=60):
    """
    Prepares stock market price data for time series modeling.
    
    Args:
        csv_path (str): Path to the CSV file containing the data.
        date_column (str): Name of the date column in the CSV.
        close_column (str): Name of the closing price column in the CSV.
        test_size (float): Proportion of the data for testing.
        val_size (float): Proportion of the training data for validation.
        time_steps (int): Number of past time steps to use for each sample.
    
    Returns:
        X_train, y_train: Training data and labels.
        X_val, y_val: Validation data and labels.
        X_test, y_test: Testing data and labels.
        scaler: Fitted MinMaxScaler instance for inverse scaling.
    """
    # Load the dataset
    data = pd.read_csv(csv_path, parse_dates=[date_column])
    data.sort_values(by=date_column, inplace=True)
    
    # Extract the 'close' column for scaling
    close_prices = data[close_column].values.reshape(-1, 1)
    
    # Scale the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_close = scaler.fit_transform(close_prices)
    
    # Create sequences of time_steps
    X, y = [], []
    for i in range(time_steps, len(scaled_close)):
        X.append(scaled_close[i-time_steps:i])
        y.append(scaled_close[i])
    
    X, y = np.array(X), np.array(y)
    
    # Split data into train, validation, and test sets
    train_size = int((1 - test_size) * len(X))
    val_size = int(val_size * train_size)
    
    X_train, X_temp = X[:train_size], X[train_size:]
    y_train, y_temp = y[:train_size], y[train_size:]
    
    X_val, X_test = X_temp[:val_size], X_temp[val_size:]
    y_val, y_test = y_temp[:val_size], y_temp[val_size:]
    
    return X_train, y_train, X_val, y_val, X_test, y_test, scaler



In [4]:
data_path_google = "D:/stock_price_prediction/inputs/apple_stock_cleaned.csv"

X_train, y_train, X_val, y_val, X_test, y_test, scaler = preprocess_stock_data(
    data_path_google, date_column='Date', close_column='Close', time_steps=60
)

print(f"Train shape: {X_train.shape}, {y_train.shape}")
print(f"Validation shape: {X_val.shape}, {y_val.shape}")
print(f"Test shape: {X_test.shape}, {y_test.shape}")

Train shape: (554, 60, 1), (554, 1)
Validation shape: (55, 60, 1), (55, 1)
Test shape: (84, 60, 1), (84, 1)


In [5]:


def objective(trial, model_type):
    # Common hyperparameters
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])

    # Define the model based on the type
    model = Sequential()
    
    if model_type == 'rnn':
        num_layers = trial.suggest_int('num_layers', 1, 3)
        units = trial.suggest_int('units', 32, 256, step=32)
        for _ in range(num_layers):
            model.add(SimpleRNN(units, activation='tanh', return_sequences=True if _ < num_layers - 1 else False))
        model.add(Dense(1))

    elif model_type == 'lstm':
        num_layers = trial.suggest_int('num_layers', 1, 3)
        units = trial.suggest_int('units', 32, 256, step=32)
        for _ in range(num_layers):
            model.add(LSTM(units, activation='tanh', return_sequences=True if _ < num_layers - 1 else False))
        model.add(Dense(1))
        
    elif model_type == 'transformer':
        num_heads = trial.suggest_int('num_heads', 2, 8)
        key_dim = trial.suggest_int('key_dim', 16, 64, step=16)
        ff_units = trial.suggest_int('ff_units', 32, 128, step=32)
    
        # Input Layer
        input_layer = tf.keras.layers.Input(shape=(None, 1))
    
        # Transformer Encoder Layer
        attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(input_layer, input_layer)
        attention_output = LayerNormalization()(attention_output)
        attention_output = tf.keras.layers.Dense(ff_units, activation='relu')(attention_output)
    
         # Add a Dense layer for regression output
        output_layer = tf.keras.layers.Dense(1)(attention_output)
    
         # Create a Model instead of Sequential
        model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    elif model_type == 'neural_net':
        num_layers = trial.suggest_int('num_layers', 1, 3)
        units = trial.suggest_int('units', 32, 256, step=32)
        for _ in range(num_layers):
            model.add(Dense(units, activation='relu'))
        model.add(Dense(1))

    elif model_type == 'ode':
        num_layers = trial.suggest_int('num_layers', 1, 3)
        units = trial.suggest_int('units', 32, 256, step=32)
        activation = trial.suggest_categorical('activation', ['relu', 'tanh', 'sigmoid'])

       # Input layer to handle sequences
        input_layer = tf.keras.layers.Input(shape=(None, 1))
    
        # Stack Dense layers for feature extraction
        x = input_layer
        for _ in range(num_layers):
            x = tf.keras.layers.Dense(units, activation=activation)(x)
    
        # Approximate the derivative using a Dense layer
        derivative_layer = tf.keras.layers.Dense(units, activation=activation)(x)
    
        # Output layer for regression
        output_layer = tf.keras.layers.Dense(1)(derivative_layer)
    
        # Define the functional model
        model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    # Compile the model
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    # Train the model
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=batch_size,
        verbose=0,
    )

    # Return the validation loss for Optuna to minimize
    val_loss = history.history['val_loss'][-1]
    return val_loss


In [6]:
study_rnn = optuna.create_study(direction='minimize')
study_rnn.optimize(lambda trial: objective(trial, model_type='rnn'), n_trials=100)
print("Best RNN parameters:", study_rnn.best_params)


[I 2024-12-19 02:24:32,595] A new study created in memory with name: no-name-d2afdbdf-4470-4372-81d1-55cdd4f70509
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-19 02:24:41,532] Trial 0 finished with value: 0.003509124740958214 and parameters: {'learning_rate': 0.0019770609759390155, 'batch_size': 128, 'num_layers': 1, 'units': 224}. Best is trial 0 with value: 0.003509124740958214.
[I 2024-12-19 02:24:49,026] Trial 1 finished with value: 0.5044333934783936 and parameters: {'learning_rate': 0.03951743860834782, 'batch_size': 128, 'num_layers': 3, 'units': 96}. Best is trial 0 with value: 0.003509124740958214.
[I 2024-12-19 02:24:55,273] Trial 2 finished with value: 0.04115406051278114 and parameters: {'learning_rate': 0.0659611002175721, 'batch_size': 16, 'num_layers': 1, 'units': 192}. Best is trial 0 with value: 0.003509124740958214.
[I 2024-12-19 02:25:00,085] Trial 3 finished with value: 0.006279309745877981 and parameters: {'learning_rate': 0.00

Best RNN parameters: {'learning_rate': 0.007223125993888851, 'batch_size': 16, 'num_layers': 1, 'units': 32}


Best is trial 92 with value: 0.001046365941874683. Best RNN parameters: {'learning_rate': 0.007223125993888851, 'batch_size': 16, 'num_layers': 1, 'units': 32}

In [7]:
study_lstm = optuna.create_study(direction='minimize')
study_lstm.optimize(lambda trial: objective(trial, model_type='lstm'), n_trials=100)
print("Best LSTM parameters:", study_lstm.best_params)


[I 2024-12-19 02:35:00,463] A new study created in memory with name: no-name-68256ac0-ab9a-4b17-b8de-b7a2cec644c2
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-19 02:35:10,402] Trial 0 finished with value: 0.0018183800857514143 and parameters: {'learning_rate': 0.008694145720990261, 'batch_size': 16, 'num_layers': 1, 'units': 96}. Best is trial 0 with value: 0.0018183800857514143.
[I 2024-12-19 02:35:23,240] Trial 1 finished with value: 0.10603601485490799 and parameters: {'learning_rate': 0.025788951647322495, 'batch_size': 128, 'num_layers': 3, 'units': 96}. Best is trial 0 with value: 0.0018183800857514143.
[I 2024-12-19 02:35:33,035] Trial 2 finished with value: 0.002311622491106391 and parameters: {'learning_rate': 0.0010079861293818947, 'batch_size': 32, 'num_layers': 1, 'units': 192}. Best is trial 0 with value: 0.0018183800857514143.
[I 2024-12-19 02:36:00,427] Trial 3 finished with value: 246.6801300048828 and parameters: {'learning_rate': 

Best LSTM parameters: {'learning_rate': 0.01341034226855941, 'batch_size': 16, 'num_layers': 2, 'units': 32}


Best is trial 94 with value: 0.0010134328622370958. Best LSTM parameters: {'learning_rate': 0.01341034226855941, 'batch_size': 16, 'num_layers': 2, 'units': 32}


In [8]:
study_transformer = optuna.create_study(direction='minimize')
study_transformer.optimize(lambda trial: objective(trial, model_type='transformer'), n_trials=100)
print("Best Transformer parameters:", study_transformer.best_params)


[I 2024-12-19 02:55:38,189] A new study created in memory with name: no-name-60d64596-0d59-4edc-a418-b8d4180427ce
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-19 02:55:44,453] Trial 0 finished with value: 0.32206374406814575 and parameters: {'learning_rate': 0.00017802054844489107, 'batch_size': 32, 'num_heads': 5, 'key_dim': 16, 'ff_units': 32}. Best is trial 0 with value: 0.32206374406814575.
[I 2024-12-19 02:55:49,104] Trial 1 finished with value: 0.06539555639028549 and parameters: {'learning_rate': 0.007853472265399872, 'batch_size': 64, 'num_heads': 2, 'key_dim': 32, 'ff_units': 96}. Best is trial 1 with value: 0.06539555639028549.
[I 2024-12-19 02:55:55,613] Trial 2 finished with value: 0.061462342739105225 and parameters: {'learning_rate': 0.03825647381120418, 'batch_size': 16, 'num_heads': 5, 'key_dim': 32, 'ff_units': 128}. Best is trial 2 with value: 0.061462342739105225.
[I 2024-12-19 02:56:01,434] Trial 3 finished with value: 0.0688966

Best Transformer parameters: {'learning_rate': 0.014370676595139612, 'batch_size': 128, 'num_heads': 6, 'key_dim': 64, 'ff_units': 32}


 Best is trial 84 with value: 0.05489753559231758. Best Transformer parameters: {'learning_rate': 0.014370676595139612, 'batch_size': 128, 'num_heads': 6, 'key_dim': 64, 'ff_units': 32}


In [9]:
study_nn = optuna.create_study(direction='minimize')
study_nn.optimize(lambda trial: objective(trial, model_type='neural_net'), n_trials=100)
print("Best Neural Network parameters:", study_nn.best_params)

[I 2024-12-19 03:07:50,641] A new study created in memory with name: no-name-dbbd6f0a-1582-4aad-a01f-3549713f4916
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-19 03:07:54,525] Trial 0 finished with value: 0.03295713663101196 and parameters: {'learning_rate': 0.03735334123347407, 'batch_size': 32, 'num_layers': 3, 'units': 96}. Best is trial 0 with value: 0.03295713663101196.
[I 2024-12-19 03:07:57,557] Trial 1 finished with value: 0.08014288544654846 and parameters: {'learning_rate': 0.0687799501230703, 'batch_size': 32, 'num_layers': 2, 'units': 64}. Best is trial 0 with value: 0.03295713663101196.
[I 2024-12-19 03:08:00,159] Trial 2 finished with value: 0.03765813633799553 and parameters: {'learning_rate': 0.00822974963416513, 'batch_size': 64, 'num_layers': 1, 'units': 256}. Best is trial 0 with value: 0.03295713663101196.
[I 2024-12-19 03:08:02,995] Trial 3 finished with value: 0.03967640548944473 and parameters: {'learning_rate': 0.00520292696

Best Neural Network parameters: {'learning_rate': 0.03489233060526877, 'batch_size': 32, 'num_layers': 3, 'units': 96}


est is trial 71 with value: 0.022898802533745766. Best Neural Network parameters: {'learning_rate': 0.03489233060526877, 'batch_size': 32, 'num_layers': 3, 'units': 96}

In [10]:
study_ode = optuna.create_study(direction='minimize')
study_ode.optimize(lambda trial: objective(trial, model_type='ode'), n_trials=100)
print("Best ODE parameters:", study_ode.best_params)


[I 2024-12-19 03:13:37,552] A new study created in memory with name: no-name-1c391d23-5190-4f69-b912-da68016a7b1a
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-19 03:13:40,429] Trial 0 finished with value: 0.03307391330599785 and parameters: {'learning_rate': 0.00019581949172167053, 'batch_size': 128, 'num_layers': 2, 'units': 32, 'activation': 'tanh'}. Best is trial 0 with value: 0.03307391330599785.
[I 2024-12-19 03:13:43,556] Trial 1 finished with value: 0.040840033441782 and parameters: {'learning_rate': 0.0006255946646260178, 'batch_size': 64, 'num_layers': 2, 'units': 64, 'activation': 'relu'}. Best is trial 0 with value: 0.03307391330599785.
[I 2024-12-19 03:13:47,240] Trial 2 finished with value: 0.0367329865694046 and parameters: {'learning_rate': 0.000659951131985773, 'batch_size': 16, 'num_layers': 1, 'units': 128, 'activation': 'relu'}. Best is trial 0 with value: 0.03307391330599785.
[I 2024-12-19 03:13:50,188] Trial 3 finished with val

Best ODE parameters: {'learning_rate': 0.08863382397195368, 'batch_size': 128, 'num_layers': 2, 'units': 160, 'activation': 'tanh'}


 Best is trial 43 with value: 0.020803609862923622. Best ODE parameters: {'learning_rate': 0.08863382397195368, 'batch_size': 128, 'num_layers': 2, 'units': 160, 'activation': 'tanh'}


Here’s how to use this function to print the architecture for each model: