In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, Dropout, MultiHeadAttention, LayerNormalization
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, MultiHeadAttention, LayerNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, MultiHeadAttention, LayerNormalization
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Generate synthetic data (e.g., exponential decay)
def simulate_ode(k=0.1, y0=1.0, t_max=10, num_points=1000):
    t = np.linspace(0, t_max, num_points)
    y = y0 * np.exp(-k * t)
    return t, y


In [3]:


def preprocess_stock_data(csv_path, date_column='Date', close_column='Close', test_size=0.2, val_size=0.1, time_steps=60):
    """
    Prepares stock market price data for time series modeling.
    
    Args:
        csv_path (str): Path to the CSV file containing the data.
        date_column (str): Name of the date column in the CSV.
        close_column (str): Name of the closing price column in the CSV.
        test_size (float): Proportion of the data for testing.
        val_size (float): Proportion of the training data for validation.
        time_steps (int): Number of past time steps to use for each sample.
    
    Returns:
        X_train, y_train: Training data and labels.
        X_val, y_val: Validation data and labels.
        X_test, y_test: Testing data and labels.
        scaler: Fitted MinMaxScaler instance for inverse scaling.
    """
    # Load the dataset
    data = pd.read_csv(csv_path, parse_dates=[date_column])
    data.sort_values(by=date_column, inplace=True)
    
    # Extract the 'close' column for scaling
    close_prices = data[close_column].values.reshape(-1, 1)
    
    # Scale the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_close = scaler.fit_transform(close_prices)
    
    # Create sequences of time_steps
    X, y = [], []
    for i in range(time_steps, len(scaled_close)):
        X.append(scaled_close[i-time_steps:i])
        y.append(scaled_close[i])
    
    X, y = np.array(X), np.array(y)
    
    # Split data into train, validation, and test sets
    train_size = int((1 - test_size) * len(X))
    val_size = int(val_size * train_size)
    
    X_train, X_temp = X[:train_size], X[train_size:]
    y_train, y_temp = y[:train_size], y[train_size:]
    
    X_val, X_test = X_temp[:val_size], X_temp[val_size:]
    y_val, y_test = y_temp[:val_size], y_temp[val_size:]
    
    return X_train, y_train, X_val, y_val, X_test, y_test, scaler



In [6]:


# Get the directory of the current script
current_dir = os.getcwd()
# Construct the path to the data folder (same level as the tuning folder)
data_folder = os.path.join(current_dir, "..", "inputs")
data_file_path_nvidia = os.path.join(data_folder, "nvidia_stock_cleaned.csv")


#data_path_google = "D:/stock_price_prediction/inputs/nvidia_stock_cleaned.csv"
#data_path_google = "./inputs/nvidia_stock_cleaned.csv"

X_train, y_train, X_val, y_val, X_test, y_test, scaler = preprocess_stock_data(
    data_file_path_nvidia, date_column='Date', close_column='Close', time_steps=60
)

print(f"Train shape: {X_train.shape}, {y_train.shape}")
print(f"Validation shape: {X_val.shape}, {y_val.shape}")
print(f"Test shape: {X_test.shape}, {y_test.shape}")

Train shape: (528, 60, 1), (528, 1)
Validation shape: (52, 60, 1), (52, 1)
Test shape: (81, 60, 1), (81, 1)


In [7]:


def objective(trial, model_type):
    # Common hyperparameters
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])

    # Define the model based on the type
    model = Sequential()
    
    if model_type == 'rnn':
        num_layers = trial.suggest_int('num_layers', 1, 3)
        units = trial.suggest_int('units', 32, 256, step=32)
        for _ in range(num_layers):
            model.add(SimpleRNN(units, activation='tanh', return_sequences=True if _ < num_layers - 1 else False))
        model.add(Dense(1))

    elif model_type == 'lstm':
        num_layers = trial.suggest_int('num_layers', 1, 3)
        units = trial.suggest_int('units', 32, 256, step=32)
        for _ in range(num_layers):
            model.add(LSTM(units, activation='tanh', return_sequences=True if _ < num_layers - 1 else False))
        model.add(Dense(1))
        
    elif model_type == 'transformer':
        num_heads = trial.suggest_int('num_heads', 2, 8)
        key_dim = trial.suggest_int('key_dim', 16, 64, step=16)
        ff_units = trial.suggest_int('ff_units', 32, 128, step=32)
    
        # Input Layer
        input_layer = tf.keras.layers.Input(shape=(None, 1))
    
        # Transformer Encoder Layer
        attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(input_layer, input_layer)
        attention_output = LayerNormalization()(attention_output)
        attention_output = tf.keras.layers.Dense(ff_units, activation='relu')(attention_output)
    
         # Add a Dense layer for regression output
        output_layer = tf.keras.layers.Dense(1)(attention_output)
    
         # Create a Model instead of Sequential
        model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    elif model_type == 'neural_net':
        num_layers = trial.suggest_int('num_layers', 1, 3)
        units = trial.suggest_int('units', 32, 256, step=32)
        for _ in range(num_layers):
            model.add(Dense(units, activation='relu'))
        model.add(Dense(1))

    elif model_type == 'ode':
        num_layers = trial.suggest_int('num_layers', 1, 3)
        units = trial.suggest_int('units', 32, 256, step=32)
        activation = trial.suggest_categorical('activation', ['relu', 'tanh', 'sigmoid'])

       # Input layer to handle sequences
        input_layer = tf.keras.layers.Input(shape=(None, 1))
    
        # Stack Dense layers for feature extraction
        x = input_layer
        for _ in range(num_layers):
            x = tf.keras.layers.Dense(units, activation=activation)(x)
    
        # Approximate the derivative using a Dense layer
        derivative_layer = tf.keras.layers.Dense(units, activation=activation)(x)
    
        # Output layer for regression
        output_layer = tf.keras.layers.Dense(1)(derivative_layer)
    
        # Define the functional model
        model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    # Compile the model
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    # Train the model
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=batch_size,
        verbose=0,
    )

    # Return the validation loss for Optuna to minimize
    val_loss = history.history['val_loss'][-1]
    return val_loss


In [7]:
study_rnn = optuna.create_study(direction='minimize')
study_rnn.optimize(lambda trial: objective(trial, model_type='rnn'), n_trials=100)
print("Best RNN parameters:", study_rnn.best_params)


[I 2024-12-19 23:45:24,811] A new study created in memory with name: no-name-8a8ec752-e4a2-4044-a8c6-13ec53d8397e
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-19 23:45:32,698] Trial 0 finished with value: 0.0007004979415796697 and parameters: {'learning_rate': 0.001379218001920653, 'batch_size': 32, 'num_layers': 2, 'units': 32}. Best is trial 0 with value: 0.0007004979415796697.
[I 2024-12-19 23:45:46,466] Trial 1 finished with value: 0.07814586162567139 and parameters: {'learning_rate': 0.0012282527798022276, 'batch_size': 16, 'num_layers': 3, 'units': 160}. Best is trial 0 with value: 0.0007004979415796697.
[I 2024-12-19 23:45:52,508] Trial 2 finished with value: 0.020000746473670006 and parameters: {'learning_rate': 0.003554759152674873, 'batch_size': 128, 'num_layers': 3, 'units': 32}. Best is trial 0 with value: 0.0007004979415796697.
[I 2024-12-19 23:46:03,588] Trial 3 finished with value: 0.0008467691368423402 and parameters: {'learning_rat

Best RNN parameters: {'learning_rate': 0.016352092151644158, 'batch_size': 32, 'num_layers': 1, 'units': 32}


Best is trial 92 with value: 0.0005872648325748742. Best RNN parameters: {'learning_rate': 0.016352092151644158, 'batch_size': 32, 'num_layers': 1, 'units': 32}

In [8]:
study_lstm = optuna.create_study(direction='minimize')
study_lstm.optimize(lambda trial: objective(trial, model_type='lstm'), n_trials=100)
print("Best LSTM parameters:", study_lstm.best_params)


[I 2024-12-19 23:56:26,240] A new study created in memory with name: no-name-f8593809-456b-4f6b-b82a-5699aa3ef1a6
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-19 23:56:39,853] Trial 0 finished with value: 0.41034090518951416 and parameters: {'learning_rate': 0.01543009727272392, 'batch_size': 16, 'num_layers': 1, 'units': 256}. Best is trial 0 with value: 0.41034090518951416.
[I 2024-12-19 23:56:51,884] Trial 1 finished with value: 0.009252061136066914 and parameters: {'learning_rate': 0.00016201013832679856, 'batch_size': 16, 'num_layers': 1, 'units': 224}. Best is trial 1 with value: 0.009252061136066914.
[I 2024-12-19 23:56:55,776] Trial 2 finished with value: 0.0010065120877698064 and parameters: {'learning_rate': 0.06635116926494748, 'batch_size': 128, 'num_layers': 1, 'units': 32}. Best is trial 2 with value: 0.0010065120877698064.
[I 2024-12-19 23:57:09,528] Trial 3 finished with value: 0.0027706860564649105 and parameters: {'learning_rate':

Best LSTM parameters: {'learning_rate': 0.0077440806253652045, 'batch_size': 16, 'num_layers': 2, 'units': 128}


Best is trial 73 with value: 0.0007600520621053874. Best LSTM parameters: {'learning_rate': 0.0077440806253652045, 'batch_size': 16, 'num_layers': 2, 'units': 128}

In [8]:
study_transformer = optuna.create_study(direction='minimize')
study_transformer.optimize(lambda trial: objective(trial, model_type='transformer'), n_trials=100)
print("Best Transformer parameters:", study_transformer.best_params)


[I 2024-12-20 00:45:00,123] A new study created in memory with name: no-name-8569d240-3424-4657-a7e3-d653f7b54fc0
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-20 00:45:08,660] Trial 0 finished with value: 0.25231173634529114 and parameters: {'learning_rate': 0.02543439775570256, 'batch_size': 32, 'num_heads': 5, 'key_dim': 48, 'ff_units': 128}. Best is trial 0 with value: 0.25231173634529114.
[I 2024-12-20 00:45:15,310] Trial 1 finished with value: 0.24572178721427917 and parameters: {'learning_rate': 0.007025873394803057, 'batch_size': 32, 'num_heads': 5, 'key_dim': 48, 'ff_units': 96}. Best is trial 1 with value: 0.24572178721427917.
[I 2024-12-20 00:45:21,598] Trial 2 finished with value: 0.3846697509288788 and parameters: {'learning_rate': 0.0002806351414232282, 'batch_size': 32, 'num_heads': 5, 'key_dim': 48, 'ff_units': 128}. Best is trial 1 with value: 0.24572178721427917.
[I 2024-12-20 00:45:27,111] Trial 3 finished with value: 0.2547583580

Best Transformer parameters: {'learning_rate': 0.07545159709344915, 'batch_size': 32, 'num_heads': 3, 'key_dim': 48, 'ff_units': 128}


 Best is trial 44 with value: 0.20226186513900757. Best Transformer parameters: {'learning_rate': 0.07545159709344915, 'batch_size': 32, 'num_heads': 3, 'key_dim': 48, 'ff_units': 128}

In [9]:
study_nn = optuna.create_study(direction='minimize')
study_nn.optimize(lambda trial: objective(trial, model_type='neural_net'), n_trials=100)
print("Best Neural Network parameters:", study_nn.best_params)

[I 2024-12-20 00:55:30,525] A new study created in memory with name: no-name-d1aafeeb-89ff-41fd-8122-5c5eb6b59220
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-20 00:55:34,477] Trial 0 finished with value: 0.28961658477783203 and parameters: {'learning_rate': 0.07979665504141374, 'batch_size': 16, 'num_layers': 2, 'units': 192}. Best is trial 0 with value: 0.28961658477783203.
[I 2024-12-20 00:55:37,540] Trial 1 finished with value: 0.1627950668334961 and parameters: {'learning_rate': 0.00015636392639619993, 'batch_size': 128, 'num_layers': 2, 'units': 128}. Best is trial 1 with value: 0.1627950668334961.
[I 2024-12-20 00:55:40,266] Trial 2 finished with value: 0.01872294582426548 and parameters: {'learning_rate': 0.07819514424797501, 'batch_size': 16, 'num_layers': 1, 'units': 96}. Best is trial 2 with value: 0.01872294582426548.
[I 2024-12-20 00:55:44,967] Trial 3 finished with value: 0.31411632895469666 and parameters: {'learning_rate': 0.0895045

Best Neural Network parameters: {'learning_rate': 0.000693786820052996, 'batch_size': 16, 'num_layers': 1, 'units': 160}


Best is trial 76 with value: 0.017089562490582466. Best Neural Network parameters: {'learning_rate': 0.000693786820052996, 'batch_size': 16, 'num_layers': 1, 'units': 160}

In [10]:
study_ode = optuna.create_study(direction='minimize')
study_ode.optimize(lambda trial: objective(trial, model_type='ode'), n_trials=100)
print("Best ODE parameters:", study_ode.best_params)


[I 2024-12-20 01:00:32,657] A new study created in memory with name: no-name-166b3cd9-9b9e-4e37-a430-77fa07fde35a
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-12-20 01:00:36,181] Trial 0 finished with value: 0.019769897684454918 and parameters: {'learning_rate': 0.0020832647643461914, 'batch_size': 128, 'num_layers': 2, 'units': 128, 'activation': 'tanh'}. Best is trial 0 with value: 0.019769897684454918.
[I 2024-12-20 01:00:39,469] Trial 1 finished with value: 0.01885734498500824 and parameters: {'learning_rate': 0.0008901317908449299, 'batch_size': 128, 'num_layers': 2, 'units': 96, 'activation': 'relu'}. Best is trial 1 with value: 0.01885734498500824.
[I 2024-12-20 01:00:44,549] Trial 2 finished with value: 0.12089090794324875 and parameters: {'learning_rate': 0.09024234964973468, 'batch_size': 32, 'num_layers': 2, 'units': 224, 'activation': 'tanh'}. Best is trial 1 with value: 0.01885734498500824.
[I 2024-12-20 01:00:49,870] Trial 3 finished wit

Best ODE parameters: {'learning_rate': 0.01015501043744077, 'batch_size': 16, 'num_layers': 3, 'units': 256, 'activation': 'tanh'}


 Best is trial 5 with value: 0.013553121127188206. Best ODE parameters: {'learning_rate': 0.01015501043744077, 'batch_size': 16, 'num_layers': 3, 'units': 256, 'activation': 'tanh'}

Here’s how to use this function to print the architecture for each model: