In [2]:
import pickle
import optuna
import mlflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras import layers, regularizers

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the pickled SPY classification dataset from disk.
# NOTE: pickle.load executes code embedded in the file, so this path must only ever
# point at a trusted, locally produced dataset.
with open('Datasets/spy_classification_data.pkl', mode='rb') as pickled_dataset:
    tickData = pickle.load(pickled_dataset)

In [4]:
# The dataset is very large, so only its tail is used to keep experiments fast:
# startIndex marks the row where (roughly) the final 1/75th of the data begins.
rowCount = len(tickData)
startIndex = int(rowCount - rowCount / 75)

In [5]:
# Keep only the rows from startIndex onward (positional slice).
# NOTE: this overwrites tickData, so re-running this cell without reloading the
# pickle slices the already-shortened data again.
tickData = tickData.iloc[startIndex:]

In [6]:
# Features are every column except the last; the last column is the target.
X, y = tickData.iloc[:, :-1], tickData.iloc[:, -1]

In [15]:
# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default — if the rows are
# time-ordered, confirm that a shuffled split is really what is wanted here.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [17]:
# Select the MLflow experiment that runs started later in this notebook are recorded under.
# NOTE(review): "Classificaiton" is misspelled; the name is left untouched because
# a different string would refer to a different experiment.
mlflow.set_experiment(experiment_name="Variance Classificaiton Optimization")


2024/11/13 17:28:15 INFO mlflow.tracking.fluent: Experiment with name 'Variance Classificaiton Optimization' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///Users/atulkrishnan/Desktop/TradingAlgos/Variance_Algo/mlruns/658072455368796720', creation_time=1731536895333, experiment_id='658072455368796720', last_update_time=1731536895333, lifecycle_stage='active', name='Variance Classificaiton Optimization', tags={}>

In [18]:
# Running counter used by objective() to number MLflow run names ("Model 1", "Model 2", ...).
experiment_counter = 1


In [21]:
def objective(trial):
    """Optuna objective: build, train and score one candidate dense classifier.

    Each call samples a set of hyperparameters from ``trial``, trains a Keras
    ``Sequential`` binary classifier on ``X_train``/``y_train``, evaluates it on
    ``X_val``/``y_val`` and records the trial as one MLflow run named
    ``"Model <experiment_counter>"``.

    Sampled hyperparameters (all of them are logged to MLflow):
      * ``n_units_l1``, ``l2_reg``, ``dropout_rate_l1`` - first hidden layer
      * ``n_layers`` - number of additional hidden layers (1 to 3)
      * ``n_units_l{k}``, ``l2_reg_l{k}``, ``dropout_rate_l{k}`` for k = 2..n_layers+1
      * ``learning_rate`` - Adam learning rate
      * ``epochs``, ``batch_size`` - training schedule

    Args:
        trial: the ``optuna.trial.Trial`` used to sample hyperparameters.

    Returns:
        The validation accuracy reported by ``model.evaluate``.

    Side effects:
        Increments the module-level ``experiment_counter`` after a successful
        trial, logs parameters/metrics/the trained model to MLflow, and resets
        the Keras global state at the start of the call.
    """
    global experiment_counter

    # Drop Keras state left behind by earlier trials so memory use does not keep
    # growing as the study builds one model after another.
    tf.keras.backend.clear_session()

    model_name = f"Model {experiment_counter}"

    # Start MLflow run for each Optuna trial
    with mlflow.start_run(run_name=model_name):
        model = Sequential()

        # First hidden layer; it also fixes the input width to the number of feature columns.
        # suggest_float(..., log=True) is the supported replacement for the
        # deprecated suggest_loguniform and samples from the same distribution.
        model.add(Dense(
            units=trial.suggest_int('n_units_l1', 16, 64),
            activation='relu',
            input_shape=(X_train.shape[1],),
            kernel_regularizer=tf.keras.regularizers.l2(
                trial.suggest_float('l2_reg', 1e-6, 1e-2, log=True)
            )
        ))
        model.add(Dropout(trial.suggest_float('dropout_rate_l1', 0.1, 0.5)))

        # Additional hidden layers, each with its own width, L2 strength and dropout rate.
        n_layers = trial.suggest_int('n_layers', 1, 3)
        for i in range(n_layers):
            model.add(Dense(
                units=trial.suggest_int(f'n_units_l{i+2}', 16, 64),
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(
                    trial.suggest_float(f'l2_reg_l{i+2}', 1e-6, 1e-2, log=True)
                )
            ))
            model.add(Dropout(trial.suggest_float(f'dropout_rate_l{i+2}', 0.1, 0.5)))

        # Single sigmoid unit: binary classification output.
        model.add(Dense(1, activation='sigmoid'))

        lr = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        # Sample the training schedule before logging so it is recorded as well.
        epochs = trial.suggest_int('epochs', 10, 50)
        batch_size = trial.suggest_int('batch_size', 16, 64)

        # trial.params holds every value sampled above under its suggestion name,
        # so a single call records the complete configuration of this run.
        mlflow.log_params(trial.params)

        model.fit(X_train, y_train,
                  validation_data=(X_val, y_val),
                  epochs=epochs,
                  batch_size=batch_size,
                  verbose=0)

        val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
        mlflow.log_metric("val_loss", val_loss)
        mlflow.log_metric("val_accuracy", val_accuracy)

        mlflow.keras.log_model(model, model_name)

        experiment_counter += 1

        return val_accuracy


In [None]:
# Hyperparameter search: the study maximizes the value returned by objective()
# (the validation accuracy) over 100 trials.
study = optuna.create_study(direction="maximize")
study.optimize(func=objective, n_trials=100)

In [None]:
# Report the best trial of the study: its objective value, then each hyperparameter.
best_trial = study.best_trial
print("Best trial:")
print("  Value: ", best_trial.value)
print("  Params: ")
for key, value in best_trial.params.items():
    print("    {}: {}".format(key, value))