In [1]:
import numpy as np
import pandas as pd
from main import MLP_Classifier,Layer,accuracy
from sklearn.datasets import make_classification
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Seed NumPy's global RNG so the noise drawn below is reproducible.
np.random.seed(42)

# Synthetic classification dataset.
dataset_options = dict(
    n_samples=1000,
    n_features=4,
    n_redundant=0,
    n_clusters_per_class=1,
    flip_y=0.1,  # add label noise
    class_sep=1.0,  # class separation
    n_classes=2,  # number of classes
)
X, Y = make_classification(**dataset_options)

# Perturb every feature with additive Gaussian noise (mean 0, std 0.5).
noise = np.random.normal(0, 0.5, X.shape)
X = pd.DataFrame(X + noise)

# Two distinct labels -> keep a single label column; otherwise one-hot encode.
Y = pd.Series(Y)
is_binary = len(np.unique(Y)) == 2
Y = pd.DataFrame(Y) if is_binary else pd.get_dummies(Y).astype(int)


Define the objective function to optimise: minimise the cross-entropy (in the PDF this is stated as maximising the log-likelihood).

As an example, we optimise over the batch size, the learning rate and the dropout rate (one of the most important hyperparameters in a neural network).

We could also optimise over the layers, but this is computationally expensive for large datasets.


In [10]:



def objective(trial):
    """Optuna objective: train an MLP with sampled hyperparameters and return its loss.

    Three hyperparameters are drawn from ``trial``:

    * ``batch_size`` -- integer in [500, 800]
    * ``alpha``      -- float in [0.01, 0.1]
    * ``dropout``    -- float in [0.5, 0.9]

    The architecture is fixed: three ReLU layers with 20, 10 and 30 neurons
    and "he" initialisation. The first two use ``("l2", 0.1)`` regularisation
    and the last one uses the sampled dropout value.

    The model is trained on the notebook-level ``X`` and ``Y`` and the score
    is ``model.loss(Y)``, i.e. it is evaluated against the training labels.
    NOTE(review): this should be measured on a validation set instead.

    Parameters
    ----------
    trial
        Object exposing ``suggest_int`` and ``suggest_float``.

    Returns
    -------
    The value returned by ``model.loss(Y)`` after training.
    """
    # Hyperparameter search space (sampling order is kept fixed).
    sampled_batch = trial.suggest_int("batch_size", 500, 800)
    sampled_alpha = trial.suggest_float("alpha", 0.01, 0.1)
    sampled_dropout = trial.suggest_float("dropout", 0.5, 0.9)

    # (number of neurons, regularisation) for each layer, in network order.
    layer_specs = [
        (20, ("l2", 0.1)),
        (10, ("l2", 0.1)),
        (30, ("dropout", sampled_dropout)),
    ]
    hidden_layers = [
        Layer(
            nb_neurons=width,
            activation_function="relu",
            regul=penalty,
            initial="he",
        )
        for width, penalty in layer_specs
    ]

    training_options = dict(
        max_iter=2000,
        thr=1e-5,
        alpha=sampled_alpha,
        seed=123,
        batch_size=sampled_batch,
        verbose=False,
        optim="adam",
    )
    network = MLP_Classifier(hidden_layers, **training_options)
    network.train(X, Y)

    # Scored on the training labels -- see the NOTE in the docstring.
    return network.loss(Y)


# Trials are persisted in a local SQLite file. Because load_if_exists=True,
# re-running this cell resumes the stored "MLP" study and appends new trials
# to it rather than starting a fresh search.
storage = "sqlite:///optuna_mlp.db"
study = optuna.create_study(
    direction="minimize",  # 'minimize' for loss functions
    study_name="MLP",
    storage=storage,
    load_if_exists=True,
    # Seed the sampler explicitly so the hyperparameter suggestions are
    # reproducible, like the NumPy and model seeds used elsewhere in the
    # notebook. TPESampler is the sampler Optuna uses by default here.
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20)

print("Best Hyperparameters:", study.best_params)


[I 2025-10-06 14:10:30,929] Using an existing study with name 'MLP' instead of creating a new one.
[I 2025-10-06 14:10:33,739] Trial 68 finished with value: 0.39635769032420504 and parameters: {'batch_size': 660, 'alpha': 0.05305922404049647, 'dropout': 0.6701779055276591}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 14:10:34,518] Trial 69 finished with value: 0.3902171042180091 and parameters: {'batch_size': 534, 'alpha': 0.07687725243652464, 'dropout': 0.7341042753305932}. Best is trial 69 with value: 0.3902171042180091.
[I 2025-10-06 14:10:38,217] Trial 70 finished with value: 0.3840466996668747 and parameters: {'batch_size': 534, 'alpha': 0.07512249196749742, 'dropout': 0.7355842477137954}. Best is trial 70 with value: 0.3840466996668747.
[I 2025-10-06 14:10:42,791] Trial 71 finished with value: 0.37838720432456874 and parameters: {'batch_size': 517, 'alpha': 0.08383707724593538, 'dropout': 0.8770937426215277}. Best is trial 71 with value: 0.37838720432456874.
[I

Best Hyperparameters: {'batch_size': 534, 'alpha': 0.08408839297279899, 'dropout': 0.8714076961172885}


In [11]:
# Collect the best trial's objective value(s) and hyperparameters in one dict.
best_trial = study.best_trial
best_results = {
    "best value": best_trial.values,
    "params": best_trial.params,
}
best_results


{'best value': [0.37534649553560645],
 'params': {'batch_size': 534,
  'alpha': 0.08408839297279899,
  'dropout': 0.8714076961172885}}

Run the model with the optimised hyperparameters.

In [None]:
# Hyperparameters selected by the Optuna study.
tuned_params = best_results["params"]

# Three ReLU layers with "he" initialisation; only the last layer's dropout
# value comes from the tuned hyperparameters.
hidden_layers = [
    Layer(nb_neurons=20, activation_function="relu", regul=("l2", 0.1), initial="he"),
    Layer(nb_neurons=10, activation_function="relu", regul=("l2", 0.1), initial="he"),
    Layer(
        nb_neurons=30,
        activation_function="relu",
        regul=("dropout", tuned_params["dropout"]),
        initial="he",
    ),
]

model = MLP_Classifier(
    hidden_layers,
    max_iter=2000,
    thr=1e-5,
    alpha=tuned_params["alpha"],
    seed=123,
    batch_size=tuned_params["batch_size"],
    verbose=True,
    optim="adam",
)
model.train(X, Y)

# Accuracy is measured on the same X / Y the model was trained on.
train_predictions = model.predict(X)
print("final accuracy", accuracy(train_predictions, np.array(Y)))


iteration 0 : accuracy  : 0.717, loss : 0.6047089945911341
iteration 100 : accuracy  : 0.834, loss : 0.38891906801321363
iteration 200 : accuracy  : 0.834, loss : 0.38933392266681666
Model terminated successfully, Converged at 256 epoch, for a given alpha :  0.08408839297279899 and given threshold : 1e-05 
final accuracy 0.845


In [None]:
#future steps:
"""
-add a dynamic learning rate
-split the data into train and test sets, and always validate on the test set inside the training loop too
-for a given (best) set of hyperparameters, plot the losses for different optimisation methods
-add differential evolution 


-add batch norm 


"""