In [1]:
import numpy as np
import pandas as pd
from main import MLP_Classifier,Layer,accuracy
from sklearn.datasets import make_classification
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed NumPy's global RNG so the Gaussian noise drawn below is repeatable.
np.random.seed(42)

# Build a synthetic classification data set.
X, Y = make_classification(
    n_samples=1000,
    n_features=4,
    n_redundant=0,
    n_clusters_per_class=1,
    n_classes=2,  # number of classes
    class_sep=1.0,  # class separation
    flip_y=0.1,  # add label noise
)

# Perturb every feature with zero-mean Gaussian noise (standard deviation 0.5).
noise = np.random.normal(loc=0, scale=0.5, size=X.shape)
X = pd.DataFrame(X + noise)

# Two classes: keep the labels as a single column.
# More than two classes: one-hot encode them into integer indicator columns.
Y = pd.Series(Y)
Y = pd.DataFrame(Y) if Y.nunique() == 2 else pd.get_dummies(Y).astype(int)


Define the objective function to optimise: minimise the cross-entropy (stated in the PDF as maximising the log-likelihood).

For example, we optimise over the batch size, the learning rate and the dropout rate (one of the most important hyperparameters in a neural network).

We could also optimise over the layers, but that is computationally expensive for large datasets.


In [3]:



def objective(trial):
    """Optuna objective: train one MLP for the trial and return its loss.

    Three hyperparameters are drawn from the trial (batch size, ``alpha`` and
    the dropout setting of the last hidden layer); everything else is fixed.
    The network is trained on the notebook-level ``X`` and ``Y``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The value of ``model.loss(Y)`` after training.
    """
    # Search space (sampling order is kept: batch_size, alpha, dropout).
    batch_size = trial.suggest_int("batch_size", 500, 800)
    alpha = trial.suggest_float("alpha", 0.01, 0.1)
    dropout_rate = trial.suggest_float("dropout", 0.5, 0.9)

    # (width, regularisation) of each hidden layer, in network order.
    layer_specs = (
        (20, ("l2", 0.1)),
        (10, ("l2", 0.1)),
        (30, ("dropout", dropout_rate)),
    )
    hidden_layers = [
        Layer(
            nb_neurons=width,
            activation_function="relu",
            regul=regularisation,
            initial="he",
        )
        for width, regularisation in layer_specs
    ]

    model = MLP_Classifier(
        hidden_layers,
        max_iter=2000,
        thr=1e-5,
        alpha=alpha,
        seed=123,
        batch_size=batch_size,
        verbose=False,
    )
    model.train(X, Y)

    # TODO: score on a held-out validation set; this uses the training labels.
    return model.loss(Y)


# Trials are persisted in a local SQLite file. Because `load_if_exists=True`,
# re-running this cell resumes the stored study and appends 20 more trials, so
# the reported best trial may come from an earlier session. Delete
# `optuna_mlp.db` (or change `study_name`) to start a fresh search.
storage = "sqlite:///optuna_mlp.db"

# Seed the sampler explicitly: an unseeded sampler proposes different
# hyperparameters on every run, which makes the search impossible to reproduce.
# With a fresh database and this seed, the sequence of trials is repeatable.
study = optuna.create_study(
    direction="minimize",  # the objective returns a loss
    study_name="MLP",
    storage=storage,
    load_if_exists=True,
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20)

print("Best Hyperparameters:", study.best_params)


[I 2025-10-06 10:52:32,157] Using an existing study with name 'MLP' instead of creating a new one.


Don't forget to normalise the input data, and consider adding batch normalisation.


[I 2025-10-06 10:52:33,030] Trial 41 finished with value: 0.45006786377067426 and parameters: {'batch_size': 672, 'alpha': 0.06796381033487839, 'dropout': 0.609613634717613}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:52:35,194] Trial 42 finished with value: 0.4080460878352336 and parameters: {'batch_size': 734, 'alpha': 0.0623023541304181, 'dropout': 0.6416628842394265}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:52:36,060] Trial 43 finished with value: 0.4359845939288715 and parameters: {'batch_size': 698, 'alpha': 0.07823572663930908, 'dropout': 0.6209358023560235}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:52:36,565] Trial 44 finished with value: 0.5569548459952196 and parameters: {'batch_size': 705, 'alpha': 0.07240904831743597, 'dropout': 0.5479516377178313}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:52:38,919] Trial 45 finished with value: 0.4138217694811252 and parameters: {'batch_size

Model terminated successfully, Did not Converge at 2000 epoch, for a given alpha :  0.08008405535854189 and given threshold : 1e-05 


[I 2025-10-06 10:52:53,255] Trial 50 finished with value: 0.397477263731721 and parameters: {'batch_size': 629, 'alpha': 0.08582385357178272, 'dropout': 0.7250688402963831}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:52:59,511] Trial 51 finished with value: 0.39415901695101807 and parameters: {'batch_size': 542, 'alpha': 0.09647757228978489, 'dropout': 0.6880826095775431}. Best is trial 10 with value: 0.3929574144038384.


Model terminated successfully, Did not Converge at 2000 epoch, for a given alpha :  0.09647757228978489 and given threshold : 1e-05 


[I 2025-10-06 10:53:00,679] Trial 52 finished with value: 0.4180221083008415 and parameters: {'batch_size': 538, 'alpha': 0.0942336083448913, 'dropout': 0.6768793774907153}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:53:05,032] Trial 53 finished with value: 0.39401136289160776 and parameters: {'batch_size': 603, 'alpha': 0.0989461452425066, 'dropout': 0.691045506455889}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:53:07,876] Trial 54 finished with value: 0.4035956068692178 and parameters: {'batch_size': 598, 'alpha': 0.09070422534608173, 'dropout': 0.6900116271045297}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:53:08,847] Trial 55 finished with value: 0.42547475369801746 and parameters: {'batch_size': 590, 'alpha': 0.09554375730668144, 'dropout': 0.7307076331502801}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:53:09,800] Trial 56 finished with value: 0.4610732849145131 and parameters: {'batch_size

Model terminated successfully, Did not Converge at 2000 epoch, for a given alpha :  0.08484551541416105 and given threshold : 1e-05 


[I 2025-10-06 10:53:19,031] Trial 58 finished with value: 0.39810274182747163 and parameters: {'batch_size': 615, 'alpha': 0.0740842105204432, 'dropout': 0.6632934708219554}. Best is trial 10 with value: 0.3929574144038384.


Model terminated successfully, Did not Converge at 2000 epoch, for a given alpha :  0.0740842105204432 and given threshold : 1e-05 


[I 2025-10-06 10:53:20,080] Trial 59 finished with value: 0.4033986989928147 and parameters: {'batch_size': 680, 'alpha': 0.08941156114757785, 'dropout': 0.8074366551393992}. Best is trial 10 with value: 0.3929574144038384.
[I 2025-10-06 10:53:21,346] Trial 60 finished with value: 0.40721674167006777 and parameters: {'batch_size': 779, 'alpha': 0.09977973848614705, 'dropout': 0.688125660776427}. Best is trial 10 with value: 0.3929574144038384.


Best Hyperparameters: {'batch_size': 714, 'alpha': 0.04353166651137545, 'dropout': 0.8963721690959264}


In [4]:
# Collect the best trial's objective value(s) and its hyperparameters in one
# dictionary so the next cell can rebuild the model from them.
best_results = {
    "best value": study.best_trial.values,
    "params": study.best_trial.params,
}
best_results


{'best value': [0.3929574144038384],
 'params': {'batch_size': 714,
  'alpha': 0.04353166651137545,
  'dropout': 0.8963721690959264}}

Run the model with the optimised hyperparameters.

In [5]:
# Rebuild the network used during the search, this time with the best
# hyperparameters found (dropout setting, alpha, batch size) and verbose logging.
model = MLP_Classifier(
    [
        Layer(
            nb_neurons=width,
            activation_function="relu",
            regul=regularisation,
            initial="he",
        )
        # (width, regularisation) of each hidden layer, in network order.
        for width, regularisation in (
            (20, ("l2", 0.1)),
            (10, ("l2", 0.1)),
            (30, ("dropout", best_results["params"]["dropout"])),
        )
    ],
    max_iter=2000,
    thr=1e-5,
    alpha=best_results["params"]["alpha"],
    seed=123,
    batch_size=best_results["params"]["batch_size"],
    verbose=True,
)

model.train(X, Y)

# NOTE: accuracy is measured on the same data the model was trained on.
print(
    "final accuracy",
    accuracy(model.predict(X), np.array(Y)),
)



iteration 0 : accuracy  : 0.636, loss : 0.6912064731705144
iteration 100 : accuracy  : 0.794, loss : 0.5593979229332793
iteration 200 : accuracy  : 0.83, loss : 0.4590097088035097
iteration 300 : accuracy  : 0.829, loss : 0.43923466186811005
iteration 400 : accuracy  : 0.831, loss : 0.427387640599838
iteration 500 : accuracy  : 0.83, loss : 0.4207655838681293
iteration 600 : accuracy  : 0.829, loss : 0.4107991680521842
iteration 700 : accuracy  : 0.84, loss : 0.4031294167460206
iteration 800 : accuracy  : 0.835, loss : 0.4069390439872127
iteration 900 : accuracy  : 0.839, loss : 0.39642508923937514
iteration 1000 : accuracy  : 0.823, loss : 0.39801424641481165
iteration 1100 : accuracy  : 0.83, loss : 0.3955574568176502
iteration 1200 : accuracy  : 0.837, loss : 0.39397585667442775
Model terminated successfully, Converged at 1288 epoch, for a given alpha :  0.04353166651137545 and given threshold : 1e-05 
final accuracy 0.841
