In [13]:
import numpy as np
import pandas as pd
from main import MLP_Classifier,Layer
from sklearn.datasets import make_classification
import optuna
from sklearn.metrics import accuracy_score,precision_score,recall_score
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Fix NumPy's global seed before anything random happens in this cell.
np.random.seed(42)

# Synthetic classification problem: 1000 samples, 4 features, no redundant
# features, one cluster per class, 10% flipped labels, 2 classes.
X, Y = make_classification(
    n_samples=1000,
    n_features=4,
    n_redundant=0,
    n_clusters_per_class=1,
    flip_y=0.1,
    class_sep=1.0,
    n_classes=2,
)

# Blur the features with additive Gaussian noise (mean 0, std 0.5).
noise = np.random.normal(0, 0.5, X.shape)
X = pd.DataFrame(X + noise)

# Binary targets stay a single column; with more than two classes the labels
# are one-hot encoded as integer columns instead.
Y = pd.Series(Y)
is_binary = np.unique(Y).size == 2
Y = pd.DataFrame(Y) if is_binary else pd.get_dummies(Y).astype(int)


Determine the objective function to optimise: minimise the cross-entropy loss (in the PDF this is expressed as maximising the log-likelihood).

For example, we optimise over the batch size, the learning rate and the dropout rate (one of the most important hyperparameters in neural networks).

We could also optimise over the layers, but this is computationally expensive for large datasets.


In [15]:



def objective(trial):
    """Optuna objective: train an MLP with sampled hyperparameters and return its loss.

    Samples ``batch_size`` (int in [500, 800]), ``alpha`` (float in
    [0.01, 0.1]) and ``dropout`` (float in [0.5, 0.9]) from ``trial``, builds
    a three-hidden-layer ``MLP_Classifier`` and trains it on the notebook-level
    ``X`` / ``Y``.

    Returns:
        The value of ``model.loss(Y, model.y_hat)`` after training.

    NOTE(review): the score is computed on the same data the model was trained
    on; it should be evaluated on a held-out validation set instead.
    """
    # Search space (sampling order kept: batch_size, alpha, dropout).
    sampled_batch_size = trial.suggest_int("batch_size", 500, 800)
    sampled_alpha = trial.suggest_float("alpha", 0.01, 0.1)
    sampled_dropout = trial.suggest_float("dropout", 0.5, 0.9)

    # Fixed architecture; only the last layer's dropout parameter is tuned.
    hidden_layers = (
        Layer(
            nb_neurons=20,
            activation_function="relu",
            regul=("l2", 0.1),
            initial="he",
            batchnorm=True,
        ),
        Layer(
            nb_neurons=10,
            activation_function="relu",
            regul=("l2", 0.1),
            initial="he",
        ),
        Layer(
            nb_neurons=30,
            activation_function="relu",
            regul=("dropout", sampled_dropout),
            initial="he",
        ),
    )

    classifier = MLP_Classifier(
        hidden_layers,
        max_iter=2000,
        thr=1e-5,
        alpha=sampled_alpha,
        seed=123,
        batch_size=sampled_batch_size,
        verbose=False,
        optim="adam",
    )
    classifier.train(X, Y)

    # Training-set loss (see NOTE in the docstring).
    return classifier.loss(Y, classifier.y_hat)


# The study is persisted in a local SQLite file. Because load_if_exists=True,
# re-running this cell appends trials to the stored study rather than starting
# from scratch.
storage = "sqlite:///optuna_mlpsoftmax.db"

# Optuna's sampler has its own RNG, so np.random.seed() does not make the
# suggested hyperparameters reproducible. TPE is Optuna's default sampler;
# passing it explicitly only adds the seed.
sampler = optuna.samplers.TPESampler(seed=42)

study = optuna.create_study(
    direction="minimize",  # 'minimize' for loss functions
    study_name="MLP",
    storage=storage,
    sampler=sampler,
    load_if_exists=True,
)
study.optimize(objective, n_trials=1)

print("Best Hyperparameters:", study.best_params)


[I 2026-01-04 19:50:55,976] A new study created in RDB with name: MLP
[I 2026-01-04 19:50:57,713] Trial 0 finished with value: 0.3785585541635659 and parameters: {'batch_size': 752, 'alpha': 0.02319128823729845, 'dropout': 0.5788575266859909}. Best is trial 0 with value: 0.3785585541635659.


Best Hyperparameters: {'batch_size': 752, 'alpha': 0.02319128823729845, 'dropout': 0.5788575266859909}


In [16]:
# Gather the best trial's objective value(s) and hyperparameters in one dict.
# `values` is a list, so "best value" holds a one-element list here.
best_trial = study.best_trial
best_results = {
    "best value": best_trial.values,
    "params": best_trial.params,
}
best_results


{'best value': [0.3785585541635659],
 'params': {'batch_size': 752,
  'alpha': 0.02319128823729845,
  'dropout': 0.5788575266859909}}

Run the model with the optimised parameters.

In [17]:


# Hold out 25% of the data for evaluation.
# NOTE(review): the hyperparameters in `best_results` were tuned by
# `objective` on the full X / Y, which includes these test rows, and the test
# split is also passed to train() below. The final score is therefore likely
# optimistic; consider a separate validation split.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

best_params = best_results["params"]

# Same architecture as in `objective`, with the tuned dropout / alpha / batch size.
model = MLP_Classifier(
    (
        Layer(
            nb_neurons=20,
            activation_function="relu",
            regul=("l2", 0.1),
            initial="he",
            batchnorm=True,
        ),
        Layer(
            nb_neurons=10,
            activation_function="relu",
            regul=("l2", 0.1),
            initial="he",
        ),
        Layer(
            nb_neurons=30,
            activation_function="relu",
            regul=("dropout", best_params["dropout"]),
            initial="he",
        ),
    ),
    max_iter=2000,
    thr=1e-5,
    alpha=best_params["alpha"],
    seed=123,
    batch_size=best_params["batch_size"],
    verbose=True,
    optim="adam",
    nb_epochs_early_stopping=50,
)

# Metric reported during training and for the final score.
fct = accuracy_score

model.train(X_train, y_train, X_test, y_test, fct)

# Score with the selected metric, not a hard-coded accuracy_score, so the
# printed name always matches the value. Arguments follow scikit-learn's
# (y_true, y_pred) order, which matters for asymmetric metrics such as
# precision or recall.
print(f"final {fct.__name__}", fct(y_test, model.predict(X_test)))


-------------------------------------------------------------------------
iteration 0 : TRAIN accuracy_score  : 0.7093333333333334, loss : 0.6566102261215156
iteration 0 : TEST accuracy_score  : 0.748, loss : 0.6565915727919444
-------------------------------------------------------------------------
iteration 50 : TRAIN accuracy_score  : 0.8133333333333334, loss : 0.43423980488257347
iteration 50 : TEST accuracy_score  : 0.828, loss : 0.41263360186056397
-------------------------------------------------------------------------
iteration 100 : TRAIN accuracy_score  : 0.8186666666666667, loss : 0.4144917150850996
iteration 100 : TEST accuracy_score  : 0.852, loss : 0.39384797691126466
-------------------------------------------------------------------------
iteration 150 : TRAIN accuracy_score  : 0.8306666666666667, loss : 0.40984694893046175
iteration 150 : TEST accuracy_score  : 0.86, loss : 0.3658694271920856
early stopping at epoch 161
final accuracy_score 0.888


In [19]:
# Show the keys of the trained model's `E` dict.
model.E.keys()

dict_keys([3, 2, 1])

In [7]:
# Number of layers the model reports (three Layer objects were passed in).
model.nb_layers

3

In [8]:
# Show the model's per-layer configuration (`network` attribute).
# NOTE(review): this cell's execution count is out of order and the saved
# output shows a dropout value different from `best_results`. Re-run after
# training to refresh it.
model.network

{1: {'layer_type': layers.Layer,
  'nb_neurons': 20,
  'activ_fct': 'relu',
  'regul': 'l2',
  'regul_param': 0.1,
  'init': 'he',
  'law': 'normal',
  'batchnorm': True},
 2: {'layer_type': layers.Layer,
  'nb_neurons': 10,
  'activ_fct': 'relu',
  'regul': 'l2',
  'regul_param': 0.1,
  'init': 'he',
  'law': 'normal',
  'batchnorm': False},
 3: {'layer_type': layers.Layer,
  'nb_neurons': 30,
  'activ_fct': 'relu',
  'regul': 'dropout',
  'regul_param': 0.8375822042749481,
  'init': 'he',
  'law': 'normal',
  'batchnorm': False}}

In [2]:
from layers import ConvLayer,MaxPoolLayer,Layer,FlatLayer
from cnn import CNN
import numpy as np 

In [3]:

# Settings shared by every convolution / pooling layer in the stack.
conv_settings = dict(
    kernel_size=2,
    stride=1,
    padding=True,
    activation_function="relu",
    initial="lecun",
    law="normal",
)
pool_settings = dict(kernel_size=3, stride=2, padding=True)

# Three Conv -> MaxPool stages (3 -> 16 -> 32 -> 64 channels), then a flatten
# layer and three dense layers. Layers are created in the same order in which
# they are stacked.
cnn_layers = []
for channels_in, channels_out in ((3, 16), (16, 32), (32, 64)):
    cnn_layers.append(
        ConvLayer(in_channels=channels_in, output_channels=channels_out, **conv_settings)
    )
    cnn_layers.append(MaxPoolLayer(**pool_settings))

cnn_layers.append(FlatLayer())
cnn_layers.append(
    Layer(
        nb_neurons=20,
        activation_function="relu",
        regul=("l2", 0.1),
        initial="he",
        batchnorm=True,
    )
)
cnn_layers.append(
    Layer(
        nb_neurons=10,
        activation_function="relu",
        regul=("l2", 0.1),
        initial="he",
    )
)
cnn_layers.append(
    Layer(
        nb_neurons=30,
        activation_function="relu",
        regul=("dropout", 0.5),
        initial="he",
    )
)

q = CNN(
    tuple(cnn_layers),
    max_iter=2000,
    thr=1e-5,
    alpha=0.001,
    seed=123,
    batch_size=200,
    verbose=True,
    nb_epochs_early_stopping=20,
)

Don't forget to normalise the input data, and consider using batch normalisation.


In [4]:
# Dummy inputs and binary labels for a CNN smoke test.
# A locally seeded generator makes the data identical on every run without
# touching NumPy's global random state.
rng = np.random.default_rng(42)

# Shape (20, 20, 3, 10): presumably height x width x channels x samples. The
# first ConvLayer takes in_channels=3 and there are 10 labels. TODO confirm.
X = rng.standard_normal((20, 20, 3, 10))

# One 0/1 label per sample, as a (10, 1) column.
Y = rng.integers(0, 2, size=(10, 1))


In [5]:
# Train the CNN on the dummy data (no validation set or metric is passed).
q.train(X,Y)

dummy res shape (1, 64)


In [6]:
# Layer count for the convolutional part; 3 ConvLayer + 3 MaxPoolLayer +
# 1 FlatLayer were passed to the constructor.
q.nb_cnn_layers

7

In [7]:
# Layer count for the dense part; three Layer objects were passed to the constructor.
q.nb_layers

3

In [8]:
# Predict on the same X that was used for training (smoke test only).
q.predict(X)

array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0]])

In [9]:
# Call forward_cnn on X with the "train" flag and display the returned array.
# NOTE(review): presumably the flattened convolutional features per sample.
# Confirm, and consider displaying only `.shape` or a slice.
q.forward_cnn(X,"train")

array([[1.04309733, 0.42125133, 0.6151505 , 0.87207374, 0.2649009 ,
        0.        , 0.        , 0.        , 2.64491035, 0.        ,
        0.81233333, 0.07565668, 1.17636037, 0.16779605, 1.72012548,
        0.38541424, 1.23683358, 0.06780775, 0.69487111, 0.        ,
        1.37474056, 0.        , 0.        , 1.79196996, 0.38253687,
        0.        , 1.24398606, 0.        , 0.27049357, 0.        ,
        0.23664962, 1.40377845, 0.        , 0.        , 0.        ,
        1.18259667, 1.99181423, 1.17500782, 0.12541915, 0.        ,
        1.48577624, 0.        , 3.67866338, 0.        , 0.        ,
        0.68318539, 0.36301289, 1.49638792, 1.32319102, 0.48770102,
        3.30156341, 0.69886151, 1.95907703, 0.84138351, 0.52738804,
        0.53079001, 0.13897682, 1.6266533 , 0.        , 0.        ,
        0.04375029, 1.33786091, 1.75170106, 0.81874512],
       [1.02840027, 0.16013389, 0.35628336, 1.14561655, 0.03639263,
        0.10299305, 0.        , 0.        , 1.59351064, 0. 

In [10]:
# Call q.test on the dummy data.
# NOTE(review): appears to be a debugging helper that prints the CNN layers in
# reverse order. Confirm its purpose.
q.test(X,Y)

7 FlatLayer
6 MaxPoolLayer
5 ConvLayer
4 MaxPoolLayer
3 ConvLayer
2 MaxPoolLayer
1 ConvLayer


In [22]:
# Index of the layer whose kernel and gradient are inspected.
dig = 3

# The kernel and its gradient must have identical shapes. This comparison used
# to be a bare expression whose result was discarded (only a cell's last
# expression is displayed), so it is asserted explicitly.
assert q.kernels[dig].shape == q.dCV_dkernel[dig].shape, (
    f"kernel shape {q.kernels[dig].shape} != gradient shape {q.dCV_dkernel[dig].shape}"
)

# Preview of kernel - 0.001 * gradient. 0.001 equals the `alpha` passed to CNN.
# Presumably this mimics one plain gradient step; confirm.
q.kernels[dig] - 0.001 * q.dCV_dkernel[dig]

array([[[[-0.2196998 , -0.0765447 , -0.08108   , ...,  0.04418219,
           0.00895457, -0.04685565],
         [-0.22373087,  0.1000089 ,  0.20166354, ...,  0.05514734,
           0.18124681,  0.08211619],
         [-0.08478045,  0.04862241,  0.06290738, ..., -0.04130769,
           0.07065262,  0.07890433],
         ...,
         [-0.23766614,  0.01987325,  0.05278197, ...,  0.28156224,
          -0.01720866, -0.09611407],
         [ 0.06440298,  0.16507033,  0.27179409, ..., -0.29827412,
          -0.0935062 ,  0.00827358],
         [-0.07283915,  0.2103153 ,  0.03831658, ..., -0.08418466,
           0.0569132 , -0.03720187]],

        [[-0.08020151,  0.13228964, -0.12346249, ...,  0.01672144,
           0.03583277, -0.06772737],
         [-0.31811362,  0.03063458,  0.1550733 , ..., -0.05754899,
          -0.10931265,  0.24614339],
         [-0.2743177 , -0.10064724, -0.06395634, ...,  0.05536397,
          -0.15356036,  0.01952366],
         ...,
         [ 0.0329283 , -0.24121409

In [24]:
# Shape of the input array stored on the CNN object.
q.X.shape

(20, 20, 3, 10)

In [25]:
# Shape of the notebook-level input array, for comparison with q.X.shape.
X.shape

(20, 20, 3, 10)

In [28]:
# Slicing the last axis keeps the first two entries along it; presumably this
# is the sample axis (mini-batch of 2). TODO confirm.
X[:,:,:,0:2].shape

(20, 20, 3, 2)