<a href="https://colab.research.google.com/github/federicovilla55/optML_mini_project/blob/setup/Simple_MLP_setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Setup

In [1]:
!pip install openml



In [2]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
import time
from getpass import getpass
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import numpy as np
# One seed for the whole notebook: it is passed to the data splits below and is
# also applied to the NumPy and PyTorch global RNGs here, so that code drawing
# from those generators is repeatable across runs.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Dataset

In [3]:
# Load the sampled 3D polynomial dataset; the first CSV column is used as the row index.
dataset_csv = "Data/Dfs/3D_Poly_3_5___nbr_of_samples=500___range_of_sampling=(-3, 3).csv"
df = pd.read_csv(dataset_csv, index_col=0)

# Preview the first rows to check that the columns were parsed as expected.
print(df.head())

           z         x         y
0  11.506183 -2.596277  2.072520
1 -28.467607  2.878641  2.767614
2  23.399129 -2.456888  2.837377
3  -5.072664  0.010060  1.960925
4  16.272314 -2.624652 -0.444982


In [4]:
# Column 'z' is the regression target; every other column is an input feature.
y = df['z']
X = df.drop(columns=['z'])

# Cast both to float32. X_np is a NumPy array, while y_np stays a pandas Series
# (downstream cells convert it to NumPy where they need to).
X_np = X.to_numpy(dtype=np.float32)
y_np = y.astype(np.float32)

In [5]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_np, y_np, test_size=0.2, random_state=seed)

# Optional: validation split from train
X_train_search, X_val_search, y_train_search, y_val_search = train_test_split(
    X_train, y_train, test_size=0.25, random_state=seed)

In [6]:
def make_tensor_dataset(features, targets):
    """Wrap a feature matrix and a 1-D target vector in a ``TensorDataset``.

    Args:
        features: NumPy array of inputs, one row per sample.
        targets: 1-D targets, given as a pandas Series or a NumPy array.

    Returns:
        A ``TensorDataset`` of ``(features, targets)`` in which the targets carry
        an extra trailing dimension added by ``unsqueeze(1)``.
    """
    # np.asarray accepts both a Series and an ndarray, so this keeps working
    # whether or not the targets were already converted to NumPy upstream
    # (calling .to_numpy() directly would fail on a plain ndarray).
    target_tensor = torch.from_numpy(np.asarray(targets)).unsqueeze(1)
    return TensorDataset(torch.from_numpy(features), target_tensor)


train_dataset = make_tensor_dataset(X_train, y_train)
test_dataset = make_tensor_dataset(X_test, y_test)

# **Hyperparameter tuning for baseline**

Keras (running on the PyTorch backend) is used for this step because KerasTuner gives a simpler interface for hyperparameter search.

In [7]:
!pip install keras_core
!pip install keras-tuner --upgrade



In [8]:
import os
# Select the PyTorch backend. This is set before the Keras imports below so the
# variable is already in place when those packages are first loaded.
os.environ["KERAS_BACKEND"] = "torch"
import keras_core as keras
#########################################
import tensorflow as tf
#from tensorflow.keras.optimizers import Adadelta
# NOTE(review): this rebinds the name `keras` that `import keras_core as keras`
# bound above, so later cells use the standalone `keras` package under that name.
import keras
#from tensorflow.keras import layers
#from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
# NOTE(review): the callbacks and model helpers below are imported from
# tensorflow.keras, while the models in this notebook are built through `keras`.
# Presumably both resolve to the same Keras implementation here — confirm.
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import regularizers
#import tensorflow as tf
#from tensorflow import keras
#from tensorflow.keras import layers
from tensorflow.keras.models import clone_model
from tensorflow.keras.models import load_model

Using PyTorch backend.


In [9]:
# Define the model as a function for Keras Tuner for regression
def build_model(hp):
    """Build and compile a fully connected regression network for KerasTuner.

    Tuned hyperparameters:
        activation_function: 'relu', 'tanh' or 'sigmoid', shared by all hidden layers.
        hidden_layers: number of hidden Dense layers, from 2 to 5.
        units_in_layer{i}: width of hidden layer i, from 18 to 128 in steps of 10.
            Only active when the model actually has a layer i.
        learning_rate: SGD learning rate, log-sampled between 1e-3 and 1e-1.

    Args:
        hp: the ``HyperParameters`` object supplied by the tuner.

    Returns:
        A compiled ``keras.Sequential`` model with a single-unit output layer,
        mean-squared-error loss and mean-absolute-error as an additional metric.
    """
    min_hidden_layers, max_hidden_layers = 2, 5

    net = keras.Sequential()

    # Input layer: one input per feature column of the search training set.
    net.add(keras.layers.Input(shape=(X_train_search.shape[1],)))

    # One activation function is chosen per trial and used for all hidden layers.
    activation_function = hp.Choice('activation_function', ['relu', 'tanh', 'sigmoid'])

    # No regularization in the baseline model.

    # Tune the number of hidden layers.
    hidden_layers = hp.Int('hidden_layers', min_hidden_layers, max_hidden_layers)
    for i in range(hidden_layers):
        # Layer i exists only when hidden_layers > i. Registering its width inside
        # a conditional scope keeps the tuner from sampling and reporting
        # units_in_layer{i} for layers the model does not contain, which is what
        # made the results summary list spurious layer sizes.
        # NOTE(review): a tuner directory created with the previous unconditional
        # search space probably should not be reused with this definition —
        # start a fresh project or pass overwrite=True; confirm.
        active_for = list(range(i + 1, max_hidden_layers + 1))
        with hp.conditional_scope('hidden_layers', active_for):
            units = hp.Int(f'units_in_layer{i}', min_value=18, max_value=128, step=10)
        net.add(keras.layers.Dense(units=units, activation=activation_function))

    # Output layer: a single unit with no activation for the regression target.
    net.add(keras.layers.Dense(units=1))

    net.compile(
        optimizer=keras.optimizers.SGD(learning_rate=hp.Float('learning_rate', 1e-3, 1e-1, sampling='log')),
        loss='mean_squared_error',
        metrics=['mean_absolute_error']
        )
    return net

# Define the tuner
tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',   # Optimize for validation loss, not accuracy in regression
    max_trials=100,         # Number of hyperparameter combinations to try
    executions_per_trial=1, # Number of times to train each configuration
    seed=seed,              # Same seed as the data splits, so the sampled trials are repeatable
    directory='/content/optML_mini_project',
    project_name='Hyperparam Search Baseline Model'
)

# Define the EarlyStopping callback.
# It is taken from `keras`, the same package build_model uses to create the
# models, rather than from tensorflow.keras, so model and callback always come
# from one Keras implementation.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',       # Monitor validation loss for early stopping
    patience=5,               # Number of epochs with no improvement before stopping
    restore_best_weights=True # Restore model weights from the epoch with the best validation loss
)


Reloading Tuner from /content/optML_mini_project\Hyperparam Search Baseline Model\tuner0.json


**Only run the next cell if you want to repeat the training/search. WARNING: this can take a long time.**

In [10]:
# Run the hyperparameter search with early stopping.
# tuner.search() itself keeps creating and training trials until the max_trials
# budget of the tuner is used up, so a single call is enough; wrapping it in a
# loop over max_trials only repeats calls that have nothing left to do.
tuner.search(
    X_train_search, y_train_search,
    validation_data=(X_val_search, y_val_search),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping]
)

In [11]:
# Print the summary of the search space
tuner.search_space_summary()

# Print the results of the search.
# CAVEAT (author's note): the layer sizes reported here can be wrong. The first
# entries are correct, but additional, meaningless layer sizes are listed after
# them. Check the number of hidden layers reported for a trial to see how many
# of the listed sizes actually apply.
tuner.results_summary()

Search space summary
Default search space size: 8
activation_function (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh', 'sigmoid'], 'ordered': False}
hidden_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 5, 'step': 1, 'sampling': 'linear'}
units_in_layer0 (Int)
{'default': None, 'conditions': [], 'min_value': 18, 'max_value': 128, 'step': 10, 'sampling': 'linear'}
units_in_layer1 (Int)
{'default': None, 'conditions': [], 'min_value': 18, 'max_value': 128, 'step': 10, 'sampling': 'linear'}
learning_rate (Float)
{'default': 0.001, 'conditions': [], 'min_value': 0.001, 'max_value': 0.1, 'step': None, 'sampling': 'log'}
units_in_layer2 (Int)
{'default': None, 'conditions': [], 'min_value': 18, 'max_value': 128, 'step': 10, 'sampling': 'linear'}
units_in_layer3 (Int)
{'default': None, 'conditions': [], 'min_value': 18, 'max_value': 128, 'step': 10, 'sampling': 'linear'}
units_in_layer4 (Int)
{'default': None, 'conditions': [], 'min_valu

Save the search results to the Git repository

In [12]:
%cd /content/optML_mini_project
!git add --a
!git commit -m "Hyperparameter search for baseline model"
!git push

c:\content\optML_mini_project


fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git


In [13]:
# Retrieve the ten best models from the tuner (returned best first).
top_models = tuner.get_best_models(num_models=10)
# The overall best model is the first entry of that list; reuse it instead of
# asking the tuner to rebuild and reload the same model a second time.
best_model = top_models[0]

# Display summaries of the top models
for i, model in enumerate(top_models, start=1):
    print(f"\nModel {i} Summary:")
    model.summary()

# Display the optimizer configuration of each model (learning rate, momentum, ...)
for i, model in enumerate(top_models, start=1):
    print(f"\nModel {i} Optimizer:")
    for key, value in model.optimizer.get_config().items():
        print(f"  {key}: {value}")



Model 1 Summary:



Model 2 Summary:



Model 3 Summary:



Model 4 Summary:



Model 5 Summary:



Model 6 Summary:



Model 7 Summary:



Model 8 Summary:



Model 9 Summary:



Model 10 Summary:



Model 1 Optimizer:
  name: SGD
  learning_rate: 0.027553942054510117
  weight_decay: None
  clipnorm: None
  global_clipnorm: None
  clipvalue: None
  use_ema: False
  ema_momentum: 0.99
  ema_overwrite_frequency: None
  loss_scale_factor: None
  gradient_accumulation_steps: None
  momentum: 0.0
  nesterov: False

Model 2 Optimizer:
  name: SGD
  learning_rate: 0.0050878627225756645
  weight_decay: None
  clipnorm: None
  global_clipnorm: None
  clipvalue: None
  use_ema: False
  ema_momentum: 0.99
  ema_overwrite_frequency: None
  loss_scale_factor: None
  gradient_accumulation_steps: None
  momentum: 0.0
  nesterov: False

Model 3 Optimizer:
  name: SGD
  learning_rate: 0.0039107948541641235
  weight_decay: None
  clipnorm: None
  global_clipnorm: None
  clipvalue: None
  use_ema: False
  ema_momentum: 0.99
  ema_overwrite_frequency: None
  loss_scale_factor: None
  gradient_accumulation_steps: None
  momentum: 0.0
  nesterov: False

Model 4 Optimizer:
  name: SGD
  learning_rate: 0

Best model performance

In [14]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict on the validation split. Note that this is the same split the tuner
# used as validation data when ranking the trials.
y_pred = best_model.predict(X_val_search)

# Compute the three regression metrics against the validation targets.
mae = mean_absolute_error(y_val_search, y_pred)
mse = mean_squared_error(y_val_search, y_pred)
r2 = r2_score(y_val_search, y_pred)

# Report them in a fixed order: MAE, MSE, R-squared.
for label, value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Mean Squared Error (MSE):", mse),
    ("R-squared:", r2),
):
    print(label, value)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
Mean Absolute Error (MAE): 4.394749641418457
Mean Squared Error (MSE): 56.386932373046875
R-squared: 0.8234274387359619


Run the next cell only if the best model needs to be saved

In [None]:
# Define the path to save the model in Google Drive
gdrive_path = '/content/optML_mini_project/best_model_baseline.keras'
# Save the model
best_model.save(gdrive_path)

%cd /content/optML_mini_project
!git add --a
!git commit -m "Best baseline model saved"
!git push

# **Experiments:**
  Training and evaluating each of the different kinds of regularization:

  - Baseline
  - Dropout
  - Weight Decay/L2
  - Gradient Noise Injection
  - ....