# ImageNet - Bayesian Optimization 
## 02463 Active ML and Agency - Group BO 2

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms

import skopt
from skopt import gp_minimize
from skopt import dummy_minimize
from train_model import train_model

import json

In [2]:

params = {
    'module__base_channels': 16,                    # or try: 32
    'module__dropout': 0.5,                        # or try: 0.3
}
# Train for 10 epochs using 1000 training samples (adjust as needed)
accuracy = train_model(params, num_epochs=10, n_training_samples=1000)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 5.1 K  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 5.5 K  | train
---------------------------------------------------------
10.6 K    Trainable params
0         Non-trainable params
10.6 K    Total params
0.042     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Test Accuracy: 25.93%


In [3]:


# Define the search space as follows
# params = {
#     'module__base_channels': (1,128)
#     'module__dropout': (0.0, 0.9)
# }

space = [
    skopt.space.Integer(1, 256, name='module__base_channels'),
    skopt.space.Real(0.0, 0.9, name='module__dropout')
]

def objective(x):
    # Decode the JSON encoded categorical values.
    params = {
        'module__base_channels': x[0],
        'module__dropout': x[1] ,
    }
    
    print("Param values: ", params)
    accuracy = train_model(params, num_epochs=1, n_training_samples=50)
    # Return negative accuracy for minimization.
    return -accuracy

opt_bo = gp_minimize(
    objective,
    space,
    acq_func='EI',
    n_calls=10,
    n_random_starts=5,
    verbose=True,
    xi=0.1,
    noise=0.01**2
)

Iteration No: 1 started. Evaluating function at random point.
Param values:  {'module__base_channels': np.int64(237), 'module__dropout': 0.20427059584766266}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 1.0 M  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 919 K  | train
---------------------------------------------------------
1.9 M     Trainable params
0         Non-trainable params
1.9 M     Total params
7.751     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 89.2658
Function value obtained: -0.1261
Current minimum: -0.1261
Iteration No: 2 started. Evaluating function at random point.
Param values:  {'module__base_channels': np.int64(9), 'module__dropout': 0.6162336200793878}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 1.7 K  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 2.1 K  | train
---------------------------------------------------------
3.8 K     Trainable params
0         Non-trainable params
3.8 K     Total params
0.015     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Iteration No: 2 ended. Evaluation done at random point.
Time taken: 6.8736
Function value obtained: -0.1088
Current minimum: -0.1261
Iteration No: 3 started. Evaluating function at random point.
Param values:  {'module__base_channels': np.int64(9), 'module__dropout': 0.6162336200793878}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 1.7 K  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 2.1 K  | train
---------------------------------------------------------
3.8 K     Trainable params
0         Non-trainable params
3.8 K     Total params
0.015     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Iteration No: 3 ended. Evaluation done at random point.
Time taken: 7.2787
Function value obtained: -0.1088
Current minimum: -0.1261
Iteration No: 4 started. Evaluating function at random point.
Param values:  {'module__base_channels': np.int64(9), 'module__dropout': 0.6162336200793878}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 1.7 K  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 2.1 K  | train
---------------------------------------------------------
3.8 K     Trainable params
0         Non-trainable params
3.8 K     Total params
0.015     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Iteration No: 4 ended. Evaluation done at random point.
Time taken: 8.9152
Function value obtained: -0.1088
Current minimum: -0.1261
Iteration No: 5 started. Evaluating function at random point.
Param values:  {'module__base_channels': np.int64(9), 'module__dropout': 0.6162336200793878}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 1.7 K  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 2.1 K  | train
---------------------------------------------------------
3.8 K     Trainable params
0         Non-trainable params
3.8 K     Total params
0.015     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Iteration No: 5 ended. Evaluation done at random point.
Time taken: 7.6940
Function value obtained: -0.1088
Current minimum: -0.1261
Iteration No: 6 started. Searching for the next optimal point.
Param values:  {'module__base_channels': np.int64(183), 'module__dropout': 0.18492568483099572}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 608 K  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 551 K  | train
---------------------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.641     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Iteration No: 6 ended. Search finished for the next optimal point.
Time taken: 66.4022
Function value obtained: -0.1359
Current minimum: -0.1359
Iteration No: 7 started. Searching for the next optimal point.
Param values:  {'module__base_channels': np.int64(240), 'module__dropout': 0.12698421786148625}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 1.0 M  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 942 K  | train
---------------------------------------------------------
2.0 M     Trainable params
0         Non-trainable params
2.0 M     Total params
7.947     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Iteration No: 7 ended. Search finished for the next optimal point.
Time taken: 96.3169
Function value obtained: -0.1312
Current minimum: -0.1359
Iteration No: 8 started. Searching for the next optimal point.
Param values:  {'module__base_channels': np.int64(35), 'module__dropout': 0.002941020492969205}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 23.1 K | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 22.7 K | train
---------------------------------------------------------
45.8 K    Trainable params
0         Non-trainable params
45.8 K    Total params
0.183     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Iteration No: 8 ended. Search finished for the next optimal point.
Time taken: 12.9893
Function value obtained: -0.1000
Current minimum: -0.1359
Iteration No: 9 started. Searching for the next optimal point.
Param values:  {'module__base_channels': np.int64(187), 'module__dropout': 0.8983381574232611}
Files already downloaded and verified
Files already downloaded and verified


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | features   | Sequential        | 635 K  | train
1 | avgpool    | AdaptiveAvgPool2d | 0      | train
2 | classifier | Sequential        | 575 K  | train
---------------------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.844     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


KeyboardInterrupt: 

In [None]:
# do random search
opt_random = dummy_minimize(
    objective,
    space,
    n_calls=10,
    verbose=True,
    random_state=42
)

In [None]:
## comparison between random search and bayesian optimization
## we can plot the maximum oob per iteration of the sequence

# collect the maximum each iteration of BO
y_bo = np.maximum.accumulate(-opt_bo.func_vals).ravel()
y_random = np.maximum.accumulate(-opt_random.func_vals).ravel()
# define iteration number
xs = range(1, len(y_bo) + 1)

plt.plot(xs, y_random, 'o-', color = 'red', label='Random Search')
plt.plot(xs, y_bo, 'o-', color = 'blue', label='Bayesian Optimization')
plt.legend()
plt.xlabel('Iterations')
plt.ylabel('accuracy')
plt.title('Bayesian Optimization')
plt.show()

In [None]:
#print(opt_bo.func_vals.min(), opt_bo.func_vals.max())
#print(min(opt_bo.x_iters), max(opt_bo.x_iters))
opt_bo.x_iters = np.array(opt_bo.x_iters)
plt.scatter(np.abs(opt_bo.func_vals), opt_bo.x_iters[:,1], color='blue')
plt.xlabel("Accuracy")
plt.xlim(0.01, 0.7)
plt.ylabel("Dropout value")
plt.grid()
plt.show()

In [None]:
opt_random.x_iters = np.array(opt_random.x_iters)

plt.scatter(np.abs(opt_random.func_vals), opt_random.x_iters[:,1], color='blue')
plt.xlabel("Accuracy")
plt.xlim(0.01, 0.7)
plt.ylabel("Dropout value")
plt.grid()
plt.show()

In [None]:
# Create a dense grid in the normalized space and predict with GP

numebr_of_points = 1000
fixed_dropout = np.array([0.5]*numebr_of_points)[:, np.newaxis]  # fixed dropout value
x_norm = np.linspace(0, 1, numebr_of_points)[:, np.newaxis]  
x_norm = np.hstack((x_norm, fixed_dropout))  # add the base_channels


f_pred, f_std = opt_bo.models[-1].predict(x_norm, return_std=True)  
# Convert predictions from negative accuracy to accuracy
acc_pred = -f_pred  
ci_lower = acc_pred - 1.96 * f_std  
ci_upper = acc_pred + 1.96 * f_std  

# Convert x from normalized to original scale
x_orig = 1 + x_norm * (256 - 1)  # because lower=1, upper=256

# Get the evaluated (checked) points and their observed values (convert to accuracy)
x_points = np.array([pt[0] for pt in opt_bo.x_iters])
y_points = -np.array(opt_bo.func_vals)

plt.plot(x_orig[:, 0], acc_pred, "r--", label="Surrogate model")
plt.fill_between(x_orig[:, 0].flatten(), ci_lower.flatten(), ci_upper.flatten(), color="r", alpha=0.2, label="95% CI")
plt.scatter(x_points, y_points, c="black", zorder=5, label="Checked points")
plt.xlabel("module__base_channels")
plt.ylabel("Accuracy")
plt.legend()
plt.show()