# ImageNet - Bayesian Optimization 
## 02463 Active ML and Agency - Group BO 2

In [26]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms

import skopt
from skopt import gp_minimize
from skopt import dummy_minimize
from train_model import train_model

import json

In [27]:
# Preprocessing: convert each image to a tensor, then normalize every RGB
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-100 train and test splits, stored under ./data (download enabled).
trainset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Data loaders for both splits - the batch size is subject to change.
# Only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

# Sanity check: pull a single batch from the training loader.
images, labels = next(iter(trainloader))

In [None]:

# Hand-picked hyperparameters for a single baseline run.
params = dict(
    module__base_channels=16,  # or try: 32
    module__dropout=0.5,       # or try: 0.3
)

# 10 epochs on 1000 training samples (adjust as needed); the returned value
# is printed as a percentage.
accuracy = train_model(params, num_epochs=10, n_training_samples=1000)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

In [None]:


# Search space handed to the optimizers, one dimension per hyperparameter:
#   module__base_channels : integer in [1, 256]
#   module__dropout       : real    in [0.0, 0.9]
# The order of the list fixes the order of the coordinates in every point x.
space = [
    skopt.space.Integer(low=1, high=256, name='module__base_channels'),
    skopt.space.Real(low=0.0, high=0.9, name='module__dropout'),
]

def objective(x, *, num_epochs=10, n_training_samples=50000):
    """Objective for the skopt minimizers: negated result of one training run.

    Args:
        x: A point from the search space, ordered like ``space``:
            ``x[0]`` is ``module__base_channels`` and ``x[1]`` is
            ``module__dropout``.
        num_epochs: Number of epochs forwarded to ``train_model``.
        n_training_samples: Number of training samples forwarded to
            ``train_model``.

    Returns:
        The negated value returned by ``train_model``, so that minimizing
        this function maximizes the accuracy.
    """
    # The optimizer may hand over NumPy scalars; cast to built-in types so
    # the training code and the printout below see plain int / float values.
    params = {
        'module__base_channels': int(x[0]),
        'module__dropout': float(x[1]),
    }

    print("Param values: ", params)
    accuracy = train_model(
        params,
        num_epochs=num_epochs,
        n_training_samples=n_training_samples,
    )
    # Return negative accuracy for minimization.
    return -accuracy

# Bayesian optimization with a Gaussian-process surrogate and the Expected
# Improvement acquisition function: 5 initial points, 20 evaluations in total.
opt_bo = gp_minimize(
    objective,
    space,
    acq_func='EI',
    n_calls=20,
    n_initial_points=5,  # replaces n_random_starts, deprecated since skopt 0.8
    verbose=True,
    xi=0.1,
    noise=0.01**2,
    random_state=42,     # seeded like the random-search run, for reproducibility
)

In [None]:
# Random-search baseline: 20 evaluations of the same objective over the same
# search space, with a fixed seed.
opt_random = dummy_minimize(
    func=objective,
    dimensions=space,
    n_calls=20,
    random_state=42,
    verbose=True,
)

In [None]:
## Comparison between random search and Bayesian optimization:
## plot the best accuracy found so far after each evaluation.

# func_vals hold negated accuracies, so negate them back before taking the
# running maximum.
y_bo = np.maximum.accumulate(-np.asarray(opt_bo.func_vals)).ravel()
y_random = np.maximum.accumulate(-np.asarray(opt_random.func_vals)).ravel()

# Each curve gets its own iteration axis, so the plot still works when the
# two runs were made with a different number of calls.
xs = range(1, len(y_bo) + 1)
xs_random = range(1, len(y_random) + 1)

plt.plot(xs_random, y_random, 'o-', color='red', label='Random Search')
plt.plot(xs, y_bo, 'o-', color='blue', label='Bayesian Optimization')
plt.legend()
plt.xlabel('Iterations')
plt.ylabel('accuracy')
plt.title('Bayesian Optimization')
plt.show()

In [None]:
# Accuracy vs. dropout for every point evaluated by Bayesian optimization.
# Each entry of x_iters is [module__base_channels, module__dropout], so only
# the second coordinate is plotted; passing x_iters whole gives scatter twice
# as many y values as x values.
bo_dropout = [point[1] for point in opt_bo.x_iters]
# func_vals are negated accuracies (see the objective); negate them back.
bo_accuracy = -np.asarray(opt_bo.func_vals)

plt.scatter(bo_accuracy, bo_dropout, color='blue')
plt.xlabel("Accuracy")
# NOTE: fixed window - points with accuracy outside [0.55, 0.7] are not shown.
plt.xlim(0.55, 0.7)
plt.ylabel("Dropout value")
plt.grid()
plt.show()

In [None]:
# Accuracy vs. dropout for every point evaluated by random search.
# Each entry of x_iters is [module__base_channels, module__dropout], so only
# the second coordinate is plotted; passing x_iters whole gives scatter twice
# as many y values as x values.
random_dropout = [point[1] for point in opt_random.x_iters]
# func_vals are negated accuracies (see the objective); negate them back.
random_accuracy = -np.asarray(opt_random.func_vals)

plt.scatter(random_accuracy, random_dropout, color='blue')
plt.xlabel("Accuracy")
# NOTE: fixed window - points with accuracy outside [0.55, 0.7] are not shown.
plt.xlim(0.55, 0.7)
plt.ylabel("Dropout value")
plt.grid()
plt.show()

In [None]:
# Plot a 1-D slice of the final GP surrogate along module__base_channels.
# The surrogate was fitted on the full 2-D search space, so every query point
# needs both coordinates: dropout is pinned at the best value found by BO.
best_dropout = opt_bo.x[1]

# Every integer value of the first dimension (module__base_channels).
channels_low, channels_high = opt_bo.space.dimensions[0].bounds
channel_grid = np.arange(channels_low, channels_high + 1)

# Build the query points in the original space and let the result's space
# map them into the coordinates the surrogate model was trained on.
query_points = [[int(c), best_dropout] for c in channel_grid]
x_model = opt_bo.space.transform(query_points)
f_pred, f_std = opt_bo.models[-1].predict(x_model, return_std=True)

# Convert predictions from negative accuracy to accuracy
acc_pred = -f_pred
ci_lower = acc_pred - 1.96 * f_std
ci_upper = acc_pred + 1.96 * f_std

# Get the evaluated (checked) points and their observed values (convert to accuracy).
# These were evaluated at varying dropout values, so they need not lie on the slice.
x_points = np.array([pt[0] for pt in opt_bo.x_iters])
y_points = -np.array(opt_bo.func_vals)

plt.plot(channel_grid, acc_pred, "r--", label="Surrogate model")
plt.fill_between(channel_grid, ci_lower, ci_upper, color="r", alpha=0.2, label="95% CI")
plt.scatter(x_points, y_points, c="black", zorder=5, label="Checked points")
plt.xlabel("module__base_channels")
plt.ylabel("Accuracy")
plt.title(f"Surrogate slice at module__dropout = {best_dropout:.3f}")
plt.legend()
plt.show()