### Question 1

In [1]:
!pip install bayesian-optimization==1.4.3



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from ray import tune
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.schedulers import HyperBandScheduler
import time

In [3]:
class LeNet(nn.Module):
    def __init__(self, conv_filters, hidden, dropout):
        super(LeNet, self).__init__()
        conv_filters = max(8, int(conv_filters) // 8 * 8)
        hidden = int(hidden)
        self.conv1 = nn.Conv2d(1, conv_filters, kernel_size=5)
        self.conv2 = nn.Conv2d(conv_filters, 32, kernel_size=5)
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden, 10)
        self.dropout = nn.Dropout(float(dropout))

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 512)
        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)

In [4]:
from ray.air import session

def train_mnist(config):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = LeNet(config["conv_filters"], config["hidden"], config["dropout"]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])

    transform = transforms.Compose([transforms.ToTensor()])
    train_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(root="~/data", train=True, download=True, transform=transform),
        batch_size=int(config["batch_size"]), shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(root="~/data", train=False, download=True, transform=transform),
        batch_size=256, shuffle=False)

    for epoch in range(3):
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            loss = F.cross_entropy(model(batch_x), batch_y)
            loss.backward()
            optimizer.step()

    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            output = model(batch_x)
            _, predicted = torch.max(output, 1)
            correct += (predicted == batch_y).sum().item()
            total += batch_y.size(0)
    session.report({"mean_accuracy": correct / total})

In [5]:
search_space_grid = {
    "conv_filters": tune.grid_search([64, 128, 256]),
    "lr": tune.grid_search([0.001, 0.01, 0.1]),
    "batch_size": tune.grid_search([64, 128, 256]),
    "dropout": tune.grid_search([0.0, 0.25, 0.5]),
    "hidden": tune.grid_search([128, 256, 512])
}

start = time.time()
grid_result = tune.run(
    train_mnist,
    config=search_space_grid,
    metric="mean_accuracy",
    mode="max",
    resources_per_trial={"gpu": 1}
)
end = time.time()

best_grid = grid_result.get_best_result(metric="mean_accuracy", mode="max")
print("Grid Search Best Config:", best_grid.config)
print("Grid Search Best Accuracy:", best_grid.metrics["mean_accuracy"])
print("Grid Search Time:", end - start)


2025-05-13 07:47:45,914	INFO worker.py:1888 -- Started a local Ray instance.
2025-05-13 07:47:46,319	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2025-05-13 07:47:46,320	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-05-13 08:29:18
Running for:,00:41:32.57
Memory:,43.9/250.9 GiB

Trial name,status,loc,batch_size,conv_filters,dropout,hidden,lr,acc,iter,total time (s)
train_mnist_8f5a1_00000,TERMINATED,172.17.0.2:93499,64,64,0.0,128,0.001,0.9884,1,8.66418
train_mnist_8f5a1_00001,TERMINATED,172.17.0.2:93639,128,64,0.0,128,0.001,0.9887,1,7.15968
train_mnist_8f5a1_00002,TERMINATED,172.17.0.2:93733,256,64,0.0,128,0.001,0.9864,1,6.20744
train_mnist_8f5a1_00003,TERMINATED,172.17.0.2:93823,64,128,0.0,128,0.001,0.9873,1,8.79741
train_mnist_8f5a1_00004,TERMINATED,172.17.0.2:93913,128,128,0.0,128,0.001,0.9876,1,7.09971
train_mnist_8f5a1_00005,TERMINATED,172.17.0.2:94006,256,128,0.0,128,0.001,0.987,1,6.32345
train_mnist_8f5a1_00006,TERMINATED,172.17.0.2:94097,64,256,0.0,128,0.001,0.9899,1,8.73992
train_mnist_8f5a1_00007,TERMINATED,172.17.0.2:94188,128,256,0.0,128,0.001,0.9884,1,7.07599
train_mnist_8f5a1_00008,TERMINATED,172.17.0.2:94278,256,256,0.0,128,0.001,0.9866,1,6.03122
train_mnist_8f5a1_00009,TERMINATED,172.17.0.2:94366,64,64,0.25,128,0.001,0.9901,1,8.85826


Trial name,mean_accuracy
train_mnist_8f5a1_00000,0.9884
train_mnist_8f5a1_00001,0.9887
train_mnist_8f5a1_00002,0.9864
train_mnist_8f5a1_00003,0.9873
train_mnist_8f5a1_00004,0.9876
train_mnist_8f5a1_00005,0.987
train_mnist_8f5a1_00006,0.9899
train_mnist_8f5a1_00007,0.9884
train_mnist_8f5a1_00008,0.9866
train_mnist_8f5a1_00009,0.9901


2025-05-13 08:29:18,913	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/train_mnist_2025-05-13_07-47-46' in 0.0321s.
2025-05-13 08:29:18,935	INFO tune.py:1041 -- Total run time: 2492.62 seconds (2492.53 seconds for the tuning loop).


AttributeError: 'ExperimentAnalysis' object has no attribute 'get_best_result'



In [8]:
end = time.time()

best_grid = grid_result.get_best_trial(metric="mean_accuracy", mode="max", scope="all")

if best_grid is not None:
    print("Grid Search Best Config:", best_grid.config)
    print("Grid Search Best Accuracy:", best_grid.last_result["mean_accuracy"])
else:
    print("No successful trials found in grid search.")

print("Grid Search Time: {:.2f} seconds".format(end - start))

Grid Search Best Config: {'conv_filters': 128, 'lr': 0.001, 'batch_size': 64, 'dropout': 0.5, 'hidden': 512}
Grid Search Best Accuracy: 0.9932
Grid Search Time: 2669.82 seconds


In [10]:
from ray import tune
from ray.air import session
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

def train_mnist(config):
    batch_size = int(config["batch_size"])
    conv_filters = int(config["conv_filters"])
    hidden = int(config["hidden"])
    dropout = float(config["dropout"])
    lr = float(config["lr"])

    class LeNet(nn.Module):
        def __init__(self):
            super(LeNet, self).__init__()
            conv_filters_rounded = max(8, conv_filters // 8 * 8)
            self.conv1 = nn.Conv2d(1, conv_filters_rounded, kernel_size=5)
            self.conv2 = nn.Conv2d(conv_filters_rounded, 32, kernel_size=5)
            self.fc1 = nn.Linear(512, hidden)
            self.fc2 = nn.Linear(hidden, 10)
            self.dropout = nn.Dropout(dropout)

        def forward(self, x):
            x = F.relu(F.max_pool2d(self.conv1(x), 2))
            x = F.relu(F.max_pool2d(self.conv2(x), 2))
            x = x.view(-1, 512)
            x = self.dropout(F.relu(self.fc1(x)))
            return self.fc2(x)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = LeNet().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    transform = transforms.Compose([transforms.ToTensor()])
    train_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(
            root="~/data", train=True, download=True, transform=transform),
        batch_size=batch_size, shuffle=True)

    test_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(
            root="~/data", train=False, download=True, transform=transform),
        batch_size=256, shuffle=False)

    for epoch in range(3): 
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            loss = F.cross_entropy(model(batch_x), batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                _, predicted = torch.max(outputs, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()

        accuracy = correct / total
        session.report({"mean_accuracy": accuracy, "training_iteration": epoch + 1})

In [11]:
search_space_bayes = {
    "conv_filters": tune.uniform(64, 256),
    "lr": tune.loguniform(0.001, 0.1),
    "batch_size": tune.quniform(64, 256, 1),  
    "dropout": tune.uniform(0.0, 1.0),
    "hidden": tune.quniform(128, 512, 1)
}

bayes_search = BayesOptSearch(metric="mean_accuracy", mode="max")
start = time.time()
bayes_result = tune.run(
    train_mnist,
    config=search_space_bayes,
    search_alg=bayes_search,
    metric="mean_accuracy",
    mode="max",
    num_samples=20,
    resources_per_trial={"gpu": 1}
)
end = time.time()

best_bayes = bayes_result.get_best_result(metric="mean_accuracy", mode="max")
print("Bayesian Search Best Config:", best_bayes.config)
print("Bayesian Search Best Accuracy:", best_bayes.metrics["mean_accuracy"])
print("Bayesian Search Time:", end - start)

2025-05-13 08:37:18,483	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-05-13 08:40:52
Running for:,00:03:33.66
Memory:,43.9/250.9 GiB

Trial name,status,loc,batch_size,conv_filters,dropout,hidden,lr,acc,iter,total time (s)
train_mnist_5b44841a,TERMINATED,172.17.0.2:115955,135.912,246.537,0.731994,357.885,0.0164458,0.9604,3,7.52582
train_mnist_d21828f4,TERMINATED,172.17.0.2:116047,93.9509,75.1521,0.866176,358.828,0.0710992,0.1028,3,8.15134
train_mnist_f84d82ba,TERMINATED,172.17.0.2:116140,67.9522,250.223,0.832443,209.538,0.0190007,0.1135,3,9.44878
train_mnist_7bb347e0,TERMINATED,172.17.0.2:116231,99.2137,122.415,0.524756,293.867,0.0298317,0.1135,3,8.16449
train_mnist_549e56c3,TERMINATED,172.17.0.2:116321,181.476,90.7828,0.292145,268.683,0.0461509,0.1009,3,7.16088
train_mnist_9b447c0c,TERMINATED,172.17.0.2:116417,214.754,102.337,0.514234,355.487,0.00559859,0.9879,3,7.01587
train_mnist_619a9d81,TERMINATED,172.17.0.2:116507,180.649,96.7406,0.0650516,492.372,0.0965976,0.1135,3,7.33013
train_mnist_de282bfb,TERMINATED,172.17.0.2:116600,219.212,122.486,0.0976721,390.745,0.0445751,0.1135,3,7.25272
train_mnist_bb543eab,TERMINATED,172.17.0.2:116689,87.4313,159.074,0.0343885,477.179,0.0266192,0.9485,3,8.56452
train_mnist_e92d9fe8,TERMINATED,172.17.0.2:116780,191.204,123.849,0.520068,337.937,0.0193006,0.976,3,7.19096


Trial name,mean_accuracy
train_mnist_1d8a1046,0.1028
train_mnist_218ddea9,0.912
train_mnist_293ac7c1,0.9431
train_mnist_323132ed,0.948
train_mnist_549e56c3,0.1009
train_mnist_5b44841a,0.9604
train_mnist_619a9d81,0.1135
train_mnist_7bb347e0,0.1135
train_mnist_7d8c3f18,0.9759
train_mnist_936b3c8a,0.1135


2025-05-13 08:40:52,154	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/train_mnist_2025-05-13_08-37-18' in 0.0053s.
2025-05-13 08:40:52,159	INFO tune.py:1041 -- Total run time: 213.68 seconds (213.66 seconds for the tuning loop).


AttributeError: 'ExperimentAnalysis' object has no attribute 'get_best_result'

In [13]:
end = time.time()

best_bayes = bayes_result.get_best_trial(metric="mean_accuracy", mode="max", scope="all")

if best_bayes is not None:
    print("Bayesian Search Best Config:", best_bayes.config)
    print("Bayesian Search Best Accuracy:", best_bayes.last_result["mean_accuracy"])
else:
    print("No successful trials found in Bayesian search.")

print("Bayesian Search Time: {:.2f} seconds".format(end - start))

Bayesian Search Best Config: {'conv_filters': np.float64(102.33736617440508), 'lr': np.float64(0.005598590859279775), 'batch_size': np.float64(214.7537845874586), 'dropout': np.float64(0.5142344384136116), 'hidden': np.float64(355.4871944430243)}
Bayesian Search Best Accuracy: 0.9879
Bayesian Search Time: 516.04 seconds


In [17]:
search_space_hyperband = {
    "conv_filters": tune.randint(64, 257),
    "lr": tune.uniform(0.001, 0.1),
    "batch_size": tune.choice([64, 128, 256]),
    "dropout": tune.uniform(0.0, 1.0),
    "hidden": tune.randint(128, 513)
}

hyperband_scheduler = HyperBandScheduler(metric="mean_accuracy", mode="max")
start = time.time()

hyperband_result = tune.run(
    train_mnist,
    config=search_space_hyperband,
    scheduler=hyperband_scheduler,
    num_samples=20,
    resources_per_trial={"gpu": 1}
)
end = time.time()

2025-05-13 08:56:41,779	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-05-13 09:00:13
Running for:,00:03:31.86
Memory:,43.6/250.9 GiB

Trial name,status,loc,batch_size,conv_filters,dropout,hidden,lr,acc,iter,total time (s)
train_mnist_30465_00000,TERMINATED,172.17.0.2:120007,64,127,0.617635,256,0.0691736,0.0958,3,9.20293
train_mnist_30465_00001,TERMINATED,172.17.0.2:120100,64,241,0.168519,441,0.0220862,0.9634,3,9.61084
train_mnist_30465_00002,TERMINATED,172.17.0.2:120191,256,147,0.677222,370,0.011313,0.9814,3,6.99796
train_mnist_30465_00003,TERMINATED,172.17.0.2:120281,256,229,0.301181,203,0.0756573,0.9339,3,6.93261
train_mnist_30465_00004,TERMINATED,172.17.0.2:120371,256,239,0.704675,331,0.0299639,0.9634,3,6.99231
train_mnist_30465_00005,TERMINATED,172.17.0.2:120463,256,170,0.95132,131,0.076309,0.098,3,7.05873
train_mnist_30465_00006,TERMINATED,172.17.0.2:120553,256,186,0.434106,273,0.0785136,0.1135,3,7.14579
train_mnist_30465_00007,TERMINATED,172.17.0.2:120643,256,121,0.230954,132,0.0806124,0.098,3,6.98162
train_mnist_30465_00008,TERMINATED,172.17.0.2:120733,64,101,0.921212,429,0.0774291,0.1135,3,9.13129
train_mnist_30465_00009,TERMINATED,172.17.0.2:120829,256,168,0.118479,442,0.0794801,0.8672,3,6.96618


Trial name,mean_accuracy
train_mnist_30465_00000,0.0958
train_mnist_30465_00001,0.9634
train_mnist_30465_00002,0.9814
train_mnist_30465_00003,0.9339
train_mnist_30465_00004,0.9634
train_mnist_30465_00005,0.098
train_mnist_30465_00006,0.1135
train_mnist_30465_00007,0.098
train_mnist_30465_00008,0.1135
train_mnist_30465_00009,0.8672


2025-05-13 09:00:13,650	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/train_mnist_2025-05-13_08-56-41' in 0.0052s.
2025-05-13 09:00:13,654	INFO tune.py:1041 -- Total run time: 211.87 seconds (211.86 seconds for the tuning loop).


In [18]:
best_hyper = hyperband_result.get_best_trial(metric="mean_accuracy", mode="max", scope="all")

if best_hyper is not None:
    print("Hyperband Best Config:", best_hyper.config)
    print("Hyperband Best Accuracy:", best_hyper.last_result["mean_accuracy"])
else:
    print("No successful trials found in Hyperband search.")

print("Hyperband Search Time: {:.2f} seconds".format(end - start))

Hyperband Best Config: {'conv_filters': 147, 'lr': 0.011312976753809767, 'batch_size': 256, 'dropout': 0.6772220821054592, 'hidden': 370}
Hyperband Best Accuracy: 0.9814
Hyperband Search Time: 211.88 seconds


### Question 2

After performing Grid Search, Bayesian Search, and Hyperband for the given hyperparameter configurations, these were my results (copy-pasted from the printed outputs above so you can see the results for all 3 search methods together): 

Grid Search Best Config: {'conv_filters': 128, 'lr': 0.001, 'batch_size': 64, 'dropout': 0.5, 'hidden': 512}
Grid Search Best Accuracy: 0.9932
Grid Search Time: 2669.82 seconds

Bayesian Search Best Config: {'conv_filters': np.float64(102.33736617440508), 'lr': np.float64(0.005598590859279775), 'batch_size': np.float64(214.7537845874586), 'dropout': np.float64(0.5142344384136116), 'hidden': np.float64(355.4871944430243)}
Bayesian Search Best Accuracy: 0.9879
Bayesian Search Time: 516.04 seconds

Hyperband Best Config: {'conv_filters': 147, 'lr': 0.011312976753809767, 'batch_size': 256, 'dropout': 0.6772220821054592, 'hidden': 370}
Hyperband Best Accuracy: 0.9814
Hyperband Search Time: 211.88 seconds

### Question

After running all three hyperparameter search methods, I observed that Grid Search achieved the highest accuracy (0.9932), but it also took the longest time to run - over 44 minutes (2669.82 seconds). This makes sense since Grid Search exhaustively evaluates all combinations, which ensures optimality but at the cost of time and computational resources.

Bayesian Optimization, on the other hand, achieved slightly lower accuracy (0.9879) but completed in less than 9 minutes (516.04 seconds). It was much more efficient by intelligently exploring the search space rather than evaluating all possible combinations.

Hyperband was the fastest, finishing in about 3.5 minutes (211.88 seconds), but its best accuracy (0.9814) was the lowest among the three. This tradeoff is expected, as Hyperband aggressively prunes poorly performing configurations early on to save time.

Overall, the results highlight the classic trade-off between accuracy and search efficiency. If time and resources are not a constraint, Grid Search gives the best performance. However, for more practical scenarios, Bayesian Search provides a strong balance between performance and runtime, while Hyperband is ideal when speed is the top priority.