<a href="https://colab.research.google.com/github/ahzaidy/Programs/blob/main/CPSC_5440_HW21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split, TensorDataset
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pickle
from google.colab import drive

# Make the Google Drive contents reachable under /content/drive
drive.mount("/content/drive")


def _read_cifar_pickle(path):
    """Unpickle one CIFAR-100 split file and return the dict it contains.

    The file is decoded with ``encoding='bytes'``, so the dict keys are
    ``bytes`` objects (e.g. ``b'data'``).
    """
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')


def _as_image_tensor(raw_pixels):
    """Return ``raw_pixels`` as float32, reshaped to (-1, 3, 32, 32) and divided by 255."""
    pixels = torch.tensor(raw_pixels, dtype=torch.float32)
    return pixels.reshape(-1, 3, 32, 32) / 255.0


# Read both CIFAR-100 splits from Google Drive (train first, then test)
train_dict = _read_cifar_pickle('/content/drive/My Drive/train')
test_dict = _read_cifar_pickle('/content/drive/My Drive/test')

# Pull out the image tensors and the fine-grained class labels
train_data = _as_image_tensor(train_dict[b'data'])
train_labels = torch.tensor(train_dict[b'fine_labels'], dtype=torch.long)
test_data = _as_image_tensor(test_dict[b'data'])
test_labels = torch.tensor(test_dict[b'fine_labels'], dtype=torch.long)

# Pair images with labels so they can be fed to a DataLoader
train_dataset = TensorDataset(train_data, train_labels)
test_dataset = TensorDataset(test_data, test_labels)

class CIFAR100Net(nn.Module):
    """Fully connected classifier operating on flattened images.

    The network is ``fc1`` followed by ``num_hidden_layers`` equally wide
    hidden layers and a final linear layer that emits raw class logits. The
    same activation module is applied after ``fc1`` and after every hidden
    layer. With the default arguments this is the original CIFAR-100 model:
    3*32*32 inputs, four hidden layers and 100 output classes.

    Args:
        units: Width of ``fc1`` and of every hidden layer.
        hidden_activation: Name of an activation class in ``torch.nn``
            (e.g. ``"ReLU"`` or ``"Sigmoid"``); it is instantiated with no
            arguments.
        num_hidden_layers: Number of ``units -> units`` layers placed after
            ``fc1``. Defaults to 4.
        num_classes: Size of the output (logit) layer. Defaults to 100.
        in_features: Number of values per flattened input sample.
            Defaults to ``3 * 32 * 32``.

    Raises:
        AttributeError: If ``hidden_activation`` is not an attribute of
            ``torch.nn``.
    """

    def __init__(self, units, hidden_activation, num_hidden_layers=4,
                 num_classes=100, in_features=3 * 32 * 32):
        super().__init__()
        self.in_features = in_features
        self.hidden_activation = getattr(nn, hidden_activation)()
        # Layers are created in the same order as before (fc1, hidden layers,
        # output layer) so seeded initialisation and state_dict keys are
        # unchanged for the default configuration.
        self.fc1 = nn.Linear(in_features, units)
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(units, units) for _ in range(num_hidden_layers)]
        )
        self.output_layer = nn.Linear(units, num_classes)

    def forward(self, x):
        """Flatten ``x`` to rows of ``in_features`` values and return the logits."""
        # reshape (rather than view) also accepts non-contiguous inputs, such
        # as an NHWC batch that was permuted to NCHW; view would raise there.
        x = x.reshape(-1, self.in_features)
        x = self.hidden_activation(self.fc1(x))
        for layer in self.hidden_layers:
            x = self.hidden_activation(layer(x))
        # No softmax here: CrossEntropyLoss expects raw logits.
        return self.output_layer(x)

def train_cifar100(model, train_loader, val_loader, optimizer, criterion, device, epochs=10):
    """Train ``model`` and record its validation accuracy after every epoch.

    Each epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. The mean per-batch validation loss
    and the validation accuracy are printed once per epoch.

    Args:
        model: Module to train; it is called on each input batch.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.
        epochs: Number of passes over ``train_loader``. Defaults to 10.

    Returns:
        dict with two equally long lists: ``'epoch'`` (1-based epoch numbers)
        and ``'accuracy'`` (validation accuracy per epoch). If the validation
        loader yields no samples, the accuracy (and the printed loss) is NaN
        instead of raising ``ZeroDivisionError``.
    """
    history = {'epoch': [], 'accuracy': []}
    for epoch in range(epochs):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

        val_loss = 0.0
        num_batches = 0
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()
                # Counting batches here avoids requiring len(val_loader).
                num_batches += 1
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # An empty validation set has no meaningful loss or accuracy: report NaN.
        val_loss = val_loss / num_batches if num_batches else float('nan')
        val_accuracy = correct / total if total else float('nan')
        history['epoch'].append(epoch + 1)
        history['accuracy'].append(val_accuracy)
        print(f'Epoch {epoch+1}, Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}')
    return history

# Fixed hyperparameters used for this single training run
config = dict(
    units=240,
    hidden_activation='ReLU',
    optimizer='Adam',
    batch_size=1000,
    lr=0.001,
)

# Hold out the last 20% (by count) of the training data for validation
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])

# Only the training loader reshuffles; validation and test keep a fixed order
train_loader = DataLoader(train_subset, shuffle=True, batch_size=config['batch_size'])
val_loader = DataLoader(val_subset, shuffle=False, batch_size=config['batch_size'])
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=config['batch_size'])

# Build the model, the loss and the optimizer named in the config
device = torch.device("cpu")
model = CIFAR100Net(config['units'], config['hidden_activation'])
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_class = getattr(optim, config['optimizer'])
optimizer = optimizer_class(model.parameters(), lr=config['lr'])

# Fit the model, collecting per-epoch validation accuracy
history = train_cifar100(
    model, train_loader, val_loader, optimizer, criterion, device, epochs=10
)

# Measure accuracy on the held-out test set
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)
        logits = model(batch_inputs)
        # Index of the largest logit is the predicted class
        predicted = torch.max(logits.data, 1).indices
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

best_accuracy = correct / total
print(f'Best Hyperparameters: {config}')
print(f'Best Accuracy: {best_accuracy:.4f}')

# Plot validation accuracy per epoch and save the figure to Drive
df = pd.DataFrame(history)
plt.figure(figsize=(10, 5))
sns.lineplot(x='epoch', y='accuracy', data=df)
plt.xlabel('Training Iteration')
plt.ylabel('Validation Accuracy')
plt.title('Hyperparameter Tuning Results')
plt.grid()
plt.savefig("/content/drive/My Drive/hyperparameter_tuning_plot.png", dpi=300)
plt.show()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


2025-03-07 06:00:31,059	INFO worker.py:1672 -- Calling ray.init() again after it has already been called.


+-----------------------------------------------------------------------+
| Configuration for experiment     train_cifar100_2025-03-07_06-00-33   |
+-----------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator                |
| Scheduler                        AsyncHyperBandScheduler              |
| Number of trials                 10                                   |
+-----------------------------------------------------------------------+

View detailed results here: /root/ray_results/train_cifar100_2025-03-07_06-00-33
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-03-07_05-45-45_878621_2122/artifacts/2025-03-07_06-00-33/train_cifar100_2025-03-07_06-00-33/driver_artifacts`

Trial status: 10 PENDING
Current time: 2025-03-07 06:00:33. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+---------------------------------------------------------------------

2025-03-07 06:00:42,839	ERROR tune_controller.py:1331 -- Trial task failed for trial train_cifar100_7b6d0_00000
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line 2771, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py",


Trial train_cifar100_7b6d0_00000 errored after 0 iterations at 2025-03-07 06:00:42. Total running time: 9s
Error file: /tmp/ray/session_2025-03-07_05-45-45_878621_2122/artifacts/2025-03-07_06-00-33/train_cifar100_2025-03-07_06-00-33/driver_artifacts/train_cifar100_7b6d0_00000_0_batch_size=2000,hidden_activation=Sigmoid,lr=0.0041,optimizer=Adam,units=120_2025-03-07_06-00-33/error.txt

Trial train_cifar100_7b6d0_00001 started with configuration:
+-----------------------------------------------------+
| Trial train_cifar100_7b6d0_00001 config             |
+-----------------------------------------------------+
| batch_size                                     1000 |
| hidden_activation                              ReLU |
| lr                                          0.00081 |
| optimizer                                      Adam |
| units                                           120 |
+-----------------------------------------------------+


2025-03-07 06:00:51,214	ERROR tune_controller.py:1331 -- Trial task failed for trial train_cifar100_7b6d0_00001
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line 2771, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py",


Trial train_cifar100_7b6d0_00001 errored after 0 iterations at 2025-03-07 06:00:51. Total running time: 17s
Error file: /tmp/ray/session_2025-03-07_05-45-45_878621_2122/artifacts/2025-03-07_06-00-33/train_cifar100_2025-03-07_06-00-33/driver_artifacts/train_cifar100_7b6d0_00001_1_batch_size=1000,hidden_activation=ReLU,lr=0.0008,optimizer=Adam,units=120_2025-03-07_06-00-33/error.txt

Trial train_cifar100_7b6d0_00002 started with configuration:
+-----------------------------------------------------+
| Trial train_cifar100_7b6d0_00002 config             |
+-----------------------------------------------------+
| batch_size                                     1000 |
| hidden_activation                           Sigmoid |
| lr                                          0.00815 |
| optimizer                                      Adam |
| units                                           240 |
+-----------------------------------------------------+


2025-03-07 06:01:01,314	ERROR tune_controller.py:1331 -- Trial task failed for trial train_cifar100_7b6d0_00002
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line 2771, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py",


Trial train_cifar100_7b6d0_00002 errored after 0 iterations at 2025-03-07 06:01:01. Total running time: 27s
Error file: /tmp/ray/session_2025-03-07_05-45-45_878621_2122/artifacts/2025-03-07_06-00-33/train_cifar100_2025-03-07_06-00-33/driver_artifacts/train_cifar100_7b6d0_00002_2_batch_size=1000,hidden_activation=Sigmoid,lr=0.0081,optimizer=Adam,units=240_2025-03-07_06-00-33/error.txt

Trial status: 3 ERROR | 7 PENDING
Current time: 2025-03-07 06:01:03. Total running time: 30s
Logical resource usage: 2.0/2 CPUs, 0/0 GPUs
+------------------------------------------------------------------------------------------------------------------+
| Trial name                   status       units   hidden_activation     optimizer       batch_size            lr |
+------------------------------------------------------------------------------------------------------------------+
| train_cifar100_7b6d0_00003   PENDING        240   Sigmoid               Adam                  1000   0.0836389   |
| tra

2025-03-07 06:01:11,169	ERROR tune_controller.py:1331 -- Trial task failed for trial train_cifar100_7b6d0_00003
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line 2771, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py",


Trial train_cifar100_7b6d0_00003 errored after 0 iterations at 2025-03-07 06:01:11. Total running time: 37s
Error file: /tmp/ray/session_2025-03-07_05-45-45_878621_2122/artifacts/2025-03-07_06-00-33/train_cifar100_2025-03-07_06-00-33/driver_artifacts/train_cifar100_7b6d0_00003_3_batch_size=1000,hidden_activation=Sigmoid,lr=0.0836,optimizer=Adam,units=240_2025-03-07_06-00-33/error.txt

Trial train_cifar100_7b6d0_00004 started with configuration:
+-----------------------------------------------------+
| Trial train_cifar100_7b6d0_00004 config             |
+-----------------------------------------------------+
| batch_size                                     2000 |
| hidden_activation                              ReLU |
| lr                                          0.00462 |
| optimizer                                      Adam |
| units                                           120 |
+-----------------------------------------------------+


2025-03-07 06:01:19,012	ERROR tune_controller.py:1331 -- Trial task failed for trial train_cifar100_7b6d0_00004
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line 2771, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py",


Trial train_cifar100_7b6d0_00004 errored after 0 iterations at 2025-03-07 06:01:19. Total running time: 45s
Error file: /tmp/ray/session_2025-03-07_05-45-45_878621_2122/artifacts/2025-03-07_06-00-33/train_cifar100_2025-03-07_06-00-33/driver_artifacts/train_cifar100_7b6d0_00004_4_batch_size=2000,hidden_activation=ReLU,lr=0.0046,optimizer=Adam,units=120_2025-03-07_06-00-33/error.txt

Trial train_cifar100_7b6d0_00005 started with configuration:
+-----------------------------------------------------+
| Trial train_cifar100_7b6d0_00005 config             |
+-----------------------------------------------------+
| batch_size                                     2000 |
| hidden_activation                              ReLU |
| lr                                          0.01101 |
| optimizer                                      Adam |
| units                                           240 |
+-----------------------------------------------------+


2025-03-07 06:01:28,511	ERROR tune_controller.py:1331 -- Trial task failed for trial train_cifar100_7b6d0_00005
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line 2771, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py",


Trial train_cifar100_7b6d0_00005 errored after 0 iterations at 2025-03-07 06:01:28. Total running time: 54s
Error file: /tmp/ray/session_2025-03-07_05-45-45_878621_2122/artifacts/2025-03-07_06-00-33/train_cifar100_2025-03-07_06-00-33/driver_artifacts/train_cifar100_7b6d0_00005_5_batch_size=2000,hidden_activation=ReLU,lr=0.0110,optimizer=Adam,units=240_2025-03-07_06-00-33/error.txt


2025-03-07 06:01:30,849	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/train_cifar100_2025-03-07_06-00-33' in 0.0072s.



Trial status: 6 ERROR | 4 PENDING
Current time: 2025-03-07 06:01:30. Total running time: 57s
Logical resource usage: 2.0/2 CPUs, 0/0 GPUs
+------------------------------------------------------------------------------------------------------------------+
| Trial name                   status       units   hidden_activation     optimizer       batch_size            lr |
+------------------------------------------------------------------------------------------------------------------+
| train_cifar100_7b6d0_00006   PENDING        120   Sigmoid               Adagrad               1000   0.000112485 |
| train_cifar100_7b6d0_00007   PENDING        120   Sigmoid               Adam                  2000   0.000224907 |
| train_cifar100_7b6d0_00008   PENDING        240   ReLU                  Adagrad               1000   0.0189111   |
| train_cifar100_7b6d0_00009   PENDING        240   ReLU                  Adam                  1000   0.0153408   |
| train_cifar100_7b6d0_00000   ERROR      

2025-03-07 06:01:32,712	ERROR tune.py:1037 -- Trials did not complete: [train_cifar100_7b6d0_00000, train_cifar100_7b6d0_00001, train_cifar100_7b6d0_00002, train_cifar100_7b6d0_00003, train_cifar100_7b6d0_00004, train_cifar100_7b6d0_00005]
Resume experiment with: tune.run(..., resume=True)
- train_cifar100_7b6d0_00006: FileNotFoundError('Could not fetch metrics for train_cifar100_7b6d0_00006: both result.json and progress.csv were not found at /root/ray_results/train_cifar100_2025-03-07_06-00-33/train_cifar100_7b6d0_00006_6_batch_size=1000,hidden_activation=Sigmoid,lr=0.0001,optimizer=Adagrad,units=120_2025-03-07_06-00-33')
- train_cifar100_7b6d0_00007: FileNotFoundError('Could not fetch metrics for train_cifar100_7b6d0_00007: both result.json and progress.csv were not found at /root/ray_results/train_cifar100_2025-03-07_06-00-33/train_cifar100_7b6d0_00007_7_batch_size=2000,hidden_activation=Sigmoid,lr=0.0002,optimizer=Adam,units=120_2025-03-07_06-00-33')
- train_cifar100_7b6d0_00008: 




AttributeError: 'NoneType' object has no attribute 'config'