In [1]:
import os
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
import numpy as np


# Folder that holds one checkpoint file per run; it is created up front so
# that later saves never hit a missing directory.
checkpoint_dir = './checkpoints_cifar100'
os.makedirs(name=checkpoint_dir, exist_ok=True)

class Net(torch.nn.Module):
    """Small two-convolution CNN mapping 3x32x32 images to 100 class logits.

    Both convolutions use a 3x3 kernel with padding 1 (spatial size is kept)
    and each is followed by a 2x2 max-pool, so a 32x32 input reaches the
    classifier as a 32-channel 8x8 feature map (32 * 8 * 8 features).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 100)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        """Return raw (un-normalised) class scores of shape (batch, 100).

        Raises:
            RuntimeError: if the input's spatial size does not produce exactly
                32 * 8 * 8 features per sample (i.e. it is not 32x32).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # `view(-1, 32 * 8 * 8)`, this never folds surplus features into extra
        # batch rows: a wrongly sized input fails loudly in `fc1` instead of
        # silently yielding more predictions than samples.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Training pipeline: random augmentation (flip, small rotation, padded crop,
# colour jitter) applied to the PIL image, then tensor conversion and
# per-channel normalisation with mean 0.5 / std 0.5.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=10),
    transforms.RandomCrop(size=32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Evaluation pipeline: no augmentation, same tensor conversion and
# normalisation as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


# CIFAR-100 training split (downloaded to ./data if absent) with the augmenting
# transform, served in shuffled mini-batches of 128.
trainset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)

# CIFAR-100 test split with the plain transform, iterated in fixed order.
testset = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)


# Experiment size: epochs per run, and number of independent runs.
num_epochs, num_runs = 175, 5

# Per-run histories collected across all runs (one entry appended per run).
all_train_accuracies, all_test_accuracies, all_losses = [], [], []

def save_checkpoint(run, model, optimizer, scheduler, train_accuracies, losses):
    """Persist the state of one run to `checkpoint_dir/checkpoint_run_{run}.pth`.

    Args:
        run: Zero-based run index; also used in the checkpoint file name.
        model: Module whose `state_dict()` is stored.
        optimizer: Optimizer whose `state_dict()` is stored.
        scheduler: LR scheduler whose `state_dict()` is stored.
        train_accuracies: Training-accuracy history recorded so far.
        losses: Loss history recorded so far.
    """
    checkpoint = {
        'run': run,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'train_accuracies': train_accuracies,
        'losses': losses
    }
    final_path = os.path.join(checkpoint_dir, f'checkpoint_run_{run}.pth')
    # Write to a sibling temp file first and swap it in afterwards: if the
    # process is interrupted mid-write, the previous checkpoint stays intact
    # instead of being replaced by a truncated, unloadable file.
    tmp_path = final_path + '.tmp'
    torch.save(checkpoint, tmp_path)
    os.replace(tmp_path, final_path)

def load_checkpoint(run, map_location=None):
    """Load and return the checkpoint dictionary stored for `run`.

    Args:
        run: Zero-based run index used in the checkpoint file name.
        map_location: Optional device remapping forwarded to `torch.load`
            (e.g. 'cpu' to open a checkpoint whose tensors were saved from
            another device). The default `None` keeps `torch.load`'s own
            default behaviour.

    Returns:
        The object written by `torch.save` for this run (a dict of state
        dicts and histories when produced by `save_checkpoint`).
    """
    path = os.path.join(checkpoint_dir, f'checkpoint_run_{run}.pth')
    return torch.load(path, map_location=map_location)


# Train `num_runs` independent models. Each run is checkpointed after every
# epoch, and a run that already has a checkpoint continues from the number of
# epochs recorded in it rather than starting over.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for run in range(num_runs):
    print(f"Starting run {run + 1}/{num_runs}")

    net = Net().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(net.parameters(), lr=0.0001)
    # NOTE(review): with step_size=10 and gamma=0.1 the learning rate is
    # 1e-4 * 0.1 ** (epoch // 10), i.e. below 1e-7 after 30 epochs. The
    # recorded output plateaus near 20% training accuracy from epoch 11 on;
    # confirm that this schedule is intended for a 175-epoch run.
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    if os.path.exists(os.path.join(checkpoint_dir, f'checkpoint_run_{run}.pth')):
        print(f"Loading checkpoint for run {run + 1}")
        checkpoint = load_checkpoint(run)
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        train_accuracies = checkpoint['train_accuracies']
        losses = checkpoint['losses']
    else:
        train_accuracies = []
        losses = []
    test_accuracies = []

    # One history entry is appended per completed epoch, so the history length
    # is the epoch to resume from. A finished run (length == num_epochs) skips
    # training entirely instead of training another full `num_epochs` on top
    # of the restored model and doubling its history.
    start_epoch = len(train_accuracies)

    for epoch in range(start_epoch, num_epochs):
        net.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_accuracy = 100 * correct / total
        epoch_loss = running_loss / len(trainloader)
        train_accuracies.append(train_accuracy)
        losses.append(epoch_loss)
        print(f"Run {run+1}, Epoch {epoch+1}, Loss: {epoch_loss:.3f}, Training Accuracy: {train_accuracy:.2f}%")

        scheduler.step()

        # Checkpoint after the scheduler step so the stored scheduler state,
        # weights and histories all describe the same completed epoch.
        save_checkpoint(run, net, optimizer, scheduler, train_accuracies, losses)

    # Final evaluation on the held-out test split.
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            predicted = net(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_accuracy = 100 * correct / total
    test_accuracies.append(test_accuracy)
    print(f"Run {run+1}, Final Accuracy on test set: {test_accuracy:.2f}%")

    all_train_accuracies.append(train_accuracies)
    all_test_accuracies.append(test_accuracies)
    all_losses.append(losses)

    # Kept so that a checkpoint exists even when no epoch was trained above.
    save_checkpoint(run, net, optimizer, scheduler, train_accuracies, losses)

    print(f"Results after run {run + 1}:")
    print(f"Training Accuracies: {train_accuracies}")
    print(f"Test Accuracy: {test_accuracy:.2f}%")
    print(f"Losses: {losses}")

# Turn the per-run histories into arrays (one row per run).
all_train_accuracies = np.asarray(all_train_accuracies)
all_test_accuracies = np.asarray(all_test_accuracies)
all_losses = np.asarray(all_losses)

# Write each array to its own plain-text file in the working directory.
np.savetxt(fname='train_accuracies_cifar100.txt', X=all_train_accuracies)
np.savetxt(fname='test_accuracies_cifar100.txt', X=all_test_accuracies)
np.savetxt(fname='losses_cifar100.txt', X=all_losses)

# Echo the collected results into the cell output.
print("All Training Accuracies over Epochs for each run:", all_train_accuracies)
print("All Test Accuracies after each run:", all_test_accuracies)
print("All Losses over Epochs for each run:", all_losses)


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:02<00:00, 80268725.60it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified
Starting run 1/5
Run 1, Epoch 1, Loss: 4.224, Training Accuracy: 6.01%
Run 1, Epoch 2, Loss: 3.983, Training Accuracy: 9.64%
Run 1, Epoch 3, Loss: 3.862, Training Accuracy: 11.59%
Run 1, Epoch 4, Loss: 3.759, Training Accuracy: 13.28%
Run 1, Epoch 5, Loss: 3.685, Training Accuracy: 14.68%
Run 1, Epoch 6, Loss: 3.622, Training Accuracy: 15.69%
Run 1, Epoch 7, Loss: 3.570, Training Accuracy: 16.44%
Run 1, Epoch 8, Loss: 3.531, Training Accuracy: 17.27%
Run 1, Epoch 9, Loss: 3.490, Training Accuracy: 17.75%
Run 1, Epoch 10, Loss: 3.458, Training Accuracy: 18.51%
Run 1, Epoch 11, Loss: 3.409, Training Accuracy: 19.59%
Run 1, Epoch 12, Loss: 3.400, Training Accuracy: 19.76%
Run 1, Epoch 13, Loss: 3.389, Training Accuracy: 20.11%
Run 1, Epoch 14, Loss: 3.387, Training Accuracy: 20.01%
Run 1, Epoch 15, Loss: 3.384, Training Accuracy: 19.96%
Run 1, Epoch 16, Loss: 3.385, Training Accuracy: 20.09%
Run 1, E