<a href="https://colab.research.google.com/github/Vkavyasree732/Repository1/blob/main/MHC2024008_assignment7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import copy
# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# start from the same state on every Restart & Run All.
# NOTE(review): some GPU kernels may still be nondeterministic — confirm if
# bit-exact repeatability is required.
SEED = 42
torch.manual_seed(SEED)

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert each image to a tensor, then normalise every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10 dataset
batch_size = 128

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Held-out split: fixed order, no shuffling.
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:04<00:00, 35.2MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Three ``Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d(2, 2)`` stages
    (32, 64 and 128 channels) halve the spatial size three times, so a 32x32
    input becomes a 128x4x4 feature map. Two fully connected layers then map
    the flattened features to ``num_classes`` logits.

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Layer attribute names and creation order are kept stable so that
        # saved state dicts remain loadable.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, 32, 32)``.

        Raises:
            RuntimeError: If the spatial size is not 32x32, because the
                flattened features no longer match ``fc1``'s input width.
        """
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 128 * 4 * 4)``, this never folds extra features into the
        # batch dimension when the input has an unexpected size; the mismatch
        # surfaces as an error in ``fc1`` instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Build one network with the default number of classes, then move it to the
# selected device.
net = CNN()
net = net.to(device)


In [None]:
def train_model(model, optimizer, criterion, num_epochs=10, loader=None, target_device=None):
    """Train ``model`` and return its training accuracy from the final epoch.

    Args:
        model: Network to optimise. It is moved to the training device and
            switched to training mode before the first batch.
        optimizer: Optimizer already constructed over ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        num_epochs: Number of full passes over the batches.
        loader: Iterable of ``(inputs, labels)`` batches that supports
            ``len()``. Defaults to the notebook-level ``trainloader``.
        target_device: Device that the model and every batch are moved to.
            Defaults to the notebook-level ``device``.

    Returns:
        Accuracy in percent over the batches of the last epoch, accumulated
        while the weights were still being updated. ``0.0`` when no epoch ran
        or no samples were seen.
    """
    # Fall back to the notebook globals only when the caller passes nothing,
    # so existing three-argument calls behave as before.
    if loader is None:
        loader = trainloader
    if target_device is None:
        target_device = device

    model.to(target_device)
    # Make sure BatchNorm/Dropout layers are in training mode even if the
    # model was previously switched to eval().
    model.train()

    accuracy = 0.0  # defined up front so num_epochs=0 returns cleanly
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        # Guard both averages so an empty loader reports zeros instead of
        # raising ZeroDivisionError.
        accuracy = 100 * correct / total if total else 0.0
        num_batches = max(len(loader), 1)
        print(f'Epoch {epoch+1}, Loss: {running_loss / num_batches:.4f}, Accuracy: {accuracy:.2f}%')
    return accuracy

In [None]:
# Create one reference network and keep an independent copy of its freshly
# initialised parameters; the optimizer runs below load this snapshot so they
# all start from identical weights.
initial_model = CNN()
initial_model.to(device)
initial_weights = copy.deepcopy(initial_model.state_dict())

In [None]:
momentum_values = [0.5, 0.9, 0.99]

# Final-epoch training accuracy per momentum value, kept so the numbers can be
# reused later instead of being copied by hand from the printed log.
sgd_accuracy_by_momentum = {}

for momentum in momentum_values:
    print(f'Training with SGD, Momentum: {momentum}')
    # Fresh network restored to the shared starting weights, so runs differ
    # only in the optimizer settings.
    model = CNN().to(device)
    model.load_state_dict(initial_weights)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=momentum)
    sgd_accuracy_by_momentum[momentum] = train_model(model, optimizer, criterion)

Training with SGD, Momentum: 0.5
Epoch 1, Loss: 1.4173, Accuracy: 49.48%
Epoch 2, Loss: 1.0267, Accuracy: 64.06%
Epoch 3, Loss: 0.8748, Accuracy: 69.54%
Epoch 4, Loss: 0.7781, Accuracy: 73.12%
Epoch 5, Loss: 0.7017, Accuracy: 75.86%
Epoch 6, Loss: 0.6428, Accuracy: 77.73%
Epoch 7, Loss: 0.5861, Accuracy: 79.85%
Epoch 8, Loss: 0.5367, Accuracy: 81.60%
Epoch 9, Loss: 0.4906, Accuracy: 83.24%
Epoch 10, Loss: 0.4476, Accuracy: 84.68%
Training with SGD, Momentum: 0.9
Epoch 1, Loss: 1.2428, Accuracy: 55.19%
Epoch 2, Loss: 0.8463, Accuracy: 70.12%
Epoch 3, Loss: 0.6938, Accuracy: 75.55%
Epoch 4, Loss: 0.5854, Accuracy: 79.53%
Epoch 5, Loss: 0.4990, Accuracy: 82.41%
Epoch 6, Loss: 0.4283, Accuracy: 84.89%
Epoch 7, Loss: 0.3509, Accuracy: 87.78%
Epoch 8, Loss: 0.2960, Accuracy: 89.76%
Epoch 9, Loss: 0.2397, Accuracy: 91.69%
Epoch 10, Loss: 0.1852, Accuracy: 93.60%
Training with SGD, Momentum: 0.99
Epoch 1, Loss: 1.4707, Accuracy: 47.58%
Epoch 2, Loss: 1.0365, Accuracy: 63.52%
Epoch 3, Loss: 0.8

In [None]:
# Final-epoch training accuracy per momentum value for Nesterov accelerated
# gradient, kept so the numbers can be reused later instead of being copied by
# hand from the printed log.
nag_accuracy_by_momentum = {}

for momentum in momentum_values:
    print(f'Training with NAG, Momentum: {momentum}')
    # Fresh network restored to the shared starting weights, so runs differ
    # only in the optimizer settings.
    model = CNN().to(device)
    model.load_state_dict(initial_weights)
    criterion = nn.CrossEntropyLoss()
    # nesterov=True switches SGD's momentum update to the Nesterov variant.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=momentum, nesterov=True)
    nag_accuracy_by_momentum[momentum] = train_model(model, optimizer, criterion)

In [None]:
# Adam run: start from the shared initial weights and train with lr=0.001.
print('Training with Adam')
criterion = nn.CrossEntropyLoss()
model = CNN()
model.to(device)
model.load_state_dict(initial_weights)
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
# Left as the cell's last expression so the returned accuracy is displayed.
train_model(model, optimizer, criterion)

In [None]:
# RMSprop run: start from the shared initial weights and train with lr=0.001.
print('Training with RMSProp')
criterion = nn.CrossEntropyLoss()
model = CNN()
model.to(device)
model.load_state_dict(initial_weights)
optimizer = optim.RMSprop(params=model.parameters(), lr=0.001)
# Left as the cell's last expression so the returned accuracy is displayed.
train_model(model, optimizer, criterion)

In [None]:
import matplotlib.pyplot as plt

# Bar labels and their accuracies, paired by position.
# NOTE(review): these accuracies are hard-coded rather than read from the
# training runs — confirm they match the run being reported.
optimizers = ['SGD (0.5)', 'SGD (0.9)', 'SGD (0.99)', 'NAG (0.5)', 'NAG (0.9)', 'NAG (0.99)', 'Adam', 'RMSprop']
accuracies = [83.91,91.88, 87.03,83.43, 93.80, 91.17, 92.33, 87.36]

# One bar per optimizer configuration, drawn through the explicit Axes API.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(optimizers, accuracies, color='skyblue')
ax.set_xlabel("Optimizers")
ax.set_ylabel("Accuracy (%)")
ax.set_title("Accuracy Comparison of Different Optimizers")
# Slant the tick labels so the longer names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()


Analysis of Convergence and Generalization

1) Which Optimizer Converges Faster?

NAG with momentum 0.9 reached the highest training accuracy after 10 epochs (93.80%), i.e. the fastest convergence among the configurations tested.
Adam also converged quickly, reaching 92.33% in 10 epochs.
RMSProp converged more slowly, reaching 87.36%.
SGD with momentum trailed NAG at momentum 0.9 (91.88% vs. 93.80%) and at 0.99 (87.03% vs. 91.17%); at momentum 0.5 the two were essentially tied (83.91% vs. 83.43%).

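2) Which Optimizer Generalizes Better?

NAG with momentum 0.9 and Adam performed best in training. Note that only training accuracy is reported here (the test set is not evaluated in the cells above), so the statements below rest on commonly reported behaviour rather than on results from this notebook: Adam is often reported to overfit more readily, while NAG often generalizes better.
SGD with momentum 0.9 trained stably but did not reach as high an accuracy as NAG with the same momentum (91.88% vs. 93.80%).
RMSProp is typically better suited to non-stationary data, but in this case it underperformed the best-performing configurations.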
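3) Impact of Increasing Momentum on Training Stability
Momentum 0.5: Stable but slower convergence (about 83–84% after 10 epochs for both SGD and NAG).
Momentum 0.9: Best balance between speed and stability (highest accuracy for both SGD and NAG).
Momentum 0.99: Did not converge faster in these runs: accuracy after 10 epochs was lower than with momentum 0.9 (87.03% vs. 91.88% for SGD, 91.17% vs. 93.80% for NAG), which is consistent with the larger momentum overshooting or oscillating at this learning rate.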