<a href="https://colab.research.google.com/github/elimeyer1/ML_4105/blob/main/Homework_7_Quesiton_1b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time
import matplotlib.pyplot as plt

In [2]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Training hyperparameters shared by the cells below.
num_epochs = 200       # full passes over the training set
batch_size = 64        # samples per mini-batch for both data loaders
learning_rate = 0.001  # step size handed to the optimizer

# Seed PyTorch's global RNG so that weight initialisation, dropout masks and
# the DataLoader shuffle order repeat across "Restart & Run All".
# NOTE: some CUDA kernels are still non-deterministic, so GPU runs may differ
# slightly from one another even with a fixed seed.
seed = 42
torch.manual_seed(seed);

In [4]:
# Preprocessing applied to every image: convert to a tensor, then shift and
# scale each of the three channels with mean 0.5 and standard deviation 0.5.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(channel_mean, channel_std)
transform = transforms.Compose([to_tensor, normalize])

In [5]:
# Both CIFAR-10 splits live under the same root and share one preprocessing
# pipeline; only the `train` flag differs.
cifar_kwargs = dict(root='./data', download=True, transform=transform)
trainset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
testset = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)

# Shuffle only the training split; the test split is iterated in fixed order.
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:02<00:00, 72.3MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
class ExtendedCNN(nn.Module):
    """Three convolutional stages followed by a two-layer fully connected head.

    Every stage is Conv2d (3x3, stride 1, padding 1) -> ReLU -> 2x2 max-pool,
    with channel widths 3 -> 16 -> 32 -> 64. The head's first Linear layer
    takes 64 * 4 * 4 features, which corresponds to a 4x4 feature map after
    the three poolings (e.g. 32x32 inputs).

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Build the stages in order so the Sequential indices stay
        # 0/3/6 for the convolutions, matching existing checkpoints.
        widths = (3, 16, 32, 64)
        stages = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        flat_features = 64 * 4 * 4
        hidden_units = 256
        self.fc_layers = nn.Sequential(
            nn.Linear(flat_features, hidden_units),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        features = self.conv_layers(x)
        # Collapse everything except the batch dimension for the linear head.
        flat = features.view(features.size(0), -1)
        return self.fc_layers(flat)

In [7]:
# Instantiate the network with its default class count, then move its
# parameters to the selected device.
model = ExtendedCNN()
model = model.to(device)

In [8]:
# Adam updates every model parameter; cross-entropy is the training objective.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [9]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    With an ``optimizer`` the model is switched to training mode and updated
    after every batch. Without one the model is switched to eval mode and
    gradient tracking is disabled. Both values are NaN when the loader yields
    no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    # A criterion with reduction='sum' already returns a per-batch total; any
    # other setting (default 'mean') is treated as a per-sample average and
    # re-weighted by the batch size, so a short final batch does not skew the
    # epoch mean.
    loss_is_batch_mean = getattr(criterion, 'reduction', 'mean') != 'sum'

    loss_sum, num_correct, num_samples = 0.0, 0, 0
    with torch.set_grad_enabled(is_training):
        for data, targets in loader:
            data, targets = data.to(device), targets.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, targets)
            if is_training:
                loss.backward()
                optimizer.step()

            n_batch = targets.size(0)
            batch_loss = loss.item()
            loss_sum += batch_loss * n_batch if loss_is_batch_mean else batch_loss
            num_correct += (outputs.argmax(dim=1) == targets).sum().item()
            num_samples += n_batch

    if num_samples == 0:
        return float('nan'), float('nan')
    return loss_sum / num_samples, 100 * num_correct / num_samples


def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, evaluating after each one.

    Each epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``test_loader``, then prints a one-line summary.
    Batches are moved to the device that holds the model's parameters (CPU if
    the model has none), so the function does not rely on a notebook-global
    ``device``.

    Args:
        model: Network to optimise; left in eval mode on return when at least
            one epoch ran.
        train_loader: Iterable of ``(data, targets)`` batches used for updates.
        test_loader: Iterable of ``(data, targets)`` batches used for evaluation.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer already bound to the model's parameters.
        num_epochs: Number of train/evaluate cycles to run.

    Returns:
        Tuple ``(training_time, train_loss, test_loss, train_acc, test_acc)``:
        wall-clock seconds for the whole loop (evaluation included) and the
        final epoch's per-sample mean losses and accuracies in percent.
        The four metrics are NaN when ``num_epochs`` is 0.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # Defaults reported when no epoch runs; previously this case raised
    # IndexError while reading the last element of an empty history list.
    train_loss = test_loss = train_acc = test_acc = float('nan')
    start_time = time.time()

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, run_device, optimizer)
        test_loss, test_acc = _run_epoch(model, test_loader, criterion, run_device)

        print(f'Epoch {epoch+1}: Train Loss {train_loss:.4f}, Test Acc {test_acc:.2f}%')

    training_time = time.time() - start_time

    return training_time, train_loss, test_loss, train_acc, test_acc


In [10]:
# Run the full training schedule, then unpack the final-epoch summary.
final_metrics = train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs)
training_time, train_loss, test_loss, train_acc, test_acc = final_metrics

# Model size = total element count over parameters that require gradients.
trainable_params = [param for param in model.parameters() if param.requires_grad]
model_size = sum(param.numel() for param in trainable_params)

Epoch 1: Train Loss 1.6042, Test Acc 52.33%
Epoch 2: Train Loss 1.2547, Test Acc 61.91%
Epoch 3: Train Loss 1.0886, Test Acc 65.87%
Epoch 4: Train Loss 0.9763, Test Acc 67.26%
Epoch 5: Train Loss 0.9000, Test Acc 68.71%
Epoch 6: Train Loss 0.8308, Test Acc 70.02%
Epoch 7: Train Loss 0.7843, Test Acc 71.70%
Epoch 8: Train Loss 0.7402, Test Acc 71.99%
Epoch 9: Train Loss 0.6987, Test Acc 72.03%
Epoch 10: Train Loss 0.6571, Test Acc 73.28%
Epoch 11: Train Loss 0.6303, Test Acc 73.68%
Epoch 12: Train Loss 0.5946, Test Acc 73.17%
Epoch 13: Train Loss 0.5705, Test Acc 73.24%
Epoch 14: Train Loss 0.5518, Test Acc 73.31%
Epoch 15: Train Loss 0.5256, Test Acc 73.90%
Epoch 16: Train Loss 0.5028, Test Acc 74.07%
Epoch 17: Train Loss 0.4864, Test Acc 73.84%
Epoch 18: Train Loss 0.4608, Test Acc 73.57%
Epoch 19: Train Loss 0.4470, Test Acc 74.42%
Epoch 20: Train Loss 0.4315, Test Acc 73.74%
Epoch 21: Train Loss 0.4229, Test Acc 73.49%
Epoch 22: Train Loss 0.4020, Test Acc 73.82%
Epoch 23: Train Los

In [13]:
# Report the trainable-parameter count and the wall-clock training duration.
summary_lines = (
    f'Model Size: {model_size} parameters',
    f'Training Time: {training_time:.2f} seconds',
)
print(*summary_lines, sep='\n')


Model Size: 288554 parameters
Training Time: 13430.22 seconds
