# Resnet for CIFAR-10

In [12]:
# Import get_CIFAR10_data from data_process.py
from data_process import get_CIFAR10_data
import torch
from torch.utils.data import DataLoader, TensorDataset
import torchvision.transforms as transforms

# Load data
data = get_CIFAR10_data()

# Retrieve datasets
X_train, y_train = data["X_train"], data["y_train"]
X_val, y_val = data["X_val"], data["y_val"]
X_test, y_test = data["X_test"], data["y_test"]

# Convert to PyTorch tensors and apply transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Apply transformations
X_train = torch.tensor(X_train).float() / 255  # Normalizing to [0, 1] range
X_val = torch.tensor(X_val).float() / 255
X_test = torch.tensor(X_test).float() / 255
y_train = torch.tensor(y_train)
y_val = torch.tensor(y_val)
y_test = torch.tensor(y_test)

# Create TensorDatasets
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)


In [13]:
import torch.nn as nn

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = nn.ReLU()(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = nn.ReLU()(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        layers = [block(self.in_planes, planes, stride)]
        self.in_planes = planes * block.expansion
        for _ in range(1, num_blocks):
            layers.append(block(self.in_planes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = nn.ReLU()(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = nn.AvgPool2d(4)(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# Instantiate ResNet for CIFAR-10 (ResNet-18)
def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

net = ResNet18()
net = net.to('cuda' if torch.cuda.is_available() else 'cpu')


In [14]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)


In [15]:
import torch

if torch.backends.mps.is_available():
    print("MPS backend is available!")
else:
    print("MPS backend is not available.")


MPS backend is available!


In [16]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
net = net.to(device)
device


device(type='mps')

In [17]:
# Set up device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Move the model to the device before initializing the optimizer
net = net.to(device)

# Define optimizer after moving model to the device
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Training loop
for epoch in range(num_epochs):
    net.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Move data to the correct device
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    scheduler.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader)}")

    # Evaluation on the test set
    if (epoch + 1) % 10 == 0:
        net.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = net(inputs)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
        print(f'Accuracy on the test set after epoch {epoch+1}: {100 * correct / total:.2f}%')





Epoch [1/100], Loss: 1.8730357297095555
Epoch [2/100], Loss: 1.3218409003539122
Epoch [3/100], Loss: 1.0304363471098419
Epoch [4/100], Loss: 0.8139414171014692
Epoch [5/100], Loss: 0.6444653789306123
Epoch [6/100], Loss: 0.5206202747927633
Epoch [7/100], Loss: 0.44646078908412323
Epoch [8/100], Loss: 0.3922140416604109
Epoch [9/100], Loss: 0.3480944662274642
Epoch [10/100], Loss: 0.31240905478946845
Accuracy on the test set after epoch 10: 77.83%
Epoch [11/100], Loss: 0.27871740593844857
Epoch [12/100], Loss: 0.26819978049389687
Epoch [13/100], Loss: 0.25038716803954103
Epoch [14/100], Loss: 0.2316042138920129
Epoch [15/100], Loss: 0.22349707383477657
Epoch [16/100], Loss: 0.21917569493344807
Epoch [17/100], Loss: 0.21041642072034256
Epoch [18/100], Loss: 0.20324311629132252
Epoch [19/100], Loss: 0.195562837076405
Epoch [20/100], Loss: 0.1998163971942959
Accuracy on the test set after epoch 20: 78.55%
Epoch [21/100], Loss: 0.18591015428966082
Epoch [22/100], Loss: 0.19310611082780765
E

In [19]:
import matplotlib.pyplot as plt

# Plot Training Loss
plt.figure(figsize=(12, 5))
plt.plot(range(1, num_epochs + 1), train_losses, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.legend()
plt.show()

# Plot Test Accuracy
plt.figure(figsize=(12, 5))
plt.plot(range(1, num_epochs + 1), test_accuracies, label='Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Test Accuracy over Epochs')
plt.legend()
plt.show()



NameError: name 'train_losses' is not defined

<Figure size 1200x500 with 0 Axes>