In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torchvision import datasets

import matplotlib.pyplot as plt

In [18]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [19]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Create an instance of the CNN model
net = Net()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()

# Initialize the model and optimizer for continual learning
task_count = 45
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # Initial step size

# Lists to store accuracy results for different step sizes
step_sizes = [0.0001, 0.001, 0.01]
accuracies = {0.0001: [], 0.001: [], 0.01: []}

for task in range(task_count):
    # Load the data for the current task (two classes at a time)
    classes = list(range(task * 2, (task + 1) * 2))
    trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform, target_transform=lambda x: x if x in classes else -1)
    trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

    # Train the model
    for epoch in range(5):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs[labels != -1], labels[labels != -1])
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

    print(f"Task {task + 1}, Loss: {running_loss / len(trainloader)}")

    # Evaluate the model's accuracy on the test set for the current task
    testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform, target_transform=lambda x: x if x in classes else -1)
    testloader = DataLoader(testset, batch_size=64, shuffle=False)
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            predicted = outputs.max(1)[1]
            total += (labels != -1).sum().item()
            correct += (predicted[labels != -1] == labels[labels != -1]).sum().item()

    accuracy = 100 * correct / total
    print(f"Task {task + 1}, Accuracy: {accuracy}%")

    # Store accuracy for different step sizes
    for step_size in step_sizes:
        accuracies[step_size].append(accuracy)

print("Finished Training")


for step_size in step_sizes:
    plt.plot(range(1, task_count + 1), accuracies[step_size], label=f"Step Size: {step_size}")

plt.xlabel('Task Number')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.title('Continual Learning Accuracy for Different Step Sizes')
plt.show()

ValueError: Expected input batch_size (64) to match target batch_size (12).