In [1]:
import torchvision.datasets as datasets
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data import Subset
import numpy as np

In [2]:
# Pick the compute device: CUDA when a GPU is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cpu


In [3]:
# Train and test images get the same preprocessing: conversion to a tensor.
transform_train = transforms.Compose([transforms.ToTensor()])
transform_test = transforms.Compose([transforms.ToTensor()])

# Training data: only the samples at indices 0..399 of the MNIST training split.
trainset = Subset(
    datasets.MNIST(root='./data', train=True, download=True, transform=transform_train),
    indices=np.arange(400),
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)

# Evaluation data: the full MNIST test split, iterated in a fixed order.
testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

In [4]:
class Net(torch.nn.Module):
    """Fully connected classifier with one hidden ReLU layer.

    Inputs are flattened to vectors of length 28*28 and mapped to 10 output
    scores, one per class.

    Args:
        hidden_units: Number of units in the hidden layer.
    """

    def __init__(self, hidden_units):
        super().__init__()
        self.layer1 = torch.nn.Linear(28*28, hidden_units)
        self.layer2 = torch.nn.Linear(hidden_units, 10)

    def forward(self, x):
        """Compute unnormalised class scores (logits).

        Args:
            x: Tensor whose elements can be regrouped into rows of 28*28
                values (e.g. a batch of 1x28x28 images).

        Returns:
            Tensor of shape (batch, 10) holding raw logits.
        """
        x = x.reshape(-1, 28*28)
        x = torch.relu(self.layer1(x))
        # No output activation: this notebook trains with CrossEntropyLoss,
        # which applies log-softmax itself and expects raw logits. Squashing
        # the scores through a sigmoid first would confine them to (0, 1) and
        # distort both the loss values and the gradients.
        return self.layer2(x)

In [5]:
def train_and_evaluate_model(model, hidden_units, optimizer, criterion, epochs=600):
    """Train ``model`` on ``trainloader`` and then score it on ``testloader``.

    ``trainloader``, ``testloader`` and ``device`` are read from the
    notebook's global namespace.

    Args:
        model: Network to optimise; it is moved to ``device`` first.
        hidden_units: Unused by this function; kept so existing callers that
            pass it positionally continue to work.
        optimizer: Optimizer already constructed over ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch.
        epochs: Number of passes over ``trainloader``. Defaults to 600.

    Returns:
        Tuple ``(train_loss, test_loss)``. ``train_loss`` is the per-sample
        average loss accumulated during the final training epoch (i.e. while
        the weights were still being updated); ``test_loss`` is the
        per-sample average over ``testloader`` after training. Either value
        is NaN when no samples were processed.
    """
    model = model.to(device)

    # Stays NaN if the loop below never runs (epochs == 0).
    train_loss = float('nan')
    for _ in range(epochs):
        model.train()
        loss_sum, n_seen = 0.0, 0
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            # Weight each batch mean by its size so that a short final batch
            # does not count as much as a full one.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            n_seen += batch_size
        train_loss = loss_sum / n_seen if n_seen else float('nan')

    model.eval()
    loss_sum, n_seen = 0.0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            loss = criterion(model(inputs), labels)
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            n_seen += batch_size
    test_loss = loss_sum / n_seen if n_seen else float('nan')
    return train_loss, test_loss

In [None]:
# Sweep the hidden-layer width and record the final train/test loss for each.
hidden_units_list = list(range(1, 10))
train_losses = []
test_losses = []

# Seed the global RNG so weight initialisation is repeatable across kernel
# restarts (presumably this also fixes the loader's shuffle order, which is
# drawn from the default generator -- confirm if exact replay matters).
torch.manual_seed(0)

# The loss module does not depend on the model, so build it once.
criterion = torch.nn.CrossEntropyLoss()
for hidden_units in hidden_units_list:
    model = Net(hidden_units)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.95)
    train_loss, test_loss = train_and_evaluate_model(model, hidden_units, optimizer, criterion)
    train_losses.append(train_loss)
    test_losses.append(test_loss)

plt.plot(hidden_units_list, train_losses, marker='o', label='train')
plt.plot(hidden_units_list, test_losses, marker='o', label='test')
plt.xlabel('Hidden units')
# Both the train and the test curve share this axis, so the label must not
# claim it shows test loss only.
plt.ylabel('cross entropy loss')
plt.title('Double Descent Curve')
plt.legend()
plt.show()