In [1]:
import torch
import torchvision
from torch import nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
import torchvision.transforms as transforms
from torch.utils.data.dataset import random_split

torch.manual_seed(265)                  # Sets the randomness
torch.set_default_dtype(torch.double)   # Sets the default datatype for tensors

In [10]:
transform = transforms.Compose([
    transforms.ToTensor(),                          #converts to tensor
    transforms.Lambda(lambda x: torch.flatten(x))   #Flattens the tensor
])

# Loads the cifar10 training and test sets. It downloads it into the data folder and is transformed into a tenser using the transfrom.toTensor.
full_train_val_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
full_test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [13]:
# Splits training set into training and validation set
training_size = int(len(full_train_val_set)*0.9)
validation_size = len(full_train_val_set) - training_size

full_train_set, full_val_set = random_split(full_train_val_set, [training_size, validation_size], generator=torch.Generator().manual_seed(265))

In [15]:
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

# Filters out the nonrelevant labels. Keeps the airplane and birds
train_set = [(img, label_map[label]) for img, label in full_train_set if label in [0, 2]]
test_set = [(img, label_map[label]) for img, label in full_test_set if label in [0, 2]]
val_set = [(img, label_map[label]) for img, label in full_val_set if label in [0, 2]]

In [16]:
# MLP class for 3.1.2

# CRITERIA:
#   * The input dimension is 3072 (= 32*32*3) and the output dimension is 2 (for the 2 classes).
#   * The hidden layers have respectively 512, 128 and 32 hidden units.
#   * All activation functions are ReLU. The last layer has no activation function since the cross-entropy loss already includes a softmax activation function.

# Layout: [3072, 512, 128, 32, 2]

class MyMLP(nn.Module):

    def __init__(self):
        super().__init__() # Initializes the nn.module

        # Defines the network using seqential.
        self.model = nn.Sequential(
            nn.Linear(in_features=3072, out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=2)
        )

    def forward(self, input): #Passes the input trough the network and returns the output
        return self.model(input)

In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #Sets the device

In [33]:

def train(n_epochs, optimizer, model, loss_fn, train_loader):

    model.to(device)
    model.train()

    for i in range(n_epochs):
        total_loss = 0.0
        batch_amount = len(train_loader)

        for x, y_true  in train_loader:
            x, y_true = x.to(device, dtype=torch.double), y_true.to(device)

            optimizer.zero_grad()           # Resets the gradients

            y_pred = model(x)               # Forward passes the batch

            loss = loss_fn(y_pred, y_true)  # calcs the loss
            total_loss += loss.item()       # Adds the loss to the total

            loss.backward()                 # Backward propogation
            optimizer.step()                # Updates the weights and biases

        print("Train ||| Epoch: ",(i+1), " of ", n_epochs, "| Average Loss: ", total_loss / batch_amount)
    


In [49]:
def train_manual_update(n_epochs, lr, model, loss_fn, train_loader, weight_decay=0, momentum=0):

    model.to(device)
    model.train()

    velocities = {p: torch.zeros_like(p) for p in model.parameters()}

    for i in range(n_epochs):
        total_loss = 0.0
        batch_amount = len(train_loader)

        for x, y_true  in train_loader:
            x, y_true = x.to(device, dtype=torch.double), y_true.to(device)
            
            model.zero_grad()           # Resets the gradients

            y_pred = model(x)               # Forward passes the batch

            loss = loss_fn(y_pred, y_true)  # calcs the loss
            total_loss += loss.item()       # Adds the loss to the total
            

            loss.backward()                 # Backward propogation
            
            with torch.no_grad():           #Manually uses gradient decent to update each oarameter
                for p, velocity in model.parameters(), velocities.values():
                    if p.requires_grad:  # Only update parameters that require gradient

                        if momentum != 0:
                            grad = p.grad + weight_decay * p
                            # Update velocity
                            velocity *= momentum
                            velocity += grad*lr

                            p -= velocity

                        else:
                            p -= lr*( p.grad + p*weight_decay)     # Equation (2) in project pdf. p = p - lr*p.grad
                            

        print("Train_manual_update ||| Epoch: ",(i+1), " of ", n_epochs, "| Average Loss: ", total_loss / batch_amount)

In [41]:
torch.manual_seed(265)

model = MyMLP()

lr = 0.01
n_epochs = 10

optimizer = optim.SGD(model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()
train_loader = DataLoader(train_set, 64)

train(n_epochs, optimizer, model, loss_fn, train_loader)


Train ||| Epoch:  1  of  10 | Average Loss:  0.6759906525975633
Train ||| Epoch:  2  of  10 | Average Loss:  0.6193885364965185
Train ||| Epoch:  3  of  10 | Average Loss:  0.5490012386761132
Train ||| Epoch:  4  of  10 | Average Loss:  0.5200428597277562
Train ||| Epoch:  5  of  10 | Average Loss:  0.5085929610671069
Train ||| Epoch:  6  of  10 | Average Loss:  0.5012871729824612
Train ||| Epoch:  7  of  10 | Average Loss:  0.4952112674215567
Train ||| Epoch:  8  of  10 | Average Loss:  0.4896398773165913
Train ||| Epoch:  9  of  10 | Average Loss:  0.48401971178514486
Train ||| Epoch:  10  of  10 | Average Loss:  0.4784286860893781


In [44]:

torch.manual_seed(265)

model = MyMLP()

lr = 0.01
n_epochs = 10
weight_decay = 0.5

optimizer = optim.SGD(model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()
train_loader = DataLoader(train_set, 64)

train_manual_update(n_epochs, lr, model, loss_fn, train_loader, weight_decay)

Train_manual_update ||| Epoch:  1  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  2  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  3  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  4  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  5  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  6  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  7  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  8  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  9  of  10 | Average Loss:  0.6969461991856638
Train_manual_update ||| Epoch:  10  of  10 | Average Loss:  0.6969461991856638
