<a href="https://colab.research.google.com/github/alec-carawan/Intro-to-Machine-Learning/blob/main/HW_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x7e48403d9f90>

In [11]:
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

In [12]:
from torchvision import datasets, transforms
data_path = '../data-unversioned/p1ch6/'
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data-unversioned/p1ch6/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 63273027.47it/s]


Extracting ../data-unversioned/p1ch6/cifar-10-python.tar.gz to ../data-unversioned/p1ch6/
Files already downloaded and verified


In [4]:
conv = nn.Conv2d(3, 16, kernel_size=3)
conv.weight.shape, conv.bias.shape

(torch.Size([16, 3, 3, 3]), torch.Size([16]))

In [5]:
img, _ = cifar10[0]
output = conv(img.unsqueeze(0))
img.unsqueeze(0).shape, output.shape

(torch.Size([1, 3, 32, 32]), torch.Size([1, 16, 30, 30]))

In [6]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        out = out.view(-1, 8 * 8 * 8) # <1>
        out = self.act3(self.fc1(out))
        out = self.fc2(out)
        return out

model = Net()
model(img.unsqueeze(0))

tensor([[ 0.0392, -0.1330, -0.0797,  0.2267,  0.1575,  0.0391,  0.1307,  0.0203,
          0.1797, -0.0242]], grad_fn=<AddmmBackward0>)

In [22]:
import datetime  # <1>

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):  # <2>
        loss_train = 0.0
        for imgs, labels in train_loader:  # <3>

            outputs = model(imgs)  # <4>

            loss = loss_fn(outputs, labels)  # <5>

            optimizer.zero_grad()  # <6>

            loss.backward()  # <7>

            optimizer.step()  # <8>

            loss_train += loss.item()  # <9>

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))  # <10>

In [10]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)  # <1>

model = Net()  #  <2>
optimizer = optim.SGD(model.parameters(), lr=1e-2)  #  <3>
loss_fn = nn.CrossEntropyLoss()  #  <4>

training_loop(  # <5>
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2023-12-12 23:43:02.985321 Epoch 1, Training loss 2.0244765376191003
2023-12-12 23:45:55.194542 Epoch 10, Training loss 1.2294329266109125


KeyboardInterrupt: ignored

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)

def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():  # <1>
            for imgs, labels in loader:
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <2>
                total += labels.shape[0]  # <3>
                correct += int((predicted == labels).sum())  # <4>

        print("Accuracy {}: {:.2f}".format(name , correct / total))

validate(model, train_loader, val_loader)

In [29]:
class Net_expanded(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        self.conv3 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act3 = nn.Tanh()
        self.pool3 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(2 * 8 * 8, 32)
        self.act4 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        out = self.pool3(self.act3(self.conv3(out)))
        out = out.view(-1, 2 * 8 * 8)
        out = self.act4(self.fc1(out))
        out = self.fc2(out)
        return out

model = Net_expanded()
model(img.unsqueeze(0))

tensor([[ 0.1416,  0.0377,  0.0891, -0.0749, -0.1347,  0.1406,  0.0543, -0.0699,
          0.0578,  0.0719]], grad_fn=<AddmmBackward0>)

In [30]:
#check model complexity
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(11138, [864, 32, 4608, 16, 1152, 8, 4096, 32, 320, 10])

In [14]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)  # <1>

model = Net_expanded()  #  <2>
optimizer = optim.SGD(model.parameters(), lr=1e-2)  #  <3>
loss_fn = nn.CrossEntropyLoss()  #  <4>

training_loop(  # <5>
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2023-12-12 23:51:50.526632 Epoch 1, Training loss 2.15064963980404
2023-12-12 23:56:10.285381 Epoch 10, Training loss 1.2790547658110518
2023-12-13 00:00:59.049806 Epoch 20, Training loss 1.0193802652609012
2023-12-13 00:05:44.600121 Epoch 30, Training loss 0.9106081973408799
2023-12-13 00:10:28.875551 Epoch 40, Training loss 0.850234483056666
2023-12-13 00:15:11.918281 Epoch 50, Training loss 0.8098195384606681
2023-12-13 00:19:57.517872 Epoch 60, Training loss 0.7772660306500047
2023-12-13 00:24:40.887741 Epoch 70, Training loss 0.7552297420209021
2023-12-13 00:29:25.150239 Epoch 80, Training loss 0.7332331096500997
2023-12-13 00:34:09.744944 Epoch 90, Training loss 0.71761597647234
2023-12-13 00:38:52.205342 Epoch 100, Training loss 0.7009343238895201
2023-12-13 00:43:35.138472 Epoch 110, Training loss 0.688207793456819
2023-12-13 00:48:17.369570 Epoch 120, Training loss 0.6750549912986243
2023-12-13 00:52:59.594176 Epoch 130, Training loss 0.6653867843358413
2023-12-13 00:57:40.852

In [10]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)

def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():  # <1>
            for imgs, labels in loader:
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <2>
                total += labels.shape[0]  # <3>
                correct += int((predicted == labels).sum())  # <4>

        print("Accuracy {}: {:.2f}".format(name , correct / total))

validate(model, train_loader, val_loader)

NameError: ignored

In [17]:
torch.save(model.state_dict(), data_path + 'img_classification_CNN_expanded')

In [31]:
class NetRes(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3,
                               padding=1)
        self.conv3 = nn.Conv2d(n_chans1 // 2, n_chans1 // 2,
                               kernel_size=3, padding=1)
        self.fc1 = nn.Linear(4 * 4 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out1 = out
        out = F.max_pool2d(torch.relu(self.conv3(out)) + out1, 2)
        out = out.view(-1, 4 * 4 * self.n_chans1 // 2)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [5]:
device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda.


In [13]:
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [6]:
all_acc_dict = collections.OrderedDict()

In [23]:
model = NetRes(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2023-12-13 02:16:13.869426 Epoch 1, Training loss 2.1297521957046235
2023-12-13 02:18:32.198768 Epoch 10, Training loss 1.0946818234975382
2023-12-13 02:21:06.147302 Epoch 20, Training loss 0.8877365139439283
2023-12-13 02:23:40.102769 Epoch 30, Training loss 0.7896931357395923
2023-12-13 02:26:13.194114 Epoch 40, Training loss 0.7288132244744874
2023-12-13 02:28:45.874490 Epoch 50, Training loss 0.6849590169880396
2023-12-13 02:31:19.972434 Epoch 60, Training loss 0.651790841003818
2023-12-13 02:33:54.630847 Epoch 70, Training loss 0.6249319502840871
2023-12-13 02:36:34.263574 Epoch 80, Training loss 0.6031040023736027
2023-12-13 02:39:14.238280 Epoch 90, Training loss 0.5841518016270054
2023-12-13 02:41:53.996177 Epoch 100, Training loss 0.5665281374969751
2023-12-13 02:44:33.502939 Epoch 110, Training loss 0.5519352796132607
2023-12-13 02:47:12.666002 Epoch 120, Training loss 0.5384975899287197
2023-12-13 02:49:51.546136 Epoch 130, Training loss 0.5268549437984786
2023-12-13 02:52:3

RuntimeError: ignored

In [24]:
#check model complexity
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(16394, [864, 32, 4608, 16, 2304, 16, 8192, 32, 320, 10])

In [15]:
def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict

all_acc_dict["res"] = validate(model, train_loader, val_loader)

In [None]:
torch.save(model.state_dict(), data_path + 'img_classification_ResNet')

In [33]:
#weight penalties
def training_loop_l2reg(n_epochs, optimizer, model, loss_fn,
                        train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            l2_lambda = 0.001
            l2_norm = sum(p.pow(2.0).sum()
                          for p in model.parameters())  # <1>
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
             print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [36]:
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop_l2reg(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
all_acc_dict["l2 reg"] = validate(model, train_loader, val_loader)

2023-12-13 03:42:44.834935 Epoch 1, Training loss 2.095834989681878
2023-12-13 03:45:11.830687 Epoch 10, Training loss 1.2667120487031425
2023-12-13 03:47:55.858914 Epoch 20, Training loss 1.1209423332415578
2023-12-13 03:50:38.934188 Epoch 30, Training loss 1.052933368993842
2023-12-13 03:53:23.219896 Epoch 40, Training loss 1.0115779168008234
2023-12-13 03:56:06.910247 Epoch 50, Training loss 0.9864043677249528
2023-12-13 03:58:50.266149 Epoch 60, Training loss 0.9691655215857279
2023-12-13 04:01:35.285596 Epoch 70, Training loss 0.9568942366811015
2023-12-13 04:04:19.725414 Epoch 80, Training loss 0.9477766585319548
2023-12-13 04:07:04.354024 Epoch 90, Training loss 0.9406030601095361
2023-12-13 04:09:48.362368 Epoch 100, Training loss 0.9349961071977835
Accuracy train: 0.73
Accuracy val: 0.67


In [3]:
class NetDropout(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_dropout = nn.Dropout2d(p=0.4)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3,
                               padding=1)
        self.conv2_dropout = nn.Dropout2d(p=0.4)
        self.fc1 = nn.Linear(8 * 8 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = self.conv1_dropout(out)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = self.conv2_dropout(out)
        out = out.view(-1, 8 * 8 * self.n_chans1 // 2)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [14]:
model = NetDropout(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2023-12-13 04:13:09.584809 Epoch 1, Training loss 2.039463736517045
2023-12-13 04:15:14.577778 Epoch 10, Training loss 1.42207378682578
2023-12-13 04:17:33.586577 Epoch 20, Training loss 1.2785672027893993
2023-12-13 04:19:53.078382 Epoch 30, Training loss 1.2027465034171443
2023-12-13 04:22:12.012442 Epoch 40, Training loss 1.1548577416735841
2023-12-13 04:24:31.055568 Epoch 50, Training loss 1.1243669370098797
2023-12-13 04:26:49.925966 Epoch 60, Training loss 1.1036753393804934
2023-12-13 04:29:09.006184 Epoch 70, Training loss 1.0889071714695153
2023-12-13 04:31:29.685168 Epoch 80, Training loss 1.072825924743472
2023-12-13 04:33:50.049656 Epoch 90, Training loss 1.0671812845465471
2023-12-13 04:36:10.065241 Epoch 100, Training loss 1.0525516720531543


NameError: ignored

In [16]:
all_acc_dict["dropout"] = validate(model, train_loader, val_loader)

Accuracy train: 0.63
Accuracy val: 0.58


In [17]:
class NetBatchNorm(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3,
                               padding=1)
        self.conv2_batchnorm = nn.BatchNorm2d(num_features=n_chans1 // 2)
        self.fc1 = nn.Linear(8 * 8 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = self.conv1_batchnorm(self.conv1(x))
        out = F.max_pool2d(torch.tanh(out), 2)
        out = self.conv2_batchnorm(self.conv2(out))
        out = F.max_pool2d(torch.tanh(out), 2)
        out = out.view(-1, 8 * 8 * self.n_chans1 // 2)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [18]:
model = NetBatchNorm(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)


2023-12-13 04:37:21.306285 Epoch 1, Training loss 1.7820517827787667
2023-12-13 04:39:22.969283 Epoch 10, Training loss 0.9479873890004804
2023-12-13 04:41:39.038426 Epoch 20, Training loss 0.7784260831906668
2023-12-13 04:43:56.702188 Epoch 30, Training loss 0.6861266164523562
2023-12-13 04:46:14.481476 Epoch 40, Training loss 0.6141742164902675
2023-12-13 04:48:33.424312 Epoch 50, Training loss 0.5543651118341004
2023-12-13 04:50:53.410439 Epoch 60, Training loss 0.5026485762365943


KeyboardInterrupt: ignored

In [19]:
all_acc_dict["batch_norm"] = validate(model, train_loader, val_loader)

Accuracy train: 0.80
Accuracy val: 0.66
