In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torchvision import models
import itertools
import copy

# Pick the compute device once; later cells move models and batches onto `dev`.
if torch.cuda.is_available():
    dev = torch.device("cuda")
else:
    dev = torch.device("cpu")
print("Running on:", dev)


Running on: cuda


In [2]:
def get_cifar_loaders(batch=64):
    """Build the CIFAR-10 training and test DataLoaders.

    Every image is resized to 64x64, converted to a tensor and normalised
    with mean 0.5 and std 0.5 per channel. The dataset lives under
    ``./data`` and is requested with ``download=True``.

    Args:
        batch: Batch size shared by both loaders.

    Returns:
        Tuple ``(train_loader, test_loader)``. Only the training loader
        shuffles its samples.
    """
    preprocess = T.Compose([
        T.Resize((64, 64)),
        T.ToTensor(),
        T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    def _make_loader(is_train):
        # The flag selects the split and doubles as the shuffle switch.
        split = torchvision.datasets.CIFAR10(
            root="./data", train=is_train, download=True, transform=preprocess
        )
        return DataLoader(split, batch_size=batch, shuffle=is_train)

    # Evaluated left to right: the training split is built first.
    return _make_loader(True), _make_loader(False)

# Build both loaders once; the training and evaluation cells reuse them.
train_ld, test_ld = get_cifar_loaders(batch=64)


100%|██████████| 170M/170M [00:12<00:00, 13.1MB/s]


In [3]:
class SimpleNet(nn.Module):
    """Small three-stage CNN classifier with a two-layer fully connected head.

    Each stage is Conv2d(3x3, padding=1) -> BatchNorm2d -> activation ->
    MaxPool2d(2), with 32, 64 and 128 output channels respectively.
    ``fc1`` expects 128*8*8 flattened features, which corresponds to a
    64x64 input after three 2x poolings.

    Args:
        out_classes: Number of logits produced by the final layer.
        act_type: ``"relu"`` or ``"tanh"``; any other value (e.g.
            ``"leaky"``) selects ``nn.LeakyReLU``.
    """

    def __init__(self, out_classes=10, act_type="relu"):
        super().__init__()

        # Convolutional feature extractor: 3 -> 32 -> 64 -> 128 channels.
        self.c1 = nn.Conv2d(3, 32, 3, padding=1)
        self.b1 = nn.BatchNorm2d(32)
        self.c2 = nn.Conv2d(32, 64, 3, padding=1)
        self.b2 = nn.BatchNorm2d(64)
        self.c3 = nn.Conv2d(64, 128, 3, padding=1)
        self.b3 = nn.BatchNorm2d(128)

        # One pooling layer and one activation instance are shared by all stages.
        self.pool = nn.MaxPool2d(2)
        self.act = self._build_activation(act_type)

        # Classifier head.
        self.fc1 = nn.Linear(128*8*8, 256)
        self.drop = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, out_classes)

    @staticmethod
    def _build_activation(act_type):
        """Map an activation name to a module; unknown names give LeakyReLU."""
        if act_type == "relu":
            return nn.ReLU()
        if act_type == "tanh":
            return nn.Tanh()
        return nn.LeakyReLU()

    def forward(self, x):
        """Return logits of shape ``(batch, out_classes)`` for an image batch ``x``."""
        stages = ((self.c1, self.b1), (self.c2, self.b2), (self.c3, self.b3))
        for conv, norm in stages:
            x = self.pool(self.act(norm(conv(x))))

        # Flatten everything except the batch dimension for the linear head.
        flat = x.view(x.size(0), -1)
        hidden = self.drop(self.act(self.fc1(flat)))
        return self.fc2(hidden)


In [4]:
def apply_init(net, mode):
    """Re-initialise, in place, the weights of every Conv2d and Linear layer of ``net``.

    Args:
        net: Module whose sub-modules are visited through ``net.modules()``.
        mode: ``"xavier"`` for Xavier-uniform, ``"kaiming"`` for
            Kaiming-uniform with ``nonlinearity="relu"``; any other value
            draws from a normal distribution with mean 0.0 and std 0.02.

    Biases and all other layer types are left untouched. Returns ``None``.
    """
    targets = (m for m in net.modules() if isinstance(m, (nn.Conv2d, nn.Linear)))
    for module in targets:
        weight = module.weight
        if mode == "kaiming":
            nn.init.kaiming_uniform_(weight, nonlinearity="relu")
        elif mode == "xavier":
            nn.init.xavier_uniform_(weight)
        else:
            nn.init.normal_(weight, 0.0, 0.02)


In [5]:
def accuracy(pred, lab):
    """Return the number (not the fraction) of correct top-1 predictions in a batch.

    Args:
        pred: Score tensor; the predicted class is the argmax over dim 1.
        lab: Integer label tensor compared element-wise with that argmax.
    """
    return (pred.argmax(1) == lab).sum().item()


def _model_device(net):
    """Return the device of ``net``'s first parameter, or the notebook-wide ``dev`` if it has none."""
    first = next(net.parameters(), None)
    return first.device if first is not None else dev


def _epoch_summary(loss_sum, n_batches, n_correct, n_samples):
    """Turn running totals into ``(mean batch loss, accuracy)``; NaN for an empty epoch."""
    if n_batches == 0 or n_samples == 0:
        return float("nan"), float("nan")
    return loss_sum / n_batches, n_correct / n_samples


def run_train(net, loader, loss_fn, opt):
    """Train ``net`` for one pass over ``loader`` and report epoch metrics.

    Args:
        net: Model to optimise; it is switched to train mode.
        loader: Iterable yielding ``(inputs, labels)`` batches. A DataLoader
            works, and so does any plain iterable of tensor pairs.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        opt: Optimiser; ``zero_grad()`` and ``step()`` are called once per batch.

    Returns:
        ``(mean_loss, acc)``: the mean of the per-batch loss values and the
        fraction of correct predictions, both measured on the outputs
        produced before each optimiser step. ``(nan, nan)`` when the
        loader yields no batches.
    """
    net.train()
    # Batches follow the model rather than a global, so the function also
    # works for a model living on a different device than `dev`.
    device = _model_device(net)
    loss_sum, n_correct, n_batches, n_samples = 0.0, 0, 0, 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)
        opt.zero_grad()
        out = net(x)
        loss = loss_fn(out, y)
        loss.backward()
        opt.step()

        loss_sum += loss.item()
        n_correct += accuracy(out, y)
        n_batches += 1
        # Count what was actually seen: len(loader.dataset) is wrong with
        # drop_last or a subset sampler, and absent on plain iterables.
        n_samples += y.numel()

    return _epoch_summary(loss_sum, n_batches, n_correct, n_samples)


def run_eval(net, loader, loss_fn):
    """Evaluate ``net`` on ``loader`` without computing gradients.

    Args:
        net: Model to evaluate; it is switched to eval mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.

    Returns:
        ``(mean_loss, acc)``: the mean of the per-batch loss values and the
        fraction of correct predictions over the samples actually seen.
        ``(nan, nan)`` when the loader yields no batches.
    """
    net.eval()
    device = _model_device(net)
    loss_sum, n_correct, n_batches, n_samples = 0.0, 0, 0, 0

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            out = net(x)
            loss_sum += loss_fn(out, y).item()
            n_correct += accuracy(out, y)
            n_batches += 1
            n_samples += y.numel()

    return _epoch_summary(loss_sum, n_batches, n_correct, n_samples)


In [6]:
# Hyper-parameter grid: every activation x initialisation x optimiser
# combination is trained from scratch (3 * 3 * 3 = 27 runs).
acts = ["relu", "tanh", "leaky"]
inits = ["xavier", "kaiming", "normal"]
opts  = ["sgd", "adam", "rmsprop"]

SEED = 42      # shared by every configuration so runs differ only by their settings
N_EPOCHS = 5   # epochs trained per configuration

loss_fn = nn.CrossEntropyLoss()
saved_weights = {}   # "<act>_<init>_<opt>" -> copy of the final state_dict of that run
history = {}         # "<act>_<init>_<opt>" -> list of per-epoch metric dicts

for a,i,o in itertools.product(acts, inits, opts):

    print(f"\nConfig → Act:{a}, Init:{i}, Opt:{o}")

    # Re-seed before building the model: weight initialisation, dropout and
    # the training loader's shuffling all draw from torch's global RNG, so
    # every configuration starts from the same random stream. Results are
    # repeatable up to GPU kernel nondeterminism.
    torch.manual_seed(SEED)

    net = SimpleNet(act_type=a).to(dev)
    apply_init(net, i)

    if o == "sgd":
        optimizer = optim.SGD(net.parameters(), lr=0.01)
    elif o == "adam":
        optimizer = optim.Adam(net.parameters(), lr=0.001)
    else:
        optimizer = optim.RMSprop(net.parameters(), lr=0.001)

    run_key = f"{a}_{i}_{o}"
    history[run_key] = []

    for ep in range(N_EPOCHS):
        tr_l, tr_a = run_train(net, train_ld, loss_fn, optimizer)
        # NOTE: the value printed as "ValAcc" is measured on the CIFAR-10 test split.
        te_l, te_a = run_eval(net, test_ld, loss_fn)
        history[run_key].append(
            {"epoch": ep + 1, "train_loss": tr_l, "train_acc": tr_a,
             "test_loss": te_l, "test_acc": te_a}
        )
        print(f"Epoch {ep+1}: ValAcc={te_a:.4f}")

    # Deep copy so the stored tensors are independent of the live model.
    saved_weights[run_key] = copy.deepcopy(net.state_dict())



Config → Act:relu, Init:xavier, Opt:sgd
Epoch 1: ValAcc=0.4727
Epoch 2: ValAcc=0.5702
Epoch 3: ValAcc=0.5950
Epoch 4: ValAcc=0.6458
Epoch 5: ValAcc=0.6275

Config → Act:relu, Init:xavier, Opt:adam
Epoch 1: ValAcc=0.4763
Epoch 2: ValAcc=0.5402
Epoch 3: ValAcc=0.5939
Epoch 4: ValAcc=0.6368
Epoch 5: ValAcc=0.6603

Config → Act:relu, Init:xavier, Opt:rmsprop
Epoch 1: ValAcc=0.2746
Epoch 2: ValAcc=0.4853
Epoch 3: ValAcc=0.5066
Epoch 4: ValAcc=0.5137
Epoch 5: ValAcc=0.5408

Config → Act:relu, Init:kaiming, Opt:sgd
Epoch 1: ValAcc=0.5249
Epoch 2: ValAcc=0.5684
Epoch 3: ValAcc=0.5870
Epoch 4: ValAcc=0.6095
Epoch 5: ValAcc=0.6024

Config → Act:relu, Init:kaiming, Opt:adam
Epoch 1: ValAcc=0.4480
Epoch 2: ValAcc=0.5343
Epoch 3: ValAcc=0.5882
Epoch 4: ValAcc=0.5861
Epoch 5: ValAcc=0.6086

Config → Act:relu, Init:kaiming, Opt:rmsprop
Epoch 1: ValAcc=0.4367
Epoch 2: ValAcc=0.5275
Epoch 3: ValAcc=0.5423
Epoch 4: ValAcc=0.5320
Epoch 5: ValAcc=0.6346

Config → Act:relu, Init:normal, Opt:sgd
Epoch 1: V

In [7]:
# Transfer-learning baseline: ImageNet-pretrained ResNet-18 with a new
# 10-way classification head; only the head is optimised.
#
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` argument. IMAGENET1K_V1 is the checkpoint the legacy flag loads
# (resnet18-f37072fd.pth). Older torchvision builds without the weights enum
# fall back to the legacy flag.
_resnet_weights = getattr(models, "ResNet18_Weights", None)
if _resnet_weights is not None:
    res = models.resnet18(weights=_resnet_weights.IMAGENET1K_V1)
else:
    res = models.resnet18(pretrained=True)

# Freeze every pretrained parameter.
for p in res.parameters():
    p.requires_grad = False

# The replacement head is created after freezing, so it stays trainable.
res.fc = nn.Linear(res.fc.in_features, 10)
res = res.to(dev)

# Only the head's parameters are handed to the optimiser.
opt_res = optim.Adam(res.fc.parameters(), lr=0.001)

# NOTE(review): run_train() calls net.train(), so the backbone's BatchNorm
# layers run in training mode even though their parameters are frozen.
# NOTE(review): train_ld/test_ld feed 64x64 images normalised with 0.5/0.5
# rather than ImageNet statistics — TODO confirm this is intended.
for ep in range(5):
    tr_l, tr_a = run_train(res, train_ld, loss_fn, opt_res)
    te_l, te_a = run_eval(res, test_ld, loss_fn)
    print(f"ResNet Epoch {ep+1}: Acc={te_a:.4f}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 225MB/s]


ResNet Epoch 1: Acc=0.6316
ResNet Epoch 2: Acc=0.6413
ResNet Epoch 3: Acc=0.6461
ResNet Epoch 4: Acc=0.6476
ResNet Epoch 5: Acc=0.6415
