In [1]:
import numpy as np
import sys
import matplotlib.pyplot as plt
import math
import os
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torchvision
from torch.utils.data import Dataset
from torch.utils.data import TensorDataset
from torchvision.transforms import ToTensor
from pathlib import Path
import requests
import torch.nn.functional as F
from torchinfo import summary


from torch.utils.tensorboard import SummaryWriter

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


the 3 total conv layers are 1x16 -> 16x16 -> 16x10, and in the two conv case we use first and last <br>

using lr = 0.001 <br>
mnist6 (and mnist13) is with 2 conv layers with stride 1 and then AvgPool(4) into FC, ~6.5k param <br>
mnist7 is with 2 conv layers with stride 2 and no pooling into FC ~6.5k param<br>
mnist8 is with 3 conv layers with stride 2 and no pooling into FC ~5.5k param<br>
mnist14 is with 2 conv layers with stride 1 and then MaxPool(4) into FC <br>
using lr = 0.05 <br>
mnist5 (and mnist10) is with all 3 conv layers at stride 2 and then 4 pooling, no FC, ~4k param <br>
mnist9 is with 3 conv layers with stride 2 and no pooling into FC ~ 5.5k param <br>
mnist11 is with 2 conv layers with stride 2 and no pooling into FC ~ 6.5k param <br>
mnist12 is with 2 conv layers with stride 1 and then AvgPool(4) into an FC ~ 6.5k param <br>
mnist15 is with 2 conv layers with stride 1 and then MaxPool(4) into an FC ~ 6.5k param <br>

In [7]:
#Getting data
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

batch_size = 100;

writer = SummaryWriter("runs/mnist15")

class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

def preprocess(x):
    return x.view(-1, 1, 28, 28)

def init_weights(mod):
    if (type(mod) == nn.Conv2d) or (type(mod) == nn.Linear):
        torch.nn.init.orthogonal_(mod.weight)


def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )


"""
class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        torch.nn.init.orthogonal_(self.conv1.weight)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        torch.nn.init.orthogonal_(self.conv2.weight)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)
        torch.nn.init.orthogonal_(self.conv3.weight)
        
    def forward(self, xb):
        xb = xb.view(-1,1,28,28)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb, 4)
        return F.log_softmax(xb.view(-1, xb.size(1)), dim=1)
"""
#defining loss function, weights, and bias tensors
loss_func = F.nll_loss
lr = 0.05
"""
def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(),lr=lr)
"""

def get_model():
    model = nn.Sequential(
        Lambda(preprocess),
        nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        #nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
        #nn.ReLU(),
        nn.Conv2d(16, 10, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        #nn.AvgPool2d(4),
        nn.MaxPool2d(4),
        nn.Flatten(),
        nn.Linear(10*7*7,10),
        nn.LogSoftmax(dim=1),
        Lambda(lambda x: x.view(x.size(0), -1)),
    )

    model.apply(init_weights)
    return model

def accuracy(xb, yb):
    max_xb = torch.argmax(xb,dim=1)
    return (max_xb == yb).float().mean()

def loss_batch(model, loss_func, xb, yb, opt = None):
    loss = loss_func(model(xb),yb)
    
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
        
    return loss.item(), len(xb)

def acc_batch(model, xb, yb):
    acc = accuracy(model(xb),yb)
    return acc, len(xb)
        
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    n_total_steps = len(train_dl)
    running_loss = 0.0
    list_loss=[]
    list_acc = []
    for epoch in range(epochs):
        i=0
        model.train()
        for xb,yb in train_dl:
            i+=1
            running_loss += loss_batch(model,loss_func,xb, yb, opt)[0]
            if i%100==0:
                writer.add_scalar('training loss',running_loss/100, epoch*n_total_steps + i)
                list_loss.append(running_loss/100)
                running_loss = 0
        model.eval()
        with torch.no_grad():
            losses, nums = zip(*[loss_batch(model, loss_func, xb, yb) for xb,yb in valid_dl])
            accs, nums = zip(*[acc_batch(model, xb, yb) for xb,yb in valid_dl])
        val_loss = np.sum(np.multiply(losses,nums))/np.sum(nums)
        val_acc = np.sum(np.multiply(accs,nums))/np.sum(nums)
        writer.add_scalar('testing acc', val_acc, epoch)
        list_acc.append(val_acc)
        print(epoch, val_loss,val_acc)
    return list_loss,list_acc
        

        

In [31]:
train_dl, valid_dl = get_data(training_data,test_data,batch_size)
examples = iter(train_dl)
example_data, example_targets = next(examples)
model = Mnist_CNN()
img_grid = torchvision.utils.make_grid(example_data)
writer.add_image('mnist_images_5',img_grid)
writer.add_graph(model, example_data)
writer.close()

TypeError: 'int' object is not callable

In [54]:
total_epochs = 3;
num_runs = 1
all_loss = []
all_acc = []
for run in range(num_runs):
    train_dl, valid_dl = get_data(training_data,test_data,batch_size)
    model = Mnist_CNN()
    opt = optim.SGD(model.parameters(), lr = lr, momentum = 0.9)
    temp_loss, temp_acc = fit(total_epochs, model, loss_func, opt, train_dl, valid_dl)
    all_loss.append(np.array(temp_loss))
    all_acc.append(np.array(temp_acc))
all_acc = np.array(all_acc)
all_loss = np.array(all_loss)

mean_acc = all_acc.mean(0)
mean_loss = all_loss.mean(0)
for j in range(len(mean_loss)):
    writer.add_scalar('Average Loss',mean_loss[j], j)
for j in range(len(mean_acc)):
    writer.add_scalar('Average Accuracy',mean_acc[j], j)

0 0.7770357704162598 0.7237999987602234
1 0.5766548186540603 0.7952999985218048
2 0.5585945546627045 0.804200005531311


In [None]:
total_epochs = 15;
num_runs = 5
all_loss = []
all_acc = []
for run in range(num_runs):
    train_dl, valid_dl = get_data(training_data,test_data,batch_size)
    model = get_model()
    opt = optim.SGD(model.parameters(), lr = lr, momentum = 0.9)
    temp_loss, temp_acc = fit(total_epochs, model, loss_func, opt, train_dl, valid_dl)
    all_loss.append(np.array(temp_loss))
    all_acc.append(np.array(temp_acc))
all_acc = np.array(all_acc)
all_loss = np.array(all_loss)

mean_acc = all_acc.mean(0)
mean_loss = all_loss.mean(0)
for j in range(len(mean_loss)):
    writer.add_scalar('Average Loss',mean_loss[j], j)
for j in range(len(mean_acc)):
    writer.add_scalar('Average Accuracy',mean_acc[j], j)

In [9]:
summary(get_model())

Layer (type:depth-idx)                   Param #
Sequential                               --
├─Lambda: 1-1                            --
├─Conv2d: 1-2                            160
├─ReLU: 1-3                              --
├─Conv2d: 1-4                            1,450
├─ReLU: 1-5                              --
├─MaxPool2d: 1-6                         --
├─Flatten: 1-7                           --
├─Linear: 1-8                            4,910
├─LogSoftmax: 1-9                        --
├─Lambda: 1-10                           --
Total params: 6,520
Trainable params: 6,520
Non-trainable params: 0

learning rate = 0.05 <br>
mnist16 is with a 3x3 and 5x5 conv layers with stride 2 and then MaxPool(4) into an FC ~ 3.5k param <br>
mnist17 is with a 3x3 and 5x5 conv layers with stride 2 and then MaxPool(3) into an FC ~ 5.5k param <br>
mnist18 is with 3x3 and 5x5 conv layer with strides 1 and 2, followed by two 3x3 layers with strides 2 and 1, followed by a maxpool(3) into an FC ~ 6.6k param <br>

# CNN with 3x3 and 5x5 within the same layer

In [29]:
#Getting data
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

batch_size = 100;

writer = SummaryWriter("runs/mnist18")

class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

def preprocess(x):
    return x.view(-1, 1, 28, 28)

def init_weights(mod):
    if (type(mod) == nn.Conv2d) or (type(mod) == nn.Linear):
        torch.nn.init.orthogonal_(mod.weight)


def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )


class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        torch.nn.init.orthogonal_(self.conv1.weight)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)
        torch.nn.init.orthogonal_(self.conv1.weight)
        
        self.conv2 = nn.Conv2d(1, 16, kernel_size=5, stride=2, padding=2)
        torch.nn.init.orthogonal_(self.conv2.weight)
        self.conv4 = nn.Conv2d(16, 10, kernel_size=3, stride=1, padding=1)
        torch.nn.init.orthogonal_(self.conv1.weight)

        self.lin1 = nn.Linear(10*4*4 + 10*4*4,10)
        torch.nn.init.orthogonal_(self.lin1.weight)
    def forward(self, xb):
        xb = xb.view(-1,1,28,28)
        xb1 = F.relu(self.conv1(xb))
        xb1 = F.relu(self.conv3(xb1))
        xb2 = F.relu(self.conv2(xb))
        xb2 = F.relu(self.conv4(xb2))
        xb1 = F.max_pool2d(xb1, 3)
        xb2 = F.max_pool2d(xb2, 3)
        xb1 = xb1.view(xb.size(0), -1)
        xb2 = xb2.view(xb.size(0), -1)
        xb = torch.cat((xb1,xb2),1)
        xb = self.lin1(xb)
        xb = F.log_softmax(xb,dim=1)
        return xb

#defining loss function, weights, and bias tensors
loss_func = F.nll_loss
lr = 0.05
"""
def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(),lr=lr)
"""
"""
def get_model():
    model = nn.Sequential(
        Lambda(preprocess),
        nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        #nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
        #nn.ReLU(),
        nn.Conv2d(16, 10, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        #nn.AvgPool2d(4),
        nn.MaxPool2d(4),
        nn.Flatten(),
        nn.Linear(10*7*7,10),
        nn.LogSoftmax(dim=1),
        Lambda(lambda x: x.view(x.size(0), -1)),
    )

    model.apply(init_weights)
    return model
"""

def accuracy(xb, yb):
    max_xb = torch.argmax(xb,dim=1)
    return (max_xb == yb).float().mean()

def loss_batch(model, loss_func, xb, yb, opt = None):
    loss = loss_func(model(xb),yb)
    
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
        
    return loss.item(), len(xb)

def acc_batch(model, xb, yb):
    acc = accuracy(model(xb),yb)
    return acc, len(xb)
        
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    n_total_steps = len(train_dl)
    running_loss = 0.0
    list_loss=[]
    list_acc = []
    for epoch in range(epochs):
        i=0
        model.train()
        for xb,yb in train_dl:
            i+=1
            running_loss += loss_batch(model,loss_func,xb, yb, opt)[0]
            if i%100==0:
                writer.add_scalar('training loss',running_loss/100, epoch*n_total_steps + i)
                list_loss.append(running_loss/100)
                running_loss = 0
        model.eval()
        with torch.no_grad():
            losses, nums = zip(*[loss_batch(model, loss_func, xb, yb) for xb,yb in valid_dl])
            accs, nums = zip(*[acc_batch(model, xb, yb) for xb,yb in valid_dl])
        val_loss = np.sum(np.multiply(losses,nums))/np.sum(nums)
        val_acc = np.sum(np.multiply(accs,nums))/np.sum(nums)
        writer.add_scalar('testing acc', val_acc, epoch)
        list_acc.append(val_acc)
        print(epoch, val_loss,val_acc)
    return list_loss,list_acc
        

        

In [33]:
model = Mnist_CNN()

In [34]:
summary(model)

Layer (type:depth-idx)                   Param #
Mnist_CNN                                --
├─Conv2d: 1-1                            160
├─Conv2d: 1-2                            1,450
├─Conv2d: 1-3                            416
├─Conv2d: 1-4                            1,450
├─Linear: 1-5                            3,210
Total params: 6,686
Trainable params: 6,686
Non-trainable params: 0

###### total_epochs = 15;
num_runs = 5
all_loss = []
all_acc = []
for run in range(num_runs):
    train_dl, valid_dl = get_data(training_data,test_data,batch_size)
    model = Mnist_CNN()
    opt = optim.SGD(model.parameters(), lr = lr, momentum = 0.9)
    temp_loss, temp_acc = fit(total_epochs, model, loss_func, opt, train_dl, valid_dl)
    all_loss.append(np.array(temp_loss))
    all_acc.append(np.array(temp_acc))
all_acc = np.array(all_acc)
all_loss = np.array(all_loss)

mean_acc = all_acc.mean(0)
mean_loss = all_loss.mean(0)
for j in range(len(mean_loss)):
    writer.add_scalar('Average Loss',mean_loss[j], j)
for j in range(len(mean_acc)):
    writer.add_scalar('Average Accuracy',mean_acc[j], j)

In [4]:
train_dl, valid_dl = get_data(training_data,test_data,batch_size)
examples = iter(train_dl)
example_data, example_targets = next(examples)

In [6]:
example_data.size(0)

100