In [53]:
# implementing a vgg block

import torch
import torch.nn as nn

def vgg_block(num_convs, in_channels, out_channels):
    """Assemble one VGG block: ``num_convs`` conv+ReLU pairs followed by a max-pool.

    Args:
        num_convs: number of 3x3 convolution layers in the block.
        in_channels: channel count of the tensor entering the block.
        out_channels: channel count produced by every convolution in the block.

    Returns:
        An ``nn.Sequential`` holding the conv/ReLU pairs and a final
        ``MaxPool2d(kernel_size=2, stride=2)``.
    """
    stages = []
    channels = in_channels
    for _ in range(num_convs):
        # 3x3 kernel with padding 1; only the first conv changes the channel count.
        stages += [
            nn.Conv2d(channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        channels = out_channels
    stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stages)

In [54]:
# VGG11 layout: one (num_convs, out_channels) pair per VGG block.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)

In [55]:
def vgg(conv_arch, in_channels=1, num_classes=10, feature_size=7):
    """Build a VGG-style classifier from a block specification.

    Args:
        conv_arch: iterable of ``(num_convs, out_channels)`` pairs, one per
            VGG block, applied in order.
        in_channels: channel count of the input images. Defaults to 1, the
            value that used to be hard-coded.
        num_classes: width of the final linear layer. Defaults to 10, the
            value that used to be hard-coded.
        feature_size: height/width of the feature map that reaches the
            classifier head. Defaults to 7, the value that used to be
            hard-coded (a 224x224 input halved by five pooling stages).

    Returns:
        An ``nn.Sequential`` made of the VGG blocks, a flatten layer and a
        three-layer fully connected head with dropout.
    """
    conv_blks = []
    for num_convs, out_channels in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        # The next block consumes what this one produced.
        in_channels = out_channels

    # Size of the flattened feature map fed to the first linear layer.
    flat_features = in_channels * feature_size * feature_size
    return nn.Sequential(
        *conv_blks, nn.Flatten(),
        nn.Linear(flat_features, 4096), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(4096, num_classes),
    )

In [56]:
# Instantiate the full-width network described by `conv_arch`.
vgg_net = vgg(conv_arch)

In [57]:
# Random dummy input (batch of 1, 1 channel, 224x224) used only to trace layer output shapes.
X = torch.randn(1,1,224,224)

def look_at_net(net, X):
    """Print the output shape produced by each top-level layer of ``net``.

    ``X`` is pushed through the layers one at a time, in iteration order, and
    one line per layer is printed with the layer's class name and the shape
    of its output.

    Args:
        net: an iterable of callable layers (e.g. an ``nn.Sequential``).
        X: input tensor accepted by the first layer.

    Returns:
        None. The function only prints.
    """
    out = X
    # This is a shape inspection only, so gradient tracking is switched off:
    # no autograd graph is built and intermediate activations are not retained.
    with torch.no_grad():
        for layer in net:
            out = layer(out)
            print(f'For {layer.__class__.__name__}: {out.shape}')
        
# Trace the full-width network and print the output shape after every top-level layer.
look_at_net(vgg_net, X)

For Sequential: torch.Size([1, 64, 112, 112])
For Sequential: torch.Size([1, 128, 56, 56])
For Sequential: torch.Size([1, 256, 28, 28])
For Sequential: torch.Size([1, 512, 14, 14])
For Sequential: torch.Size([1, 512, 7, 7])
For Flatten: torch.Size([1, 25088])
For Linear: torch.Size([1, 4096])
For ReLU: torch.Size([1, 4096])
For Dropout: torch.Size([1, 4096])
For Linear: torch.Size([1, 4096])
For ReLU: torch.Size([1, 4096])
For Dropout: torch.Size([1, 4096])
For Linear: torch.Size([1, 10])


In [58]:
# Training a full-width VGG is computationally expensive, so every block's
# channel count is divided by `ratio` to get a narrower network.
ratio = 4
small_conv_arch = [
    (num_convs, out_channels // ratio)
    for num_convs, out_channels in conv_arch
]
net = vgg(small_conv_arch)

look_at_net(net, X)

For Sequential: torch.Size([1, 16, 112, 112])
For Sequential: torch.Size([1, 32, 56, 56])
For Sequential: torch.Size([1, 64, 28, 28])
For Sequential: torch.Size([1, 128, 14, 14])
For Sequential: torch.Size([1, 128, 7, 7])
For Flatten: torch.Size([1, 6272])
For Linear: torch.Size([1, 4096])
For ReLU: torch.Size([1, 4096])
For Dropout: torch.Size([1, 4096])
For Linear: torch.Size([1, 4096])
For ReLU: torch.Size([1, 4096])
For Dropout: torch.Size([1, 4096])
For Linear: torch.Size([1, 10])


In [59]:
# Training hyperparameters.
lr = 0.001        # learning rate handed to the SGD optimizer
num_epochs = 20   # number of passes over the training loader
batch_size = 128  # samples per mini-batch for both data loaders

In [60]:

import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.datasets as datasets


# Resize every image to 224 pixels before converting it to a tensor; 224x224 is
# the input size for which the 7x7 classifier head of `vgg` was traced above.
my_transforms = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
])

# download=False: the dataset files are expected to already be under ../data.
train_dataset = datasets.FashionMNIST(root="../data", train=True, download=False, transform=my_transforms)
test_dataset = datasets.FashionMNIST(root="../data", train=False, download=False, transform=my_transforms)

# Shuffle the training data every epoch; the test loader keeps a fixed order
# because the accuracy computed over it does not depend on sample order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [61]:
# Cross-entropy loss applied to the network's 10-way output.
criterion = nn.CrossEntropyLoss()
# NOTE: the optimizer captures the parameters of whichever model `net` refers to
# when this cell runs. If `net` is later rebound to a new model, this optimizer
# still points at the old parameters and has to be re-created.
optimizer = torch.optim.SGD(net.parameters(), lr = lr, momentum=0.9)

In [62]:
def accuracy(y_hat, y):
    """Count how many predictions in a batch match their labels.

    Args:
        y_hat: per-class scores; the predicted class is the argmax along dim 1.
        y: ground-truth class indices, compared element-wise with the predictions.

    Returns:
        A 0-dim tensor holding the number of correct predictions (a count,
        not a ratio).
    """
    predicted = y_hat.argmax(1)
    hits = predicted == y
    return hits.sum()

In [63]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [64]:
def full_accuracy(net, data_iter):
    """Compute the classification accuracy of ``net`` over a whole data iterator.

    The model is moved to the module-level ``device`` and evaluated in eval
    mode with gradient tracking disabled. The train/eval mode the model had
    on entry is restored before returning, so calling this in the middle of
    training does not leave dropout switched off.

    Args:
        net: the model to evaluate.
        data_iter: iterable yielding ``(X, y)`` batches.

    Returns:
        Correct predictions divided by the number of labels seen. The correct
        count is accumulated from ``accuracy``, which returns a tensor, so the
        result is a tensor.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no batches.
    """
    # Remember the caller's mode so the eval() below is not a lasting side effect.
    was_training = net.training
    net.eval()
    net = net.to(device)

    total_acc = 0
    total_num = 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                X = X.to(device)
                y = y.to(device)

                y_hat = net(X)

                total_acc += accuracy(y_hat, y)
                total_num += y.numel()
    finally:
        net.train(was_training)

    return total_acc/total_num

In [65]:
def train_net(net):
    """Train ``net`` and record per-epoch statistics.

    Reads the module-level ``device``, ``num_epochs``, ``lr``, ``criterion``,
    ``train_dataloader`` and ``test_dataloader``, and prints the statistics of
    every epoch.

    Args:
        net: the model to train; it is moved to ``device``.

    Returns:
        A tuple ``(train_loss, test_acc, train_acc)`` of three lists with one
        float per epoch.
    """
    train_loss = []
    train_acc = []
    test_acc = []

    net = net.to(device)

    # Build the optimizer from the parameters of the model being trained.
    # An optimizer created earlier for a different model object would step
    # that model's parameters and leave this one untouched. The settings
    # mirror the notebook's SGD configuration (global `lr`, momentum 0.9).
    optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)

    for epoch in range(num_epochs):
        # Ensure dropout is active for the training pass, whatever mode an
        # evaluation helper left the model in at the end of the previous epoch.
        net.train()

        correct = 0
        seen = 0
        loss_sum = 0.0
        for inputs, labels in train_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_count = labels.numel()
            # `criterion` is assumed to return the batch mean (the default
            # reduction of nn.CrossEntropyLoss); weight it by the batch size
            # so that dividing by the sample count gives a per-sample mean.
            loss_sum += loss.item() * batch_count
            correct += int(accuracy(outputs, labels))
            seen += batch_count

        epoch_loss = loss_sum / seen
        epoch_train_acc = correct / seen
        # Evaluate once per epoch and reuse the value for printing and history.
        epoch_test_acc = float(full_accuracy(net, test_dataloader))

        print(f"\tEpoch {epoch} : Statistics: ")
        print(f'\tcurrent train loss : {epoch_loss}')
        print(f'\tcurrent train acc : {epoch_train_acc}')
        print(f'\tcurrent test acc : {epoch_test_acc}')

        train_loss.append(epoch_loss)
        test_acc.append(epoch_test_acc)
        train_acc.append(epoch_train_acc)

    return train_loss, test_acc, train_acc

In [66]:
# Sanity check on real data: take the first mini-batch, print its shape and label
# count, then trace the per-layer output shapes with model and batch on `device`.
# Note that this rebinds the notebook-level names `X` and `y`.
for X, y in train_dataloader:
    print(X.shape, len(y))
    look_at_net(net.to(device), X.to(device).float())
    # Only the first batch is needed.
    break

torch.Size([128, 1, 224, 224]) 128
For Sequential: torch.Size([128, 16, 112, 112])
For Sequential: torch.Size([128, 32, 56, 56])
For Sequential: torch.Size([128, 64, 28, 28])
For Sequential: torch.Size([128, 128, 14, 14])
For Sequential: torch.Size([128, 128, 7, 7])
For Flatten: torch.Size([128, 6272])
For Linear: torch.Size([128, 4096])
For ReLU: torch.Size([128, 4096])
For Dropout: torch.Size([128, 4096])
For Linear: torch.Size([128, 4096])
For ReLU: torch.Size([128, 4096])
For Dropout: torch.Size([128, 4096])
For Linear: torch.Size([128, 10])


In [67]:
# Attempt to train the narrowed VGG on 224x224 inputs. Any exception is caught and
# only its message is printed; in the recorded run this was a CUDA out-of-memory
# error. When an exception occurs, the three result names are not assigned here.
try:
    train_loss, test_acc, train_acc = train_net(net)
except Exception as e:
    print(e)

CUDA out of memory. Tried to allocate 50.00 MiB (GPU 0; 4.00 GiB total capacity; 2.64 GiB already allocated; 45.23 MiB free; 2.67 GiB reserved in total by PyTorch)


In [68]:
# Training at 224x224 is too heavy for this setup, so:
#   1. the images are no longer resized (no Resize(224) step; ToTensor only), and
#   2. a smaller architecture is built in the following cells.
my_transforms = transforms.Compose([
    transforms.ToTensor(),
])

# download=False: the dataset files are expected to already be under ../data.
train_dataset = datasets.FashionMNIST(root="../data", train=True, download=False, transform=my_transforms)
test_dataset = datasets.FashionMNIST(root="../data", train=False, download=False, transform=my_transforms)

# Shuffle the training data every epoch; the test loader keeps a fixed order
# because the accuracy computed over it does not depend on sample order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [69]:
# The previous model was still too heavy to train here, so the architecture is
# cut down further to two blocks with a single convolution each.
new_conv_arch = (
    (1, 64),
    (1, 128),
)

In [70]:
def vgg_reduced(conv_arch):
    """Build a lighter VGG variant with a single hidden fully connected layer.

    Args:
        conv_arch: iterable of ``(num_convs, out_channels)`` pairs, one per
            VGG block, applied in order. The first block receives a
            1-channel input.

    Returns:
        An ``nn.Sequential`` of the VGG blocks followed by flatten, one
        4096-unit linear layer with ReLU and dropout, and a 10-way output
        layer. The first linear layer is sized for a 7x7 feature map.
    """
    channels = 1
    feature_blocks = []
    for num_convs, out_channels in conv_arch:
        feature_blocks.append(vgg_block(num_convs, channels, out_channels))
        channels = out_channels

    # Classifier head; `channels` now holds the last block's output width.
    classifier = [
        nn.Flatten(),
        nn.Linear(channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(4096, 10),
    ]
    return nn.Sequential(*feature_blocks, *classifier)

In [71]:
# Random dummy input at 28x28: two 2x poolings bring it down to the 7x7 feature
# map that the classifier head of `vgg_reduced` is sized for.
X = torch.randn(1,1,28,28)

# `net` is rebound to a brand-new model here. Anything created earlier from the
# previous model's parameters (e.g. an optimizer) does not refer to this one.
net = vgg_reduced(new_conv_arch)

# Print the output shape after every top-level layer of the reduced model.
look_at_net(net, X)

For Sequential: torch.Size([1, 64, 14, 14])
For Sequential: torch.Size([1, 128, 7, 7])
For Flatten: torch.Size([1, 6272])
For Linear: torch.Size([1, 4096])
For ReLU: torch.Size([1, 4096])
For Dropout: torch.Size([1, 4096])
For Linear: torch.Size([1, 10])


In [None]:
# Train the reduced model. Any exception is caught and only its message is printed.
# NOTE(review): printing only the message hides the traceback, and on failure
# `train_loss`, `test_acc` and `train_acc` are not assigned by this cell, so later
# cells that read them would see missing or stale values.
try:
    train_loss, test_acc, train_acc = train_net(net)
except Exception as e:
    print(e)

In [None]:
print("finished")

import matplotlib.pyplot as plt

# Take the x-axis from the recorded history itself so that it always matches
# the number of epochs that were actually logged.
epochs = range(len(train_loss))
plt.plot(epochs, train_loss, label='train loss')
plt.plot(epochs, train_acc, label = 'train acc')
plt.plot(epochs, test_acc, label = 'test acc')
plt.xlabel('epoch')
plt.title('Training history')
plt.grid(True)
plt.legend()
plt.show()