<a href="https://colab.research.google.com/github/boosungkim/ml-paper-implementations/blob/main/vgg_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt

In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

In [4]:
BATCH_SIZE = 100
NUM_EPOCH = 2000
NUM_CLASSES = 10
CLASSES = ('plane','car','bird','cat','deer',
          'dog','frog','horse','ship','truck')

In [5]:
from torchvision.datasets import CIFAR10

In [6]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),
                        (0.5,0.5,0.5))
])

In [7]:
cifar10_train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)

cifar10_train_dataloader = DataLoader(cifar10_train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 73064333.18it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


In [8]:
cifar10_eval_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
cifar10_val_dataset, cifar10_test_dataset = torch.utils.data.random_split(cifar10_eval_dataset,[0.5,0.5])

cifar10_val_dataloader = DataLoader(cifar10_val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
cifar10_test_dataloader = DataLoader(cifar10_test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

Files already downloaded and verified


In [9]:
def eval(model, dataloader, criterion, device):
    avg_loss = 0.0
    avg_acc = 0.0
    test_loss = 0.0
    test_acc = 0.0
    model.eval()
    
    with torch.no_grad():
        for i, (batch_X, batch_y) in enumerate(dataloader):
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            z = model(batch_X)
            
            J = criterion(z, batch_y)
            test_loss += J.item()
            
            _, predicted = z.max(1)
            test_acc += torch.sum(predicted == batch_y)/len(batch_y)
#             print(f"Batch {i} testing loss: {test_loss}",
#                  f"Batch {i} testing accuracy: {test_acc}")
            
    avg_loss = test_loss / len(dataloader)
    avg_acc = test_acc / len(dataloader)
    
    print(f"Batch average loss: {avg_loss}",
         f"Batch average accuracy: {avg_acc}")
            
            

In [10]:
class Trainer(object):
    def __init__(self, model, criterion=None, optimizer=None, scheduler=None, device=DEVICE):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.device = device
    
    def train_step(self, dataloader):
        self.model.train()
        step_loss = 0.0
        train_predicted = []
        train_trues = []
        
        for i, (batch_X, batch_y) in enumerate(dataloader):
            batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)
            self.optimizer.zero_grad()
            
            # forward propagate
            z = self.model(batch_X)
            
            # loss
            J = self.criterion(z, batch_y)
            step_loss += J.item()
            J.backward()
            
            # backprop
            self.optimizer.step()
            
            # accuracy
            _, pred = z.max(1)
            train_predicted.extend(pred)
            train_trues.extend(batch_y)
            
        return step_loss, train_predicted, train_trues
    
    def eval_step(self, dataloader):
        self.model.eval()
        step_loss = 0.0
        val_predicted = []
        val_trues = []
        
        for i, (batch_X, batch_y) in enumerate(dataloader):
            batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)
            
            z = self.model(batch_X)
            J = self.criterion(z, batch_y)
            step_loss += J.item()
            
            _, pred = z.max(1)
            val_predicted.extend(pred)
            val_trues.extend(batch_y)
            
        return step_loss, val_predicted, val_trues
            
    
    def calc_accuracy(self, predicted_y, true_y):
        count = 0
        for i, pred in enumerate(predicted_y):
            if pred == true_y[i]:
                count += 1
        return count / len(true_y)
    
    def train(self, num_epoch, train_dataloader, val_dataloader):
        accumulated_loss = 0.0
        
        for epoch in range(num_epoch):
            train_loss, train_predicted, train_trues = self.train_step(train_dataloader)
            val_loss, val_predicted, val_trues = self.eval_step(val_dataloader)
            self.scheduler.step(val_loss)
            
            accumulated_loss += train_loss
            accuracy_train = self.calc_accuracy(train_predicted, train_trues)
            accuracy_val = self.calc_accuracy(val_predicted, val_trues)
            
            
            print(f"--- For epoch {epoch} ---\n"
                  f"Average train loss: {train_loss} | Average validation loss: {val_loss}\n"
                  f"Train accuracy: {accuracy_train} | Validation accuracy: {accuracy_val}")
        
        average_loss = accumulated_loss / num_epoch
        
        return average_loss
            
    
    # def predict(self, test_dataloader):
    #     self.model.eval()
    #     accuracy = 0.0
    #     for i, (batch_X, batch_y) in enumerate(test_dataloader):
            
    #     return accuracy

In [11]:
CONFIGURATION = {
    "VGG11": [
        [64,'M'],
        [128,'M'],
        [256,256,'M'],
        [512,512,'M'],
        [512,512,'M']
    ],
    "VGG13": [
        [64,64,'M'],
        [128,128,'M'],
        [256,256,'M'],
        [512,512,'M'],
        [512,512,'M']
    ],
    "VGG16": [
        [64,64,'M'],
        [128,128,'M'],
        [256,256,256,'M'],
        [512,512,512,'M'],
        [512,512,512,'M']
    ],
    "VGG19": [
        [64,64,'M'],
        [128,128,'M'],
        [256,256,256,256,'M'],
        [512,512,512,512,'M'],
        [512,512,512,512,'M']
    ]
}

class VGGModel(nn.Module):
    # 
    # Pytorch implementation of the various VGG models from Very Deep Convulutional Networks For Large-Scale Image Recognition
    # 
    def __init__(self, architecture_name, input_width, num_output):
        super(VGGModel, self).__init__()
        self.architecture = self.create_architecture(CONFIGURATION.get(architecture_name), input_width, num_output)
        self.block1 = self.architecture[0]
        self.block2 = self.architecture[1]
        self.block3 = self.architecture[2]
        self.block4 = self.architecture[3]
        self.block5 = self.architecture[4]
        self.block6 = self.architecture[5]
        self.flat = nn.Flatten()


    def forward(self,x):
        z = self.block1(x)
        # print(z.size())     # torch.Size([64, 112, 112])
        z = self.block2(z)
        # print(z.size())     # torch.Size([128, 56, 56])
        z = self.block3(z)
        # print(z.size())     # torch.Size([256, 28, 28])
        z = self.block4(z)
        # print(z.size())     # torch.Size([512, 14, 14])
        z = self.block5(z)
        # print(z.size())     # torch.Size([512, 7, 7])
        # z = self.flat(z).reshape(1,-1)
        z = self.flat(z)
        # print(z.size())
        z = self.block6(z)
        return z

    
    def create_architecture(self, architecture, input_width, num_outputs):
        """
        Create the CNN architecture with num_outputs outputs in the end.
        
        Parameters
        -------------
        architecture  :   2D list of int and string
              Each entry is either the number of filters in the Conv2d layer or an indication
              of MaxPool2d
        num_outputs   :   int
              Number of output classes in the end of the network
        
        Returns
        -------------
        blocks_list   :   List of nn.Sequential
            List of PyTorch NN sequences, each representing one block
        """
        blocks_list = []
        num_next_input_channels = 3
        
        for block in architecture:
            num_next_input_channels, layers = self.create_block(block, num_next_input_channels)
            blocks_list.append(layers)
        
        # Final layer for all VGG models
        blocks_list.append(
            nn.Sequential(
                        nn.Linear(512*int(input_width / 2**5), 4096), # 32 = 2
                        nn.ReLU(),
                        nn.Dropout(p=0.5),
                        nn.Linear(4096,4096),
                        nn.ReLU(),
                        nn.Dropout(p=0.5),
                        nn.Linear(4096, num_outputs),
                        # nn.Softmax(dim=1)
                        )
        )
        return blocks_list


    def create_block(self, block, num_next_input_channels):
        """
        Create a singular CNN block.
        
        Parameters
        -------------
        block   :   1D list of int and string of block
            Each entry is either the int of filters in the Conv2d layer or a str indication
            of MaxPool2d
        n       :   number for channels for the Conv2d layer
        'M'     :   MaxPool2d
        
        Returns
        -------------
        num_channels    :   int
            number of channels outputted and inputted to the next layer
        layers          :   nn.Sequential
            Pytorch NN sequence for one block
        """
        layers_list = []
        num_channels = num_next_input_channels
        for layer in block:
                if isinstance(layer, int):
                    layers_list += [
                        nn.Conv2d(in_channels=num_channels, out_channels=layer, kernel_size=(3,3), stride=(1,1), padding=(1,1)),
                        nn.ReLU()
                    ]
                    num_channels = layer
                else:
                    layers_list += [
                        nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
                    ]
        layers = nn.Sequential(*layers_list)
        return num_channels, layers

In [16]:
model = VGGModel("VGG11",32,10).to(DEVICE)

In [17]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005,eps=1e-08, weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.1, patience=3)

In [18]:
trainer2 = Trainer(model, loss_fn, optimizer, scheduler, DEVICE)

In [19]:
trainer2.train(16, cifar10_train_dataloader, cifar10_val_dataloader)

--- For epoch 0 ---
Average train loss: 908.963249206543 | Average validation loss: 77.75905442237854
Train accuracy: 0.27864 | Validation accuracy: 0.378
--- For epoch 1 ---
Average train loss: 682.8947247862816 | Average validation loss: 56.72398680448532
Train accuracy: 0.48688 | Validation accuracy: 0.5838
--- For epoch 2 ---
Average train loss: 521.2222071886063 | Average validation loss: 48.46707731485367
Train accuracy: 0.63166 | Validation accuracy: 0.6622
--- For epoch 3 ---
Average train loss: 424.6241908669472 | Average validation loss: 43.871166944503784
Train accuracy: 0.70264 | Validation accuracy: 0.6968
--- For epoch 4 ---
Average train loss: 350.7294847071171 | Average validation loss: 39.46448755264282
Train accuracy: 0.75812 | Validation accuracy: 0.7362
--- For epoch 5 ---
Average train loss: 293.61700278520584 | Average validation loss: 38.54185110330582
Train accuracy: 0.79768 | Validation accuracy: 0.7474
--- For epoch 6 ---
Average train loss: 237.9230199754238 

254.6856852749479

In [20]:
def eval(model, dataloader, criterion, device):
    avg_loss = 0.0
    avg_acc = 0.0
    test_loss = 0.0
    test_acc = 0.0
    model.eval()
    
    with torch.no_grad():
        for i, (batch_X, batch_y) in enumerate(dataloader):
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            z = model(batch_X)
            
            J = criterion(z, batch_y)
            test_loss += J.item()
            
            _, predicted = z.max(1)
            test_acc += torch.sum(predicted == batch_y)/len(batch_y)
#             print(f"Batch {i} testing loss: {test_loss}",
#                  f"Batch {i} testing accuracy: {test_acc}")
            
    avg_loss = test_loss / len(dataloader)
    avg_acc = test_acc / len(dataloader)
    
    print(f"Batch average loss: {avg_loss}",
         f"Batch average accuracy: {avg_acc}")
            
            

In [21]:
loss_fn2 = nn.CrossEntropyLoss()
eval(model, cifar10_test_dataloader, loss_fn2, DEVICE)

Batch average loss: 1.5267061829566955 Batch average accuracy: 0.7786000370979309


## Pretrained model