<a href="https://colab.research.google.com/github/harry-hrz/torch-image-classification/blob/main/VGG_16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.utils.data as torchdata
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm

## VGG

In [6]:
class VGG16(nn.Module):
    """VGG-16 image classifier (configuration "D": 13 conv + 3 fully connected layers).

    The convolutional stack is generated from ``_CFG`` so that the layer plan
    is readable at a glance. Stages 3, 4 and 5 each contain three 3x3
    convolutions, which is what distinguishes VGG-16 from the two-per-stage
    VGG-13 layout.

    Args:
        init_weights: when truthy, conv layers get Kaiming-normal weights and
            linear layers get N(0, 0.01) weights; all biases are zeroed.
        num_classes: size of the final linear layer's output (default 10).

    Input:  float tensor of shape (N, 3, H, W). H and W must be at least 32 so
            that five 2x2 poolings leave a non-empty feature map.
    Output: tensor of shape (N, num_classes) holding raw, un-normalised logits.
    """

    # Layer plan: an int is the out-channel count of a 3x3 conv (+ ReLU),
    # "M" is a 2x2 / stride-2 max-pool.
    _CFG = (
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    )

    def __init__(self, init_weights=True, num_classes=10):
        super(VGG16, self).__init__()
        self.features = self._make_features(self._CFG)
        # Forces a 7x7 spatial map in front of the classifier. For 224x224
        # inputs the feature map is already 7x7, so this is an identity; for
        # any other resolution it keeps the first Linear layer's input valid.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classification = nn.Sequential(
            nn.Linear(7 * 7 * 512, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    @staticmethod
    def _make_features(cfg, in_channels=3):
        """Translate a layer plan into an ``nn.Sequential`` of conv/ReLU/pool layers."""
        layers = []
        for item in cfg:
            if item == "M":
                layers.append(nn.MaxPool2d(2, 2))
            else:
                layers.append(nn.Conv2d(in_channels, item, 3, padding=1))
                layers.append(nn.ReLU(True))
                in_channels = item
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the conv stack, pool to 7x7, flatten, and classify."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, merge (C, H, W) into one feature axis.
        x = torch.flatten(x, 1)
        x = self.classification(x)
        return x

    def _initialize_weights(self):
        """Re-initialise every Conv2d and Linear sub-module in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

## dataset

In [5]:
# Per-channel (R, G, B) mean and standard deviation handed to Normalize below.
# NOTE(review): these match the commonly quoted CIFAR-10 training-set
# statistics -- confirm if the dataset is ever swapped.
mean = (0.49139968, 0.48215827, 0.44653124)
std = (0.24703233, 0.24348505, 0.26158768)

# Both pipelines end at 224x224 so the images match the input size the
# network's classifier head was dimensioned for.
data_transform = {
    # Training: random scale/aspect crop plus horizontal flip as augmentation.
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
    # Validation: deterministic resize only, no augmentation.
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
}

train_set = torchvision.datasets.CIFAR10(root='./data', train=True, transform=data_transform['train'], download=True)
train_loader = torchdata.DataLoader(train_set, batch_size=64, shuffle=True)
val_set = torchvision.datasets.CIFAR10(root='./data', train=False, transform=data_transform['val'], download=True)
# Evaluation does not benefit from shuffling; a fixed order keeps every
# validation pass identical and reproducible.
val_loader = torchdata.DataLoader(val_set, batch_size=128, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


## train

In [None]:
''' 
Note:   If you resize CIFAR10 to 3*224*224, either Adam or Adadelta works fine.
        If you keep the original CIFAR10 size (3*32*32), use Adadelta instead.
        Personally, I think 32*32 inputs are not a good fit for VGG.
'''

In [None]:
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

model = VGG16()
model.to(device)
loss_func = nn.CrossEntropyLoss()
# Alternative optimizer, kept for easy switching:
#optimizer = optim.Adadelta(model.parameters())
optimizer = optim.Adam(model.parameters(), amsgrad=True)

for epoch in range(30):
    # ---- training pass: one sweep over train_loader ----
    model.train()
    epoch_loss = 0.0
    step = 0
    for inputs, labels in tqdm(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        loss = loss_func(model(inputs), labels)
        loss.backward()
        optimizer.step()
        # Accumulate the per-batch mean loss; averaged over batches when reported.
        epoch_loss += loss.item()
        step += 1

    # ---- validation pass: count correct top-1 predictions ----
    model.eval()
    acc = 0.0
    with torch.no_grad():
        for inputs, labels in tqdm(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            # max over the class axis returns (values, indices); the indices
            # are the predicted class ids.
            _, predict = outputs.max(dim=1)
            acc += (predict == labels).sum()
        val_acc = acc / len(val_set)
    print('epoch[%d], train_loss: %.3f, val_acc: %.3f' % (epoch+1, epoch_loss/step, val_acc))