In [17]:
%run utils.py

In [18]:
from utils import ConvRelu
from torch import nn

def vgg_block(conv_in, conv_out, conv_num):
    layers = []
    layers.append(ConvRelu(conv_in, conv_out, kernel_size=3, padding=1))
    for i in range(conv_num - 1):
        layers.append(ConvRelu(conv_out, conv_out, kernel_size=3, padding=1))

    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

block = vgg_block(3, 64, 2)
print(block)

Sequential(
  (0): ConvRelu(
    (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
  )
  (1): ConvRelu(
    (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
  )
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [19]:
class VggBlock(nn.Module):
    def __init__(self, conv_in, conv_out, conv_num):
        super(VggBlock, self).__init__()
        layers = []
        layers.append(ConvRelu(conv_in, conv_out, kernel_size=3, padding=1))
        for i in range(conv_num - 1):
            layers.append(ConvRelu(conv_out, conv_out, kernel_size=3, padding=1))

        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        return self.block(x)

block = VggBlock(3, 64, 2)
print(block)

VggBlock(
  (block): Sequential(
    (0): ConvRelu(
      (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu): ReLU()
    )
    (1): ConvRelu(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu): ReLU()
    )
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)


In [20]:
from utils import LinerRelu

class Vgg16(nn.Module):
    def __init__(self, dropout=0.5):
        super(Vgg16, self).__init__()
        self.block1 = VggBlock(3, 64, 2)
        self.block2 = VggBlock(64, 128, 2)
        self.block3 = VggBlock(128, 256, 3)
        self.block4 = VggBlock(256, 512, 3)
        self.block5 = VggBlock(512, 512, 3)
        self.fc1 = LinerRelu(512, 256, dropout=dropout)
        self.fc2 = LinerRelu(256, 128, dropout=dropout)
        self.fc3 = LinerRelu(128, 10, dropout=dropout)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = nn.Flatten()(x)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [21]:
from torchinfo import summary

vgg16 = Vgg16()
summary(vgg16, (1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
Vgg16                                    [1, 10]                   --
├─VggBlock: 1-1                          [1, 64, 16, 16]           --
│    └─Sequential: 2-1                   [1, 64, 16, 16]           --
│    │    └─ConvRelu: 3-1                [1, 64, 32, 32]           1,792
│    │    └─ConvRelu: 3-2                [1, 64, 32, 32]           36,928
│    │    └─MaxPool2d: 3-3               [1, 64, 16, 16]           --
├─VggBlock: 1-2                          [1, 128, 8, 8]            --
│    └─Sequential: 2-2                   [1, 128, 8, 8]            --
│    │    └─ConvRelu: 3-4                [1, 128, 16, 16]          73,856
│    │    └─ConvRelu: 3-5                [1, 128, 16, 16]          147,584
│    │    └─MaxPool2d: 3-6               [1, 128, 8, 8]            --
├─VggBlock: 1-3                          [1, 256, 4, 4]            --
│    └─Sequential: 2-3                   [1, 256, 4, 4]            --

In [22]:
from torchvision import datasets
from utils import train_val_split

full = datasets.CIFAR10(root="./data", train=True, download=True)
test = datasets.CIFAR10(root="./data", train=False, download=True)
train, valid = train_val_split(full, seed=666)

print(len(train), len(valid), len(test))

40000 10000 10000


In [23]:
from utils import PackDataset
from torchvision import transforms

train_data = PackDataset(train, transform=transforms.ToTensor())
valid_data = PackDataset(valid, transform=transforms.ToTensor())
test_data = PackDataset(test, transform=transforms.ToTensor())

image, label = train_data[0]
print(image.size())

torch.Size([3, 32, 32])


In [24]:
import json
from utils import control_callbacks
from sklearn.model_selection import ParameterGrid
import torch
from skorch import NeuralNetClassifier
from skorch.helper import predefined_split

In [None]:
epochs = 15
param_grid = {
    'lr': [0.001, 0.0005, 0.0001],
    'dropout': [0.5, 0.3, 0.2]
}

results = {
    'best_params': None,
    'best_acc': 0.0,
    'all_results': []
}

calls = control_callbacks(epochs, check_dir='./data/alex-checkpoints', show_bar=False)
for params in ParameterGrid(param_grid):
    print(f"\nTraining with params: {params}")
    vgg = Vgg16(params['dropout'])
    net = NeuralNetClassifier(
        vgg,
        criterion=nn.CrossEntropyLoss,
        optimizer=torch.optim.Adam,
        lr=params['lr'],
        batch_size=2048,
        max_epochs=epochs,
        train_split=predefined_split(valid_data),
        device='cuda' if torch.cuda.is_available() else 'cpu',
        callbacks=calls,
        classes=list(range(10)),
    )
    net.fit(X=train_data, y=None)
    valid_acc = max(net.history[:, 'valid_acc'])
    current_result = {'params': params, 'valid_acc': valid_acc}
    results['all_results'].append(current_result)

    if valid_acc > results['best_acc']:
        results['best_acc'] = valid_acc
        results['best_params'] = params

    print(f"\nBest params: {results['best_params']}, best acc: {results['best_acc']}")

with open('./data/hyperparam_results.json', 'w') as f:
    json.dump(results, f, indent=2)

In [26]:
epochs = 50
vgg = Vgg16(0.3)
calls = control_callbacks(epochs, check_dir='./data/alex-checkpoints', show_bar=False)
full_data = PackDataset(full, transform=transforms.ToTensor())
net = NeuralNetClassifier(
    vgg,
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    lr=0.0001,
    batch_size=2048,
    max_epochs=epochs,
    train_split=predefined_split(test_data),
    device='cuda' if torch.cuda.is_available() else 'cpu',
    callbacks=calls,
    classes=list(range(10)),
)
net.fit(full_data, y=None)

  epoch    train_acc    train_loss    valid_acc    valid_loss    cp      lr      dur
-------  -----------  ------------  -----------  ------------  ----  ------  -------
      1       [36m0.1482[0m        [32m2.2613[0m       [35m0.2150[0m        [31m2.1284[0m     +  0.0001  24.1931
      2       0.2481        [32m2.1186[0m       [35m0.3802[0m        [31m1.8842[0m     +  0.0001  24.4257
      3       0.3103        [32m2.0060[0m       [35m0.4099[0m        [31m1.7984[0m     +  0.0001  24.7189
      4       0.3483        [32m1.9252[0m       [35m0.4788[0m        [31m1.6667[0m     +  0.0001  24.7000
      5       0.3776        [32m1.8628[0m       [35m0.5013[0m        [31m1.5812[0m     +  0.0001  24.7995
      6       0.3954        [32m1.8205[0m       [35m0.5171[0m        [31m1.5734[0m     +  0.0001  24.9996
      7       0.4071        [32m1.7865[0m       [35m0.5372[0m        [31m1.5071[0m     +  0.0001  25.0132
      8       0.4258        [32m1.73

0,1,2
,module,Vgg16(  (blo...e=False)  ) )
,criterion,<class 'torch...sEntropyLoss'>
,train_split,functools.par...7efacb122020>)
,classes,"[0, 1, ...]"
,optimizer,<class 'torch...im.adam.Adam'>
,lr,0.0001
,max_epochs,50
,batch_size,2048
,iterator_train,<class 'torch...r.DataLoader'>
,iterator_valid,<class 'torch...r.DataLoader'>
