In [1]:
import sys
sys.path.append("../")

In [2]:
import copy

import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.nn import functional as F
import numpy as np

import inclearn

In [3]:
inclearn.train._set_seed(1)

Set seed 1


In [4]:
train_transforms = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
])


train_dataset = datasets.cifar.CIFAR100("data", download=True, train=True, transform=train_transforms)
test_dataset= datasets.cifar.CIFAR100("data", download=True, train=False, transform=test_transforms)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
train_loader = DataLoader(train_dataset, batch_size=128, num_workers=10, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, num_workers=10)

In [6]:
device = torch.device("cuda:2")

In [7]:
def acc(model, loader):
    predictions = []
    all_targets = []
    
    for inputs, targets in loader:
        inputs = inputs.to(device)
        all_targets.append(targets.numpy())
        
        logits = model(inputs)
        predictions.append(logits.argmax(dim=1).cpu().numpy())

    predictions = np.concatenate(predictions)
    all_targets = np.concatenate(all_targets)
    
    total_acc = (predictions == all_targets).sum() / len(all_targets)
    
    return all_targets, predictions, total_acc

In [8]:

import torch
import torch.nn as nn

class BasicBlock(nn.Module):
    """Basic Block for resnet 18 and resnet 34
    """

    #BasicBlock and BottleNeck block 
    #have different output size
    #we use class attribute expansion
    #to distinct
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        #residual function
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels * BasicBlock.expansion)
        )

        #shortcut
        self.shortcut = nn.Sequential()

        #the shortcut output dimension is not the same with residual function
        #use 1*1 convolution to match the dimension
        if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * BasicBlock.expansion)
            )
        
    def forward(self, x):
        return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))

class BottleNeck(nn.Module):
    """Residual block for resnet over 50 layers
    """
    expansion = 4
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels * BottleNeck.expansion),
        )

        self.shortcut = nn.Sequential()

        if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_channels * BottleNeck.expansion)
            )
        
    def forward(self, x):
        return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
    
class ResNet(nn.Module):

    def __init__(self, block, num_block, num_classes=100):
        super().__init__()

        self.in_channels = 64

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True))
        #we use a different inputsize than the original paper
        #so conv2_x's stride is 1
        self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
        self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
        self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
        self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """make resnet layers(by layer i didnt mean this 'layer' was the 
        same as a neuron netowork layer, ex. conv layer), one layer may 
        contain more than one residual block 
        Args:
            block: block type, basic block or bottle neck block
            out_channels: output depth channel number of this layer
            num_blocks: how many blocks per layer
            stride: the stride of the first block of this layer
        
        Return:
            return a resnet layer
        """

        # we have num_block blocks per layer, the first block 
        # could be 1 or 2, other blocks would always be 1
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        
        return nn.Sequential(*layers)

    def forward(self, x):
        output = self.conv1(x)
        output = self.conv2_x(output)
        output = self.conv3_x(output)
        output = self.conv4_x(output)
        output = self.conv5_x(output)
        output = self.avg_pool(output)
        output = output.view(output.size(0), -1)
        output = self.fc(output)

        return output 

def resnet18():
    """ return a ResNet 18 object
    """
    return ResNet(BasicBlock, [2, 2, 2, 2])

def resnet34():
    """ return a ResNet 34 object
    """
    return ResNet(BasicBlock, [3, 4, 6, 3])

def resnet50():
    """ return a ResNet 50 object
    """
    return ResNet(BottleNeck, [3, 4, 6, 3])

def resnet101():
    """ return a ResNet 101 object
    """
    return ResNet(BottleNeck, [3, 4, 23, 3])

def resnet152():
    """ return a ResNet 152 object
    """
    return ResNet(BottleNeck, [3, 8, 36, 3])



In [9]:
from torch.optim.lr_scheduler import _LRScheduler

class WarmUpLR(_LRScheduler):
    """warmup_training learning rate scheduler
    Args:
        optimizer: optimzier(e.g. SGD)
        total_iters: totoal_iters of warmup phase
    """
    def __init__(self, optimizer, total_iters, last_epoch=-1):
        
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """we will use the first m batches, and set the learning
        rate to base_lr * m / total_iters
        """
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]

In [15]:
#model = inclearn.lib.network.BasicNet("rebuffi", device=device, use_bias=True)
#model.add_classes(100)

#model = resnet34().to(device)
model = torchvision.models.resnet34(num_classes=100).to(device)

In [16]:
lr = 0.1
scheduling = [60, 120, 160]
gamma = 0.2
warmup_epochs = 1
n_epochs = 200

In [17]:
optimizer = torch.optim.SGD(model.parameters(), lr=lr, nesterov=True, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, scheduling, gamma=gamma)
warmup = WarmUpLR(optimizer, len(train_loader) * warmup_epochs)

In [18]:

best_model = None
best_acc = -1.

for epoch in range(n_epochs):
    if epoch < warmup_epochs:
        warmup.step()
    else:
        scheduler.step(epoch)
    
    epoch_loss = 0.
    
    model.train()
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        loss = F.cross_entropy(logits, targets)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        
    model.eval()
    _, _, train_acc = acc(model, train_loader)
    _, _, test_acc = acc(model, test_loader)
        
    print("Epoch {}/{}, train_loss: {}, train_acc: {}, test_acc: {}".format(
        epoch, n_epochs,
        round(epoch_loss / len(train_loader), 3),
        round(train_acc, 2), round(test_acc, 2)
    ))
    
    if test_acc > best_acc:
        print("Best acc! Saving model")
        best_model = copy.deepcopy(model)
        best_acc = test_acc

Epoch 0/200, train_loss: 4.666, train_acc: 0.01, test_acc: 0.01
Best acc! Saving model
Epoch 1/200, train_loss: 4.086, train_acc: 0.11, test_acc: 0.11
Best acc! Saving model
Epoch 2/200, train_loss: 3.488, train_acc: 0.18, test_acc: 0.18
Best acc! Saving model
Epoch 3/200, train_loss: 3.083, train_acc: 0.26, test_acc: 0.25
Best acc! Saving model
Epoch 4/200, train_loss: 2.746, train_acc: 0.33, test_acc: 0.34
Best acc! Saving model
Epoch 5/200, train_loss: 2.463, train_acc: 0.36, test_acc: 0.36
Best acc! Saving model
Epoch 6/200, train_loss: 2.23, train_acc: 0.4, test_acc: 0.41
Best acc! Saving model
Epoch 7/200, train_loss: 2.044, train_acc: 0.45, test_acc: 0.45
Best acc! Saving model
Epoch 8/200, train_loss: 1.912, train_acc: 0.46, test_acc: 0.44
Epoch 9/200, train_loss: 1.803, train_acc: 0.47, test_acc: 0.46
Best acc! Saving model
Epoch 10/200, train_loss: 1.721, train_acc: 0.48, test_acc: 0.47
Best acc! Saving model
Epoch 11/200, train_loss: 1.653, train_acc: 0.51, test_acc: 0.49
Be

Epoch 120/200, train_loss: 0.094, train_acc: 0.99, test_acc: 0.74
Best acc! Saving model
Epoch 121/200, train_loss: 0.05, train_acc: 0.99, test_acc: 0.75
Best acc! Saving model
Epoch 122/200, train_loss: 0.037, train_acc: 1.0, test_acc: 0.75
Best acc! Saving model
Epoch 123/200, train_loss: 0.033, train_acc: 1.0, test_acc: 0.75
Epoch 124/200, train_loss: 0.029, train_acc: 1.0, test_acc: 0.75
Best acc! Saving model
Epoch 125/200, train_loss: 0.026, train_acc: 1.0, test_acc: 0.75
Epoch 126/200, train_loss: 0.025, train_acc: 1.0, test_acc: 0.75
Best acc! Saving model
Epoch 127/200, train_loss: 0.022, train_acc: 1.0, test_acc: 0.75
Best acc! Saving model
Epoch 128/200, train_loss: 0.02, train_acc: 1.0, test_acc: 0.76
Best acc! Saving model
Epoch 129/200, train_loss: 0.019, train_acc: 1.0, test_acc: 0.76
Best acc! Saving model
Epoch 130/200, train_loss: 0.019, train_acc: 1.0, test_acc: 0.76
Epoch 131/200, train_loss: 0.018, train_acc: 1.0, test_acc: 0.76
Best acc! Saving model
Epoch 132/200

KeyboardInterrupt: 

In [None]:
best_acc

In [None]:
test_acc