In [1]:
import torch
import tqdm
import torchvision
import torchvision.transforms as transforms
# from tqdm import tqdm
from tqdm.notebook import tqdm
import os

In [2]:
train_transforms = transforms.Compose([
                                      #  transforms.RandomResizedCrop(128) ,
                                       transforms.ToTensor()  ,
                                       transforms.Normalize( mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
                                       transforms.RandomHorizontalFlip()
])

val_transforms = transforms.Compose([
                                      #  transforms.Resize(128) ,
                                       transforms.ToTensor()  ,
                                       transforms.Normalize( mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])
train_data = torchvision.datasets.CIFAR10('data/train',train=True,download=True, transform=train_transforms)
val_data = torchvision.datasets.CIFAR10('data/val',train=False,download=True, transform=val_transforms)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/train/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/train/cifar-10-python.tar.gz to data/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/val/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/val/cifar-10-python.tar.gz to data/val


In [3]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=32, shuffle=True)

In [42]:
len(train_loader.sampler)

50000

In [4]:
# model = torchvision.models.resnet50()

In [5]:
# model.fc=torch.nn.Linear(2048,10)

In [6]:
'''ResNet in PyTorch.
For Pre-activation ResNet, see 'preact_resnet.py'.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])


def test():
    net = ResNet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())

In [29]:
#TODO : 
#1-gradient clipping
#2-tensorboard
def train(model, train_loader , val_loader, cfg):
    """
       Simple training loop for PyTorch model.
       cfg: criterion, optimizer ,epochs , model_path='model.ckpt' , scheduler=None, resume=False, min_val_acc_to_save=88.0

    """ 
    if cfg['gpu']:
      device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    #Resume training
    if cfg['resume'] and os.path.exists(cfg['load_path']):
      print('Loading the model from ckpt.....')
      train_ckpt=torch.load(cfg['load_path'])
      model.load_state_dict(train_ckpt['model_sate_dict'])
      cfg['optimizer'].load_state_dict(train_ckpt['optimizer'])
      if cfg['scheduler']:
        cfg['scheduler'].load_state_dict(train_ckpt['scheduler'])
      print('The model is ready!')

    # Make sure model is in training mode.
    # model.train()

    # Move model to the device (CPU or GPU).
    model.to(device)

    # Exponential moving average of the loss.
    ema_loss = None
    losses=[]
    train_accs=[]
    val_accs=[]
    best_val_acc=0
    cfg['optimizer'].zero_grad()

    print(f'----- Training on {device} -----')
    # Loop over epochs.
    for epoch in range(cfg['epochs']):
        correct = 0
        num_examples=0
        # Loop over data.
        loop=tqdm(enumerate(train_loader , start =epoch*len(train_loader)), total=len(train_loader))
        for step , (images, target) in loop:
            # Forward pass.
            output = model(images.to(device))
            loss = cfg['criterion'](output.to(device), target.to(device))

            # Backward pass.
            loss = loss / cfg['accumulation_steps'] # Normalize the Gradients
            loss.backward()

            if ((step+ 1) % cfg['accumulation_steps'] == 0) or (step + 1 == len(train_loader)):
              cfg['optimizer'].step()
              cfg['optimizer'].zero_grad()


            # NOTE: It is important to call .item() on the loss before summing.
            if ema_loss is None:
                ema_loss = loss.item()
            else:
                ema_loss += (loss.item() - ema_loss) * 0.01 
            # Compute the correct classifications
            preds = output.argmax(dim=1, keepdim=True)
            correct+= preds.cpu().eq(target.view_as(preds)).sum().item()
            num_examples+= images.shape[0]
            train_acc=correct/num_examples

            #tqdm
            loop.set_description(f"Epoch [{epoch+1}/{cfg['epochs']}]")
            loop.set_postfix(loss=ema_loss, acc=train_acc)
        
        losses.append(ema_loss)
        train_accs.append(train_acc)

        #schedular
        if cfg['scheduler']:
          cfg['scheduler'].step()

        #validate
        if (epoch+1) % cfg['val_period']==0:
          val_acc = test(model ,val_loader, device)
          val_accs.append(val_acc)
          if val_acc > best_val_acc and val_acc > cfg['min_val_acc_to_save']:
              print(f'validation accuracy increased from {best_val_acc} to {val_acc}  , saving the model ....')
              #saving training ckpt
              chk_point={'model_sate_dict':model.state_dict(), 'optimizer':cfg['optimizer'].state_dict(), 'scheduler':cfg['scheduler'].state_dict() if cfg['scheduler'] else None,
                         'epochs':epoch+1, 'best_val_acc':best_val_acc}
              torch.save(chk_point, cfg['ckpt_save_path'])
              best_val_acc=val_acc
        print('-------------------------------------------------------------')

    return train_accs , val_accs, losses
    

In [30]:
def test(model, data_loader, device):
    """Measures the accuracy of a model on a data set.""" 
    # Make sure the model is in evaluation mode.
    model.eval()
    correct = 0
    print(f'----- Model Evaluation on {device}-----')
    # We do not need to maintain intermediate activations while testing.
    with torch.no_grad():
        
        # Loop over test data.
        for features, target in data_loader:
          
            # Forward pass.
            output = model(features.to(device))
            
            # Get the label corresponding to the highest predicted probability.
            preds = output.argmax(dim=1, keepdim=True) #[bs x 1]
            
            # Count number of correct predictions.
            correct += preds.cpu().eq(target.view_as(preds)).sum().item()
    model.train()
    # Print test accuracy.
    percent = 100. * correct / len(data_loader.sampler)
    print(f'validation accuracy: {correct} / {len(data_loader.sampler)} ({percent:.0f}%)')
    return percent

In [31]:
model = ResNet18()

In [32]:
# optimizer=torch.optim.RMSprop(model.parameters(), lr=0.00001 , momentum=0.9)
optimizer= torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
cfg={
'criterion' : torch.nn.CrossEntropyLoss(),
'optimizer' : optimizer,
'scheduler' : torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1),
'val_period' : 1,
'epochs' : 3,
'accumulation_steps' : 1,
'ckpt_save_path' : 'model.ckpt' ,
'resume' : False, 
'load_path' : 'model.ckpt',
'min_val_acc_to_save' : 30.0,
'gpu' : True
}
train(model, train_loader, val_loader, cfg)

----- Training on cuda -----


  0%|          | 0/1563 [00:00<?, ?it/s]

----- Model Evaluation on cuda-----
validation accuracy: 4893 / 10000 (49%)
validation accuracy increased from 0 to 48.93  , saving the model ....
-------------------------------------------------------------


  0%|          | 0/1563 [00:00<?, ?it/s]

----- Model Evaluation on cuda-----
validation accuracy: 6352 / 10000 (64%)
validation accuracy increased from 48.93 to 63.52  , saving the model ....
-------------------------------------------------------------


  0%|          | 0/1563 [00:00<?, ?it/s]

----- Model Evaluation on cuda-----
validation accuracy: 6686 / 10000 (67%)
validation accuracy increased from 63.52 to 66.86  , saving the model ....
-------------------------------------------------------------


([0.34788, 0.57836, 0.68456],
 [48.93, 63.52, 66.86],
 [1.4276402479783044, 1.0249666731954998, 0.8379341680506238])

In [33]:
# optimizer=torch.optim.RMSprop(model.parameters(), lr=0.00001 , momentum=0.9)
optimizer= torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
cfg={
'criterion' : torch.nn.CrossEntropyLoss(),
'optimizer' : optimizer,
'scheduler' : torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1),
'val_period' : 1,
'epochs' : 3,
'accumulation_steps' : 1,
'ckpt_save_path' : 'model.ckpt' ,
'resume' : True, 
'load_path' : 'model.ckpt',
'min_val_acc_to_save' : 30.0,
'gpu' : True
}
train(model, train_loader, val_loader, cfg)

Loading the model from ckpt.....
The model is ready!
----- Training on cuda -----


  0%|          | 0/1563 [00:00<?, ?it/s]

----- Model Evaluation on cuda-----
validation accuracy: 8201 / 10000 (82%)
validation accuracy increased from 0 to 82.01  , saving the model ....
-------------------------------------------------------------


  0%|          | 0/1563 [00:00<?, ?it/s]

----- Model Evaluation on cuda-----
validation accuracy: 8247 / 10000 (82%)
validation accuracy increased from 82.01 to 82.47  , saving the model ....
-------------------------------------------------------------


  0%|          | 0/1563 [00:00<?, ?it/s]

----- Model Evaluation on cuda-----
validation accuracy: 8357 / 10000 (84%)
validation accuracy increased from 82.47 to 83.57  , saving the model ....
-------------------------------------------------------------


([0.80872, 0.84004, 0.8589],
 [82.01, 82.47, 83.57],
 [0.48867135682423973, 0.45312318050733386, 0.4140854474050861])

In [None]:
def predict(model, test_loader):
    """Measures the accuracy of a model on a data set.""" 
    # Make sure the model is in evaluation mode.
    device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.eval()
    preds=[]
    # We do not need to maintain intermediate activations while testing.
    with torch.no_grad():
        
        # Loop over test data.
        for images, targets in tqdm(test_loader):
          
            # Forward pass.
            output = model(images.to(device)) #[bs x out_dim]
            # print(output.shape)
            # Get the label corresponding to the highest predicted probability.
            # print(output.argmax(dim=1, keepdim=True).shape)
            preds+= (output.argmax(dim=1, keepdim=True).cpu()) #[bs x 1]
            # print('preds',torch.tensor(preds).shape)
            
            # Count number of correct predictions.
    # Print test accuracy.
    for i,p in enumerate(preds):
      preds[i]=preds[i].item()

    return preds 

In [None]:
preds=predict(model,val_loader)

HBox(children=(FloatProgress(value=0.0, max=157.0), HTML(value='')))




In [None]:
len(preds)

10000

In [None]:
preds[0:10]

[2, 6, 0, 6, 2, 0, 4, 4, 9, 4]