In [1]:
import torch
import tqdm
import torchvision
import torchvision.transforms as transforms
# from tqdm import tqdm
from tqdm.notebook import tqdm
import os

In [2]:
train_transforms = transforms.Compose([
                                       transforms.RandomResizedCrop(128) ,
                                       transforms.ToTensor()  ,
                                       transforms.Normalize( mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
                                       transforms.RandomHorizontalFlip()
])

val_transforms = transforms.Compose([
                                       transforms.Resize(128) ,
                                       transforms.ToTensor()  ,
                                       transforms.Normalize( mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])
train_data = torchvision.datasets.CIFAR10('data/train',train=True,download=True, transform=train_transforms)
val_data = torchvision.datasets.CIFAR10('data/val',train=False,download=True, transform=val_transforms)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/train/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting data/train/cifar-10-python.tar.gz to data/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/val/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting data/val/cifar-10-python.tar.gz to data/val


In [3]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=64, shuffle=True)

In [4]:
model = torchvision.models.resnet50()

In [5]:
model.fc=torch.nn.Linear(2048,10)

In [82]:
#TODO : 
#1-gradient clipping
#2-tensorboard
def train(model, train_loader , val_loader, cfg):
    """
       Simple training loop for PyTorch model.
       cfg: criterion, optimizer ,epochs , model_path='model.ckpt' , scheduler=None  ,load_model=False, min_val_acc_to_save=88.0

    """ 
    if cfg['gpu']:
      device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    best_val_acc=0
    # Make sure model is in training mode.
    if cfg['Load_model'] and cfg['load_path']:
      print('Loading the model from ckpt.....')
      train_ckpt=torch.load(cfg['load_path'])
      model.load_state_dict(train_ckpt['model'])
      print('The model is ready!')


    model.train()
    cfg['optimizer'].zero_grad()

    # Move model to the device (CPU or GPU).
    model.to(device)
    
    # Exponential moving average of the loss.
    ema_loss = None
    losses=[]
    train_accs=[]
    val_accs=[]

    print(f'----- Training on {device} -----')
    # Loop over epochs.
    for epoch in range(cfg['epochs']):
        correct = 0
        num_examples=0
        # Loop over data.
        loop=tqdm(enumerate(train_loader , start =epoch*len(train_loader)), total=len(train_loader))
        for step , (images, target) in loop:
            # Forward pass.
            output = model(images.to(device))
            loss = cfg['criterion'](output.to(device), target.to(device))

            # Backward pass.
            loss = loss / cfg['accumulation_steps'] # Normalize our loss (if averaged)
            loss.backward()
            if epoch+1 % cfg['accumulation_steps']==0:
              cfg['optimizer'].step()
              cfg['optimizer'].zero_grad()


            # NOTE: It is important to call .item() on the loss before summing.
            if ema_loss is None:
                ema_loss = loss.item()
            else:
                ema_loss += (loss.item() - ema_loss) * 0.01 
            # Compute the correct classifications
            preds = output.argmax(dim=1, keepdim=True)
            correct+= preds.cpu().eq(target.view_as(preds)).sum().item()
            num_examples+= images.shape[0]
            train_acc=correct/num_examples
            #tqdm
            loop.set_description(f"Epoch [{epoch+1}/{cfg['epochs']}]")
            loop.set_postfix(loss=ema_loss, acc=train_acc)
        
        losses.append(ema_loss)
        train_accs.append(train_acc)
        #schedular
        if cfg['scheduler']:
          cfg['scheduler'].step()
        #validate
        if epoch+1 % cfg['val_period']==0:
          val_acc = test(model ,val_loader, device)
          val_accs.append(val_acc)
          if val_acc > best_val_acc and val_acc > cfg['min_val_acc_to_save']:
              print(f'validation accuracy increased from {best_val_acc} to {val_acc}  , saving the model ....')
              #saving training ckpt
              chk_point={'model_sate_dict':model.state_dict(), 'epochs':epoch+1, 'best_val_acc':best_val_acc}
              torch.save(chk_point, cfg['ckpt_path'])
              best_val_acc=val_acc
        print('-------------------------------------------------------------')

        return train_accs , val_accs, losses
    

In [79]:
def test(model, data_loader, device):
    """Measures the accuracy of a model on a data set.""" 
    # Make sure the model is in evaluation mode.
    model.eval()
    correct = 0
    print(f'----- Model Evaluation on {device}-----')
    # We do not need to maintain intermediate activations while testing.
    with torch.no_grad():
        
        # Loop over test data.
        for features, target in data_loader:
          
            # Forward pass.
            output = model(features.to(device))
            
            # Get the label corresponding to the highest predicted probability.
            preds = output.argmax(dim=1, keepdim=True) #[bs x 1]
            
            # Count number of correct predictions.
            correct += preds.cpu().eq(target.view_as(preds)).sum().item()
    model.train()
    # Print test accuracy.
    percent = 100. * correct / len(data_loader.sampler)
    print(f'validation accuracy: {correct} / {len(data_loader.sampler)} ({percent:.0f}%)')
    return percent

In [80]:
optimizer=torch.optim.RMSprop(model.parameters(), lr=0.00001 , momentum=0.9)
cfg={
'criterion' : torch.nn.CrossEntropyLoss(),
'optimizer' : optimizer,
'scheduler' : torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1),
'val_period' : 1,
'epochs' : 5,
'accumulation_steps' : 1,
'ckpt_path' : 'model.ckpt' ,
'load_model' : True, 
'load_path' : 'model.ckpt',
'min_val_acc_to_save' : 30.0,
'gpu' : True
}
train(model, train_loader, val_loader, cfg)

loading the model from ckpt.....
the model is ready
----- Training on cuda -----


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))


----- Model Evaluation on cuda-----
validation accuracy: 7061 / 10000 (71%)
validation accuracy increased from 0 to 70.61  , saving the model ....
-------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))


----- Model Evaluation on cuda-----
validation accuracy: 7381 / 10000 (74%)
validation accuracy increased from 70.61 to 73.81  , saving the model ....
-------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))


----- Model Evaluation on cuda-----
validation accuracy: 7584 / 10000 (76%)
validation accuracy increased from 73.81 to 75.84  , saving the model ....
-------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))


----- Model Evaluation on cuda-----
validation accuracy: 7940 / 10000 (79%)
validation accuracy increased from 75.84 to 79.4  , saving the model ....
-------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))


----- Model Evaluation on cuda-----
validation accuracy: 7988 / 10000 (80%)
validation accuracy increased from 79.4 to 79.88  , saving the model ....
-------------------------------------------------------------


In [73]:
def predict(model, test_loader):
    """Measures the accuracy of a model on a data set.""" 
    # Make sure the model is in evaluation mode.
    device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.eval()
    preds=[]
    # We do not need to maintain intermediate activations while testing.
    with torch.no_grad():
        
        # Loop over test data.
        for images, targets in tqdm(test_loader):
          
            # Forward pass.
            output = model(images.to(device)) #[bs x out_dim]
            # print(output.shape)
            # Get the label corresponding to the highest predicted probability.
            # print(output.argmax(dim=1, keepdim=True).shape)
            preds+= (output.argmax(dim=1, keepdim=True).cpu()) #[bs x 1]
            # print('preds',torch.tensor(preds).shape)
            
            # Count number of correct predictions.
    # Print test accuracy.
    for i,p in enumerate(preds):
      preds[i]=preds[i].item()

    return preds 

In [74]:
preds=predict(model,val_loader)

HBox(children=(FloatProgress(value=0.0, max=157.0), HTML(value='')))




In [75]:
len(preds)

10000

In [76]:
preds[0:10]

[2, 6, 0, 6, 2, 0, 4, 4, 9, 4]