In [None]:
# Mount Google Drive inside the Colab VM at /content/drive; the following cells
# read the project files and the image data from paths below that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# copy anytimeDnn data
#!cp -r drive/My\ Drive/reducedAnytimeDnn/* .
!mkdir data
!cp -r drive/My\ Drive/reducedAnytimeDnn/data/ImagenetDataset.py ./data/ImagenetDataset.py
!cp -r drive/My\ Drive/reducedAnytimeDnn/data/__init__.py ./data/__init__.py
!cp -r drive/My\ Drive/reducedAnytimeDnn/densenet .
!cp -r drive/My\ Drive/reducedAnytimeDnn/msdnet .
!cp -r drive/My\ Drive/reducedAnytimeDnn/resnet .
!ls

In [None]:
# Environment inspection. Only `nvidia-smi` is executed; every install/uninstall
# command in this cell is disabled (commented out) and kept for reference.
#!pip install -r drive/My\ Drive/reducedAnytimeDnn/requirements.txt
#!pip3 install torch===1.6.0 torchvision===0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
#!conda install pytorch==1.5.0 torchvision==0.6.0 cudatoolkit=10.1 -c pytorch
# Show the GPU(s) visible to this runtime.
!nvidia-smi
#!pip install numpy
#!pip uninstall torch torchvision
#!pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html


In [None]:
# Quick sanity check of the PyTorch / CUDA setup before any model is built.
import torch
# Release GPU memory that PyTorch's caching allocator is still holding.
torch.cuda.empty_cache()
# CUDA version reported by the installed torch package.
print(torch.version.cuda)
# Whether a CUDA device is usable from this runtime.
print(torch.cuda.is_available())
# Whether the cuDNN backend is enabled.
print(torch.backends.cudnn.enabled)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn
#from data.ImagenetDataset import get_imagenet_datasets

from msdnet.dataloader import get_dataloaders_alt
from resnet import ResNet
from densenet import *
#from msdnet.models.msdnet import MSDNet

import os
import shutil
import time

# --- Run configuration -------------------------------------------------------
# DATA_PATH is the data root handed to get_dataloaders_alt.
# for repo:
# DATA_PATH = "data/imagenet_images"
# for colab:
DATA_PATH = "drive/My Drive/reducedAnytimeDnn/data/imagenet_images"
# Batch size and worker count passed to get_dataloaders_alt.
BATCH_SIZE = 8
NUM_WORKERS = 4

# train() and validate() print their meters when i % STAT_FREQUENCY == STAT_FREQUENCY - 1.
STAT_FREQUENCY = 50
# SGD hyper-parameters; LEARNING_RATE is also the base value used by adjust_learning_rate.
LEARNING_RATE = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
# Index of a single GPU to use. With None (and CUDA available) the model is
# wrapped in nn.DataParallel instead.
GPU_ID = None
# Presumably the epoch range for the training loop — verify against the loop cell.
START_EPOCH = 0
EPOCHS = 2
# A checkpoint is written when epoch % CHECKPOINT_INTERVALL == 0 or on a new best accuracy.
CHECKPOINT_INTERVALL = 10
# Directory that save_checkpoint stores its files in.
CHECKPOINT_DIR = 'checkpoints'
# Architecture tag used in checkpoint file names and stored inside the checkpoint dict.
ARCH = 'resnet50'

if __name__ == "__main__":
    # Release cached GPU memory left over from earlier runs of this cell.
    torch.cuda.empty_cache()

    n_gpus_per_node = torch.cuda.device_count()
    print(f"Found {n_gpus_per_node} GPU(-s)")

    # create model
    model = ResNet.resnet50()

    # Place the model: CPU fallback, one explicit GPU, or all GPUs via DataParallel.
    use_cuda = torch.cuda.is_available()
    if not use_cuda:
        print("Using CPU for slow training process")
    else:
        print("Cuda is available")
        if GPU_ID is not None:
            torch.cuda.set_device(GPU_ID)
            model.cuda(GPU_ID)
        else:
            print("Using all available GPUs")
            model = nn.DataParallel(model).cuda()

    # loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        print("Move cross entropy to device")
        criterion = criterion.cuda()

    optimizer = torch.optim.SGD(
        model.parameters(),
        LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY)

    # Let cuDNN auto-tune its convolution algorithms.
    cudnn.benchmark = True

    train_loader, test_loader, _ = get_dataloaders_alt(
        DATA_PATH,
        data="ImageNet",
        use_valid=False,
        save='save/default-{}'.format(time.time()),
        batch_size=BATCH_SIZE,
        workers=NUM_WORKERS,
        splits=['train', 'test'])

    # size of batch: report the memory footprint of the first training batch.
    # The loop variables deliberately avoid the name `input`: this block runs at
    # notebook global scope, so binding `input` here would shadow the builtin
    # for every later cell.
    for first_images, first_targets in train_loader:
        for label, tensor in (("Input", first_images), ("Target", first_targets)):
            element_size_in_byte = tensor.element_size()
            n_elements = tensor.nelement()
            size_in_byte = element_size_in_byte * n_elements
            print(f"{label}:\n{n_elements} Elements times {element_size_in_byte} bytes is {size_in_byte}")
        # Only the first batch is inspected.
        break

    # TODO add loading of checkpoint behaviour

In [None]:
def save_checkpoint(state, is_best, filename=f'{ARCH}_checkpoint.pth.tar'):
    """Serialize ``state`` into CHECKPOINT_DIR and optionally mark it as best.

    Args:
        state: object handed to ``torch.save`` (the caller passes a dict with
            epoch, arch, model/optimizer state dicts and best accuracy).
        is_best: when true, the checkpoint is additionally copied to
            ``<CHECKPOINT_DIR>/<ARCH>_model_best.pth.tar``.
        filename: file name of the checkpoint inside CHECKPOINT_DIR.
    """
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)

    # Write straight to the final location. Saving to the working directory and
    # then using shutil.move breaks twice: the move raises once a checkpoint of
    # the same name already exists in the directory, and after the move the
    # source path is gone, so copying it for the "best" snapshot fails.
    checkpoint_path = os.path.join(CHECKPOINT_DIR, os.path.basename(filename))
    torch.save(state, checkpoint_path)

    if is_best:
        best_path = os.path.join(CHECKPOINT_DIR, f'{ARCH}_model_best.pth.tar')
        shutil.copyfile(checkpoint_path, best_path)

In [None]:
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: 2-D tensor of scores; the k best entries are taken along dim 1.
        target: 1-D tensor of class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``, holding
        the percentage of rows whose target is among the k highest scores.
    """
    # reduce memory consumption on following calculations
    with torch.no_grad():
        # topk() cannot return more entries than there are classes.
        maxk = min(max(topk), output.size(1))
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        # After the transpose, row j holds the (j+1)-th best class of every sample.
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape (not view): the slice of the transposed tensor is not
            # guaranteed to be contiguous, and view() raises in that case.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


In [None]:
def adjust_learning_rate(optimizer, epoch):
    """
        Step schedule: start from LEARNING_RATE, multiply by 0.1 once for every
        completed block of 25 epochs, and write the result into each parameter
        group of the optimizer.
    """
    completed_steps = epoch // 25
    decayed_lr = LEARNING_RATE * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr

In [None]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one complete pass over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network to optimize; switched to training mode here.
        criterion: loss callable applied to ``(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index, used only in the progress output.

    Timing and top-1 / top-5 accuracy meters are printed every STAT_FREQUENCY
    batches. Nothing is returned.
    """
    model.train()
    batch_time = AverageMeter('Batch Time', ':6.3f')
    data_load_time = AverageMeter('Data Time', ':6.3f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')

    end = time.time()
    # `images` rather than `input`, so the builtin is not shadowed.
    for i, (images, target) in enumerate(train_loader):

        # With GPU_ID unset the batch stays where the loader put it; on CUDA the
        # model is then wrapped in nn.DataParallel (see the setup cell).
        if GPU_ID is not None:
            images = images.cuda(GPU_ID, non_blocking=True)
        if torch.cuda.is_available():
            target = target.cuda(GPU_ID, non_blocking=True)
        # time it takes to load data
        data_load_time.update(time.time() - end)

        # compute output of the current network
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy; store plain floats (as validate() does) so the
        # meters do not accumulate tensors.
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        top1.update(acc1.item(), images.size(0))
        top5.update(acc5.item(), images.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print statistics every STAT_FREQUENCY mini-batches and keep training;
        # returning here would end the epoch after the first report.
        if i % STAT_FREQUENCY == STAT_FREQUENCY - 1:
            print(f'Stats of Train loop {i} of {len(train_loader)}')
            print(f'Epoch {epoch} - Iteration {i}/{len(train_loader)} - Loss {loss.item()}')
            print(top1)
            print(top5)
            print(batch_time)
            print(data_load_time)


In [None]:
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on every batch of ``val_loader``.

    Args:
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network to evaluate; switched to evaluation mode here.
        criterion: loss callable applied to ``(output, target)``.

    Returns:
        The running average of the top-1 accuracy (in percent) over all
        batches; 0 when the loader yields no batches.

    Loss, top-1 / top-5 accuracy and batch time are printed every
    STAT_FREQUENCY batches.
    """
    model.eval()

    batch_time = AverageMeter('Batch Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')

    with torch.no_grad():
        end = time.time()
        # `images` rather than `input`, so the builtin is not shadowed.
        for i, (images, target) in enumerate(val_loader):
            # check if could be moved to cuda device
            if GPU_ID is not None:
                images = images.cuda(GPU_ID, non_blocking=True)
            if torch.cuda.is_available():
                target = target.cuda(GPU_ID, non_blocking=True)

            # compute output
            output = model(images)

            # compute loss
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))

            # measure elapsed time per batch; the reference point is reset so
            # the meter does not record the cumulative time since the start.
            batch_time.update(time.time() - end)
            end = time.time()

            # Report progress and keep going: returning here would score only
            # the first STAT_FREQUENCY batches.
            if i % STAT_FREQUENCY == STAT_FREQUENCY - 1:
                print(f'validation loop {i} of {len(val_loader)}')
                print(losses)
                print(top1)
                print(top5)
                print(batch_time)
    return top1.avg

In [None]:
class AverageMeter(object):
    """Running statistic: remembers the latest value and the weighted mean so far."""

    def __init__(self, name, fmt=':f'):
        # Label and format spec used when the meter is printed.
        self.name = name
        self.fmt = fmt
        # Latest value, weighted total, total weight and resulting mean.
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest value, counted ``n`` times in the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    def __str__(self):
        # Renders as "<name> <val> (<avg>)", both numbers using the meter's format spec.
        template = f'{{name}} {{val{self.fmt}}} ({{avg{self.fmt}}})'
        return template.format(**vars(self))

In [None]:
# train loop: runs the configured epoch range START_EPOCH..EPOCHS-1
# (instead of a hard-coded single epoch) and tracks the best accuracy seen.
best_acc = 0.0
for epoch in range(START_EPOCH, EPOCHS):
    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    print('Running train loop')
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate the network on test set
    print('Compute accuracy')
    acc = validate(test_loader, model, criterion)

    # remember top acc
    is_best = acc > best_acc
    best_acc = max(acc, best_acc)

    # save model: at every CHECKPOINT_INTERVALL-th epoch and whenever a new
    # best accuracy was reached
    if epoch % CHECKPOINT_INTERVALL == 0 or is_best:
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': ARCH,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best)