In [None]:
# Mount Google Drive into the Colab VM at /content/drive; the next cell copies
# the project files from there into the working directory.
from google.colab import drive
drive.mount('/content/drive')
# Register the TensorBoard notebook extension, which provides the
# `%tensorboard` line magic used further down.
%load_ext tensorboard

In [None]:
# copy anytimeDnn data
#!cp -r drive/My\ Drive/reducedAnytimeDnn/* .
!mkdir data
!mkdir data/imagenet_red
!mkdir data/imagenet_full
!cp -r drive/My\ Drive/reducedAnytimeDnn/data/utils.py ./data/utils.py 
!cp -r drive/My\ Drive/reducedAnytimeDnn/data/ImagenetDataset.py ./data/ImagenetDataset.py
!cp -r drive/My\ Drive/reducedAnytimeDnn/data/__init__.py ./data/__init__.py
!cp drive/My\ Drive/reducedAnytimeDnn/data/imagenet_red/index-* ./data/imagenet_red
#!cp drive/My\ Drive/reducedAnytimeDnn/data/imagenet_full/index-* ./data/imagenet_full
!cp -r drive/My\ Drive/reducedAnytimeDnn/densenet .
!cp -r drive/My\ Drive/reducedAnytimeDnn/msdnet .
!cp -r drive/My\ Drive/reducedAnytimeDnn/resnet .
!cp drive/My\ Drive/reducedAnytimeDnn/utils.py ./utils.py
!cp drive/My\ Drive/reducedAnytimeDnn/train.py ./train.py
!ls

In [None]:
#!pip install -r drive/My\ Drive/reducedAnytimeDnn/requirements.txt
#!pip3 install torch===1.6.0 torchvision===0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
#!conda install pytorch==1.5.0 torchvision==0.6.0 cudatoolkit=10.1 -c pytorch
!nvidia-smi
#!pip install numpy
#!pip uninstall torch torchvision
#!pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
!pip install dareblopy==0.0.3

In [55]:
import logging
import os
import sys
import time
import traceback

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
import torch.backends.cudnn as cudnn

from torch.utils.tensorboard import SummaryWriter

import msdnet.models

from utils import *
from data.ImagenetDataset import get_zipped_dataloaders, REDUCED_SET_PATH, FULL_SET_PATH

In [56]:
# Directory for TensorBoard event files (used by the SummaryWriter and as the
# `%tensorboard --logdir` target).
RUN_PATH = 'runs/'
# Dataset location handed to the data loaders via args.data_root.
DATA_PATH = REDUCED_SET_PATH
# When IS_DEBUG is true, train() and validate() stop once the batch index
# reaches DEBUG_ITERATIONS (i.e. after DEBUG_ITERATIONS + 1 batches).
IS_DEBUG = True
DEBUG_ITERATIONS = 3
# Progress is logged every STAT_FREQUENCY batches (stored as args.print_freq).
STAT_FREQUENCY = 200
# SGD hyper-parameters (stored as args.lr / args.momentum / args.weight_decay).
LEARNING_RATE = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
# NOTE(review): GPU_ID is not referenced by any cell visible in this notebook.
GPU_ID = None
# Stored as args.start_epoch.
START_EPOCH = 0
# Number of epochs main() trains for (also stored as args.epochs).
EPOCHS = 2
# A checkpoint is saved whenever `epoch % CHECKPOINT_INTERVALL == 0`
# (in addition to every new best validation result).
CHECKPOINT_INTERVALL = 4 
# Target directory passed to save_checkpoint().
CHECKPOINT_DIR = 'checkpoints'

# Format spec handed to every AverageMeter.
LOG_FLOAT_PRECISION = ':6.4f'

# Mini-batch size handed to the data loaders via args.batch_size.
BATCH_SIZE = 8

In [57]:
# Arguments for the MSD-Net configuration and training.
class Object(object):
  """Empty attribute container used as a stand-in for an argparse namespace."""
  pass

args = None

try:
  # `arg_parser` is not defined in any cell of this notebook (it may or may not
  # be provided by `from utils import *`). Inside a notebook this call is
  # expected to fail -- either with NameError or with the SystemExit argparse
  # raises on the kernel's own command-line flags.
  args = arg_parser.parse_args()
except (Exception, SystemExit):
  # Fall back to the hard-coded notebook configuration. SystemExit is caught
  # explicitly because argparse exits on bad arguments; KeyboardInterrupt is
  # deliberately NOT swallowed (the previous bare `except:` did swallow it).
  args = Object()

  # Per-scale growth-rate and bottleneck factors, parsed from dash-separated
  # strings.
  growFactor = list(map(int, "1-2-4-4".split("-")))
  bnFactor = list(map(int, "1-2-4-4".split("-")))

  args_dict = {
      'gpu': 'gpu:0',
      'use_valid': True,
      'data': 'ImageNet',
      'save': os.path.join(os.getcwd(), 'save'),
      'evalmode': None,
      'start_epoch': START_EPOCH,
      'epochs': EPOCHS,
      'arch': 'msdnet',
      'seed': 42,
      'test_interval': 10,

      'grFactor': growFactor,
      'bnFactor': bnFactor,
      'nBlocks': 5,
      'nChannels': 32,
      'nScales': len(growFactor),
      'reduction': 0.5,
      'bottleneck': True,
      'prune': 'max',
      'growthRate': 16,
      'base': 4,
      'step': 4,
      'stepmode': 'even',

      'lr': LEARNING_RATE,
      'lr_type': 'multistep',
      'momentum': MOMENTUM,
      'weight_decay': WEIGHT_DECAY,
      'resume': False,
      'data_root': DATA_PATH,
      'batch_size': BATCH_SIZE,
      'workers': 1,
      'print_freq': STAT_FREQUENCY
  }

  # Expose every configuration entry as an attribute (args.lr, args.nBlocks, ...).
  for key, value in args_dict.items():
    setattr(args, key, value)

In [58]:
# TensorBoard writer shared as a global by main() and train(); event files are
# written to the 'experiment_1' sub-directory of RUN_PATH.
writer = SummaryWriter(os.path.join(RUN_PATH, 'experiment_1'))

In [59]:
def main(args):
    """Build an MSDNet model, train it and evaluate it.

    For every epoch the model is trained, validated, optionally checkpointed
    (on a new best validation top-1 or every CHECKPOINT_INTERVALL epochs) and,
    every ``args.test_interval`` epochs, evaluated on the test loader with the
    results written to TensorBoard. A final test evaluation is run at the end.

    Args:
        args: attribute container with the run configuration. This function
            reads ``lr``, ``momentum``, ``weight_decay``, ``data_root``,
            ``batch_size``, ``start_epoch``, ``epochs`` and ``test_interval``
            and passes the whole object to ``msdnet.models.msdnet``.

    Side effects:
        Writes to and finally closes the module-level ``writer``; saves
        checkpoints through ``save_checkpoint``.
    """

    torch.cuda.empty_cache()

    n_gpus_per_node = torch.cuda.device_count()
    logging.info(f"Found {n_gpus_per_node} GPU(-s)")


    # MAIN LOOP
    #model = get_msd_net_model()
    model = msdnet.models.msdnet(args)

    # Record the network graph once, using a random 224x224 RGB dummy input,
    # before the model is wrapped in DataParallel / moved to the GPU.
    writer.add_graph(model, torch.rand(1, 3, 224, 224))

    criterion = nn.CrossEntropyLoss()

    if torch.cuda.is_available():
        logging.debug("Cuda is available.")
        logging.info("Using all available GPUs")
        for i in range(n_gpus_per_node):
            logging.info(f"gpu:{i} - {torch.cuda.get_device_name(i)}")
        model = nn.DataParallel(model).cuda()
        logging.info("Moving criterion to device.")
        criterion = criterion.cuda()
        cudnn.benchmark = True
    else:
        logging.info("Using slow CPU training.")


    optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    # LambdaLR multiplies the base learning rate by the returned factor.
    # The previous `epoch // 30` produced a factor of 0 for epochs 0-29 (i.e.
    # no learning at all) and a *growing* rate afterwards. Step decay is what
    # is intended: divide the learning rate by 10 every 30 epochs.
    calc_lr = lambda epoch: 0.1 ** (epoch // 30)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=calc_lr)

    train_loader, val_loader, test_loader = get_zipped_dataloaders(args.data_root, args.batch_size, use_valid=True)

    best_prec1, best_epoch = 0.0, 0

    # Honour the epoch range carried by `args` instead of the EPOCHS global.
    # NOTE: resuming is not implemented here -- the scheduler always starts
    # counting from epoch 0 regardless of args.start_epoch.
    for epoch in range(args.start_epoch, args.epochs):
        logging.info(f"Started Epoch{epoch + 1}/{args.epochs}")
        # train()
        train_loss, train_prec1, train_prec5, lr = train(train_loader, model, criterion, optimizer, scheduler, epoch)
        # validate()
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion)
        scheduler.step()

        # Report the per-epoch metrics, which were previously computed but
        # never used.
        logging.info(f'Epoch {epoch + 1} - lr {lr} - '
                     f'train loss {train_loss} prec@1 {train_prec1} prec@5 {train_prec5} - '
                     f'val loss {val_loss} prec@1 {val_prec1} prec@5 {val_prec5}')

        is_best = val_prec1 > best_prec1
        if is_best:
            best_prec1 = val_prec1
            best_epoch = epoch
            logging.info(f'Best val_prec1 {best_prec1}')

        if is_best or epoch % CHECKPOINT_INTERVALL == 0:
            save_checkpoint(getStateDict(model, epoch, 'msdnet', best_prec1, optimizer),
                            is_best,
                            'msdnet',
                            CHECKPOINT_DIR)

        if epoch % args.test_interval == 0:
            avg_loss, avg_top1, avg_top5 = validate(test_loader, model, criterion)
            writer.add_scalar('test_loss', avg_loss, epoch + 1)
            writer.add_scalar('test_top1', avg_top1, epoch + 1)
            writer.add_scalar('test_top5', avg_top5, epoch + 1)

    writer.close()

    logging.info(f'Best val_prec1: {best_prec1:.4f} at epoch {best_epoch}')

    logging.info('*************** Final prediction results ***************')
    validate(test_loader, model, criterion)



In [60]:
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader` without computing gradients.

    The model may return one output or a list of outputs (one per classifier);
    the loss is the sum of `criterion` over all of them, and top-1 / top-5
    accuracy is tracked separately for each output. Reads ``args.nBlocks``
    (number of accuracy meters) and ``args.print_freq`` (logging interval)
    from the module-level ``args``.

    Args:
        val_loader: iterable yielding ``(img, target)`` batches.
        model: network to evaluate; switched to eval mode.
        criterion: loss applied to every output.

    Returns:
        Tuple ``(losses.avg, top1[-1].avg, top5[-1].avg)`` -- the average loss
        and the averages of the last top-1 / top-5 meter. With IS_DEBUG set the
        function returns this tuple early, once the batch index reaches
        DEBUG_ITERATIONS, skipping the per-block summary log.
    """
    batch_time = AverageMeter('Batch Time', LOG_FLOAT_PRECISION)
    losses = AverageMeter('Loss', LOG_FLOAT_PRECISION)
    data_time = AverageMeter('Data Time', LOG_FLOAT_PRECISION)
    top1, top5 = [], []
    for i in range(args.nBlocks):
        top1.append(AverageMeter(f'Top1-{i+1}', LOG_FLOAT_PRECISION))
        # Was mislabelled 'Top1-…' (copy/paste); train() uses 'Top5-…' too.
        top5.append(AverageMeter(f'Top5-{i+1}', LOG_FLOAT_PRECISION))

    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (img, target) in enumerate(val_loader):
            if torch.cuda.is_available():
                img = img.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Time spent fetching the batch (and issuing the device copy).
            data_time.update(time.time() - end)

            output = model(img)
            if not isinstance(output, list):
                output = [output]

            # Sum the loss over all outputs.
            loss = 0.0
            for j in range(len(output)):
                loss += criterion(output[j], target)

            losses.update(loss.item(), img.size(0))

            for j in range(len(output)):
                prec1, prec5 = accuracy(output[j].data, target, topk=(1,5))
                top1[j].update(prec1.item(), img.size(0))
                top5[j].update(prec5.item(), img.size(0))

            batch_time.update(time.time() - end)

            if i % args.print_freq == 0:
                logging.info(f'Val - Epoch: [{i+1}/{len(val_loader)}]\t'
                      f'Time {batch_time.avg:.3f}\t'
                      f'Data {data_time.avg:.3f}\t'
                      f'Loss {losses.val:.4f}\t'
                      f'Acc@1 {top1[-1].val:.4f}\t'
                      f'Acc@5 {top5[-1].val:.4f}')
            end = time.time()

            if IS_DEBUG and i == DEBUG_ITERATIONS:
                return losses.avg, top1[-1].avg, top5[-1].avg

    # Per-output summary, followed once more by the last output.
    for j in range(args.nBlocks):
        logging.info(f' * prec@1 {top1[j].avg:.3f} prec@5 {top5[j].avg:.3f}')
    logging.info(f' * prec@1 {top1[-1].avg:.3f} prec@5 {top5[-1].avg:.3f}')

    return losses.avg, top1[-1].avg, top5[-1].avg

In [61]:
def train(train_loader, model, criterion, optimizer, scheduler, epoch):
    """Train `model` for one pass over `train_loader`.

    The model may return one output or a list of outputs (one per classifier);
    the optimised loss is the sum of `criterion` over all of them. Reads
    ``args.nBlocks`` and ``args.print_freq`` from the module-level ``args`` and
    writes the training loss to the module-level TensorBoard ``writer``.

    Args:
        train_loader: iterable yielding ``(image, target)`` batches.
        model: network to train; switched to train mode.
        criterion: loss applied to every output.
        optimizer: optimizer stepped once per batch.
        scheduler: only queried for the current learning rate; it is not
            stepped here.
        epoch: zero-based epoch index, used for logging and the TensorBoard
            step.

    Returns:
        Tuple ``(losses.avg, top1[-1].avg, top5[-1].avg, running_lr)`` where
        ``running_lr`` is the value of ``scheduler.get_last_lr()`` at the start
        of the epoch. With IS_DEBUG set, training stops once the batch index
        reaches DEBUG_ITERATIONS.
    """
    batch_time = AverageMeter('Batch Time', LOG_FLOAT_PRECISION)
    data_time = AverageMeter('Data Time', LOG_FLOAT_PRECISION)
    losses = AverageMeter('Loss', LOG_FLOAT_PRECISION)
    top1, top5 = [],[]

    for i in range(args.nBlocks):
        top1.append(AverageMeter(f'Top1-{i+1}', LOG_FLOAT_PRECISION))
        top5.append(AverageMeter(f'Top5-{i+1}', LOG_FLOAT_PRECISION))

    model.train()
    end = time.time()

    running_lr = scheduler.get_last_lr()

    for i, data in enumerate(train_loader):

        # time it takes to load data
        data_time.update(time.time() - end)

        image, target = data

        if torch.cuda.is_available():
            image = image.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        output = model(image)
        if not isinstance(output, list):
            output = [output]

        # Sum the loss over all outputs.
        loss = 0.0
        for j in range(len(output)):
            loss += criterion(output[j], target)

        losses.update(loss.item(), image.size(0))

        for j in range(len(output)):
            prec1, prec5 = accuracy(output[j].data, target, topk=(1, 5))
            top1[j].update(prec1.item(), image.size(0))
            top5[j].update(prec5.item(), image.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            # Global step = number of batches processed so far. The previous
            # `epoch * len(train_loader) + 1` put every point of an epoch on
            # the same step. The value is the mean loss per output of the
            # current batch; the extra division by print_freq was dropped
            # because `losses.val` is presumably the latest single-batch value,
            # not a sum over print_freq batches -- confirm against AverageMeter.
            writer.add_scalar('training_loss',
                                losses.val / len(output),
                                epoch * len(train_loader) + i)
            logging.info(
                f'Train - Epoch: [{epoch}][{i + 1}/{len(train_loader)}]\t'
                f'Time {batch_time.avg:.3f}\t'
                f'Data {data_time.avg:.3f}\t'
                f'Loss {losses.val:.4f}\t'
                f'Acc@1 {top1[-1].val:.4f}\t'
                f'Acc@5 {top5[-1].val:.4f}')

        # Debug runs only process the first DEBUG_ITERATIONS + 1 batches.
        if IS_DEBUG and i == DEBUG_ITERATIONS:
            break

    return losses.avg, top1[-1].avg, top5[-1].avg, running_lr

In [62]:
def accuracy(output, target, topk=(1,)):
    """Computes accuracy over the k top predictions for the values of k.

    Args:
        output: 2-D tensor of scores, one row per sample and one column per
            class.
        target: 1-D tensor with the true class index of every sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one 1-element tensor per entry of `topk`, holding the
        percentage (0-100) of samples whose true class is among the k
        highest-scoring predictions.
    """

    # reduce memory consumption on following calculations
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per sample, transposed to
        # (maxk, batch) so that row r holds every sample's rank-r prediction.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` is derived from a transposed tensor and may be
            # non-contiguous, in which case `.view(-1)` raises a RuntimeError
            # for k > 1; `.reshape(-1)` copies only when necessary.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [63]:
%tensorboard --logdir "{RUN_PATH}" --bind_all

UsageError: Line magic function `%tensorboard` not found.


In [64]:
# Run the experiment. Failures are reported (message + full traceback) instead
# of being re-raised, so the notebook keeps running.
try:
  main(args)
except Exception as e:
  print("Oh no! Bad things happened...")
  print(e)
  # Requires `import traceback` in the import cell.
  traceback.print_exc()
finally:
  # Runs on success and on failure alike, so a separate empty_cache() call in
  # the except branch is not needed.
  torch.cuda.empty_cache()

building network of steps: 
[4, 4, 4, 4, 4] 20
 ********************** Block 1  **********************
|		inScales 4 outScales 4 inChannels 32 outChannels 16		|

|		inScales 4 outScales 4 inChannels 48 outChannels 16		|

|		inScales 4 outScales 4 inChannels 64 outChannels 16		|

|		inScales 4 outScales 4 inChannels 80 outChannels 16		|

 ********************** Block 2  **********************
|		inScales 4 outScales 4 inChannels 96 outChannels 16		|

|		inScales 4 outScales 3 inChannels 112 outChannels 16		|
|		Transition layer inserted! (max), inChannels 128, outChannels 64	|

|		inScales 3 outScales 3 inChannels 64 outChannels 16		|

|		inScales 3 outScales 3 inChannels 80 outChannels 16		|

 ********************** Block 3  **********************
|		inScales 3 outScales 3 inChannels 96 outChannels 16		|

|		inScales 3 outScales 3 inChannels 112 outChannels 16		|

|		inScales 3 outScales 2 inChannels 128 outChannels 16		|
|		Transition layer inserted! (max), inChannels 144, outChannels