In [24]:
import os
import sys
import time
import logging

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
import torch.backends.cudnn as cudnn

from torch.utils.tensorboard import SummaryWriter
from utils import *
from data.ImagenetDataset import get_zipped_dataloaders, REDUCED_SET_PATH
from train import accuracy

In [25]:
# ---- Paths ------------------------------------------------------------------
RUN_PATH = 'runs/'                 # parent directory of the TensorBoard run folder
DATA_PATH = REDUCED_SET_PATH       # dataset root (imported from data.ImagenetDataset)
CHECKPOINT_DIR = 'checkpoints'     # directory argument passed to save_checkpoint()

# ---- Optimizer hyper-parameters (copied into `args` below) -------------------
LEARNING_RATE = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
BATCH_SIZE = 8

# ---- Schedule ---------------------------------------------------------------
START_EPOCH = 0
EPOCHS = 2
# Spelling ("INTERVALL") is kept on purpose: main() refers to this exact name.
CHECKPOINT_INTERVALL = 4

# ---- Logging ----------------------------------------------------------------
STAT_FREQUENCY = 200               # becomes args.print_freq (batches between log lines)
LOG_FLOAT_PRECISION = ':6.4f'      # format spec handed to every AverageMeter

# ---- Not referenced by any cell visible in this notebook ---------------------
IS_DEBUG = True
DEBUG_ITERATIONS = 40
GPU_ID = None

In [26]:
# Arguments for the MSD-Net configuration and training
class Object(object):
  """Empty attribute container used in place of an argparse namespace."""
  pass


try:
  # `arg_parser` is not defined in this notebook; presumably it is exported by
  # `from utils import *` -- TODO confirm.
  args = arg_parser.parse_args()
except (Exception, SystemExit):
  # argparse signals an unparsable command line by raising SystemExit, and a
  # missing `arg_parser` raises NameError.  Both lead to the notebook defaults
  # below.  KeyboardInterrupt is deliberately NOT caught, so interrupting the
  # cell still works (the previous bare `except:` swallowed it).
  args = Object()
  args_dict = {
      'gpu': 'gpu:0',
      'use_valid': True,
      'data': 'ImageNet',
      'save': os.path.join(os.getcwd(), 'save'),
      'evalmode': None,
      'start_epoch': START_EPOCH,
      'epochs': EPOCHS,
      'arch': 'msdnet',
      'seed': 42,
      'test_interval': 10,

      # Network architecture
      'grFactor': "1-2-4-4",
      'bnFactor': "1-2-4-4",
      'nBlocks': 3,
      'reduction': 0.5,
      'bottleneck': True,
      'prune': 'max',
      'growthRate': 16,
      'base': 4,
      'step': 4,
      'stepmode': 'even',

      # Optimisation and data loading
      'lr': LEARNING_RATE,
      'lr_type': 'multistep',
      'momentum': MOMENTUM,
      'weight_decay': WEIGHT_DECAY,
      'resume': False,
      'data_root': DATA_PATH,
      'batch_size': BATCH_SIZE,
      'workers': 1,
      'print_freq': STAT_FREQUENCY
  }

  # Expose every dictionary entry as an attribute, mimicking argparse.Namespace.
  for key, value in args_dict.items():
    setattr(args, key, value)

In [27]:
# TensorBoard writer shared by the training and evaluation cells below.
writer = SummaryWriter(
    log_dir=os.path.join(RUN_PATH, 'experiment_1'),
)

In [41]:
def main(args):
    """Train the MSD-Net model, validating and checkpointing after every epoch.

    Reads ``lr``, ``momentum``, ``weight_decay``, ``data_root``, ``batch_size``,
    ``start_epoch``, ``epochs`` and ``test_interval`` from ``args``.  Also uses
    the notebook globals ``writer``, ``CHECKPOINT_INTERVALL`` and
    ``CHECKPOINT_DIR``.

    Args:
        args: Namespace-like object carrying the attributes listed above.
    """
    model = get_msd_net_model()

    criterion = nn.CrossEntropyLoss()

    if torch.cuda.is_available():
        model.cuda()
        criterion = criterion.cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # LambdaLR multiplies the initial learning rate by the value returned here.
    # The previous `epoch // 30` returned 0 for the first 30 epochs, which set
    # the learning rate to zero; decay by a factor of 10 every 30 epochs instead.
    calc_lr = lambda epoch: 0.1 ** (epoch // 30)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=calc_lr)

    train_loader, val_loader, test_loader = get_zipped_dataloaders(
        args.data_root, args.batch_size, use_valid=True)

    best_prec1, best_epoch = 0.0, 0

    # Honour the epoch range carried by `args` rather than the global EPOCHS.
    # NOTE(review): the scheduler is not fast-forwarded when start_epoch > 0.
    for epoch in range(args.start_epoch, args.epochs):

        train_loss, train_prec1, train_prec5, lr = train(
            train_loader, model, criterion, optimizer, scheduler, epoch)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion)
        scheduler.step()

        is_best = val_prec1 > best_prec1
        if is_best:
            best_prec1 = val_prec1
            best_epoch = epoch
            logging.info(f'Best val_prec1 {best_prec1}')

        if is_best or epoch % CHECKPOINT_INTERVALL == 0:
            save_checkpoint(getStateDict(model, epoch, 'msdnet', best_prec1, optimizer),
                            is_best,
                            'msdnet',
                            CHECKPOINT_DIR)

        if epoch % args.test_interval == 0:
            avg_loss, avg_top1, avg_top5 = validate(test_loader, model, criterion)
            # validate() already returns an average loss; log it unscaled.
            writer.add_scalar('test_loss', avg_loss, epoch + 1)
            writer.add_scalar('test_top1', avg_top1, epoch + 1)
            writer.add_scalar('test_top5', avg_top5, epoch + 1)

    logging.info(f'Best val_prec1: {best_prec1:.4f} at epoch {best_epoch}')

    logging.info('*************** Final prediction results ***************')
    validate(test_loader, model, criterion)


In [42]:
def validate(val_loader, model, criterion) -> tuple:
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    The loss of a batch is the sum of ``criterion`` over every output the model
    returns (a single non-list output is wrapped in a list).  Accuracy is
    tracked separately per output.  Reads ``nBlocks`` and ``print_freq`` from
    the notebook-level ``args``.

    Args:
        val_loader: Iterable yielding ``(img, target)`` batches.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss callable invoked as ``criterion(output, target)``.

    Returns:
        ``(losses.avg, top1[-1].avg, top5[-1].avg)``: the loss meter average
        and the top-1 / top-5 meter averages of the last output.
    """
    batch_time = AverageMeter('Batch Time', LOG_FLOAT_PRECISION)
    losses = AverageMeter('Loss', LOG_FLOAT_PRECISION)
    data_time = AverageMeter('Data Time', LOG_FLOAT_PRECISION)
    top1, top5 = [], []
    for i in range(args.nBlocks):
        top1.append(AverageMeter(f'Top1-{i+1}', LOG_FLOAT_PRECISION))
        # Was labelled 'Top1-' by mistake.
        top5.append(AverageMeter(f'Top5-{i+1}', LOG_FLOAT_PRECISION))

    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (img, target) in enumerate(val_loader):
            if torch.cuda.is_available():
                img = img.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            data_time.update(time.time() - end)

            output = model(img)
            if not isinstance(output, list):
                output = [output]

            loss = 0.0
            for j in range(len(output)):
                loss += criterion(output[j], target)

            losses.update(loss.item(), img.size(0))

            for j in range(len(output)):
                # `target` was missing from this call, unlike the one in train().
                prec1, prec5 = accuracy(output[j].data, target, topk=(1, 5))
                top1[j].update(prec1.item(), img.size(0))
                top5[j].update(prec5.item(), img.size(0))

            batch_time.update(time.time() - end)

            if i % args.print_freq == 0:
                logging.info(f'Epoch: [{i+1}/{len(val_loader)}]\t'
                      f'Time {batch_time.avg:.3f}\t'
                      f'Data {data_time.avg:.3f}\t'
                      f'Loss {losses.val:.4f}\t'
                      f'Acc@1 {top1[-1].val:.4f}\t'
                      f'Acc@5 {top5[-1].val:.4f}')
            end = time.time()

    # One summary line per output.  The last output is covered by the final
    # iteration, so the extra top1[-1] line that used to sit inside this loop
    # (and was logged nBlocks times) is no longer needed.
    for j in range(args.nBlocks):
        logging.info(f' * prec@1 {top1[j].avg:.3f} prec@5 {top5[j].avg:.3f}')

    return losses.avg, top1[-1].avg, top5[-1].avg

In [43]:
def train(train_loader, model, criterion, optimizer, scheduler, epoch):
    """Run one training epoch over ``train_loader``.

    The loss of a batch is the sum of ``criterion`` over every output the model
    returns (a single non-list output is wrapped in a list).  Every
    ``args.print_freq`` batches the current batch loss is written to the
    notebook-level TensorBoard ``writer`` and a progress line is logged.

    Args:
        train_loader: Iterable yielding ``(image, target)`` batches.
        model: Network to optimise; switched to train mode here.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Only queried for the current learning rate; it is not
            stepped in this function.
        epoch: Epoch index, used for log lines and the TensorBoard step.

    Returns:
        ``(losses.avg, top1[-1].avg, top5[-1].avg, running_lr)`` where
        ``running_lr`` is ``scheduler.get_last_lr()`` read before the first batch.
    """
    batch_time = AverageMeter('Batch Time', LOG_FLOAT_PRECISION)
    data_time = AverageMeter('Data Time', LOG_FLOAT_PRECISION)
    losses = AverageMeter('Loss', LOG_FLOAT_PRECISION)
    top1, top5 = [], []

    for i in range(args.nBlocks):
        top1.append(AverageMeter(f'Top1-{i+1}', LOG_FLOAT_PRECISION))
        top5.append(AverageMeter(f'Top5-{i+1}', LOG_FLOAT_PRECISION))

    model.train()
    end = time.time()

    running_lr = scheduler.get_last_lr()
    batches_per_epoch = len(train_loader)

    for i, data in enumerate(train_loader):

        # time it takes to load data
        data_time.update(time.time() - end)

        image, target = data

        if torch.cuda.is_available():
            image = image.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        output = model(image)
        if not isinstance(output, list):
            output = [output]

        loss = 0.0
        for j in range(len(output)):
            loss += criterion(output[j], target)

        losses.update(loss.item(), image.size(0))

        for j in range(len(output)):
            prec1, prec5 = accuracy(output[j].data, target, topk=(1, 5))
            top1[j].update(prec1.item(), image.size(0))
            top5[j].update(prec5.item(), image.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            # Log the plain float of this batch's loss (the old code passed the
            # graph-attached tensor divided by print_freq), at a step that
            # advances with the batch index (the old `+ 1` put every point of
            # an epoch on the same step).
            writer.add_scalar('training_loss',
                              loss.item(),
                              epoch * batches_per_epoch + i)
            logging.info(
                f'Epoch: [{epoch}][{i + 1}/{batches_per_epoch}]\t'
                f'Time {batch_time.avg:.3f}\t'
                f'Data {data_time.avg:.3f}\t'
                f'Loss {losses.val:.4f}\t'
                f'Acc@1 {top1[-1].val:.4f}\t'
                f'Acc@5 {top5[-1].val:.4f}')

    return losses.avg, top1[-1].avg, top5[-1].avg, running_lr

In [44]:
# Start the training run using the `args` object built earlier in the notebook.
main(args)

building network of steps: 
[4] 4
 ********************** Block 1  **********************
|		inScales 4 outScales 4 inChannels 224 outChannels 16		|

|		inScales 4 outScales 3 inChannels 240 outChannels 16		|
|		Transition layer inserted! (max), inChannels 256, outChannels 128	|

|		inScales 3 outScales 2 inChannels 128 outChannels 16		|
|		Transition layer inserted! (max), inChannels 144, outChannels 72	|

|		inScales 2 outScales 1 inChannels 72 outChannels 16		|
|		Transition layer inserted! (max), inChannels 88, outChannels 44	|

data/imagenet_red/index-train.txt
data/imagenet_red/index-val.txt


KeyboardInterrupt: 