In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from data.ImagenetDataset import get_imagenet_datasets
from msdnet.dataloader import get_dataloaders_alt
from resnet import ResNet
from densenet import *
#from msdnet.models.msdnet import MSDNet

import os
import shutil
import time

# --- Run configuration -------------------------------------------------------
# Root directory of the image data; handed to get_dataloaders_alt().
DATA_PATH = "data/imagenet_images"
# Mini-batch size (intended for the data loaders).
BATCH_SIZE = 2


# Initial SGD learning rate; adjust_learning_rate() decays it by 10x every 25 epochs.
LEARNING_RATE = 0.1
# SGD momentum factor.
MOMENTUM = 0.9
# L2 weight decay applied by the SGD optimizer.
WEIGHT_DECAY = 1e-4
# Device argument passed to Tensor.cuda() in train(); None is passed through unchanged.
GPU_ID = None
# Epoch bounds; only referenced by the commented-out full-range loop header.
START_EPOCH = 0
EPOCHS = 2
# A checkpoint is written whenever epoch % CHECKPOINT_INTERVALL == 0 (or on a new best).
CHECKPOINT_INTERVALL = 10
# Directory used by save_checkpoint().
CHECKPOINT_DIR = 'state'
# Architecture label stored under the 'arch' key of every checkpoint.
ARCH = 'densenet'

if __name__ == "__main__":
    # Build the train/test loaders. The third return value is discarded
    # (use_valid=False, so no validation split is requested).
    # The batch size comes from the BATCH_SIZE config constant instead of a
    # hard-coded literal, so the value set in the config cell takes effect.
    train_loader, test_loader, _ = get_dataloaders_alt(
        DATA_PATH,
        data="ImageNet",
        use_valid=False,
        save='save/default-{}'.format(time.time()),
        batch_size=BATCH_SIZE,
        workers=1,
        splits=['train', 'test'])

    # Create the model.
    # NOTE(review): ARCH is 'densenet' but a ResNet-50 is instantiated here, so
    # the 'arch' label written into checkpoints will not match the saved
    # weights — confirm which architecture is intended.
    model = ResNet.resnet50()

    if not torch.cuda.is_available():
        print("Using CPU for slow training process")
    else:
        # Wrap for multi-GPU execution and move the parameters to the GPU(s).
        model = nn.DataParallel(model).cuda()

    # Loss function (criterion) and optimizer.
    criterion = nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(
        model.parameters(),
        LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY)

    # TODO: add loading of checkpoint behaviour

Using CPU for slow training process


In [2]:
def save_checkpoint(data, is_best, filename='checkpoint.pth.tar'):
    """Serialize a training checkpoint into CHECKPOINT_DIR.

    Args:
        data: Object to serialize with ``torch.save`` (the training loop
            passes a dict holding epoch, arch, model/optimizer state and
            best accuracy).
        is_best: When true, the freshly written checkpoint is additionally
            copied to ``model_best.pth.tar`` inside CHECKPOINT_DIR.
        filename: File name of the checkpoint inside CHECKPOINT_DIR.
    """
    # Create the target directory on demand; exist_ok avoids a race/failure
    # when it is already present.
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)

    file_path = os.path.join(CHECKPOINT_DIR, filename)
    torch.save(data, file_path)

    if is_best:
        # Keep the best model next to the regular checkpoints.
        best_path = os.path.join(CHECKPOINT_DIR, 'model_best.pth.tar')
        shutil.copyfile(file_path, best_path)

In [3]:
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy (in percent) for each k in ``topk``.

    Args:
        output: Score tensor; top-k is taken along dimension 1, so it is
            indexed as (batch, classes).
        target: Tensor of ground-truth class indices; its first dimension
            is used as the batch size.
        topk: Iterable of k values to evaluate.

    Returns:
        A list with one 0-dim float tensor per entry of ``topk``, each the
        percentage of samples whose target is among the k highest scores.
    """
    # No gradients are needed for a metric; this reduces memory consumption.
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk largest scores per sample, sorted descending.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()  # -> (maxk, batch)
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` is derived from transposed/expanded tensors and is
            # generally non-contiguous, so .view(-1) raises a RuntimeError
            # for k > 1; .reshape(-1) copies only when necessary.
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


In [4]:
def adjust_learning_rate(optimizer, epoch):
    """
        Step-decay schedule: starts from LEARNING_RATE and scales it by 0.1
        once for every 25 completed epochs, then writes the resulting value
        into every parameter group of the optimizer.
    """
    completed_steps = epoch // 25
    scaled_lr = LEARNING_RATE * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group['lr'] = scaled_lr

In [5]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training pass over ``train_loader``.

    For every batch the model output and loss are computed, top-1/top-5
    accuracy is measured, and one optimizer step is performed.

    Args:
        train_loader: Iterable yielding ``(input, target)`` batches.
        model: Network to train; switched to training mode here.
        criterion: Loss function called as ``criterion(output, target)``.
        optimizer: Optimizer updating the model's parameters.
        epoch: Index of the current epoch (currently unused in the body).
    """
    model.train()

    for i, (input, target) in enumerate(train_loader):

        if torch.cuda.is_available():
            # Only the target is moved explicitly.
            # NOTE(review): presumably the input is placed on the GPU by the
            # nn.DataParallel wrapper applied at model creation — confirm.
            target = target.cuda(GPU_ID, non_blocking=True)

        # compute output of the current network
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy
        # NOTE(review): acc1/acc5 are computed but not yet recorded anywhere.
        acc1, acc5 = accuracy(output, target, topk=(1, 5))

        # Backpropagate and update the weights. Without these three calls the
        # loop only evaluates the network and the model never learns.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [None]:
# train loop
# Best accuracy observed so far. It must exist before the first comparison
# below; without this initialization the first iteration raises NameError.
best_acc = 0.0

# NOTE(review): validate() is not defined in the cells shown here — it has to
# be defined (returning an accuracy comparable with best_acc) before this cell
# can run.
#for epoch in range(START_EPOCH, EPOCHS):
for epoch in range(0, 1):
    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate the network on the test set
    acc = validate(test_loader, model, criterion)

    # remember the best accuracy
    is_best = acc > best_acc
    best_acc = max(acc, best_acc)

    # save model: periodically, and whenever a new best accuracy is reached
    if epoch % CHECKPOINT_INTERVALL == 0 or is_best:
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': ARCH,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best)