In [None]:
%matplotlib inline

import os
import shutil
import time

from IPython.display import Image
import matplotlib.pyplot as plt

import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models 

import DataLoader
from densenet_modified import *

## General setup:

In [None]:
# Trainer parameters
# Despite its name, print_freq_epochs is compared against the *batch* index
# inside the train/validate loops, so progress is printed every 100 batches.
print_freq_epochs = 100
# When True, the loss, models and batches are moved to the GPU with .cuda().
use_cuda = True

# Dataset Parameters
# Number of samples requested from the loaders per next_batch() call.
batch_size = 50
# load_size / fine_size are forwarded unchanged to the DataLoader classes.
# NOTE(review): presumably "resize to load_size, then crop to fine_size" -- confirm in DataLoader.py.
load_size = 256
fine_size = 224
# NOTE(review): `c` is not referenced anywhere else in the visible notebook;
# presumably the image channel count -- confirm before removing.
c = 3
# Forwarded to the loaders as 'data_mean'.
# NOTE(review): presumably the per-channel mean subtracted from inputs -- confirm in DataLoader.py.
data_mean = np.asarray([0.45834960097,0.44674252445,0.41352266842])

# Training parameters
# architecture = 'resnet34'
# architecture = 'vgg16_bn'
# architecture = 'dense'
lr = 0.1  # densenet default = 0.1
# Base rate of the step schedule in adjust_learning_rate().
lr_init = 0.1
momentum = 0.90 # densenet default = 0.9
weight_decay = 1e-3 # densenet default = 1e-4
num_epochs = 95

# Throw-away log sink handed to validate() for one-off evaluation cells.
# NOTE(review): this handle is never closed in the visible notebook.
dummy_text_file = open("dummy_text.txt", "w")

In [None]:
def _build_disk_loader(data_list, load, fine, randomize):
    """Create one ``DataLoader.DataLoaderDisk`` over the shared image root.

    Args:
        data_list: path of the text file listing the samples to serve.
        load: value forwarded as ``load_size``.
        fine: value forwarded as ``fine_size``.
        randomize: value forwarded as ``randomize``.

    The image root and ``data_mean`` are the same for every loader in this
    notebook, so they are fixed here instead of being repeated per loader.
    """
    return DataLoader.DataLoaderDisk(
        data_root='../../data/images/',   # MODIFY PATH ACCORDINGLY
        data_list=data_list,              # MODIFY PATH ACCORDINGLY (at the call sites)
        load_size=load,
        fine_size=fine,
        data_mean=data_mean,
        randomize=randomize,
    )


def construct_dataloader_disk():
    """Return ``(loader_train, loader_val)`` using the global load/fine sizes.

    The training loader is created with ``randomize=True`` and the validation
    loader with ``randomize=False``.
    """
    loader_train = _build_disk_loader('../../data/train.txt', load_size, fine_size, True)
    loader_val = _build_disk_loader('../../data/val.txt', load_size, fine_size, False)

    return (loader_train, loader_val)


def construct_dataloader_disk_128():
    """Return ``(loader_train, loader_val)`` with load and fine size fixed at 128."""
    loader_train = _build_disk_loader('../../data/train.txt', 128, 128, True)
    loader_val = _build_disk_loader('../../data/val.txt', 128, 128, False)

    return (loader_train, loader_val)


def construct_dataloader_disk_trainval():
    """Return a single non-randomized loader over ``trainval.txt``.

    Unlike the two constructors above, this returns the loader itself rather
    than a tuple.
    """
    loader_valtrain = _build_disk_loader('../../data/trainval.txt', load_size, fine_size, False)

    return (loader_valtrain)

In [None]:
class AverageMeter(object):
    """Keeps the most recent reading and a running weighted mean of all readings."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the last value, the weighted sum, the sample count and the mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest reading, counted as ``n`` samples."""
        self.count += n
        self.sum += val * n
        self.val = val
        self.avg = self.sum / self.count
        
def adjust_learning_rate(lr, optimizer, epoch):
    """Apply a step schedule and return the rate that was set.

    The rate is the global ``lr_init`` multiplied by 0.1 once for every 30
    full epochs in ``epoch``. The incoming ``lr`` argument is not read; it is
    overwritten with the computed value, which is written to every parameter
    group of ``optimizer`` and returned.
    """
    decay_steps = epoch // 30
    lr = lr_init * (0.1 ** decay_steps)
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr

# def adjust_learning_rate(lr, optimizer, epoch): # for densenet (201)
#     """Sets the learning rate to the initial LR decayed by 10 after 150 and 225 epochs"""
#     lr = lr_init * (0.1 ** (epoch // 20)) * (0.1 ** (epoch // 50))
#     for param_group in optimizer.param_groups:
#         param_group['lr'] = lr
#     return lr

def accuracy(output, target, topk=(1,)):
    """Compute precision@k, in percent, for each k in ``topk``.

    Args:
        output: 2-D tensor of class scores; dimension 1 is searched for the
            top-k entries.
        target: 1-D tensor of true class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element float tensor per k, holding the
        percentage of rows whose target is among the k highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the transposed (non-contiguous) layout of `pred`,
        # so .view(-1) raises for k > 1; .reshape(-1) copies when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [None]:
def save_checkpoint(filename, model, state, is_best, epoch):
    """Persist the latest training state and, when it is the best so far, the model.

    Args:
        filename: file name used under both output directories.
        model: object saved whole to ``results/<filename>`` when ``is_best``.
        state: object always saved to ``models/<filename>``.
        is_best: whether ``model`` should also be written to ``results/``.
        epoch: accepted for call-site compatibility; not used.

    Missing output directories are created instead of letting ``torch.save``
    fail on them.
    """
    state_path = "models/" + filename  # e.g. "densenet121__retraining.tar"
    os.makedirs(os.path.dirname(state_path), exist_ok=True)
    torch.save(state, state_path)
    if is_best:
        best_path = "results/" + filename
        os.makedirs(os.path.dirname(best_path), exist_ok=True)
        torch.save(model, best_path)

In [None]:
# train and validate methods adapted from https://github.com/pytorch/examples/blob/master/imagenet/main.py

def train(train_loader, model, criterion, optimizer, epoch, text_file):
    """Train ``model`` for one pass over ``train_loader`` and log a summary row.

    Reads the notebook globals ``batch_size``, ``use_cuda`` and
    ``print_freq_epochs`` (the latter is a print interval in batches).

    Args:
        train_loader: object exposing ``size()`` and ``next_batch(n)``; the
            latter must return ``(inputs, targets)`` tensors.
        model: network to optimise; put into training mode here.
        criterion: loss called as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used only for logging.
        text_file: writable text handle; one comma-separated row is appended
            per call: epoch, last batch index, last batch time, last data
            time, mean loss, mean Prec@1, mean Prec@5.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    # Only whole batches are consumed; a trailing partial batch is skipped.
    num_batches = int(train_loader.size() / batch_size)

    end = time.time()
    for i in range(num_batches):
        images, target = train_loader.next_batch(batch_size)
        target = target.long()
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # `async` became a reserved word in Python 3.7; PyTorch renamed
            # the keyword to `non_blocking`.
            target = target.cuda(non_blocking=True)
            images = images.cuda(non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss; .item() keeps plain Python floats
        # in the meters so they format and serialise cleanly.
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        n_samples = images.size(0)
        losses.update(loss.item(), n_samples)
        top1.update(prec1.item(), n_samples)
        top5.update(prec5.item(), n_samples)

        # compute gradient and do the optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq_epochs == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   epoch, i, num_batches, batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1, top5=top5))

    # One summary row per epoch. Skipped when no batch ran, because there is
    # no "last batch" to report in that case.
    if num_batches > 0:
        row = (epoch, num_batches - 1, batch_time.val, data_time.val,
               losses.avg, top1.avg, top5.avg)
        text_file.write(",".join(str(value) for value in row) + "\n")
        
def validate(val_loader, model, criterion, text_file, epoch):
    """Evaluate ``model`` on ``val_loader`` and return the mean Prec@5.

    Reads the notebook globals ``batch_size``, ``use_cuda`` and
    ``print_freq_epochs`` (the latter is a print interval in batches).

    Args:
        val_loader: object exposing ``size()`` and ``next_batch(n)``; the
            latter must return ``(inputs, targets)`` tensors.
        model: network to evaluate; put into eval mode here.
        criterion: loss called as ``criterion(output, target)``.
        text_file: writable text handle; one row prefixed with ``val,`` is
            appended per call: epoch, last batch index, last batch time, mean
            loss, mean Prec@1, mean Prec@5.
        epoch: epoch index, used only for logging.

    Returns:
        Mean Prec@5 over all evaluated samples (0 when no batch was run).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # Only whole batches are consumed; a trailing partial batch is skipped.
    num_batches = int(val_loader.size() / batch_size)

    end = time.time()
    # torch.no_grad() replaces the removed `volatile=True` Variable flag.
    with torch.no_grad():
        for i in range(num_batches):
            images, target = val_loader.next_batch(batch_size)
            target = target.long()
            if use_cuda:
                # `async` became a reserved word in Python 3.7; PyTorch
                # renamed the keyword to `non_blocking`.
                target = target.cuda(non_blocking=True)
                images = images.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss as plain Python floats
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n_samples = images.size(0)
            losses.update(loss.item(), n_samples)
            top1.update(prec1.item(), n_samples)
            top5.update(prec5.item(), n_samples)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq_epochs == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, num_batches, batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    # Skipped when no batch ran, because there is no "last batch" to report.
    if num_batches > 0:
        row = ("val", epoch, num_batches - 1, batch_time.val,
               losses.avg, top1.avg, top5.avg)
        text_file.write(",".join(str(value) for value in row) + "\n")

    return top5.avg

In [None]:
# Classification loss shared by every training/validation call below;
# it is moved to the GPU when use_cuda is set.
criterion = nn.CrossEntropyLoss().cuda() if use_cuda else nn.CrossEntropyLoss()

In [None]:
# Loaders built with the global load_size/fine_size.
train_loader, val_loader = construct_dataloader_disk()
# Loaders built with load and fine size fixed at 128 (used with the `*_128` model list).
train_loader_128, val_loader_128 = construct_dataloader_disk_128()
# Single non-randomized loader over trainval.txt.
trainval_loader = construct_dataloader_disk_trainval()

## Single-model training (currently ResNet-34 with Adam)

In [None]:
# model

In [None]:
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# checkpoints you produced yourself. Recent PyTorch releases default to
# weights_only=True, which rejects whole-model pickles like this one; pass
# weights_only=False explicitly there if the file is trusted.
# map_location lets a GPU-saved checkpoint load on a CPU-only machine when
# use_cuda is off; with use_cuda on, the stored device placement is kept.
model = torch.load('results/resnet34.pt', map_location=None if use_cuda else 'cpu')

if use_cuda:
    model = model.cuda()

# Adam step size for this run; it is independent of the `lr`/`lr_init`
# values configured for the SGD step schedule.
adam_lr = 1e-5

# optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
optimizer = torch.optim.Adam(model.parameters(), lr=adam_lr, weight_decay=weight_decay)

In [None]:
# Best validation Prec@5 seen so far; the training loop compares against it
# to decide whether the current model is the best one.
best_prec5 = 0.0

In [None]:
# model.load_state_dict(torch.load("models/densenet121.pt")['state_dict'])

In [None]:
# Base name of this run: used for the results log (<name>.txt) and for the
# checkpoint files (<name>.pt).
filename = "resnet34_adam"

In [None]:
# Training and validation rows go to the same log file (validation rows carry
# a "val," prefix). The file is opened ONCE: two separate "w" handles on the
# same path each keep their own write offset and overwrite one another.
with open("results/"+filename+".txt", "w") as text_file_train:
    text_file_val = text_file_train

    for epoch in range(0,num_epochs):
    #     lr = adjust_learning_rate(lr, optimizer, epoch) # turn off for Adam
        # Report the rate the optimizer is actually using, not the SGD default.
        print("learning rate:", optimizer.param_groups[0]['lr'])

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, text_file_train)

        # evaluate on validation set
        prec5 = validate(val_loader, model, criterion, text_file_val, epoch)

        # make the epoch's rows visible on disk even if the run is interrupted
        text_file_train.flush()

        # remember best prec@5 and save checkpoint
        is_best = prec5 > best_prec5
        best_prec5 = max(prec5, best_prec5)

        save_checkpoint(filename+".pt", model, {
            'epoch': epoch + 1,
            'arch': filename,
            'state_dict': model.state_dict(),
            'best_prec5': best_prec5,
            'optimizer' : optimizer.state_dict(),
        }, is_best, epoch)

In [None]:
# model = torch.load("results/densenet121.pt")
# One-off evaluation on the 128-px validation loader; the log row goes to the
# throw-away dummy file.
# NOTE(review): dense_model2 is first assigned in the ensemble-loading cell
# further down, so this cell raises NameError on a fresh top-to-bottom run.
validate(val_loader_128, dense_model2, criterion, dummy_text_file, 0)

In [None]:
# Display the best validation Prec@5 reached by the training loop.
best_prec5

## Validate ensemble:

In [None]:
# Load previously trained whole-model pickles for the ensemble. The trailing
# comments are the scores noted by the author for each checkpoint.
# NOTE(review): torch.load unpickles arbitrary objects -- only load trusted files.
model0x = torch.load('../pytorch/results/densenet169__retraining.pt') # train: * Prec@1 65.902 Prec@5 90.538 ,val: Prec@1 49.070 Prec@5 78.260
model1x = torch.load('../pytorch/results/densenet121__retraining.pt') # train * Prec@1 69.012 Prec@5 92.059, val  * Prec@1 51.270 Prec@5 80.680
model4x = torch.load('../pytorch/results/best_dense161_retraining.pt') #  * Prec@1 62.776 Prec@5 89.279 val: * Prec@1 44.600 Prec@5 75.260
dense_model = torch.load('../pytorch/models/best_dense.pt') #   * Prec@1 76.392 Prec@5 95.001 *val Prec@1 51.660 Prec@5 80.690
dense_model2 = torch.load('../pytorch/models/best_dense201.pt') #  * train Prec@1 93.920 Prec@5 99.380 val: * Prec@1 51.010 Prec@5 79.500

model0 = torch.load('../pytorch_v3/results/resnet34.pt') # 78.92
model1 = torch.load('../pytorch_v3/results/resnet50.pt') # 75.4
model2 = torch.load('../pytorch_v3/results/resnet101.pt') # 75.39
model3 = torch.load('../pytorch_v3/results/resnet152.pt') #  70.4

model4 = torch.load('../pytorch_v3/results/resnet34_valtrained.pt') # 77.14?
model5 = torch.load('../pytorch_v3/results/resnet50_valtrained.pt') # 75.27?
model6 = torch.load('../pytorch_v3/results/resnet101_valtrained.pt') # not trained on val
model7 = torch.load('../pytorch_v3/results/resnet152_valtrained.pt') # 70.54

# Ensemble members: `models` are fed the default-size loaders, `models_128`
# the 128-px loaders.
# NOTE(review): `models` rebinds the name of the `torchvision.models` alias
# imported at the top of the notebook.
# NOTE(review): model2..model7 are loaded but not placed in either list.
models = [model0, model1]
models_128 = [model0x,model1x, model4x, dense_model, dense_model2]

In [None]:
# Sanity check: evaluate a single ensemble member on the default-size
# validation loader; the log row goes to the throw-away dummy file.
validate(val_loader, model0, criterion, dummy_text_file, 0)

In [None]:
def _build_test_loader(load, fine):
    """Create a ``DataLoader.DataLoaderDiskTest`` over the shared image root.

    Args:
        load: value forwarded as ``load_size``.
        fine: value forwarded as ``fine_size``.

    ``randomize`` is always passed as False.
    NOTE(review): the original inline comment on that flag read "random
    crops", which contradicts the value -- confirm the flag's meaning in
    DataLoader.py.
    """
    return DataLoader.DataLoaderDiskTest(
        data_root='../../data/images/',
        load_size=load,
        fine_size=fine,
        data_mean=data_mean,
        randomize=False,
    )


def construct_dataloader_test():
    """Return the test loader configured with the global load/fine sizes."""
    return _build_test_loader(load_size, fine_size)


def construct_dataloader_test_128():
    """Return the test loader with load and fine size fixed at 128."""
    return _build_test_loader(128, 128)

In [None]:
def test(test_loader, test_loader_128, models, models_128, batch_size=200, crop_iterations=10):
    """Run the ensemble over the test loaders and collect summed scores per batch.

    Args:
        test_loader: loader feeding the networks in ``models``; must expose
            ``size()`` and ``next_batch(n)`` returning ``(inputs, paths)``.
        test_loader_128: loader feeding the networks in ``models_128``.
        models: networks evaluated on batches from ``test_loader``.
        models_128: networks evaluated on batches from ``test_loader_128``.
        batch_size: samples requested per ``next_batch`` call.
        crop_iterations: how many times the whole test set is traversed.

    Returns:
        A list of ``(paths, output)`` tuples, one per batch, where ``output``
        is the sum over all networks of the first 100 score columns and
        ``paths`` comes from ``test_loader_128``.

    NOTE(review): both loaders are assumed to yield the same samples in the
    same order; nothing here checks it.
    """
    # switch every ensemble member to evaluate mode
    for model in list(models) + list(models_128):
        model.eval()

    preds = []
    num_batches = int(test_loader.size() / batch_size * crop_iterations)

    # torch.no_grad() replaces the removed `volatile=True` Variable flag.
    with torch.no_grad():
        for i in range(num_batches):
            images, _ = test_loader.next_batch(batch_size)
            images_128, paths = test_loader_128.next_batch(batch_size)
            if use_cuda:
                # `async` became a reserved word in Python 3.7; PyTorch
                # renamed the keyword to `non_blocking`.
                images = images.cuda(non_blocking=True)
                images_128 = images_128.cuda(non_blocking=True)

            # compute output: sum the first 100 score columns of every network
            output = 0
            for model in models:
                output += model(images)[:, :100]
            for model_128 in models_128:
                output += model_128(images_128)[:, :100]

            preds.append((paths, output))

            print('Test: [{0}/{1}]\t'.format((i+1)*batch_size, test_loader.size()*crop_iterations))

    return preds

In [None]:
def validate_ensamble(val_loader, val_loader_128, models, models128, criterion):
    """Evaluate the summed-score ensemble on the validation set; return mean Prec@5.

    Reads the notebook globals ``batch_size``, ``use_cuda`` and
    ``print_freq_epochs`` (the latter is a print interval in batches).

    Args:
        val_loader: loader feeding the networks in ``models``; its targets
            are the ones used for the loss and the accuracy.
        val_loader_128: loader feeding the networks in ``models128``; only
            its inputs are used.
        models: networks evaluated on batches from ``val_loader``.
        models128: networks evaluated on batches from ``val_loader_128``.
        criterion: loss called as ``criterion(output, target)`` on the summed
            scores.

    Returns:
        Mean Prec@5 over all evaluated samples.

    NOTE(review): both loaders are assumed to yield the same samples in the
    same order; nothing here checks it.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Switch every ensemble member to evaluate mode. The `models128` ARGUMENT
    # is used here and below; the previous body silently read the notebook
    # global `models_128` and ignored what the caller passed in.
    for model in list(models) + list(models128):
        model.eval()

    num_batches = int(val_loader.size() / batch_size)

    end = time.time()
    # torch.no_grad() replaces the removed `volatile=True` Variable flag.
    with torch.no_grad():
        for i in range(num_batches):
            images, target = val_loader.next_batch(batch_size)
            # The 128-px loader is advanced in lock-step; its targets are unused.
            images_128, _ = val_loader_128.next_batch(batch_size)
            target = target.long()
            if use_cuda:
                # `async` became a reserved word in Python 3.7; PyTorch
                # renamed the keyword to `non_blocking`.
                target = target.cuda(non_blocking=True)
                images = images.cuda(non_blocking=True)
                images_128 = images_128.cuda(non_blocking=True)

            # compute output: sum the first 100 score columns of every network
            output = 0
            for model in models:
                output += model(images)[:, :100]

            for model_128 in models128:
                output += model_128(images_128)[:, :100]

            loss = criterion(output, target)

            # measure accuracy and record loss as plain Python floats
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n_samples = images.size(0)
            losses.update(loss.item(), n_samples)
            top1.update(prec1.item(), n_samples)
            top5.update(prec5.item(), n_samples)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq_epochs == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, num_batches, batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top5.avg

In [None]:
# models

In [None]:
# Score the full ensemble (default-size and 128-px members) on the validation set.
validate_ensamble(val_loader, val_loader_128, models, models_128, criterion)

In [None]:
# Test-set loaders: one at the global load/fine size, one fixed at 128.
test_loader = construct_dataloader_test()
test_loader_128 = construct_dataloader_test_128()

In [None]:
# len(probs)

In [None]:
# Per-batch (paths, summed scores) tuples for every pass over the test set.
probs = test(test_loader, test_loader_128, models, models_128)

In [None]:
# Merge the repeated passes over the test set: entry i of pass j sits at
# index i + j*diff in `probs`, so the scores for the same batch of images are
# summed across passes.
num_crop_passes = 10                   # number of passes test() made over the test set
diff = len(probs) // num_crop_passes   # batches per pass (50 for 500 entries)

batch_probs_processed = []
for i in range(diff):
    paths = probs[i][0]
    output = probs[i][1]
    for j in range(1, num_crop_passes):
        # Out-of-place add: `+=` would modify the tensor stored in `probs`,
        # so re-running this cell would count the first pass again.
        output = output + probs[i+j*diff][1]
    batch_probs_processed.append((paths, output))

In [None]:
# Write one line per test image: "test/<file name>" followed by the indices
# of its five highest-scoring classes, best first.
with open('preds_fixed_vF.txt', 'w') as f:
    for paths, batch_probs in batch_probs_processed:
        # .tolist() yields plain ints; iterating tensor rows directly yields
        # 0-dim tensors, which would be written as "tensor(...)".
        batch_preds = batch_probs.data.topk(5, 1, True, True)[1].cpu().tolist()
        for path, top5 in zip(paths, batch_preds):
            path = 'test/' + os.path.basename(path)
            f.write('{} {} {} {} {} {}\n'.format(path, *top5))