In [None]:
import argparse
import os
import random
import shutil
import time
import warnings
from enum import Enum

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

In [None]:
import numpy as np
import matplotlib.pyplot as plt


In [None]:
from train_validate import train, validate

In [None]:
# Sorted names of every lowercase, non-dunder callable found in
# torchvision.models' namespace.
model_names = sorted(
    entry_name
    for entry_name, entry in models.__dict__.items()
    if entry_name.islower()
    and not entry_name.startswith("__")
    and callable(entry)
)

In [None]:
class Args:
    """Plain attribute bag that holds the notebook's settings."""


args = Args()
# path to dataset
args.data = './data'
# architecture to build; it has to be one of the names in model_names
args.arch = 'resnet18'
assert args.arch in model_names

In [None]:
# Set the remaining options in one call: vars(args) is the instance
# __dict__, so every keyword below becomes an attribute on args.
vars(args).update(
    workers=4,
    epochs=90,
    start_epoch=0,
    batch_size=256,
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
    print_freq=10,

    resume='',
    evaluate=False,
    pretrained=False,
    # distributed training
    world_size=-1,
    rank=-1,
    dist_url='tcp://224.66.41.62:23456',
    dist_backend='nccl',

    # random seed
    seed=None,

    gpu=0,
    multiprocessing_distributed=0,
)

In [None]:
# Best validation acc@1 seen so far. It starts at 0, is replaced when a
# checkpoint is resumed, and is raised by the main training loop.
best_acc1 = 0

In [None]:
# Optional reproducibility: seed every RNG this notebook imports (Python,
# NumPy and PyTorch) and force cuDNN into deterministic mode.
if args.seed is not None:
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.deterministic = True
    warnings.warn('You have chosen to seed training. '
                  'This will turn on the CUDNN deterministic setting, '
                  'which can slow down your training considerably! '
                  'You may see unexpected behavior when restarting '
                  'from checkpoints.')

if args.gpu is not None:
    warnings.warn('You have chosen a specific GPU. This will completely '
                  'disable data parallelism.')

# With the env:// rendezvous the world size is taken from the environment
# unless it was set explicitly.
if args.dist_url == "env://" and args.world_size == -1:
    args.world_size = int(os.environ["WORLD_SIZE"])

# Coerce to a real bool: multiprocessing_distributed may be stored as 0/1,
# which would otherwise leak through the `or` and make this flag an int.
args.distributed = bool(args.world_size > 1 or args.multiprocessing_distributed)
print("distributed: ", args.distributed)

In [None]:
# Number of CUDA devices reported by torch for this process.
ngpus_per_node = torch.cuda.device_count()
print('ngpus_per_node:', ngpus_per_node)
# NOTE: the launcher below is disabled and kept for reference only. It
# dispatches to a `main_worker` function, and no such function is defined in
# the visible cells of this notebook; the worker body is run inline in the
# following cells instead.
# if args.multiprocessing_distributed:
#     # Since we have ngpus_per_node processes per node, the total world_size
#     # needs to be adjusted accordingly
#     args.world_size = ngpus_per_node * args.world_size
#     # Use torch.multiprocessing.spawn to launch distributed processes: the
#     # main_worker process function
#     mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
# else:
#     # Simply call main_worker function
#     main_worker(args.gpu, ngpus_per_node, args)

In [None]:
# Build the model and place it on the right device(s), optionally joining a
# distributed process group first.
if args.gpu is not None:
    print("Use GPU: {} for training".format(args.gpu))

if args.distributed:
    if args.dist_url == "env://" and args.rank == -1:
        args.rank = int(os.environ["RANK"])
    if args.multiprocessing_distributed:
        # For multiprocessing distributed training, rank needs to be the
        # global rank among all the processes.
        # The local device index lives in args.gpu here: there is no
        # standalone `gpu` variable in the notebook, so referencing one
        # raised NameError.
        args.rank = args.rank * ngpus_per_node + args.gpu
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)
# create model
if args.pretrained:
    print("=> using pre-trained model '{}'".format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)
else:
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch]()

if not torch.cuda.is_available():
    # No CUDA: the model stays on the CPU, unwrapped.
    print('using CPU, this will be slow')
elif args.distributed:
    # For multiprocessing distributed, DistributedDataParallel constructor
    # should always set the single device scope, otherwise,
    # DistributedDataParallel will use all available devices.
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)
        # When using a single GPU per process and per
        # DistributedDataParallel, we need to divide the batch size
        # ourselves based on the total number of GPUs we have
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
    else:
        model.cuda()
        # DistributedDataParallel will divide and allocate batch_size to all
        # available GPUs if device_ids are not set
        model = torch.nn.parallel.DistributedDataParallel(model)
elif args.gpu is not None:
    # Single, explicitly selected GPU: no data parallelism.
    torch.cuda.set_device(args.gpu)
    model = model.cuda(args.gpu)
else:
    # DataParallel will divide and allocate batch_size to all available GPUs
    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        # For these architectures only the `features` sub-module is wrapped.
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

In [None]:
# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss()
# Only move the loss to a GPU when one exists: the model-creation cell
# explicitly supports CPU-only runs, and an unconditional .cuda() would
# raise there.
if torch.cuda.is_available():
    criterion = criterion.cuda(args.gpu)

# Plain SGD over the current model's parameters, configured from args.
optimizer = torch.optim.SGD(model.parameters(), args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay)

In [None]:
# optionally resume from a checkpoint
if args.resume:
    if not os.path.isfile(args.resume):
        print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        print("=> loading checkpoint '{}'".format(args.resume))
        # With a specific GPU selected, remap the stored tensors onto it;
        # otherwise let torch.load use its default placement.
        loc = None if args.gpu is None else 'cuda:{}'.format(args.gpu)
        checkpoint = torch.load(args.resume, map_location=loc)
        args.start_epoch = checkpoint['epoch']
        best_acc1 = checkpoint['best_acc1']
        if args.gpu is not None:
            # best_acc1 may be from a checkpoint from a different GPU
            best_acc1 = best_acc1.to(args.gpu)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))

cudnn.benchmark = True

# Data loading code
traindir = os.path.join(args.data, 'train')
valdir = os.path.join(args.data, 'val')
# Per-channel normalisation shared by the training and validation pipelines.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training-time preprocessing: random crop + flip, then tensor + normalise.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
train_dataset = datasets.ImageFolder(traindir, train_transform)

In [None]:
# Sanity check: display how many samples the training ImageFolder contains.
len(train_dataset)

In [None]:
# A DistributedSampler is only created for distributed runs; otherwise the
# sampler is left unset.
train_sampler = (
    torch.utils.data.distributed.DistributedSampler(train_dataset)
    if args.distributed
    else None
)

In [None]:
# Training batches: the loader shuffles only when no sampler was supplied.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=train_sampler is None,
    sampler=train_sampler,
    num_workers=args.workers,
    pin_memory=True,
)

# Validation preprocessing is deterministic: resize, centre crop, normalise.
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])
val_dataset = datasets.ImageFolder(valdir, val_transform)

# Validation batches are served in a fixed order.
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.workers,
    pin_memory=True,
)

In [None]:
# Evaluation-only mode: run validate() on the current model and print what
# it returns.
if args.evaluate:
    eval_result = validate(val_loader, model, criterion, args)
    print(eval_result)

# GridSearch

In [None]:
# generate candidate learning rates
args.lr_init_epoch = 2  # number of epochs run for each candidate
from LRBench.lr.LR import LR

# One 'FIX'-policy LR object per k0 in 0.1, 0.2, ..., 1.0
LRs = [
    LR({'lrPolicy': 'FIX', 'k0': tenths / 10.0})
    for tenths in range(1, 11)
]
trainResults = []  # lr -> epoch -> [tR, vR]

In [None]:
# Grid search: train a fresh model for args.lr_init_epoch epochs under each
# candidate learning rate and record the per-epoch train/validation results.
for lrbenchLR in LRs:
    # Every candidate starts from a newly initialised network.
    model = models.__dict__[args.arch]().cuda()
    # The optimizer has to be rebuilt as well: an optimizer keeps references
    # to the parameters it was constructed with, so reusing the earlier one
    # would keep stepping the previous model and leave this one untrained.
    # args.lr is only the construction-time value; train() receives
    # lrbenchLR and presumably applies it to the optimizer (TODO confirm in
    # train_validate.train).
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lrResults = []  # epoch -> [tR, vR]
    for epoch in range(args.lr_init_epoch):
        # train for one epoch
        tR = train(train_loader, model, criterion, optimizer, epoch, args, lrbenchLR)  # losses, top1, top5
        # evaluate on validation set
        vR = validate(val_loader, model, criterion, args)  # acc1
        lrResults.append([tR, vR])
    trainResults.append(lrResults)

In [None]:
# import pickle
# pickle.dump(
#     {
#         "LRs": LRs,
#         "trainResults": trainResults,
#         "args": args,
#     },
#     open("lrRangeResults.obj", "wb")
# )

In [None]:
# x axis: the k0 value of every candidate learning rate.
x_lr = np.array([p.lrParam['k0'] for p in LRs])

colors = ('g', 'b', 'r', 'k', 'm', 'y')
# Legend labels are the 1-based epoch numbers. Derive them from
# args.lr_init_epoch so they always match the number of rows in y_acc
# instead of being hard-coded for exactly two epochs.
y_acc_label = list(range(1, args.lr_init_epoch + 1))

# y_acc[j][i] is the validation result (second element of each
# [tR, vR] record) for candidate i after epoch j.
y_acc = np.array([
    [trainResults[i][j][1].item() for i in range(len(LRs))]
    for j in range(args.lr_init_epoch)
])

In [None]:
# Plot validation accuracy against learning rate, one line per epoch.
plt.figure(figsize=(8.0, 6.0))
plt.xlabel('Learning Rate (FIX, k)', fontsize = 23)
# Raw string: '\%' is not a valid escape sequence in a normal literal and
# triggers a warning on recent Python versions; the text itself is unchanged.
plt.ylabel(r'Accuracy (\%)', fontsize = 23)


recordN = y_acc.shape[0]
# Draw the latest epoch first so that earlier epochs are layered on top.
for i in range(recordN-1, -1, -1):
    # Cycle through the palette so more epochs than colours cannot overflow it.
    # NOTE(review): the 100x scaling assumes validate() returns a fraction in
    # [0, 1]; confirm against train_validate.validate.
    plt.plot(x_lr, 100*y_acc[i], colors[i % len(colors)], label=str(y_acc_label[i])+' Epoch(s)')
# The lines carry labels, so render the legend; without it they cannot be
# told apart.
plt.legend(fontsize = 12.5, handlelength=1)
# plt.xscale('log')
# plt.axvline(x=0.001, linestyle='dashed', color='k')
# plt.axvline(x=0.0005, linestyle='dashed', color='r')
# plt.axvline(x=0.006, linestyle='dashed', color='r')
# plt.axvline(x=0.01, linestyle='dashed', color='k')
# plt.axvline(x=0.0001, linestyle='dashed', color='k')
plt.tick_params(axis='both', which='major', labelsize=15)
#plt.plot([0.001, 0.020], [81, 10], 'r--')
# plt.savefig('acc-lr-cifar10-FIX.png')

In [None]:
# Training
# LR range: 0.1 ~ 0.5

In [None]:
# Time a single call to train().
# NOTE: model, optimizer, epoch and lrbenchLR are whatever the grid-search
# loop left behind (its last candidate and last epoch index), so this cell
# only works after that loop has run.
# `time` is already imported in the first cell; perf_counter() is used because
# it is monotonic and intended for measuring elapsed intervals.
start = time.perf_counter()
losses, top1, top5 = train(train_loader, model, criterion, optimizer, epoch, args, lrbenchLR)
end = time.perf_counter()
print(end-start)

In [None]:
# Pick the record for the third candidate LR (index 2) and its second epoch
# (index 1); [0] is the train() result and [1] its second element (labelled
# top1 where train() is called). Each entry of its .values is converted to a
# plain Python number.
values = list(map(lambda reading: reading.item(), trainResults[2][1][0][1].values))

In [None]:
# Line plot of the extracted values against their position in the list.
plt.plot(values)

In [None]:
# Show the second recorded entry of top1 from the timed train() call.
print(top1.values[1])

In [None]:
# Elapsed time of the timed train() call, in seconds.
print(end - start)

In [None]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Persist a training checkpoint.

    Args:
        state: picklable object (here a dict) to write with torch.save.
        is_best: when true, the written file is also copied to
            'model_best.pth.tar'.
        filename: path the checkpoint is written to.
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')


# Main training loop: train, validate, track the best acc@1 and checkpoint
# after every epoch.
for epoch in range(args.start_epoch, args.epochs):
    if args.distributed:
        # Tell the distributed sampler which epoch this is.
        train_sampler.set_epoch(epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch, args)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion, args)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)

    # Without multiprocessing-distributed training every process saves; with
    # it, only the process whose rank is a multiple of ngpus_per_node does.
    if not args.multiprocessing_distributed or args.rank % ngpus_per_node == 0:
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_acc1': best_acc1,
            'optimizer' : optimizer.state_dict(),
        }, is_best)