## Copyright
https://github.com/clovaai/CutMix-PyTorch

In [16]:
import os
os.chdir('d:/github/Paper2Code')
import shutil
import time
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch import optim
from torch.utils.data import distributed, DataLoader
from torchvision import transforms
from torchvision import datasets
from torchvision import models
from PyramidNet import model_pytorch as PN
import numpy as np

# Run configuration shared by the cells below.
best_err1 = 100  # lowest validation top-1 error (%) seen so far; lowered by the epoch loop
epochs = 300  # number of training epochs; also drives the LR schedule milestones
batch_size = 48  # mini-batch size of the training DataLoader
lr = 0.25  # base SGD learning rate before adjust_learning_rate scales it
momentum = 0.9  # SGD momentum
weight_decay = 1e-4  # SGD weight decay
depth = 200  # first argument of PN.PyramidNet (presumably network depth -- confirm in PyramidNet)
alpha = 240  # second argument of PN.PyramidNet (presumably the widening factor -- confirm)
beta = 1.0  # both shape parameters of the Beta distribution that samples the CutMix ratio
cutmix_prob = 0.5  # a batch is CutMix-ed when a uniform [0, 1) draw falls below this value
expname = 'w'  # experiment name; names the checkpoint sub-directory in save_checkpoint

In [2]:
def adjust_learning_rate(optimizer, epoch):
    """Write the step-decayed learning rate for ``epoch`` into every parameter group.

    The base rate ``lr`` is multiplied by 0.1 once ``epoch`` reaches 50% of
    ``epochs`` and by a further 0.1 once it reaches 75% of ``epochs``.
    """
    half_way_steps = epoch // (epochs * 0.5)
    three_quarter_steps = epoch // (epochs * 0.75)
    lr_new = lr * (0.1 ** half_way_steps) * (0.1 ** three_quarter_steps)
    for group in optimizer.param_groups:
        group['lr'] = lr_new


def get_learning_rate(optimizer):
    """Return the learning rate of each parameter group, in group order."""
    return [group['lr'] for group in optimizer.param_groups]


def accuracy(output, target, topk=(1,)):
    """Compute the top-k *error* rate, in percent, for each k in ``topk``.

    Despite the name, the returned values are error rates (100 - accuracy):
    the percentage of samples whose target is not among the k highest scores.

    Args:
        output: 2-D score tensor indexed as (sample, class).
        target: 1-D tensor holding one class index per sample.
        topk: the k values to evaluate.

    Returns:
        A list with one single-element float tensor per entry of ``topk``.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk largest scores per sample, sorted best-first,
    # transposed so that row j holds every sample's (j+1)-th best guess.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape, not view: the slice of the transposed tensor is
        # non-contiguous for k > 1 and view() raises on it.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        wrong_k = batch_size - correct_k
        res.append(wrong_k.mul_(100.0 / batch_size))
    return res

In [18]:
def rand_bbox(size, lam):
    """Sample a random box covering roughly a ``1 - lam`` fraction of the image.

    Args:
        size: 4-element shape; ``size[2]`` and ``size[3]`` are the extents of
            the two axes the box is cut from.
        lam: mixing ratio in [0, 1]; each box side is ``sqrt(1 - lam)`` times
            the matching extent, before clipping.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)``: box bounds along ``size[2]`` (x) and
        ``size[3]`` (y), clipped to the image, so the box may be smaller than
        requested when its centre lies near a border.
    """
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # Box centre drawn uniformly over the image.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` into the experiment's run directory.

    Args:
        state: object handed to ``torch.save``.
        is_best: when true, the saved file is also copied to
            ``model_best.pth.tar`` in the same directory.
        filename: file name of the checkpoint inside the run directory.
    """
    directory = "d:/dataset/runs/%s/" % (expname)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)
    filename = directory + filename
    torch.save(state, filename)
    if is_best:
        # Reuse the directory computed above instead of rebuilding the path.
        shutil.copyfile(filename, directory + 'model_best.pth.tar')


class AverageMeter(object):
    """Keep the most recent value of a metric together with its running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the mean, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the mean of ``n`` new observations."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


In [4]:
# Per-channel statistics are written on the 0-255 scale and divided by 255
# here, since ToTensor runs before normalization in both pipelines.
normalize = transforms.Normalize(
    mean=[125.3 / 255.0, 123.0 / 255.0, 113.9 / 255.0],
    std=[63.0 / 255.0, 62.1 / 255.0, 66.7 / 255.0],
)

# Training pipeline: padded random crop and horizontal flip for augmentation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: tensor conversion and normalization only.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

In [5]:
# Training split: downloaded on demand, augmented, reshuffled by the loader.
train_loader = DataLoader(datasets.CIFAR10('d:/dataset/cifar10/cifar-10-batches-py', train=True, download=True, 
                                           transform=transform_train), batch_size=batch_size, shuffle=True, pin_memory=False)
# Held-out split: no download flag, so it relies on files already present under the same root.
# NOTE(review): shuffle=True should not affect the averaged validation metrics -- confirm it is intentional.
val_loader = DataLoader(datasets.CIFAR10('d:/dataset/cifar10/cifar-10-batches-py', train=False,
                                       transform=transform_test), batch_size=16, shuffle=True, pin_memory=False)
# Number of classes; passed as the third argument to PN.PyramidNet.
C = 10

Files already downloaded and verified


In [6]:
# Build the PyramidNet classifier and move its parameters to the GPU.
model = PN.PyramidNet(depth, alpha, C).cuda()

In [7]:
# Print the architecture; the parameter count stays the cell's last
# expression so the notebook displays it.
print(model)
'the number of model params: {}'.format(sum(p.data.nelement() for p in model.parameters()))

PyramidNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (stage1): Sequential(
    (0): BottleneckBlock(
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(16, 20, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn3): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(20, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn4): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (1): BottleneckBlock(
      (bn1): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_runn

)


'the number of model params: 26752702'

In [8]:
# Ask cuDNN to benchmark its algorithms and keep the fastest one.
cudnn.benchmark = True

# Cross-entropy loss and Nesterov-momentum SGD over all model parameters.
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
    nesterov=True,
)

In [9]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one epoch, applying CutMix to a random subset of batches.

    For each batch a uniform draw decides (with probability ``cutmix_prob``,
    provided ``beta > 0``) whether a rectangular patch is replaced by the same
    region from a shuffled copy of the batch. The loss then blends the losses
    against both label sets, weighted by the fraction of pixels kept.

    Top-1 error is always measured against the original (un-mixed) targets,
    so it is only indicative for CutMix batches.

    Args:
        train_loader: iterable yielding ``(images, targets)`` batches.
        model: network to optimize; switched to train mode.
        criterion: loss called as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index, used for logging only.

    Returns:
        The sample-weighted mean training loss over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    # The LR is fixed for the whole epoch, so read it once for the log line.
    current_LR = get_learning_rate(optimizer)[0]
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.cuda()
        target = target.cuda()

        # Inputs are passed to the model as plain tensors: no gradient with
        # respect to the images is needed, so none is requested.
        r = np.random.rand()
        if beta > 0 and r < cutmix_prob:
            # generate mixed sample
            lam = np.random.beta(beta, beta)
            rand_index = torch.randperm(images.size()[0]).cuda()
            target_a = target
            target_b = target[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(images.size(), lam)
            images[:, :, bbx1:bbx2, bby1:bby2] = images[rand_index, :, bbx1:bbx2, bby1:bby2]
            # adjust lambda to exactly match pixel ratio (the box may have been clipped)
            box_area = (bbx2 - bbx1) * (bby2 - bby1)
            lam = float(1 - (box_area / (images.size()[-1] * images.size()[-2])))
            output = model(images)
            loss = criterion(output, target_a) * lam + criterion(output, target_b) * (1. - lam)
        else:
            output = model(images)
            loss = criterion(output, target)

        # measure accuracy and record loss
        err1, _ = accuracy(output.detach(), target, topk=(1, 5))

        losses.update(loss.item(), images.size(0))
        top1.update(err1.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        print('Epoch: [{0}/{1}][{2}/{3}]\t'
              'LR: {LR:.6f}\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\t'.format(
            epoch, epochs, i, len(train_loader), LR=current_LR, batch_time=batch_time,
            data_time=data_time, loss=losses, top1=top1))

    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f} \t Train Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1,loss=losses))

    return losses.avg

def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and log the running loss and top-1 error.

    Args:
        val_loader: iterable yielding ``(images, targets)`` batches.
        model: network to evaluate; switched to eval mode.
        criterion: loss called as ``criterion(output, target)``.
        epoch: current epoch index, used for logging only.

    Returns:
        ``(top1_error, loss)``: sample-weighted means over the whole loader,
        with the error expressed in percent.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # Nothing is back-propagated here, so skip building the autograd graph.
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            target = target.cuda()
            images = images.cuda()
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            err1, _ = accuracy(output, target, topk=(1, 5))

            losses.update(loss.item(), images.size(0))

            top1.update(err1.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            print('Test (on val set): [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top 1-err {top1.val:.4f} ({top1.avg:.4f})'.format(
                   epoch, epochs, i, len(val_loader), batch_time=batch_time, loss=losses,
                   top1=top1))

    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f} \t Test Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, loss=losses))
    return top1.avg, losses.avg

In [10]:
# Main schedule: set the LR, train one epoch, validate, then checkpoint.
for epoch in range(epochs):
    adjust_learning_rate(optimizer, epoch)
    train_loss = train(train_loader, model, criterion, optimizer, epoch)
    err1, val_loss = validate(val_loader, model, criterion, epoch)

    # A tie with the previous best still counts as best and refreshes the copy.
    is_best = err1 <= best_err1
    best_err1 = min(err1, best_err1)
    print('Current best accuracy (top-1 and 5 error):', best_err1)

    save_checkpoint(
        {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'best_err1': best_err1,
            'optimizer': optimizer.state_dict(),
        },
        is_best,
    )
print('Best err (top-1): ', best_err1)

Epoch: [0/300][0/1042]	LR: 0.250000	Time 9.589 (9.589)	Data 0.011 (0.011)	Loss 2.3520 (2.3520)	Top 1-err 97.9167 (97.9167)	
Epoch: [0/300][1/1042]	LR: 0.250000	Time 0.624 (5.107)	Data 0.009 (0.010)	Loss 7.7074 (5.0297)	Top 1-err 87.5000 (92.7083)	
Epoch: [0/300][2/1042]	LR: 0.250000	Time 0.414 (3.542)	Data 0.011 (0.010)	Loss 13.0136 (7.6910)	Top 1-err 83.3333 (89.5833)	
Epoch: [0/300][3/1042]	LR: 0.250000	Time 0.414 (2.760)	Data 0.009 (0.010)	Loss 14.0942 (9.2918)	Top 1-err 97.9167 (91.6667)	
Epoch: [0/300][4/1042]	LR: 0.250000	Time 0.445 (2.297)	Data 0.010 (0.010)	Loss 9.9187 (9.4172)	Top 1-err 91.6667 (91.6667)	
Epoch: [0/300][5/1042]	LR: 0.250000	Time 0.454 (1.990)	Data 0.011 (0.010)	Loss 15.2232 (10.3849)	Top 1-err 91.6667 (91.6667)	
Epoch: [0/300][6/1042]	LR: 0.250000	Time 0.447 (1.770)	Data 0.017 (0.011)	Loss 4.8189 (9.5897)	Top 1-err 81.2500 (90.1786)	
Epoch: [0/300][7/1042]	LR: 0.250000	Time 0.405 (1.599)	Data 0.010 (0.011)	Loss 6.1171 (9.1556)	Top 1-err 83.3333 (89.3229)	
Epoc

OSError: [WinError 123] 파일 이름, 디렉터리 이름 또는 볼륨 레이블 구문이 잘못되었습니다: 'runs/d:'