# Final Project

## Import

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import argparse
import numpy as np
import pandas as pd
from tensorboardX import SummaryWriter

import skopt
from skopt import gbrt_minimize, gp_minimize
from skopt.utils import use_named_args
from skopt.space import Real, Categorical, Integer


## Resnet18

In [2]:
def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out)
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out)
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = conv3x3(3,64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, lin=0, lout=5):
        out = x
        if lin < 1 and lout > -1:
            out = self.conv1(out)
            out = self.bn1(out)
            out = F.relu(out)
        if lin < 2 and lout > 0:
            out = self.layer1(out)
        if lin < 3 and lout > 1:
            out = self.layer2(out)
        if lin < 4 and lout > 2:
            out = self.layer3(out)
        if lin < 5 and lout > 3:
            out = self.layer4(out)
        if lout > 4:
            out = F.avg_pool2d(out, 4)
            out = out.view(out.size(0), -1)
            out = self.linear(out)
        return out


def ResNet18():
    return ResNet(PreActBlock, [2,2,2,2])

## Dataloader

In [3]:
args = argparse.Namespace(lr=0.1, seed=2021, batch_size=256, epoch=31, decay=1e-4)

use_cuda = torch.cuda.is_available()

if args.seed != 0:
    torch.manual_seed(args.seed)

transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])


transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), 
                         (0.2023, 0.1994, 0.2010)),
])

trainset = datasets.CIFAR10(root='./data', train=True, download=False, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=8)

testset = datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=8)

In [4]:
def adjust_learning_rate(optimizer, epoch):
    lr = args.lr
    if epoch >= 10:
        lr /= 5
    if epoch >= 20:
        lr /= 5
    if epoch >= 25:
        lr /= 5
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

## Baseline

In [5]:
def train_baseline(epoch, trainloader, net, Loss, optimizer):
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        outputs = net(inputs)
        loss = Loss(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


def performance(epoch, net, Loss):
    train_loss, test_loss = 0, 0
    train_correct, test_correct = 0, 0
    train_total, test_total = 0, 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = Loss(outputs, targets)
            train_loss += loss.data
            _, predicted = torch.max(outputs.data, 1)
            train_total += targets.size(0)
            train_correct += predicted.eq(targets.data).cpu().sum()

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = Loss(outputs, targets)
            test_loss += loss.data
            _, predicted = torch.max(outputs.data, 1)
            test_total += targets.size(0)
            test_correct += predicted.eq(targets.data).cpu().sum()
    if epoch % 10 == 0 or train_correct == train_total:
        print('\nEpoch: %d ===============================================================================' % epoch)
        print('Train Loss: %.3f | Train Acc: %.3f%% (%d/%d) \t Test  Loss: %.3f | Test  Acc: %.3f%% (%d/%d)' % \
              (train_loss/(batch_idx+1), 100.*train_correct/train_total, train_correct, train_total, \
               test_loss/(batch_idx+1), 100.*test_correct/test_total, test_correct, test_total))

    return (train_loss/(batch_idx+1), 100.*train_correct/train_total, test_loss/(batch_idx+1), 100.*test_correct/test_total)

In [6]:
start_epoch = 0
CELoss = nn.CrossEntropyLoss()
net = ResNet18()
net.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.decay)
writer = SummaryWriter('./performance/baseline')
for epoch in range(start_epoch, args.epoch):
    train_baseline(epoch, trainloader, net, CELoss, optimizer)
    train_loss, train_acc, test_loss, test_acc = performance(epoch, net, CELoss)
    writer.add_scalar('train_loss', train_loss, global_step = epoch)
    writer.add_scalar('train_accuracy', train_acc, global_step = epoch)
    writer.add_scalar('test_loss', test_loss, global_step = epoch)
    writer.add_scalar('test_accuracy', test_acc, global_step = epoch)
    adjust_learning_rate(optimizer, epoch)
    if train_acc == 100:
        break

writer.close()
torch.save(net, './models/baseline.pt')
del net


Train Loss: 1.926 | Train Acc: 65.238% (32619/50000) 	 Test  Loss: 1.028 | Test  Acc: 63.230% (6323/10000)

Train Loss: 0.096 | Train Acc: 98.340% (49170/50000) 	 Test  Loss: 0.692 | Test  Acc: 83.540% (8354/10000)

Train Loss: 0.000 | Train Acc: 100.000% (50000/50000) 	 Test  Loss: 1.006 | Test  Acc: 86.290% (8629/10000)


## Mixup

In [7]:
def mixup_data(x, y, alpha, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(pred, y_a, y_b, lam):
    criterion = nn.CrossEntropyLoss()
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


def train_mixup(epoch, trainloader, net, mixup_criterion, optimizer, alpha):
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, alpha, use_cuda)
        inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b))
        outputs = net(inputs)
        loss = mixup_criterion(outputs, targets_a, targets_b, lam)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

### Gaussian Process for Tuning Hyperparameters

In [8]:
dim_alpha = Real(low=1.0, high=5.0, prior='uniform', name='alpha')
dimensions = [dim_alpha]
@use_named_args(dimensions=dimensions)
def fitness_mixup(alpha):
    start_epoch = 0
    CELoss = nn.CrossEntropyLoss()
    net = ResNet18()
    net.cuda()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.decay)
    writer = SummaryWriter(f'./performance/mixup_alpha={np.round(alpha,2)}')
    for epoch in range(start_epoch, args.epoch):
        train_mixup(epoch, trainloader, net, mixup_criterion, optimizer, alpha)
        train_loss, train_acc, test_loss, test_acc = performance(epoch, net, CELoss)
        writer.add_scalar('train_loss', train_loss, global_step = epoch)
        writer.add_scalar('train_accuracy', train_acc, global_step = epoch)
        writer.add_scalar('test_loss', test_loss, global_step = epoch)
        writer.add_scalar('test_accuracy', test_acc, global_step = epoch)
        adjust_learning_rate(optimizer, epoch)
        if train_acc == 100:
            break
    writer.close()
    torch.save(net, f'./models/mixup_alpha={np.round(alpha,2)}.pt')
    del net
    return -float(test_acc)

In [10]:
gp_result_mixup = gp_minimize(func=fitness_mixup, dimensions=dimensions, n_calls=10, noise= 0.01)


Train Loss: 2.477 | Train Acc: 58.200% (29100/50000) 	 Test  Loss: 1.287 | Test  Acc: 56.770% (5677/10000)

Train Loss: 0.731 | Train Acc: 94.300% (47150/50000) 	 Test  Loss: 0.559 | Test  Acc: 85.980% (8598/10000)

Train Loss: 0.279 | Train Acc: 99.462% (49731/50000) 	 Test  Loss: 0.473 | Test  Acc: 87.140% (8714/10000)

Train Loss: 0.192 | Train Acc: 99.822% (49911/50000) 	 Test  Loss: 0.441 | Test  Acc: 88.190% (8819/10000)

Train Loss: 2.424 | Train Acc: 60.672% (30336/50000) 	 Test  Loss: 1.258 | Test  Acc: 59.280% (5928/10000)

Train Loss: 0.877 | Train Acc: 92.394% (46197/50000) 	 Test  Loss: 0.606 | Test  Acc: 84.420% (8442/10000)

Train Loss: 0.469 | Train Acc: 98.782% (49391/50000) 	 Test  Loss: 0.516 | Test  Acc: 87.430% (8743/10000)

Train Loss: 0.371 | Train Acc: 99.550% (49775/50000) 	 Test  Loss: 0.489 | Test  Acc: 88.090% (8809/10000)

Train Loss: 2.377 | Train Acc: 60.166% (30083/50000) 	 Test  Loss: 1.248 | Test  Acc: 58.010% (5801/10000)

Train Loss: 0.928 | Train A

In [11]:
pd.concat([pd.DataFrame(gp_result_mixup.x_iters, columns = ["alpha"]),
(pd.Series(np.round(gp_result_mixup.func_vals*-1,2), name="accuracy"))], axis=1)

Unnamed: 0,alpha,accuracy
0,2.133162,88.19
1,3.080251,88.09
2,3.686689,88.08
3,2.530056,88.25
4,1.592202,88.3
5,3.460635,88.06
6,1.186081,88.59
7,4.451972,87.56
8,1.49601,88.15
9,3.522516,88.13


## Cutmix

In [15]:
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2
    
def train_cutmix(epoch, trainloader, net, criterion, optimizer, alpha, prob):
    for i, (input, target) in enumerate(trainloader):
        input = input.cuda()
        target = target.cuda()
        r = np.random.rand(1)
        if alpha > 0 and r < prob:
            # generate mixed sample
            lam = np.random.beta(alpha, alpha)
            rand_index = torch.randperm(input.size()[0]).cuda()
            target_a = target
            target_b = target[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
            input[:, :, bbx1:bbx2, bby1:bby2] = input[rand_index, :, bbx1:bbx2, bby1:bby2]
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (input.size()[-1] * input.size()[-2]))
            # compute output
            output = net(input)
            loss = criterion(output, target_a) * lam + criterion(output, target_b) * (1. - lam)
        else:
            # compute output
            output = net(input)
            loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()#反向传播
        optimizer.step()

### Gaussian Process for Tuning Hyperparameters

In [16]:
dim_alpha = Real(low=1.0, high=5.0, name='alpha')
dim_prob = Real(low=0.1, high=0.9, name='prob')
dimensions = [dim_alpha, dim_prob]
@use_named_args(dimensions=dimensions)
def fitness_cutmix(alpha, prob):
    start_epoch = 0
    CELoss = nn.CrossEntropyLoss()
    net = ResNet18()
    net.cuda()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.decay)
    writer = SummaryWriter(f'./performance/cutmix_alpha={np.round(alpha,2)}_prob={np.round(prob,2)}')
    for epoch in range(start_epoch, args.epoch):
        train_cutmix(epoch, trainloader, net, CELoss, optimizer, alpha, prob)
        train_loss, train_acc, test_loss, test_acc = performance(epoch, net, CELoss)
        writer.add_scalar('train_loss', train_loss, global_step = epoch)
        writer.add_scalar('train_accuracy', train_acc, global_step = epoch)
        writer.add_scalar('test_loss', test_loss, global_step = epoch)
        writer.add_scalar('test_accuracy', test_acc, global_step = epoch)
        adjust_learning_rate(optimizer, epoch)
        if train_acc == 100:
            break
    writer.close()
    torch.save(net, f'./models/cutmix_alpha={np.round(alpha,2)}_prob={np.round(prob,2)}.pt')
    del net
    return -float(test_acc)

In [17]:
gp_result_cutmix = gp_minimize(func=fitness_cutmix, dimensions=dimensions, n_calls=10, noise= 0.01)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations



Train Loss: 2.594 | Train Acc: 54.780% (27390/50000) 	 Test  Loss: 1.351 | Test  Acc: 53.060% (5306/10000)

Train Loss: 0.570 | Train Acc: 93.596% (46798/50000) 	 Test  Loss: 0.483 | Test  Acc: 85.760% (8576/10000)

Train Loss: 0.150 | Train Acc: 99.638% (49819/50000) 	 Test  Loss: 0.410 | Test  Acc: 87.980% (8798/10000)

Train Loss: 0.077 | Train Acc: 99.924% (49962/50000) 	 Test  Loss: 0.379 | Test  Acc: 88.840% (8884/10000)

Train Loss: 2.348 | Train Acc: 57.540% (28770/50000) 	 Test  Loss: 1.236 | Test  Acc: 56.170% (5617/10000)

Train Loss: 0.384 | Train Acc: 96.328% (48164/50000) 	 Test  Loss: 0.447 | Test  Acc: 86.160% (8616/10000)

Train Loss: 0.050 | Train Acc: 99.978% (49989/50000) 	 Test  Loss: 0.361 | Test  Acc: 88.780% (8878/10000)

Train Loss: 0.027 | Train Acc: 99.992% (49996/50000) 	 Test  Loss: 0.351 | Test  Acc: 89.310% (8931/10000)

Train Loss: 2.169 | Train Acc: 62.274% (31137/50000) 	 Test  Loss: 1.143 | Test  Acc: 60.740% (6074/10000)

Train Loss: 0.152 | Train A

In [18]:
pd.concat([pd.DataFrame(gp_result_cutmix.x_iters, columns = ["alpha","prob"]),
(pd.Series(np.round(gp_result_cutmix.func_vals*-1,2), name="accuracy"))], axis=1)

Unnamed: 0,alpha,prob,accuracy
0,2.237344,0.87598,88.84
1,4.221129,0.642483,89.31
2,2.711712,0.395033,88.48
3,4.987254,0.125719,86.95
4,4.879877,0.316913,88.46
5,3.213505,0.297492,87.91
6,4.250954,0.832802,88.91
7,1.563398,0.811996,88.43
8,3.303336,0.222179,87.54
9,1.421829,0.452589,88.01


## Cutout

In [84]:
class Cutout(object):
    """Randomly mask out one or more patches from an image.
    
    input: tensor of img
    output: tensor
    
    Args:
        n_holes (int): Number of patches to cut out of each image.
        length (int): The length (in pixels) of each square patch.
        cut_prob: the probability of img to be cut out
    """
    def __init__(self, n_holes, length,cut_prob=1):
        self.n_holes = n_holes
        self.length = length
        self.cut_prob = cut_prob

    def __call__(self, img):
        """
        Args:
            img (Tensor): Tensor image of size (C, H, W).
        Returns:
            Tensor: Image with n_holes of dimension length x length cut out of it.
        """
        r = np.random.random(1)
        if r > self.cut_prob:
            return img
        
        h = img.size(1)
        w = img.size(2)

        mask = np.ones((h, w), np.float32)

        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1: y2, x1: x2] = 0.

        mask = torch.from_numpy(mask)
        mask = mask.expand_as(img)
        img = img * mask

        return img
    
def cut_out(n_hole,length,prob=1):
    '''
    n_hole: Cutout hyparameter
    length: Cutout hyparameter
    prob: Cutout hyparameter
    
    input: Cutout hyparameter
    output: transforms.transform type (content : totensor + cutout)
    '''
    cutout = Cutout(n_hole,length,prob)
    return transforms.Compose([
    transforms.ToTensor(),
    cutout,
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])

### Gaussian Process for Tuning Hyperparameters

In [85]:
dim_num_hole = Integer(low=1, high=5, name='num_hole')
dim_cut_len = Integer(low=1, high=16, name='cut_len')
dim_cut_prob = Real(low=0.1, high=0.9, prior='uniform', name='cut_prob')
dimensions = [dim_num_hole, dim_cut_len, dim_cut_prob]
@use_named_args(dimensions=dimensions)
def fitness_cutout(num_hole, cut_len, cut_prob):

    transform_cutout = cut_out(n_hole=num_hole, length=cut_len, prob=cut_prob)
    trainset_cutout = datasets.CIFAR10(root = './data',train = True, download = False,transform = transform_cutout)
    trainloader_cotout = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=8)

    train_cutout = train_baseline
    start_epoch = 0
    CELoss = nn.CrossEntropyLoss()
    net = ResNet18()
    net.cuda()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.decay)
    writer = SummaryWriter(f'./performance/cutout_hole={num_hole}_len={cut_len}_prob={np.round(cut_prob,2)}')
    for epoch in range(start_epoch, args.epoch):
        train_cutout(epoch, trainloader_cotout, net, CELoss, optimizer)
        train_loss, train_acc, test_loss, test_acc = performance(epoch, net, CELoss)
        writer.add_scalar('train_loss', train_loss, global_step = epoch)
        writer.add_scalar('train_accuracy', train_acc, global_step = epoch)
        writer.add_scalar('test_loss', test_loss, global_step = epoch)
        writer.add_scalar('test_accuracy', test_acc, global_step = epoch)
        adjust_learning_rate(optimizer, epoch)
        if train_acc == 100:
            break
    writer.close()
    torch.save(net, f'./models/cutout_hole={num_hole}_len={cut_len}_prob={np.round(cut_prob,2)}.pt')
    del net
    return -float(test_acc)

In [86]:
gp_result_cutout = gp_minimize(func=fitness_cutout, dimensions=dimensions, n_calls=20, noise= 0.01)


Train Loss: 2.214 | Train Acc: 59.520% (29760/50000) 	 Test  Loss: 1.171 | Test  Acc: 57.740% (5774/10000)

Train Loss: 0.133 | Train Acc: 97.688% (48844/50000) 	 Test  Loss: 0.660 | Test  Acc: 83.630% (8363/10000)

Train Loss: 0.001 | Train Acc: 100.000% (50000/50000) 	 Test  Loss: 0.912 | Test  Acc: 86.560% (8656/10000)

Train Loss: 2.147 | Train Acc: 60.624% (30312/50000) 	 Test  Loss: 1.124 | Test  Acc: 59.240% (5924/10000)

Train Loss: 0.134 | Train Acc: 97.650% (48825/50000) 	 Test  Loss: 0.626 | Test  Acc: 83.870% (8387/10000)

Train Loss: 0.000 | Train Acc: 100.000% (50000/50000) 	 Test  Loss: 0.932 | Test  Acc: 86.750% (8675/10000)

Train Loss: 1.963 | Train Acc: 63.538% (31769/50000) 	 Test  Loss: 1.054 | Test  Acc: 62.290% (6229/10000)

Train Loss: 0.111 | Train Acc: 98.122% (49061/50000) 	 Test  Loss: 0.670 | Test  Acc: 83.690% (8369/10000)

Train Loss: 0.002 | Train Acc: 100.000% (50000/50000) 	 Test  Loss: 0.858 | Test  Acc: 85.770% (8577/10000)

Train Loss: 2.137 | Trai

In [87]:
pd.concat([pd.DataFrame(gp_result_cutout.x_iters, columns = ["num_hole","cut_len","cut_prob"]),
(pd.Series(np.round(gp_result_cutout.func_vals*-1,2), name="accuracy"))], axis=1)

Unnamed: 0,num_hole,cut_len,cut_prob,accuracy
0,1,12,0.139495,86.56
1,2,3,0.583584,86.75
2,5,10,0.633652,85.77
3,2,9,0.294497,86.2
4,1,14,0.376475,86.29
5,3,8,0.800425,86.12
6,4,4,0.430647,85.93
7,2,7,0.1311,86.34
8,3,6,0.245405,83.74
9,2,12,0.646275,86.3
