In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import random_split, SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
%matplotlib inline

In [2]:
#Set cuda environment
def get_default_device():
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
    
def to_device(data, device):
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

class DeviceDataLoader():
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device

    def __iter__(self):
        for b in self.dl: 
            yield to_device(b, self.device)
            
    def __len__(self):
        return len(self.dl)

device = get_default_device()
device

device(type='cuda')

**Load data with data augmentation**

In [3]:
#apply data augmentation to data
img_transforms0 = transforms.Compose([transforms.ColorJitter(brightness=0.5, contrast=0.2, saturation=0.2),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.RandomRotation(degrees=15),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

transform0 = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [4]:
dataset_tf0 = CIFAR10(root='data/', train=True, download=True, transform=img_transforms0)
dataset_org0 = CIFAR10(root='data/', train=True, download=True, transform=transform0)

dataset_size = len(dataset_tf0)
dataset_indices = list(range(dataset_size))

val_split_index = int(np.floor(0.2 * dataset_size))
train_idx, val_idx = dataset_indices[val_split_index:], dataset_indices[:val_split_index]

train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

train_tf_loader0 = DataLoader(dataset_tf0, batch_size=64, shuffle=False, num_workers=4, sampler=train_sampler, pin_memory=True)
val_tf_loader0 = DataLoader(dataset_org0, batch_size=64, shuffle=False, num_workers=4, sampler=val_sampler, pin_memory=True)

test_dataset_0 = CIFAR10(root='data/', train=False, transform=transform0)
test_loader_0 = DataLoader(test_dataset_0, batch_size = 64, num_workers=4, pin_memory=True)



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/cifar-10-python.tar.gz to data/
Files already downloaded and verified


In [None]:
img, label = dataset_tf0[0]
img_shape = img.shape
print('image shape', img_shape)

image shape torch.Size([3, 32, 32])


In [None]:
classes = ('plane', 'car', 'bird', 'cat','deer', 'dog', 'frog', 'horse', 'ship', 'truck')
x = []
for i in range(dataset_size):
    x.append(dataset_tf0[i][1])
uimg = torch.tensor(x).unique(sorted=True)
uimg_count = torch.stack([(torch.tensor(x)==i).sum() for i in uimg])
for i in range(len(uimg)):
    print(f'{classes[i]}: {uimg_count[i].item()} count')

plane: 5000 count
car: 5000 count
bird: 5000 count
cat: 5000 count
deer: 5000 count
dog: 5000 count
frog: 5000 count
horse: 5000 count
ship: 5000 count
truck: 5000 count


**Load data without data augmentation**

In [6]:
#Load data without data augmentation
transform2 = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset_tf2 = CIFAR10(root='data/', train=True, download=True, transform=transform2)
dataset_test = CIFAR10(root='data/', train=False, download=True, transform=transform2)

dataset_size = len(dataset_tf2)
dataset_indices = list(range(dataset_size))

np.random.shuffle(dataset_indices)
val_split_index = int(np.floor(0.2 * dataset_size))
train_idx, val_idx = dataset_indices[val_split_index:], dataset_indices[:val_split_index]

train_sampler2 = SubsetRandomSampler(train_idx)
val_sampler2 = SubsetRandomSampler(val_idx)

train_tf_loader2 = DataLoader(dataset_tf2, batch_size=64, shuffle=False, num_workers=4, sampler=train_sampler2, pin_memory=True)
val_tf_loader2 = DataLoader(dataset_tf2, batch_size=64, shuffle=False, num_workers=4, sampler=val_sampler2, pin_memory=True)
test_tf_loader2 = DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)


Files already downloaded and verified
Files already downloaded and verified


In [5]:
# define accuracy checking function
def check_accuracy(loader, model):    
    num_correct = 0
    num_samples = 0
    model.eval()
    
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
        
        print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}') 

**1 Configure ResNet-18 from scratch**


In [7]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


**1 ResNet 18 - build from scratch**

In [None]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model1 = ResNet18()
if torch.cuda.is_available():
    model1.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model1.parameters(), lr=learning_rate,  momentum=0.9, weight_decay=0.0001)

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader0):
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model1(data)
        loss = criterion(scores, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%1000 == 999: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/1000))
            running_loss = 0

print('finished', epoch)

finished 99


In [None]:
#save trained model
PATH = './cifar_net.pth'
torch.save(model1.state_dict(), PATH)

In [None]:
print('train acc', check_accuracy(train_tf_loader0, model1))

print('val acc', check_accuracy(val_tf_loader0, model1))

print('test acc', check_accuracy(test_loader_0, model1))

Got 39729 / 40000 with accuracy 99.32
train acc None
Got 9016 / 10000 with accuracy 90.16
val acc None
Got 8974 / 10000 with accuracy 89.74
test acc None


**ResNet 34 Build from scratch**

In [8]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model1_34 = ResNet34()
if torch.cuda.is_available():
    model1_34.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model1_34.parameters(), lr=learning_rate,  momentum=0.9, weight_decay=0.0001)

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader0):
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model1_34(data)
        loss = criterion(scores, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%200 == 199: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/200))
            running_loss = 0

print('finished', epoch)

[1,   200] loss:1.921
[1,   400] loss:1.597
[1,   600] loss:1.439
[2,   200] loss:1.262
[2,   400] loss:1.125
[2,   600] loss:1.057
[3,   200] loss:0.933
[3,   400] loss:0.890
[3,   600] loss:0.849
[4,   200] loss:0.787
[4,   400] loss:0.765
[4,   600] loss:0.747
[5,   200] loss:0.675
[5,   400] loss:0.695
[5,   600] loss:0.641
[6,   200] loss:0.627
[6,   400] loss:0.589
[6,   600] loss:0.585
[7,   200] loss:0.559
[7,   400] loss:0.565
[7,   600] loss:0.558
[8,   200] loss:0.515
[8,   400] loss:0.524
[8,   600] loss:0.500
[9,   200] loss:0.479
[9,   400] loss:0.464
[9,   600] loss:0.474
[10,   200] loss:0.436
[10,   400] loss:0.443
[10,   600] loss:0.448
[11,   200] loss:0.394
[11,   400] loss:0.418
[11,   600] loss:0.416
[12,   200] loss:0.371
[12,   400] loss:0.385
[12,   600] loss:0.391
[13,   200] loss:0.345
[13,   400] loss:0.364
[13,   600] loss:0.362
[14,   200] loss:0.322
[14,   400] loss:0.341
[14,   600] loss:0.346
[15,   200] loss:0.304
[15,   400] loss:0.328
[15,   600] los

In [None]:
#save trained model
PATH = './cifar_net.pth'
torch.save(model1_34.state_dict(), PATH)

In [None]:
print('train acc', check_accuracy(train_tf_loader0, model1_34))
print('val acc', check_accuracy(val_tf_loader0, model1_34))
print('test acc', check_accuracy(test_loader_0, model1_34))

Got 39690 / 40000 with accuracy 99.22
train acc None
Got 9027 / 10000 with accuracy 90.27
val acc None
Got 8971 / 10000 with accuracy 89.71
test acc None


**ResNet-50 build from scratch**

In [None]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model1_50 = ResNet50()
if torch.cuda.is_available():
    model1_50.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model1_50.parameters(), lr=learning_rate,  momentum=0.9, weight_decay=0.0001)

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader0):
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model1_50(data)
        loss = criterion(scores, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%200 == 199: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/200))
            running_loss = 0
            
print('finished', epoch)


[1,   200] loss:2.374
[1,   400] loss:1.840
[1,   600] loss:1.691
[2,   200] loss:1.580
[2,   400] loss:1.485
[2,   600] loss:1.444
[3,   200] loss:1.331
[3,   400] loss:1.246
[3,   600] loss:1.148
[4,   200] loss:1.061
[4,   400] loss:1.023
[4,   600] loss:0.960
[5,   200] loss:0.892
[5,   400] loss:0.860
[5,   600] loss:0.819
[6,   200] loss:0.837
[6,   400] loss:0.843
[6,   600] loss:0.771
[7,   200] loss:0.710
[7,   400] loss:0.686
[7,   600] loss:0.670
[8,   200] loss:0.612
[8,   400] loss:0.632
[8,   600] loss:0.585
[9,   200] loss:0.576
[9,   400] loss:0.583
[9,   600] loss:0.571
[10,   200] loss:0.520
[10,   400] loss:0.516
[10,   600] loss:0.540
[11,   200] loss:0.529
[11,   400] loss:0.524
[11,   600] loss:0.508
[12,   200] loss:0.461
[12,   400] loss:0.475
[12,   600] loss:0.451
[13,   200] loss:0.416
[13,   400] loss:0.429
[13,   600] loss:0.427
[14,   200] loss:0.386
[14,   400] loss:0.403
[14,   600] loss:0.389
[15,   200] loss:0.356
[15,   400] loss:0.360
[15,   600] los

In [None]:
#save trained model
PATH = './cifar_net.pth'
torch.save(model1_50.state_dict(), PATH)

In [None]:
print('train acc', check_accuracy(train_tf_loader0, model1_50))

print('val acc', check_accuracy(val_tf_loader0, model1_50))

print('test acc', check_accuracy(test_loader_0, model1_50))

Got 39470 / 40000 with accuracy 98.67
train acc None
Got 8945 / 10000 with accuracy 89.45
val acc None
Got 8913 / 10000 with accuracy 89.13
test acc None


**2. Compare with model training without Data Augmentation**

**ResNet18 - train without data augmentation**




In [None]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model1_1 = ResNet18()
if torch.cuda.is_available():
    model1_1.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model1_1.parameters(), lr=learning_rate,  momentum=0.9, weight_decay=0.0001)

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader2):
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model1_1(data)
        loss = criterion(scores, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%1000 == 999: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/1000))
            running_loss = 0

print('finished', epoch)

finished 99


In [None]:
print('train acc',check_accuracy(train_tf_loader2, model1_1))

print('val acc', check_accuracy(val_tf_loader2, model1_1))

print('test acc', check_accuracy(test_tf_loader2, model1_1))

Got 40000 / 40000 with accuracy 100.00
train acc None
Got 8611 / 10000 with accuracy 86.11
val acc None
Got 8484 / 10000 with accuracy 84.84
test acc None


**2. ResNet 50 -Train without data augmentation**

In [None]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model2_2 = ResNet50()
if torch.cuda.is_available():
    model2_2.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model2_2.parameters(), lr=learning_rate,  momentum=0.9, weight_decay=0.0001)

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader2):
        # Get data to cuda if possible
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model2_2(data)
        loss = criterion(scores, targets)
        
        optimizer.zero_grad()
        loss.backward()
      
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%1000 == 999: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/1000))
            running_loss = 0

print('finished', epoch)

finished 99


In [None]:
print('train acc',check_accuracy(train_tf_loader2, model2_2))
print('val acc', check_accuracy(val_tf_loader2, model2_2))
print('test acc', check_accuracy(test_tf_loader2, model2_2))

Got 39896 / 40000 with accuracy 99.74
train acc None
Got 8157 / 10000 with accuracy 81.57
val acc None
Got 8091 / 10000 with accuracy 80.91
test acc None


**3. Dropout ResNet 18 - 0.15**

In [None]:
class BasicBlock_2(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock_2, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.dropout = nn.Dropout(0.15)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.dropout(self.conv2(out)))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet_2(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_2, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        m = nn.AdaptiveAvgPool2d(output_size=(1,1))
        out = m(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet18_2():
    return ResNet_2(BasicBlock_2, [2, 2, 2, 2])

def ResNet34_2():
    return ResNet_2(BasicBlock_2, [3, 4, 6, 3])


In [None]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model8 = ResNet18_2()
if torch.cuda.is_available():
    model8.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model8.parameters(), lr=learning_rate, momentum = 0.9, weight_decay=0.0001)

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader0):
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model8(data)
        loss = criterion(scores, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%200 == 199: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/200))
            running_loss = 0

print('finished', epoch)

[1,   200] loss:1.924
[1,   400] loss:1.578
[1,   600] loss:1.427
[2,   200] loss:1.269
[2,   400] loss:1.154
[2,   600] loss:1.093
[3,   200] loss:0.969
[3,   400] loss:0.942
[3,   600] loss:0.893
[4,   200] loss:0.809
[4,   400] loss:0.804
[4,   600] loss:0.777
[5,   200] loss:0.717
[5,   400] loss:0.701
[5,   600] loss:0.692
[6,   200] loss:0.654
[6,   400] loss:0.644
[6,   600] loss:0.621
[7,   200] loss:0.602
[7,   400] loss:0.596
[7,   600] loss:0.581
[8,   200] loss:0.531
[8,   400] loss:0.556
[8,   600] loss:0.548
[9,   200] loss:0.500
[9,   400] loss:0.510
[9,   600] loss:0.513
[10,   200] loss:0.467
[10,   400] loss:0.486
[10,   600] loss:0.464
[11,   200] loss:0.444
[11,   400] loss:0.438
[11,   600] loss:0.452
[12,   200] loss:0.421
[12,   400] loss:0.430
[12,   600] loss:0.418
[13,   200] loss:0.381
[13,   400] loss:0.412
[13,   600] loss:0.402
[14,   200] loss:0.361
[14,   400] loss:0.378
[14,   600] loss:0.398
[15,   200] loss:0.338
[15,   400] loss:0.358
[15,   600] los

In [None]:
#save trained model
PATH = './cifar_net.pth'
torch.save(model8.state_dict(), PATH)

In [None]:
print('train acc', check_accuracy(train_tf_loader0, model8))

print('val acc', check_accuracy(val_tf_loader0, model8))

print('test acc', check_accuracy(test_loader_0, model8))
                                                                                 

Got 39753 / 40000 with accuracy 99.38
train acc None
Got 9057 / 10000 with accuracy 90.57
val acc None
Got 9013 / 10000 with accuracy 90.13
test acc None


**3 Dropout 0.15 ResNet 34**

In [8]:
class BasicBlock_3(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock_3, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.dropout = nn.Dropout(0.15)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.dropout(self.conv2(out)))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet_3(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_3, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        m = nn.AdaptiveAvgPool2d(output_size=(1,1))
        out = m(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet18_3():
    return ResNet_3(BasicBlock_3, [2, 2, 2, 2])

def ResNet34_3():
    return ResNet_3(BasicBlock_3, [3, 4, 6, 3])


In [9]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model9 = ResNet34_3()
if torch.cuda.is_available():
    model9.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model9.parameters(), lr=learning_rate, momentum = 0.9, weight_decay=0.0001)

In [10]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader0):
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model9(data)
        loss = criterion(scores, targets)
        
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%200 == 199: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/200))
            running_loss = 0

print('finished', epoch)

[1,   200] loss:2.014
[1,   400] loss:1.642
[1,   600] loss:1.497
[2,   200] loss:1.350
[2,   400] loss:1.254
[2,   600] loss:1.113
[3,   200] loss:1.046
[3,   400] loss:0.969
[3,   600] loss:0.918
[4,   200] loss:0.870
[4,   400] loss:0.847
[4,   600] loss:0.786
[5,   200] loss:0.748
[5,   400] loss:0.732
[5,   600] loss:0.721
[6,   200] loss:0.666
[6,   400] loss:0.675
[6,   600] loss:0.636
[7,   200] loss:0.629
[7,   400] loss:0.624
[7,   600] loss:0.607
[8,   200] loss:0.578
[8,   400] loss:0.569
[8,   600] loss:0.559
[9,   200] loss:0.528
[9,   400] loss:0.538
[9,   600] loss:0.542
[10,   200] loss:0.505
[10,   400] loss:0.488
[10,   600] loss:0.510
[11,   200] loss:0.474
[11,   400] loss:0.471
[11,   600] loss:0.479
[12,   200] loss:0.440
[12,   400] loss:0.455
[12,   600] loss:0.449
[13,   200] loss:0.422
[13,   400] loss:0.418
[13,   600] loss:0.421
[14,   200] loss:0.401
[14,   400] loss:0.421
[14,   600] loss:0.399
[15,   200] loss:0.373
[15,   400] loss:0.385
[15,   600] los

In [11]:
#save trained model
PATH = './cifar_net.pth'
torch.save(model9.state_dict(), PATH)

In [12]:
print('train acc', check_accuracy(train_tf_loader0, model9))

print('val acc', check_accuracy(val_tf_loader0, model9))

print('test acc', check_accuracy(test_loader_0, model9))

Got 39676 / 40000 with accuracy 99.19
train acc None
Got 9145 / 10000 with accuracy 91.45
val acc None
Got 9076 / 10000 with accuracy 90.76
test acc None


**3 Dropout ResNet-34 0.2** 

In [None]:
class BasicBlock_02(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock_02, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.dropout = nn.Dropout(0.2)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.dropout(self.conv2(out)))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet_02(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_02, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        m = nn.AdaptiveAvgPool2d(output_size=(1,1))
        out = m(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet34_022():
    return ResNet_3(BasicBlock_02, [3, 4, 6, 3])


In [None]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model9a = ResNet34_022()
if torch.cuda.is_available():
    model9a.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model9a.parameters(), lr=learning_rate, momentum = 0.9, weight_decay=0.0001)

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader0):
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model9a(data)
        loss = criterion(scores, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%200 == 199: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/200))
            running_loss = 0

print('finished', epoch)

[1,   200] loss:2.067
[1,   400] loss:1.680
[1,   600] loss:1.562
[2,   200] loss:1.386
[2,   400] loss:1.309
[2,   600] loss:1.197
[3,   200] loss:1.122
[3,   400] loss:1.032
[3,   600] loss:0.975
[4,   200] loss:0.919
[4,   400] loss:0.874
[4,   600] loss:0.845
[5,   200] loss:0.788
[5,   400] loss:0.785
[5,   600] loss:0.748
[6,   200] loss:0.699
[6,   400] loss:0.711
[6,   600] loss:0.697
[7,   200] loss:0.648
[7,   400] loss:0.650
[7,   600] loss:0.633
[8,   200] loss:0.601
[8,   400] loss:0.601
[8,   600] loss:0.595
[9,   200] loss:0.571
[9,   400] loss:0.570
[9,   600] loss:0.542
[10,   200] loss:0.515
[10,   400] loss:0.516
[10,   600] loss:0.531
[11,   200] loss:0.488
[11,   400] loss:0.497
[11,   600] loss:0.495
[12,   200] loss:0.466
[12,   400] loss:0.458
[12,   600] loss:0.462
[13,   200] loss:0.428
[13,   400] loss:0.434
[13,   600] loss:0.449
[14,   200] loss:0.411
[14,   400] loss:0.406
[14,   600] loss:0.432
[15,   200] loss:0.392
[15,   400] loss:0.406
[15,   600] los

In [None]:
#save trained model
PATH = './cifar_net.pth'
torch.save(model9a.state_dict(), PATH)

In [None]:
print('train acc', check_accuracy(train_tf_loader0, model9a))

print('val acc', check_accuracy(val_tf_loader0, model9a))

print('test acc', check_accuracy(test_loader_0, model9a))

Got 39651 / 40000 with accuracy 99.13
train acc None
Got 9155 / 10000 with accuracy 91.55
val acc None
Got 9085 / 10000 with accuracy 90.85
test acc None


**3 ResNet 50 dropout 0.25**

In [None]:
class Bottleneck_5(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck_5, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)
        self.dropout = nn.Dropout(0.25)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.dropout(self.conv2(out))))
        out = self.bn3(self.dropout(self.conv3(out)))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet_5(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_5, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        m = nn.AdaptiveAvgPool2d(output_size=(1,1))
        out = m(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet50_5():
    return ResNet_5(Bottleneck_5, [3, 4, 6, 3])


In [None]:
in_channel = 3
num_classes = 10 
learning_rate = 0.01
batch_size = 100
num_epochs = 100

model10 = ResNet50_5()
if torch.cuda.is_available():
    model10.cuda()

criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(model10.parameters(), lr=learning_rate, momentum = 0.9, weight_decay=0.0001)

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_tf_loader0):
        # Get data to cuda if possible
        if torch.cuda.is_available():
          data = data.to(device=device)
          targets = targets.to(device=device)
        
        scores = model10(data)
        loss = criterion(scores, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx%200 == 199: 
            print('[%d, %5d] loss:%.3f'% (epoch +1, batch_idx+1, running_loss/200))
            running_loss = 0

print('finished', epoch)

[1,   200] loss:2.699
[1,   400] loss:2.135
[1,   600] loss:1.913
[2,   200] loss:1.789
[2,   400] loss:1.697
[2,   600] loss:1.639
[3,   200] loss:1.541
[3,   400] loss:1.493
[3,   600] loss:1.452
[4,   200] loss:1.360
[4,   400] loss:1.302
[4,   600] loss:1.280
[5,   200] loss:1.225
[5,   400] loss:1.181
[5,   600] loss:1.147
[6,   200] loss:1.129
[6,   400] loss:1.077
[6,   600] loss:1.072
[7,   200] loss:1.055
[7,   400] loss:1.002
[7,   600] loss:0.979
[8,   200] loss:0.948
[8,   400] loss:0.923
[8,   600] loss:0.946
[9,   200] loss:0.894
[9,   400] loss:0.879
[9,   600] loss:0.870
[10,   200] loss:0.830
[10,   400] loss:0.839
[10,   600] loss:0.838
[11,   200] loss:0.813
[11,   400] loss:0.783
[11,   600] loss:0.773
[12,   200] loss:0.750
[12,   400] loss:0.749
[12,   600] loss:0.723
[13,   200] loss:0.698
[13,   400] loss:0.712
[13,   600] loss:0.688
[14,   200] loss:0.691
[14,   400] loss:0.668
[14,   600] loss:0.653
[15,   200] loss:0.633
[15,   400] loss:0.622
[15,   600] los

In [None]:
#save trained model
PATH = './cifar_net.pth'
torch.save(model10.state_dict(), PATH)

In [None]:
print('train acc', check_accuracy(train_tf_loader0, model10))

print('val acc', check_accuracy(val_tf_loader0, model10))

print('test acc', check_accuracy(test_loader_0, model10))

Got 39250 / 40000 with accuracy 98.12
train acc None
Got 9060 / 10000 with accuracy 90.60
val acc None
Got 8954 / 10000 with accuracy 89.54
test acc None
