In [None]:
import argparse
import os, sys
import time
import datetime

# Import pytorch dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm
import numpy as np
import copy

# You cannot change this line.
from tools.dataloader import CIFAR10

### Basic Convolution layer

In [None]:
class BasicConvLayer(nn.Module):
    """2-D convolution followed by batch normalisation (no activation).

    Args:
        input_filters: number of input channels of the convolution.
        output_filters: number of output channels; also the BatchNorm width.
        kernel_size: convolution kernel size.
        strides: convolution stride.
        padding_size: zero padding applied by the convolution.
    """
    def __init__(self, input_filters, output_filters, kernel_size=(3,3), strides=(1,1), padding_size=1):
        super(BasicConvLayer, self).__init__()
        # `strides` used to be accepted and then silently dropped; forward it
        # so that a non-default stride actually takes effect. The default
        # (1, 1) matches nn.Conv2d's own default, so existing callers are
        # unaffected.
        self.conv = nn.Conv2d(input_filters, output_filters, kernel_size,
                              stride=strides, padding=padding_size)
        self.bn = nn.BatchNorm2d(output_filters)

    def forward(self, x):
        """Return BatchNorm(Conv(x))."""
        return self.bn(self.conv(x))

### Residual Block

In [None]:
class ResidualBlock(nn.Module):
    """Residual wrapper: ReLU(shortcut(x) + sublayer(x)).

    With ``conv_path=False`` the shortcut is the identity. With
    ``conv_path=True`` the shortcut is a 1x1 ``BasicConvLayer`` mapping
    ``input_filters`` channels to ``output_filters`` channels; in that mode
    the two channel counts must differ.
    """
    def __init__(self, conv_path=False, input_filters=64, output_filters=64):
        super(ResidualBlock, self).__init__()
        self.conv_path = conv_path
        if not conv_path:
            # Identity shortcut: nothing else to build.
            return
        assert input_filters != output_filters
        self.conv = BasicConvLayer(input_filters, output_filters, (1,1), padding_size=0)

    def forward(self, x, sublayer):
        """Add the shortcut of ``x`` to ``sublayer(x)`` and apply ReLU.

        Args:
            x: input tensor.
            sublayer: callable applied to ``x`` to produce the residual branch.
        """
        shortcut = self.conv(x) if self.conv_path else x
        return F.relu(shortcut + sublayer(x))

### Convolution Block

In [None]:
class ConvBlock(nn.Module):
    """Two stacked ``BasicConvLayer``s with a ReLU between them.

    The first layer maps ``input_filters`` to ``output_filters`` channels and
    the second keeps ``output_filters``; both receive the same
    ``kernel_size`` and ``strides``. No activation is applied after the
    second layer.
    """
    def __init__(self, input_filters, output_filters, kernel_size=(3,3), strides=(1,1)):
        super(ConvBlock, self).__init__()
        shared = dict(kernel_size=kernel_size, strides=strides)
        self.conv1 = BasicConvLayer(input_filters, output_filters, **shared)
        self.conv2 = BasicConvLayer(output_filters, output_filters, **shared)

    def forward(self, x):
        """Return conv2(ReLU(conv1(x)))."""
        hidden = F.relu(self.conv1(x))
        return self.conv2(hidden)

### Model

In [None]:
def clones(module, N):
    """Return an ``nn.ModuleList`` holding ``N`` independent deep copies of ``module``."""
    replicas = nn.ModuleList()
    for _ in range(N):
        replicas.append(copy.deepcopy(module))
    return replicas

class ResNet(nn.Module):
    """Residual CNN classifier built from ``ConvBlock``/``ResidualBlock`` stages.

    Layout: a 2x2 conv + BatchNorm stem and 2x2 max-pool, then four stages of
    identity residual blocks (3 at 32 channels, 3 at 64, 5 at 128, 2 at 256)
    separated by projection residual blocks that change the channel count,
    followed by average pooling and three fully connected layers.

    ``linear1`` expects 12544 = 256 * 7 * 7 input features, which is what the
    layers above produce for 32x32 inputs.

    Args:
        input_size: number of channels of the input images.
        output_size: number of classes (width of the final linear layer).
    """
    def __init__(self, input_size, output_size):
        super(ResNet, self).__init__()
        # NOTE: attribute names and construction order are kept stable so that
        # existing state_dict checkpoints still load and seeded
        # initialisation is unchanged.
        self.convBN = BasicConvLayer(input_size, 32, kernel_size=(2,2), strides=(1,1))
        self.maxP = nn.MaxPool2d((2, 2), stride=(2, 2))
        # 13 parameter-free identity residual wrappers, one per ConvBlock in
        # layer1..layer4 (3 + 3 + 5 + 2).
        self.resLayers = clones(ResidualBlock(), 13)
        # Projection residual blocks used where the channel count changes.
        self.resconvLayer1 = ResidualBlock(conv_path=True, input_filters=32, output_filters=64)
        self.resconvLayer2 = ResidualBlock(conv_path=True, input_filters=64, output_filters=128)
        self.resconvLayer3 = ResidualBlock(conv_path=True, input_filters=128, output_filters=256)
        self.transconvBn1 = ConvBlock(32, 64)
        self.transconvBn2 = ConvBlock(64, 128)
        self.transconvBn3 = ConvBlock(128, 256)
        self.layer1 = clones(ConvBlock(32, 32), 3)
        self.layer2 = clones(ConvBlock(64, 64), 3)
        self.layer3 = clones(ConvBlock(128, 128), 5)
        self.layer4 = clones(ConvBlock(256, 256), 2)
        self.avgP = nn.AvgPool2d((3,3), stride=(2,2))
        self.linear1 = nn.Linear(12544, 4096)
        self.linear2 = nn.Linear(4096, 1024)
        self.linear3 = nn.Linear(1024, output_size)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, output_size).

        No softmax is applied here: ``nn.CrossEntropyLoss`` applies
        log-softmax itself, so feeding it probabilities would apply softmax
        twice. The arg-max over dim 1 is the same for logits and
        probabilities; call ``F.softmax(out, dim=1)`` on the result if
        probabilities are needed.
        """
        x = self.maxP(self.convBN(x))

        # Consume the identity residual wrappers in order, one per ConvBlock.
        identity_blocks = iter(self.resLayers)

        for body in self.layer1:
            x = next(identity_blocks)(x, body)
        x = self.resconvLayer1(x, self.transconvBn1)

        for body in self.layer2:
            x = next(identity_blocks)(x, body)
        x = self.resconvLayer2(x, self.transconvBn2)

        for body in self.layer3:
            x = next(identity_blocks)(x, body)
        x = self.resconvLayer3(x, self.transconvBn3)

        for body in self.layer4:
            x = next(identity_blocks)(x, body)

        x = self.avgP(x)
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        return self.linear3(x)
        

## Training

In [None]:
# Hyperparameters and paths used by the cells below.

# Mini-batch sizes for the training and validation data loaders.
TRAIN_BATCH_SIZE, VAL_BATCH_SIZE = 256, 100

# SGD settings: starting learning rate and momentum.
INITIAL_LR, MOMENTUM = 0.05, 0.9

# Regularisation strength.
# NOTE(review): the optimizer cell hard-codes weight_decay=1e-5 and does not
# read this constant -- confirm which value is intended.
REG = 1e-4

# Upper bound of the epoch loop.
EPOCHS = 700

# Dataset root directory and the directory checkpoints are written to.
DATAROOT = "./data"
CHECKPOINT_PATH = "./saved_model"

In [None]:
# Per-channel normalisation statistics, shared by the training and validation
# pipelines so the two cannot drift apart.
NORM_MEAN = [0.4914, 0.4822, 0.4465]
NORM_STD = [0.2023, 0.1994, 0.2010]

# Training pipeline: random crop / flip / rotation augmentation, then tensor
# conversion and normalisation. To train without augmentation, use the same
# steps as `transform_val`.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)])

# Validation pipeline: no augmentation, only tensor conversion and normalisation.
transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),])

# Training data is reshuffled every epoch; validation data is read in order.
trainset = CIFAR10(root=DATAROOT, train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=1)
valset = CIFAR10(root=DATAROOT, train=False, download=True, transform=transform_val)
valloader = torch.utils.data.DataLoader(valset, batch_size=VAL_BATCH_SIZE, shuffle=False, num_workers=1)

In [None]:
# Restrict this process to one GPU.
# NOTE(review): the index is hard-coded to 7 -- confirm it matches the target
# machine.
os.environ['CUDA_VISIBLE_DEVICES']='7'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 3 input channels, 10 output classes.
net = ResNet(3, 10)

# Xavier-initialise every weight tensor with more than one dimension;
# 1-D parameters (biases, BatchNorm scale/shift) keep their defaults.
# `xavier_normal_` is the in-place replacement for the deprecated
# `xavier_normal`.
for p in net.parameters():
    if p.dim() > 1:
        nn.init.xavier_normal_(p)

net = net.to(device)
if device =='cuda':
    print("Train on GPU...")
else:
    print("Train on CPU...")

In [None]:
# Checkpoint file to resume from.
CKPT_PATH = "./saved_model/resnet.h5"
# When True, training starts from epoch 0 with INITIAL_LR even if a checkpoint
# exists; set to False to resume from CKPT_PATH.
TRAIN_FROM_SCRATCH = True
def get_checkpoint(ckpt_path):
    """Load a checkpoint from ``ckpt_path``, or return None if it cannot be loaded.

    Best-effort by design: any failure (missing file, unreadable file, ...) is
    printed and reported as ``None`` so the caller can fall back to training
    from scratch.

    Args:
        ckpt_path: path of a file written with ``torch.save``.

    Returns:
        The deserialised checkpoint object, or ``None`` on failure.
    """
    try:
        # Map all tensors to CPU so a checkpoint saved from a GPU run can also
        # be opened on a CPU-only machine. `load_state_dict` copies values into
        # the model's existing parameters, so the model's device is unaffected.
        return torch.load(ckpt_path, map_location='cpu')
    except Exception as e:
        print(e)
        return None

# Only read the checkpoint when a resume was actually requested; otherwise a
# missing file would print a spurious load error before training from scratch.
ckpt = None if TRAIN_FROM_SCRATCH else get_checkpoint(CKPT_PATH)
if ckpt is None:
    if not TRAIN_FROM_SCRATCH:
        print("Checkpoint not found.")
    print("Training from scratch ...")
    start_epoch = 0
    current_learning_rate = INITIAL_LR
else:
    print("Successfully loaded checkpoint: %s" %CKPT_PATH)
    net.load_state_dict(ckpt['net'])
    # The checkpoint stores the last completed epoch, so resume at the next one.
    start_epoch = ckpt['epoch'] + 1
    current_learning_rate = ckpt['lr']
    print("Starting from epoch %d " %start_epoch)

print("Starting from learning rate %f:" %current_learning_rate)

In [None]:
# Loss function. Placed on `device` (not an unconditional .cuda()) so the
# CPU fallback selected earlier keeps working.
criterion = nn.CrossEntropyLoss().to(device)

# SGD optimizer. Start from `current_learning_rate` so that a learning rate
# restored from a checkpoint is actually used; when training from scratch it
# equals INITIAL_LR.
# NOTE(review): weight_decay is hard-coded to 1e-5 while the hyperparameter
# cell defines REG = 1e-4 -- confirm which value is intended.
optimizer = torch.optim.SGD(net.parameters(), lr=current_learning_rate, momentum=MOMENTUM, weight_decay=1e-5)

In [None]:
# Learning-rate schedule: multiply the learning rate by DECAY every
# DECAY_EPOCHS epochs (never at epoch 0).
DECAY_EPOCHS = 10
DECAY = 0.9

global_step = 0
best_val_acc = 0

for i in range(start_epoch, EPOCHS):
    print(datetime.datetime.now())
    # ---- Train for one epoch ----
    net.train()
    print("Epoch %d:" %i)

    total_examples = 0
    correct_examples = 0
    train_loss = 0.0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Copy the batch to the training device
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward / backward / parameter update
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Running accuracy and loss. `.item()` converts to Python numbers so
        # the accumulators do not keep tensors (and their autograd history)
        # alive across the epoch.
        _, predicted = torch.max(outputs, dim=1)
        total_examples += inputs.shape[0]
        correct_examples += (predicted == targets).sum().item()
        train_loss += loss.item()

        global_step += 1

    # Average over ALL batches of the epoch. Previously this was only
    # refreshed when global_step hit a multiple of 100, so the printed value
    # was a mid-epoch snapshot or the previous epoch's validation loss.
    avg_loss = train_loss / len(trainloader)
    avg_acc = float(correct_examples) / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))
    print(datetime.datetime.now())

    # ---- Validate ----
    total_examples = 0
    correct_examples = 0
    val_loss = 0.0

    net.eval()
    # No gradients are needed (or produced) during validation.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            _, predicted = outputs.max(1)
            total_examples += inputs.shape[0]
            correct_examples += (predicted == targets).sum().item()
            val_loss += loss.item()

    avg_loss = val_loss / len(valloader)
    avg_acc = float(correct_examples) / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

    # ---- Learning-rate decay ----
    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate = current_learning_rate * DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" %current_learning_rate)

    # ---- Checkpoint the best model so far (by validation accuracy) ----
    # NOTE(review): this writes 'resnet2.h5' while the resume cell reads
    # "./saved_model/resnet.h5" -- confirm the two names are meant to differ.
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        os.makedirs(CHECKPOINT_PATH, exist_ok=True)
        state = {'net': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_PATH, 'resnet2.h5'))

print("Optimization finished.")

### Test data prediction

In [None]:
net.eval()
# Rows of the submission file; the first row is the CSV header.
all_predicted = [['Id', 'Category']]

# Number of test images pushed through the network per forward pass.
TEST_BATCH_SIZE = 1000

test_set = np.load('cifar10-batches-images-test.npy')

# Apply the validation preprocessing to every image and stack the results into
# one float32 tensor of shape (num_images, 3, 32, 32).
test_inputs = torch.stack(
    [transform_val(image).view((3,32,32)) for image in test_set]
).float()

with torch.no_grad():
    # Step over the data in fixed-size chunks; the last chunk may be shorter,
    # so this works for any number of test images (not only 10 x 1000).
    for start in range(0, len(test_inputs), TEST_BATCH_SIZE):
        batch = test_inputs[start:start + TEST_BATCH_SIZE].to(device)
        _, predicted = net(batch).max(1)
        # Row id = position of the image in the test file.
        for offset, label in enumerate(predicted.cpu().tolist()):
            all_predicted.append([start + offset, label])

print("Predicted %d test images" % (len(all_predicted) - 1))
            

In [None]:
# Write one "Id,Category" line per row of `all_predicted`. The context manager
# guarantees the file is closed even if a write fails part-way through.
with open('result.csv', 'w') as f:
    for row in all_predicted:
        f.write(str(row[0])+','+str(row[1]) + '\n')