In [90]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd

In [91]:
# Per-channel mean / std handed to Normalize in both pipelines.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: AutoAugment (CIFAR10 policy), padded random crop,
# random horizontal flip, tensor conversion, then normalisation.
train_steps = [
    transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
transform_train = transforms.Compose(train_steps)

# Evaluation pipeline: tensor conversion and normalisation only.
test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
transform_test = transforms.Compose(test_steps)

# Training split, shuffled, in batches of 128.
trainset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = DataLoader(dataset=trainset, batch_size=128, shuffle=True, num_workers=6)

# Held-out split, fixed order, in batches of 256.
testset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = DataLoader(dataset=testset, batch_size=256, shuffle=False, num_workers=6)

Files already downloaded and verified
Files already downloaded and verified


In [92]:
# Distinct training labels together with how often each one occurs.
np.unique(np.asarray(trainset.targets), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]))

In [93]:
# Distinct test labels together with how often each one occurs.
np.unique(np.asarray(testset.targets), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]))

In [94]:
# Show the dataset's class-name -> integer-label mapping.
trainset.class_to_idx

{'airplane': 0,
 'automobile': 1,
 'bird': 2,
 'cat': 3,
 'deer': 4,
 'dog': 5,
 'frog': 6,
 'horse': 7,
 'ship': 8,
 'truck': 9}

In [95]:
# Select the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
class BasicBlock(nn.Module):
    """Residual block: two conv+BN stages plus a shortcut added before the final ReLU.

    Args:
        in_planes: number of input channels.
        planes: number of output channels of both convolutions.
        kernel_size: kernel size of the two main-path convolutions
            (odd sizes keep the spatial size at stride 1).
        skip_kernel: kernel size of the projection convolution on the shortcut.
        stride: stride of the first convolution and of the projection shortcut.

    The shortcut is the identity unless the stride or the channel count
    changes, in which case it is a conv+BN projection.
    """

    def __init__(self, in_planes, planes, kernel_size, skip_kernel, stride=1):
        super(BasicBlock, self).__init__()
        # Padding follows the kernel size so the main path and the shortcut end
        # up with matching spatial shapes. For the 3x3 / 1x1 configuration this
        # is exactly the previous padding (1 and 0 respectively).
        conv_padding = kernel_size // 2
        skip_padding = skip_kernel // 2

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=kernel_size, stride=stride,
                               padding=conv_padding, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=kernel_size, stride=1,
                               padding=conv_padding, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Empty Sequential acts as the identity shortcut.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=skip_kernel, stride=stride,
                          padding=skip_padding, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        """Return relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

# ResNet architecture
class ResNet(nn.Module):
    """Configurable ResNet classifier for 32x32 inputs, assembled from ``BasicBlock`` layers.

    Args:
        N: number of residual layers.
        B: number of residual blocks in residual layer i.
        C: number of channels in residual layer i.
        F: convolution kernel size in residual layer i.
        K: skip-connection kernel size in residual layer i.
        P: average-pool kernel size.
        num_classes: number of output logits.

    Raises:
        ValueError: if ``N < 1``, if any per-layer list has fewer than ``N``
            entries, or if the pooled feature map would be empty for a
            32x32 input.

    The first residual layer uses stride 1; every later layer uses stride 2.
    Each residual layer is registered as ``self.layer1`` ... ``self.layerN``
    and is also kept, in order, in the plain list ``self.layers``.
    """

    def __init__(self,N:int, B:list, C:list, F:list, K:list, P:int, num_classes=10):
        super(ResNet, self).__init__()
        # NOTE: inside this method the parameter ``F`` (kernel sizes) shadows the
        # module-level ``torch.nn.functional as F`` alias, which is not needed here.
        if N < 1:
            raise ValueError("N must be at least 1, got {}".format(N))
        for label, values in (("B", B), ("C", C), ("F", F), ("K", K)):
            if len(values) < N:
                raise ValueError(
                    "{} needs at least N={} entries, got {}".format(label, N, len(values)))

        self.in_planes = C[0]
        self.block = BasicBlock
        self.N = N                # No. of Residual Layers
        self.B = B                # No. of Residual Blocks in Residual Layer i
        self.C = C                # No. of channels in Residual Layer i
        self.F = F                # Conv. kernel size in Residual Layer i
        self.K = K                # Skip connection kernel size in Residual Layer i
        self.P = P                # Average pool kernel size
        self.layers = []          # layers container
        self.S = [2] * N          # strides for layers
        self.S[0] = 1

        # Output linear layer input dimension: channels of the last layer times
        # the pooled spatial area (32x32 input, halved by every layer after the
        # first, then average-pooled with kernel P).
        pooled_side = 32 // (P * 2 ** (N - 1))
        if pooled_side < 1:
            raise ValueError(
                "N={} and P={} leave no spatial features for a 32x32 input".format(N, P))
        self.outLayerInSize = C[N-1] * pooled_side * pooled_side

        # Print Model Config
        print("\n\nModel Config: "
            "\n-------------------------------------"
            "\nN (# Layers)\t:",self.N,
            "\nB (# Blocks)\t:",self.B,
            "\nC (# Channels)\t:",C,
            "\nF (Conv Kernel)\t:",F,
            "\nK (Skip Kernel)\t:",K,
            "\nP (Pool Kernel)\t:",P,)

        # Stem convolution; padding follows the kernel size (1 for a 3x3 kernel).
        self.conv1 = nn.Conv2d(3, C[0], kernel_size=F[0], stride=1, padding=F[0] // 2, bias=False)
        self.bn1 = nn.BatchNorm2d(C[0])
        for i in range(N):
            stage = self._make_layer(self.block, self.C[i], self.B[i], self.F[i], self.K[i], self.S[i])
            # setattr registers the stage as a submodule named layer<i+1>.
            setattr(self, "layer{}".format(i + 1), stage)
            self.layers.append(stage)
        self.linear = nn.Linear(self.outLayerInSize, num_classes)
        self.initialize_weights()

    def _make_layer(self, block, planes, num_blocks, kernel_size, skip_kernel, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``, the rest use 1."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, kernel_size, skip_kernel, stride))
            # Later blocks in this layer (and the next layer) take `planes` channels as input.
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        """Stem conv+BN+ReLU, residual layers in order, average pool, flatten, linear."""
        out = F.relu(self.bn1(self.conv1(x)))
        for layer in self.layers:
            out = layer(out)
        out = F.avg_pool2d(out, self.P)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

    def initialize_weights(self):
        """Kaiming-normal (fan_out, relu) init for conv and linear weights; biases set to 0."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


def Final_Model():
    """Build the ResNet configuration used for the final run.

    Four residual layers with 3x3 convolutions, 1x1 skip convolutions and an
    average-pool kernel of 4.
    """
    blocks_per_layer = [3, 3, 2, 3]
    channels_per_layer = [94, 192, 120, 200]
    conv_kernels = [3, 3, 3, 3]
    skip_kernels = [1, 1, 1, 1]
    pool_kernel = 4

    return ResNet(
        len(blocks_per_layer),
        blocks_per_layer,
        channels_per_layer,
        conv_kernels,
        skip_kernels,
        pool_kernel,
    )


In [96]:
# Build the network and place it on the target device *before* the optimizer is
# created, as the torch.optim documentation recommends, so the optimizer is
# constructed from the parameters in their final location.
model = Final_Model().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(model.parameters(), lr=1e-1, weight_decay=5e-4)
# Cosine annealing over T_max=200 scheduler steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
# Alternative schedule that was tried:
# scheduler = StepLR(optimizer, step_size=15, gamma=0.1)



Model Config: 
-------------------------------------
N (# Layers)	: 4 
B (# Blocks)	: [3, 3, 2, 3] 
C (# Channels)	: [94, 192, 120, 200] 
F (Conv Kernel)	: [3, 3, 3, 3] 
K (Skip Kernel)	: [1, 1, 1, 1] 
P (Pool Kernel)	: 4


In [97]:
# Count trainable parameters. A list (instead of a one-shot `filter` iterator)
# keeps `model_parameters` reusable after this cell has run, and `numel()`
# yields a plain Python int rather than a NumPy scalar.
model_parameters = [p for p in model.parameters() if p.requires_grad]
sum(p.numel() for p in model_parameters)

4988256

In [98]:
def train(model, device, train_loader, criterion, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Puts the model in training mode, performs a forward/backward/step cycle
    per batch, and prints the batch loss on every 100th batch (including the
    first). ``epoch`` is only used in the log line. Returns nothing.
    """
    model.train()
    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        # Only every 100th batch is logged.
        if step % 100 != 0:
            continue

        samples_seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, len(train_loader.dataset),
            percent_done, batch_loss.item()))

def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Use CUDA when it is available, otherwise the CPU, and place the model there.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Number of passes over the training set.
epochs = 200

# Per epoch: optimise on the training loader, report metrics on the test
# loader, then advance the learning-rate schedule by one step.
for epoch in range(1, epochs + 1):
    train(model=model, device=device, train_loader=trainloader,
          criterion=criterion, optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=testloader, criterion=criterion)
    scheduler.step()



Test set: Average loss: 0.0070, Accuracy: 3905/10000 (39%)


Test set: Average loss: 0.0060, Accuracy: 4814/10000 (48%)


Test set: Average loss: 0.0048, Accuracy: 5944/10000 (59%)


Test set: Average loss: 0.0037, Accuracy: 6831/10000 (68%)


Test set: Average loss: 0.0032, Accuracy: 7202/10000 (72%)


Test set: Average loss: 0.0035, Accuracy: 7158/10000 (72%)


Test set: Average loss: 0.0034, Accuracy: 7319/10000 (73%)


Test set: Average loss: 0.0043, Accuracy: 6858/10000 (69%)


Test set: Average loss: 0.0032, Accuracy: 7487/10000 (75%)


Test set: Average loss: 0.0025, Accuracy: 7988/10000 (80%)


Test set: Average loss: 0.0026, Accuracy: 7890/10000 (79%)


Test set: Average loss: 0.0030, Accuracy: 7729/10000 (77%)


Test set: Average loss: 0.0024, Accuracy: 7957/10000 (80%)


Test set: Average loss: 0.0021, Accuracy: 8244/10000 (82%)


Test set: Average loss: 0.0022, Accuracy: 8211/10000 (82%)


Test set: Average loss: 0.0024, Accuracy: 7946/10000 (79%)


Test set: Average loss:

In [99]:
# Persist the model object itself (not only a state_dict) to the file "S-95-x".
# NOTE(review): loading such a file presumably needs the same class definitions in scope — confirm.
torch.save(model,"S-95-x")

In [100]:
def unpickle(file):
    """Load and return the pickled object stored at ``file`` (decoded with ``encoding='bytes'``)."""
    import pickle
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')
def model_test(model, data_path):
    """Predict a class index for every image stored in the pickle at ``data_path``.

    Args:
        model: trained network; switched to eval mode. Inference runs on the
            device of its first parameter (CPU if it has no parameters).
        data_path: path to a pickle whose ``b'data'`` entry holds the images.

    Returns:
        List with one predicted class index per image, in input order.
    """
    batch = unpickle(data_path)
    data_test = batch[b'data']
    model.eval()

    # Follow the model's own device instead of assuming CUDA, so this also
    # works on CPU-only machines.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    pred = []
    with torch.no_grad():
        for i in range(data_test.shape[0]):
            # Reshape to (3, 32, 32) and move channels last for PIL.
            # NOTE(review): assumes each entry is a flat channel-first 3072-value
            # image — confirm against the pickle layout.
            img_rgb = data_test[i].reshape(3, 32, 32).transpose((1, 2, 0))

            # Convert to a PIL image so the evaluation transform can be applied.
            img_pil = Image.fromarray(img_rgb)
            img_tensor = transform_test(img_pil)

            # Add a batch dimension and send the image to the model's device.
            img_tensor = img_tensor.unsqueeze(0).to(target_device)
            output = model(img_tensor)
            _, predicted = torch.max(output, 1)
            pred.extend(predicted.cpu().numpy())

    return pred

In [101]:
# Predict labels for the unlabeled test file, then tally predictions per class.
pred = model_test(model=model, data_path="cifar_test_nolabels.pkl")
np.unique(np.asarray(pred), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([ 856,  986,  955, 1056,  972, 1088,  944, 1024, 1061, 1058]))

In [102]:
# Build the submission frame from the IDs stored in the unlabeled test file.
# Reuses the `unpickle` helper already defined next to `model_test` (no second
# definition) and avoids naming a variable `dict`, which would shadow the
# built-in for the rest of the session.
test_batch = unpickle("cifar_test_nolabels.pkl")
test_ids = test_batch[b'ids']
assert len(test_ids) == len(pred), "number of IDs and number of predictions differ"
df = pd.DataFrame({"ID" : test_ids, "Labels" : pred})

In [103]:
# Save the model object a second time under a descriptive file name.
torch.save(model, "test-model-Resnet-adaptiveaugment-heinit-S2.pt")

In [104]:
# Write the submission table to CSV without the index column.
df.to_csv(path_or_buf="SSubApr11-1.csv", index=False)

In [105]:
# Sanity check: reload the written file and count predictions per label.
(
    pd.read_csv("SSubApr11-1.csv")
    .loc[:, "Labels"]
    .value_counts()
)

Labels
5    1088
8    1061
9    1058
3    1056
7    1024
1     986
4     972
2     955
6     944
0     856
Name: count, dtype: int64