# Data Complexity
* Entropy
* Validation Accuracy
* model : Pretrained Resnet, Data : cifar-100

### 1. Pretrained 

In [57]:
import torchvision.models as models
from torchvision import datasets, transforms
import torch.nn as nn

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dsets
import torchvision.transforms as transforms

from torchsummary import summary
from tqdm.auto import tqdm

from ptflops import get_model_complexity_info 
## Reference of ptflops: https://github.com/sovrasov/flops-counter.pytorch

import random

In [60]:
random.random()

0.5848705988041283

In [51]:
# Global Variable For training
# You just use the following hyper-parameters
BATCH_SIZE = 80
NUM_EPOCH = 100
LEARNING_RATE = 0.01
CRITERION = nn.CrossEntropyLoss()

In [89]:
class LabelSmoothingLoss(nn.Module):
    def __init__(self, classes, smoothing=0.0, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # true_dist = pred.data.clone()
            true_dist = torch.zeros_like(pred)
            true_dist.fill_(self.smoothing / (self.cls - 1))
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))

In [90]:
def fit(model,train_loader, loss_idx):
    model.train()
    device = next(model.parameters()).device.index
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)
    losses = []
    for i, data in enumerate(train_loader):
        image = data[0].type(torch.FloatTensor).cuda(device)
        label = data[1].type(torch.LongTensor).cuda(device)

        pred_label = model(image)
        if loss_idx == 0 :
            CRITERION = nn.CrossEntropyLoss()
        elif loss_idx  == 1:

            weights = [random.random() for i in range(100)]
            weight = torch.FloatTensor(weights).cuda()
            CRITERION = nn.CrossEntropyLoss(weight=weight)
        elif loss_idx == 2 :
            CRITERION = LabelSmoothingLoss(classes=10, smoothing=0.5)
        elif loss_idx == 3 :
            CRITERION = nn.MultiLabelMarginLoss()
            
            
            
        loss = CRITERION(pred_label, label)
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    avg_loss = sum(losses)/len(losses)
    return avg_loss

def eval(model, test_loader):
    model.eval()
    device = next(model.parameters()).device.index
    pred_labels = []
    real_labels = []

    for i, data in enumerate(test_loader):
        image = data[0].type(torch.FloatTensor).cuda(device)
        label = data[1].type(torch.LongTensor).cuda(device)
        real_labels += list(label.cpu().detach().numpy())
        
        pred_label = model(image)
        pred_label = list(pred_label.cpu().detach().numpy())
        pred_labels += pred_label
        
    real_labels = np.array(real_labels)
    pred_labels = np.array(pred_labels)
    pred_labels = pred_labels.argmax(axis=1)
    acc = sum(real_labels==pred_labels)/len(real_labels)*100
    
    return acc


In [62]:
data_path = '/home/nakyil/jupyter/data'

In [63]:
# CIFAR10 Dataset
train_dataset = dsets.CIFAR100(root=data_path, train=True, 
                              transform=transforms.Compose([
                                            transforms.RandomCrop(32, padding=4),
                                            transforms.RandomHorizontalFlip(),
                                            transforms.ToTensor(),
                                            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                                        ]), download=True)
test_dataset = dsets.CIFAR100(root=data_path, train=False,
                             transform=transforms.Compose([
                                            transforms.ToTensor(),
                                            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                                        ]))
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Files already downloaded and verified


In [64]:
class Net18(nn.Module):
    def __init__(self, block, num_blocks, num_classes):
        super(Net18, self).__init__()
        self.inp = 64
        self.conv0 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn0 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512, num_classes)
        
    def _make_layer(self, block, oup, num_block, stride=1):
        layers = []
        strides = [stride] + [1]*(num_block-1)
        for stride in strides:
            layers.append(block(self.inp, oup, stride))
            self.inp = oup
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.relu(self.bn0(self.conv0(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [65]:
class ResidualBlock(nn.Module):
    def __init__(self, inp, oup, stride=1):
        super(ResidualBlock, self).__init__()
        
        #####################################
        
        # Write down your own code
        
        self.is_prj_sc = stride != 1
        
        if self.is_prj_sc : self.conv1x1 = nn.Conv2d(in_channels=inp,out_channels=oup,stride=2,kernel_size=1)
        
        self.conv0 = nn.Conv2d(in_channels=inp, out_channels=oup, kernel_size=3, stride=stride, padding=1, bias=False) 
        self.bn0 = nn.BatchNorm2d(oup)
        self.relu0 = nn.ReLU()
        self.conv1 = nn.Conv2d(in_channels=oup, out_channels=oup, kernel_size=3, stride=1, padding=1, bias=False) 
        self.bn1 = nn.BatchNorm2d(oup)
        self.relu1 = nn.ReLU()
        #####################################
        
    def forward(self, x1):
        
        #####################################
        
        # Write down your own code
        x = self.conv0(x1)
        x = self.bn0(x)
        x = self.relu0(x)
        x = self.conv1(x)
        x = self.bn1(x)
        out = self.relu1(x)
        if self.is_prj_sc :  
            x = self.conv1x1(x1) # for project shortcut
            out = out + x # residual connection
        #####################################
        return out

In [75]:
import time
from tqdm import tqdm
import pickle

train_list = []
NUM_EPOCH = 100
result_dict = {}


CRITERION_dict = {}
CRITERION_dict[0] = "Cross Entropy"
CRITERION_dict[1] = "Weighted Cross Entropy"
CRITERION_dict[2] = "Smooth Label Cross Entropy"
CRITERION_dict[3] = 'MultiLabelMarginLoss'

file_name = 'results.pkl'

for idx in range(4):
    ## Model initialization
    resnet = Net18(ResidualBlock, [2, 2, 2, 2], 100).cuda()
    train_list = []
    
    for i in tqdm(range(NUM_EPOCH)):
        start_time = time.time()
        loss = fit(resnet, train_loader, loss_idx=idx)
        test_acc = eval(resnet, test_loader)
        train_list.append({'epoch':i, 'loss':loss,'test_acc':test_acc, 'model':'resnet'})
        end_time = time.time()
        
    result_dict[idx] = {'loss':CRITERION_dict[idx], 'train_log':train_list}
    with open(file_name,'wb') as f:
        pickle.dump(result_dict,f)


  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:26<44:19, 26.87s/it][A
  2%|▏         | 2/100 [00:54<44:02, 26.96s/it][A
  3%|▎         | 3/100 [01:21<43:50, 27.12s/it][A
  4%|▍         | 4/100 [01:49<43:34, 27.23s/it][A
  5%|▌         | 5/100 [02:16<43:21, 27.38s/it][A
  6%|▌         | 6/100 [02:44<42:59, 27.44s/it][A
  7%|▋         | 7/100 [03:12<42:41, 27.54s/it][A
  8%|▊         | 8/100 [03:39<42:20, 27.61s/it][A
  9%|▉         | 9/100 [04:07<41:55, 27.65s/it][A
 10%|█         | 10/100 [04:35<41:29, 27.66s/it][A
 11%|█         | 11/100 [05:03<41:04, 27.69s/it][A
 12%|█▏        | 12/100 [05:30<40:34, 27.67s/it][A
 13%|█▎        | 13/100 [05:58<40:05, 27.65s/it][A
 14%|█▍        | 14/100 [06:25<39:34, 27.61s/it][A
 15%|█▌        | 15/100 [06:53<39:00, 27.54s/it][A
 16%|█▌        | 16/100 [07:20<38:30, 27.50s/it][A
 17%|█▋        | 17/100 [07:48<38:04, 27.52s/it][A
 18%|█▊        | 18/100 [08:15<37:39, 27.56s/it][A
 19%|█▉        | 19/100 [08:4

TypeError: cannot assign 'list' object to buffer 'weight' (torch Tensor or None required)

In [76]:
with open(file_name,'rb') as f:
    r = pickle.load(f)

In [83]:
for idx in range(1,4):
    ## Model initialization
    resnet = Net18(ResidualBlock, [2, 2, 2, 2], 100).cuda()
    train_list = []
    
    for i in tqdm(range(NUM_EPOCH)):
        start_time = time.time()
        loss = fit(resnet, train_loader, loss_idx=idx)
        test_acc = eval(resnet, test_loader)
        train_list.append({'epoch':i, 'loss':loss,'test_acc':test_acc, 'model':'resnet'})
        end_time = time.time()
        
    result_dict[idx] = {'loss':CRITERION_dict[idx], 'train_log':train_list}
    with open(file_name,'wb') as f:
        pickle.dump(result_dict,f)


  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:27<44:34, 27.01s/it][A
  2%|▏         | 2/100 [00:54<44:19, 27.14s/it][A
  3%|▎         | 3/100 [01:21<44:00, 27.22s/it][A
  4%|▍         | 4/100 [01:49<43:37, 27.27s/it][A
  5%|▌         | 5/100 [02:16<43:20, 27.38s/it][A
  6%|▌         | 6/100 [02:44<43:02, 27.47s/it][A
  7%|▋         | 7/100 [03:12<42:39, 27.52s/it][A
  8%|▊         | 8/100 [03:39<42:12, 27.53s/it][A
  9%|▉         | 9/100 [04:07<41:45, 27.53s/it][A
 10%|█         | 10/100 [04:35<41:24, 27.61s/it][A
 11%|█         | 11/100 [05:02<41:00, 27.65s/it][A
 12%|█▏        | 12/100 [05:30<40:35, 27.67s/it][A
 13%|█▎        | 13/100 [05:58<40:11, 27.72s/it][A
 14%|█▍        | 14/100 [06:26<39:45, 27.74s/it][A
 15%|█▌        | 15/100 [06:54<39:21, 27.78s/it][A
 16%|█▌        | 16/100 [07:21<38:52, 27.77s/it][A
 17%|█▋        | 17/100 [07:49<38:23, 27.76s/it][A
 18%|█▊        | 18/100 [08:17<37:50, 27.69s/it][A
 19%|█▉        | 19/100 [08:4

RuntimeError: The size of tensor a (80) must match the size of tensor b (100) at non-singleton dimension 1

In [87]:
def fit(model,train_loader, loss_idx):
    model.train()
    device = next(model.parameters()).device.index
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)
    losses = []
    for i, data in enumerate(train_loader):
        image = data[0].type(torch.FloatTensor).cuda(device)
        label = data[1].type(torch.LongTensor).cuda(device)

        pred_label = model(image)
        if loss_idx == 0 :
            CRITERION = nn.CrossEntropyLoss()
        elif loss_idx  == 1:

            weights = [random.random() for i in range(100)]
            weight = torch.FloatTensor(weights).cuda()

            CRITERION = nn.CrossEntropyLoss(weight=weight)
        elif loss_idx == 2 :
            CRITERION = nn.MultiLabelSoftMarginLoss()
        elif loss_idx == 3 :
            CRITERION = nn.MultiLabelMarginLoss()
            
        print("pred_label shape : {}, label : {}".format(pred_label.size(),label.size()))
            
        loss = CRITERION(pred_label, label)
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    avg_loss = sum(losses)/len(losses)
    return avg_loss

In [91]:
for idx in range(2,4):
    ## Model initialization
    resnet = Net18(ResidualBlock, [2, 2, 2, 2], 100).cuda()
    train_list = []
    
    for i in tqdm(range(NUM_EPOCH)):
        start_time = time.time()
        loss = fit(resnet, train_loader, loss_idx=idx)
        test_acc = eval(resnet, test_loader)
        train_list.append({'epoch':i, 'loss':loss,'test_acc':test_acc, 'model':'resnet'})
        end_time = time.time()
        
    result_dict[idx] = {'loss':CRITERION_dict[idx], 'train_log':train_list}
    with open(file_name,'wb') as f:
        pickle.dump(result_dict,f)


  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:26<44:10, 26.77s/it][A
  2%|▏         | 2/100 [00:53<43:52, 26.86s/it][A
  3%|▎         | 3/100 [01:21<43:40, 27.01s/it][A
  4%|▍         | 4/100 [01:48<43:19, 27.07s/it][A
  5%|▌         | 5/100 [02:15<43:01, 27.18s/it][A
  6%|▌         | 6/100 [02:43<42:46, 27.31s/it][A
  7%|▋         | 7/100 [03:11<42:27, 27.39s/it][A
  8%|▊         | 8/100 [03:38<42:01, 27.41s/it][A
  9%|▉         | 9/100 [04:05<41:34, 27.41s/it][A
 10%|█         | 10/100 [04:33<41:08, 27.43s/it][A
 11%|█         | 11/100 [05:00<40:40, 27.42s/it][A
 12%|█▏        | 12/100 [05:28<40:15, 27.45s/it][A
 13%|█▎        | 13/100 [05:55<39:54, 27.52s/it][A
 14%|█▍        | 14/100 [06:23<39:28, 27.54s/it][A
 15%|█▌        | 15/100 [06:51<39:02, 27.55s/it][A
 16%|█▌        | 16/100 [07:18<38:32, 27.53s/it][A
 17%|█▋        | 17/100 [07:46<38:01, 27.49s/it][A
 18%|█▊        | 18/100 [08:13<37:33, 27.48s/it][A
 19%|█▉        | 19/100 [08:4

RuntimeError: invalid argument 3: inconsistent target size at /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/generic/MultiLabelMarginCriterion.cu:45

In [95]:
result_dict[2]['loss'] =  'SmoothCrossEntropy'