In [1]:
%matplotlib inline
import torch
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time 

In [2]:
torch.cuda.empty_cache()

In [3]:
batch_size_train = 128
# learning_rate = 0.01
num_workers = 2
pin_memory = True
random_seed = 42

criterion = nn.CrossEntropyLoss()
early_stop = 20


momentum = 0.9
nesterov = True
weight_decay = 5e-4

In [4]:
transform = transforms.Compose(
    [
     transforms.ToTensor(),
    ]
)

In [5]:
trainset = torchvision.datasets.CIFAR10(root='./cifar-data', train=True, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./cifar-data/cifar-10-python.tar.gz to ./cifar-data


In [6]:
testset = torchvision.datasets.CIFAR10(root='./cifar-data', train=False, download=True, transform=transform)

Files already downloaded and verified


In [7]:
def nomarllization(dataset):
  mean = np.mean(dataset.data, axis=(0,1,2))/255
  std  = np.mean(dataset.data, axis=(0,1,2))/255
  return transforms.Normalize(mean=mean, std=std)

In [8]:
train_transf = transforms.Compose(
    [
     transforms.Resize((40, 40)),     
     transforms.RandomCrop((32, 32)),  
     transforms.RandomHorizontalFlip(),
     transforms.RandomRotation(15),
     transforms.ToTensor(),
     nomarllization(trainset)
    ]
)

In [9]:
test_transf = transforms.Compose(
    [
     transforms.ToTensor(),
     nomarllization(testset)
    ]
)

In [10]:
trainset = torchvision.datasets.CIFAR10(root='./cifar-data', train=True, download=True, transform=train_transf)

Files already downloaded and verified


In [11]:
validset = torchvision.datasets.CIFAR10(root='./cifar-data', train=True, download=True, transform=train_transf)

Files already downloaded and verified


In [12]:
testset = torchvision.datasets.CIFAR10(root='./cifar-data', train=False, download=True, transform=test_transf)

Files already downloaded and verified


In [13]:
classes = trainset.class_to_idx

In [14]:
trainset

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./cifar-data
    Split: Train
    StandardTransform
Transform: Compose(
               Resize(size=(40, 40), interpolation=PIL.Image.BILINEAR)
               RandomCrop(size=(32, 32), padding=None)
               RandomHorizontalFlip(p=0.5)
               RandomRotation(degrees=[-15.0, 15.0], resample=False, expand=False)
               ToTensor()
               Normalize(mean=[0.49139968 0.48215841 0.44653091], std=[0.49139968 0.48215841 0.44653091])
           )

In [15]:
num_train = len(trainset)
indices = list(range(num_train))
split = int(np.floor(num_train*0.8/5))

np.random.seed(random_seed)
np.random.shuffle(indices)

train_idx, val_idx = indices[split:], indices[:split]

train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_idx)
valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_idx)

trainloader = torch.utils.data.DataLoader(
    trainset, 
    batch_size=batch_size_train, 
    sampler=train_sampler, 
    num_workers=num_workers, 
    pin_memory = pin_memory)

validloader = torch.utils.data.DataLoader(
    trainset, 
    batch_size=batch_size_train, 
    sampler=valid_sampler, 
    num_workers=num_workers, 
    pin_memory = pin_memory)


In [16]:
split

8000

In [17]:
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size_train, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)

In [18]:
# X_train, X_validate, y_train, y_validate = train_test_split(trainloader.dataset.data, trainloader.dataset.targets, train_size=0.7, random_state=random_seed)

In [19]:
 train, validate = torch.utils.data.random_split(
    trainset,
    [num_train-split, split])
 trainloader = torch.utils.data.DataLoader(
     train, 
     batch_size=batch_size_train, 
     shuffle=True,
     num_workers=num_workers,
     pin_memory=pin_memory
     )
 validloader = torch.utils.data.DataLoader(
     validate,
     batch_size=batch_size_train, 
     shuffle=True,
     num_workers=num_workers,
     pin_memory=pin_memory
     )

In [20]:
n_cols = 10
n_rows = 10
n      = n_cols * n_rows

In [21]:
train_images = [trainloader.dataset.dataset.data[i] for i in train_idx]
train_labels = [trainloader.dataset.dataset.targets[i] for i in train_idx]

validate_labels = [validloader.dataset.dataset.targets[i] for i in val_idx]
test_labels = testloader.dataset.targets

In [22]:
def show_img_per_class():
  fig, ax = plt. subplots(n_rows, n_cols + 1, figsize=(15,15))
  fig.tight_layout(pad=0.3)
  for _, (k,v) in enumerate(classes.items()):
    random_sample = np.random.choice([i for i, c in enumerate(train_labels) if c == v], n_rows)
    ax[v, 0].text(0.5, 0.5, k, fontsize=15, fontweight='bold', color='white')
    ax[v, 0].axis("off")
    for idx, imidx in enumerate(random_sample):
      ax[v, idx+1].imshow(train_images[imidx])
      ax[v, idx+1].axis("off")
  plt.show()

In [23]:
# show_img_per_class()

In [24]:
# # bins = np.arange(0, 10.5) - 0.5
# # plt.hist(trainloader.dataset.targets, density=True, bins=bins,edgecolor='k')
# # plt.xticks(bins + 0.5)

stat =  pd.DataFrame({'Label': np.array(train_labels), 'Set_type':'train'})
stat = stat.append(pd.DataFrame({'Label': np.array(validate_labels), 'Set_type':'validate'}),ignore_index=True)
stat = stat.append(pd.DataFrame({'Label': test_labels,'Set_type':'test'}),ignore_index=True)

In [25]:
plt.figure(figsize=(20,10))
# sns.countplot(x='Label',hue='Set_type',data=stat)

<Figure size 1440x720 with 0 Axes>

<Figure size 1440x720 with 0 Axes>

In [26]:
class Net(nn.Module):
    def __init__(self):
      super(Net, self).__init__()
      self.conv_layer = nn.Sequential(
        # Conv Layer block 1
        nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),

        # Conv Layer block 2
        nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout2d(p=0.05),

        # Conv Layer block 3
        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
      )

      self.fc_layer = nn.Sequential(
        nn.Dropout(p=0.1),
        nn.Linear(4096, 1024),
        nn.ReLU(inplace=True),
        nn.Linear(1024, 512),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.1),
        nn.Linear(512, 10)
      )
    def forward(self, x):
      # conv layers
      x = self.conv_layer(x)    
      # flatten
      x = x.view(x.size(0), -1)    
      # fc layer
      x = self.fc_layer(x)
      return x

In [27]:
def train(network, device):
  # epoch_start_time = time.time()
  running_loss = 0
  accuracy = 0
  network.train()
  for batch_idx, (data, target) in enumerate(trainloader):
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    output = network(data)
    # loss = F.nll_loss(F.log_softmax(output), target)
    loss = criterion(output,target)
    loss.backward()
    optimizer.step()
  print("Number of batches: ", ite)
  print("Accurary average over batches: ", accuracy)
  print("Loss average over batches    : ",running_loss)
    # if batch_idx == 1:
    #   network.eval()
    #   pout = network(data)
    #   pred = torch.argmax(pout, dim=1)
    #   print("pred  : ",pred)
    #   print("target: ",target)
    #   print((pred==target).sum())
    #   wrong_pred = data[pred!=target].cpu()
    #   wrong_id = (pred!=target.view_as(pred)).nonzero()[:,0]
    #   print("wrong_id: ", wrong_id)
    #   print("wrong pred size: ",wrong_pred.shape)
    #   fig = plt.figure()
    #   for i,v in enumerate(wrong_id):
    #     print("Id wrong: ", v)
    #     ax = fig.add_subplot(2,5,i+1)
    #     ax.imshow(torch.squeeze(data[v].cpu()).numpy())
    #     ax.axis('off')
    #     ax.set_title(
    #         (
    #             target[v].cpu().item(), 
    #             pred[v].cpu().item()
    #         )
    #         )
    #   break
      # print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        # epoch, batch_idx * len(data), len(trainloader.dataset),
        # 100. * batch_idx / len(trainloader), loss.item()))
      # train_losses.append(loss.item())
      # train_counter.append(
        # (batch_idx*64) + ((epoch-1)*len(trainloader.dataset)))
      # torch.save(network.state_dict(), './results/model.pth')
      # torch.save(optimizer.state_dict(), './results/optimizer.pth')

In [28]:
def calculate_loss_accuracy(network, dataloader, mode=""):
  running_loss = 0
  correct = 0
  number_of_data = 0

  device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
  network = network.to(device)
  
  network.eval()
  with torch.no_grad():
    for batch_idx, (data, target) in enumerate(dataloader):
      data, target = data.to(device), target.to(device)
      output = network(data)
      pred = torch.argmax(output, dim=1)
      correct += (pred==target).sum().item()

      loss = criterion(output, target)
      running_loss += loss.item()
      number_of_data += len(data)

      if mode == 'test':
        wrong_id = (pred!=target.view_as(pred)).nonzero()[:,0]
        fig = plt.figure(figsize=(10,10))
        for i,v in enumerate(wrong_id):
          ax = fig.add_subplot(2,10,i+1)
          ax.imshow(torch.squeeze(data[v].cpu()).numpy())
          ax.axis('off')
          ax.set_title((
                target[v].cpu().item(), 
                pred[v].cpu().item()
          ))
        fig.tight_layout()
    accuracy = correct/number_of_data
    loss = running_loss/len(dataloader)
    print("Number of batches: ", len(dataloader))
    print("Correct: {} On {} - Accuracy: {:.4f}".format(correct, number_of_data,accuracy))
    print("Loss over whole dataset: {:.4f}".format(loss))
  return accuracy, loss


In [29]:
def fit(network, trainloader, validateloader, opts):

  learning_rate, momentum, weight_decay, n_epochs = opts['learning_rate'], opts['momentum'], opts['weight_decay'], opts['n_epochs']

  # best_val_loss = float('Inf')
  best_val_accuracy = 0
  counter = 0

  # criterion = nn.CrossEntropyLoss()
  device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
  network = network.to(device)

  for epoch in range(1, n_epochs + 1):
    if epoch%50 == 0:
      learning_rate /=10
    learning_rates.append(learning_rate)

    optimizer = optim.SGD(network.parameters(), lr = learning_rate, momentum=momentum, weight_decay=weight_decay, nesterov=True)

    epoch_start_time = time.time()
    network.train()

    train_accuracy, train_loss = 0, 0

    # epoch_start_time = time.time()
    for batch_idx, (data, target) in enumerate(trainloader):
      data, target = data.to(device), target.to(device)
      optimizer.zero_grad()
      output = network(data)
      loss = criterion(output, target)
      
      pred = torch.argmax(output, dim=1)
      train_loss += loss.item()
      train_accuracy += (pred==target).sum().item()

      loss.backward()
      optimizer.step()

    epoch_times.append(time.time() - epoch_start_time)
    #evaluate per epoch
    val_accuracy, val_loss = calculate_loss_accuracy(network, validateloader)
    # train_accuracy, train_loss = calculate_loss_accuracy(network, device, trainloader)
    train_accuracy /= 42000
    train_loss /= len(trainloader)
    print("==========================================================================================================================")
    print("Epoch {}/{} - Train accuracy: {:.4f} - Train loss: {:.4f} - Validate accuracy: {:.4f} - Validate loss: {:.4f}".format(
        epoch, n_epochs, train_accuracy, train_loss, val_accuracy, val_loss
    ))
    print("==========================================================================================================================")
    train_losses.append(train_loss)
    train_acces.append(train_accuracy)
    val_losses.append(val_loss)
    val_acces.append(val_accuracy)

    #Save best network
    # if val_losses[-1] < best_val_loss:
      # best_val_loss = val_losses[-1]
    if val_acces[-1] > best_val_accuracy:
      best_val_accuracy = val_acces[-1]
      counter = 0
      torch.save(network.state_dict(),'checkpoint.pth')
    else:
      counter += 1
      print("***************************************************************************")
      # print('Validation loss has not improved from: {:.4f} - Count: {}'.format(best_val_loss, counter))
      print('Validation accuracy has not improved from: {:.4f} - Count: {}'.format(best_val_accuracy, counter))
      print("***************************************************************************")
      # if counter > early_stop:
      print("Early Stopping!")
  network.load_state_dict(torch.load('checkpoint.pth'))
        # break

In [30]:
def predict(network, dataloader):
  return 1

In [None]:
network = Net()

# optimizer = optim.SGD(network.parameters(), lr = learning_rate)
# device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
# network = network.to(device)

# test(device)
# for epoch in range(1, n_epochs + 1):
#   train(network,device)
#   check(network,device)

opts = {
    "n_epochs": 300,
    "learning_rate": 0.1,
    "momentum": 0.9,
    "weight_decay": 5e-4
}

train_losses, train_acces = [], []
val_losses, val_acces = [], []
epoch_times = []
learning_rates = []
fit(network, trainloader, validloader, opts)

Number of batches:  63
Correct: 3052 On 8000 - Accuracy: 0.3815
Loss over whole dataset: 1.7425
Epoch 1/300 - Train accuracy: 0.3641 - Train loss: 1.7094 - Validate accuracy: 0.3815 - Validate loss: 1.7425
Number of batches:  63
Correct: 4070 On 8000 - Accuracy: 0.5088
Loss over whole dataset: 1.5054
Epoch 2/300 - Train accuracy: 0.5116 - Train loss: 1.3678 - Validate accuracy: 0.5088 - Validate loss: 1.5054
Number of batches:  63
Correct: 2997 On 8000 - Accuracy: 0.3746
Loss over whole dataset: 2.2218
Epoch 3/300 - Train accuracy: 0.5813 - Train loss: 1.1857 - Validate accuracy: 0.3746 - Validate loss: 2.2218
***************************************************************************
Validation accuracy has not improved from: 0.5088 - Count: 1
***************************************************************************
Early Stopping!
Number of batches:  63
Correct: 4645 On 8000 - Accuracy: 0.5806
Loss over whole dataset: 1.1842
Epoch 4/300 - Train accuracy: 0.6310 - Train loss: 1.0536

In [None]:
np.sum(epoch_times)/60

103.56990678310395

In [None]:
calculate_loss_accuracy(network, validloader)

In [None]:
calculate_loss_accuracy(network, testloader)

In [None]:
trainset.data[1].shape

In [None]:
# loss = nn.CrossEntropyLoss()
# input = torch.randn(2, 2, requires_grad=True)
# target = torch.empty(2, dtype=torch.long).random_(2)
# output = loss(input, target)

In [None]:
# fig = plt.figure()
# for batch_idx, (data, target) in enumerate(trainloader):
#   if batch_idx %128 == 1:
#     for i in range(1,7):
#       ax = fig.add_subplot(2,3,i)
#       ax.imshow(torch.squeeze(data[i]).numpy())
#       ax.axis('off')
#       ax.set_title(target[i])

In [None]:
logs = pd.DataFrame({
    "Train_loss": np.array(train_losses),
    "Validate_loss": np.array(val_losses),
    "Train_accuracy": np.array(train_acces),
    "Validate_accuracy": np.array(val_acces),
    "Epoch_time": np.array(epoch_times),
    "Learning_rate": np.array(learning_rates)
    })

In [None]:
logs

In [None]:
logs['n_epoch'] = np.array([x for x in range(1, logs.shape[0]+1)])

In [None]:
logs

In [None]:
figure, (ax1, ax2) = plt.subplots(2, sharex=True, gridspec_kw = {'height_ratios':[3, 1]}, figsize=(20,10))
tr_loss = ax1.plot(logs['n_epoch'], logs['Train_loss'], label='Train_loss', color='red')
va_loss = ax1.plot(logs['n_epoch'], logs['Validate_loss'], label='Validate_loss', color='blue')
ax1.set_ylabel('Loss')
ax12 = ax1.twinx()
ax12.set_ylabel('Accuracy')
tr_acc = ax12.plot(logs['n_epoch'], logs['Train_accuracy'], label='Train_accuracy', color='green')
va_acc = ax12.plot(logs['n_epoch'], logs['Validate_accuracy'], label='Validate_accuracy', color='brown')
ax2.plot(logs['n_epoch'], logs['Learning_rate'])
ax2.set_ylabel('Learning rate')
leds = tr_loss + va_loss + tr_acc + va_acc
labs = [l.get_label() for l in leds]
figure.legend(leds, labs, loc="upper right")

In [None]:
logs.plot(x="n_epoch", y=["Train_loss","Validate_loss"], figsize=(20,10))

In [None]:
logs.plot(x="n_epoch", y=["Train_accuracy","Validate_accuracy"],figsize=(20,10))

In [None]:
logs.plot(x="n_epoch", y="Epoch_time", figsize=(20,10))

In [None]:
def confusion_matrix(net, dataloader):
  classes = dataloader.dataset.classes
  matrix = np.zeros((len(classes), len(classes)))

  device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
  net = net.to(device)
  
  net.eval()
  with torch.no_grad():
    for _, (data, target) in enumerate(dataloader):
      data, target = data.to(device), target.to(device)
      output = net(data)
      preds = torch.argmax(output, dim=1)
      for idx, pred in enumerate(preds):
        actual = target[idx]
        matrix[pred, actual] += 1
  con_mat = pd.DataFrame(data=matrix, columns=classes, index=classes)
  for i, v in enumerate(classes):
    con_mat.loc[v,'total_predict'] = con_mat.loc[v,:].sum()
    con_mat.loc['total_actual',v] = con_mat.loc[:,v].sum()
  return con_mat.fillna(0).astype('int64')

In [None]:
c_m = confusion_matrix(network, testloader)

In [None]:
c_m

In [None]:
sns.heatmap(data=c_m.loc[trainset.classes, trainset.classes]) 

In [None]:
def stat_result(classes, confusion_matrix):
  columns = ['Class', 'Precision', 'Recall', 'F1_score']
  stat_matrix = pd.DataFrame(columns=columns, data=np.zeros((len(classes), len(columns))))
  for i, v in enumerate(classes):
    precision = confusion_matrix.iloc[i,i]/confusion_matrix.iloc[i, -1]
    recall    = confusion_matrix.iloc[i,i]/confusion_matrix.iloc[-1, i]
    stat_matrix.loc[i, 'Class'] = v
    stat_matrix.loc[i, 'Precision'] = precision
    stat_matrix.loc[i, 'Recall'] = recall
    stat_matrix.loc[i, 'F1_score'] = 2*precision*recall/(precision+recall)
  return stat_matrix

In [None]:
stat_table = stat_result(trainset.classes, c_m)

In [None]:
stat_table

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data=pd.melt(stat_table, id_vars = 'Class', value_vars=['Precision', 'Recall', 'F1_score']), x='Class', y='value', hue='variable')

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data=stat_table, x=stat_table.index, y='Recall')

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data=stat_table, x=stat_table.index, y='Precision')

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data=stat_table, x=stat_table.index, y='F1_score')

In [None]:
torch.cuda.empty_cache()

In [None]:
from torchsummary import summary

In [None]:
summary(network, (3,32,32))