# Torch playground

In [None]:
import os
import pickle
import random

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

from torchsummary import summary
from torch.autograd import Variable

from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import OneHotEncoder

from tqdm.notebook import tqdm, trange

import matplotlib.pyplot as plt
%matplotlib notebook
plt.style.use('ggplot')

from extra.utils import *

In [None]:
# `IPython.display` is the public home of `display`/`HTML`; importing them from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Widen the notebook container so wide figures and progress bars fit on screen.
display(HTML("<style>.container { width:90% !important; }</style>"))

## Preconfig

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    import os  # local import keeps this helper self-contained

    random.seed(seed)
    # Hash randomisation is fixed at interpreter start-up, so this only
    # influences child processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms per run, which defeats
    # determinism, so it must be off when reproducibility is requested.
    torch.backends.cudnn.benchmark = False

In [None]:
# Turn on cuDNN benchmark mode whenever a CUDA device is available.
# (Uncomment the shell line below to inspect the GPU from the notebook.)
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True
# !nvidia-smi

## CIFAR preload

In [None]:
# Run-wide hyper-parameters; seed before any randomised object is created.
batch_size = 128
epochs = 32
seed_everything(1337)

# Channel statistics shared by both pipelines.
# Alternative statistics kept from an earlier experiment:
#   mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: random crop + horizontal flip augmentation, then tensor + normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Test pipeline: no augmentation.
transform_test = transforms.Compose([transforms.ToTensor(), normalize])

data_root = './data'
loader_kwargs = dict(batch_size=batch_size, num_workers=4)

trainset = torchvision.datasets.CIFAR10(
    root=data_root, train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)

testset = torchvision.datasets.CIFAR10(
    root=data_root, train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# Human-readable names, indexed by the integer class label.
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

## MobileNetV2

In [None]:
from models.mobilenetv2 import MobileNetV2

## MobileNetV3

In [None]:
from models.mobilenetv3 import MobileNetV3

## Prep

In [None]:
# Bookkeeping values; both are overwritten when a checkpoint is restored.
best_acc = 0  # best test accuracy seen so far
start_epoch = 0  # epoch index the training loop starts from

# Alternative backbone:
# model = MobileNetV2()
# name_of_model = 'MobileNetV2'

name_of_model = 'MobileNetV3'
model = MobileNetV3(mode='large', classes_num=10, input_size=32, dropout=0.8, width_multiplier=1.0)
checkpoint_path = f'./checkpoint/ckpt_{name_of_model}.pth'

# Use the GPU when one is available; moving to 'cpu' leaves the model where it is.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

In [None]:
# Loss, optimiser and learning-rate schedule used by the training loop.
# SGD alternative tried earlier: lr=0.001, momentum=0.9, weight_decay=5e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=3e-4)  # other value tried: 0.01
# NOTE(review): T_max=200 is larger than the 32 epochs configured above, so a
# single run only covers the first part of the cosine cycle — confirm intended.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=200)

In [None]:
# Print a per-layer summary for a 3x32x32 input. Pass the resolved `device`
# instead of a hard-coded 'cuda' so the cell also runs on CPU-only machines.
summary(model, (3, 32, 32), batch_size=batch_size, device=device)

## Training

In [None]:
# Resume from the saved checkpoint when one exists; otherwise keep the freshly
# initialised model so the notebook still runs top to bottom on a first run.
if os.path.isfile(checkpoint_path):
    print('==> Resuming from checkpoint..')
    # NOTE: torch.load unpickles the file — only load checkpoints you trust.
    # map_location lets a checkpoint written on a GPU be restored on CPU.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model'])
    # The stored accuracy may be a 0-dim tensor; keep a plain Python float.
    best_acc = float(checkpoint['accuracy'])
    start_epoch = checkpoint['epoch']
else:
    print(f'==> No checkpoint found at {checkpoint_path}, training from scratch')

In [None]:
# Per-batch metric histories; filled by train_nn / test_nn and plotted afterwards.
train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []

In [None]:
# Mixed precision is only meaningful on the GPU. The scaler multiplies the loss
# before backward so that half-precision gradients produced under autocast do
# not underflow; with `enabled=False` both helpers are transparent no-ops.
use_amp = device == 'cuda'
grad_scaler = torch.cuda.amp.GradScaler(enabled=use_amp)


def train_nn(epoch):
    """Train `model` for one pass over `trainloader`.

    Appends the loss and accuracy of every batch to the module-level
    `train_losses` / `train_accuracies` lists and shows them on the
    progress bar.

    Args:
        epoch: Epoch index, used only for the progress-bar caption.
    """
    model.train()
    with tqdm(trainloader, unit='batch') as tepoch:
        tepoch.set_description(f'Train epoch {epoch}')
        for data, target in tepoch:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            with torch.cuda.amp.autocast(enabled=use_amp):
                output = model(data)
                loss = criterion(output, target)

            # Scaled backward pass; `step` unscales the gradients and skips the
            # update if they contain inf/NaN, `update` adapts the scale factor.
            grad_scaler.scale(loss).backward()
            grad_scaler.step(optimizer)
            grad_scaler.update()

            # Read each metric back from the device once and reuse the floats.
            batch_loss = loss.item()
            batch_accuracy = (output.argmax(dim=1) == target).float().mean().item()

            train_losses.append(batch_loss)
            train_accuracies.append(batch_accuracy)

            tepoch.set_postfix(loss=batch_loss, accuracy=100. * batch_accuracy)

In [None]:
def test_nn(epoch):
    """Evaluate `model` on `testloader` and checkpoint it when it improves.

    Per-batch loss and accuracy are appended to the module-level `test_losses`
    / `test_accuracies` lists. After the whole test set has been seen, the
    accuracy over all samples (a fraction in [0, 1]) is compared with
    `best_acc`; on improvement the model is saved to `checkpoint_path` and
    `best_acc` is updated.

    Args:
        epoch: Epoch index, shown on the progress bar and stored in the checkpoint.
    """
    global best_acc
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        with tqdm(testloader, unit='batch') as tepoch:
            tepoch.set_description(f'Test epoch {epoch}')
            for data, target in tepoch:
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss = criterion(output, target)

                hits = (output.argmax(dim=1) == target).sum().item()
                samples = target.size(0)
                # Count samples rather than averaging batch means: the last
                # batch can be smaller than the others.
                correct += hits
                total += samples

                batch_loss = loss.item()
                batch_accuracy = hits / samples
                test_losses.append(batch_loss)
                test_accuracies.append(batch_accuracy)

                tepoch.set_postfix(loss=batch_loss, accuracy=100. * batch_accuracy)

    # Decide on the checkpoint once per epoch, from the accuracy over the whole
    # test set, instead of from whichever single batch happened to score best.
    epoch_accuracy = correct / total if total else 0.0
    if epoch_accuracy > float(best_acc):
        state = {
            'model': model.state_dict(),
            'accuracy': epoch_accuracy,  # plain float, loadable on any device
            'epoch': epoch,
        }
        os.makedirs(os.path.dirname(checkpoint_path) or '.', exist_ok=True)
        torch.save(state, checkpoint_path)
        best_acc = epoch_accuracy

In [None]:
# Run `epochs` epochs, beginning at `start_epoch` (non-zero after a checkpoint restore).
final_epoch = start_epoch + epochs
for epoch in trange(start_epoch, final_epoch):
    train_nn(epoch)
    test_nn(epoch)
    scheduler.step()  # advance the learning-rate schedule once per epoch

# Recorded throughput: adam with lr = 0.0003, 5,90 batch/s

In [None]:
# Per-batch training loss and accuracy drawn on one shared axis.
fig = plt.figure(figsize=(12, 6))
plt.plot(train_losses, color='m', label='losses')
plt.plot(train_accuracies, color='b', label='accuracies')
plt.ylim(-0.1, 2.1)

fig.suptitle('History of accuracy and loss on train data', fontsize=20)
plt.xlabel('batch', fontsize=18)
# Both curves share this axis, so the label names both quantities.
plt.ylabel('training loss / accuracy', fontsize=16)
plt.legend(loc='upper right')

plt.show()

# Persist the figure: make sure the output folder exists and close the file
# deterministically instead of leaving the handle to the garbage collector.
os.makedirs('./figures', exist_ok=True)
figure_path = f'./figures/{name_of_model.lower()}_acc_loss_train_epochs{epochs}_bs{batch_size}.pickle'
with open(figure_path, 'wb') as figure_file:
    pickle.dump(fig, figure_file)

In [None]:
# Per-batch test loss and accuracy drawn on one shared axis.
fig = plt.figure(figsize=(12, 6))
plt.plot(test_losses, color='m', label='losses')
plt.plot(test_accuracies, color='b', label='accuracies')

fig.suptitle('History of accuracy and loss on test data', fontsize=20)
plt.xlabel('batch', fontsize=18)
# Both curves share this axis, so the label names both quantities.
plt.ylabel('validation loss / accuracy', fontsize=16)
plt.legend(loc='upper right')

plt.show()

# Persist the figure: make sure the output folder exists and close the file
# deterministically instead of leaving the handle to the garbage collector.
os.makedirs('./figures', exist_ok=True)
figure_path = f'./figures/{name_of_model.lower()}_acc_loss_test_epochs{epochs}_bs{batch_size}.pickle'
with open(figure_path, 'wb') as figure_file:
    pickle.dump(fig, figure_file)

In [None]:
# Training accuracy (top) and loss (bottom) on separate axes sharing the batch axis.
fig, (ax1, ax2) = plt.subplots(2, figsize=(12, 6), sharex=True)
fig.suptitle('History of accuracy and loss on train data', fontsize=20)
ax1.plot(train_accuracies)
ax1.set_ylabel("training accuracy")
ax2.plot(train_losses)
ax2.set_ylabel("training loss")
ax2.set_xlabel("batches");
plt.show()

# Persist the figure: make sure the output folder exists and close the file
# deterministically instead of leaving the handle to the garbage collector.
os.makedirs('./figures', exist_ok=True)
figure_path = f'./figures/{name_of_model.lower()}_acc_loss_train_combo_epochs{epochs}_bs{batch_size}.pickle'
with open(figure_path, 'wb') as figure_file:
    pickle.dump(fig, figure_file)

In [None]:
# Test accuracy (top) and loss (bottom) on separate axes sharing the batch axis.
fig, (ax1, ax2) = plt.subplots(2, figsize=(12, 6), sharex=True)
fig.suptitle('History of accuracy and loss on test data', fontsize=20)
ax1.plot(test_accuracies)
ax1.set_ylabel("validation accuracy")
ax2.plot(test_losses)
ax2.set_ylabel("validation loss")
ax2.set_xlabel("batches");
plt.show()  # matches the other plotting cells

# Persist the figure: make sure the output folder exists and close the file
# deterministically instead of leaving the handle to the garbage collector.
os.makedirs('./figures', exist_ok=True)
figure_path = f'./figures/{name_of_model.lower()}_acc_loss_test_combo_epochs{epochs}_bs{batch_size}.pickle'
with open(figure_path, 'wb') as figure_file:
    pickle.dump(fig, figure_file)

## Loading model

In [None]:
# Alternative kept for reference: rebuild a network and restore weights that
# were saved to disk instead of reusing the in-memory model.
# loaded_model = MobileNetV2()
# loaded_model = loaded_model.to(device)
# if device == 'cuda':
#     loaded_model = torch.nn.DataParallel(loaded_model)
#     cudnn.benchmark = True
# loaded_model.load_state_dict(torch.load('./state_dict_net_200.pt'))

# The cells below evaluate the model object that is already in memory;
# `loaded_model` is an alias of `model`, not a copy.
loaded_model = model

In [None]:
# Fetch the first test batch. Use the built-in `next()`: the DataLoader
# iterator's `.next()` method is not available in current PyTorch releases.
dataiter = iter(testloader)
images, labels = next(dataiter)

In [None]:
# Display the first four images of the batch as a grid and print their true labels.
preview_count = 4
fig = plt.figure()
preview_grid = torchvision.utils.make_grid(images[:preview_count])
imshow(preview_grid)
plt.show()

true_names = ['%5s' % classes[labels[j]] for j in range(preview_count)]
print('GroundTruth: ', ' '.join(true_names))

## Testing network, 4 images

In [None]:
# Predict on the CPU and print the predicted class of the first four images.
loaded_model = loaded_model.cpu()
# Inference mode: the model is built with dropout=0.8, which must be inactive here.
loaded_model.eval()
with torch.no_grad():  # no autograd graph is needed for prediction
    # `.cpu()` is a no-op for a CPU batch and guards against `images` having
    # been moved to the GPU by a cell executed out of order.
    loaded_outputs = loaded_model(images.cpu())
_, loaded_predicted = torch.max(loaded_outputs, 1)

print('Predicted: ', ' '.join('%5s' % classes[loaded_predicted[j]]
                              for j in range(4)))

## Testing network, 10000 images

In [None]:
# Overall accuracy of the model on the complete test set.
correct = 0
total = 0
# Use the resolved `device` rather than a hard-coded `.cuda()` so the cell
# also runs on CPU-only machines.
loaded_model = loaded_model.to(device)
loaded_model.eval()
with torch.no_grad():
    with tqdm(testloader, unit="batch") as tepoch:
        for images, labels in tepoch:
            images, labels = images.to(device), labels.to(device)
            outputs = loaded_model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

## Testing network, accuracy of each class

In [None]:
# Accuracy broken down by class over the complete test set.
num_classes = len(classes)
class_correct = [0.] * num_classes
class_total = [0.] * num_classes

loaded_model = loaded_model.to(device)
loaded_model.eval()
with torch.no_grad():
    with tqdm(testloader, unit="batch") as tepoch:
        for images, labels in tepoch:
            images, labels = images.to(device), labels.to(device)
            outputs = loaded_model(images)
            hits = outputs.argmax(dim=1) == labels
            # Count every sample of the batch, not just the first four.
            for label, hit in zip(labels.tolist(), hits.tolist()):
                class_correct[label] += hit
                class_total[label] += 1


for i in range(num_classes):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

In [None]:
# NOTE(review): `is_cuda` is not defined in this notebook — presumably it comes
# from `extra.utils` and reports whether the model lives on the GPU; confirm.
is_cuda(model)

## ROC curve

In [None]:
def test_class_probabilities(model, device, test_loader, which_class):
    model.eval()
    actuals = []
    probabilities = []
    with torch.no_grad():
        with tqdm(test_loader, unit='batch') as tepoch:
            for data, target in tepoch:
                data, target = data.to(device), target.to(device)
                output = model(data)            
                prediction = output.argmax(dim=1, keepdim=True)
                actuals.extend(target.view_as(prediction) == which_class)
                probabilities.extend(torch.exp(output[:, which_class]))

    return [i.item() for i in actuals], [i.item() for i in probabilities]

# One-vs-rest ROC curve for a single class of the test set.
which_class = 2
actuals, class_probabilities = test_class_probabilities(model, device, testloader, which_class)

fpr, tpr, _ = roc_curve(actuals, class_probabilities)
roc_auc = auc(fpr, tpr)
lw = 2

fig = plt.figure(figsize=(8, 8))
plt.plot(fpr, tpr, color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Take the class name from `classes` so the title always matches `which_class`
# (index 2 is 'bird'; the previous title hard-coded 'cat').
plt.title('ROC for label=%s(%d) class' % (classes[which_class], which_class))
plt.legend(loc="lower right")
plt.show()

# pickle.dump(fig, open(f'./figures/{name_of_model.lower()}_ROC_{epochs}_bs{batch_size}.pickle', 'wb'))

## Evaluating

In [None]:
# Take the first test batch and keep the index of the highest-scoring class per sample.
first_batch = next(iter(testloader))
X_val, Y_val = first_batch
Y_pred = model(X_val.to(device)).argmax(dim=1)

In [None]:
# Class name of the first sample in the validation batch (shown as the cell output).
first_label = Y_val[0].item()
classes[first_label]

In [None]:
# 4x4 grid with the first 16 samples of the batch: first channel of each image
# plus its true and predicted class.
fig = plt.figure(figsize=(12, 12))
columns, rows = 4, 4
for i in range(columns * rows):
    # Subplot positions are 1-based while samples are 0-based; using the
    # subplot index for both skipped the first sample of the batch.
    fig.add_subplot(rows, columns, i + 1)
    plt.imshow(X_val[i][0].squeeze(), cmap='gray')

    plt.title('real: {}, predicted: {}'.format(classes[Y_val[i].item()], classes[Y_pred[i].item()]))
plt.show()

In [None]:
# Show the first image of a fresh test batch.
dataiter = iter(testloader)
# Built-in `next()` instead of the iterator's `.next()` method, which is not
# available in current PyTorch releases.
images, labels = next(dataiter)
# Move the channel axis last (as the original transpose did) and hand
# matplotlib a NumPy array rather than a tensor.
plt.imshow(images[0].permute(1, 2, 0).numpy())

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from six.moves import cPickle 

# Read the first raw CIFAR-10 training batch from disk.
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
with open('./data/cifar-10-batches-py/data_batch_1', 'rb') as f:  # closed even on error
    datadict = cPickle.load(f, encoding='latin1')
X = datadict["data"]
Y = datadict['labels']
# Unflatten each row to 3x32x32 and move channels last for imshow; -1 infers the
# number of images instead of hard-coding it.
X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("uint8")
Y = np.array(Y)

# Visualizing CIFAR 10: a 5x5 grid of randomly chosen images.
fig, axes1 = plt.subplots(5, 5, figsize=(3, 3))
for j in range(5):
    for k in range(5):
        i = np.random.choice(len(X))
        axes1[j][k].set_axis_off()
        axes1[j][k].imshow(X[i])