In [0]:
https://bit.ly/2TTGDw8from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


# **Preprocessing**

In [0]:
 # reproducibility
 seed = 42
import numpy as np
np.random.seed(seed)
import torch
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
import torchvision
from torchvision import transforms, datasets
download_ = False
device = torch.device("cuda:0")

In [0]:
path_data = "/content/drive/My Drive/Colab Notebooks/RNA/Practica2/data/"
!mkdir -p "/content/drive/My Drive/Colab Notebooks/RNA/Practica2/data/"
# Dataset is not stored like a tensor, for this reasones is made the transformation, pixel/255, normalized [0,1]
# variables train, test; are type dataset, located in a real directory

# DATA AUGMENTATION:
train_transforms = torchvision.transforms.Compose([torchvision.transforms.RandomHorizontalFlip(p=0.0),                                          
                                                   transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
                                                   torchvision.transforms.ToTensor()])
transforms_ = transforms.Compose([transforms.ToTensor()])
train = datasets.MNIST(path_data, train=True, download=download_, transform=train_transforms)
test = datasets.MNIST(path_data, train=False, download=download_, transform=transforms_ )
path_mnist = f"{path_data}/MNIST"

In [0]:
from math import ceil
batch_size = 64
num_traning_samples = len(train)
num_batches = ceil(num_traning_samples / batch_size)
num_test_samples = len(test)
# DataLoader is like a generator in python
train_set = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=num_test_samples, shuffle=False)

# **MLP for classification**

In [0]:
import torch.nn as nn
import torch.nn.functional as F

In [0]:
# https://discuss.pytorch.org/t/writing-a-simple-gaussian-noise-layer-in-pytorch/4694/4
class GaussianNoise(nn.Module):
    def __init__(self, sigma, device="cpu"):
        super().__init__()
        self.sigma = sigma
        self.device = device

    def forward(self, x):
        if self.training and self.sigma != 0:
           # detach: es nueva view del tensor x, donde su contribucion (sampled noise) no se tiene en cuenta a la hora de calcular el gradiente.
           # http://www.bnikolic.co.uk/blog/pytorch-detach.html
            scale = self.sigma * x.detach()
            sampled_noise = torch.zeros(*x.size()).normal_().to(self.device).float() * scale
            x = x + sampled_noise
        return x

In [0]:
class Net(nn.Module):
  
  def __init__(self):
    super().__init__()
    dim_ = 1024
    # input images 28*28 -> flatten matrix to a vector 784
    self.fc1 = nn.Linear(28*28, dim_) # dim_ units, linear is a fully connected layer
    self.bn1 = nn.BatchNorm1d(dim_)
    self.gn1 = GaussianNoise(0.3, device=device)
    self.fc2 = nn.Linear(dim_, dim_) # dim_ units
    self.bn2 = nn.BatchNorm1d(dim_)
    self.gn2 = GaussianNoise(0.3, device=device)
    self.fc3 = nn.Linear(dim_, dim_) # dim_ units
    self.bn3 = nn.BatchNorm1d(dim_)
    self.gn3 = GaussianNoise(0.3, device=device)
    self.fc4 = nn.Linear(dim_, 10) # dim_ units
  
  def forward(self, x):
    s1 = F.relu(self.gn1(self.bn1(self.fc1(x))))
    s2 = F.relu(self.gn2(self.bn2(self.fc2(s1))))
    s3 = F.relu(self.gn3(self.bn3(self.fc3(s2))))    
    logits = self.fc4(s3)
    return logits

In [0]:
# torch.cuda.Tensor occupies GPU memory. Of course operations on a GPU / CUDA Tensor are computed on GPU.
net = Net().to(device) # torch cuda tensors, instead normal torch tensors (cpu)
print(net)
print(net.fc1.weight.type())

Net(
  (fc1): Linear(in_features=784, out_features=1024, bias=True)
  (bn1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (gn1): GaussianNoise()
  (fc2): Linear(in_features=1024, out_features=1024, bias=True)
  (bn2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (gn2): GaussianNoise()
  (fc3): Linear(in_features=1024, out_features=1024, bias=True)
  (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (gn3): GaussianNoise()
  (fc4): Linear(in_features=1024, out_features=10, bias=True)
)
torch.cuda.FloatTensor


# **Training**

In [0]:
import torch.optim as optim

In [0]:
model_name = "mlp_bn_gn_lra_da"
lr_=0.1
num_epochs =  100
#patience = 15
optimizer = optim.SGD(net.parameters(), lr=lr_, weight_decay=1e-6, momentum=0.9)
#optimizer = optim.SGD(net.parameters(), lr=lr_)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 75], gamma=lr_)
criterion = nn.CrossEntropyLoss()

In [0]:
import os
import numpy as np
from sklearn.metrics import confusion_matrix

acc_epoch_train = np.full((num_epochs), np.inf)
acc_epoch_test = np.full((num_epochs), np.inf)
cost_epoch_train = np.full((num_epochs), np.inf)
cost_epoch_test = np.full((num_epochs), np.inf)

best_cost_test = np.inf

dir_checkpoint_model = "/content/drive/My Drive/Colab Notebooks/RNA/Practica2/model_checkpoints/MNIST/"
if not os.path.exists(dir_checkpoint_model):
    os.makedirs(dir_checkpoint_model)
path_checkpoint_model = f"{dir_checkpoint_model}{model_name}.pth"
confussion_matrix_ = None

# restore checkpoint 
# checkpoint_ = torch.load(path_checkpoint_model)
# net.load_state_dict(checkpoint_['model_state_dict'])
# optimizer.load_state_dict(checkpoint_['optimizer_state_dict'])
# best_cost_test = checkpoint_["cost"]
            
#epochs_patience = 0
for id_epoch in range(0, num_epochs, 1):
  # TRAIN
  num_corrected_predictions_train = 0
  total_loss_train = 0
  for idx_batch, (batch_x, batch_y) in enumerate(train_set):
    batch_x_ = batch_x.view(-1, 28*28).to(device)
    batch_y_ = batch_y.to(device)
    optimizer.zero_grad() # initiated to zero the gradients, very usefull rnn for acummulating
    predictions = net(batch_x_) # -1 means all de examples of the batch, in each one, beacuse the last one can have different number of examples
    #loss = F.nll_loss(predictions, batch_y_) # F.nll_loos, the negative log likelihood loss, before was applied the softmax layer: F.log_softmax
    loss = criterion(predictions, batch_y_)
    loss.backward()
    optimizer.step()
    _, predicted_labels = predictions.max(1) # get per sample the maximum value
    num_corrected_predictions_train += batch_y_.eq(predicted_labels).sum().item()
    total_loss_train += loss
  acc_train = (num_corrected_predictions_train / num_traning_samples)
  cost_train = (total_loss_train / num_batches)
  acc_epoch_train[id_epoch] = acc_train
  cost_epoch_train[id_epoch] = cost_train
  # EVAL
  with torch.no_grad():
    for batch_x_test, batch_y_test in test_set:
      batch_x_ = batch_x_test.view(-1, 28*28).to(device)
      batch_y_ = batch_y_test.to(device)
      predictions = net(batch_x_)
      _, predicted_labels = predictions.max(1) # get per sample the maximum value
      acc_test = batch_y_.eq(predicted_labels).sum().item()/num_test_samples
      #cost_test = torch.mean(F.nll_loss(predictions, batch_y_))
      cost_test = criterion(predictions, batch_y_)
      acc_epoch_test[id_epoch] = acc_test
      cost_epoch_test[id_epoch] = cost_test
      if cost_test < best_cost_test:
        predictions_labels_ = predicted_labels.to("cpu")
        confusion_matrix_ = confusion_matrix(batch_y_test, predictions_labels_)
        best_cost_test = cost_test
        #epochs_patience = 0
        # save model
        # torch.save({
        #     'epoch': id_epoch+1,
        #     'model_state_dict': net.state_dict(),
        #     'optimizer_state_dict': optimizer.state_dict(),
        #     'costt': cost_test,
        # }, path_checkpoint_model)
        confusion_matrix(batch_y_test, predictions_labels_)
  #     else:
  #       epochs_patience += 1
  # if epochs_patience >= patience:
  #   break
  
  scheduler.step()

  print(f"Epoch {id_epoch}:\n \tcost train: {cost_train:.3f}\n \tcost test: {cost_test:.3f}\n \tacc train: {acc_train*100:.3f}\n \tacc test: {acc_test*100:.3f}\n")

acc_epoch_train = acc_epoch_train[:id_epoch]
acc_epoch_test = acc_epoch_test[:id_epoch]
cost_epoch_train = cost_epoch_train[:id_epoch]
cost_epoch_test = cost_epoch_test[:id_epoch]

print(f"Best test-acc: {np.max(acc_epoch_test):.4f}")    

Epoch 0:
 	cost train: 0.560
 	cost test: 0.155
 	acc train: 85.232
 	acc test: 95.170

Epoch 1:
 	cost train: 0.220
 	cost test: 0.117
 	acc train: 93.265
 	acc test: 96.340

Epoch 2:
 	cost train: 0.169
 	cost test: 0.094
 	acc train: 94.752
 	acc test: 96.900

Epoch 3:
 	cost train: 0.149
 	cost test: 0.069
 	acc train: 95.298
 	acc test: 97.710

Epoch 4:
 	cost train: 0.136
 	cost test: 0.070
 	acc train: 95.622
 	acc test: 97.630

Epoch 5:
 	cost train: 0.119
 	cost test: 0.067
 	acc train: 96.352
 	acc test: 97.910

Epoch 6:
 	cost train: 0.112
 	cost test: 0.073
 	acc train: 96.438
 	acc test: 97.830

Epoch 7:
 	cost train: 0.108
 	cost test: 0.063
 	acc train: 96.600
 	acc test: 98.010

Epoch 8:
 	cost train: 0.102
 	cost test: 0.054
 	acc train: 96.795
 	acc test: 98.180

Epoch 9:
 	cost train: 0.098
 	cost test: 0.063
 	acc train: 96.943
 	acc test: 97.940

Epoch 10:
 	cost train: 0.090
 	cost test: 0.064
 	acc train: 97.122
 	acc test: 97.910

Epoch 11:
 	cost train: 0.091
 