In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [None]:
# Hyper-parameters
input_size = 784          # 784 = 28 * 28; not referenced in the visible cells
num_classes = 10
num_epochs = 5
batch_size_train = 64
batch_size_test = 1000
momentum = 0.9
log_interval = 100        # number of batches between progress reports in train()
learning_rate = 0.001

# Preprocessing shared by both splits: convert to tensor, then normalize with mean 0.5 / std 0.5
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST dataset (download=True is requested on the training split only)
train_dataset = torchvision.datasets.MNIST(
    root='data',
    train=True,
    transform=mnist_transform,
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='data',
    train=False,
    transform=mnist_transform,
)

# Data loaders: training batches are shuffled, test batches keep the dataset order
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size_test, shuffle=False)

In [None]:
# Pull the first (index, batch) pair out of the training loader and dump it
batch_iterator = enumerate(train_loader)
batch = next(batch_iterator)
print(batch)

In [None]:
examples = enumerate(test_loader)

# Take the first batch produced by the test loader
batchId, (exampleData, exampleTargets) = next(examples)
print('Numero de batch: {}'.format(batchId))
print(exampleData.shape)
print(exampleTargets.shape)

import matplotlib.pyplot as plt

# Show the first 16 elements of the batch on a 4 x 4 grid
plt.figure()
for i in range(16):
  plt.subplot(4, 4, i+1)
  plt.imshow(exampleData[i][0], cmap='gray', interpolation='none')
  plt.title('Groundtruth: {}'.format(exampleTargets[i].item()))
  plt.xticks([])
  plt.yticks([])
# Adjust the spacing once, after every subplot and title has been drawn
plt.tight_layout()
plt.show()

In [5]:
#Create the neural network
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
  """Convolutional classifier: three 3x3 conv layers followed by two linear layers.

  forward() expects input of shape (batch, 1, 28, 28) and returns
  log-probabilities of shape (batch, 10), suitable for F.nll_loss.
  """

  def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
    self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
    self.fc1 = nn.Linear(5*5*64, 256)
    self.fc2 = nn.Linear(256,10)

  def forward(self, x):
    # x is (batch, 1, 28, 28)
    x = F.relu(F.max_pool2d(self.conv1(x), 2))  # -> (batch, 16, 14, 14)
    x = F.relu(F.max_pool2d(self.conv2(x), 2))  # -> (batch, 32, 7, 7)
    x = F.relu(self.conv3(x))                   # -> (batch, 64, 5, 5)
    # Flatten per sample; keeping the batch dimension fixed makes a wrong input
    # size fail at fc1 instead of silently regrouping samples.
    x = x.view(x.size(0), -1)
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    # Explicit class dimension: the implicit-dim form is deprecated and warns
    return F.log_softmax(x, dim=1)
  
# Instantiate the model and an SGD optimizer over its parameters,
# then place the model on the selected device
network = Net()
sgd_options = dict(lr=learning_rate, momentum=momentum)
optimizer = optim.SGD(network.parameters(), **sgd_options)

network = network.to(device)

In [None]:
import torchsummary as ts

print(network)
# Pass the device type actually in use ('cuda' or 'cpu') instead of a hard-coded
# 'cuda', so the summary also works on CPU-only machines
ts.summary(network.to(device), (1, 28, 28), device=device.type)

In [7]:
# Histories appended to by train() and test(), plotted in later cells
train_losses, train_counter, test_losses = [], [], []

# x positions for the test losses: training examples seen after 0..num_epochs epochs
samples_per_epoch = len(train_loader.dataset)
test_counter = [epoch_index * samples_per_epoch for epoch_index in range(num_epochs + 1)]

#Function to train one epoch
def train(network, optimizer,  epoch):
  """Train `network` for one pass over the module-level `train_loader`.

  Args:
    network: model to optimise; called on each batch after moving it to `device`.
    optimizer: optimizer stepped once per batch.
    epoch: 1-based epoch number, used for logging and for the x position
      recorded in `train_counter`.

  Side effects: every `log_interval` batches the current loss is printed and
  appended to `train_losses`, the number of examples seen so far is appended to
  `train_counter`, and the model / optimizer state dicts are written to
  'model.pth' and 'optimizer.pth'.
  """
  network.train() # Training mode
  samples_per_epoch = len(train_loader.dataset)
  for batchId, (data, target) in enumerate(train_loader): #Iterate over batches
    data = data.to(device)
    target = target.to(device)

    # Forward pass, loss, backward pass and parameter update
    optimizer.zero_grad()
    output = network(data)
    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()

    if batchId % log_interval == 0:
      print('Train epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batchId*len(data), samples_per_epoch, 100.*batchId/len(train_loader), loss.item()))
      train_losses.append(loss.item())
      # Use the loader's own batch size rather than a hard-coded 64 so the
      # x axis stays correct if the batch size is changed
      train_counter.append((batchId*train_loader.batch_size) + ((epoch-1)*samples_per_epoch))
      # Checkpoint at every logging step
      torch.save(network.state_dict(),'model.pth')
      torch.save(optimizer.state_dict(), 'optimizer.pth')
      

#Testing
def test(network):
  """Evaluate `network` on the module-level `test_loader`.

  Sums the negative log-likelihood loss over every test example, divides by the
  dataset size, appends that average to `test_losses`, and prints the average
  loss together with accuracy and error rate.

  Args:
    network: model to evaluate; its output is passed to F.nll_loss.
  """
  network.eval() #Test mode
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      data = data.to(device)
      target = target.to(device)
      output = network(data)
      # reduction='sum' is the supported spelling of the deprecated size_average=False
      test_loss += F.nll_loss(output, target, reduction='sum').item()
      pred = output.argmax(dim=1, keepdim=True)
      # .item() keeps the running count a plain Python int instead of a device tensor
      correct += pred.eq(target.view_as(pred)).sum().item()
  length = len(test_loader.dataset)
  test_loss /= length
  test_losses.append(test_loss)
  print('\nTest set: Avg. Loss: {:.4f}, Accuracy: {}/{} ({:.0f}%), Error: {:.3f}% \n'.format(test_loss, correct, length, 100.*correct/length, 100.*(length-correct)/length))

In [None]:
# Evaluate once before any training so the test history has a starting point
test(network)

# Then alternate one training epoch with one evaluation pass
epoch_numbers = range(1, num_epochs + 1)
for epoch in epoch_numbers:
  train(network, optimizer, epoch)
  test(network)

In [None]:
# Training loss (line) and test loss (dots) against the number of training examples seen
fig, ax = plt.subplots()
ax.plot(train_counter, train_losses, color='blue', label='Train loss')
ax.scatter(test_counter, test_losses, color='red', label='Test loss')
ax.set_xlabel('Number of training examples seen')
ax.set_ylabel('Negative log-likelihood loss')
ax.legend(loc='upper right')
plt.show()

In [None]:
# Run the network on the example batch without recording gradients
with torch.no_grad():
  example_inputs = exampleData.to(device)
  output = network(example_inputs)

In [None]:
# Predicted class of every example in the batch, computed once instead of per subplot
predictions = output.argmax(dim=1)

# Show the first 6 examples with their predicted class
fig = plt.figure()
for i in range(6):
  plt.subplot(2,3,i+1)
  plt.imshow(exampleData[i][0], cmap='gray', interpolation='none')
  plt.title("Prediction: {}".format(predictions[i].item()))
  plt.xticks([])
  plt.yticks([])
# Adjust the spacing once, after every subplot and title has been drawn
plt.tight_layout()
plt.show()

In [12]:
# Rebuild the model from the checkpoint file written by train()
continued_network = Net()
# map_location lets a checkpoint saved on one device be loaded on another
network_state_dict = torch.load('model.pth', map_location=device)
continued_network.load_state_dict(network_state_dict)
continued_network = continued_network.to(device)

# Recreate the optimizer and restore its saved state
continued_learning_rate = 0.0005
continued_optimizer = optim.SGD(continued_network.parameters(), lr=continued_learning_rate, momentum=momentum)
optimizer_state_dict = torch.load('optimizer.pth', map_location=device)
continued_optimizer.load_state_dict(optimizer_state_dict)

# load_state_dict() also restores the checkpointed param-group hyper-parameters,
# which would silently replace the lower learning rate requested above.
# Re-apply it so the continued run really uses it.
for param_group in continued_optimizer.param_groups:
  param_group['lr'] = continued_learning_rate

In [None]:
# Continue training for a few more epochs. Epoch numbers follow on from the
# initial run, so they are derived from num_epochs instead of being hard-coded;
# otherwise the x positions in train_counter / test_counter drift if num_epochs changes.
num_extra_epochs = 4
first_extra_epoch = num_epochs + 1
for epoch in range(first_extra_epoch, first_extra_epoch + num_extra_epochs):
  test_counter.append(epoch*len(train_loader.dataset))
  train(continued_network, continued_optimizer, epoch)
  test(continued_network)

In [None]:
# Sanity check: scatter() needs one x position per recorded test loss
print(len(test_counter))
print(len(test_losses))

# Training loss (line) and test loss (dots) against the number of training examples seen
fig, ax = plt.subplots()
ax.plot(train_counter, train_losses, color='blue', label='Train loss')
ax.scatter(test_counter, test_losses, color='red', label='Test loss')
ax.set_xlabel('Number of training examples seen')
ax.set_ylabel('Negative log-likelihood loss')
ax.legend(loc='upper right')
plt.show()

In [None]:
# Print the module structure of `network` (the model trained in the first run)
print(network)

In [17]:
#Function to get predictions over a dataset

def get_predictions(model, iterator, device):
    """Run `model` over every batch of `iterator` and collect labels and outputs.

    Args:
        model: module called on each input batch; switched to eval mode here.
        iterator: iterable yielding (data, target) pairs of tensors.
        device: device each input batch is moved to before the forward pass.

    Returns:
        (labels, probs): the targets concatenated along dim 0, and the raw model
        outputs concatenated along dim 0, both on the CPU. The outputs are
        returned exactly as the model produces them (for the `Net` defined in
        this notebook these are log-probabilities, not probabilities).
    """
    # Disable training-only behaviour for prediction
    model.eval()

    labels = []
    probs = []

    # No gradients are needed when only collecting predictions
    with torch.no_grad():
        for (data, target) in iterator:
            data = data.to(device)
            y_pred = model(data)

            # Keep results on the CPU so they can be concatenated and handed
            # to CPU-side tooling afterwards
            labels.append(target.cpu())
            probs.append(y_pred.cpu())

    labels = torch.cat(labels, dim = 0)
    probs = torch.cat(probs, dim = 0)

    return labels, probs

In [None]:
# Compute the model outputs on the test set, then pick the highest-scoring class per sample
prediction_result = get_predictions(continued_network, test_loader, device)
labels, probs = prediction_result

pred_labels = probs.argmax(dim=1)

In [None]:
from sklearn import metrics

# Plot a confusion matrix
def plot_confusion_matrix(labels, pred_labels):
    """Draw the confusion matrix of `pred_labels` against `labels` on a 10 x 10 inch figure."""
    figure = plt.figure(figsize=(10, 10))
    axes = figure.add_subplot(1, 1, 1)
    counts = metrics.confusion_matrix(labels, pred_labels)
    matrix_display = metrics.ConfusionMatrixDisplay(counts)
    # Integer cell values on a blue colour map, drawn on the axes created above
    matrix_display.plot(values_format='d', cmap='Blues', ax=axes)

plot_confusion_matrix(labels, pred_labels)

In [None]:
# Text report of the per-class metrics, printed with 6 decimal digits
report_text = metrics.classification_report(labels, pred_labels, digits=6)
print(report_text)

