In [None]:

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


# Neural network implementation



The following code creates a neural network made of several layers: `Linear` for matrix multiplication by some (trainable) weights, and `ReLU` for rectifying units (threshold-linear). The generic class for neural networks is `nn.Module`, which imposes some constraints on the design, but most of this is hidden from the user.

Note that for running on a GPU, functions are provided to move the model and the data to the chosen device (see the `device` argument, e.g. `model = MLP().to(device)`).


In [None]:
class MLP(nn.Module):
    """Multi-layer perceptron with one hidden ReLU layer.

    The input is flattened (all dimensions except the batch one), passed
    through `Linear -> ReLU -> Linear`, and the raw outputs (logits) are
    returned; no softmax is applied here.

    Parameters
    ----------
    in_features : int
        Number of values per sample after flattening (default 28*28).
    hidden_features : int
        Width of the hidden layer (default 100).
    num_classes : int
        Number of outputs / logits (default 10).

    The defaults reproduce the original fixed architecture, so `MLP()` and
    previously saved state dicts remain compatible.
    """

    def __init__(self, in_features=28*28, hidden_features=100, num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the logits for a batch `x` (flattened per sample first)."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


To calculate the output for a given input (image), we use the `forward` method, or simply `model(X)`. The predicted class corresponds to the maximum output (the `Softmax` simply converts the output to probabilities). 


In [None]:
# instantiate the network
model = MLP()

# build one random input (leading dimension of size 1) and run it through the model
X = torch.rand(1, 28, 28)
logits = model.forward(X)
print('output:\n', logits)
# calling the module directly is the short form; its result is not used here
model(X)

# convert the raw outputs to probabilities along dim 1
pred_probab = torch.softmax(logits, dim=1)
print('probabilities:\n', pred_probab)

# the predicted class is the index of the largest probability
y_pred = pred_probab.argmax(dim=1)
print('predicted class:', y_pred)


Now we can check the parameters of the model, in particular to know which ones will be trained.


In [None]:
# overall architecture, as given by the module's printed representation
print('model structure:\n', model)

# for every named parameter: its name, its size and (at most) its first 100 values
for name, param in model.named_parameters():
    shape, head = param.size(), param.ravel()[:100]
    print('layer: {} | size: {} \nweight values : {} \n'.format(name, shape, head))

# Training using autograd



Now we can present the model with train data and optimize the weights to reduce a loss function.


In [None]:
# stochastic gradient descent over all parameters of the model, learning rate 0.01
optimizer = optim.SGD(params=model.parameters(), lr=0.01)
# cross-entropy loss used to compare the model outputs with the labels
loss_fn = nn.CrossEntropyLoss()
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one pass over `dataloader`, updating `model` after every batch.

    Parameters
    ----------
    dataloader : iterable of `(X, y)` batches exposing a `dataset` attribute
        (its length is used as the total number of samples in the report).
    model : module called as `model(X)`; put in training mode first.
    loss_fn : callable `loss_fn(pred, y)` returning a tensor to backpropagate.
    optimizer : optimizer whose `step()` / `zero_grad()` are called per batch.

    Prints the current loss and the number of samples processed every
    50 batches (starting with the first one). Returns nothing.
    """
    size = len(dataloader.dataset)
    # Running count of processed samples. Unlike `batch * dataloader.batch_size`
    # this also works when the loader has no fixed batch size
    # (`batch_size` is None when a custom `batch_sampler` is used).
    seen = 0
    # set the model to training mode (good practice even if not crucial here)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # backpropagation, parameter update, then reset of the gradients
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        seen += len(X)
        if batch % 50 == 0:
            print('loss: {:>7f}  [{:>5d}/{:>5d}]'.format(loss.item(), seen, size))


def test_loop(dataloader, model, loss_fn):
    # set the model to evaluation mode (good practice even if not crucial here)
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # evaluate the model with torch.no_grad() to ensure that no gradients are computed during test mode
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print('test evaluation: \n accuracy: {:>0.1f}%, avg loss: {:>8f} \n'.format(100*correct, test_loss))


In [None]:
# MNIST train and test splits stored under ./tmp, images converted to tensors
training_data = datasets.MNIST(root='./tmp', train=True, download=True, transform=ToTensor())
test_data = datasets.MNIST(root='./tmp', train=False, download=True, transform=ToTensor())

# number of samples per batch
batch_size = 200

# loaders that serve the two datasets batch by batch
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

# number of training epochs (one epoch = one full pass over the training set)
epochs = 3

# each epoch: train on the training set, then evaluate on the test set
for t in range(epochs):
    header = 'epoch {}\n-------------------------------'.format(t+1)
    print(header)
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)

print('finished')
# Initialization and optimization schemes



We can now look into some more fine tuning of the model before and during its optimization. For instance, what is implied in using `nn.Module` is the initialization of the weights.


In [None]:
# fix the seed of torch's random number generator
torch.manual_seed(12345)

# with the seed fixed, a newly created network should always show the same initial weights
model = MLP()
for name, param in model.named_parameters():
    head = param.ravel()[:10]
    print('layer: {} | size: {} \nweight values : {} \n'.format(name, param.size(), head))

# cross-entropy loss
loss_fn = nn.CrossEntropyLoss()

# Adam optimizer over the model parameters, with its default settings
optimizer = optim.Adam(params=model.parameters())

# each epoch: train on the training set, then evaluate on the test set
for t in range(epochs):
    header = 'epoch {}\n-------------------------------'.format(t+1)
    print(header)
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)

print('finished')


Adam is an optimizer with adaptive learning rate that has been shown to reach better performance than classical stochastic gradient descent. Check [https://pytorch.org/docs/stable/optim.html](https://pytorch.org/docs/stable/optim.html) for other powerful options.


# Saving a trained model

High-level functions are provided by `torch` to save models defined using `torch.nn`. This makes it easy to record the evolution of trained parameters, etc. One can save the parameters only with `state_dict` as below (but then one needs to know the architecture, i.e. in which layer the parameters go...) or the full model, architecture included, by passing the model itself to `torch.save`.


In [None]:
# write the parameters (state dict) of the current model to disk
weights_path = './tmp/mlp_weights.pth'
torch.save(model.state_dict(), weights_path)

# a newly created model starts from a random initialization: evaluate it first
model = MLP()
model.eval()
test_loop(test_dataloader, model, loss_fn)

# copy the previously saved weights into the new model and evaluate again
trained_state = torch.load(weights_path)
model.load_state_dict(trained_state)
model.eval()
test_loop(test_dataloader, model, loss_fn)

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

Adapted from https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html


In [None]:
## Check Data
# convert images to tensors, then normalize the three channels with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# number of images per batch
batch_size = 4

# training split, served in shuffled batches by 2 worker processes
trainset = torchvision.datasets.CIFAR10(
    root='./tmp', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# test split, served in fixed order
testset = torchvision.datasets.CIFAR10(
    root='./tmp', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# class names, indexed by label
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    """Display the image tensor `img` with matplotlib.

    The values are first mapped back with `img / 2 + 0.5` (unnormalize), then
    the axes are reordered from (0, 1, 2) to (1, 2, 0) before plotting.
    """
    unnormalized = img / 2 + 0.5
    reordered = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(reordered)
    plt.show()


# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)
# first image of the batch, unnormalized the same way as in `imshow`
x = images[0] / 2 + 0.5
print(images.shape)
print(x.shape)


# first panel: the full image; next three panels: one channel each
plt.figure(figsize=(16,4))
plt.subplot(1,4,1)
plt.imshow(np.transpose(x.numpy(), (1, 2, 0)))
for i in range(3):
    plt.subplot(1,4,i+2)
    plt.imshow(x[i,:,:].numpy(), cmap='Greys')
## Test Image Transformations
# Open a new figure first: without it, the image below is drawn into the
# current axes, i.e. the last channel panel of the figure above.
plt.figure()
plt.imshow(x[1,:,:].numpy(), cmap='Greys', interpolation='nearest')
plt.savefig('ex_img')



In [None]:
# 2D convolution with 3 input channels, 6 output channels and a kernel size of 5
conv = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)

# shape of the convolution output for the example image
conv(x).shape

In [None]:
# max pooling with a kernel size of 2 and a stride of 2
pool = nn.MaxPool2d(2, 2)

pooled = pool(conv(x))
# Printed explicitly: only the last expression of a cell is displayed
# automatically, so this intermediate shape would otherwise be lost.
print(pooled.shape)
# shape once all dimensions (starting from dim 0) are flattened into one
torch.flatten(pooled, 0).shape

In [None]:
## Build and Run Neural Network Model
import torch.nn as nn
import torch.nn.functional as F

# convolutional neural network
class CNN_Net(nn.Module):
    """Small convolutional network: two conv/pool stages and three linear layers.

    `forward` applies `conv -> ReLU -> max-pool` twice, flattens everything
    except the batch dimension, then applies `fc1 -> ReLU -> fc2 -> ReLU -> fc3`
    and returns the 10 raw outputs. `fc1` expects 16 * 5 * 5 features per
    sample, which is what 3x32x32 inputs produce after the two stages.
    """

    def __init__(self):
        super().__init__()
        # convolutional part (the same pooling layer is reused after each convolution)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # fully connected part
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw outputs of the network for the batch `x`."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # flatten all dimensions except batch
        x = torch.flatten(x, 1)
        for fc in (self.fc1, self.fc2):
            x = F.relu(fc(x))
        return self.fc3(x)

# create the CNN; this rebinds the notebook-level name `model`, which the cells below use
model = CNN_Net()


In [None]:
import torch.optim as optim

# cross-entropy loss and SGD with momentum over the parameters of the model
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
for epoch in range(2):  # loop over the train data multiple times

    # loss accumulated since the last report
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        # Report once 2000 mini-batches have been accumulated (i = 1999, 3999, ...).
        # Testing `i % 2000 == 0` would report at the very first batch and
        # divide the loss of a single batch by 2000.
        if i % 2000 == 1999:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')


In [None]:
def calc_acc(dataloader, net=None):
    """Return the classification accuracy of a network over `dataloader`.

    Parameters
    ----------
    dataloader : iterable of `(inputs, labels)` batches.
    net : callable, optional
        Network to evaluate, called as `net(inputs)`. When omitted, the
        notebook-level `model` is used, so existing `calc_acc(loader)` calls
        behave as before; passing it explicitly avoids relying on that global.

    Returns
    -------
    The number of samples whose arg-max output (dim 1) equals the label,
    divided by the number of samples seen. The count is accumulated from
    tensor sums, so the result is a tensor.
    """
    if net is None:
        net = model

    # number of correct predictions / number of samples
    corr_pred = 0
    n_samples = 0

    # forward passes only: no gradients are needed
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = net(inputs)
            corr_pred += (torch.argmax(outputs, dim=1) == labels).sum()
            n_samples += outputs.shape[0]

    return corr_pred / n_samples

        
# accuracy over the training loader, then over the test loader
train_acc = calc_acc(trainloader)
print('Accuracy on train set:', train_acc)
test_acc = calc_acc(testloader)
print('Accuracy on test set:', test_acc)
