# Optimizing and Saving / Loading a Model

In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

  from .autonotebook import tqdm as notebook_tqdm


Downloading the FashionMNIST dataset and wrapping it in a DataLoader.

In [2]:
BATCH_SIZE = 64

train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE)


In [3]:
class NeuralNetwork(torch.nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.model = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(28*28, 1024),
            torch.nn.ReLU(),
            torch.nn.Linear(1024, 1024),
            torch.nn.ReLU(),
            torch.nn.Linear(1024, 10),
            torch.nn.Softmax(dim=1)
        )

    def forward(self, input):
        return self.model(input)

model = NeuralNetwork()

In [11]:
def train_loop(model, train_dataloader, epochs, learning_rate=1e-6):
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    size_train = len(train_dataloader.dataset)
    iterations = len(train_dataloader)
    for epoch in range(epochs):
        correct_train = 0
        print(f'Epoch: [{epoch+1}/{epochs}]')
        for i, (x, y) in enumerate(train_dataloader):
            predictions = model(x)
            loss = loss_function(predictions, y)
            with torch.no_grad():
                correct_train += (predictions.argmax(1) == y).type(torch.float).sum().item()
            
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 100 == 0 and i>0:
                print(f'    - [{i+1} / {iterations}] - loss: {loss.item()}')
            
        print(f"loss: {loss:>7f} | Train Accuracy: {correct_train/size_train}")

In [12]:
train_loop(model, train_loader, epochs=3)

Epoch: [1/3]
    - [101 / 938] - loss: 1.7372955083847046
    - [201 / 938] - loss: 1.6352391242980957
    - [301 / 938] - loss: 1.7216038703918457
    - [401 / 938] - loss: 1.6904946565628052
    - [501 / 938] - loss: 1.6492092609405518
    - [601 / 938] - loss: 1.6795302629470825
    - [701 / 938] - loss: 1.6973241567611694
    - [801 / 938] - loss: 1.6769626140594482
    - [901 / 938] - loss: 1.7166748046875
loss: 1.700471 | Train Accuracy: 0.7982166666666667
Epoch: [2/3]
    - [101 / 938] - loss: 1.7339836359024048
    - [201 / 938] - loss: 1.634127140045166
    - [301 / 938] - loss: 1.7208024263381958
    - [401 / 938] - loss: 1.690353274345398
    - [501 / 938] - loss: 1.6492655277252197
    - [601 / 938] - loss: 1.6785224676132202
    - [701 / 938] - loss: 1.6968646049499512
    - [801 / 938] - loss: 1.6759741306304932
    - [901 / 938] - loss: 1.7155123949050903
loss: 1.699959 | Train Accuracy: 0.79845
Epoch: [3/3]
    - [101 / 938] - loss: 1.7327214479446411
    - [201 / 938] 

In [28]:
# Testing the model
def test(model, test_loader):
    correct_predictions = 0
    total_predictions = len(test_loader.dataset)
    for (x,y) in test_loader:
        y_prob_pred = model(x)
        with torch.no_grad():
            y_pred = y_prob_pred.argmax(1)
            correct_predictions += (y == y_pred).type(torch.float32).sum().item()
    print(f"Test Accuracy: {correct_predictions/total_predictions}")

test(model, test_loader)

Test Accuracy: 0.7918


Saving the model

In [29]:
# We can save the instance of our NeuralNetwork class with its optimized weights and biases
torch.save(model, 'model.pth')

model_loaded = torch.load('model.pth')

# Testing the model loaded (should be the same result as above)
test(model_loaded, test_loader)

Test Accuracy: 0.7918


In [31]:
# We can also just save the parameters and then pass it to an empty instance of the NeuralNetwork class
torch.save(model.state_dict(), 'model_params.pth')

model_empty = NeuralNetwork()

loaded_state_dict = torch.load('model_params.pth')
model_empty.load_state_dict(loaded_state_dict)
model_empty.eval() # turning on evaluation mode (ignores dropout layers etc)

# Testing the model loaded (should be the same result as above)
test(model_empty, test_loader)

Test Accuracy: 0.7918
