<a href="https://colab.research.google.com/github/binary-study/DemoBinary/blob/main/Save_load_PyTorch_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%mkdir checkpoint best_model

In [2]:
pwd

'/content'

In [3]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import numpy as np

In [4]:
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


## **1. Prepare Dataset**

In [14]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize([0.5], [0.5])])
# Download and load the training data
trainset = datasets.FashionMNIST('F_MNIST_data/', download=True, train=True, transform=transform)

# Download and load the test data
testset = datasets.FashionMNIST('F_MNIST_data/', download=True, train=False, transform=transform)

loaders = {
    'train' : torch.utils.data.DataLoader(trainset,batch_size = 64,shuffle=True),
    'test'  : torch.utils.data.DataLoader(testset,batch_size = 64,shuffle=True),
}

In [6]:
loaders

{'test': <torch.utils.data.dataloader.DataLoader at 0x7f1f93a6ff10>,
 'train': <torch.utils.data.dataloader.DataLoader at 0x7f1fedc46850>}

## **2. Define Model**

In [13]:
# Define your network ( Simple Example )
class FashionClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        input_size = 784
        self.fc1 = nn.Linear(input_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64,10)
        self.dropout = nn.Dropout(p=0.2)
        
    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.dropout(F.relu(self.fc4(x)))
        x = F.log_softmax(self.fc5(x), dim=1)
        return x

In [8]:
# Create the network, define the criterion and optimizer
model = FashionClassifier()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# move model to GPU if CUDA is available
if use_cuda:
    model = model.cuda()

print(model)

FashionClassifier(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


In [9]:
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
fc1.weight 	 torch.Size([512, 784])
fc1.bias 	 torch.Size([512])
fc2.weight 	 torch.Size([256, 512])
fc2.bias 	 torch.Size([256])
fc3.weight 	 torch.Size([128, 256])
fc3.bias 	 torch.Size([128])
fc4.weight 	 torch.Size([64, 128])
fc4.bias 	 torch.Size([64])
fc5.weight 	 torch.Size([10, 64])
fc5.bias 	 torch.Size([10])
Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}]


In [10]:
import shutil
def save_ckp(state, is_best, checkpoint_path, best_model_path):
    f_path = checkpoint_path
    torch.save(state, f_path)
    if is_best:
        best_fpath = best_model_path
        shutil.copyfile(f_path, best_fpath)

In [15]:
def train(start_epochs, n_epochs, valid_loss_min_input, loaders, model, optimizer, criterion, use_cuda, checkpoint_path, best_model_path):
    """
    Keyword arguments:
    start_epochs -- the real part (default 0.0)
    n_epochs -- the imaginary part (default 0.0)
    valid_loss_min_input
    loaders
    model
    optimizer
    criterion
    use_cuda
    checkpoint_path
    best_model_path
    
    returns trained model
    """
    # initialize tracker for minimum validation loss
    valid_loss_min = valid_loss_min_input 
    
    for epoch in range(start_epochs, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## find the loss and update the model parameters accordingly
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            ## record the average training loss, using something like
            ## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['test']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update the average validation loss
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # update average validation loss 
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
        # calculate average losses
        train_loss = train_loss/len(loaders['train'].dataset)
        valid_loss = valid_loss/len(loaders['test'].dataset)

        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))
        
        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': valid_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
        ## TODO: save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
            save_ckp(checkpoint, True, checkpoint_path, best_model_path)
            valid_loss_min = valid_loss
            
    # return trained model
    return model

In [16]:
trained_model = train(1, 3, np.Inf, loaders, model, optimizer, criterion, use_cuda, "./checkpoint/current_checkpoint.pt", "./best_model/best_model.pt")



Epoch: 1 	Training Loss: 0.000010 	Validation Loss: 0.000045
Validation loss decreased (inf --> 0.000045).  Saving model ...
Epoch: 2 	Training Loss: 0.000007 	Validation Loss: 0.000042
Validation loss decreased (0.000045 --> 0.000042).  Saving model ...
Epoch: 3 	Training Loss: 0.000007 	Validation Loss: 0.000041
Validation loss decreased (0.000042 --> 0.000041).  Saving model ...


In [17]:
%ls ./best_model/

best_model.pt


In [18]:
%ls ./checkpoint/


current_checkpoint.pt


## **Loading the model**

In [19]:
def load_ckp(checkpoint_fpath, model, optimizer):
    checkpoint = torch.load(checkpoint_fpath)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    valid_loss_min = checkpoint['valid_loss_min']
    return model, optimizer, checkpoint['epoch'], valid_loss_min.item()

In [20]:
%pwd
%ls

[0m[01;34mbest_model[0m/  [01;34mcheckpoint[0m/  [01;34mF_MNIST_data[0m/  [01;34msample_data[0m/


In [21]:
model = FashionClassifier()
# move model to GPU if CUDA is available
if use_cuda:
    model = model.cuda()

print(model)

FashionClassifier(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


In [22]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
ckp_path = "./checkpoint/current_checkpoint.pt"
model, optimizer, start_epoch, valid_loss_min = load_ckp(ckp_path, model, optimizer)

In [23]:
print("model = ", model)
print("optimizer = ", optimizer)
print("start_epoch = ", start_epoch)
print("valid_loss_min = ", valid_loss_min)
print("valid_loss_min = {:.6f}".format(valid_loss_min))

model =  FashionClassifier(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)
optimizer =  Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)
start_epoch =  4
valid_loss_min =  4.095881013199687e-05
valid_loss_min = 0.000041


In [24]:
trained_model = train(start_epoch, 6, valid_loss_min, loaders, model, optimizer, criterion, use_cuda, "./checkpoint/current_checkpoint.pt", "./best_model/best_model.pt")

Epoch: 4 	Training Loss: 0.000006 	Validation Loss: 0.000038
Validation loss decreased (0.000041 --> 0.000038).  Saving model ...
Epoch: 5 	Training Loss: 0.000006 	Validation Loss: 0.000038
Validation loss decreased (0.000038 --> 0.000038).  Saving model ...
Epoch: 6 	Training Loss: 0.000006 	Validation Loss: 0.000035
Validation loss decreased (0.000038 --> 0.000035).  Saving model ...


In [25]:
trained_model.eval()

FashionClassifier(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [26]:
test_acc = 0.0
for samples, labels in loaders['test']:
    with torch.no_grad():
        samples, labels = samples.cuda(), labels.cuda()
        output = trained_model(samples)
        # calculate accuracy
        pred = torch.argmax(output, dim=1)
        correct = pred.eq(labels)
        test_acc += torch.mean(correct.float())

print('Accuracy of the network on {} test images: {}%'.format(len(testset), round(test_acc.item()*100.0/len(loaders['test']), 2)))

Accuracy of the network on 10000 test images: 87.49%
