In [None]:
import torch
import torch.nn as nn

class model(nn.Module):
  """Tiny CNN used to demonstrate saving and loading: conv -> max-pool -> batch-norm -> linear.

  ``fc1`` takes 256 input features, which is the flattened 16 x 4 x 4 feature
  map obtained from e.g. a 3 x 10 x 10 input (3x3 conv without padding gives
  8 x 8, the 2x2 max-pool gives 4 x 4).
  """

  def __init__(self):
    super().__init__()

    self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=0)
    self.pool1 = nn.MaxPool2d(2,2)
    self.bn = nn.BatchNorm2d(16)
    self.fc1 = nn.Linear(256,10)

  def forward(self, x):
    """Run the network on a batch ``x`` of shape (N, 3, H, W) and return (N, 10) scores.

    H and W must be chosen so that the pooled feature map flattens to 256 values.
    """
    x=self.conv1(x)
    x=self.pool1(x)
    x=self.bn(x)
    # The linear layer works on vectors: collapse (C, H, W) into one feature axis.
    x=torch.flatten(x, 1)
    # The layer is registered as `fc1`; there is no `self.Linear` attribute.
    x=self.fc1(x)

    return x



In [None]:
# Instantiate the demo network that the following cells save and load.
my_model = model()

We can save either the model architecture + weights, or just the weights.

## Saving the architecture + weights:
The model is saved as a pickle file.

In [None]:
# Serialise the whole module object (architecture + weights) into a single file.
model_path = 'models'
torch.save(obj=my_model, f=model_path)

In [None]:
# To load the model use torch.load().
# There is no need to instantiate the model architecture (object) first.
# NOTE(review): unpickling presumably still needs the `model` class to be
# defined/importable in the session, and should only be done on trusted files — confirm.
new_model = torch.load(model_path)

## Saving just the model weights (state_dict)

In [None]:
# The state_dict is a Python dictionary that holds every layer's parameters
# plus the registered buffers (such as the BatchNorm running statistics).
for name, tensor in my_model.state_dict().items():
  print(f'{name} shape {tensor.shape}')

conv1.weight shape torch.Size([16, 3, 3, 3])
conv1.bias shape torch.Size([16])
bn.weight shape torch.Size([16])
bn.bias shape torch.Size([16])
bn.running_mean shape torch.Size([16])
bn.running_var shape torch.Size([16])
bn.num_batches_tracked shape torch.Size([])
fc1.weight shape torch.Size([10, 256])
fc1.bias shape torch.Size([10])


In [None]:
# Persist only the weights dictionary, not the module object itself.
params_path='model_state_dict'
torch.save(obj=my_model.state_dict(), f=params_path)

In [None]:
# torch.load() on its own only gives back the weights dictionary, not a model:
# the architecture has to be instantiated first and the weights copied into it.
# If the model matches the weights, loading works.
my_new_model = model()
state_dict = torch.load(f=params_path)
my_new_model.load_state_dict(state_dict=state_dict)

<All keys matched successfully>

## Strict Loading:
load_state_dict(state_dict, strict=True)

If strict is True, then the keys of state_dict must exactly match the keys returned by this module’s state_dict() function.

load_state_dict() returns:


1.   **missing_keys** : a list of str containing the missing keys

2.   **unexpected_keys** : a list of str containing the unexpected keys


In [None]:
import torchvision

# Example of non-strict loading followed by fine-tuning setup.
# NOTE(review): `gpu`, `args` and `optim` are not defined anywhere in the
# visible notebook — this cell looks excerpted from a training script and will
# not run until they are provided (e.g. `import torch.optim as optim`).
# NOTE(review): this rebinds `model`, shadowing the `model` class defined at
# the top of the notebook for every cell that runs afterwards.
model = torchvision.models.resnet50().cuda(gpu)
state_dict = torch.load(args.pretrained, map_location='cpu')
# strict=False returns the mismatching keys instead of raising; the assert then
# requires that the classifier head is the only thing absent from the checkpoint.
missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
assert missing_keys == ['fc.weight', 'fc.bias'] and unexpected_keys == []
# Re-initialise the head in place: normal(0, 0.01) weights and a zero bias.
model.fc.weight.data.normal_(mean=0.0, std=0.01)
model.fc.bias.data.zero_()
if args.weights == 'freeze':
    # Turn gradients off for the whole network, then back on for the head only.
    model.requires_grad_(False)
    model.fc.requires_grad_(True)
# Split the parameters into head vs. everything else so that each group can be
# given its own learning rate below.
classifier_parameters, model_parameters = [], []
for name, param in model.named_parameters():
    if name in {'fc.weight', 'fc.bias'}:
        classifier_parameters.append(param)
    else:
        model_parameters.append(param)

model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu])

criterion = nn.CrossEntropyLoss().cuda(gpu)

# The head is always optimised; the remaining parameters only when fine-tuning.
param_groups = [dict(params=classifier_parameters, lr=args.lr_classifier)]
if args.weights == 'finetune':
    param_groups.append(dict(params=model_parameters, lr=args.lr_backbone))
# The positional 0 is presumably the default lr, overridden by each group's own `lr` — confirm.
optimizer = optim.SGD(param_groups, 0, momentum=0.9, weight_decay=args.weight_decay)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)

In [None]:
from pathlib import Path
# Create the data directory. exist_ok=True keeps the cell re-runnable: a plain
# mkdir() raises FileExistsError once the directory is already on disk.
p=Path('data')
p.mkdir(exist_ok=True)

In [None]:
# Install the Cloud TPU client (pinned to 0.10) and the torch_xla 1.9 wheel
# built for CPython 3.7 on Linux x86_64.
!pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl

Collecting torch-xla==1.9
[?25l  Downloading https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl (149.9MB)
[K     |████████████████████████████████| 149.9MB 73kB/s 
Installing collected packages: torch-xla
  Found existing installation: torch-xla 1.8.1
    Uninstalling torch-xla-1.8.1:
      Successfully uninstalled torch-xla-1.8.1
Successfully installed torch-xla-1.9


In [None]:
!pip uyninstall torch
!pip install torch==1.9

Uninstalling torch-1.9.0+cu102:
  Would remove:
    /usr/local/bin/convert-caffe2-to-onnx
    /usr/local/bin/convert-onnx-to-caffe2
    /usr/local/lib/python3.7/dist-packages/caffe2/*
    /usr/local/lib/python3.7/dist-packages/torch-1.9.0+cu102.dist-info/*
    /usr/local/lib/python3.7/dist-packages/torch/*
Proceed (y/n)? y
y


  Successfully uninstalled torch-1.9.0+cu102
Collecting torch==1.9
[?25l  Downloading https://files.pythonhosted.org/packages/d2/a9/b3cea4a97ffabd6639e71608814dbd08081e202e8ac9580250273c0541ff/torch-1.9.0-cp37-cp37m-manylinux1_x86_64.whl (831.4MB)
[K     |████████████████████████████████| 831.4MB 11kB/s 
Installing collected packages: torch
Successfully installed torch-1.9.0


In [None]:
import torch_xla



In [None]:
import torch_xla.core.xla_model as xm
# Ask torch_xla for an XLA device; the returned device object is shown as the cell output.
xm.xla_device()

device(type='xla', index=1)

In [None]:
# Count the entries returned by xm.get_xla_supported_devices().
len(xm.get_xla_supported_devices())

8

## Optimizers also have parameters that we can save:


In [None]:
# Build an Adam optimizer over the demo model's parameters, then print each
# top-level entry of its state_dict.
optimizer = torch.optim.Adam(
    my_model.parameters(),
    lr=0.003,
    betas=(0.9, 0.90),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)
for key, value in optimizer.state_dict().items():
  print(f'{key}    shape      {value}')

state    shape      {}
param_groups    shape      [{'lr': 0.003, 'betas': (0.9, 0.9), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'params': [0, 1, 2, 3, 4, 5]}]


**Saving the entire optimizer object**

In [None]:
# Pickle the whole optimizer object, then read it straight back.
# NOTE(review): the reloaded optimizer presumably holds deserialized copies of
# the parameters rather than my_model's live tensors — confirm before training
# with it.
optimizer_path = 'adam_optimizer'
torch.save(optimizer , optimizer_path)
optimizer = torch.load(optimizer_path)

**Saving the optimizer state_dict:**

In [None]:
# Persist only the optimizer's state dictionary, not the optimizer object.
optimizer_state_dict_path="adam_optimizer_state_dict"
torch.save(obj=optimizer.state_dict(), f=optimizer_state_dict_path)


In [None]:
# A fresh Adam optimizer over the same parameters; no lr/betas are passed here,
# so any non-default settings have to come from the saved state_dict.
new_optimizer = torch.optim.Adam(my_model.parameters())

In [None]:
# Reading the file back yields the saved dictionary, shown as the cell output.
torch.load(optimizer_state_dict_path)

{'param_groups': [{'amsgrad': False,
   'betas': (0.9, 0.9),
   'eps': 1e-08,
   'lr': 0.003,
   'params': [0, 1, 2, 3, 4, 5],
   'weight_decay': 0}],
 'state': {}}

In [None]:
# Copy the saved optimizer state_dict into the freshly built optimizer, then
# print its entries to confirm that the saved settings were restored.
new_optimizer.load_state_dict(state_dict=torch.load(f=optimizer_state_dict_path))

for key, value in new_optimizer.state_dict().items():
  print(f'{key}    shape      {value}')

state    shape      {}
param_groups    shape      [{'lr': 0.003, 'betas': (0.9, 0.9), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'params': [0, 1, 2, 3, 4, 5]}]


## torch.save can save any Python object:
We can save all our work in one ckpt file (model, optimizer, loss, epochs, ... )

In [None]:
# Bundle the training progress, the model object and the optimizer state into
# one checkpoint file.
path_for_all = 'all'
epoch=8
loss=0.022
torch.save(
    dict(
        epoch=epoch,
        model=my_model,
        optimizer_state_dict=optimizer.state_dict(),
        loss=loss,
    ),
    path_for_all,
)


In [None]:
# Restore everything that was bundled into the checkpoint file.
checkpoint = torch.load(f=path_for_all)
model_to_continue_training = checkpoint['model']
optimizer.load_state_dict(state_dict=checkpoint['optimizer_state_dict'])
epoch, loss = checkpoint['epoch'], checkpoint['loss']

#continue training....
#evaluate.............

In [None]:
# List every state_dict entry of the model restored from the checkpoint.
for name, tensor in model_to_continue_training.state_dict().items():
  print(f'{name}    shape      {tensor.shape}')

conv1.weight    shape      torch.Size([16, 3, 3, 3])
conv1.bias    shape      torch.Size([16])
bn.weight    shape      torch.Size([16])
bn.bias    shape      torch.Size([16])
bn.running_mean    shape      torch.Size([16])
bn.running_var    shape      torch.Size([16])
bn.num_batches_tracked    shape      torch.Size([])
fc1.weight    shape      torch.Size([10, 256])
fc1.bias    shape      torch.Size([10])


In [None]:
# List the optimizer state after it was reloaded from the checkpoint.
for key, value in optimizer.state_dict().items():
  print(f'{key}    shape      {value}')

state    shape      {}
param_groups    shape      [{'lr': 0.003, 'betas': (0.9, 0.9), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'params': [0, 1, 2, 3, 4, 5]}]


In [None]:
# Show the loss value read back from the checkpoint.
loss

0.022

In [None]:
# Show the epoch counter read back from the checkpoint.
epoch

8

# Saving and loading with GPU:


## Saving and loading on GPU:
If you trained your model on GPU and want to load it on GPU remember to use model.to(device='cuda') after loading the model.

In [None]:
device= torch.device("cuda") #suppose the GPU is available
# Move the model that is actually being saved; `model` no longer refers to it
# at this point in the notebook.
my_model.to(device) #move the model to GPU
torch.save(my_model , 'saved_model') #save the model on GPU . torch.save() saves the model on the same device
model_cont = torch.load('saved_model')
#it is important to move the loaded model to GPU, even if the model was saved on GPU
model_cont.to(device)
##

#Continue training on GPU

##

### Saving and loading on different devices:

In [None]:
#if you save the model on GPU and want to load it to CPU, use map_location=cpu_device
cpu_device = torch.device('cpu')
# 'saved_model' is the file written in the GPU section above; `path` is not
# defined yet at this point of the notebook.
model = torch.load('saved_model' , map_location=cpu_device)

In [None]:
#if you save the model on CPU and want to load it to GPU, use map_location=gpu_device

# The CUDA device type is spelled 'cuda'; torch.device('gpu') is rejected.
gpu_device = torch.device('cuda')
# `model_path` is the file the whole model was saved to (from the CPU) near the
# top of the notebook; `path` is not defined yet at this point.
model = torch.load(model_path , map_location=gpu_device)
model.to(gpu_device)

# Saving and Loading XLA Models:


In [None]:
import torch
import torch_xla
import torch_xla.core.xla_model as xm
path = 'xla_model_state_dict'
# xm.save() takes the keywords `master_only` and `global_master`.
# NOTE(review): `xla_model` is not defined in the visible notebook — bind it to
# a model living on the XLA device before running this cell.
xm.save(xla_model.state_dict(), path , master_only=True , global_master=False)

In [None]:
# Read the saved state_dict onto the CPU first, copy it into `model`, and only
# then move the model to the XLA device.
# NOTE(review): `model` is whatever the previous cells last bound to that name —
# confirm its architecture matches the saved state_dict.
state_dict = torch.load('xla_model_state_dict', map_location='cpu')
model.load_state_dict(state_dict)
device=xm.xla_device()
model.to(device)

# Strict Loading:

In [None]:
# Build a ResNet-50 to serve as the source of the partial checkpoint.
resnet = torchvision.models.resnet50()

In [None]:
# Save the backbone only: every state_dict entry except the final `fc` layer.
# The full key names are kept so the file can be loaded back into a resnet50;
# list(resnet.children())[0] would store just the first layer, under unprefixed keys.
torch.save(
    {key: value for key, value in resnet.state_dict().items() if not key.startswith('fc.')},
    'resnet50_conv',
)


In [None]:
model = torchvision.models.resnet50()
# Load the checkpoint file written by the previous cell rather than reusing
# whatever `state_dict` was left over from an earlier section.
state_dict = torch.load('resnet50_conv')
# strict=False reports the mismatching keys instead of raising.
missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
# When the checkpoint holds everything except the final layer,
# missing_keys is ['fc.weight', 'fc.bias'] and unexpected_keys is empty.