In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [4]:
class TheModelClass(nn.Module):
    """Small LeNet-style CNN used to demonstrate saving and loading.

    Two conv -> ReLU -> max-pool stages are followed by three fully
    connected layers. ``fc1`` takes ``16 * 5 * 5`` features, which is what
    the conv stack produces for 3-channel 32x32 inputs; the last layer
    emits 10 values per sample.
    """

    def __init__(self):
        super(TheModelClass, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)  # shared by both conv stages
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: tensor of shape (N, 3, 32, 32), or a single unbatched
                (3, 32, 32) image.

        Returns:
            Tensor of shape (N, 10) (N == 1 for an unbatched input).

        Raises:
            RuntimeError: if the spatial size does not yield the
                16 * 5 * 5 features that ``fc1`` expects.
        """
        # Treat a lone (C, H, W) image as a batch of one so the flatten
        # below always has a batch dimension to preserve.
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten each sample on its own. The previous view(-1, 400) let
        # the batch dimension absorb a feature-size mismatch, silently
        # regrouping elements across samples instead of failing.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [11]:
# Build a fresh network and an SGD optimizer over its parameters.
model = TheModelClass()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# state_dict() maps each parameter name to its tensor; list every entry
# alongside its shape.
print('Model state_dict:')
for layer_name, weights in model.state_dict().items():
    print('%-12s\t%s' % (layer_name, weights.size()))

Model state_dict:
conv1.weight	torch.Size([6, 3, 5, 5])
conv1.bias  	torch.Size([6])
conv2.weight	torch.Size([16, 6, 5, 5])
conv2.bias  	torch.Size([16])
fc1.weight  	torch.Size([120, 400])
fc1.bias    	torch.Size([120])
fc2.weight  	torch.Size([84, 120])
fc2.bias    	torch.Size([84])
fc3.weight  	torch.Size([10, 84])
fc3.bias    	torch.Size([10])


In [12]:
# The optimizer has its own state_dict; print each top-level entry.
print('Optimizer state_dict:')
for key, value in optimizer.state_dict().items():
    print(key, '\t', value)

Optimizer state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [4649837912, 4401979184, 4649839712, 4649824400, 4649826704, 4649824544, 4649826344, 4649825048, 4649825264, 4649827496]}]


# Saving & Loading Model
### Save/Load `state_dict` (Recommended)

In [13]:
# Write only the parameter dictionary to disk, not the module object itself.
trained_weights = model.state_dict()
torch.save(trained_weights, './savedStateDictEx.pt')

In [15]:
# Rebuild the architecture first, then copy the saved parameters into it.
model = TheModelClass()  # pass the original constructor arguments here if the class takes any
saved_weights = torch.load('./savedStateDictEx.pt')
model.load_state_dict(saved_weights)
# eval() switches the module to evaluation mode and returns it, so the
# cell displays the restored model.
model.eval()

TheModelClass(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

### Save/Load Entire Model

In [17]:
# Serialize the whole module object rather than just its state_dict.
# NOTE(review): this relies on pickle, so loading the file presumably needs
# the TheModelClass definition to be importable - confirm before sharing it.
torch.save(obj=model, f='./savedModelEx.pt')

  "type " + obj.__name__ + ". It won't be checked "


In [18]:
# Restore the complete module object saved above. torch.load unpickles the
# file, so only use it on checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases reportedly default torch.load to
# weights_only=True, which rejects full-module pickles - confirm against the
# installed version.
model = torch.load(f='./savedModelEx.pt')
model.eval()

TheModelClass(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

### Saving & Loading Checkpoints for Inference or Resuming Training

In [22]:
# Bundle everything needed to resume training into one dictionary, then
# write it out in a single call.
checkpoint = {
    'epoch': 100,  # placeholder; store the real epoch counter here
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    # 'loss': loss,
    # ... any other values worth restoring
}
torch.save(checkpoint, './savedCheckpointEx.pt')

In [23]:
# Read the checkpoint dictionary back from disk.
checkpoint = torch.load('./savedCheckpointEx.pt')

# Recreate the model and restore its parameters.
model = TheModelClass()
model.load_state_dict(checkpoint['model_state_dict'])

# Recreate the optimizer over the new model's parameters and restore its state.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  # etc.
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

epoch = checkpoint['epoch']
# loss = checkpoint['loss']
# ...

# Use model.eval() for inference, or model.train() to resume training.
model.eval()

TheModelClass(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)