In [1]:
import torch
import torch.nn as nn

### 3 DIFFERENT METHODS TO REMEMBER:
* torch.save(arg, PATH) # can be model, tensor, or dictionary
* torch.load(PATH)
* model.load_state_dict(arg) # a method of the model (nn.Module), not a function in torch

### 1) lazy way: save whole model
torch.save(model, PATH)

The model class must be defined (or importable) in the code that loads the file.

model = torch.load(PATH)

model.eval()

### 2) recommended way: save only the state_dict

torch.save(model.state_dict(), PATH)

The model must be created again with the same constructor arguments before the state_dict is loaded into it.

model = Model(*args, **kwargs)

model.load_state_dict(torch.load(PATH))

model.eval()

#### Example

In [3]:
class Model(nn.Module):
    """Single linear layer with one output, followed by a sigmoid."""

    def __init__(self, n_input_features):
        """Create the linear layer.

        Args:
            n_input_features: number of features in each input sample.
        """
        super().__init__()
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Build a model with 6 input features; ending the cell on the bare name
# makes the notebook display the module's repr.
model = Model(n_input_features=6)
model

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [4]:
# Print the freshly initialised weight and bias so they can be compared
# with the values obtained after saving and loading.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.2176,  0.2615,  0.2767,  0.0339,  0.2663, -0.3468]],
       requires_grad=True)
Parameter containing:
tensor([-0.2720], requires_grad=True)


##### save model using lazy method

In [5]:
# Save the entire model object (the module itself, not just its state_dict).

FILE = "model.pth"
torch.save(model, FILE)

  "type " + obj.__name__ + ". It won't be checked "


In [6]:
# FILE holds a fully pickled nn.Module, so it has to be loaded with
# weights_only=False: PyTorch >= 2.6 defaults to weights_only=True and refuses
# to unpickle arbitrary classes such as Model. The argument is available from
# PyTorch 1.13 onwards. Unpickling can execute code, so only do this for files
# you created yourself or otherwise trust.
loaded_model = torch.load(FILE, weights_only=False)
# Switch to evaluation mode; the returned module is shown as the cell output.
loaded_model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [7]:
# The loaded model should print the same weight and bias values as the
# original model did before saving.
for param in loaded_model.parameters():
    print(param)

Parameter containing:
tensor([[-0.2176,  0.2615,  0.2767,  0.0339,  0.2663, -0.3468]],
       requires_grad=True)
Parameter containing:
tensor([-0.2720], requires_grad=True)


##### save only the state_dict

In [8]:
# Show the state_dict: a mapping from parameter names to their tensors.
print(model.state_dict())

OrderedDict([('linear.weight', tensor([[-0.2176,  0.2615,  0.2767,  0.0339,  0.2663, -0.3468]])), ('linear.bias', tensor([-0.2720]))])


In [9]:
# Save only the state_dict (parameter tensors), not the module object.
# Note: this reuses the filename of the whole-model save above, so that
# file is overwritten with the state_dict.
FILE = "model.pth"
torch.save(model.state_dict(), FILE)

In [10]:
# A state_dict contains no architecture, so first rebuild the model with the
# same constructor arguments, then copy the saved tensors into it.
loaded_model = Model(n_input_features=6)
loaded_model.load_state_dict(torch.load(FILE)) # expects the loaded dictionary, not the file path
# Switch to evaluation mode; the returned module is shown as the cell output.
loaded_model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [11]:
# The reloaded state_dict should match the one printed before saving.
print(loaded_model.state_dict())

OrderedDict([('linear.weight', tensor([[-0.2176,  0.2615,  0.2767,  0.0339,  0.2663, -0.3468]])), ('linear.bias', tensor([-0.2720]))])


##### save and load a checkpoint

In [14]:
# Create an optimizer for the model's parameters so that its state can be
# stored in the checkpoint alongside the model weights.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# The optimizer has its own state_dict (hyperparameters and parameter ids).
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [1657164515584, 1657164515800]}]}


In [15]:
# Bundle everything needed to resume training into a single dictionary:
# the epoch counter plus the model and optimizer state_dicts.
checkpoint = {
    "epoch": 90,
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict(),
}
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [1657164515584, 1657164515800]}]}


In [16]:
# Write the checkpoint dictionary to its own file; torch.save accepts a
# plain dictionary just like a model or tensor.
FILE = "checkpoint.pth"
torch.save(checkpoint, FILE)

In [17]:
# Start over with a newly initialised model and a new optimizer. The optimizer
# is created with lr=0, which differs from the saved lr of 0.01, so restoring
# the checkpoint later is visible in the optimizer's state.
model = Model(n_input_features=6)
print(model)
optimizer = torch.optim.SGD(model.parameters(), lr=0)
print(optimizer)

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [19]:
# Read the checkpoint dictionary back from disk and inspect its contents
# (epoch, model state and optimizer state).
checkpoint = torch.load(FILE)
print(checkpoint)

{'epoch': 90, 'model_state': OrderedDict([('linear.weight', tensor([[-0.2176,  0.2615,  0.2767,  0.0339,  0.2663, -0.3468]])), ('linear.bias', tensor([-0.2720]))]), 'optim_state': {'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [1657164515584, 1657164515800]}]}}


In [20]:
# Restore each component from its entry in the checkpoint dictionary.
model.load_state_dict(checkpoint['model_state'])
optimizer.load_state_dict(checkpoint['optim_state'])
# The epoch counter is a plain value stored next to the state_dicts.
epoch = checkpoint['epoch']
print(epoch)

90


In [21]:
# Switch to evaluation mode; the returned module is shown as the cell output.
model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [22]:
# After load_state_dict the optimizer reports the checkpoint's lr (0.01)
# rather than the lr=0 it was constructed with.
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [1657179691840, 1657179691984]}]}


### 3) Saving on CPU/GPU

In [30]:
# 1) Save on GPU, Load on CPU

#PATH=''
#device = torch.device("cuda")
#model.to(device)
#torch.save(model.state_dict(), PATH)

#device = torch.device('cpu')
#model = Model(*args, **kwargs)
#model.load_state_dict(torch.load(PATH, map_location=device))

In [31]:
# 2) Save on GPU, Load on GPU

#device = torch.device("cuda")
#model.to(device)
#torch.save(model.state_dict(), PATH)

#model = Model(*args, **kwargs)
#model.load_state_dict(torch.load(PATH))
#model.to(device)

In [32]:
# 3) Save on CPU, Load on GPU

#torch.save(model.state_dict(), PATH)

#device = torch.device("cuda")
#model = Model(*args, **kwargs)
#model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
#model.to(device)