In [None]:
# LAZY method

#### COMPLETE MODEL ####
#torch.save(model, PATH)

## model class must be defined somewhere
#model = torch.load(PATH)
#model.eval()

In [2]:
import os

import torch
import torch.nn as nn

class Model(nn.Module):
    """One linear layer followed by a sigmoid.

    Args:
        n_input_features: size of the last dimension of the input tensor.

    The linear layer maps ``n_input_features`` values to a single output, and
    ``forward`` passes that output through ``torch.sigmoid``.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # The attribute name ends up in the state_dict keys
        # ('linear.weight', 'linear.bias').
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        linear_out = self.linear(x)
        return torch.sigmoid(linear_out)
    
model = Model(n_input_features=6)

# train your model...

FILE = "models/lazy-model.pth" # pytorch model
# torch.save does not create missing parent directories, so make sure the
# output folder exists before the first write of the notebook.
os.makedirs(os.path.dirname(FILE), exist_ok=True)
# "Lazy" method: pickle the whole model object (class reference + parameters).
torch.save(model, FILE)



In [3]:
FILE = "models/lazy-model.pth"
# The file holds a whole pickled Model object, not just tensors, so the
# weights-only unpickler (the default in recent PyTorch releases) rejects it.
# weights_only=False runs the full pickle machinery, which can execute
# arbitrary code - only use it on files you created yourself or fully trust.
# The Model class must already be defined in this session for unpickling.
model = torch.load(FILE, weights_only=False)
model.eval()

# Print every parameter tensor of the restored model.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 0.1601,  0.2643, -0.2641, -0.2033, -0.3851, -0.1785]],
       requires_grad=True)
Parameter containing:
tensor([0.2170], requires_grad=True)


In [None]:
# Recommended method: define model structure and only load the parameters

#### STATE DICT ####
#torch.save(model.state_dict(), PATH)

## model must be created again with parameters
#model = Model(*args, **kwargs)
#model.load_state_dict(torch.load(PATH))
#model.eval()

In [6]:

import torch
import torch.nn as nn

class Model(nn.Module):
    """Linear layer with a single output unit, squashed by a sigmoid.

    Mirrors the Model definition used in the lazy-save cell above.

    Args:
        n_input_features: size of the last dimension of the input tensor.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # The attribute name ends up in the state_dict keys
        # ('linear.weight', 'linear.bias').
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        linear_out = self.linear(x)
        return torch.sigmoid(linear_out)
    
# Destination for the parameters-only snapshot.
FILE = "models/prefer-model.pth"

# Fresh, untrained network with six input features.
model = Model(n_input_features=6)

# train your model...

# Print each parameter tensor; the next cell prints them again after reloading.
for parameter in model.parameters():
    print(parameter)

# Persist only the state_dict rather than the whole model object.
weights = model.state_dict()
torch.save(weights, FILE)

Parameter containing:
tensor([[ 0.1163,  0.1026,  0.3707,  0.1641, -0.2660, -0.1547]],
       requires_grad=True)
Parameter containing:
tensor([0.3416], requires_grad=True)


In [16]:
FILE = "models/prefer-model.pth"  # state_dict file written by the previous cell

# The architecture is rebuilt in code; only the tensors come from disk.
loaded_model = Model(n_input_features=6)
saved_weights = torch.load(FILE)
loaded_model.load_state_dict(saved_weights)
loaded_model.eval()

# Print the restored parameter tensors.
for parameter in loaded_model.parameters():
    print(parameter)

Parameter containing:
tensor([[ 0.1163,  0.1026,  0.3707,  0.1641, -0.2660, -0.1547]],
       requires_grad=True)
Parameter containing:
tensor([0.3416], requires_grad=True)


In [18]:
# More about the state_dict, optimizer, checkpoint

# The model's state_dict maps parameter names to tensors.
print(loaded_model.state_dict(), "\n")

# The optimizer exposes its own state_dict ('state' and 'param_groups').
learning_rate = 0.01
optimizer = torch.optim.SGD(params=loaded_model.parameters(), lr=learning_rate)
print(optimizer.state_dict())

# Bundle the epoch number and both state_dicts into one dictionary.
checkpoint = dict(
    epoch=90,
    model_state=loaded_model.state_dict(),
    optim_state=optimizer.state_dict(),
)

torch.save(checkpoint, "models/checkpoint.pth")

OrderedDict([('linear.weight', tensor([[ 0.1163,  0.1026,  0.3707,  0.1641, -0.2660, -0.1547]])), ('linear.bias', tensor([0.3416]))]) 

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}


In [19]:
# Restore the epoch counter, model parameters and optimizer state from disk.
loaded_checkpoint = torch.load("models/checkpoint.pth")
epoch = loaded_checkpoint["epoch"]

model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0) # lr=0 will be overridden by the loaded optimizer state

# Restore from the dictionary that was just read from disk, not from the
# in-memory `checkpoint` variable left over from the cell that saved it;
# otherwise the file is never exercised and the cell fails on a fresh kernel.
model.load_state_dict(loaded_checkpoint["model_state"])
optimizer.load_state_dict(loaded_checkpoint["optim_state"])

print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}


In [None]:
#### GPU ####
# Recipes for saving/loading parameters across devices. Every recipe needs a
# CUDA device, so the cell is skipped (with a message) on CPU-only machines.
# Uses `model` and the `Model` class from the cells above.
PATH = "models/another-model.pth"

if torch.cuda.is_available():
    # 1. Save on GPU, Load on CPU
    device = torch.device("cuda")
    model.to(device)
    torch.save(model.state_dict(), PATH)

    device = torch.device("cpu")
    model = Model(n_input_features=6)
    # map_location remaps the tensors saved from the GPU onto the CPU.
    model.load_state_dict(torch.load(PATH, map_location=device))

    # 2. Save on GPU, Load on GPU
    device = torch.device("cuda")
    model.to(device)
    torch.save(model.state_dict(), PATH)

    model = Model(n_input_features=6)
    model.load_state_dict(torch.load(PATH))
    model.to(device)

    # 3. Save on CPU, Load on GPU
    # Recipe 2 left the model on the GPU; move it back so the state_dict is
    # really saved from the CPU.
    model.to(torch.device("cpu"))
    torch.save(model.state_dict(), PATH)

    device = torch.device("cuda:0")  # Choose whatever GPU number
    model = Model(n_input_features=6)
    model.load_state_dict(torch.load(PATH, map_location=device))
    model.to(device)
else:
    print("CUDA is not available - skipping the GPU save/load examples.")