In [1]:
import torch
import torch.nn as nn

### Lazy Method: Save the Entire Model

In [2]:
class Model(nn.Module):
    """Single-layer network: one linear map followed by a sigmoid.

    Takes ``input_features`` inputs and produces one output value per sample.
    """

    def __init__(self, input_features):
        """Create the linear layer mapping ``input_features`` inputs to 1 output."""
        super().__init__()
        self.linear = nn.Linear(input_features, 1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

In [3]:
# Fresh, untrained model with 6 input features; the save/load cells below use it.
model = Model(input_features=6)

In [4]:
# Train model (training loop omitted here; the model keeps its initial weights)

In [5]:
# Lazy approach: pass the module object itself to torch.save.
# The file contains serialized data and is not human readable.
FILE = "model.pth"
torch.save(model, FILE)

In [6]:
# FILE holds a pickled Model object (not just tensors), so it needs a full
# unpickle. Recent PyTorch releases default torch.load to weights_only=True,
# which refuses to rebuild arbitrary classes such as Model, hence the explicit
# weights_only=False.
# SECURITY: a full unpickle can execute arbitrary code; only load files you
# created yourself or otherwise trust.
model = torch.load(FILE, weights_only=False)
# Switch to evaluation mode; as the last expression this also displays the module.
model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [7]:
# Print every parameter of the reloaded model (linear weight, then bias).
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 0.2613,  0.3757, -0.2197, -0.3278, -0.2710,  0.1129]],
       requires_grad=True)
Parameter containing:
tensor([-0.1828], requires_grad=True)


### Preferred Method: State Dictionary

In [8]:
# Save only the model's state dictionary instead of the whole module object.
# Note: this reuses the file name "model.pth" from the full-model save, so
# that earlier file is overwritten.
FILE = "model.pth"
torch.save(model.state_dict(), FILE)

In [9]:
# Build a new Model with the same constructor argument first, then copy the
# saved state dictionary into it.
loaded_model = Model(input_features=6)
loaded_model.load_state_dict(torch.load(FILE))
# Switch to evaluation mode; as the last expression this also displays the module.
loaded_model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [10]:
# Print the restored parameters; they should match those of the saved model.
for param in loaded_model.parameters():
    print(param)

Parameter containing:
tensor([[ 0.2613,  0.3757, -0.2197, -0.3278, -0.2710,  0.1129]],
       requires_grad=True)
Parameter containing:
tensor([-0.1828], requires_grad=True)


In [11]:
# Show the state dictionary of `model`: parameter names mapped to their tensors.
print(model.state_dict())

OrderedDict([('linear.weight', tensor([[ 0.2613,  0.3757, -0.2197, -0.3278, -0.2710,  0.1129]])), ('linear.bias', tensor([-0.1828]))])


### Save Checkpoints

In [12]:
# Optimizer over the model's parameters; its state is stored in the checkpoint below.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [13]:
# Optimizers expose a state dictionary as well; display it as the cell output.
optimizer.state_dict()

{'state': {},
 'param_groups': [{'lr': 0.01,
   'momentum': 0,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'params': [0, 1]}]}

In [14]:
# Bundle the epoch counter, model state and optimizer state into one dictionary.
# NOTE(review): epoch 90 looks like a placeholder, since no training loop is
# shown in this notebook.
checkpoint = {
    "epoch": 90,
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict()
}

In [15]:
# Write the checkpoint dictionary to disk.
torch.save(checkpoint, "checkpoint.pth")

In [16]:
# Load the checkpoint back from disk.
loaded_checkpoint = torch.load("checkpoint.pth")
epoch = loaded_checkpoint["epoch"]  # epoch counter stored in the checkpoint

# Rebuild the model and optimizer before restoring their states. lr=0 differs
# from the saved learning rate (presumably on purpose), so printing the
# optimizer state afterwards shows that load_state_dict restored the saved value.
model = Model(input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)

# Restore from the dictionary that was read from disk (loaded_checkpoint), not
# from the in-memory `checkpoint`, so the file round trip is what gets used and
# the cell also works in a kernel where `checkpoint` was never built.
model.load_state_dict(loaded_checkpoint["model_state"])
optimizer.load_state_dict(loaded_checkpoint["optim_state"])

In [17]:
# The learning rate shown should be the checkpointed value, not the lr=0 used
# when the optimizer was constructed.
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0, 1]}]}


### Devices

In [19]:
# File used by the device-transfer examples below.
PATH = "dev_model.pth"

**GPU to CPU**

In [20]:
# Save from the GPU: move the model to the CUDA device, then store its state
# dictionary. This cell needs a CUDA device to be available.
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)

In [21]:
# Load on the CPU: build a new Model, then pass map_location so torch.load
# places the saved tensors on the CPU device.
device = torch.device("cpu")
model = Model(input_features=6)
model.load_state_dict(torch.load(PATH, map_location=device))

<All keys matched successfully>

**GPU to GPU**

In [22]:
# Save step of the GPU-to-GPU example: move the model to the CUDA device and
# store its state dictionary. This cell needs a CUDA device to be available.
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)