In [6]:
import torch
import torch.optim as optim

In [7]:
# Demonstrates Optimizer.state_dict(): build a small SGD-with-momentum
# optimizer, take a few steps, and persist the resulting state to disk.

# A 2x2 leaf parameter with a hand-assigned gradient of ones, so step() has
# something to apply without running a backward pass.
weight = torch.tensor([[10, 11],
                       [12, 13]], dtype=torch.float32, requires_grad=True)
weight.grad = torch.ones((2, 2))
new_optimizer = optim.SGD([weight], lr=0.1, momentum=0.9)

# state_dict() returns the state of the optimizer as a dict.
#
# It contains two entries:
# state -
#     a dict holding current optimization state.
#     Its content differs between optimizer classes.
# param_groups -
#     a list containing all parameter groups (each group is a dict)
opt_state_dict = new_optimizer.state_dict()

# No step has been taken yet, so 'state' is still empty at this point.
print("state_dict before step:\n", opt_state_dict)

# The gradient is never zeroed or recomputed, so each step reuses the same
# all-ones gradient; the momentum buffer accumulates across the 10 steps.
for _ in range(10):
    new_optimizer.step()

print("state_dict after step:\n", new_optimizer.state_dict())

# Saves an object to a disk file. The file name is all lowercase so that it
# matches the name used when the state is loaded back; on case-sensitive
# filesystems a differently-cased name would not be found.
torch.save(new_optimizer.state_dict(), "optimizer_state_dict.pkl")

state_dict before step:
 {'state': {}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0]}]}
state_dict after step:
 {'state': {0: {'momentum_buffer': tensor([[6.5132, 6.5132],
        [6.5132, 6.5132]])}}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0]}]}


In [8]:
# Restore previously saved optimizer state into a brand-new SGD optimizer
# that manages the same `weight` parameter with the same hyperparameters.
last_optimizer = optim.SGD(params=[weight], lr=0.1, momentum=0.9)
state_dict = torch.load("optimizer_state_dict.pkl")

# A freshly constructed optimizer has not stepped yet, so its 'state' entry
# is empty here.
print(
    "state_dict before load state:\n",
    last_optimizer.state_dict(),
)

# load_state_dict() loads the optimizer state (e.g. the momentum buffer)
# from the dict read from disk.
last_optimizer.load_state_dict(state_dict)
print(
    "state_dict after load state:\n",
    last_optimizer.state_dict(),
)

state_dict before load state:
 {'state': {}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0]}]}
state_dict after load state:
 {'state': {0: {'momentum_buffer': tensor([[6.5132, 6.5132],
        [6.5132, 6.5132]])}}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0]}]}
