In [None]:
import torch
import torch.nn as nn

In [None]:
!ls -l

total 4
drwxr-xr-x 1 root root 4096 May 16 13:24 sample_data


In [None]:
class Model(nn.Module):
    """Small fully connected network: n_feats -> 4 -> 2.

    The hidden layer is followed by a ReLU and the two outputs are passed
    through a sigmoid.
    """

    def __init__(self, n_feats):
        """Create the two linear layers.

        Args:
            n_feats: number of input features expected by the first layer.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=n_feats, out_features=4)
        self.fc2 = nn.Linear(in_features=4, out_features=2)

    def forward(self, x):
        """Return sigmoid(fc2(relu(fc1(x)))) for the input tensor ``x``."""
        hidden = self.fc1(x).relu()
        return self.fc2(hidden).sigmoid()

model = Model(6)

######################### save all ############################
# Option 1: pickle the entire nn.Module object (architecture + weights).
print('*********** Original Model *************')
for p in model.parameters():
    print(p)

FILE = 'model.pth'
torch.save(model, FILE)

# A fully pickled module can only be restored by the unrestricted unpickler.
# PyTorch >= 2.6 defaults torch.load to weights_only=True, which refuses
# arbitrary classes such as Model, so opt out explicitly here.
# NOTE: weights_only=False can execute arbitrary code while unpickling; use it
# only for files you created yourself, like this one.
loaded_model = torch.load(FILE, weights_only=False)
loaded_model.eval()

print('\n*********** Loaded Model *************')
for p in loaded_model.parameters():
    print(p)

# The round trip must reproduce every parameter exactly.
assert all(
    torch.equal(orig, loaded)
    for orig, loaded in zip(model.parameters(), loaded_model.parameters())
), 'whole-model round trip changed the parameters'

############################## save only the state dict ###################
# Option 2 (recommended): store only the tensors and rebuild the module in code.
FILE = 'model_st.pth'
torch.save(model.state_dict(), FILE)

print('\n*********** Original Model state dict *************')
print(model.state_dict())

loaded_model = Model(6)
# A state dict holds only tensors, so the restricted loader is sufficient.
loaded_model.load_state_dict(torch.load(FILE, weights_only=True))
loaded_model.eval()

print('\n*********** Loaded Model state dict *************')
print(loaded_model.state_dict())

assert all(
    torch.equal(tensor, loaded_model.state_dict()[key])
    for key, tensor in model.state_dict().items()
), 'state-dict round trip changed the parameters'


*********** Original Model *************
Parameter containing:
tensor([[-0.4050,  0.1931,  0.1872, -0.1365,  0.3770, -0.2190],
        [ 0.3027, -0.1252,  0.3956,  0.1458, -0.3646, -0.0695],
        [ 0.2489,  0.4007, -0.3023,  0.3285, -0.3634,  0.2381],
        [-0.1335, -0.2177, -0.1226, -0.1804, -0.4056, -0.2411]],
       requires_grad=True)
Parameter containing:
tensor([-0.3521, -0.1276,  0.3366, -0.0815], requires_grad=True)
Parameter containing:
tensor([[-0.4728,  0.1173,  0.4946,  0.0745],
        [-0.1168, -0.4052, -0.3549,  0.2945]], requires_grad=True)
Parameter containing:
tensor([-0.2332,  0.2292], requires_grad=True)

*********** Loaded Model *************
Parameter containing:
tensor([[-0.4050,  0.1931,  0.1872, -0.1365,  0.3770, -0.2190],
        [ 0.3027, -0.1252,  0.3956,  0.1458, -0.3646, -0.0695],
        [ 0.2489,  0.4007, -0.3023,  0.3285, -0.3634,  0.2381],
        [-0.1335, -0.2177, -0.1226, -0.1804, -0.4056, -0.2411]],
       requires_grad=True)
Parameter contai

In [None]:
!ls -l

total 12
-rw-r--r-- 1 root root 3008 May 20 03:07 model.pth
-rw-r--r-- 1 root root 2136 May 20 03:07 model_st.pth
drwxr-xr-x 1 root root 4096 May 16 13:24 sample_data


In [None]:
############## checkpoint #############

# SGD with momentum over the parameters of the current model.
optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# training..............

FILE = 'chkpt.pth'

# A checkpoint bundles the epoch counter with the model and optimizer
# state dicts in one plain dictionary.
checkpt = dict(
    epoch=100,
    model_st=model.state_dict(),
    optim_st=optimizer.state_dict(),
)
print('************ Original optim state *************')
print(optimizer.state_dict())
torch.save(checkpt, FILE)

# New optimizer created with lr=0 (the saved one used lr=0.01), so the
# printout at the end shows what load_state_dict put back.
optimizer = torch.optim.SGD(model.parameters(), lr=0)

checkpt_ld = torch.load(FILE)
epoch = checkpt_ld['epoch']
optimizer.load_state_dict(checkpt_ld['optim_st'])
model.load_state_dict(checkpt_ld['model_st'])

model.eval()

print('************ Loaded optim state *************')
print(optimizer.state_dict())


************ Original optim state *************
{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1, 2, 3]}]}
************ Loaded optim state *************
{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1, 2, 3]}]}


In [None]:
!ls -l

total 16
-rw-r--r-- 1 root root 2368 May 20 03:07 chkpt.pth
-rw-r--r-- 1 root root 3008 May 20 03:07 model.pth
-rw-r--r-- 1 root root 2136 May 20 03:07 model_st.pth
drwxr-xr-x 1 root root 4096 May 16 13:24 sample_data


In [None]:
##################### SAVING ON GPU/CPU ######################
# Save/load recipes for moving a state dict between devices.
# Every recipe touches a CUDA device, so the whole cell skips
# itself when no GPU is available instead of raising.
##############################################################

PATH = 'model_device.pth'
# Rebuild Model with the same input width as the live model so the saved
# state dict fits the freshly constructed layers.
n_feats = model.fc1.in_features

if not torch.cuda.is_available():
    print('CUDA not available: skipping the GPU/CPU save/load recipes')
else:
    # 1) Save on GPU, Load on CPU
    device = torch.device("cuda")
    model.to(device)
    torch.save(model.state_dict(), PATH)

    device = torch.device('cpu')
    model = Model(n_feats)
    # map_location remaps the stored CUDA tensors onto the CPU while loading.
    model.load_state_dict(torch.load(PATH, map_location=device))

    # 2) Save on GPU, Load on GPU
    device = torch.device("cuda")
    model.to(device)
    torch.save(model.state_dict(), PATH)

    model = Model(n_feats)
    model.load_state_dict(torch.load(PATH))
    model.to(device)

    # Note: Be sure to use the .to(torch.device('cuda')) function
    # on all model inputs, too!

    # 3) Save on CPU, Load on GPU
    # The model is still on the GPU after recipe 2; move it back first so the
    # saved tensors really are CPU tensors, as the recipe title says.
    model.to(torch.device('cpu'))
    torch.save(model.state_dict(), PATH)

    device = torch.device("cuda")
    model = Model(n_feats)
    model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
    model.to(device)

    # map_location loads the saved tensors onto the chosen GPU device.
    # The module itself is created on the CPU, so model.to(torch.device('cuda'))
    # is still needed to convert the model's parameter tensors to CUDA tensors.

