In [2]:
import torch
import torch.nn as nn

In [None]:
''' 3 DIFFERENT METHODS TO REMEMBER:
 - torch.save(arg, PATH) # can be model, tensor, or dictionary
 - torch.load(PATH)
 - torch.load_state_dict(arg)
'''

''' 2 DIFFERENT WAYS OF SAVING
# 1) lazy way: save whole model
torch.save(model, PATH)

# model class must be defined somewhere
model = torch.load(PATH)
model.eval()

# 2) recommended way: save only the state_dict
torch.save(model.state_dict(), PATH)

# model must be created again with parameters
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()
'''

In [3]:
class Model(nn.Module):
    def __init__(self , input_param , output_param):
        super(Model , self).__init__()
        self.linear = nn.Linear(input_param , output_param)
        
    def forward(self , x):
        sig = torch.sigmoid(self.linear(x , 1))
        return sig
            

In [4]:
model = Model(input_param = 8 , output_param = 1)
# after this we need to train our model as we want

In [5]:
# now we wil save our model
# file name
FILE = "model.pth"
torch.save(model , FILE)

![model.pth%20eg%20image.png](attachment:model.pth%20eg%20image.png)

In [6]:
# as we can see the model is saved as model.pth in the same file 
# and the model file would look something like 

![model%20pth%20look.png](attachment:model%20pth%20look.png)

In [None]:
####################save all ######################################
for param in model.parameters():
    print(param)

# save and load entire model

FILE = "model.pth"
torch.save(model, FILE)

loaded_model = torch.load(FILE)
loaded_model.eval()

for param in loaded_model.parameters():
    print(param)

In [7]:
# the above implemented was to save the whole model 
# now we'll try to implement a method that is used in practise much more
# we'll just save state_dict of the model , which is 
# state_dict returns a dictionary containing a whole state of the module

In [10]:
FILE = 'new_model.pth'
torch.save(model.state_dict() , FILE)

# if we need to load back the model we need to initiate the model once again 
loaded_model = Model(input_param = 8 , output_param = 1)
# we will load the state of the model , not the whole model 
loaded_model.load_state_dict(torch.load(FILE))# it takes the loaded dictionary, not the path file itself
loaded_model.eval()
print(loaded_model.state_dict())

OrderedDict([('linear.weight', tensor([[-0.1557,  0.1229,  0.1426, -0.1161,  0.1054,  0.0089, -0.3278, -0.2104]])), ('linear.bias', tensor([0.1844]))])


In [14]:
# we can do this with an model optimizer too 
optimizer = torch.optim.SGD(model.parameters() , lr = 0.001)

FILE = 'optimizer.pth'
torch.save(optimizer.state_dict() , FILE)

print(optimizer.state_dict)

new_optim = torch.optim.SGD(model.parameters() , lr = 0)
new_optim.load_state_dict(torch.load(FILE))
print(new_optim.state_dict)

<bound method Optimizer.state_dict of SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)>
<bound method Optimizer.state_dict of SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)>


In [12]:
# as we can see that the new 
#optimizer had the same state as the previous one 
#as we loaded the state_dict of the previous one into this one

In [16]:
'''model.eval() is a kind of switch for some specific 
layers/parts of the model that behave differently 
during training and inference (evaluating) time. 
For example, Dropouts Layers, BatchNorm Layers etc. 
You need to turn them off during model evaluation, and .eval() 
will do it for you. In addition, the common practice 
for evaluating/validation is using torch.no_grad() 
in pair with model.eval() to turn off gradients computation:'''

'model.eval() is a kind of switch for some specific \nlayers/parts of the model that behave differently \nduring training and inference (evaluating) time. \nFor example, Dropouts Layers, BatchNorm Layers etc. \nYou need to turn them off during model evaluation, and .eval() \nwill do it for you. In addition, the common practice \nfor evaluating/validation is using torch.no_grad() \nin pair with model.eval() to turn off gradients computation:'

In [None]:
###########load checkpoint#####################
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# checkpoints need to be dictionaries 
checkpoint = {
"epoch": 90,
"model_state": model.state_dict(),
"optim_state": optimizer.state_dict()
}
print(optimizer.state_dict())
FILE = "checkpoint.pth"
torch.save(checkpoint, FILE)

model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)

checkpoint = torch.load(FILE)
model.load_state_dict(checkpoint['model_state'])
optimizer.load_state_dict(checkpoint['optim_state'])
epoch = checkpoint['epoch']

model.eval()
# - or -
# model.train()

print(optimizer.state_dict())

# Remember that you must call model.eval() to set dropout and batch normalization layers 
# to evaluation mode before running inference. Failing to do this will yield 
# inconsistent inference results. If you wish to resuming training, 
# call model.train() to ensure these layers are in training mode.