In [1]:
import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Toy regression data: each sample has two features and the label is their sum.
x = [
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
]
y = [[first + second] for first, second in x]

In [3]:
# define a class MyDataset
class MyDataset(Dataset):
    """Map-style dataset pairing feature rows with their labels.

    Both inputs are converted to float tensors and moved to the
    notebook-level ``device`` once, at construction time.
    """

    def __init__(self, x, y):
        """Store ``x`` (features) and ``y`` (labels) as float tensors on ``device``."""
        features = torch.tensor(x).float()
        targets = torch.tensor(y).float()
        self.x = features.to(device)
        self.y = targets.to(device)

    def __len__(self):
        """Return the number of samples, i.e. the length of the feature tensor."""
        n_samples = len(self.x)
        return n_samples

    def __getitem__(self, ix):
        """Return the ``(features, label)`` pair stored at position ``ix``."""
        sample = (self.x[ix], self.y[ix])
        return sample

In [4]:
# Wrap the toy data in a dataset, then serve it in reshuffled mini-batches of two.
ds = MyDataset(x, y)
dl = DataLoader(dataset=ds, shuffle=True, batch_size=2)

In [5]:
# model: 2 input features -> 8 hidden units with ReLU -> 1 output value
layers = [
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
]
mynet = nn.Sequential(*layers).to(device)

In [6]:
from torchsummary import summary

In [7]:
# summary() prints the per-layer table itself; the trailing ';' stops the notebook
# from echoing the returned object as well, which would show the table a second time.
summary(mynet, torch.zeros(1, 2));

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [8]:
# loss: mean squared error between predictions and labels
loss_func = nn.MSELoss()

# optimizer: plain SGD over every parameter of the network
from torch.optim import SGD
optimizer = SGD(mynet.parameters(), lr = 0.001)

# one entry per mini-batch update, in training order
loss_history = []

# train for 50 passes over the data loader
for epoch in range(50):
    for batch_x, batch_y in dl:
        # gradients accumulate across backward() calls, so reset them first
        optimizer.zero_grad()

        # forward pass and loss for this mini-batch
        prediction = mynet(batch_x)
        loss_value = loss_func(prediction, batch_y)

        # backward pass, then one SGD step
        loss_value.backward()
        optimizer.step()

        # keep a plain Python float rather than the tensor
        loss_history.append(loss_value.item())

In [9]:
# Inputs that were not part of the training data.
x_test = [[8,9],[10,11],[1.5,2.5]]
x_test_tensor = torch.tensor(x_test).float().to(device)
mynet(x_test_tensor)

tensor([[17.0317],
        [21.0548],
        [ 3.9567]], device='cuda:0', grad_fn=<AddmmBackward0>)

#
#### save a model

In [11]:
# state_dict() maps each parameter name to its tensor; for this nn.Sequential the
# names are "<layer index>.weight" / "<layer index>.bias" (the ReLU has no entries)
mynet.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.9404,  0.6635],
                      [ 0.5668, -0.0278],
                      [-0.0121, -0.2320],
                      [-0.2491, -0.6255],
                      [ 0.2395,  0.5419],
                      [ 0.0909, -0.3123],
                      [-0.2675, -0.5055],
                      [-0.4562, -0.5065]], device='cuda:0')),
             ('0.bias',
              tensor([-0.6239,  0.7138, -0.0672, -0.4634, -0.2901, -0.1516, -0.2663, -0.2228],
                     device='cuda:0')),
             ('2.weight',
              tensor([[ 0.8618,  0.4759,  0.1461, -0.1565,  0.4770, -0.0771,  0.1488,  0.3015]],
                     device='cuda:0')),
             ('2.bias', tensor([0.4587], device='cuda:0'))])

In [12]:
# Save a CPU copy of the weights so the checkpoint is not tied to a GPU.
# nn.Module.to() moves the module in place, so calling mynet.to('cpu') here would
# leave mynet on the CPU for every later cell (and for any re-run of the inference
# cell, which sends its inputs to `device`). Copy the tensors instead.
cpu_state_dict = mynet.state_dict()
for param_name in cpu_state_dict:
    cpu_state_dict[param_name] = cpu_state_dict[param_name].cpu()
torch.save(cpu_state_dict, 'my_first_sequential_model.pth')

#
#### load a model

In [13]:
# Rebuild the same architecture as the saved network; its randomly initialised
# weights are overwritten by the checkpoint below.
model = nn.Sequential(
            nn.Linear(2, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
            ).to(device)

# Read the checkpoint from disk, mapping its tensors straight onto `device` so the
# load does not depend on which device the file was saved from.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust
# (recent PyTorch versions also accept weights_only=True).
state_dict = torch.load('my_first_sequential_model.pth', map_location=device)

# copy the stored parameters into the new model (it is already on `device`)
model.load_state_dict(state_dict)

# switch to inference mode; eval() returns the module, so the cell still displays it
model.eval()

Sequential(
  (0): Linear(in_features=2, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [14]:
# Run the restored model on the same test inputs used for mynet.
restored_inputs = torch.tensor(x_test).float().to(device)
model(restored_inputs)

tensor([[17.0317],
        [21.0548],
        [ 3.9567]], device='cuda:0', grad_fn=<AddmmBackward0>)