### Using a Sequential method to build a Neural Network

In [4]:
# import the dependencies
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader

# device agnostic check
device = 'cuda' if torch.cuda.is_available() else "cpu"


In [2]:
# Let's build a toy dataset
x = [[1,2],
     [3,4],
     [5,6],
     [7,8]]
y = [[3],
     [7],
     [11],
     [15]]

In [3]:
class MyDataset(Dataset):
    def __init__(self, x, y):
        super().__init__()
        self.x = torch.tensor(x).float().to(device)
        self.y = torch.tensor(y).float().to(device)
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

In [5]:
# Define Dataset and DataLoader
ds = MyDataset(x, y)
dl = DataLoader(ds, batch_size=2, shuffle=True)

#### Define the model architecture using Sequential

In [6]:
model = nn.Sequential(
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
).to(device)

In [8]:
from torchsummary import summary
summary(model)

Layer (type:depth-idx)                   Param #
├─Linear: 1-1                            24
├─ReLU: 1-2                              --
├─Linear: 1-3                            9
Total params: 33
Trainable params: 33
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─Linear: 1-1                            24
├─ReLU: 1-2                              --
├─Linear: 1-3                            9
Total params: 33
Trainable params: 33
Non-trainable params: 0

#### loss function


In [9]:
loss_func = nn.MSELoss()
from torch.optim import SGD
import time 

optimizer = SGD(model.parameters(), lr=0.001)
start = time.time()

loss_history = []
for _ in range(50):
    for ix, iy in dl:
        optimizer.zero_grad()
        loss_value = loss_func(model(ix), iy)
        loss_value.backward()
        optimizer.step()
        loss_history.append(loss_value)
        
end = time.time()
print(end - start)


0.09643912315368652


#### validation data

In [10]:
val_x = [[8, 9],
         [10, 11],
         [1.5, 2.5]]

In [11]:
val_x = torch.tensor(val_x).float().to(device)

In [12]:
model(val_x)

tensor([[16.6203],
        [20.3583],
        [ 4.4272]], grad_fn=<AddmmBackward0>)

In the previous output, we see that the predicted value is close to the sum of inputs.



#### Saving and loading the model

In [13]:
# to see what our model has learnt use state_dict
model.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.1493, -0.2398],
                      [ 0.1466, -0.5832],
                      [-0.6394,  0.0815],
                      [ 0.0024,  0.8388],
                      [-0.6165,  0.4688],
                      [ 0.2964,  0.6424],
                      [ 0.4430,  0.4638],
                      [ 0.6700, -0.6621]])),
             ('0.bias',
              tensor([-0.6561, -0.5126,  0.6359, -0.5342,  0.2528,  0.2564,  0.6946,  0.0699])),
             ('2.weight',
              tensor([[-0.2830, -0.2201,  0.0900,  0.5610, -0.0894,  0.7567,  0.7573, -0.3493]])),
             ('2.bias', tensor([-0.0599]))])

A good practice for saving a model is first to transfer it to cpu.

This will convert it from cuda tensors to cpu tensors.

Then you can use the torch.save() to save the model.

In [14]:
learned_paramaters = model.to('cpu').state_dict()
torch.save(learned_paramaters, 'mymodel.pth')

In [15]:
# initialize model with random parameters
model = nn.Sequential(
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
).to(device)

# load the state_dict 
state_dict = torch.load('mymodel.pth')

# load the state_dict to the model
model.load_state_dict(state_dict)

model.to(device)

model(val_x)



tensor([[16.6203],
        [20.3583],
        [ 4.4272]], grad_fn=<AddmmBackward0>)