In [1]:
import torch
import torch.nn as nn
from torch.optim import SGD
from torch.utils.data import Dataset, DataLoader
device = 'cuda' if torch.cuda.is_available() else 'cpu'

from typing import Any
import time

In [2]:
x = [[1,2],[3,4],[5,6],[7,8]]
y = [[3], [7], [11], [15]]

In [3]:
class MyDataset(Dataset):
    def __init__(self, x, y) -> None:
        super().__init__()
        self.x = torch.tensor(x).float().to(device)
        self.y = torch.tensor(y).float().to(device)
    def __getitem__(self, index) -> Any:
        return self.x[index], self.y[index]
    def __len__(self) -> int:
        return len(self.x)
ds = MyDataset(x, y)
dl = DataLoader(ds, batch_size=2, shuffle=True)

In [4]:
model = nn.Sequential(
    nn.Linear(2,8),
    nn.ReLU(),
    nn.Linear(8,1)
).to(device)

In [5]:
!pip install torch_summary
from torchsummary import summary



In [6]:
summary(model, torch.zeros(1,2))

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [8]:
loss_history = []
loss_func = nn.MSELoss()
opt = SGD(model.parameters(), lr=0.001)
start = time.time()

for _ in range(50):
    for ix, iy in dl:
        opt.zero_grad()
        loss_value = loss_func(model.forward(ix),iy)
        loss_value.backward()
        opt.step()
        loss_history.append(loss_value)
end = time.time()
print(end-start)

0.2060103416442871


In [9]:
model.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.3592,  0.0793],
                      [ 0.6443,  0.7311],
                      [ 0.3309,  0.3586],
                      [-0.0954,  0.5896],
                      [ 0.2716, -0.4693],
                      [-0.6850, -0.5903],
                      [-0.3784, -0.2268],
                      [-0.3643, -0.6384]], device='cuda:0')),
             ('0.bias',
              tensor([ 0.3222, -0.3216,  0.4103,  0.2030,  0.6059,  0.5545,  0.0568, -0.4998],
                     device='cuda:0')),
             ('2.weight',
              tensor([[ 0.4498,  0.8370,  0.5661,  0.4514, -0.2057, -0.3253, -0.1792,  0.2891]],
                     device='cuda:0')),
             ('2.bias', tensor([-0.1240], device='cuda:0'))])

In [11]:
for i in model.parameters():
    print(i)

Parameter containing:
tensor([[ 0.3592,  0.0793],
        [ 0.6443,  0.7311],
        [ 0.3309,  0.3586],
        [-0.0954,  0.5896],
        [ 0.2716, -0.4693],
        [-0.6850, -0.5903],
        [-0.3784, -0.2268],
        [-0.3643, -0.6384]], device='cuda:0', requires_grad=True)
Parameter containing:
tensor([ 0.3222, -0.3216,  0.4103,  0.2030,  0.6059,  0.5545,  0.0568, -0.4998],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4498,  0.8370,  0.5661,  0.4514, -0.2057, -0.3253, -0.1792,  0.2891]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([-0.1240], device='cuda:0', requires_grad=True)


#### Saving

In [15]:
save_path = "mymodel.pth"
torch.save(model.state_dict(), save_path)

#### Loading

In [16]:
load_path = 'mymodel.pth'
model.load_state_dict(torch.load(load_path))

<All keys matched successfully>

#### Predictions

In [17]:
val = [[8,9],[10,11],[1.5,2.5]]
val = torch.tensor(val).float().to('cuda')

In [18]:
model(val)

tensor([[16.8872],
        [20.8109],
        [ 4.1352]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [20]:
val.sum(-1)

tensor([17., 21.,  4.], device='cuda:0')