In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from datasets import load_dataset
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tqdm import tqdm

## Build very simple NN

In [2]:
class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        
        self.model = nn.Sequential(nn.Linear(6, 3),
                      nn.Linear(3, 2))
        
        self.loss = nn.MSELoss()
 
        
    def forward(self,x):
        x=self.model(x)
        return x
    
    def train_model(self, dataset, epochs): 
        self.train()
        optimizer = torch.optim.SGD(self.parameters(), lr=0.1) # TODO tune

        for epoch in range(epochs):
            with tqdm(dataset, unit="batch") as tepoch:
                for inputs, targets in tepoch:

                    tepoch.set_description(f"Epoch {epoch + 1}")

                    # clear the gradients
                    optimizer.zero_grad()
                    # compute the model output
                    yhat = self(inputs)
                    # calculate accuracy
                    correct = (yhat.argmax(1) == targets).type(torch.float).sum().item()
                    accuracy = correct / len(inputs)
                    # calculate loss
                    loss = self.loss(yhat, targets)
                    print("Loss: ", loss)
                    print("Predicted:", yhat, "Wanted: ", targets)
                    # credit assignment
                    loss.backward()
                    
                    for name, param in self.model.named_parameters():
                        print(name, param.grad)
                    
                    
                    # update model weights
                    optimizer.step()

                    tepoch.set_postfix(loss=loss.item(), accuracy=100. * accuracy)
        

## Create dataset

It will contain only one sample: 2x + y = 1; 3x - y = 4


In [3]:
class DSS(Dataset):
    def __init__(self):
        self.X = np.array([[2,1,1,3,-1,4]], dtype=np.float32)
        self.y = np.array([[1,-1]], dtype=np.float32)
        self.len = 1
    
    def __len__(self):
        return self.len
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [4]:
dset = DSS()
loader = DataLoader(dset, batch_size=1, shuffle=False)

## Model training step by step

In [5]:
model = Network()

In [6]:
print(model)

Network(
  (model): Sequential(
    (0): Linear(in_features=6, out_features=3, bias=True)
    (1): Linear(in_features=3, out_features=2, bias=True)
  )
  (loss): MSELoss()
)


### Modify weights, our initialization for the lesson

In [7]:
with torch.no_grad():
    model.model[0].weight = nn.Parameter(torch.tensor([[ 0.5,  0.4, -0.2,  0.2, -0.2,  0.3],
        [-0.1,  0.0,  0.2,  0.1,  0.4,  0.0],
        [0.0,  0.0, -0.2,  0.3, -0.1, -0.3]]))
    model.model[0].bias = nn.Parameter(torch.tensor([-0.1, -0.3,  0.1]))
    model.model[1].weight = nn.Parameter(torch.tensor([[ 1.,  1., 1.],
        [1.,  1., 1.]]))
    model.model[1].bias = nn.Parameter(torch.tensor([1., 1.]))

### Freeze first layer of weights and bias

In [8]:
for i, param in enumerate(model.model.parameters()):
    if i == 0 or i == 1:
        param.requires_grad = False

### Predicted values with initialized weights

In [9]:
model(torch.from_numpy(np.array([[2,1,1,3,-1,4]], dtype=np.float32)))

tensor([[3.4000, 3.4000]], grad_fn=<AddmmBackward0>)

### Train with frozen layer, one step, we can try it multiple times

In [10]:
model.train_model(loader, 1)

Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 32.30batch/s, accuracy=0, loss=12.6]

Loss:  tensor(12.5600, grad_fn=<MseLossBackward0>)
Predicted: tensor([[3.4000, 3.4000]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[ 1., -1.]])
0.weight None
0.bias None
1.weight tensor([[ 7.4400, -0.9600, -0.7200],
        [13.6400, -1.7600, -1.3200]])
1.bias tensor([2.4000, 4.4000])





### New weights after training

In [11]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)

model.1.weight tensor([[ 0.2560,  1.0960,  1.0720],
        [-0.3640,  1.1760,  1.1320]])
model.1.bias tensor([0.7600, 0.5600])


### New prediction after training

In [12]:
model(torch.from_numpy(np.array([[2,1,1,3,-1,4]], dtype=np.float32)))

tensor([[ 0.7936, -1.3784]], grad_fn=<AddmmBackward0>)

-----------------------------

## Try training and evaluation on completely new input

In [15]:
model(torch.from_numpy(np.array([[2,2,4,3,1,4]], dtype=np.float32)))

tensor([[ 1.3168, -0.4192]], grad_fn=<AddmmBackward0>)

In [16]:
class DSS2(Dataset):
    def __init__(self):
        self.X = np.array([[2,2,4,3,1,4]], dtype=np.float32)
        self.y = np.array([[1,1]], dtype=np.float32)
        self.len = 1
    
    def __len__(self):
        return self.len
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [17]:
dset2 = DSS2()
loader2 = DataLoader(dset2, batch_size=1, shuffle=False)

In [18]:
model.train_model(loader2, 10)

Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 162.57batch/s, accuracy=0, loss=1.06]


Loss:  tensor(1.0572, grad_fn=<MseLossBackward0>)
Predicted: tensor([[ 1.3168, -0.4192]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[ 0.7920,  0.3168, -0.3485],
        [-3.5480, -1.4192,  1.5611]])
1.bias tensor([ 0.3168, -1.4192])


Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 187.32batch/s, accuracy=0, loss=0.00308]


Loss:  tensor(0.0031, grad_fn=<MseLossBackward0>)
Predicted: tensor([[1.0171, 0.9234]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[ 0.0428,  0.0171, -0.0188],
        [-0.1916, -0.0766,  0.0843]])
1.bias tensor([ 0.0171, -0.0766])


Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 191.36batch/s, accuracy=0, loss=8.99e-6]


Loss:  tensor(8.9893e-06, grad_fn=<MseLossBackward0>)
Predicted: tensor([[1.0009, 0.9959]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[ 0.0023,  0.0009, -0.0010],
        [-0.0103, -0.0041,  0.0046]])
1.bias tensor([ 0.0009, -0.0041])


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 190.77batch/s, accuracy=0, loss=2.62e-8]


Loss:  tensor(2.6222e-08, grad_fn=<MseLossBackward0>)
Predicted: tensor([[1.0000, 0.9998]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[ 1.2457e-04,  4.9829e-05, -5.4812e-05],
        [-5.5879e-04, -2.2352e-04,  2.4587e-04]])
1.bias tensor([ 4.9829e-05, -2.2352e-04])


Epoch 5: 100%|██████████| 1/1 [00:00<00:00, 189.38batch/s, accuracy=0, loss=7.52e-11]


Loss:  tensor(7.5206e-11, grad_fn=<MseLossBackward0>)
Predicted: tensor([[1.0000, 1.0000]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[ 6.5565e-06,  2.6226e-06, -2.8849e-06],
        [-2.9951e-05, -1.1981e-05,  1.3179e-05]])
1.bias tensor([ 2.6226e-06, -1.1981e-05])


Epoch 6: 100%|██████████| 1/1 [00:00<00:00, 191.40batch/s, accuracy=0, loss=2.43e-13]


Loss:  tensor(2.4336e-13, grad_fn=<MseLossBackward0>)
Predicted: tensor([[1.0000, 1.0000]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[ 5.9605e-07,  2.3842e-07, -2.6226e-07],
        [-1.6391e-06, -6.5565e-07,  7.2122e-07]])
1.bias tensor([ 2.3842e-07, -6.5565e-07])


Epoch 7: 100%|██████████| 1/1 [00:00<00:00, 172.79batch/s, accuracy=0, loss=2.84e-14]


Loss:  tensor(2.8422e-14, grad_fn=<MseLossBackward0>)
Predicted: tensor([[1.0000, 1.0000]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[ 5.9605e-07,  2.3842e-07, -2.6226e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])
1.bias tensor([2.3842e-07, 0.0000e+00])


Epoch 8: 100%|██████████| 1/1 [00:00<00:00, 206.67batch/s, accuracy=0, loss=0]


Loss:  tensor(0., grad_fn=<MseLossBackward0>)
Predicted: tensor([[1., 1.]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[0., 0., 0.],
        [0., 0., 0.]])
1.bias tensor([0., 0.])


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 192.06batch/s, accuracy=0, loss=0]


Loss:  tensor(0., grad_fn=<MseLossBackward0>)
Predicted: tensor([[1., 1.]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[0., 0., 0.],
        [0., 0., 0.]])
1.bias tensor([0., 0.])


Epoch 10: 100%|██████████| 1/1 [00:00<00:00, 204.65batch/s, accuracy=0, loss=0]

Loss:  tensor(0., grad_fn=<MseLossBackward0>)
Predicted: tensor([[1., 1.]], grad_fn=<AddmmBackward0>) Wanted:  tensor([[1., 1.]])
0.weight None
0.bias None
1.weight tensor([[0., 0., 0.],
        [0., 0., 0.]])
1.bias tensor([0., 0.])





In [19]:
model(torch.from_numpy(np.array([[2,2,4,3,1,4]], dtype=np.float32)))

tensor([[1., 1.]], grad_fn=<AddmmBackward0>)