In [1]:
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn

### NOTES ON GRADIENT COMPUTATION USING BATCH DATA AS AN INPUT

During training, a batch of data is passed through the model,
which produces one prediction for each item in the batch.
The loss function compares these predictions with the target values,
giving one loss value per data point, i.e. a vector of individual losses.
This vector is then reduced to a single scalar loss for the entire batch, usually by
averaging (the default for `nn.MSELoss`) or by summing.
When `.backward()` is called on this scalar loss, the gradients are computed with respect to
each model parameter. The result is the gradient of the batch loss with respect to each
parameter, which equals the average (or sum) of the per-sample gradients.

In [2]:
# Toy regression data: every target is the sum of its two input features.
x = [
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
]
y = [[sum(pair)] for pair in x]

In [3]:
# Turn the nested Python lists into float32 tensors.
X = torch.tensor(x, dtype=torch.float32)
Y = torch.tensor(y, dtype=torch.float32)

In [4]:
# Seed PyTorch's global RNG so that DataLoader shuffling and the model's
# initial weights are the same on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
X = X.to(device)
Y = Y.to(device)

In [5]:
class MyDataset(Dataset):
    """Map-style dataset over paired input/target samples.

    Args:
        x: Inputs, one sample per index along the first dimension. May be a
            tensor or anything ``torch.as_tensor`` accepts (e.g. nested lists).
        y: Targets, holding the same number of samples as ``x``.

    Raises:
        ValueError: If ``x`` and ``y`` hold a different number of samples.
    """

    def __init__(self, x, y):
        # as_tensor lets callers pass plain lists as well as tensors;
        # clone().detach() keeps the stored data independent of the caller's
        # tensors and of any autograd graph they belong to.
        self.x = torch.as_tensor(x).clone().detach().float()
        self.y = torch.as_tensor(y).clone().detach().float()
        if len(self.x) != len(self.y):
            raise ValueError(
                f"x and y must have the same number of samples, "
                f"got {len(self.x)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.x)

    def __getitem__(self, ix):
        """Return the ``(input, target)`` pair stored at index ``ix``."""
        return self.x[ix], self.y[ix]

ds = MyDataset(X, Y)

# Number of datapoints in the dataset.
num_samples = len(ds)
print(num_samples)

# Index 1 is the second entry in the dataset.
second_sample = ds[1]
print(second_sample)

4
(tensor([3., 4.]), tensor([7.]))


In [6]:
# Serve the dataset in shuffled mini-batches of two samples each.
dl = DataLoader(dataset=ds, batch_size=2, shuffle=True)

# Walk through one pass over the loader and show what each mini-batch holds.
for batch in dl:
    batch_x, batch_y = batch
    print("Batch X:", batch_x)
    print("Batch Y:", batch_y)

Batch X: tensor([[1., 2.],
        [7., 8.]])
Batch Y: tensor([[ 3.],
        [15.]])
Batch X: tensor([[5., 6.],
        [3., 4.]])
Batch Y: tensor([[11.],
        [ 7.]])


In [7]:
class MyNeuralNet(nn.Module):
    """Fully connected network with a single ReLU hidden layer.

    Args:
        in_features: Size of each input sample (default 2).
        hidden_features: Number of units in the hidden layer (default 8).
        out_features: Size of each output sample (default 1).

    The layer attribute names are part of the ``state_dict`` keys, so they
    must stay unchanged for saved checkpoints to remain loadable.
    """

    def __init__(self, in_features=2, hidden_features=8, out_features=1):
        super().__init__()
        self.input_to_hidden_layer = nn.Linear(in_features, hidden_features)
        self.hidden_layer_activation = nn.ReLU()
        self.hidden_to_output_layer = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Map inputs of shape ``(..., in_features)`` to ``(..., out_features)``."""
        hidden = self.hidden_layer_activation(self.input_to_hidden_layer(x))
        return self.hidden_to_output_layer(hidden)

In [8]:
from torch.optim import SGD

# Model placed on the selected device, MSE loss, and an SGD optimizer
# over all of the model's parameters.
mynet = MyNeuralNet().to(device)
loss_func = nn.MSELoss()
opt = SGD(
    mynet.parameters(),
    lr=0.001,
)

In [9]:
import time

N_EPOCHS = 50  # number of full passes over the DataLoader

loss_history = []  # one scalar loss per mini-batch, in training order
start = time.time()
for _ in range(N_EPOCHS):
    # The batch is unpacked into batch_x/batch_y rather than x/y so that the
    # loop does not overwrite the raw `x`/`y` lists that the tensor-building
    # cell reads; re-running that cell after training stays correct.
    for batch_x, batch_y in dl:
        opt.zero_grad()  # clear gradients accumulated by the previous step
        loss_value = loss_func(mynet(batch_x), batch_y)
        loss_value.backward()
        opt.step()
        loss_history.append(loss_value.item())
end = time.time()
print(end - start)  # wall-clock training time in seconds

0.05105710029602051


In [10]:
# PREDICTING NEW VALUE
# Build the unseen input directly as a float32 tensor on the model's device.
val_x = torch.tensor([[10, 11]], dtype=torch.float32, device=device)
# Autograd is not disabled here, so the displayed result carries a grad_fn.
mynet(val_x)

tensor([[21.1599]], grad_fn=<AddmmBackward0>)

Saving the model

In [11]:
# Save only the learned parameters, as CPU tensors, to 'mymodel.pth'.
torch.save(mynet.to('cpu').state_dict(), 'mymodel.pth')
# nn.Module.to() moves the module in place, so the call above leaves the live
# model on the CPU. Move it back so later cells that feed it tensors on
# `device` keep working. Assigning the result avoids echoing the model repr.
mynet = mynet.to(device)