In [1]:
import numpy as np
import torch
import torch.nn as nn

In [2]:
t1 = torch.tensor(4.)

In [3]:
t1.dtype

torch.float32

In [4]:
t2 = torch.tensor([1., 2, 3, 4])
t2

tensor([1., 2., 3., 4.])

In [5]:
t3 = torch.tensor([[5., 6], 
                   [7, 8], 
                   [9, 10]])
t3

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])

In [6]:
t4 = torch.tensor([
    [[11, 12, 13], 
     [13, 14, 15]], 
    [[15, 16, 17], 
     [17, 18, 19.]]])
t4

tensor([[[11., 12., 13.],
         [13., 14., 15.]],

        [[15., 16., 17.],
         [17., 18., 19.]]])

In [7]:
t1.shape


torch.Size([])

In [8]:
t2.shape

torch.Size([4])

In [9]:

t3.shape


torch.Size([3, 2])

In [10]:
t4.shape
t4

tensor([[[11., 12., 13.],
         [13., 14., 15.]],

        [[15., 16., 17.],
         [17., 18., 19.]]])

In [11]:
x = torch.tensor(3.)
w = torch.tensor(4., requires_grad=True)
b = torch.tensor(5., requires_grad=True)
x, w, b

(tensor(3.), tensor(4., requires_grad=True), tensor(5., requires_grad=True))

In [12]:
y = w * x + b
y

tensor(17., grad_fn=<AddBackward0>)

In [13]:
y.backward()

In [14]:
print('dy/dx:', x.grad)
print('dy/dw:', w.grad)
print('dy/db:', b.grad)

dy/dx: None
dy/dw: tensor(3.)
dy/db: tensor(1.)


In [15]:
x = np.array([[1, 2], [3, 4.]])
x

array([[1., 2.],
       [3., 4.]])

In [16]:
y = torch.from_numpy(x)
y

tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)

In [17]:
z = y.numpy()
z

array([[1., 2.],
       [3., 4.]])

In [18]:
x.dtype, y.dtype

(dtype('float64'), torch.float64)

Linear Regression

In [19]:
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 143, 68],
                   [102, 47, 37],
                   [69, 96, 70]], dtype='float32')

In [20]:
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')

In [21]:
inputs , targets

(array([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 143.,  68.],
        [102.,  47.,  37.],
        [ 69.,  96.,  70.]], dtype=float32),
 array([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]], dtype=float32))

In [22]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 143.,  68.],
        [102.,  47.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


Linear Regression model

In [23]:
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)

print(w)
print(b)

tensor([[ 0.7310, -2.0987,  0.6372],
        [ 0.1265,  0.9796, -1.1761]], requires_grad=True)
tensor([0.5086, 0.7973], requires_grad=True)


In [24]:
def model(x):
    return x @ w.t() + b
    

In [25]:
preds = model(inputs)
print(preds)

tensor([[-5.9340e+01,  2.5090e+01],
        [-7.6873e+01,  2.3240e+01],
        [-1.9268e+02,  7.1907e+01],
        [ 9.5912e-03,  1.6223e+01],
        [-1.0592e+02,  2.1237e+01]], grad_fn=<AddBackward0>)


In [26]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [27]:
diff = preds - targets
torch.sum(diff*diff) / diff.numel()

tensor(20128.5898, grad_fn=<DivBackward0>)

Loss function

In [28]:
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff*diff) / diff.numel()

In [29]:
loss = mse(preds, targets)
print(loss)

tensor(20128.5898, grad_fn=<DivBackward0>)


Gradient Descent 

In [30]:
loss.backward()

In [31]:
print(w)
print(w.grad)

tensor([[ 0.7310, -2.0987,  0.6372],
        [ 0.1265,  0.9796, -1.1761]], requires_grad=True)
tensor([[-13312.1328, -17456.0508, -10339.1191],
        [ -4906.9170,  -5789.9736,  -3734.8467]])


Adding Weights and biases to thhe loss

In [32]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    

In [33]:
w, b

(tensor([[ 0.8641, -1.9241,  0.7406],
         [ 0.1755,  1.0375, -1.1388]], requires_grad=True),
 tensor([0.5102, 0.7979], requires_grad=True))

In [34]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(14140.1699, grad_fn=<DivBackward0>)


In [35]:
w.grad.zero_()
b.grad.zero_()

print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


Training the model using gradient descent

In [36]:
preds = model(inputs)
print(preds)

tensor([[ -33.4792,   34.1580],
        [ -42.7790,   35.1911],
        [-149.1000,   86.9963],
        [  25.6194,   25.3315],
        [ -72.7392,   32.7965]], grad_fn=<AddBackward0>)


In [37]:
for i in range(120):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
    
        w.grad.zero_()
        b.grad.zero_()


In [38]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(670.6558, grad_fn=<DivBackward0>)


In [39]:
preds

tensor([[ 65.2982,  71.4721],
        [ 93.5965,  86.7132],
        [ 80.5618, 155.6892],
        [ 74.8369,  55.7589],
        [ 90.4489,  87.9888]], grad_fn=<AddBackward0>)

In [40]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [82]:
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70], 
                   [74, 66, 43], 
                   [91, 87, 65], 
                   [88, 134, 59], 
                   [101, 44, 37], 
                   [68, 96, 71], 
                   [73, 66, 44], 
                   [92, 87, 64], 
                   [87, 135, 57], 
                   [103, 43, 36], 
                   [68, 97, 70]], 
                  dtype='float32')


targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119],
                    [57, 69], 
                    [80, 102], 
                    [118, 132], 
                    [21, 38], 
                    [104, 118], 
                    [57, 69], 
                    [82, 100], 
                    [118, 134], 
                    [20, 38], 
                    [102, 120]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 74.,  66.,  43.],
        [ 91.,  87.,  65.],
        [ 88., 134.,  59.],
        [101.,  44.,  37.],
        [ 68.,  96.,  71.],
        [ 73.,  66.,  44.],
        [ 92.,  87.,  64.],
        [ 87., 135.,  57.],
        [103.,  43.,  36.],
        [ 68.,  97.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])


Datasets and Dataloader

In [83]:
from torch.utils.data import TensorDataset

In [84]:
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [85]:
from torch.utils.data import DataLoader

In [86]:
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [87]:
for xb, yb in train_dl:
    print(xb)
    print(yb)
    
    break

tensor([[ 92.,  87.,  64.],
        [101.,  44.,  37.],
        [ 88., 134.,  59.],
        [ 73.,  67.,  43.],
        [102.,  43.,  37.]])
tensor([[ 82., 100.],
        [ 21.,  38.],
        [118., 132.],
        [ 56.,  70.],
        [ 22.,  37.]])


In [88]:
model = nn.Linear(3, 2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[ 0.4195, -0.2789,  0.2638],
        [ 0.0061, -0.2670,  0.0016]], requires_grad=True)
Parameter containing:
tensor([ 0.2236, -0.0906], requires_grad=True)


In [89]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.4195, -0.2789,  0.2638],
         [ 0.0061, -0.2670,  0.0016]], requires_grad=True),
 Parameter containing:
 tensor([ 0.2236, -0.0906], requires_grad=True)]

In [90]:
preds = model(inputs)
preds

tensor([[ 23.5095, -17.4617],
        [ 30.7450, -22.9245],
        [ 14.6553, -35.2395],
        [ 40.7864, -10.8855],
        [ 20.8670, -25.1860],
        [ 24.2079, -17.1886],
        [ 31.2877, -22.6559],
        [ 15.3386, -35.2318],
        [ 40.0880, -11.1586],
        [ 20.7113, -25.1906],
        [ 24.0522, -17.1931],
        [ 31.4435, -22.6514],
        [ 14.1126, -35.5081],
        [ 40.9422, -10.8809],
        [ 20.1686, -25.4591]], grad_fn=<AddmmBackward0>)

In [91]:
import torch.nn.functional as F

In [92]:
loss_fn = F.mse_loss

In [93]:
loss = loss_fn(preds, targets)
loss

tensor(9580.9092, grad_fn=<MseLossBackward0>)

In [94]:
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

In [95]:
def fit(num_epochs, model, loss_fn, opt, train_dl):
    for epoch in range(num_epochs):
        for xb,yb in train_dl:
            pred = model(xb)
            loss = loss_fn(pred, yb)
            loss.backward()
    
            opt.step()
            opt.zero_grad()
            
        if (epoch+1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))


In [96]:
fit(100, model, loss_fn, opt, train_dl)

Epoch [10/100], Loss: 243.6127
Epoch [20/100], Loss: 257.1334
Epoch [30/100], Loss: 285.9352
Epoch [40/100], Loss: 256.5263
Epoch [50/100], Loss: 252.4533
Epoch [60/100], Loss: 154.6088
Epoch [70/100], Loss: 37.0003
Epoch [80/100], Loss: 57.2667
Epoch [90/100], Loss: 16.0453
Epoch [100/100], Loss: 31.1937


In [97]:
preds = model(inputs)
preds

tensor([[ 58.7890,  71.8828],
        [ 82.5659,  99.0947],
        [114.9413, 134.4673],
        [ 30.4895,  45.6204],
        [ 97.1217, 111.3249],
        [ 57.8006,  70.9480],
        [ 82.3788,  98.8146],
        [115.2823, 134.9164],
        [ 31.4779,  46.5552],
        [ 97.9230, 111.9797],
        [ 58.6018,  71.6027],
        [ 81.5775,  98.1598],
        [115.1284, 134.7473],
        [ 29.6882,  44.9656],
        [ 98.1102, 112.2598]], grad_fn=<AddmmBackward0>)

In [102]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])

In [103]:
model(torch.tensor([[75, 63, 44.]]))

tensor([[55.8667, 68.9010]], grad_fn=<AddmmBackward0>)

In [104]:
model2 = nn.Sequential(
    nn.Linear(3, 5),
    nn.Sigmoid(),
    nn.Linear(5, 2)
)

In [107]:
opt = torch.optim.SGD(model2.parameters(), lr=1e-3)

In [108]:
fit(100, model, loss_fn, opt, train_dl)

Epoch [10/100], Loss: 5.9530
Epoch [20/100], Loss: 26.2793
Epoch [30/100], Loss: 38.3262
Epoch [40/100], Loss: 24.2395
Epoch [50/100], Loss: 30.3861
Epoch [60/100], Loss: 22.5832
Epoch [70/100], Loss: 36.8315
Epoch [80/100], Loss: 36.6370
Epoch [90/100], Loss: 12.4883
Epoch [100/100], Loss: 30.7838
