In [159]:
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [160]:
print(torch.__version__)

2.1.2


In [161]:
t1=torch.tensor(4.0)
t1

tensor(4.)

In [162]:
t1.dtype

torch.float32

In [163]:
t2=torch.tensor([1.,2,3,4])
t2

tensor([1., 2., 3., 4.])

In [164]:
t2.dtype

torch.float32

In [165]:
t3=torch.tensor([[5.,6],
                 [7,8],
                 [9,10]])
t3

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])

In [166]:
t4=torch.tensor([
    [[11,12,13],
     [13,14,15]],
     [[15,16,17],
      [17,18,19]]
])
t4

tensor([[[11, 12, 13],
         [13, 14, 15]],

        [[15, 16, 17],
         [17, 18, 19]]])

In [167]:
print(t1)
t1.shape

tensor(4.)


torch.Size([])

In [168]:
print(t2)
t2.shape

tensor([1., 2., 3., 4.])


torch.Size([4])

In [169]:
print(t3)
t3.shape

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])


torch.Size([3, 2])

In [170]:
print(t4)
t4.shape

tensor([[[11, 12, 13],
         [13, 14, 15]],

        [[15, 16, 17],
         [17, 18, 19]]])


torch.Size([2, 2, 3])

In [171]:
# bnasic tensor operations
x=torch.tensor(3.0)
w=torch.tensor(4.,requires_grad=True)
b=torch.tensor(5.,requires_grad=True)
x,w,b

(tensor(3.), tensor(4., requires_grad=True), tensor(5., requires_grad=True))

In [172]:
x.is_leaf

True

In [173]:
y=w*x+b
y

tensor(17., grad_fn=<AddBackward0>)

In [174]:
# y.retain_grad()
y.backward()
print('dy/dx:',x.grad)
print('dy/dw:',w.grad)
print('dy/db:',b.grad)
print('dy/dw 2:',w.grad_fn)

dy/dx: None
dy/dw: tensor(3.)
dy/db: tensor(1.)
dy/dw 2: None


In [175]:
w2=torch.tensor(42.,requires_grad=True)
b2=torch.tensor(52.,requires_grad=True)
y2=w2+b2

In [176]:
y2.backward
print(w2.grad)

None


In [177]:
t6=torch.full((3,2),42)
t6

tensor([[42, 42],
        [42, 42],
        [42, 42]])

In [178]:
t7=torch.concatenate((t3,t6))
t7

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.],
        [42., 42.],
        [42., 42.],
        [42., 42.]])

In [179]:
t8=torch.sin(t7)
t8

tensor([[-0.9589, -0.2794],
        [ 0.6570,  0.9894],
        [ 0.4121, -0.5440],
        [-0.9165, -0.9165],
        [-0.9165, -0.9165],
        [-0.9165, -0.9165]])

In [180]:
t9=t8.reshape(3,2,2)
t9

tensor([[[-0.9589, -0.2794],
         [ 0.6570,  0.9894]],

        [[ 0.4121, -0.5440],
         [-0.9165, -0.9165]],

        [[-0.9165, -0.9165],
         [-0.9165, -0.9165]]])

In [181]:
x=np.array([[1,2],
            [3,4.]])
x

array([[1., 2.],
       [3., 4.]])

In [182]:
y=torch.from_numpy(x)
y

tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)

In [183]:
x.dtype,y.dtype

(dtype('float64'), torch.float64)

In [184]:
z=y.numpy()
z

array([[1., 2.],
       [3., 4.]])

Fundamentals of Pytorch and Numpy done.
Next progress towards linear regression znd gradient descent

Video course Timestamp 25.28 / 1.43.32  

In [185]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [186]:
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [187]:
# Convert inputs and targets to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [188]:
# Weights and biases
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-0.6455, -0.1803,  0.7895],
        [-0.6945, -2.6122, -1.9756]], requires_grad=True)
tensor([-1.3185,  0.2464], requires_grad=True)


In [189]:
inputs @ w.t()+b

tensor([[ -26.5684, -310.4168],
        [ -25.3932, -419.2604],
        [ -35.8405, -524.7911],
        [ -45.6983, -256.0098],
        [  -7.8975, -436.7332]], grad_fn=<AddBackward0>)

In [190]:
def model(x):
    return x @ w.t()+b

In [191]:
preds=model(inputs)
print(preds)

tensor([[ -26.5684, -310.4168],
        [ -25.3932, -419.2604],
        [ -35.8405, -524.7911],
        [ -45.6983, -256.0098],
        [  -7.8975, -436.7332]], grad_fn=<AddBackward0>)


In [192]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [193]:
diff=preds-targets
torch.sum(diff * diff)/diff.numel()

tensor(130176.5156, grad_fn=<DivBackward0>)

to get rid of negatives in diff, square numbers; * does a element wise multiplication

In [194]:
# mean squared error loss 
def mse(t1,t2):
    diff=t1-t2
    return torch.sum(diff*diff)/diff.numel()

In [195]:
loss=mse(preds,targets)
print(loss)

tensor(130176.5156, grad_fn=<DivBackward0>)


In [196]:
loss.backward()

In [197]:
print(w)
print(w.grad)

tensor([[-0.6455, -0.1803,  0.7895],
        [-0.6945, -2.6122, -1.9756]], requires_grad=True)
tensor([[ -8747.5127,  -9840.1016,  -5921.6040],
        [-40114.9102, -45072.9336, -27509.8320]])


In [198]:
print(b)
print(b.grad)

tensor([-1.3185,  0.2464], requires_grad=True)
tensor([-104.4796, -481.4423])


In [199]:
with torch.no_grad():
    w-=w.grad*1e-5
    b-=b.grad*1e-5

In [200]:
w,b

(tensor([[-0.5580, -0.0819,  0.8487],
         [-0.2933, -2.1615, -1.7005]], requires_grad=True),
 tensor([-1.3175,  0.2512], requires_grad=True))

In [201]:
preds=model(inputs)
loss=mse(preds,targets)
print(loss)

tensor(88237.3594, grad_fn=<DivBackward0>)


the grad values for w and b should be reset to zero, else the next operation will add to existing grad values

In [202]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


to train the model repeat the above operation x number of times, called epochs

In [203]:
for i in range(100):
    preds=model(inputs)
    loss=mse(preds,targets)
    loss.backward()
    with torch.no_grad():
        w-=w.grad*1e-5
        b-=b.grad*1e-5
        w.grad.zero_()
        b.grad.zero_()

In [204]:
preds=model(inputs)
loss=mse(preds,targets)
print(loss)

tensor(504.2423, grad_fn=<DivBackward0>)


In [205]:
preds

tensor([[ 58.5270,  80.2606],
        [ 87.0164,  99.3824],
        [105.5612, 119.8894],
        [ 30.6669,  93.7866],
        [104.4344,  83.8210]], grad_fn=<AddBackward0>)

In [206]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

xxxxxxxxx
Starting work on using Troch for gradient descent

In [207]:
# inputs are temperature, rainfall, humidity
inputs=np.array([[73,67,43],
                 [91,88,64],
                 [87,134,58],
                 [102,43,37],
                 [69,96,70],
                 [74,66,43],
                 [91,87,65],
                 [88,114,59],
                 [101,44,37],
                 [68,96,71],
                 [73,66,44],
                 [92,87,64],
                 [87,135,57],
                 [103,43,36],
                 [68,97,70]],
                 dtype='float')

# targets are apples, oranges
targets=np.array([[56,70],
                  [81,101],
                  [119,133],
                  [22,37],
                  [103,119],
                  [57,69],
                  [80,102],
                  [118,132],
                  [21,38],
                  [104,119],
                  [57,69],
                  [82,100],
                  [118,134],
                  [20,38],
                  [102,120]],
                  dtype='float')

inputs=torch.from_numpy(inputs)
targets=torch.from_numpy(targets)

In [208]:
inputs.shape

torch.Size([15, 3])

In [209]:
targets.shape

torch.Size([15, 2])

use tensordataset to split the data into smaller chunks to run code, returns a tuple with 2 elements, in our cse inputs and targets

In [210]:
train_ds=TensorDataset(inputs,targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]], dtype=torch.float64),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]], dtype=torch.float64))

use dataloader to split data into bathces of predefined size; enbles shuffling and ramdom sampling of data

In [212]:
batch_size=5
train_dl=DataLoader(train_ds,batch_size,shuffle=True)

In [217]:
for xb,yb in train_dl:
    print(xb)
    print(yb)
    # break

tensor([[101.,  44.,  37.],
        [103.,  43.,  36.],
        [ 69.,  96.,  70.],
        [ 74.,  66.,  43.],
        [102.,  43.,  37.]], dtype=torch.float64)
tensor([[ 21.,  38.],
        [ 20.,  38.],
        [103., 119.],
        [ 57.,  69.],
        [ 22.,  37.]], dtype=torch.float64)
tensor([[ 91.,  88.,  64.],
        [ 73.,  66.,  44.],
        [ 68.,  96.,  71.],
        [ 92.,  87.,  64.],
        [ 88., 114.,  59.]], dtype=torch.float64)
tensor([[ 81., 101.],
        [ 57.,  69.],
        [104., 119.],
        [ 82., 100.],
        [118., 132.]], dtype=torch.float64)
tensor([[ 68.,  97.,  70.],
        [ 87., 134.,  58.],
        [ 73.,  67.,  43.],
        [ 91.,  87.,  65.],
        [ 87., 135.,  57.]], dtype=torch.float64)
tensor([[102., 120.],
        [119., 133.],
        [ 56.,  70.],
        [ 80., 102.],
        [118., 134.]], dtype=torch.float64)
