# Torch Basic

In [1]:
import torch 

In [4]:
x = torch.empty(1) #<--1D tensor with 1 value
x

tensor([16.9906])

In [5]:
x = torch.empty(3) #1D tensor with 3 value
x

tensor([1.6990e+01, 4.5621e-41, 3.8723e-35])

In [6]:
x = torch.empty(2,3) #2D
x

tensor([[1.4605e-37, 0.0000e+00, 1.4604e-37],
        [0.0000e+00, 8.9683e-44, 0.0000e+00]])

In [8]:
# random
x = torch.rand(4,7)
x

tensor([[0.2146, 0.2470, 0.1729, 0.7160, 0.5653, 0.6465, 0.6296],
        [0.4632, 0.6054, 0.9097, 0.8697, 0.7862, 0.1525, 0.0710],
        [0.0489, 0.8209, 0.0131, 0.6116, 0.3806, 0.7269, 0.3091],
        [0.5920, 0.7559, 0.0421, 0.8701, 0.2036, 0.6484, 0.9474]])

In [9]:
#dtype 
x.dtype

torch.float32

In [10]:
#size
x.size()

torch.Size([4, 7])

In [11]:
# creating tensor with list
torch.tensor([2.5,0.1])

tensor([2.5000, 0.1000])

In [12]:
# Basic operations
x =torch.rand(2,2)
y =torch.rand(2,2)
x,y

(tensor([[0.9300, 0.0638],
         [0.3405, 0.2974]]),
 tensor([[0.9454, 0.1601],
         [0.6468, 0.2262]]))

In [14]:
#addition
x+y # performs element wise addition 

tensor([[1.8754, 0.2239],
        [0.9874, 0.5236]])

In [15]:
# or perform addition with method
torch.add(x,y)

tensor([[1.8754, 0.2239],
        [0.9874, 0.5236]])

In [16]:
# inplace addition
y.add_(x) #<--in pytorch all functions with trailing _(underscore) will do an in-place operation 


tensor([[1.8754, 0.2239],
        [0.9874, 0.5236]])

In [17]:
# indexing and slicing 
x=torch.rand(5,3)
print(x)
print(x[:,0])

tensor([[0.9340, 0.8160, 0.1577],
        [0.8966, 0.7506, 0.4116],
        [0.1751, 0.0521, 0.6028],
        [0.8982, 0.4156, 0.3321],
        [0.2017, 0.3600, 0.1515]])
tensor([0.9340, 0.8966, 0.1751, 0.8982, 0.2017])


In [19]:
# .item() method to get the actual value
print(x[1,1])
print(x[1,1].item())

tensor(0.7506)
0.7506189346313477


In [20]:
#reshaping tensor
x = torch.rand(4,4) #<-- 2D
print(x)
y = x.view(16) #<--1D
print(y)

tensor([[0.2248, 0.9213, 0.8510, 0.7273],
        [0.6445, 0.9999, 0.7064, 0.2928],
        [0.6696, 0.5272, 0.5626, 0.4283],
        [0.5000, 0.5222, 0.1006, 0.3698]])
tensor([0.2248, 0.9213, 0.8510, 0.7273, 0.6445, 0.9999, 0.7064, 0.2928, 0.6696,
        0.5272, 0.5626, 0.4283, 0.5000, 0.5222, 0.1006, 0.3698])


In [22]:
# numpy to tensor and viceversa
import numpy as np

a=torch.ones(5)
print(a)

b=a.numpy()
print(type(b))

tensor([1., 1., 1., 1., 1.])
<class 'numpy.ndarray'>


In [23]:
a.add_(1)

tensor([2., 2., 2., 2., 2.])

In [24]:
b

array([2., 2., 2., 2., 2.], dtype=float32)

changing 'a' changes 'b' because they both point to same memory location

In [25]:
#viceversa

a=np.ones(5)
print(a)

b=torch.from_numpy(a)
print(b)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


# Gradient calculation with Autograd

In [30]:
x = torch.randn(3, requires_grad=True) #<-- to calculate gradient of some function with resp. to x
x #<-- whenever we do operations with this tensor, it creates a computational graph 

tensor([-1.0857,  0.4361, -0.8030], requires_grad=True)

In [31]:
y = x+2 #<-- creates computational graph
y

tensor([0.9143, 2.4361, 1.1970], grad_fn=<AddBackward0>)

pytorch automatically create and store a function for us which is used in back propagation and to get the gradients

![image](image.png)


In [32]:
z = y*y*2
z

tensor([ 1.6718, 11.8695,  2.8657], grad_fn=<MulBackward0>)

In [33]:
z = z.mean()

In [34]:
z

tensor(5.4690, grad_fn=<MeanBackward0>)

In [35]:
# calculate the gardients 
z.backward() #dz/dx
x.grad #<stores gradient

tensor([1.2190, 3.2482, 1.5960])

In [36]:
z = y*y*2
z

tensor([ 1.6718, 11.8695,  2.8657], grad_fn=<MulBackward0>)

In [38]:
#if calculated gradient, throes error because grad can be implicitly created only for scalr output

# so create vector of sam size
v = torch.tensor([0.1,1.0,0.001],dtype=torch.float32)
z.backward(v)

In [39]:
x.grad

tensor([ 1.5848, 12.9927,  1.6008])

In [42]:
## avoid torch to keep history
x.requires_grad_(False) #<--method 1
x

tensor([-1.0857,  0.4361, -0.8030])

In [43]:
y = x.detach() #method 2
y

tensor([-1.0857,  0.4361, -0.8030])

In [44]:
with torch.no_grad(): #method 3
    y = x+2
    print(y)

tensor([0.9143, 2.4361, 1.1970])


In [48]:
## training example

weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    model_output = (weights*3).sum()
    print(model_output) #<-- scalar output for grad 
    
    model_output.backward() #<-- second backward call with again accumulate the values and write them into grad attribute
    
    print(weights.grad)

tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor(12., grad_fn=<SumBackward0>)
tensor([6., 6., 6., 6.])


the gradients are clearly incorrect

In [50]:
# before optimization clearing gradients
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    model_output = (weights*3).sum()
    print(model_output) #<-- scalar output for grad 
    
    model_output.backward() #<-- second backward call with again accumulate the values and write them into grad attribute
    
    print(weights.grad)
    
    weights.grad.zero_()

tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])


# Backpropagartion with pytorch
![image-2](image-2.png)


In [1]:
import torch

x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

#forward pass and compute the loss
y_hat = w*x
loss = (y_hat - y)**2

print(loss)

tensor(1., grad_fn=<PowBackward0>)


In [2]:
# backward pass
loss.backward()
w.grad

tensor(-2.)

# Gradient descent and Backpropagation

- Prediction: PyTorch Model
- Gradients Computation: Autograd
- Loss Computation: PyTorch Loss
- Parameter updates: PyTorch Optimizer

A cost function estimates the error of a model.
Gradient descent is a technique that uses derivative of the cost function
to change the parameter values (weights / co-efficients) to minimize the 
cost or error.
Change the current weights by delta and take a step. The direction and size 
of step is calculated by the gradient (slope) of cost function at the 
current position by some specified learning rate. The gradient vector 
contains the slopes for all weight vectors / co-efficients. This gradient 
vector is used to update all existing weights.
If slope is negative, we are stepping downhill and will reach a 
minimum position when the slope becomes 0 / algorithm converges.
If slope is positive, we are stepping uphill and will reach a 
maximum position when the slope becomes 0 / algorithm converges.

Process of applying gradient descent:
1. Initialize parameters randomly
2. Calculate cost for training set with a cost function
3. Calculate gradient of the cost function (partial derivative for all dataset)
4. Update weights with new values
5. Repeat from step 2 until cost is small enough

In [1]:
import torch

In [12]:
import numpy as np

# f = w*x

# suppose weight=2 --> f=2*x
x=np.array([1,2,3,4],dtype=np.float32)

# actual value 
y=np.array([2,4,6,8],dtype=np.float32) 

#intialize weight (randomly)
w=0.0

In [13]:
# model prediction (returns y_predicted)
def forward(x):
    return w*x

# loss = MSE
def loss(y,y_predicted):
    return ((y_predicted - y)**2).mean()

# gradient
# MSE = 1/N * (w*x - y)**2
# dJ/dw = 1/N * 2x(wx - y)
def gradient(x,y,y_predicted):
    return np.dot(2*x, y_predicted - y).mean()

In [14]:
print(f'Prediction before training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000


In [15]:
#Training 
learning_rate = 0.01
n_iters =10

for epoch in range(n_iters):
    #prediction = forward pass
    y_pred = forward(x)
    
    #loss
    l = loss(y,y_pred)
    
    #gradients
    dw = gradient(x,y,y_pred)
    
    #update weights
    w -= learning_rate *dw
    
    if epoch %1 ==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss={l:.8f}')
        
print(f'Prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 1.200, loss=30.00000000
epoch 2: w = 1.680, loss=4.79999924
epoch 3: w = 1.872, loss=0.76800019
epoch 4: w = 1.949, loss=0.12288000
epoch 5: w = 1.980, loss=0.01966083
epoch 6: w = 1.992, loss=0.00314574
epoch 7: w = 1.997, loss=0.00050331
epoch 8: w = 1.999, loss=0.00008053
epoch 9: w = 1.999, loss=0.00001288
epoch 10: w = 2.000, loss=0.00000206
Prediction after training: f(5) = 9.999


## Implementing in torch

In [21]:
import torch

# f = w*x

# suppose weight=2 --> f=2*x
x=torch.tensor([1,2,3,4],dtype=torch.float32)

# actual value 
y=torch.tensor([2,4,6,8],dtype=torch.float32) 

#intialize weight (randomly)
w=torch.tensor(0.0,dtype=torch.float32, requires_grad =True)

In [22]:
# model prediction (returns y_predicted)
def forward(x):
    return w*x

# loss = MSE
def loss(y,y_predicted):
    return ((y_predicted - y)**2).mean()


In [23]:
#Training 
learning_rate = 0.01
n_iters =10

for epoch in range(n_iters):
    #prediction = forward pass
    y_pred = forward(x)
    
    #loss
    l = loss(y,y_pred)
    
    #gradients = backward pass
    l.backward() #dl/dw
    
    #update weights
    with torch.no_grad():
        w -= learning_rate *w.grad
        
    #zero gradients
    w.grad.zero_()
    
    if epoch %1 ==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss={l:.8f}')
        
print(f'Prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss=30.00000000
epoch 2: w = 0.555, loss=21.67499924
epoch 3: w = 0.772, loss=15.66018772
epoch 4: w = 0.956, loss=11.31448650
epoch 5: w = 1.113, loss=8.17471695
epoch 6: w = 1.246, loss=5.90623236
epoch 7: w = 1.359, loss=4.26725292
epoch 8: w = 1.455, loss=3.08308983
epoch 9: w = 1.537, loss=2.22753215
epoch 10: w = 1.606, loss=1.60939169
Prediction after training: f(5) = 8.031


# Complete PyTorch pipeline

In [24]:
# 1) Design model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#   - forward pass: compute prediction
#   - backward pass: gradients
#   - update weights

import torch

In [28]:
# nn module
import torch.nn as nn

# f = w*x

# suppose weight=2 --> f=2*x
x=torch.tensor([1,2,3,4],dtype=torch.float32)

# actual value 
y=torch.tensor([2,4,6,8],dtype=torch.float32) 

#intialize weight (randomly)
w=torch.tensor(0.0,dtype=torch.float32, requires_grad =True)

# model prediction (returns y_predicted)
def forward(x):
    return w*x

#Training 
learning_rate = 0.01
n_iters =100

# loss and optimizer
loss = nn.MSELoss() #<-- callable fucntion
optimizer = torch.optim.SGD([w], lr=learning_rate) #<-- automatically update weights

## training loop
for epoch in range(n_iters):
    #prediction = forward pass
    y_pred = forward(x)
    
    #loss
    l = loss(y,y_pred) #<--pass to nn callable function to calc. loss
    
    #gradients = backward pass
    l.backward() #dl/dw
    
    #update weights
    # (w -= learning_rate *w.grad) using optimizer
    optimizer.step() 
        
    #zero gradients
    # w.grad.zero_() using optimizer
    optimizer.zero_grad()
    
    if epoch %10 ==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss={l:.8f}')
        
print(f'Prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss=30.00000000
epoch 11: w = 1.665, loss=1.16278565
epoch 21: w = 1.934, loss=0.04506890
epoch 31: w = 1.987, loss=0.00174685
epoch 41: w = 1.997, loss=0.00006770
epoch 51: w = 1.999, loss=0.00000262
epoch 61: w = 2.000, loss=0.00000010
epoch 71: w = 2.000, loss=0.00000000
epoch 81: w = 2.000, loss=0.00000000
epoch 91: w = 2.000, loss=0.00000000
Prediction after training: f(5) = 10.000


In [5]:
## replace manually implemented forward method with pytorch model

import torch
# nn module
import torch.nn as nn

# f = w*x

# suppose weight=2 --> f=2*x

# x,y should be different shape -- 2D 
x=torch.tensor([[1],[2],[3],[4]],dtype=torch.float32) # 4 samples, 1 feature
# actual value 
y=torch.tensor([[2],[4],[6],[8]],dtype=torch.float32) 

n_samples, n_features = x.shape
print(n_samples, n_features)

# pytorch model
input_size = n_features
output_size = n_features
model = nn.Linear(input_size,output_size) #model creation

#Training 
learning_rate = 0.01
n_iters =100

# loss and optimizer
loss = nn.MSELoss() #<-- callable fucntion
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) #<-- automatically update weights

## training loop
for epoch in range(n_iters):
    #prediction = forward pass
    y_pred = model(x)
    
    #loss
    l = loss(y,y_pred) #<--pass to nn callable function to calc. loss
    
    #gradients = backward pass
    l.backward() #dl/dw
    
    #update weights
    # (w -= learning_rate *w.grad) using optimizer
    optimizer.step() 
        
    #zero gradients
    # w.grad.zero_() using optimizer
    optimizer.zero_grad()
    
    if epoch %10 ==0:
        [w,b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss={l:.8f}')
   
X_test = torch.tensor([5],dtype=torch.float32)
print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

4 1
epoch 1: w = 0.924, loss=10.55867481
epoch 11: w = 1.672, loss=0.32202727
epoch 21: w = 1.797, loss=0.05433739
epoch 31: w = 1.821, loss=0.04473386
epoch 41: w = 1.829, loss=0.04196347
epoch 51: w = 1.835, loss=0.03951670
epoch 61: w = 1.840, loss=0.03721647
epoch 71: w = 1.845, loss=0.03505031
epoch 81: w = 1.849, loss=0.03301021
epoch 91: w = 1.854, loss=0.03108880
Prediction after training: f(5) = 9.707


In [7]:
[w,b] = model.parameters()
w

Parameter containing:
tensor([[1.8576]], requires_grad=True)