### **Gradient Calculation**

In [3]:
import torch as tch

In [3]:
# requires_grad defaults to False: autograd records nothing for this tensor,
# so no gradients can be computed with respect to it.
x = tch.randn(5, requires_grad=False)
x

tensor([-0.0145,  0.4443, -0.1249, -1.8893,  0.2505])

In [26]:
# Leaf tensor with requires_grad=True: autograd records the operations applied to it.
x = tch.randn(5, requires_grad=True)
print("x = {}".format(x))

# Element-wise: y_i = 23 * x_i**2 + 2
y = x * x * 23 + 2
print("y = {}".format(y))

# backward() without arguments needs a scalar, so y is reduced to a single value first.
z = y.sum()
print("z = {}".format(z))

z.backward()  # Computes dz/dx and stores it in x.grad (here dz/dx_i = 46 * x_i)
print("Gradients wrt x {}".format(x.grad))

# Note: y itself is not a scalar, so y.backward() needs an explicit `gradient`
# argument with the same shape as y (the weights of a vector-Jacobian product).
# The result is still accumulated in x.grad, the leaf tensor.



x = tensor([-0.3379,  1.0093,  0.5507,  0.0982,  0.6641], requires_grad=True)
y = tensor([ 4.6257, 25.4297,  8.9759,  2.2218, 12.1450], grad_fn=<AddBackward0>)
z = 53.39802169799805
Gradients wrt x tensor([-15.5422,  46.4277,  25.3334,   4.5176,  30.5506])


#### **Preventing PyTorch from tracking gradients**


**1. Using requires_grad_(requires_grad=False)**

In [31]:
# Start with a tensor that is tracked by autograd.
x = tch.randn(6, requires_grad=True)
print("x = {}".format(x))

# The trailing underscore marks an in-place operation: x itself stops being tracked.
x.requires_grad_(requires_grad=False)
print("x = {}".format(x))


x = tensor([-2.2986, -0.3224,  0.5825, -0.7033, -1.0650,  0.1364],
       requires_grad=True)
x = tensor([-2.2986, -0.3224,  0.5825, -0.7033, -1.0650,  0.1364])


**2. Using detach()**

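This creates a new tensor with the same values that is detached from the computation graph, so no gradients are tracked for it. Note that it shares its data with the original tensor rather than copying it.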

In [33]:
x = tch.randn(6, requires_grad=True)

# Cubic polynomial of x; y is part of the autograd graph (it has a grad_fn).
y = x.pow(3) + x.pow(2) + 45 * x + 9
print("y = {}".format(y))

# detach() gives a tensor with the same values that is cut off from the graph.
z = y.detach()
print("z = {}".format(z))

y = tensor([-21.1951,  28.4543, 100.0550,  11.1985, -80.3295,  -2.0442],
       grad_fn=<AddBackward0>)
z = tensor([-21.1951,  28.4543, 100.0550,  11.1985, -80.3295,  -2.0442])


**3. Using torch.no_grad()**

In [35]:
x = tch.randn(6, requires_grad=True)
y = x.pow(3) + x.pow(2) + 45 * x + 9
print("y = {}".format(y))

# Operations executed inside the no_grad() context are not recorded by autograd,
# so z has no grad_fn even though it is computed from y.
with tch.no_grad():
    z = tch.add(y, 4)
    print("z = {}".format(z))

y = tensor([ 14.2390,  67.6528, -50.8189,  44.0368, -11.8679,  52.4718],
       grad_fn=<AddBackward0>)
z = tensor([ 18.2390,  71.6528, -46.8189,  48.0368,  -7.8679,  56.4718])


#### **Gradients in a Loop**

Each call to `backward()` adds to the existing `.grad` values instead of overwriting them, so inside a loop the gradients must be reset to zero after every iteration.

In [6]:
weights = tch.ones(5, requires_grad=True)

for epochs in range(5):
    # Scalar "model output": sum over i of (w_i**4 + 10 * w_i)
    model_output = tch.sum(weights.pow(4) + 10 * weights)
    model_output.backward()
    print(weights.grad)

    # backward() adds into weights.grad; clear it in place so the next
    # iteration starts from zero instead of accumulating.
    weights.grad.zero_()

tensor(55., grad_fn=<SumBackward0>)
tensor([14., 14., 14., 14., 14.])
tensor(55., grad_fn=<SumBackward0>)
tensor([14., 14., 14., 14., 14.])
tensor(55., grad_fn=<SumBackward0>)
tensor([14., 14., 14., 14., 14.])
tensor(55., grad_fn=<SumBackward0>)
tensor([14., 14., 14., 14., 14.])
tensor(55., grad_fn=<SumBackward0>)
tensor([14., 14., 14., 14., 14.])


### **Linear Regression with backpropagation**

In [60]:
# Training data following y = 2 * x (Python `float` as dtype means 64-bit floats)
X = tch.tensor([2, 9, 10, 3], dtype=tch.float64)
y = tch.tensor([4, 18, 20, 6], dtype=tch.float64)

# weight: the single trainable parameter, initialised to zero and tracked by autograd
w = tch.tensor([0.0], requires_grad=True)

# model prediction
def forward(x):
    """Linear model without bias: scale the input `x` by the global weight `w`."""
    prediction = w * x
    return prediction

# Loss calculation = MSE
def MSE(y,y_hat):
    """Return the mean squared error between targets `y` and predictions `y_hat`."""
    residual = y_hat - y
    return tch.mean(residual.pow(2))

# Gradients
# MSE   = 1/N * sum((w*x - y)**2)
# dJ/dw = 1/N * sum(2*x*(w*x - y))

# Gradient
def gradient(x,y,y_hat):
    """Analytic gradient of the MSE loss with respect to the weight.

    Args:
        x: input tensor.
        y: target tensor.
        y_hat: prediction tensor (w * x for the linear model above).

    Returns:
        0-dim tensor holding mean(2 * x * (y_hat - y)).
    """
    # Average the per-sample terms. A dot product would already collapse them
    # into a single sum, leaving nothing for .mean() to divide by N.
    return (2*x*(y_hat-y)).mean()

# Learning rate: step size of each gradient-descent update
learning_rate = 0.001

# Number of passes over the training data
epochs = 6

# Input used to probe the model before and after training; it is formatted
# into the messages so the printed label always matches the evaluated value.
x_test = 5

print("Prediction before training: forward({}) = {}".format(x_test, forward(x_test)[0]))
for epoch in range(epochs):
    # y_prediction (forward pass)
    y_pred = forward(X)

    # loss
    loss = MSE(y, y_pred)

    # Manual alternative without autograd:
    #   dw = gradient(X, y, y_pred)
    #   w -= learning_rate * dw

    # Autograd version: backward() computes dL/dw and accumulates it in w.grad
    loss.backward()

    # The parameter update itself must not be recorded in the graph.
    with tch.no_grad():
        w -= learning_rate * w.grad

    # clearing gradients, since backward() accumulates into w.grad
    w.grad.zero_()

print("Prediction after training: forward({}) = {}".format(x_test, forward(x_test)[0]))
    


Prediction before training: forward(3.4) = 0.0
Prediction after training: forward(3.4) = 4.5784125328063965
