In [2]:
import torch


# How gradient is used

The gradient is the derivative of the output with respect to each weight; backpropagation uses it to adjust the weights.

![gradient](assets/gradient.png)

Setting `requires_grad=True` makes autograd track every operation on the tensor, so the gradient with respect to each weight is calculated automatically.

In [3]:
# Build a small computation graph on a leaf tensor that tracks gradients,
# reduce it to a scalar, and back-propagate to obtain d(d)/d(x).
x = torch.rand((2, 3, 4), requires_grad=True)

a = x + 2
b = a * 2
c = b ** 2
d = c.mean()  # scalar result, so backward() needs no explicit gradient argument

print(f"x: {x}")
print(f"a: {a}")
print(f"b: {b}")
print(f"c: {c}")
print(f"d: {d}")

# backward() stores the result in x.grad and returns None, so there is
# nothing useful to capture from the call itself.
# Analytically: d = mean(4 * (x + 2)**2) over 24 elements, so
# d(d)/d(x) = 8 * (x + 2) / 24 = (x + 2) / 3.
d.backward()

grad = x.grad

print(f"grad: {grad}")







x: tensor([[[0.1801, 0.6695, 0.7117, 0.5697],
         [0.5046, 0.6651, 0.2095, 0.1856],
         [0.3545, 0.7559, 0.8294, 0.2690]],

        [[0.3658, 0.7116, 0.2215, 0.7136],
         [0.2797, 0.4324, 0.8426, 0.9727],
         [0.1415, 0.4659, 0.0742, 0.4353]]], requires_grad=True)
a: tensor([[[2.1801, 2.6695, 2.7117, 2.5697],
         [2.5046, 2.6651, 2.2095, 2.1856],
         [2.3545, 2.7559, 2.8294, 2.2690]],

        [[2.3658, 2.7116, 2.2215, 2.7136],
         [2.2797, 2.4324, 2.8426, 2.9727],
         [2.1415, 2.4659, 2.0742, 2.4353]]], grad_fn=<AddBackward0>)
b: tensor([[[4.3603, 5.3391, 5.4235, 5.1395],
         [5.0092, 5.3302, 4.4190, 4.3712],
         [4.7090, 5.5117, 5.6589, 4.5379]],

        [[4.7316, 5.4232, 4.4431, 5.4272],
         [4.5594, 4.8648, 5.6852, 5.9454],
         [4.2830, 4.9319, 4.1484, 4.8707]]], grad_fn=<MulBackward0>)
c: tensor([[[19.0121, 28.5058, 29.4139, 26.4140],
         [25.0916, 28.4110, 19.5280, 19.1071],
         [22.1743, 30.3791, 32.0231, 20.

# jacobian matrix with loss function

![image](assets/jacobian_matrix.png)

The Jacobian matrix is calculated from the computation graph. It is multiplied by the gradient of the loss function to obtain the gradients that the chosen optimizer then uses for the weight update.

In [4]:
# Leaf tensor whose gradient we want. It is named `inputs` so that the
# builtin `input()` is not shadowed for the rest of the kernel session.
inputs = torch.rand((2, 3, 4), requires_grad=True, dtype=torch.float32)

a = inputs + 2
b = a * 2
output = b ** 2  # element-wise, so `output` has the same shape as `inputs`

print(f"input: {inputs}")
print(f"a: {a}")
print(f"b: {b}")
print(f"output: {output}")

# `output` is not a scalar, so backward() must be given the upstream
# gradient d(loss)/d(output); autograd multiplies it with the Jacobian
# d(output)/d(inputs) (a vector-Jacobian product). A random tensor stands
# in for that upstream gradient here.
loss_grad = torch.rand((2, 3, 4), dtype=torch.float32)

# Stores d(loss)/d(output) * d(output)/d(inputs) in inputs.grad.
# For this graph that is loss_grad * 8 * (inputs + 2).
output.backward(loss_grad)

grad = inputs.grad

print(f"grad: {grad}")

input: tensor([[[0.7233, 0.0123, 0.7041, 0.0693],
         [0.8370, 0.8623, 0.5902, 0.3440],
         [0.9012, 0.1902, 0.2526, 0.6914]],

        [[0.3445, 0.5153, 0.0702, 0.5585],
         [0.7818, 0.6864, 0.6772, 0.7779],
         [0.9363, 0.5863, 0.7204, 0.3247]]], requires_grad=True)
a: tensor([[[2.7233, 2.0123, 2.7041, 2.0693],
         [2.8370, 2.8623, 2.5902, 2.3440],
         [2.9012, 2.1902, 2.2526, 2.6914]],

        [[2.3445, 2.5153, 2.0702, 2.5585],
         [2.7818, 2.6864, 2.6772, 2.7779],
         [2.9363, 2.5863, 2.7204, 2.3247]]], grad_fn=<AddBackward0>)
b: tensor([[[5.4466, 4.0245, 5.4082, 4.1385],
         [5.6741, 5.7246, 5.1804, 4.6880],
         [5.8024, 4.3805, 4.5052, 5.3828]],

        [[4.6891, 5.0306, 4.1404, 5.1170],
         [5.5637, 5.3727, 5.3544, 5.5558],
         [5.8727, 5.1725, 5.4407, 4.6494]]], grad_fn=<MulBackward0>)
output: tensor([[[29.6658, 16.1968, 29.2491, 17.1275],
         [32.1952, 32.7710, 26.8368, 21.9775],
         [33.6680, 19.1884, 20.

# removing the gradient requirement

In [5]:
# Option 1: switch gradient tracking off on the tensor itself.
weight = torch.rand(2, 2, requires_grad=True)
print("___________________before__________________________", weight, sep="\n")

# requires_grad_() changes the flag in place; `weight` is still the same tensor.
weight.requires_grad_(requires_grad=False)
print("___________________after__________________________", weight, sep="\n")



___________________before__________________________
tensor([[0.7146, 0.2951],
        [0.7212, 0.2629]], requires_grad=True)
___________________after__________________________
tensor([[0.7146, 0.2951],
        [0.7212, 0.2629]])


In [6]:
# Option 2: take a detached view and leave the original tensor untouched.
weight = torch.rand(2, 2, requires_grad=True)

print(
    "___________________before__________________________",
    weight,
    sep="\n",
)

# detach() returns a new tensor with the same values that does not
# require gradients; `weight` itself keeps requires_grad=True.
new_weight = weight.detach()

print(
    "___________________after__________________________",
    new_weight,
    sep="\n",
)

___________________before__________________________
tensor([[0.8682, 0.3246],
        [0.8875, 0.9272]], requires_grad=True)
___________________after__________________________
tensor([[0.8682, 0.3246],
        [0.8875, 0.9272]])


In [7]:
# Option 3: run the operations inside a no_grad() context.
weight = torch.rand(2, 2, requires_grad=True)
print("___________________before__________________________", weight, sep="\n")

# Results computed inside the context do not require gradients, even
# though `weight` itself still does.
with torch.no_grad():
    print("___________________after__________________________")
    a = torch.add(weight, 2)
    b = torch.mul(a, a)

    print(a, b, sep="\n")


___________________before__________________________
tensor([[0.2268, 0.8826],
        [0.1913, 0.2979]], requires_grad=True)
___________________after__________________________
tensor([[2.2268, 2.8826],
        [2.1913, 2.2979]])
tensor([[4.9586, 8.3093],
        [4.8016, 5.2805]])



# avoiding accumulation of gradients across backward passes

In [9]:
weights = torch.rand(size=(3, 3), requires_grad=True)
print(f"weights: {weights}")

# Upstream gradient passed to backward(). It never changes, so it is
# created once outside the loop, and as a float so that its dtype matches
# `output` explicitly.
error = torch.tensor(1.0)

for i in range(3):
    print(f"------------------------iteration--{i}--------------------------")
    # d(output)/d(weights) is 3 for every element.
    output = (weights * 3).sum()

    print(f"output: {output}")

    # backward() adds into weights.grad rather than overwriting it.
    output.backward(error)

    grad = weights.grad  # alias of weights.grad, not a copy
    print(f"grad: {grad}")
    # Reset after every iteration; otherwise the next backward() would
    # accumulate on top of this one (3, 6, 9, ...). Because `grad` is an
    # alias, it is zeroed as well.
    weights.grad.zero_()

   


weights: tensor([[0.5625, 0.6959, 0.0786],
        [0.5783, 0.9495, 0.2622],
        [0.0274, 0.6851, 0.8505]], requires_grad=True)
------------------------iteration--0--------------------------
output: 14.069750785827637
grad: tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
------------------------iteration--1--------------------------
output: 14.069750785827637
grad: tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
------------------------iteration--2--------------------------
output: 14.069750785827637
grad: tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])


In [None]:
# weights = torch.rand(3,requires_grad=True,dtype=torch.float)
# optimizer = torch.optim.SGD([weights],lr=.01)

# optimizer.step()
# optimizer.zero_grad()

