In [25]:
import torch
import torch.functional as F
import matplotlib.pyplot as plt

In [41]:
a = torch.tensor([1.0, 2, 3], requires_grad=True)
b = torch.tensor([5.0, 4, 3], requires_grad=True)
c = a @ b
c.backward()
a.grad, b.grad

(tensor([5., 4., 3.]), tensor([1., 2., 3.]))

In [44]:
a_grad = b
b_grad = a
a_grad, b_grad

(tensor([5., 4., 3.], requires_grad=True),
 tensor([1., 2., 3.], requires_grad=True))

In [47]:
h = 0.001
c1 = a@b
c2 = (a + h) @ b
(c2 - c1)/h

tensor(12.0010, grad_fn=<DivBackward0>)

In [46]:
""" 
if a&b are vectors & a@b = c then 
dc/da = b
dc/db = a
"""


' \nif a&b are vectors & a@b = c then \ndc/da = b\ndc/db = a\n'

In [57]:
a = torch.tensor([[1.0, 2, 3], [1.0, 5, 4]], requires_grad=True)
b = torch.tensor([5.0, 4, 7], requires_grad=True)
c = a @ b
d = c.sum()
d.backward()
a.grad, b.grad

(tensor([[5., 4., 7.],
         [5., 4., 7.]]),
 tensor([2., 7., 7.]))

In [58]:
a_grad = torch.zeros_like(a)
a_grad = a_grad + b
b_grad = a.sum(axis = 0)
a_grad, b_grad

(tensor([[5., 4., 7.],
         [5., 4., 7.]], grad_fn=<AddBackward0>),
 tensor([2., 7., 7.], grad_fn=<SumBackward1>))

In [98]:
a = torch.tensor([[1.0, 2, 3], [1.0, 5, 4]], requires_grad=True)
b = torch.tensor([[5.0, 4],[7.0, 4],[5.0, 2]], requires_grad=True)
c = a @ b
d = c.sum()
d.backward()
a.grad, b.grad

(tensor([[ 9., 11.,  7.],
         [ 9., 11.,  7.]]),
 tensor([[2., 2.],
         [7., 7.],
         [7., 7.]]))

In [115]:
a_grad = torch.zeros_like(a)
b_grad = torch.zeros_like(b)

a_grad = a_grad + b.sum( axis =1 )
b_grad = b_grad + a.sum( axis =0 ).reshape(4,1)
a_grad, b_grad

(tensor([[-0.4551, -1.1625,  4.1116,  1.8241],
         [-0.4551, -1.1625,  4.1116,  1.8241],
         [-0.4551, -1.1625,  4.1116,  1.8241]], grad_fn=<AddBackward0>),
 tensor([[-1.8002, -1.8002],
         [-0.8647, -0.8647],
         [-1.9716, -1.9716],
         [-0.2986, -0.2986]], grad_fn=<AddBackward0>))

In [112]:
a.sum( axis =0 ).reshape(4,1)

tensor([[-1.8002],
        [-0.8647],
        [-1.9716],
        [-0.2986]], grad_fn=<ReshapeAliasBackward0>)

In [114]:
b.grad

tensor([[-1.8002, -1.8002],
        [-0.8647, -0.8647],
        [-1.9716, -1.9716],
        [-0.2986, -0.2986]])

In [163]:
a = torch.tensor([2.0],requires_grad=True)
b = torch.tensor([5.0],requires_grad=True)
c = a* b
d = torch.log(c)
c.retain_grad(), d.retain_grad()
d.backward()
print(a.grad, b.grad, c.grad, d.grad)

tensor([0.5000]) tensor([0.2000]) tensor([0.1000]) tensor([1.])


In [166]:
c.grad * b

tensor([0.5000], grad_fn=<MulBackward0>)

In [160]:
counts = torch.randn(7, 3, requires_grad=True)
counts_sum_inv = torch.randn(7, 1,requires_grad=True)
probs = counts * counts_sum_inv
probs.retain_grad()
s = torch.tanh(probs).sum()
s.backward()
counts.grad, counts_sum_inv.grad, probs.grad

(tensor([[-0.2174, -0.4052, -0.8308],
         [-0.0012, -0.0012, -0.0012],
         [-0.2930, -0.5837, -0.5904],
         [ 0.5044,  0.0610,  0.9179],
         [-0.4468, -0.3126, -0.2493],
         [-0.6403, -1.0043, -0.2055],
         [-0.2484, -0.2497, -0.2288]]),
 tensor([[ 0.2524],
         [-2.0831],
         [ 0.9801],
         [ 0.5831],
         [ 1.7227],
         [ 0.8066],
         [-1.2251]]),
 tensor([[0.2585, 0.4817, 0.9876],
         [1.0000, 1.0000, 1.0000],
         [0.4958, 0.9877, 0.9992],
         [0.4060, 0.0491, 0.7389],
         [0.9895, 0.6923, 0.5522],
         [0.4725, 0.7412, 0.1517],
         [0.9880, 0.9932, 0.9101]]))

In [171]:
(counts * probs.grad).sum(axis=1)

tensor([ 0.2524, -2.0831,  0.9801,  0.5831,  1.7227,  0.8066, -1.2251],
       grad_fn=<SumBackward1>)

In [172]:
counts.sum(axis=1).reshape(7,1) + probs.grad.sum(axis=1).reshape(7,1)

tensor([[1.3971],
        [0.9168],
        [4.2222],
        [0.7097],
        [5.1829],
        [3.6366],
        [1.5528]], grad_fn=<AddBackward0>)

In [174]:
counts.shape, probs.grad.shape

(torch.Size([7, 3]), torch.Size([7, 3]))

In [175]:
a = torch.randn(4,3,requires_grad=True)
b = torch.randn(4,3,requires_grad=True)
c = a*b
c.retain_grad()
d = c.sum()

In [176]:
d.backward()

In [216]:
a = torch.tensor([3.0, 4.0, 5.0, 6.0], requires_grad=True)
b = torch.tensor([3.0, 4.0, 5.0, 6.0], requires_grad=True)

c.retain_grad()
d = c.sum()
d.backward()

In [217]:
a.grad

tensor([ 20.0855,  54.5981, 148.4132, 403.4288])

In [220]:
(c * a)/a

tensor([ 20.0855,  54.5981, 148.4132, 403.4288], grad_fn=<DivBackward0>)

In [218]:
c.grad

tensor([1., 1., 1., 1.])

In [189]:
from sympy import *
a = symbols("a")
c = a**-1
c.diff(a)

-1/a**2

In [191]:

c.grad,a.grad

(tensor([1., 1., 1., 1.]), tensor([-0.1111, -0.0625, -0.0400, -0.0278]))

In [192]:
-1/(a**2)

tensor([-0.1111, -0.0625, -0.0400, -0.0278], grad_fn=<MulBackward0>)

In [211]:
a = torch.randn(10,5, requires_grad=True)
b = a.sum(axis=1)
b.retain_grad()
c = torch.square(b).sum()
c.backward()

In [212]:
a.grad

tensor([[ -3.6146,  -3.6146,  -3.6146,  -3.6146,  -3.6146],
        [ -7.5984,  -7.5984,  -7.5984,  -7.5984,  -7.5984],
        [ -2.9487,  -2.9487,  -2.9487,  -2.9487,  -2.9487],
        [ -0.8913,  -0.8913,  -0.8913,  -0.8913,  -0.8913],
        [ -4.4061,  -4.4061,  -4.4061,  -4.4061,  -4.4061],
        [-10.9157, -10.9157, -10.9157, -10.9157, -10.9157],
        [  0.6426,   0.6426,   0.6426,   0.6426,   0.6426],
        [  3.3341,   3.3341,   3.3341,   3.3341,   3.3341],
        [ -6.8356,  -6.8356,  -6.8356,  -6.8356,  -6.8356],
        [  0.4898,   0.4898,   0.4898,   0.4898,   0.4898]])

In [213]:
b.grad

tensor([ -3.6146,  -7.5984,  -2.9487,  -0.8913,  -4.4061, -10.9157,   0.6426,
          3.3341,  -6.8356,   0.4898])

In [215]:
a.shape

torch.Size([10, 5])

In [None]:
a = torch.randn(10,5, requires_grad=True)
b = a.sum(axis=1)

In [225]:
a = torch.randn(4,5,requires_grad=True)
b = a.max(axis = 1,keepdim=True).values
b.retain_grad()
c = b.sum()
c.backward()
a.grad, b.grad

(tensor([[1., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0.],
         [1., 0., 0., 0., 0.]]),
 tensor([[1.],
         [1.],
         [1.],
         [1.]]))

In [235]:
a_grad = torch.zeros_like(a)
print(a_grad)
a.max(dim =1).indices

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


tensor([0, 0, 3, 0])

In [238]:
a_grad[range(4),a.max(dim =1).indices ] = 4

In [239]:
a_grad

tensor([[4., 0., 0., 0., 0.],
        [4., 0., 0., 0., 0.],
        [0., 0., 0., 4., 0.],
        [4., 0., 0., 0., 0.]])

In [291]:
h = torch.randn(4, 8, requires_grad=True)
W2 = torch.randn(8, 3, requires_grad=True)
b2 = torch.randn(3, requires_grad=True)

In [292]:
logits = h @ W2 + b2 # output layer
logits.retain_grad()
x = logits.square()
y = x.sum()
y.backward()

In [305]:
logits.grad.shape

torch.Size([4, 3])

In [307]:
dh = torch.zeros_like(h)

In [314]:
dh += W2.sum(axis = 1)

In [316]:
h.grad

tensor([[  2.0517,  -0.5638,  -0.2785, -17.1280,   0.0617,   9.4932,  -2.5261,
          -0.4681],
        [ -0.5647,  10.6514,  15.2778, -12.9380,  -9.1384,  11.4859,  -6.3783,
         -14.5812],
        [  1.7212,  -3.3679,  -0.4587,  -6.5844,  -2.1773,   1.9717,  -1.8803,
          -0.4377],
        [  0.3498, -17.9332, -28.2925,  25.3042,  18.3964, -21.0611,  12.6310,
          27.4126]])

In [321]:
logits.sum(axis = 1).shape

torch.Size([4])

In [320]:
dh.shape

torch.Size([4, 8])

In [323]:
dh += logits.sum(axis = 1).reshape(-1,1)

In [325]:
h.grad

tensor([[  2.0517,  -0.5638,  -0.2785, -17.1280,   0.0617,   9.4932,  -2.5261,
          -0.4681],
        [ -0.5647,  10.6514,  15.2778, -12.9380,  -9.1384,  11.4859,  -6.3783,
         -14.5812],
        [  1.7212,  -3.3679,  -0.4587,  -6.5844,  -2.1773,   1.9717,  -1.8803,
          -0.4377],
        [  0.3498, -17.9332, -28.2925,  25.3042,  18.3964, -21.0611,  12.6310,
          27.4126]])

In [324]:
dh

tensor([[ -1.4525,   1.8144,   2.9197,  -0.1245,  -3.1377,  -0.2453,  -1.8573,
          -4.3149],
        [  4.7695,   8.0364,   9.1417,   6.0975,   3.0843,   5.9767,   4.3647,
           1.9072],
        [ -1.0899,   2.1770,   3.2824,   0.2381,  -2.7751,   0.1174,  -1.4947,
          -3.9522],
        [-10.4359,  -7.1690,  -6.0637,  -9.1079, -12.1212,  -9.2287, -10.8407,
         -13.2983]], grad_fn=<AddBackward0>)