In [2]:
import torch
import torch.nn as nn

Reference: PyTorch autograd tutorial — https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

### 1-variable input and output

In [3]:
class Mymodel(nn.Module):
    """Affine model ``output = a * x + b`` with learnable ``a`` and ``b``.

    Args:
        a: Initial value of the multiplicative coefficient (anything
            accepted by ``torch.tensor``).
        b: Initial value of the additive offset (anything accepted by
            ``torch.tensor``).

    Integer or boolean initial values are promoted to the default
    floating-point dtype, because only floating-point (or complex) tensors
    can require gradients; floating-point inputs keep their dtype.
    """

    def __init__(self, a, b):
        super().__init__()
        self.a = nn.Parameter(self._to_differentiable(a))
        self.b = nn.Parameter(self._to_differentiable(b))

    @staticmethod
    def _to_differentiable(value):
        """Return ``value`` as a tensor whose dtype can carry gradients."""
        tensor = torch.tensor(value)
        # nn.Parameter sets requires_grad=True, which fails on integer/bool
        # tensors, so promote those to the default float dtype first.
        if not (tensor.is_floating_point() or tensor.is_complex()):
            tensor = tensor.to(torch.get_default_dtype())
        return tensor

    def forward(self, x):
        """Return ``a * x + b`` for the input tensor ``x``."""
        return self.a * x + self.b

# Scalar demo: obj = a*x + b with a = 2, b = 1, x = 4.
# Analytic gradients to compare with the printed values:
#   d(obj)/d(a) = x
#   d(obj)/d(b) = 1.
#   d(obj)/d(x) = a
mymodel = Mymodel(a=2., b=1.)

# Scalar input created directly as a gradient-tracking leaf tensor
input_data = torch.tensor(4., requires_grad=True)

# Forward pass
output_data = mymodel(input_data)
print('output =', output_data, '\n')

# Backward pass: reset any parameter gradients left over from earlier
# runs, then backpropagate from the scalar output
mymodel.zero_grad()
output_data.backward()

print('Gradients:')
print('d(output)/d(a) =', mymodel.a.grad)
print('d(output)/d(b) =', mymodel.b.grad)
print('d(output)/d(input) =', input_data.grad)

output = tensor(9., grad_fn=<AddBackward0>) 

Gradients:
d(output)/d(a) = tensor(4.)
d(output)/d(b) = tensor(1.)
d(output)/d(input) = tensor(2.)


###  n-variable input and 1-variable output

In [4]:
import torch
import torch.nn as nn

class Mymodel(nn.Module):
    """Scalar-valued model ``output = sum(a * x) + b`` with learnable ``a``, ``b``.

    Args:
        a: Initial value of the coefficient(s) multiplying the input
            (anything accepted by ``torch.tensor``).
        b: Initial value of the additive offset (anything accepted by
            ``torch.tensor``).

    Integer or boolean initial values are promoted to the default
    floating-point dtype, because only floating-point (or complex) tensors
    can require gradients; floating-point inputs keep their dtype.
    """

    def __init__(self, a, b):
        super().__init__()
        self.a = nn.Parameter(self._to_differentiable(a))
        self.b = nn.Parameter(self._to_differentiable(b))

    @staticmethod
    def _to_differentiable(value):
        """Return ``value`` as a tensor whose dtype can carry gradients."""
        tensor = torch.tensor(value)
        # nn.Parameter sets requires_grad=True, which fails on integer/bool
        # tensors, so promote those to the default float dtype first.
        if not (tensor.is_floating_point() or tensor.is_complex()):
            tensor = tensor.to(torch.get_default_dtype())
        return tensor

    def forward(self, x):
        """Return the sum over all elements of ``a * x``, plus ``b``."""
        return torch.sum(self.a * x) + self.b

# Vector-in / scalar-out demo: obj = a1*x1 + a2*x2 + b
# with a = [2, 3], b = 1, x = [4, 5].
# Analytic gradients to compare with the printed values:
#   d(obj)/d(a1) = x1, d(obj)/d(a2) = x2
#   d(obj)/d(b)  = 1.
#   d(obj)/d(x1) = a1, d(obj)/d(x2) = a2
mymodel = Mymodel(a=[2., 3.], b=1.)

# Two-element input created directly as a gradient-tracking leaf tensor
input_data = torch.tensor([4., 5.], requires_grad=True)

# Forward pass
output_data = mymodel(input_data)
print('output =', output_data, '\n')

# Backward pass: reset any parameter gradients left over from earlier
# runs, then backpropagate from the scalar output
mymodel.zero_grad()
output_data.backward()

print('Gradients:')
print('d(output)/d(a) =', mymodel.a.grad)
print('d(output)/d(b) =', mymodel.b.grad)
print('d(output)/d(input) =', input_data.grad)

output = tensor(24., grad_fn=<AddBackward0>) 

Gradients:
d(output)/d(a) = tensor([4., 5.])
d(output)/d(b) = tensor(1.)
d(output)/d(input) = tensor([2., 3.])


###  n-variable input and n-variable output

In [5]:
class Mymodel(nn.Module):
    """Elementwise affine model ``output = a * x + b`` with learnable ``a``, ``b``.

    Args:
        a: Initial value of the multiplicative coefficient(s) (anything
            accepted by ``torch.tensor``).
        b: Initial value of the additive offset (anything accepted by
            ``torch.tensor``).

    Integer or boolean initial values are promoted to the default
    floating-point dtype, because only floating-point (or complex) tensors
    can require gradients; floating-point inputs keep their dtype.
    """

    def __init__(self, a, b):
        super().__init__()
        self.a = nn.Parameter(self._to_differentiable(a))
        self.b = nn.Parameter(self._to_differentiable(b))

    @staticmethod
    def _to_differentiable(value):
        """Return ``value`` as a tensor whose dtype can carry gradients."""
        tensor = torch.tensor(value)
        # nn.Parameter sets requires_grad=True, which fails on integer/bool
        # tensors, so promote those to the default float dtype first.
        if not (tensor.is_floating_point() or tensor.is_complex()):
            tensor = tensor.to(torch.get_default_dtype())
        return tensor

    def forward(self, x):
        """Return ``a * x + b``, computed elementwise with broadcasting."""
        return self.a * x + self.b


# Vector-in / vector-out demo with a = [2, 3], b = 1, x = [4, 5]:
#   obj1 = a1*x1 + b
#   obj2 = a2*x2 + b
# Analytic gradients to compare with the printed values:
#   d(obj1)/d(a1) = x1,  d(obj1)/d(a2) = 0
#   d(obj2)/d(a1) = 0,   d(obj2)/d(a2) = x2
#   d(obj1)/d(b)  = 1.
#   d(obj2)/d(b)  = 1.
#   d(obj1)/d(x1) = a1,  d(obj1)/d(x2) = 0
#   d(obj2)/d(x1) = 0,   d(obj2)/d(x2) = a2
mymodel = Mymodel(a=[2., 3.], b=1.)

# Two-element input created directly as a gradient-tracking leaf tensor
input_data = torch.tensor([4., 5.], requires_grad=True)

# Forward pass
output_data = mymodel(input_data)
print('output =', output_data, '\n')

# A non-scalar output needs a weight vector telling backward() which
# output(s) to differentiate: [1, 0] selects obj1, [0, 1] selects obj2,
# and [1, 1] yields the sum of both gradients.
output_choose = torch.zeros_like(output_data)
output_choose[0] = 1  # differentiate obj1 only
print('output to calculate grad:', output_choose, '\n')

# Backward pass: reset any parameter gradients left over from earlier
# runs, then backpropagate the selected output
mymodel.zero_grad()
output_data.backward(output_choose)

print('Gradients:')
print('d(output)/d(a) =', mymodel.a.grad)
print('d(output)/d(b) =', mymodel.b.grad)
print('d(output)/d(input) =', input_data.grad)


output = tensor([ 9., 16.], grad_fn=<AddBackward0>) 

output to calculate grad: tensor([1., 0.]) 

Gradients:
d(output)/d(a) = tensor([4., 0.])
d(output)/d(b) = tensor(1.)
d(output)/d(input) = tensor([2., 0.])


### Using torch.autograd.grad

The difference is that `torch.autograd.grad()` returns the gradients directly to the caller,
whereas `.backward()` stores them in the `.grad` field of the leaf tensors that were used to compute the output.

In [6]:
class Mymodel(nn.Module):
    """Elementwise affine model ``output = a * x + b`` with learnable ``a``, ``b``.

    Args:
        a: Initial value of the multiplicative coefficient(s) (anything
            accepted by ``torch.tensor``).
        b: Initial value of the additive offset (anything accepted by
            ``torch.tensor``).

    Integer or boolean initial values are promoted to the default
    floating-point dtype, because only floating-point (or complex) tensors
    can require gradients; floating-point inputs keep their dtype.
    """

    def __init__(self, a, b):
        super().__init__()
        self.a = nn.Parameter(self._to_differentiable(a))
        self.b = nn.Parameter(self._to_differentiable(b))

    @staticmethod
    def _to_differentiable(value):
        """Return ``value`` as a tensor whose dtype can carry gradients."""
        tensor = torch.tensor(value)
        # nn.Parameter sets requires_grad=True, which fails on integer/bool
        # tensors, so promote those to the default float dtype first.
        if not (tensor.is_floating_point() or tensor.is_complex()):
            tensor = tensor.to(torch.get_default_dtype())
        return tensor

    def forward(self, x):
        """Return ``a * x + b``, computed elementwise with broadcasting."""
        return self.a * x + self.b


# torch.autograd.grad demo with a = [2, 3], b = 1, x = [4, 5]:
#   obj1 = a1*x1 + b
#   obj2 = a2*x2 + b
# Analytic gradients to compare with the printed values:
#   d(obj1)/d(x1) = a1,  d(obj1)/d(x2) = 0
#   d(obj2)/d(x1) = 0,   d(obj2)/d(x2) = a2
mymodel = Mymodel(a=[2., 3.], b=1.)

# Two-element input created directly as a gradient-tracking leaf tensor
input_data = torch.tensor([4., 5.], requires_grad=True)

# Forward pass
output_data = mymodel(input_data)
print('output =', output_data, '\n')

# Weight vector selecting which output(s) to differentiate: [1, 0]
# selects obj1, [0, 1] selects obj2, and [1, 1] yields the sum of both
# gradients.
output_choose = torch.zeros_like(output_data)
output_choose[0] = 1
output_choose[1] = 0
print('output to calculate grad:', output_choose, '\n')

# Compute the gradient of the selected output with respect to the input.
# The result is returned as a tuple with one entry per requested input.
grad = torch.autograd.grad(
    outputs=output_data,
    inputs=input_data,
    grad_outputs=output_choose,
)
print('Gradients:')
print('d(output)/d(input) =', grad)
# torch.autograd.grad returns the gradient without storing it on the
# tensor, so .grad is still None here.
print('d(output)/d(input) =', input_data.grad)

output = tensor([ 9., 16.], grad_fn=<AddBackward0>) 

output to calculate grad: tensor([1., 0.]) 

Gradients:
d(output)/d(input) = (tensor([2., 0.]),)
d(output)/d(input) = None
