# PyTorch Autograd Functions

## SISO

In [None]:
import torch
from torch.autograd import grad

def model(x):
    """Cubic test function y = x^3 (analytic derivative: 3x^2)."""
    return x ** 3


# Single input element that autograd should track.
x = torch.tensor([2.0], requires_grad=True)
y = model(x)

# y holds exactly one element, so grad() can seed the backward pass implicitly.
dy_dx = grad(outputs=y, inputs=x)

print(f"x: \n   {x} \n y=x^3: \n    {y} \n dy_dx:\n    {dy_dx} , 3*x^2: \n    {3*x**2}")

# Analytic check: d(x^3)/dx = 3x², i.e. 12 at x = 2.

x: 
   tensor([2.], requires_grad=True) 
 y=x^3: 
    tensor([8.], grad_fn=<PowBackward0>) 
 dy_dx:
    (tensor([12.]),)


## MISO

In [2]:
import torch
from torch.autograd import grad
import traceback

def model(x):
    """Element-wise cube: one output per input element."""
    return x ** 3


# Three inputs this time, so the model output is a vector rather than a scalar.
x = torch.tensor([2.0, 3.0, 4.0], requires_grad=True)
print(x)

y = model(x)
print(y)

# grad() only creates the implicit seed gradient for scalar outputs; with a
# 3-element y it raises, and showing that failure is the purpose of this cell.
try:
    dy_dx = grad(outputs=y, inputs=x)
    print(f"x: \n   {x} \n y=x^3: \n    {y} \n dy_dx:\n    {dy_dx}")
except Exception as e:
    print(f"!!!!!!!!!!!!!!! ERROR: {e}")
    traceback.print_exc()

tensor([2., 3., 4.], requires_grad=True)
tensor([ 8., 27., 64.], grad_fn=<PowBackward0>)
!!!!!!!!!!!!!!! ERROR: grad can be implicitly created only for scalar outputs


Traceback (most recent call last):
  File "/tmp/ipykernel_1955206/3879111839.py", line 15, in <module>
    dy_dx = grad(outputs= y, inputs=x)
  File "/home/bneveu/.local/lib/python3.10/site-packages/torch/autograd/__init__.py", line 367, in grad
    grad_outputs_ = _make_grads(
  File "/home/bneveu/.local/lib/python3.10/site-packages/torch/autograd/__init__.py", line 117, in _make_grads
    raise RuntimeError(
RuntimeError: grad can be implicitly created only for scalar outputs


### MISO - Loop

In [3]:
import torch
from torch.autograd import grad

def model(x):
    """Element-wise cube, y_i = x_i^3."""
    return x ** 3


x = torch.tensor([2.0, 3.0, 4.0], requires_grad=True)
y = model(x)

# Gradient of the first output only; retain_graph keeps the graph alive for
# the additional backward passes below.
test = grad(outputs=y[0], inputs=x, retain_graph=True)
print('test: ' + str(test))

# One backward pass per output element: each call yields the gradient of that
# single output with respect to the whole input vector.
for out in y:
    dy_dx = grad(outputs=out, inputs=x, retain_graph=True)
    print(dy_dx)

# Keep only the matching entry dy_i/dx_i from each of those gradients.
dy_dx = []
for i, out in enumerate(y):
    row = grad(outputs=out, inputs=x, retain_graph=True)[0]
    dy_dx.append(row[i])

#print(f"x: \n   {x} \n y=x^3: \n    {y} \n dy_dx:\n    {dy_dx}")

test: (tensor([12.,  0.,  0.]),)
(tensor([12.,  0.,  0.]),)
(tensor([ 0., 27.,  0.]),)
(tensor([ 0.,  0., 48.]),)


In [4]:
import torch
from torch.autograd import grad

def model(x):
    """Element-wise cube, y_i = x_i^3."""
    return x ** 3


x = torch.tensor([2.0, 3.0, 4.0], requires_grad=True)
y = model(x)

# Gradient of the first output only; retain_graph keeps the graph reusable.
test = torch.autograd.grad(outputs=y[0], inputs=x, retain_graph=True)
print('test: ' + str(test))

# One backward pass per output element, each giving the gradient of that
# output with respect to the whole input vector.
for out in y:
    test = torch.autograd.grad(outputs=out, inputs=x, retain_graph=True)
    print(test)

# Compute dy_dx inside this cell. It used to be commented out while the print
# below still referenced it, so the cell only worked when an earlier cell had
# left `dy_dx` behind in the kernel (NameError after Restart & Run All).
dy_dx = [
    torch.autograd.grad(outputs=out, inputs=x, retain_graph=True)[0][i]
    for i, out in enumerate(y)
]

print(f"x: \n   {x} \n y=x^3: \n    {y} \n dy_dx:\n    {dy_dx}")

test: (tensor([12.,  0.,  0.]),)
(tensor([12.,  0.,  0.]),)
(tensor([ 0., 27.,  0.]),)
(tensor([ 0.,  0., 48.]),)
x: 
   tensor([2., 3., 4.], requires_grad=True) 
 y=x^3: 
    tensor([ 8., 27., 64.], grad_fn=<PowBackward0>) 
 dy_dx:
    [tensor(12.), tensor(27.), tensor(48.)]


### MISO - Loop with Large Data


:warning: Pretty SLOW -> [use the sum trick](https://github.com/pytorch/pytorch/issues/7786)

In [5]:
import torch
from torch.autograd import grad

# Same per-output gradient loop as in the small example, scaled up to 40000
# inputs to show how expensive it becomes.
x = torch.Tensor(range(40000))
x.requires_grad = True

# Element-wise cube, y_i = x_i^3.
def model(x): return x ** 3

y = model(x)

# One torch.autograd.grad call per element of y (40000 calls in total). Each
# call returns the gradient with respect to the whole of x, of which only
# entry i is kept. The recorded output of this cell is a KeyboardInterrupt.
dy_dx = [torch.autograd.grad(outputs=out, inputs=x, retain_graph=True)[0][i]
    for i, out in enumerate(y)]

#print(f"x: \n   {x} \n y=x^3: \n    {y} \n dy_dx:\n    {dy_dx}")

KeyboardInterrupt: 

### MISO - Sum

In [6]:
import torch
from torch.autograd import grad

def model(x):
    """Element-wise cube, y_i = x_i^3."""
    return x ** 3


x = torch.tensor([2.0, 3.0, 4.0], requires_grad=True)
y = model(x)

# Each y_i depends only on x_i, so the gradient of sum(y) with respect to x_i
# is exactly dy_i/dx_i: a single backward pass replaces the per-output loop.
total = y.sum()
(dy_dx,) = grad(outputs=total, inputs=x, retain_graph=True)

print(f"x: \n   {x} \n y=x^3: \n    {y} \n dy_dx:\n    {dy_dx}")

x: 
   tensor([2., 3., 4.], requires_grad=True) 
 y=x^3: 
    tensor([ 8., 27., 64.], grad_fn=<PowBackward0>) 
 dy_dx:
    tensor([12., 27., 48.])


In [8]:
import torch
from torch.autograd import grad

def model(x):
    """Element-wise cube, y_i = x_i^3."""
    return x ** 3


# 40000 inputs 0, 1, ..., 39999 as float32, tracked by autograd.
x = torch.arange(40000, dtype=torch.float32).requires_grad_(True)
y = model(x)

# Sum trick: one backward pass yields every dy_i/dx_i at once.
(dy_dx,) = grad(outputs=y.sum(), inputs=x, retain_graph=True)
print(dy_dx[:20])

tensor([   0.,    3.,   12.,   27.,   48.,   75.,  108.,  147.,  192.,  243.,
         300.,  363.,  432.,  507.,  588.,  675.,  768.,  867.,  972., 1083.])


## MIMO - Jacobian

In [9]:
from torch.autograd.functional import jacobian
from torch import tensor

# Function of three vector inputs that returns two scalar outputs
def f(x1, x2, x3):
    """Return the pair (sum(x1) + sum(x2^3), sum(x3 * x1))."""
    first = x1.sum() + (x2 ** 3).sum()
    second = (x3 * x1).sum()
    return (first, second)


# Three input vectors of 400 elements each: 0..399, scaled by 1, 2 and 3
x1 = torch.arange(400, dtype=torch.float32)
x2 = 2 * x1
x3 = 3 * x1

x = (x1, x2, x3)

# jacobian() returns a nested tuple indexed as jac[output][input]
jac = jacobian(f, x)
print(len(jac))
print(jac[0][0].size())

2
torch.Size([400])


In [15]:
from torch.autograd.functional import jacobian
from torch import tensor

# Vector-valued function of three scalar inputs
def f(x1, x2, x3):
    """Return the three outputs (x1 + x2^3, x3*x1, x2^2*x3 + x1)."""
    out_a = x1 + x2**3
    out_b = x3*x1
    out_c = x2**2*x3+x1
    return (out_a, out_b, out_c)

# Scalar (0-dim) input tensors
x1 = tensor(3.0)
x2 = tensor(4.0)
x3 = tensor(5.0)

# 3 outputs, 3 inputs -> 3x3 Jacobian, printed as a tuple of three rows with
# one entry per input.
print(jacobian(f,(x1,x2,x3)))

((tensor(1.), tensor(48.), tensor(0.)), (tensor(5.), tensor(0.), tensor(3.)), (tensor(1.), tensor(40.), tensor(16.)))


## MIMO - Hessian

In [16]:
from torch.autograd.functional import hessian
from torch import tensor

# Scalar-valued function of three scalar inputs
def f(x1, x2, x3):
    """Return x1^3 + 2*x2^3 + x1*x3^2."""
    cubic_terms = x1**3+2*x2**3
    return cubic_terms+x1*x3**2

# Point at which the second derivatives are evaluated
x1 = tensor(3.0)
x2 = tensor(4.0)
x3 = tensor(5.0)

# hessian() returns a nested tuple: H[i][j] = d^2 f / (dx_i dx_j)
point = (x1, x2, x3)
H = hessian(f, point)
print(H)

((tensor(18.), tensor(0.), tensor(10.)), (tensor(0.), tensor(48.), tensor(0.)), (tensor(10.), tensor(0.), tensor(6.)))


# Example problem:
 source: https://github.com/udemirezen/PINN-1/blob/main/solve_PDE_NN.ipynb

We are given the PDE $\frac{\partial u}{\partial x} = 2\frac{\partial u}{\partial t} + u$ with the boundary condition $u(x, 0) = 6e^{-3x}$.

Independent variables: x,t (input)
Dependent variables: u (outputs)
We have to find out u(x,t) for all x in range [0,2] and t in range [0,1]

Solving this problem analytically gives the solution $u(x,t) = 6e^{-3x-2t}$.

Our f is f = du/dx - 2du/dt - u

In [17]:
import torch
import torch.nn as nn
from torch.autograd import Variable
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
import numpy as np

In [18]:
# Fully connected network used as the approximation of u(x, t)
class Net(nn.Module):
    """MLP mapping (x, t) to u: five sigmoid hidden layers of width 5, linear output."""

    def __init__(self):
        super().__init__()
        # Two input features (x and t) feed the first hidden layer.
        self.hidden_layer1 = nn.Linear(2, 5)
        self.hidden_layer2 = nn.Linear(5, 5)
        self.hidden_layer3 = nn.Linear(5, 5)
        self.hidden_layer4 = nn.Linear(5, 5)
        self.hidden_layer5 = nn.Linear(5, 5)
        self.output_layer = nn.Linear(5, 1)

    def forward(self, x, t):
        """Return the network output for column tensors ``x`` and ``t``.

        ``x`` and ``t`` are concatenated along dimension 1, so each is
        expected to contribute one column per sample.
        """
        hidden_stack = (
            self.hidden_layer1,
            self.hidden_layer2,
            self.hidden_layer3,
            self.hidden_layer4,
            self.hidden_layer5,
        )
        # Two single-column inputs become one two-column feature matrix.
        activation = torch.cat((x, t), dim=1)
        for layer in hidden_stack:
            activation = torch.sigmoid(layer(activation))
        # Regression target: the output layer has no activation.
        return self.output_layer(activation)

In [19]:
### (2) Model
# Instantiate the network and move it to the selected device in one step.
net = Net().to(device)

# Mean squared error criterion.
mse_cost_function = torch.nn.MSELoss()

# Adam over all network parameters, with the optimizer's default settings.
optimizer = torch.optim.Adam(net.parameters())

In [20]:
## PDE as loss function
def f(x, t, net):
    """Evaluate the PDE residual du/dx - 2*du/dt - u for the output of ``net``.

    Args:
        x: tensor of spatial coordinates; must be part of the autograd graph
            (requires_grad=True), since u is differentiated with respect to it.
        t: tensor of time coordinates; same requirement as ``x``.
        net: callable returning ``u = net(x, t)``.

    Returns:
        The residual tensor ``u_x - 2*u_t - u``. Because the derivatives are
        taken with ``create_graph=True``, the result can itself be
        backpropagated through (e.g. to the parameters of ``net``).

    Raises:
        RuntimeError: raised by ``torch.autograd.grad`` when ``u`` does not
            depend on ``x`` or ``t``.
    """
    u = net(x, t)
    # Differentiating the summed output gives per-sample derivatives as long as
    # each row of u depends only on the matching row of x and t (assumed here;
    # this holds for a network applied row by row).
    # Both derivatives come from a single backward pass over the graph instead
    # of one pass per input.
    u_x, u_t = torch.autograd.grad(u.sum(), (x, t), create_graph=True)
    pde = u_x - 2 * u_t - u
    return pde


In [21]:
## Boundary-condition samples: 500 random x in [0, 2) at t = 0, with the
## target values u(x, 0) = 6*exp(-3x)
x_bc = np.random.uniform(0.0, 2.0, (500, 1))
t_bc = np.zeros_like(x_bc)
u_bc = 6 * np.exp(-3 * x_bc)

## Interior samples for the PDE term: 500 random (x, t) pairs in
## [0, 2) x [0, 1), with an all-zero target for the residual
x_all = np.random.uniform(0.0, 2.0, (500, 1))
t_all = np.random.uniform(0.0, 1.0, (500, 1))
all_zeros = np.zeros_like(x_all)

In [22]:
### (3) Training / Fitting
iterations = 5000

# The sample arrays never change between epochs, so convert them to float32
# tensors on the target device once instead of on every iteration. Plain
# tensors are used; the Variable wrapper is deprecated and no longer needed.
pt_x_bc = torch.from_numpy(x_bc).float().to(device)
pt_t_bc = torch.from_numpy(t_bc).float().to(device)
pt_u_bc = torch.from_numpy(u_bc).float().to(device)

# The interior points must track gradients: f() differentiates the network
# output with respect to them. Their own .grad attributes are filled by
# loss.backward() as a side effect and are never read.
pt_x_all = torch.from_numpy(x_all).float().to(device).requires_grad_(True)
pt_t_all = torch.from_numpy(t_all).float().to(device).requires_grad_(True)
pt_all_zeros = torch.from_numpy(all_zeros).float().to(device)

for epoch in range(iterations):
    optimizer.zero_grad()

    # Loss based on boundary conditions
    net_bc_out = net(pt_x_bc, pt_t_bc) # output of u(x,t)
    mse_u = mse_cost_function(net_bc_out, pt_u_bc)

    # Loss based on PDE: the residual f(x,t) should be zero everywhere
    f_out = f(pt_x_all, pt_t_all, net) # output of f(x,t)
    mse_f = mse_cost_function(f_out, pt_all_zeros)

    # Combining the loss functions
    loss = mse_u + mse_f

    loss.backward()
    optimizer.step()

    # detach() gives a graph-free view of the loss for logging.
    print(epoch,"Traning Loss:",loss.detach())

0 Traning Loss: tensor(2.3730, device='cuda:0')
1 Traning Loss: tensor(2.3680, device='cuda:0')
2 Traning Loss: tensor(2.3632, device='cuda:0')
3 Traning Loss: tensor(2.3584, device='cuda:0')
4 Traning Loss: tensor(2.3537, device='cuda:0')
5 Traning Loss: tensor(2.3492, device='cuda:0')
6 Traning Loss: tensor(2.3447, device='cuda:0')
7 Traning Loss: tensor(2.3404, device='cuda:0')
8 Traning Loss: tensor(2.3362, device='cuda:0')
9 Traning Loss: tensor(2.3320, device='cuda:0')
10 Traning Loss: tensor(2.3281, device='cuda:0')
11 Traning Loss: tensor(2.3242, device='cuda:0')
12 Traning Loss: tensor(2.3204, device='cuda:0')
13 Traning Loss: tensor(2.3168, device='cuda:0')
14 Traning Loss: tensor(2.3132, device='cuda:0')
15 Traning Loss: tensor(2.3098, device='cuda:0')
16 Traning Loss: tensor(2.3065, device='cuda:0')
17 Traning Loss: tensor(2.3034, device='cuda:0')
18 Traning Loss: tensor(2.3003, device='cuda:0')
19 Traning Loss: tensor(2.2973, device='cuda:0')
20 Traning Loss: tensor(2.2945

## Plot Results

In [23]:
import pandas as pd
import plotly.express as px
# Select plotly as the pandas plotting backend (affects DataFrame.plot calls).
pd.options.plotting.backend = "plotly"

In [24]:
# Regular evaluation grid: x in [0, 2) and t in [0, 1), both with step 0.02.
x=np.arange(0,2,0.02)
t=np.arange(0,1,0.02)
ms_x, ms_t = np.meshgrid(x, t)
## meshgrid returns 2-D arrays; flatten them into column vectors, one grid
## point per row, which is the layout the network is called with
x = ms_x.reshape(-1,1)
t = ms_t.reshape(-1,1)

# Plain float32 tensors on the target device. No gradients are needed for
# evaluation, so the inputs do not require grad and the forward pass runs
# under no_grad() to avoid building an autograd graph.
pt_x = torch.from_numpy(x).float().to(device)
pt_t = torch.from_numpy(t).float().to(device)
with torch.no_grad():
    pt_u = net(pt_x,pt_t)
u=pt_u.cpu().numpy()

In [25]:
# One row per grid point: coordinates, network prediction and the closed-form
# solution 6*exp(-3x - 2t) for comparison.
flat_x = ms_x.flatten()
flat_t = ms_t.flatten()
dict_data = {
    "x": flat_x,
    "t": flat_t,
    "u": u.flatten(),
    "u_analytical": 6*np.exp(-3*flat_x-2*flat_t),
}
df = pd.DataFrame(dict_data)

In [26]:
# 3-D scatter of the network prediction u over the (x, t) grid, coloured by u.
fig = px.scatter_3d(
    df,
    x='x',
    y='t',
    z='u',
    color="u",
)
fig.show()

In [27]:
# 3-D scatter of the analytical solution over the same grid, for comparison
# with the network prediction.
fig = px.scatter_3d(
    df,
    x='x',
    y='t',
    z='u_analytical',
    color="u_analytical",
)
fig.show()