# Exercise 20 - Extensions for physics-informed neural networks
This exercise’s intended purpose is to provide a physics-informed neural network with the most important extensions, which can be used for your own projects.

### Task
A physics-informed neural network for the static bar equation is considered. Adapt the code with the following changes to enable different extensions and observe how they affect the training of the physics-informed neural network:
- Optimizer: check block 13
- Sampling: check block 16
- Loss term weighting: check block 11
    - manual weighting: check block 11
    - automatic weighting: check blocks 11, 15
- Strong enforcement of boundary conditions: check block 9
- Weighting per collocation point: check blocks 10, 15
- Learning rate scheduler: check block 17 and adapt learning rate to lr = 5e-2 in block 13
- Activation functions: 
    - common activation functions: check blocks 13, 8
    - learnable activation functions: check block 13
- Numerical differentiation: check block 4
- Convolutional neural network: check block 14, change to numerical differentiation in block 4 and remove plot of test set in block 19 (and use strong boundary enforcement of block 9 to get a convergence)
- Feature layer: check block 6

### Learning goals
- Understand the most prominent extensions of physics-informed neural networks
- Be able to implement the most prominent extensions of physics-informed neural networks
- Gain an intuition on how to improve a physics-informed neural network

**import libraries & set seed**

In [None]:
import numpy as np
import torch
from torch.autograd import grad
import time
import matplotlib.pyplot as plt
from scipy.stats import qmc

In [None]:
# seed PyTorch's random number generator (used e.g. for the weight initialization and the CNN noise input)
torch.manual_seed(2)

## Utilities

**gradient computation with automatic differentiation**

In [None]:
def getDerivativeAutomaticDifferentation(y, x, n):
    """Return the n-th derivative of y with respect to x via automatic differentiation.

    Args:
        y: tensor that was computed from x, so that both are connected in the
            autograd graph.
        x: tensor with respect to which y is differentiated.
        n: order of the derivative; for n == 0 the input y is returned unchanged.

    Returns:
        Tensor shaped like x that holds d^n y / dx^n. The result stays attached
        to the autograd graph (create_graph=True), so it can be differentiated
        again or used inside a loss.
    """
    derivative = y
    for _ in range(n):
        # ones_like(derivative) as grad_outputs sums the contributions of all outputs;
        # for a point-wise network this yields the derivative at every sample.
        # Staying inside this function (instead of dispatching through the
        # module-level getDerivative alias) keeps all orders on automatic differentiation.
        derivative = grad(
            derivative,
            x,
            torch.ones_like(derivative),
            create_graph=True,
            retain_graph=True,
        )[0]
    return derivative

**gradient computation with numerical differentiation**

central difference $$f'(x)\approx\frac{f(x+\Delta x) - f(x-\Delta x)}{2\Delta x}$$
forward difference $$f'(x)\approx\frac{f(x+\Delta x) - f(x)}{\Delta x}$$
backward difference $$f'(x)\approx\frac{f(x)-f(x-\Delta x)}{\Delta x}$$

In [None]:
def getDerivativeFiniteDifference(y, x, n):
    """Return the n-th derivative of y with respect to x using finite differences.

    The samples in y are differentiated along the first dimension on the grid x:
    central differences in the interior, a forward difference at the first
    point and a backward difference at the last point. Higher orders are
    obtained by applying the first-order stencil repeatedly.

    Args:
        y: samples of the function on the grid x.
        x: grid points, indexed along the first dimension like y.
        n: order of the derivative; for n == 0 the input y is returned unchanged.
    """
    if n == 0:
        return y

    derivative = y
    for _ in range(n):
        samples = derivative
        derivative = samples * 0  # same shape as the samples, filled below
        # interior points: central difference
        derivative[1:-1] = (samples[:-2] - samples[2:]) / (x[:-2] - x[2:])
        # first point: forward difference
        derivative[0] = (samples[1] - samples[0]) / (x[1] - x[0])
        # last point: backward difference
        derivative[-1] = (samples[-1] - samples[-2]) / (x[-1] - x[-2])
    return derivative
    
# select differentiation method: getDerivative is the alias used by the loss computation,
# exactly one of the two assignments below should be active
getDerivative = getDerivativeAutomaticDifferentation
# getDerivative = getDerivativeFiniteDifference         # needed for CNN

**adaptive activation functions**

In [None]:
class makeAdaptiveActivation(torch.nn.Module):
    """Wrap an activation function with a learnable input scaling.

    The wrapped activation is evaluated at n * alpha * x, where alpha is a
    trainable scalar initialized to 1 / n, so the scaling factor starts at one.

    Args:
        n: fixed scaling factor multiplied with the learnable parameter alpha.
        activation: callable (e.g. torch.nn.Tanh()) applied to the scaled input.
    """

    def __init__(self, n, activation):
        super().__init__()
        self.n = n
        # scalar parameter, initial product n * alpha equals one
        self.alpha = torch.nn.Parameter(torch.tensor(1.0 / n))
        self.activation = activation

    def forward(self, x):
        slope = self.n * self.alpha
        return self.activation(slope * x)

**fully connected neural network**

In [None]:
class FNN(torch.nn.Module):
    """Fully connected network: (Linear -> activation) per hidden layer, then a final Linear.

    Args:
        inputDimension: number of input features.
        hiddenDimensions: sequence with the width of every hidden layer (at least one entry).
        outputDimension: number of output features.
        activationFunction: module inserted after every hidden Linear layer. The same
            instance is reused for all layers, and the default instance is created once
            when the class is defined.
    """

    def __init__(
            self,
            inputDimension,
            hiddenDimensions,
            outputDimension,
            activationFunction=torch.nn.Tanh(),
    ):
        super().__init__()

        # inputDimension = 5 # hardcoded override of inputDimension for feature layer

        # input layer
        layers = [
            torch.nn.Linear(inputDimension, hiddenDimensions[0]),
            activationFunction,
        ]
        # hidden-to-hidden layers, pairing each width with its successor
        for widthIn, widthOut in zip(hiddenDimensions[:-1], hiddenDimensions[1:]):
            layers += [torch.nn.Linear(widthIn, widthOut), activationFunction]
        # output layer without activation
        layers.append(torch.nn.Linear(hiddenDimensions[-1], outputDimension))

        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        # x = torch.cat((torch.sin(torch.pi * x),         # feature layer composed of sin(n * torch.pi * x)
        #               torch.sin(2 * torch.pi * x),
        #               torch.sin(3 * torch.pi * x),
        #               torch.sin(4 * torch.pi * x),
        #               torch.sin(5 * torch.pi * x)), 1)
        return self.model(x)

**convolutional neural network**

In [None]:
class CNN(torch.nn.Module):
    """1D convolutional network that maps a fixed noise input to Nx output values.

    The network does not evaluate its forward argument; instead a fixed Gaussian
    noise tensor of shape (1, inputDimension, Nx) is pushed through a stack of
    Conv1d layers (kernel size 3, stride 1, padding 1, so the length Nx is kept).

    Args:
        inputDimension: number of channels of the noise input.
        hiddenDimensions: sequence with the channel count of every hidden layer
            (at least one entry).
        outputDimension: number of output channels.
        Nx: length of the noise input, i.e. number of grid points.
        activationFunction: module inserted after every hidden convolution; the
            same instance is reused for all layers.
    """

    def __init__(
            self,
            inputDimension,
            hiddenDimensions,
            outputDimension,
            Nx,
            activationFunction=torch.nn.Tanh(),
    ):
        super().__init__()

        def makeConvolution(channelsIn, channelsOut):
            # length-preserving convolution shared by all layers
            return torch.nn.Conv1d(
                channelsIn, channelsOut, kernel_size=3, stride=1, padding=1
            )

        modules = [makeConvolution(inputDimension, hiddenDimensions[0]), activationFunction]
        for channelsIn, channelsOut in zip(hiddenDimensions[:-1], hiddenDimensions[1:]):
            modules += [makeConvolution(channelsIn, channelsOut), activationFunction]
        modules.append(makeConvolution(hiddenDimensions[-1], outputDimension))

        self.model = torch.nn.Sequential(*modules)

        noise = torch.randn((1, inputDimension, Nx))  # Gaussian noise as input
        # rescale so that the noise spans a range of width 2
        noise = noise / (torch.max(noise) - torch.min(noise)) * 2
        # A buffer follows the module through .to(device) / .double(), which a plain
        # tensor attribute does not. persistent=False keeps it out of the state_dict,
        # so saved checkpoints keep their previous keys.
        self.register_buffer("modelInput", noise, persistent=False)

    def forward(self, x):
        # x is a dummy variable to match the interface of FNN
        return self.model(self.modelInput).reshape(-1, 1)

**initialization of neural network weights**

In [None]:
def initWeights(m):
    """Initialize weights of neural network with xavier initialization.

    Intended for use with model.apply(initWeights). Linear layers get a Xavier-uniform
    initialization with the gain of the selected activation function, Conv1d layers
    with the gain of a leaky ReLU (negative slope 0.2). Biases are set to zero when
    the layer has one. All other module types are left untouched.

    Args:
        m: module visited by torch.nn.Module.apply.
    """
    if isinstance(m, torch.nn.Linear):
        # adapt if a different activation function is selected in block 13
        gain = torch.nn.init.calculate_gain("tanh")
        # gain = torch.nn.init.calculate_gain('relu')
        # gain = torch.nn.init.calculate_gain('sigmoid')
    elif isinstance(m, torch.nn.Conv1d):
        gain = torch.nn.init.calculate_gain("leaky_relu", 0.2)
    else:
        return

    torch.nn.init.xavier_uniform_(m.weight, gain=gain)
    if m.bias is not None:  # layers created with bias=False have no bias to reset
        torch.nn.init.zeros_(m.bias)

## PINN helper functions

**displacement computation**
$$\hat{u}=F_{NN}(x)$$
or
$$\hat{u}=F_{NN}(x)\cdot x\cdot(1-x)$$

In [None]:
def getDisplacements(model, x):
    """Return the predicted displacements, i.e. the network output evaluated at x.

    Swap the return statement with the commented alternative below to enforce
    the boundary conditions strongly: the factor x * (1 - x) vanishes at x = 0
    and x = 1, so the prediction is zero there regardless of the network output.
    """
    return model(x)

#    return model(x) * x * (1 - x) # satisfies the boundary conditions by construction

**loss term computation**

the differential equation loss
$$\mathcal{L}_R=\sum_{i=1}^N\bigl(\frac{d}{dx}EA\bigl(\frac{d\hat{u}}{dx}\bigr)+p\bigr)^2$$
the boundary condition loss 
$$\mathcal{L}_B=\sum_{i=1}^{N_B}\bigl( \frac{d^{n_i} \hat{u}}{dx^{n_i}} - F \bigr)^2$$

In [None]:
def getLossTerms(x, xB, u, uB, EA, distLoad, uBLabel, weights):
    """Compute the differential equation loss and the boundary condition loss.

    Args:
        x: collocation points.
        xB: boundary points, one row per entry of uBLabel and in the same order.
        u: predicted displacements at x.
        uB: predicted displacements at xB.
        EA: axial stiffness evaluated at x.
        distLoad: distributed load evaluated at x.
        uBLabel: boundary conditions; entry i holds the prescribed value at index 0
            and the order of differentiation at index 1.
        weights: learnable weights; only used by the commented per-point weighting.

    Returns:
        Tuple (differentialEquationLoss, boundaryConditionLoss).
    """
    # residual of the bar equation d/dx(EA du/dx) + p at every collocation point
    axialForce = EA * getDerivative(u, x, 1)
    residual = getDerivative(axialForce, x, 1) + distLoad
    differentialEquationLoss = torch.sum(residual ** 2).squeeze()

    # for automatic weighting of all collocation points, remember to also modify block 15
    # (unsqueeze keeps the weights aligned with the column-shaped residual)
    # differentialEquationLoss = torch.sum(residual ** 2 * weights[len(uBLabel):].unsqueeze(1)).squeeze()

    # initialization
    boundaryConditionLoss = 0

    for i, condition in enumerate(uBLabel):
        prescribedValue = condition[0]
        derivativeOrder = condition[1]
        # row i of the differentiated boundary prediction belongs to condition i
        mismatch = getDerivative(uB, xB, derivativeOrder)[i] - prescribedValue
        boundaryConditionLoss += mismatch.squeeze() ** 2

        # for automatic weighting of all collocation points, replace the line above
        # and remember to also modify block 15
        # boundaryConditionLoss += mismatch.squeeze() ** 2 * weights[i]

    return differentialEquationLoss, boundaryConditionLoss

**cost function computation**
$$C=\mathcal{L}_R+\mathcal{L}_B$$
or 
$$C=\kappa_R\mathcal{L}_R+\kappa_B\mathcal{L}_B$$

In [None]:
def getCostFunction(lossTerms, weights):
    """Combine the loss terms into the scalar cost that is minimized.

    Args:
        lossTerms: sequence whose first entry is the differential equation loss
            and whose second entry is the boundary condition loss.
        weights: learnable weights; only used by the commented automatic weighting.
    """
    differentialEquationLoss = lossTerms[0]
    boundaryConditionLoss = lossTerms[1]
    return differentialEquationLoss + boundaryConditionLoss

#    return lossTerms[0] + lossTerms[1] * 1e3 # manual weighting
#    return lossTerms[0] * weights[0] + lossTerms[1] * weights[1] # automatic weighting, remember to also modify block 15

## Problem setup

**physical parameters**

In [None]:
# Analytical solution
def uAnalytic(x):
    """Reference displacement sin(2 pi x)."""
    return np.sin(2 * np.pi * x)  # (1. - np.cos(3. * np.pi * x))


# Problem data
def E(x):
    """Young's modulus, constant one with the shape of x."""
    return 1.0 + x * 0


def A(x):
    """Cross-sectional area, constant one with the shape of x."""
    return 1.0 + x * 0


L = 1.0  # bar length

# boundary conditions: [value, degree of differentiation, position]
uB = [
    [0, 0, 0],
    [0, 0, L],
]


def distLoad(x):
    """Distributed load p(x) = 4 pi^2 sin(2 pi x)."""
    return 4 * np.pi ** 2 * torch.sin(2 * np.pi * x)

**hyperparameters**

Currently, Adam is selected as the optimizer. By commenting out the Adam settings and uncommenting the L-BFGS settings at the end of the block, you can switch to L-BFGS.

In [None]:
# discretization and network architecture
Nx = 100  # number of collocation points
hiddenDimensions = [100]  # definition of hidden layers (one entry per hidden layer)
activationFunction = (
    torch.nn.Tanh()
)  # if this is changed, also adapt the initialization in block 8

# common alternatives
# activationFunction = torch.nn.ReLU()
# activationFunction = torch.nn.Sigmoid()

# learnable alternative
# activationFunction = makeAdaptiveActivation(10, torch.nn.Tanh()) # hyperparameter n=10 controls the learning rate of the activation

# learning rate scheduler: the learning rate is multiplied by (beta * epoch + 1) ** alpha
alpha = -0.5
beta = 0.2
initialWeights = 1e0  # emulates learning rates for weighting terms, could be modified with (optimizer.param_groups[-1]['lr'] = lr * lrWeights), but interferes with scheduler

# optimizer settings for Adam
epochs = 5000  # number of epochs
lr = 1e-3  # learning rate (if learning rate scheduler active, increase to lr = 5e-2)
selectOptimizer = "Adam"

# optimizer settings for L-BFGS (uncomment to override the Adam settings above)
# epochs = 500
# selectOptimizer = "LBFGS"
# lr = 1e-2

**neural network & optimizer setup**

In [None]:
# fully connected network mapping the coordinate x to the displacement u
model = FNN(1, hiddenDimensions, 1, activationFunction)

# hiddenDimensions = [20, 40, 20, 10] # adapted hyperparameters for CNN
# lr = 2e-3
# activationFunction = torch.nn.PReLU(init=0.2)
# model = CNN(10, hiddenDimensions, 1, Nx, activationFunction)

model.apply(initWeights)  # visits every submodule and re-initializes Linear / Conv1d layers

if selectOptimizer == "Adam":
    optimizer = torch.optim.Adam(model.parameters(), lr)
elif selectOptimizer == "LBFGS":
    optimizer = torch.optim.LBFGS(model.parameters(), lr)
else:
    # fail here instead of with a NameError on `optimizer` further down
    raise ValueError(
        "Unknown optimizer '{}', expected 'Adam' or 'LBFGS'".format(selectOptimizer)
    )


# learning rate scheduler
def lr_lambda(epoch):
    """Multiplicative factor applied to the initial learning rate at the given epoch."""
    return (beta * epoch + 1) ** alpha


scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

**additional learnable parameters**

In [None]:
# learnable loss weights; the empty default means that no weighting is active
weights = torch.tensor([])
# NOTE: the alternatives must be floating point tensors, an integer tensor can neither
# be scaled in place by a float nor require gradients
# weights = torch.tensor([1.0, 1.0]) # for automatic weighting of loss terms, remember to also modify block 11
# weights = torch.ones(Nx + len(uB)) # for automatic weighting of all collocation points, remember to also modify block 10

weights *= initialWeights  # scale the initial values
weights.requires_grad = True  # gradients are needed to update the weights during training
# optimizer.add_param_group({'params': weights}) # add weights to the optimizer

**training grid**

In [None]:
# collocation points as a column; gradients with respect to x are needed for the derivatives
x = torch.linspace(0, L, Nx, requires_grad=True).unsqueeze(
    1
)  # sampling: uniform spacing

# sampler = qmc.LatinHypercube(d=1) # sampling: latin hypercube
# x = torch.from_numpy(sampler.random(Nx - 2) * L).to(torch.float32)
# x = torch.sort(x, dim=0).values # sort along the sample dimension, torch.sort returns (values, indices)
# x = torch.cat((torch.tensor([[0.0]]), x, torch.tensor([[L]]))) # to include the boundary points
# x.requires_grad = True

# x = torch.from_numpy((0.5*np.polynomial.legendre.leggauss(Nx - 2)[0]+0.5) * L).to(torch.float32).unsqueeze(1) # sampling: Gauss-Legendre points + boundary points
# x = torch.cat((torch.tensor([[0.0]]), x, torch.tensor([[L]]))) # to include the boundary points
# x.requires_grad = True

# boundary points: third entry of every boundary condition, stored as a column
xB = torch.tensor([uBi[2] for uBi in uB]).unsqueeze(1).to(torch.float32)
xB.requires_grad = True

## Training

In [None]:
# loss histories, one entry per epoch, recorded before the optimizer step of that epoch
differentialEquationLossHistory = np.zeros(epochs)
boundaryConditionLossHistory = np.zeros(epochs)
costHistory = np.zeros(epochs)


def closure():
    """Evaluate the cost and its gradients; called by the optimizer in every step.

    L-BFGS may call the closure several times per step, Adam calls it once.
    """
    optimizer.zero_grad()
    uPred = getDisplacements(model, x)
    uBPred = getDisplacements(model, xB)
    lossTerms = getLossTerms(
        x, xB, uPred, uBPred, E(x) * A(x), distLoad(x), uB, weights
    )
    cost = getCostFunction(lossTerms, weights)
    cost.backward()
    if weights.grad is not None:
        weights.grad = -weights.grad  # maximization with regard to weighting terms
    return cost


reportInterval = 100  # print the training state every reportInterval epochs
lastReportedEpoch = -1  # so that the first report averages over exactly one epoch

start = time.perf_counter()
start0 = start
for epoch in range(epochs):
    # predict displacements
    uPred = getDisplacements(model, x)
    uBPred = getDisplacements(model, xB)

    lossTerms = getLossTerms(
        x, xB, uPred, uBPred, E(x) * A(x), distLoad(x), uB, weights
    )
    differentialEquationLossHistory[epoch] = lossTerms[0].item()
    boundaryConditionLossHistory[epoch] = lossTerms[1].item()
    costHistory[epoch] = getCostFunction(lossTerms, weights).item()

    optimizer.step(closure)
    # scheduler.step() # learning rate scheduler

    if epoch % reportInterval == 0:
        # average over the epochs that actually ran since the previous report
        elapsedTime = (time.perf_counter() - start) / (epoch - lastReportedEpoch)
        string = "Epoch: {}/{}\t\tDifferential equation loss = {:2e}\t\tBoundary condition loss = {:2e}\nCost = {:2e}\t\tElapsed time = {:2f}"
        # Format string and print
        print(
            string.format(
                epoch,
                epochs - 1,
                differentialEquationLossHistory[epoch],
                boundaryConditionLossHistory[epoch],
                costHistory[epoch],
                elapsedTime,
            )
        )
        lastReportedEpoch = epoch
        start = time.perf_counter()
elapsedTime = time.perf_counter() - start0
string = "Total elapsed time: {:2f}\nAverage elapsed time per epoch: {:2f}"
print(string.format(elapsedTime, elapsedTime / max(epochs, 1)))

## Post-processing

**training history**

In [None]:
# loss histories on a logarithmic axis
fig, ax = plt.subplots()
ax.set_xlabel("epoch")
ax.set_ylabel("cost function $C$")
ax.set_yscale("log")

# (history, line style, legend entry) in plotting order
historyCurves = (
    (costHistory, "k", "cost $C$"),
    (
        differentialEquationLossHistory,
        "r:",
        "differential equation loss $\\mathcal{L}_{\\mathcal{R}}$",
    ),
    (
        boundaryConditionLossHistory,
        "b--",
        "boundary condition loss $\\mathcal{L}_{\\mathcal{B}}$",
    ),
)
for history, lineStyle, legendEntry in historyCurves:
    ax.plot(history, lineStyle, linewidth=2, label=legendEntry)

ax.grid()
ax.legend()
fig.tight_layout()
plt.show()

**displacement prediction**

In [None]:
# dense test grid (1000 points) to evaluate the prediction between the collocation points
xTest = torch.linspace(0, L, 1000).unsqueeze(1)
# the CNN ignores its input and always returns Nx values, so it cannot be evaluated on xTest
uPredTest = getDisplacements(model, xTest).detach()  # disable with CNN
uPred = getDisplacements(model, x).detach()  # prediction at the collocation points

fig, ax = plt.subplots()
ax.set_xlabel("$x$")
ax.set_ylabel("Displacement $u$")

ax.plot(xTest, uAnalytic(xTest), "gray", linewidth=2, label="Analytical solution")
ax.plot(xTest, uPredTest, "k:", linewidth=2, label="Prediction")  # disable with CNN
ax.plot(x.detach(), uPred, "rs", markersize=6, label="Collocation points")

ax.grid()
ax.legend()
fig.tight_layout()
# plt.savefig("prediction.eps")
plt.show()

**L2 norm**

In [None]:
# L2 norm of the error at the collocation points, normalized by the bar length.
# The integrand is sampled on x, so the trapezoidal rule has to integrate over x as well;
# using the spacing of the much finer test grid would underestimate the norm.
xCollocation = np.asarray(x.detach()[:, 0])
errorCollocation = np.asarray(uPred[:, 0]) - np.asarray(uAnalytic(x.detach())[:, 0])
l2ErrorCollocation = (
    1.0 / L * np.sqrt(np.trapz(errorCollocation ** 2, x=xCollocation))
)
print(
    "{:2e}".format(l2ErrorCollocation.item())
)  # if CNN is used, x has to come from uniform grid

In [None]:
# L2 norm of the error on the test grid, normalized by the bar length
squaredErrorTest = (uPredTest[:, 0] - uAnalytic(xTest)[:, 0]) ** 2
spacingTest = xTest[1] - xTest[0]  # the test grid is uniform
l2ErrorTest = 1.0 / L * np.sqrt(np.trapz(squaredErrorTest, dx=spacingTest))
print("{:2e}".format(l2ErrorTest.item()))  # if CNN is used this does not work