# Exercise 21 - Variations of the Ansatz Space
### Task
Compare the deep energy method, FE-interpolated neural networks, and HiDeNN with and without automatic differentiation. Select the corresponding method with `method`. Can you reproduce the results from Table 5.2?

### Learning goals
- Understand the difference between the deep energy method, FE-interpolated neural networks, and HiDeNN (theory & implementation)
- Understand the problems and benefits associated with the neural network ansatz and the linear ansatz, respectively

In [None]:
import numpy as np
import torch
from torch.autograd import grad
import time
import matplotlib.pyplot as plt

In [None]:
# Seed PyTorch's random number generator so that repeated runs start from the same state.
torch.manual_seed(2)

## Method selection

In [None]:
# Select exactly one method by leaving its line uncommented:
#   "DEM"             - neural network evaluated directly at the integration points
#   "interpolatedNN"  - neural network predicts nodal values, interpolated with shape functions
#   "hiDeNN"          - nodal values are the trainable parameters, derivatives via autograd
#   "hiDeNNWithoutAD" - as "hiDeNN", but derivative and cost gradient are evaluated analytically
# method = "DEM"
# method = "interpolatedNN"
# method = "hiDeNN"
method = "hiDeNNWithoutAD"

## Utilities

**gradient computation with automatic differentiation**

In [None]:
def getDerivative(y, x, n):
    """Compute the nth order derivative of y = f(x) with respect to x.

    The derivative is obtained by applying ``torch.autograd.grad`` ``n`` times
    with a vector of ones as ``grad_outputs``. This equals the pointwise
    derivative whenever each entry of ``y`` depends on a single entry of ``x``.

    Args:
        y: Tensor computed from ``x`` through differentiable operations.
        x: Tensor with ``requires_grad=True`` to differentiate with respect to.
        n: Non-negative derivative order; ``n == 0`` returns ``y`` unchanged.

    Returns:
        ``y`` itself for ``n == 0``, otherwise a tensor shaped like ``x`` that
        stays attached to the autograd graph (``create_graph=True``).

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("derivative order n must be non-negative, got {}".format(n))

    derivative = y
    for _ in range(n):
        # grad_outputs has to match the tensor being differentiated (shape, dtype,
        # device), which is not necessarily identical to x.
        derivative = grad(
            derivative,
            x,
            torch.ones_like(derivative),
            create_graph=True,
            retain_graph=True,
        )[0]
    return derivative

**weight initialization**

In [None]:
def initWeights(m):
    """Initialize a linear layer with Xavier-uniform weights and a zero bias.

    Meant to be passed to ``torch.nn.Module.apply``. Modules that are not
    ``torch.nn.Linear`` are left untouched, and layers created with
    ``bias=False`` only get their weights initialized.

    Args:
        m: Module visited by ``apply``.
    """
    if not isinstance(m, torch.nn.Linear):
        return

    # The gain is chosen for tanh; adapt it if a different activation function is used.
    torch.nn.init.xavier_uniform_(
        m.weight, gain=torch.nn.init.calculate_gain("tanh")
    )
    if m.bias is not None:
        torch.nn.init.zeros_(m.bias)

**neural network**

In [None]:
class NN(torch.nn.Module):
    """Fully connected feed-forward network.

    The layers are ``Linear -> activation -> ... -> Linear``; no activation is
    applied after the last linear layer.

    Args:
        inputDimension: Number of input features.
        hiddenDimensions: Sequence with the width of each hidden layer. An empty
            sequence yields a single linear layer from input to output.
        outputDimension: Number of output features.
        activationFunction: Activation module inserted between the linear
            layers (the same instance is reused at every position). Defaults
            to a new ``torch.nn.Tanh()`` per network.
    """

    def __init__(
            self,
            inputDimension,
            hiddenDimensions,
            outputDimension,
            activationFunction=None,
    ):
        super().__init__()

        # Create the default per instance instead of sharing one module object
        # through the function signature.
        if activationFunction is None:
            activationFunction = torch.nn.Tanh()

        layerSizes = [inputDimension, *hiddenDimensions, outputDimension]

        modules = []
        for index, (fanIn, fanOut) in enumerate(zip(layerSizes[:-1], layerSizes[1:])):
            if index > 0:
                # activation between two consecutive linear layers
                modules.append(activationFunction)
            modules.append(torch.nn.Linear(fanIn, fanOut))

        self.model = torch.nn.Sequential(*modules)

    def forward(self, x):
        """Evaluate the network; the last dimension of ``x`` holds the input features."""
        return self.model(x)

**degrees of freedom as (neural network) model**

In [None]:
class directDofs(torch.nn.Module):
    """Model whose only trainable parameters are the degrees of freedom themselves.

    Args:
        numberOfDofs: Length of the zero-initialized parameter vector.
    """

    def __init__(self, numberOfDofs):
        super().__init__()

        initialValues = torch.zeros(numberOfDofs)
        self.dofs = torch.nn.Parameter(initialValues)

    def forward(self, dummy):
        """Return the degrees of freedom; ``dummy`` is ignored.

        Multiplying by one yields a non-leaf tensor, so the result can be
        modified in place without touching the parameter itself.
        """
        return torch.mul(self.dofs, 1)

## Finite element helper functions

**potential energy computation**

In [None]:
def getPotentialEnergy(
        u, dudx, E, A, distLoad, x, integrationWeights, Jacobian
):
    """Integrate internal and external energy numerically.

    Assumes homogeneous Neumann boundary conditions (no boundary term is added).

    Args:
        u: Displacement at the integration points.
        dudx: Displacement derivative at the integration points.
        E, A, distLoad: Callables evaluated at ``x``.
        x: Integration point coordinates.
        integrationWeights: Quadrature weight per integration point.
        Jacobian: Factor scaling the quadrature weights.

    Returns:
        Tuple ``(internalEnergy, externalEnergy)``.
    """
    weightedStrainTerm = dudx ** 2 * E(x) * A(x) * integrationWeights * Jacobian
    weightedLoadTerm = distLoad(x) * u * integrationWeights * Jacobian

    internalEnergy = 0.5 * torch.sum(weightedStrainTerm)
    externalEnergy = torch.sum(weightedLoadTerm)
    return internalEnergy, externalEnergy

**analytic computation of displacement derivative**

In [None]:
def getFirstDisplacementDerivativeAnalytically(
        dofs, dofMaskMatrix, derivedShapeFunctionMatrix, Jacobian
):
    """Evaluate the displacement derivative from shape function derivatives.

    Args:
        dofs: Degrees of freedom (squeezed before use).
        dofMaskMatrix: Boolean matrix; row ``i`` selects the dofs that belong to
            evaluation point ``i``.
        derivedShapeFunctionMatrix: Shape function derivatives per evaluation
            point, one column per selected dof.
        Jacobian: Divisor applied to the summed products.

    Returns:
        Tensor with one derivative value per evaluation point.
    """
    flatDofs = dofs.squeeze()
    # one row per evaluation point, holding the dofs picked by that point's mask
    dofsPerPoint = torch.stack([flatDofs[rowMask] for rowMask in dofMaskMatrix])
    summedProducts = (derivedShapeFunctionMatrix * dofsPerPoint).sum(dim=1)
    return summedProducts / Jacobian

**analytic computation of cost function derivative**

In [None]:
def getCostFunctionGradientAnalytically(
        dudx,
        dofs,
        E,
        A,
        distLoad,
        x,
        integrationWeights,
        derivedShapeFunctionMatrix,
        Jacobian,
        integrationOrder,
        shapeFunctionMatrix=None,
):
    """Assemble the gradient of the potential energy with respect to the dofs.

    The integration points are expected to be ordered element by element with
    ``integrationOrder`` points each, and every element contributes to two
    consecutive dofs (its left and its right node).

    Args:
        dudx: Displacement derivative at the integration points.
        dofs: Degrees of freedom; only their number is used.
        E, A, distLoad: Callables evaluated at ``x``.
        x: Integration point coordinates.
        integrationWeights: Quadrature weight per integration point.
        derivedShapeFunctionMatrix: Shape function derivatives per integration
            point (column 0: left node, column 1: right node).
        Jacobian: Factor scaling the external energy contribution.
        integrationOrder: Number of integration points per element.
        shapeFunctionMatrix: Shape function values per integration point
            (column 0: left node, column 1: right node). If omitted, the
            module-level variable ``shapeFunctionMatrix`` is used, which is
            what this function relied on before the parameter existed.

    Returns:
        Tensor of length ``len(dofs)`` that is detached from the autograd graph.
    """
    if shapeFunctionMatrix is None:
        # Backward compatibility with callers that rely on the module-level matrix.
        shapeFunctionMatrix = globals()["shapeFunctionMatrix"]

    def sumPerElement(integrand):
        # collapse the integration points of each element into one value
        return integrand.reshape((-1, integrationOrder)).sum(dim=1)

    # The gradient is evaluated in closed form; recording an autograd graph here
    # would only cost time and memory.
    with torch.no_grad():
        costGradient = torch.zeros(len(dofs))

        # internal energy: the Jacobian cancels out (derivatives and integral)
        stiffnessIntegrand = dudx * E(x) * A(x) * integrationWeights
        # contribution of each element to its left node
        costGradient[:-1] += sumPerElement(
            stiffnessIntegrand * derivedShapeFunctionMatrix[:, 0]
        )
        # contribution of each element to its right node
        costGradient[1:] += sumPerElement(
            stiffnessIntegrand * derivedShapeFunctionMatrix[:, 1]
        )

        # external energy: here the Jacobian does not cancel out
        loadIntegrand = distLoad(x) * integrationWeights
        # contribution of each element to its left node
        costGradient[:-1] -= (
            sumPerElement(loadIntegrand * shapeFunctionMatrix[:, 0]) * Jacobian
        )
        # contribution of each element to its right node
        costGradient[1:] -= (
            sumPerElement(loadIntegrand * shapeFunctionMatrix[:, 1]) * Jacobian
        )
    return costGradient

**integration point determination**

In [None]:
def getIntegrationPoints(numberOfElements, nodes, integrationOrder):
    """Place Gauss-Legendre points in every element and repeat their weights.

    Args:
        numberOfElements: Number of elements; element ``i`` spans
            ``nodes[i]`` to ``nodes[i + 1]``.
        nodes: Node coordinates.
        integrationOrder: Number of Gauss-Legendre points per element.

    Returns:
        Tuple ``(integrationPoints, integrationWeights)``, both of length
        ``integrationOrder * numberOfElements`` and ordered element by element.
        The weights are the reference weights on ``[-1, 1]`` and keep the dtype
        delivered by NumPy, while the points use torch's default dtype.
    """
    referencePoints, referenceWeights = np.polynomial.legendre.leggauss(
        integrationOrder
    )
    referencePoints = torch.from_numpy(referencePoints)
    integrationWeights = torch.from_numpy(referenceWeights).repeat(numberOfElements)

    # reference coordinates mapped from [-1, 1] to [0, 1]
    unitPoints = 0.5 * (referencePoints + 1)

    integrationPoints = torch.zeros(integrationOrder * numberOfElements)
    for element in range(numberOfElements):
        first = element * integrationOrder
        last = first + integrationOrder
        integrationPoints[first:last] = (
            unitPoints * (nodes[element + 1] - nodes[element]) + nodes[element]
        )
    return integrationPoints, integrationWeights

**shape function computation**

In [None]:
def integratedLegendrePolynomials(x, nodes):
    """Evaluate the two linear shape functions of an element at ``x``.

    Args:
        x: Coordinate at which the shape functions are evaluated.
        nodes: Element nodes; only the first and the last entry are used.

    Returns:
        Tensor ``[N_left, N_right]`` with ``N_left = (1 - xi) / 2`` and
        ``N_right = (1 + xi) / 2``, where ``xi`` is ``x`` mapped to ``[-1, 1]``.
    """
    leftNode, rightNode = nodes[0], nodes[-1]
    xi = 2 * (x - leftNode) / (rightNode - leftNode) - 1

    values = torch.zeros(2)
    values[0] = 0.5 * (1 - xi)
    values[1] = 0.5 * (1 + xi)
    return values

**shape function derivative computation**

In [None]:
def derivativesOfIntegratedLegendrePolynomials(x, nodes):
    """Return the derivatives of the two linear shape functions.

    The derivatives are taken with respect to the reference coordinate ``xi``
    on ``[-1, 1]`` and are therefore constant. ``x`` and ``nodes`` are accepted
    for symmetry with ``integratedLegendrePolynomials`` but do not influence
    the result.

    Args:
        x: Evaluation coordinate (unused).
        nodes: Element nodes (unused).

    Returns:
        Tensor ``[-0.5, 0.5]``.
    """
    return torch.tensor([-0.5, 0.5])

**shape functions assembled in matrix**

In [None]:
def getShapeFunctionMatrix(x, nodes, numberOfElements, L):
    """Evaluate shape functions, their derivatives and the dof masks at ``x``.

    Args:
        x: Evaluation coordinates.
        nodes: Sorted node coordinates; element ``i`` spans ``nodes[i]`` to
            ``nodes[i + 1]``.
        numberOfElements: Number of elements.
        L: Domain length. Kept for interface compatibility; boundary points are
            handled by clamping the element index instead of comparing with ``L``.

    Returns:
        Tuple ``(shapeFunctionMatrix, derivedShapeFunctionMatrix, dofMaskMatrix)``.
        The first two have shape ``(len(x), 2)``; the boolean mask has shape
        ``(len(x), numberOfElements + 1)`` and marks the two dofs of the element
        that contains each point.
    """
    numberOfPoints = len(x)
    shapeFunctionMatrix = torch.zeros((numberOfPoints, 2))
    derivedShapeFunctionMatrix = torch.zeros((numberOfPoints, 2))
    dofMaskMatrix = torch.zeros(
        (numberOfPoints, numberOfElements + 1), dtype=torch.bool
    )

    for i, point in enumerate(x):
        elementIndex = int(torch.searchsorted(nodes, point, side="right")) - 1
        # Clamp so that a point on (or marginally outside) the boundary is
        # assigned to the first / last element instead of an invalid index.
        elementIndex = min(max(elementIndex, 0), numberOfElements - 1)

        elementNodes = nodes[elementIndex: (elementIndex + 2)]
        shapeFunctionMatrix[i, :] = integratedLegendrePolynomials(point, elementNodes)
        dofMaskMatrix[i, elementIndex: (elementIndex + 2)] = True
        derivedShapeFunctionMatrix[i, :] = derivativesOfIntegratedLegendrePolynomials(
            point, elementNodes
        )

    return shapeFunctionMatrix, derivedShapeFunctionMatrix, dofMaskMatrix

## Problem setup

In [None]:
# analytical solution
def uAnalytic(x):
    """Reference displacement 1 - cos(3 pi x)."""
    return 1.0 - np.cos(3.0 * np.pi * x)


analyticPotentialEnergy = 0.5 * 116.959701987868 - 86.329173615


# problem data
def E(x):
    """Young's modulus (constant)."""
    return 1.0


def A(x):
    """Cross-sectional area x^2 + 1."""
    return x ** 2 + 1.0


L = 3.0 / 2.0
# dof indices with prescribed values: first dof -> 0.0, last dof -> 1.0
bcMask = [[0, -1], torch.tensor([0.0, 1.0])]


def strongEnforcement(u, x):
    """Modify ``u`` so that the result is 0 at x = 0 and 1 at x = L."""
    return u * x * (L - x) + x / L * 1.0


def distLoad(x):
    """Distributed load, equal to -(E A u')' for ``uAnalytic``."""
    return -6 * x * np.pi * torch.sin(3 * np.pi * x) - 9 * (
        x ** 2 + 1
    ) * np.pi ** 2 * torch.cos(3 * np.pi * x)

**hyperparameters**

In [None]:
# Optimizer settings (and network settings where a neural network is used) per method.
if method == "DEM":
    lr = 5e-3
    epochs = 1000
    hiddenDimensions = [100]
    activationFunction = (
        torch.nn.Tanh()
    )  # if this is changed, also adapt the initialization
elif method == "interpolatedNN":
    lr = 2e-3  # 8e-3 #1e-2
    epochs = 400
    hiddenDimensions = [50, 50, 50]  # [50, 50]
    activationFunction = torch.nn.ReLU()
elif method in ("hiDeNN", "hiDeNNWithoutAD"):
    lr = 1e0
    epochs = 100
    # if you increase number of elements, increase epochs and decrease learning rate,
    # e.g., lr = 1e-2, epochs = 1000
else:
    # Fail here with a clear message instead of with a NameError further down.
    raise ValueError(
        "Unknown method {!r}; expected 'DEM', 'interpolatedNN', 'hiDeNN' or "
        "'hiDeNNWithoutAD'".format(method)
    )

**finite element parameters**

In [None]:
# number of Gauss-Legendre points per element
integrationOrder = 2

numberOfElements = 30
elementLength = L / numberOfElements
# scaling between the reference element [-1, 1] and an element of length elementLength
Jacobian = elementLength / 2.0
nodes = torch.linspace(0, L, steps=numberOfElements + 1)  # uniformly distributed nodes
integrationPoints, integrationWeights = getIntegrationPoints(
    numberOfElements, nodes, integrationOrder
)
# x is the same tensor as integrationPoints; gradients with respect to it are
# needed for the derivative computation with automatic differentiation
x = integrationPoints.requires_grad_(True)

**training setup**

In [None]:
# Pick the model: nodal values as direct parameters, or a neural network.
if method in ("hiDeNN", "hiDeNNWithoutAD"):
    model = directDofs(numberOfElements + 1)
elif method in ("DEM", "interpolatedNN"):
    model = NN(1, hiddenDimensions, 1, activationFunction)
    if method == "DEM":
        # Xavier initialization is only applied for the deep energy method
        model.apply(initWeights)

# Shape functions, their derivatives and the dof masks at the integration points;
# evaluated once here and reused in every epoch.
(
    shapeFunctionMatrix,
    derivedShapeFunctionMatrix,
    dofMaskMatrix,
) = getShapeFunctionMatrix(x, nodes, numberOfElements, L)  # TODO change x to integrationPoints
optimizer = torch.optim.Adam(model.parameters(), lr)

## Training

In [None]:
# Minimize the potential energy (internal minus external energy) with the optimizer.
costHistory = np.zeros(epochs)
reportInterval = 10  # number of epochs between two progress reports
start0 = time.perf_counter()  # start of the whole training run
start = time.perf_counter()  # start of the current reporting interval
for epoch in range(epochs):
    optimizer.zero_grad()

    # Nodal values delivered by the model.
    # NOTE(review): for "DEM" they do not enter uPred below but are still
    # evaluated in every epoch, which is included in the measured time.
    dofs = model(nodes.unsqueeze(1)).squeeze()
    # enforce boundary conditions
    dofs[bcMask[0]] = bcMask[1]

    if method == "DEM":
        uPred = strongEnforcement(model(x.unsqueeze(1)).squeeze(), x)
    elif method in ("interpolatedNN", "hiDeNN", "hiDeNNWithoutAD"):
        # interpolate the nodal values with the shape functions
        uPred = torch.sum(
            shapeFunctionMatrix
            * torch.vstack([dofs.squeeze()[mask] for mask in dofMaskMatrix]),
            axis=1,
        )

    if method in ("DEM", "interpolatedNN", "hiDeNN"):
        dudxPred = getDerivative(uPred, x, 1)
    elif method == "hiDeNNWithoutAD":
        dudxPred = getFirstDisplacementDerivativeAnalytically(
            dofs, dofMaskMatrix, derivedShapeFunctionMatrix, Jacobian
        )

    internalEnergy, externalEnergy = getPotentialEnergy(
        uPred, dudxPred, E, A, distLoad, x, integrationWeights, Jacobian
    )

    cost = internalEnergy - externalEnergy
    costHistory[epoch] = cost.detach()

    if method in ("DEM", "interpolatedNN", "hiDeNN"):
        # Gradients end up in the .grad attributes of the model parameters.
        # retain_graph is presumably required because the shape function
        # matrices were built once from x outside the loop, so that part of the
        # graph is traversed again in every epoch.
        cost.backward(retain_graph=True)
    elif method == "hiDeNNWithoutAD":
        costGradient = getCostFunctionGradientAnalytically(
            dudxPred,
            dofs,
            E,
            A,
            distLoad,
            x,
            integrationWeights,
            derivedShapeFunctionMatrix,
            Jacobian,
            integrationOrder,
        )
        # Store the plain values only; the optimizer does not need a graph.
        model.dofs.grad = costGradient.detach()

    optimizer.step()

    if epoch % reportInterval == 0:
        # The first report covers a single epoch, every later one a full interval.
        epochsSinceReport = 1 if epoch == 0 else reportInterval
        elapsedTime = (time.perf_counter() - start) / epochsSinceReport
        string = "Epoch: {}/{}\t\tCost = {:.2f}\t\tElapsed time = {:2f}"
        # Format string and print
        print(string.format(epoch, epochs - 1, costHistory[epoch], elapsedTime))
        start = time.perf_counter()

elapsedTime = time.perf_counter() - start0
string = "Total elapsed time: {:2f} s\nAverage elapsed time per epoch: {:.2e} s"
print(string.format(elapsedTime, elapsedTime / epochs))

## Post-processing

**training history**

In [None]:
# Plot the recorded cost (internal minus external energy) over the training epochs.
fig, ax = plt.subplots()
ax.plot(costHistory, "k")
ax.grid()
plt.show()

**displacement prediction**

In [None]:
# Evaluation grid with ten intervals per element.
postProcessingGrid = torch.linspace(0, L, 10 * numberOfElements + 1)
# NOTE: this overwrites the module-level matrices that the training loop uses;
# re-run the training setup before training again.
(
    shapeFunctionMatrix,
    derivedShapeFunctionMatrix,
    dofMaskMatrix,
) = getShapeFunctionMatrix(postProcessingGrid, nodes, numberOfElements, L)

if method in ("interpolatedNN", "hiDeNN", "hiDeNNWithoutAD"):
    # interpolate the nodal values of the last epoch on the grid
    uPred = torch.sum(
        shapeFunctionMatrix
        * torch.vstack([dofs.squeeze()[mask] for mask in dofMaskMatrix]),
        axis=1,
    )
elif method == "DEM":
    uPred = strongEnforcement(
        model(postProcessingGrid.unsqueeze(1)).squeeze(), postProcessingGrid
    )

# prediction (solid black), nodal values (markers) and analytic solution (dotted red)
fig, ax = plt.subplots()
ax.plot(postProcessingGrid.detach(), uPred.detach(), "k")
if method in ("interpolatedNN", "hiDeNN", "hiDeNNWithoutAD"):
    ax.plot(nodes.detach(), dofs.detach(), "ko")
ax.plot(postProcessingGrid.detach(), uAnalytic(postProcessingGrid.detach()), "r:")
ax.grid()
plt.show()

In [None]:
# Mean squared error between the analytic and the predicted displacement on the post-processing grid.
print("MSE: {:.2e}".format(torch.mean((uAnalytic(postProcessingGrid) - uPred) ** 2)))