In [1]:
import torch
import torch.nn as nn


In [2]:
# Choose the compute device by priority: CUDA first, then Apple MPS, then CPU.
mps_ready = torch.backends.mps.is_available()  # Apple Silicon
if torch.cuda.is_available():  # Nvidia GPU
    device = torch.device("cuda")
elif mps_ready:
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Seed torch's global random number generator with a fixed value.
torch.manual_seed(0)

<torch._C.Generator at 0x1087fbb10>

In [3]:
def periodic_differences(x, dim=1):
    """Compute first-order finite differences with periodic wrap-around.

    Neighbours are obtained with ``torch.roll``, so the first and last entries
    along ``dim`` are treated as adjacent. The differences are not divided by
    a grid spacing (unit spacing is implied).

    Args:
        x: Tensor of samples.
        dim: Dimension of ``x`` along which to difference; negative values
            count from the end. Defaults to 1, i.e. the sample axis of a
            ``(batch, n)`` tensor.

    Returns:
        Tuple ``(forward, central, backward)`` of tensors shaped like ``x``
        holding ``x[i+1] - x[i]``, ``0.5 * (x[i+1] - x[i-1])`` and
        ``x[i] - x[i-1]`` respectively.
    """
    # Roll once per direction and reuse the shifted copies for all stencils.
    x_next = torch.roll(x, -1, dims=dim)
    x_prev = torch.roll(x, 1, dims=dim)

    forward_diff = x_next - x
    backward_diff = x - x_prev
    central_diff = 0.5 * (x_next - x_prev)

    return forward_diff, central_diff, backward_diff

In [4]:
class FiniteDifferenceNet(nn.Module):
    """Single-layer 1D CNN that learns first-order finite-difference stencils.

    One input channel is mapped to three output channels by a bias-free,
    unpadded convolution of width 3. The notebook uses it to demonstrate that
    such a layer can estimate the forward, central and backward differences.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(
            in_channels=1,
            out_channels=3,
            kernel_size=3,
            padding=0,
            bias=False,
        )

    def forward(self, x):
        # No padding: the output is two samples shorter than the input.
        return self.conv1(x)

In [5]:
# Synthetic training data: 10 random signals of 50 samples each, with the first
# sample of every signal overwritten by its last sample.
a = torch.randn(10,50)
a[:,0] = a[:,-1]
gradx = periodic_differences(a)
# Stack (forward, central, backward) as channels and trim both end points so
# the target has the same length as the output of the unpadded width-3 conv.
fd_true = torch.stack(gradx, dim=1)[..., 1:-1]
fd_true = fd_true.to(device)

fd_net = FiniteDifferenceNet().to(device)
fd_loss = nn.MSELoss()
optimizer = torch.optim.Adam(fd_net.parameters(), lr=1e-1)

# Conv1d expects (batch, channel, length), so add a singleton channel axis.
a_channel = a.unsqueeze(dim=1).to(device)

fd_net.train()
for i in range(1000):
    optimizer.zero_grad()
    fd_pred = fd_net(a_channel)
    loss = fd_loss(fd_pred, fd_true)
    loss.backward()
    optimizer.step()

    # Report every 100th step, counted so that the final step is included.
    if (i + 1) % 100 == 0:
        print(loss.item(), end="\r")
print(fd_net.conv1.weight.data)
# Output for the above should be very close to:
#  [  0   -1  1   ]
#  [ -0.5  0  0.5 ]
#  [ -1    1  0   ]

tensor([[[ 1.2484e-09, -1.0000e+00,  1.0000e+00]],

        [[-5.0000e-01,  5.4967e-10,  5.0000e-01]],

        [[-1.0000e+00,  1.0000e+00, -1.1383e-10]]], device='mps:0')


In [6]:
def upwind_difference(x):
    """Pick a one-sided periodic difference based on the sign of ``x``.

    Where ``x`` is negative the forward difference is used; where ``x`` is
    zero or positive the backward difference is used.
    """
    diffs = periodic_differences(x)
    fwd, bwd = diffs[0], diffs[2]

    direction = torch.sign(x)
    use_forward = direction < 0
    use_backward = direction >= 0
    # The boolean masks act as 0/1 multipliers selecting one term per element.
    return fwd * use_forward + bwd * use_backward

In [7]:
class UpwindDifferenceSignFunctionNet(nn.Module):
    """Convolutional neural network with sign function

    Trains a convolutional layer that should learn the weights for a forward and
    backward finite difference as two channels. A sign function (similar to an
    actual upwind difference scheme) is used to "choose" which channel is output:
    channel 0 where the input is negative, channel 1 where it is zero or positive.

    ``forward`` takes a ``(batch, 1, n)`` tensor and returns ``(batch, 1, n - 2)``.
    """
    def __init__(self):
        # Zero-argument super() always resolves to this class; naming another
        # class here makes instantiation fail.
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=2, kernel_size=3, padding=0, bias=False)

    def forward(self, x):
        # Sign of the interior samples, aligned with the unpadded conv output.
        # The single channel is indexed explicitly so that only the channel
        # axis is removed, even when the batch or interior length is 1.
        sign = torch.sign(x)[:, 0, 1:-1]
        x = self.conv1(x)
        # Boolean masks act as 0/1 multipliers selecting one channel per sample.
        x = x[:,0,:]*(sign < 0) + x[:,1,:]*(sign >= 0)
        x = x.unsqueeze(1)
        return x

class UpwindDifferenceNet(nn.Module):
    """Convolutional network whose channel mixing is learned.

    A width-3, bias-free, unpadded convolution produces two channels that are
    meant to learn the forward and backward finite differences. Each channel is
    scaled by its own gate: a 1-to-1 linear layer applied to every input sample,
    multiplied by a steepening factor and passed through a sigmoid. The factor
    pushes the sigmoid towards a heaviside step while keeping it differentiable
    with non-zero derivatives.

    Note: the two gate layers could be merged into a single linear layer with
    two outputs.
    """

    def __init__(self):
        super().__init__()

        # Multiplier applied before the sigmoid to sharpen it towards a step.
        self.steepen_sigmoid_factor = 10
        self.channel_wt1 = nn.Linear(1, 1)
        self.channel_wt2 = nn.Linear(1, 1)
        self.conv1 = nn.Conv1d(
            in_channels=1,
            out_channels=2,
            kernel_size=3,
            padding=0,
            bias=False,
        )

    def _interior_gate(self, linear, x):
        """Return the sigmoid gate of ``linear`` for the interior samples of ``x``."""
        batch, _, length = x.shape
        # Apply the 1-to-1 linear layer to every sample independently, then
        # restore the (batch, length) layout and drop the two end points.
        per_sample = linear(x.reshape(-1, 1)).reshape(batch, length)
        return torch.sigmoid(self.steepen_sigmoid_factor * per_sample[:, 1:-1])

    def forward(self, x):
        gate_first = self._interior_gate(self.channel_wt1, x)
        gate_second = self._interior_gate(self.channel_wt2, x)

        stencils = self.conv1(x)
        blended = stencils[:, 0, :] * gate_first + stencils[:, 1, :] * gate_second
        return blended.unsqueeze(1)

# Build the learned-gate upwind network, then move it to the selected device.
net = UpwindDifferenceNet()
net = net.to(device)

In [12]:
# Fresh random batch for the upwind experiment: 10 signals of 50 samples each,
# with the first sample of every signal overwritten by its last sample.
a = torch.randn(10, 50)
a[:, 0] = a[:, -1]

# Network input: insert the singleton channel axis and move to the device.
a_channel = a[:, None, :].to(device)

# Reference upwind differences with a channel axis, trimmed to the interior
# points so they line up with the output of the unpadded convolution.
uw_true = upwind_difference(a)[:, None, 1:-1].to(device)

# Model, loss and optimizer for the upwind experiment.
uw_net = UpwindDifferenceNet().to(device)
uw_loss = nn.MSELoss()
optimizer = torch.optim.Adam(uw_net.parameters(), lr=1e-2)


In [20]:
# The training mode does not change inside the loop, so set it once up front.
uw_net.train()
for i in range(5000):
    optimizer.zero_grad()
    uw_pred = uw_net(a_channel)
    loss = uw_loss(uw_pred, uw_true)
    loss.backward()
    optimizer.step()

    # Report every 100th step, counted so that the final step is included.
    if (i + 1) % 100 == 0:
        print(loss.item(), end="\r")

print(uw_net.conv1.weight.data)
# Output should be close to
# [ 0  -1  1 ]
# [ -1  1  0 ]
# The rows could be swapped since there is no restriction on the order of
# the channels.

# Note: One interesting behavior. There seem to be three minima of the loss
# function: one where the channels correspond to the forward and backward finite
# differences, and two others where one channel is either the forward or the
# backward difference and the other channel is the second difference (or its
# negative). This likely arises because the "missing" finite difference
# is just a linear combination of the other two.


tensor([[[ 3.9986e-05, -1.0002e+00,  1.0002e+00]],

        [[-9.9997e-01,  2.0000e+00, -9.9999e-01]]], device='mps:0')
