In [None]:
# | default_exp layers

# Layers

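> Complex-valued MLMVN layers, a phase activation, and dropout, implemented as PyTorch modules backed by custom autograd functions.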

In [None]:
# |hide
from nbdev.showdoc import *

In [None]:
# |export
import math
import torch
import torch.nn as nn
from torch.autograd import Function

## MLMVN

In [None]:
# |export
class FirstLayer(nn.Module):
    """Complex-valued affine layer computing ``x @ weights + bias``.

    Both passes are delegated to ``FirstLayerFB``; its backward is a
    hand-written update rule, not the analytic derivative of the forward.

    Attributes:
        weights: ``(size_in, size_out)`` ``torch.cdouble`` parameter drawn
            from ``torch.randn`` and scaled by ``1 / sqrt(size_in)``.
        bias: ``(1, size_out)`` ``torch.cdouble`` parameter, initially zero.
        grad_output: ``None`` until a backward pass has run; afterwards the
            tensor most recently propagated back into this layer's output.
    """

    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out

        weights = torch.randn(
            self.size_in, self.size_out, dtype=torch.cdouble
        ) / math.sqrt(self.size_in)
        self.weights = nn.Parameter(weights)

        # Kept 2-D so it broadcasts over the batch dimension in ``torch.add``.
        self.bias = nn.Parameter(torch.zeros(1, size_out, dtype=torch.cdouble))

        # Filled in by ``_hook_fn`` during backward.
        self.grad_output = None

    def forward(self, x):
        """Return ``x @ weights + bias`` and record the output's gradient on backward.

        Args:
            x: 2-D tensor of shape ``(batch, size_in)``; presumably
                ``torch.cdouble`` so that ``torch.mm`` accepts it alongside
                the weights.

        Returns:
            Tensor of shape ``(batch, size_out)``.
        """
        x = FirstLayerFB.apply(x, self.weights, self.bias)
        # ``register_hook`` raises on tensors that do not require grad
        # (e.g. under ``torch.no_grad()``), so only hook differentiable outputs.
        if x.requires_grad:
            x.register_hook(self._hook_fn)
        return x

    def _hook_fn(self, tensor):
        """Store the tensor propagated back into this layer's output."""
        self.grad_output = tensor


class FirstLayerFB(Function):
    """Autograd function behind ``FirstLayer``.

    Forward: ``input @ weights + bias``.

    Backward, with ``P = pinv([1; input^T])^T`` (a row of ones stacked on top
    of ``input^T``) and ``E = grad_output / |output|``:

    * ``grad_input  = grad_output @ (conj(W) / |W|**2)^T`` (element-wise quotient)
    * ``grad_weight = -(P[1:] @ E)``
    * ``grad_bias   = -(P[0] @ E)`` reshaped to ``(1, size_out)``
    """

    @staticmethod
    def forward(ctx, input, weights, bias):
        output = torch.add(torch.mm(input, weights), bias)
        ctx.save_for_backward(input, weights, bias, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight, bias, output = ctx.saved_tensors
        # Gradients that are not requested stay ``None``.
        grad_input = grad_weight = grad_bias = None

        needs_weight = ctx.needs_input_grad[1]
        needs_bias = bias is not None and ctx.needs_input_grad[2]

        if ctx.needs_input_grad[0]:
            cinv = (torch.conj(weight) / torch.square(torch.abs(weight))).T
            grad_input = grad_output.mm(cinv)

        if needs_weight or needs_bias:
            # The pseudo-inverse is shared by the weight and bias updates, so
            # it must be available even when only the bias requires grad.
            ones_row = torch.ones(
                1, input.size(0), dtype=input.dtype, device=input.device
            )
            x_pinv = torch.linalg.pinv(torch.cat([ones_row, input.T])).T
            scaled_error = torch.div(grad_output, torch.abs(output))

            if needs_weight:
                grad_weight = (x_pinv[1:, :] @ scaled_error) * (-1)
            if needs_bias:
                grad_bias = (x_pinv[0, :] @ scaled_error).unsqueeze(dim=0) * (-1)

        return grad_input, grad_weight, grad_bias

In [None]:
# |export
class HiddenLayer(nn.Module):
    """Complex-valued affine layer computing ``x @ weights + bias``.

    Both passes are delegated to ``HiddenLayerFB``; its backward is a
    hand-written update rule, not the analytic derivative of the forward.

    Attributes:
        weights: ``(size_in, size_out)`` ``torch.cdouble`` parameter drawn
            from ``torch.randn`` and scaled by ``1 / sqrt(size_in)``.
        bias: ``(1, size_out)`` ``torch.cdouble`` parameter, initially zero.
        grad_output: ``None`` until a backward pass has run; afterwards the
            tensor most recently propagated back into this layer's output,
            divided by ``size_in + 1``.
    """

    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out

        weights = torch.randn(
            self.size_in, self.size_out, dtype=torch.cdouble
        ) / math.sqrt(self.size_in)
        self.weights = nn.Parameter(weights)

        # Kept 2-D so it broadcasts over the batch dimension in ``torch.add``.
        self.bias = nn.Parameter(torch.zeros(1, size_out, dtype=torch.cdouble))

        # Filled in by ``_hook_fn`` during backward.
        self.grad_output = None

    def forward(self, x):
        """Return ``x @ weights + bias`` and record the output's gradient on backward.

        Args:
            x: 2-D tensor of shape ``(batch, size_in)``; presumably
                ``torch.cdouble`` so that ``torch.mm`` accepts it alongside
                the weights.

        Returns:
            Tensor of shape ``(batch, size_out)``.
        """
        x = HiddenLayerFB.apply(x, self.weights, self.bias)
        # ``register_hook`` raises on tensors that do not require grad
        # (e.g. under ``torch.no_grad()``), so only hook differentiable outputs.
        if x.requires_grad:
            x.register_hook(self._hook_fn)
        return x

    def _hook_fn(self, tensor):
        """Store the back-propagated tensor scaled by ``1 / (size_in + 1)``."""
        self.grad_output = tensor / (self.size_in + 1)


class HiddenLayerFB(Function):
    """Autograd function behind ``HiddenLayer``.

    Forward: ``input @ weights + bias``.

    Backward first rescales ``g = grad_output / (input.size(1) + 1)``. Then,
    with ``P = pinv([1; input^T])^T`` (a row of ones stacked on top of
    ``input^T``) and ``E = g / |output|``:

    * ``grad_input  = g @ (conj(W) / |W|**2)^T`` (element-wise quotient)
    * ``grad_weight = -(P[1:] @ E)``
    * ``grad_bias   = -(P[0] @ E)`` reshaped to ``(1, size_out)``
    """

    @staticmethod
    def forward(ctx, input, weights, bias):
        output = torch.add(torch.mm(input, weights), bias)
        ctx.save_for_backward(input, weights, bias, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight, bias, output = ctx.saved_tensors
        # Gradients that are not requested stay ``None``.
        grad_input = grad_weight = grad_bias = None

        # Share the incoming error over the layer's inputs plus the bias term.
        grad_output = grad_output / (input.size(1) + 1)

        needs_weight = ctx.needs_input_grad[1]
        needs_bias = bias is not None and ctx.needs_input_grad[2]

        if ctx.needs_input_grad[0]:
            cinv = (torch.conj(weight) / torch.square(torch.abs(weight))).T
            grad_input = grad_output.mm(cinv)

        if needs_weight or needs_bias:
            # The pseudo-inverse is shared by the weight and bias updates, so
            # it must be available even when only the bias requires grad.
            ones_row = torch.ones(
                1, input.size(0), dtype=input.dtype, device=input.device
            )
            x_pinv = torch.linalg.pinv(torch.cat([ones_row, input.T])).T
            scaled_error = torch.div(grad_output, torch.abs(output))

            if needs_weight:
                grad_weight = (x_pinv[1:, :] @ scaled_error) * (-1)
            if needs_bias:
                grad_bias = (x_pinv[0, :] @ scaled_error).unsqueeze(dim=0) * (-1)

        return grad_input, grad_weight, grad_bias

In [None]:
# |export
class OutputLayer(nn.Module):
    """Complex-valued affine layer computing ``x @ weights + bias``.

    Both passes are delegated to ``OutputLayerFB``; its backward is a
    hand-written update rule, not the analytic derivative of the forward.

    Attributes:
        weights: ``(size_in, size_out)`` ``torch.cdouble`` parameter drawn
            from ``torch.randn`` and scaled by ``1 / sqrt(size_in)``.
        bias: ``(1, size_out)`` ``torch.cdouble`` parameter, initially zero.
        grad_output: ``None`` until a backward pass has run; afterwards the
            tensor most recently propagated back into this layer's output,
            divided by ``size_in + 1``.
    """

    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out

        weights = torch.randn(
            self.size_in, self.size_out, dtype=torch.cdouble
        ) / math.sqrt(self.size_in)
        self.weights = nn.Parameter(weights)

        # Kept 2-D so it broadcasts over the batch dimension in ``torch.add``.
        self.bias = nn.Parameter(torch.zeros(1, size_out, dtype=torch.cdouble))

        # Filled in by ``_hook_fn`` during backward.
        self.grad_output = None

    def forward(self, x):
        """Return ``x @ weights + bias`` and record the output's gradient on backward.

        Args:
            x: 2-D tensor of shape ``(batch, size_in)``; presumably
                ``torch.cdouble`` so that ``torch.mm`` accepts it alongside
                the weights.

        Returns:
            Tensor of shape ``(batch, size_out)``.
        """
        x = OutputLayerFB.apply(x, self.weights, self.bias)
        # ``register_hook`` raises on tensors that do not require grad
        # (e.g. under ``torch.no_grad()``), so only hook differentiable outputs.
        if x.requires_grad:
            x.register_hook(self._hook_fn)
        return x

    def _hook_fn(self, tensor):
        """Store the back-propagated tensor scaled by ``1 / (size_in + 1)``."""
        self.grad_output = tensor / (self.size_in + 1)


class OutputLayerFB(Function):
    """Autograd function behind ``OutputLayer``.

    Forward: ``input @ weights + bias``.

    Backward first rescales ``g = grad_output / (input.size(1) + 1)``. Then,
    with ``P = pinv([1; input^T])^T`` (a row of ones stacked on top of
    ``input^T``) and ``E = g / |output|``:

    * ``grad_input  = g @ (conj(W) / |W|**2)^T`` (element-wise quotient)
    * ``grad_weight = -(P[1:] @ E)``
    * ``grad_bias   = -(P[0] @ E)`` reshaped to ``(1, size_out)``
    """

    @staticmethod
    def forward(ctx, input, weights, bias):
        output = torch.add(torch.mm(input, weights), bias)
        ctx.save_for_backward(input, weights, bias, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight, bias, output = ctx.saved_tensors
        # Gradients that are not requested stay ``None``.
        grad_input = grad_weight = grad_bias = None

        # Share the incoming error over the layer's inputs plus the bias term.
        grad_output = grad_output / (input.size(1) + 1)

        needs_weight = ctx.needs_input_grad[1]
        needs_bias = bias is not None and ctx.needs_input_grad[2]

        if ctx.needs_input_grad[0]:
            cinv = (torch.conj(weight) / torch.square(torch.abs(weight))).T
            grad_input = grad_output.mm(cinv)

        if needs_weight or needs_bias:
            # The pseudo-inverse is shared by the weight and bias updates, so
            # it must be available even when only the bias requires grad.
            ones_row = torch.ones(
                1, input.size(0), dtype=input.dtype, device=input.device
            )
            x_pinv = torch.linalg.pinv(torch.cat([ones_row, input.T])).T
            scaled_error = torch.div(grad_output, torch.abs(output))

            if needs_weight:
                grad_weight = (x_pinv[1:, :] @ scaled_error) * (-1)
            if needs_bias:
                grad_bias = (x_pinv[0, :] @ scaled_error).unsqueeze(dim=0) * (-1)

        return grad_input, grad_weight, grad_bias

## Activation

In [None]:
# |export
class phase_activation(Function):
    """Autograd function that divides every element by its own magnitude.

    Forward returns ``input / |input|`` (a zero element therefore yields
    ``0 / 0``). Backward hands the incoming gradient through unchanged.
    """

    @staticmethod
    def forward(ctx, input):
        # NOTE(review): the saved input is never read in ``backward`` below;
        # confirm whether it is still needed.
        ctx.save_for_backward(input)
        magnitude = torch.abs(input)
        return torch.div(input, magnitude)

    @staticmethod
    def backward(ctx, grad_output):
        # ``forward`` takes a single tensor; the surplus trailing ``None`` is
        # ignored by autograd.
        upstream = grad_output
        return upstream, None


class cmplx_phase_activation(nn.Module):
    """Module wrapper that applies ``phase_activation`` to its input."""

    def forward(self, x):
        activated = phase_activation.apply(x)
        return activated

## Dropout

In [None]:
# | export
class DropoutFB(Function):
    """Autograd function implementing inverted dropout.

    Forward keeps each element with probability ``1 - p`` and rescales the
    survivors by ``1 / (1 - p)``. Backward passes ``grad_output`` through
    unchanged.
    """

    @staticmethod
    def forward(ctx, input, p):
        """Return ``input`` with elements dropped at probability ``p``.

        Args:
            input: tensor to apply dropout to.
            p: drop probability. ``p == 1`` drops everything and returns zeros.
        """
        if p == 1:
            # ``1.0 / (1 - p)`` would divide by zero; every element is dropped.
            return torch.zeros_like(input)

        binomial = torch.distributions.binomial.Binomial(probs=1 - p)
        # The distribution samples on its own (default) device; move the mask
        # to wherever ``input`` lives before multiplying.
        keep_mask = binomial.sample(input.size()).to(input.device)
        return input * keep_mask * (1.0 / (1 - p))

    @staticmethod
    def backward(ctx, grad_output):
        # NOTE(review): the keep mask is not re-applied here, so dropped
        # positions still receive the upstream value -- confirm this is the
        # intended behavior for this learning rule.
        # ``p`` is not a tensor, hence the ``None`` in its slot.
        return grad_output, None


class MyDropout(nn.Module):
    """Dropout module backed by ``DropoutFB``.

    Active only in training mode; in eval mode the input is returned as-is.

    Args:
        p: drop probability, must lie in ``[0, 1]``.

    Raises:
        ValueError: if ``p`` is outside ``[0, 1]``.
    """

    def __init__(self, p: float = 0.5):
        super().__init__()
        if p < 0 or p > 1:
            raise ValueError(
                f"dropout probability has to be between 0 and 1, but got {p}"
            )
        self.p = p

    def forward(self, X):
        if not self.training:
            return X
        return DropoutFB.apply(X, self.p)

## Linear Layers

In [None]:
# |export
class FirstLayerCplx(nn.Module):
    """Complex-valued affine layer computing ``x @ weights + bias``.

    Both passes are delegated to ``FirstLayerCplxFB``; its backward is a
    hand-written update rule, not the analytic derivative of the forward.

    Attributes:
        weights: ``(size_in, size_out)`` ``torch.cdouble`` parameter drawn
            from ``torch.randn`` and scaled by ``1 / sqrt(size_in)``.
        bias: ``(1, size_out)`` ``torch.cdouble`` parameter, initially zero.
    """

    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out

        weights = torch.randn(
            self.size_in, self.size_out, dtype=torch.cdouble
        ) / math.sqrt(self.size_in)
        self.weights = nn.Parameter(weights)

        # Kept 2-D so it broadcasts over the batch dimension in ``torch.add``.
        self.bias = nn.Parameter(torch.zeros(1, size_out, dtype=torch.cdouble))

    def forward(self, x):
        """Return ``x @ weights + bias`` for a ``(batch, size_in)`` input."""
        return FirstLayerCplxFB.apply(x, self.weights, self.bias)


class FirstLayerCplxFB(Function):
    """Autograd function behind ``FirstLayerCplx``.

    Forward: ``input @ weights + bias``.

    Backward, with ``P = pinv([1; input^T])^T`` (a row of ones stacked on top
    of ``input^T``) and ``E = grad_output / |output|``:

    * ``grad_input  = grad_output @ (conj(W) / |W|**2)^T`` (element-wise quotient)
    * ``grad_weight = -(P[1:] @ E)``
    * ``grad_bias   = -(P[0] @ E)`` reshaped to ``(1, size_out)``
    """

    @staticmethod
    def forward(ctx, input, weights, bias):
        output = torch.add(torch.mm(input, weights), bias)
        ctx.save_for_backward(input, weights, bias, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight, bias, output = ctx.saved_tensors
        # Gradients that are not requested stay ``None``.
        grad_input = grad_weight = grad_bias = None

        needs_weight = ctx.needs_input_grad[1]
        needs_bias = bias is not None and ctx.needs_input_grad[2]

        if ctx.needs_input_grad[0]:
            cinv = (torch.conj(weight) / torch.square(torch.abs(weight))).T
            grad_input = grad_output.mm(cinv)

        if needs_weight or needs_bias:
            # The pseudo-inverse is shared by the weight and bias updates, so
            # it must be available even when only the bias requires grad.
            ones_row = torch.ones(
                1, input.size(0), dtype=input.dtype, device=input.device
            )
            x_pinv = torch.linalg.pinv(torch.cat([ones_row, input.T])).T
            scaled_error = torch.div(grad_output, torch.abs(output))

            if needs_weight:
                grad_weight = (x_pinv[1:, :] @ scaled_error) * (-1)
            if needs_bias:
                grad_bias = (x_pinv[0, :] @ scaled_error).unsqueeze(dim=0) * (-1)

        return grad_input, grad_weight, grad_bias

In [None]:
# |export
class HiddenLayerCplx(nn.Module):
    """Complex-valued affine layer computing ``x @ weights + bias``.

    Both passes are delegated to ``HiddenLayerCplxFB``; its backward is a
    hand-written update rule, not the analytic derivative of the forward.

    Attributes:
        weights: ``(size_in, size_out)`` ``torch.cdouble`` parameter drawn
            from ``torch.randn`` and scaled by ``1 / sqrt(size_in)``.
        bias: ``(1, size_out)`` ``torch.cdouble`` parameter, initially zero.
    """

    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out

        weights = torch.randn(
            self.size_in, self.size_out, dtype=torch.cdouble
        ) / math.sqrt(self.size_in)
        self.weights = nn.Parameter(weights)

        # Kept 2-D so it broadcasts over the batch dimension in ``torch.add``.
        self.bias = nn.Parameter(torch.zeros(1, size_out, dtype=torch.cdouble))

    def forward(self, x):
        """Return ``x @ weights + bias`` for a ``(batch, size_in)`` input."""
        return HiddenLayerCplxFB.apply(x, self.weights, self.bias)


class HiddenLayerCplxFB(Function):
    """Autograd function behind ``HiddenLayerCplx``.

    Forward: ``input @ weights + bias``.

    Backward first rescales ``g = grad_output / (input.size(1) + 1)``. Then,
    with ``P = pinv([1; input^T])^T`` (a row of ones stacked on top of
    ``input^T``) and ``E = g / |output|``:

    * ``grad_input  = g @ (conj(W) / |W|**2)^T`` (element-wise quotient)
    * ``grad_weight = -(P[1:] @ E)``
    * ``grad_bias   = -(P[0] @ E)`` reshaped to ``(1, size_out)``
    """

    @staticmethod
    def forward(ctx, input, weights, bias):
        output = torch.add(torch.mm(input, weights), bias)
        ctx.save_for_backward(input, weights, bias, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight, bias, output = ctx.saved_tensors
        # Gradients that are not requested stay ``None``.
        grad_input = grad_weight = grad_bias = None

        # Share the incoming error over the layer's inputs plus the bias term.
        grad_output = grad_output / (input.size(1) + 1)

        needs_weight = ctx.needs_input_grad[1]
        needs_bias = bias is not None and ctx.needs_input_grad[2]

        if ctx.needs_input_grad[0]:
            cinv = (torch.conj(weight) / torch.square(torch.abs(weight))).T
            grad_input = grad_output.mm(cinv)

        if needs_weight or needs_bias:
            # The pseudo-inverse is shared by the weight and bias updates, so
            # it must be available even when only the bias requires grad.
            ones_row = torch.ones(
                1, input.size(0), dtype=input.dtype, device=input.device
            )
            x_pinv = torch.linalg.pinv(torch.cat([ones_row, input.T])).T
            scaled_error = torch.div(grad_output, torch.abs(output))

            if needs_weight:
                grad_weight = (x_pinv[1:, :] @ scaled_error) * (-1)
            if needs_bias:
                grad_bias = (x_pinv[0, :] @ scaled_error).unsqueeze(dim=0) * (-1)

        return grad_input, grad_weight, grad_bias

In [None]:
# |export
class OutputLayerCplx(nn.Module):
    """Complex-valued affine layer computing ``x @ weights + bias``.

    Both passes are delegated to ``OutputLayerCplxFB``; its backward is a
    hand-written update rule, not the analytic derivative of the forward.

    Attributes:
        weights: ``(size_in, size_out)`` ``torch.cdouble`` parameter drawn
            from ``torch.randn`` and scaled by ``1 / sqrt(size_in)``.
        bias: ``(1, size_out)`` ``torch.cdouble`` parameter, initially zero.
    """

    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out

        weights = torch.randn(
            self.size_in, self.size_out, dtype=torch.cdouble
        ) / math.sqrt(self.size_in)
        self.weights = nn.Parameter(weights)

        # Kept 2-D so it broadcasts over the batch dimension in ``torch.add``.
        self.bias = nn.Parameter(torch.zeros(1, size_out, dtype=torch.cdouble))

    def forward(self, x):
        """Return ``x @ weights + bias`` for a ``(batch, size_in)`` input."""
        return OutputLayerCplxFB.apply(x, self.weights, self.bias)


class OutputLayerCplxFB(Function):
    """Autograd function behind ``OutputLayerCplx``.

    Forward: ``input @ weights + bias``.

    Backward first rescales ``g = grad_output / (input.size(1) + 1)``. Then,
    with ``P = pinv([1; input^T])^T`` (a row of ones stacked on top of
    ``input^T``) and ``E = g / |output|``:

    * ``grad_input  = g @ (conj(W) / |W|**2)^T`` (element-wise quotient)
    * ``grad_weight = -(P[1:] @ E)``
    * ``grad_bias   = -(P[0] @ E)`` reshaped to ``(1, size_out)``
    """

    @staticmethod
    def forward(ctx, input, weights, bias):
        output = torch.add(torch.mm(input, weights), bias)
        ctx.save_for_backward(input, weights, bias, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight, bias, output = ctx.saved_tensors
        # Gradients that are not requested stay ``None``.
        grad_input = grad_weight = grad_bias = None

        # Share the incoming error over the layer's inputs plus the bias term.
        grad_output = grad_output / (input.size(1) + 1)

        needs_weight = ctx.needs_input_grad[1]
        needs_bias = bias is not None and ctx.needs_input_grad[2]

        if ctx.needs_input_grad[0]:
            cinv = (torch.conj(weight) / torch.square(torch.abs(weight))).T
            grad_input = grad_output.mm(cinv)

        if needs_weight or needs_bias:
            # The pseudo-inverse is shared by the weight and bias updates, so
            # it must be available even when only the bias requires grad.
            ones_row = torch.ones(
                1, input.size(0), dtype=input.dtype, device=input.device
            )
            x_pinv = torch.linalg.pinv(torch.cat([ones_row, input.T])).T
            scaled_error = torch.div(grad_output, torch.abs(output))

            if needs_weight:
                grad_weight = (x_pinv[1:, :] @ scaled_error) * (-1)
            if needs_bias:
                grad_bias = (x_pinv[0, :] @ scaled_error).unsqueeze(dim=0) * (-1)

        return grad_input, grad_weight, grad_bias