In [2]:
import math
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import pytorch_lightning as pl
import torch.nn.functional as F
from pytorch_lightning import Trainer
from torchnlp.nn.attention import Attention

In [3]:
from blitz.modules import BayesianLinear
from blitz.modules.conv_bayesian_layer import BayesianConv1d
from blitz.modules.gru_bayesian_layer import BayesianGRU

In [61]:
import math
import torch
from torch import nn
import torch.nn.functional as F


class GELU(nn.Module):
    def forward(self, x):
        return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))


class PositionalEmbedding(nn.Module):
    def __init__(self, max_len, d_model):
        super().__init__()
        self.pe = nn.Embedding(max_len, d_model)

    def forward(self, x):
        batch_size = x.size(0)
        return self.pe.weight.unsqueeze(0).repeat(batch_size, 1, 1)


class LayerNorm(nn.Module):
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.weight = nn.Parameter(torch.ones(features))
        self.bias = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.weight * (x - mean) / (std + self.eps) + self.bias


class Attention(nn.Module):
    def forward(self, query, key, value, mask=None, dropout=None):
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)

        p_attn = F.softmax(scores, dim=-1)
        if dropout is not None:
            p_attn = dropout(p_attn)

        return torch.matmul(p_attn, value), p_attn


class MultiHeadedAttention(nn.Module):
    def __init__(self, h, d_model, dropout=0.1):
        super().__init__()
        assert d_model % h == 0

        self.d_k = d_model // h
        self.h = h

        self.linear_layers = nn.ModuleList([nn.Linear(d_model, d_model) for _ in range(3)])
        self.output_linear = nn.Linear(d_model, d_model)
        self.attention = Attention()

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):
        batch_size = query.size(0)

        query, key, value = [l(x).view(batch_size, -1, self.h, self.d_k).transpose(1, 2)
                             for l, x in zip(self.linear_layers, (query, key, value))]

        x, attn = self.attention(
            query, key, value, mask=mask, dropout=self.dropout)

        x = x.transpose(1, 2).contiguous().view(
            batch_size, -1, self.h * self.d_k)

        return self.output_linear(x)


class PositionwiseFeedForward(nn.Module):
    def __init__(self, d_model, d_ff):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.activation = GELU()

    def forward(self, x):
        return self.w_2(self.activation(self.w_1(x)))


class SublayerConnection(nn.Module):
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.layer_norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        return self.layer_norm(x + self.dropout(sublayer(x)))


class TransformerBlock(nn.Module):
    def __init__(self, hidden, attn_heads, feed_forward_hidden, dropout):
        super().__init__()
        self.attention = MultiHeadedAttention(
            h=attn_heads, d_model=hidden, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(
            d_model=hidden, d_ff=feed_forward_hidden)
        self.input_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.output_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, mask):
        x = self.input_sublayer(
            x, lambda _x: self.attention.forward(_x, _x, _x, mask=mask))
        x = self.output_sublayer(x, self.feed_forward)
        return self.dropout(x)


class BERT4NILM(nn.Module):
    # def __init__(self, args):
    def __init__(self, window_size,drop_out=0.5,output_size=1):
        super().__init__()
        # self.args = args
        # self.dropout_rate = args.drop_out
        # self.original_len = args.window_size
        # self.output_size = args.output_size
        self.original_len = window_size
        self.latent_len = int(self.original_len / 2)
        self.dropout_rate = drop_out

        self.hidden = 256
        self.heads = 2
        self.n_layers = 2
        self.output_size = output_size

        self.conv = nn.Conv1d(in_channels=1, out_channels=self.hidden,
                               kernel_size=5, stride=1, padding=2, padding_mode='replicate')
        self.pool = nn.LPPool1d(norm_type=2, kernel_size=2, stride=2)

        self.position = PositionalEmbedding(
            max_len=self.latent_len, d_model=self.hidden)
        self.layer_norm = LayerNorm(self.hidden)
        self.dropout = nn.Dropout(p=self.dropout_rate)

        self.transformer_blocks = nn.ModuleList([TransformerBlock(
            self.hidden, self.heads, self.hidden * 4, self.dropout_rate) for _ in range(self.n_layers)])

        self.deconv = nn.ConvTranspose1d(
            in_channels=self.hidden, out_channels=self.hidden, kernel_size=4, stride=2, padding=1)
        
        self.linear1 = nn.Linear(self.hidden, 128)
        self.flat = nn.Flatten()
        self.linear2 = nn.Linear(128*window_size, 128)
        self.out = nn.Linear(128, 1)     
        self.truncated_normal_init()
        

    def truncated_normal_init(self, mean=0, std=0.02, lower=-0.04, upper=0.04):
        params = list(self.named_parameters())
        for n, p in params:
            if 'layer_norm' in n:
                continue
            else:
                with torch.no_grad():
                    l = (1. + math.erf(((lower - mean) / std) / math.sqrt(2.))) / 2.
                    u = (1. + math.erf(((upper - mean) / std) / math.sqrt(2.))) / 2.
                    p.uniform_(2 * l - 1, 2 * u - 1)
                    p.erfinv_()
                    p.mul_(std * math.sqrt(2.))
                    p.add_(mean)

    def forward(self, sequence):
        print(sequence.shape)
        x_token = self.pool(self.conv(sequence.unsqueeze(1))).permute(0, 2, 1)
        embedding = x_token + self.position(sequence)
        x = self.dropout(self.layer_norm(embedding))
        print(x.shape)

        mask = None
        for transformer in self.transformer_blocks:
            x = transformer.forward(x, mask)
        print(x.shape)
        x = self.deconv(x.permute(0, 2, 1)).permute(0, 2, 1)
        print(x.shape)
        
        x = torch.tanh(self.linear1(x))
        print(x.shape)
        x = self.flat(x)
        print(x.shape)
        x = self.linear2(x)
        print(x.shape)
        out = self.out(x)
        print(out.shape)
        return out


In [62]:
batch_size = 1024
window = 50 #aka seq length
# embedding_dim = 1
x = torch.randn(batch_size, window) 
cf = BERT4NILM(window)
x = x
out = cf(x)

torch.Size([1024, 50])
torch.Size([1024, 25, 256])
torch.Size([1024, 25, 256])
torch.Size([1024, 50, 256])
torch.Size([1024, 50, 128])
torch.Size([1024, 6400])
torch.Size([1024, 128])
torch.Size([1024, 1])


In [41]:

class FReal(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # x = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2)
        x = torch.fft.fft(x, dim=-1)
        return x.real

class FImag(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # x = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2)
        x = torch.fft.fft(x, dim=-1)
        return x.imag

class _Cnn1(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dropout):
        super(_Cnn1, self).__init__()

        left, right = kernel_size//2, kernel_size//2
        if kernel_size%2==0 :
            right -= 1
        padding = (left, right, 0, 0)

        self.conv = nn.Sequential(
            nn.ZeroPad2d(padding),
            nn.Conv1d(in_channels, out_channels, kernel_size),
            nn.Dropout(dropout),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return self.conv(x)
class ConvFourier(nn.Module):

    def __init__(self, window_size, dropout=0, lr=None):
        super(ConvFourier, self).__init__()
        self.MODEL_NAME = 'ConvFourier'
        self.drop = dropout
        self.lr = lr
        cnn_out = 8 #the out_features of last CNN
        self.dense_input = cnn_out*window_size


        self.conv = nn.Sequential(
            _Cnn1(1, cnn_out, kernel_size=5, dropout=self.drop),
            # nn.LPPool1d(norm_type=2, kernel_size=2, stride=2)
        )
        self.freal = FReal()
        self.fimag = FImag()
        self.attention = Attention(window_size,attention_type='dot')
        self.flat = nn.Flatten()
        self.mlp = nn.Linear(self.dense_input, 1)
#         self.mlp = nn.Sequential(
#             nn.Linear(self.dense_input, 4*self.dense_input),
#             nn.Dropout(self.drop),
#             nn.GELU(),
#             nn.Linear(4*self.dense_input, self.dense_input),
#             nn.Dropout(self.drop),
#             nn.GELU(),
#             nn.Linear(self.dense_input, 1),
#         )
        
    def forward(self, x):
        x = x
        x = x.unsqueeze(1)
        cnn = self.conv(x)
        print(cnn.shape)
        real = self.freal(cnn)
        imag = self.fimag(cnn)
        print(imag.shape,real.shape)
        attn, _ = self.attention(real,imag)
        attn = self.flat(attn)
        mlp = self.mlp(attn)
        print(mlp.shape)
        return mlp

In [42]:
batch_size = 1024
window = 50 #aka seq length
# embedding_dim = 1
x = torch.randn(batch_size, window) 
cf = ConvFourier(window)
x = x
out = cf(x)

TypeError: __init__() takes 1 positional argument but 2 were given

In [53]:
class _Dense(nn.Module):
    def __init__(self, in_features, out_features, dropout):
        super(_Dense, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.Dropout(dropout),
            nn.GELU(),
        )
    def forward(self, x):
        return self.linear(x)

class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return self.net(x)
      
class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

class FNetBlock(nn.Module):
  def __init__(self):
    super().__init__()

  def forward(self, x):
    x = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2).real
    return x

class FNet(nn.Module):
    def __init__(self, dim, depth, mlp_dim, dropout = 0.):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, FNetBlock()),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
            ]))
    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x


    
class MyNet(nn.Module):
    def __init__(self, window, features, hidden_dim, depth, drop):
        super(MyNet, self).__init__()
        
        self.linear = nn.Sequential(
            nn.Linear(1, features),
#             nn.GELU(),
            nn.Dropout(drop),
        )
        
        self.fnet = FNet(features, depth, hidden_dim, drop)
        self.flat = nn.Flatten()
        self.dense1 = nn.Linear(features*window, 64)
        self.dense2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        
    def forward(self, x):
        x = x
        x = x.unsqueeze(2)
        print(x.shape)
        x = self.linear(x)
        print(x.shape)
        x = self.fnet(x)
        print(x.shape)
        x = self.flat(x)
        print('flat', x.shape)
        x = self.dense1(x)
        print(x.shape)
        x = self.dense2(x)
        print(x.shape)
        out = self.output(x)
        print(out.shape)
        return out

In [54]:
batch_size = 1024
window = 50 #aka seq length
# embedding_dim = 1
x = torch.randn(batch_size, window) 
cf = MyNet(window, 128, 256, 1, 0.5)
x = x
out = cf(x)

torch.Size([1024, 50, 1])
torch.Size([1024, 50, 128])
torch.Size([1024, 50, 128])
flat torch.Size([1024, 6400])
torch.Size([1024, 64])
torch.Size([1024, 32])
torch.Size([1024, 1])


In [11]:
batch_size = 1024
window = 50 #aka seq length
# embedding_dim = 1
x = torch.randn(batch_size,window) 

x = x.unsqueeze(2)
print(x.shape)
linear_net = nn.Sequential(
            nn.Linear(1, 16),)
linear_net(x).shape

torch.Size([1024, 50, 1])


torch.Size([1024, 50, 16])

In [6]:
class _Dense(nn.Module):
    def __init__(self, in_features, out_features, dropout):
        super(_Dense, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.Dropout(dropout),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return self.linear(x)

class _Cnn1(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dropout):
        super(_Cnn1, self).__init__()

        left, right = kernel_size//2, kernel_size//2
        if kernel_size%2==0 :
            right -= 1
        padding = (left, right, 0, 0)

        self.conv = nn.Sequential(
            nn.ZeroPad2d(padding),
            nn.Conv1d(in_channels, out_channels, kernel_size),
            nn.Dropout(dropout),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return self.conv(x)


class FNETBLock(nn.Module):

    def __init__(self, input_dim, dim_feedforward, dropout=0.0):
        """
        Inputs:
            input_dim - Dimensionality of the input
            dim_feedforward - Dimensionality of the hidden layer in the MLP
            dropout - Dropout probability to use in the dropout layers
        """
        super().__init__()

        # Two-layer MLP
        self.linear_net = nn.Sequential(
            nn.Linear(input_dim, dim_feedforward),
            nn.Dropout(dropout),
            nn.ReLU(inplace=True),
            nn.Linear(dim_feedforward, input_dim)
        )

        # Layers to apply in between the main layers
        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)
        self.dropout = nn.Dropout(dropout)


    def forward(self, x, mask=None):
        x = x
        fft_out = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2).real
        x = x + self.dropout(fft_out)
        print('shape dropout', x.shape)
        x = x.permute(0, 2, 1)
        x = self.norm1(x)
#         x = x.permute(0, 2, 1)
        print('shape after norm', x.shape)

        # MLP part
        linear_out = self.linear_net(x)
        print('shape after linear', x.shape)
        x = x + self.dropout(linear_out)
        x = self.norm2(x)
        print('shape after norm2', x.shape)
        return x

class FNET(nn.Module):

    def __init__(self, depth, kernel_size, cnn_dim, **block_args):
        super(FNET, self).__init__()

        drop = block_args['dropout']
        input_dim = block_args['input_dim']
        dense_in = input_dim*cnn_dim//2

        self.conv = _Cnn1(1, cnn_dim, kernel_size=kernel_size,dropout=drop)
        self.pool = nn.LPPool1d(norm_type=2, kernel_size=2, stride=2)

        self.fnet_layers = nn.ModuleList([FNETBLock(**block_args) for _ in range(depth)])

        self.flat = nn.Flatten()
        self.dense1 = _Dense(dense_in, cnn_dim, drop)
        self.dense2 = _Dense(cnn_dim, cnn_dim//2, drop)
        self.output = nn.Linear(cnn_dim//2, 1)

    def forward(self, x):
        # x must be in shape [batch_size, 1, window_size]
        # eg: [1024, 1, 50]
        x = x
        x = x.unsqueeze(1)
        x = self.conv(x)
        print('shape after cnn', x.shape)
        x = x.transpose(1, 2).contiguous()
        x = self.pool(x)
        x = x.transpose(1, 2).contiguous()
        print('shape after pool', x.shape)
        for layer in self.fnet_layers:
            x = layer(x)
        print('shape after fblock', x.shape)
        x = self.flat(x)
        x = self.dense1(x)
        x = self.dense2(x)
        out = self.output(x)
        print(out.shape)
        return out
    
    
    
    
class FAED(nn.Module):

    def __init__(self, depth, kernel_size, **block_args):
        super(FAED, self).__init__()

        drop = block_args['dropout']
        dim_cnn = block_args['input_dim']
        dim_feedforward = block_args['dim_feedforward']

        self.conv = _Cnn1(1, dim_cnn, kernel_size=kernel_size,dropout=drop)
        self.fnet_layers = nn.ModuleList([FNETBLock(**block_args) for _ in range(depth)])

        self.dense = _Dense(dim_cnn, dim_feedforward*2, drop)
        self.output = nn.Linear(dim_feedforward*2, 1)

    def forward(self, x):
        # x must be in shape [batch_size, 1, window_size]
        # eg: [1024, 1, 50]
        x = x
        print(x.shape)
        x = x.unsqueeze(1)
        print(x.shape)
        x = self.conv(x)
        print(x.shape)
        x = x.permute(0, 2, 1)
        for layer in self.fnet_layers:
            x = layer(x)
            print(x.shape)

        x = self.dense(x)
        print(x.shape)
        out = self.output(x)
        print(out.shape)
        return out

In [7]:
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 16
x = torch.randn(batch_size, window, embedding_dim) 
# cf = FNETBLock(50, 50*4, 0)
cf = FAED(50, 50*4, 0)
x = x
out = cf(x)

TypeError: __init__() takes 3 positional arguments but 4 were given

In [None]:
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 16
x = torch.randn(batch_size, window, embedding_dim) 
print(x.shape)
# x = x.permute(0, 2, 1)
linear_net = nn.Linear(16, 200)
# linear_net = nn.Sequential(
#             nn.Linear(50, 200),
#             nn.Dropout(0.5),
#             nn.ReLU(inplace=True),
#             nn.Linear(200, 50)
# )
x = linear_net(x)
print(x.shape)

In [None]:
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 1
x = torch.randn(batch_size, window, embedding_dim) 
print(x.shape)
x = x.transpose(1, 2).contiguous()
print('input shape: ', x.shape)

conv = _Cnn1(in_channels=1, out_channels=16, kernel_size=kernel_size,dropout=drop)
x = conv(x)
x.shape

In [None]:
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 111
x = torch.randn(batch_size, window, embedding_dim) 
# print(x.shape)
x = x.transpose(1, 2).contiguous()
print('input shape: ', x.shape)
dim_cnn = 256
input_dim = 50 #seq len
hidden_dim = 512
drop = 0.5
linear_net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Dropout(drop),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, input_dim)
        )
# linear_net(x).shape
x = _Dense(50, 125, .5)(x)
print(x.shape)
x = _Dense(125, 50, .5)(x)
print(x.shape)
x = _Dense(50, 1, .5)(x)
print(x.shape)

In [None]:
class FReal(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # x = torch.fft.fft(x, dim=-1)
        x = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2)
        return x.real

class FImag(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2)
        return x.imag
class ConvFourier(nn.Module):

    def __init__(self, window_size, dropout=0, lr=None):
        super(ConvFourier, self).__init__()
        self.MODEL_NAME = 'CNN model with fourier between linear layers'
        self.drop = dropout
        self.lr = lr

        self.dense_input = 50*window_size #50 is the out_features of last CNNF

        self.conv = nn.Sequential(
            _Cnn1(1, 30, kernel_size=10, dropout=self.drop),
            _Cnn1(30, 40, kernel_size=8, dropout=self.drop),
            _Cnn1(40, 50, kernel_size=5, dropout=self.drop),
            nn.Flatten()
        )
        # self.dense = _Dense(self.dense_input, 1024, self.drop)
        self.linear = nn.Linear(self.dense_input, 2*self.dense_input)
        self.freal = FReal()
        self.fimag = FImag()
        self.output = nn.Linear(4*self.dense_input, 1)

    def forward(self, x):
        # x must be in shape [batch_size, 1, window_size]
        # eg: [1024, 1, 50]
        x = x
        print('input', x.shape)

        x = self.conv(x)
        print('x after cnn', x.shape)
        x = self.linear(x)
        print('x after linear', x.shape)
        x_r = self.freal(x) + x
        x_im = self.fimag(x) + x
        y = torch.cat([x_r, x_im], dim= -1)
#         print('cat real+imag', y.shape)
#         x = torch.cat([x,y], dim= -1)
#         print('cat x+y', x.shape)
        out = self.output(y)
        return out


In [None]:
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 1
x = torch.randn(batch_size, window, embedding_dim) 
# print(x.shape)
x = x.transpose(1, 2).contiguous()
print('input shape: ', x.shape)


In [None]:
drop = 0.1
dense_input = 50*window
cf = ConvFourier( window, drop)
x = x
out = cf(x)
# conv = nn.Sequential(
#             _Cnn1(1, 30, kernel_size=10, dropout=drop),
#             _Cnn1(30, 40, kernel_size=8, dropout=drop),
#             _Cnn1(50, 50, kernel_size=5, dropout=drop),
#             nn.Flatten())
# linear = nn.Linear(dense_input, 4*dense_input)
# freal = FReal()
# fimag = FImag()
# output = nn.Linear(4*dense_input, 1)

In [None]:
x = torch.randn(2, 3)
print(x)
torch.cat([x,x], dim= 0)

In [None]:
class FReal(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2)
#         x = torch.fft.fft(x, dim=-1)
        return x.real

class FImag(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2)
#         x = torch.fft.fft(x, dim=-1)
        return x.imag
class ConvFourier2(nn.Module):

    def __init__(self, window_size, dropout=0, lr=None):
        super(ConvFourier2, self).__init__()
        self.MODEL_NAME = 'CNN model with fourier between linear layers'
        self.drop = dropout
        self.lr = lr
        cnn_out = 16 #the out_features of last CNN
        self.dense_input = cnn_out*window_size


        self.conv = nn.Sequential(
            _Cnn1(1, cnn_out, kernel_size=11, dropout=self.drop),
            # _Cnn1(30, 40, kernel_size=8, dropout=self.drop),
            # _Cnn1(40, 50, kernel_size=6, dropout=self.drop),
            # _Cnn1(50, 50, kernel_size=5, dropout=self.drop),
            # _Cnn1(50, 50, kernel_size=5, dropout=self.drop),
            # nn.Flatten()
        )
        self.freal = FReal()
        self.fimag = FImag()

        self.mlp1 = nn.Sequential(
            nn.Linear(self.dense_input, 2*self.dense_input),
            nn.Dropout(self.drop),
            nn.ReLU(inplace=True),
            nn.Linear(2*self.dense_input, self.dense_input)
        )

        self.mlp2 = nn.Sequential(
            nn.Linear(self.dense_input, 2*self.dense_input),
            nn.Dropout(self.drop),
            nn.ReLU(inplace=True),
            nn.Linear(2*self.dense_input, self.dense_input)
        )

        self.flat = nn.Flatten()
        self.dense1 = _Dense(2*self.dense_input, 1024, dropout=self.drop)
        self.output = nn.Linear(1024, 1)

    def forward(self, x):
        # x must be in shape [batch_size, 1, window_size]
        # eg: [1024, 1, 50]
        x = x
#         x = x.unsqueeze(1)
        cnn = self.conv(x)
        print(cnn.shape)
        real_x = self.freal(cnn)
        imag_x = self.fimag(cnn)
        real_x = self.flat(real_x)
        imag_x = self.flat(imag_x)
        cnn = self.flat(cnn)
        mlp1 = self.mlp1(real_x) + cnn
        mlp2 = self.mlp2(imag_x) + cnn
        x = torch.cat([real_x, imag_x], dim= -1)
        x = self.flat(x)
        x = self.dense1(x)

        out = self.output(x)
        return out

In [None]:
cf = ConvFourier2( window, drop)
x = x
out = cf(x)

# THIS

In [None]:
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 1
x = torch.randn(batch_size, window, embedding_dim) 
# print(x.shape)
x = x.transpose(1, 2).contiguous()
print('input shape: ', x.shape)

dim_cnn = 128
input_dim = 50 #seq len
hidden_dim = 2*input_dim
drop = 0.5
ccc = _Cnn1(in_channels=1, out_channels=dim_cnn, kernel_size=5,dropout=drop)

pool = nn.LPPool1d(norm_type=2, kernel_size=2, stride=2)
m2pool = nn.MaxPool2d(3, stride=2)
f = nn.Flatten()

norm1 = nn.LayerNorm(input_dim)
norm2 = nn.LayerNorm(input_dim)

linear_net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Dropout(drop),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, input_dim)
        )

# linear_out = nn.Sequential(
#             nn.Linear(50*128, 256),
#             nn.Dropout(drop),
#             nn.ReLU(inplace=True),
#         )
d1 = _Dense(50*dim_cnn//2, dim_cnn,drop)
d2 = _Dense(dim_cnn, dim_cnn//2, drop)
output = nn.Linear(dim_cnn//2, 1)
x = ccc(x)
print('ccc out shape: ', x.shape)
x = x.transpose(1, 2).contiguous()
x = pool(x)
# x = m2pool(x)
x = x.transpose(1, 2).contiguous()
print('pool out shape: ', x.shape)

print('fft out shape: ', x.shape)

x = x + torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2).real
x = norm1(x)
print('norm1 out shape: ', x.shape)


x = x + linear_net(x)
x = norm2(x)
print('norm2 out shape: ', x.shape)
x = f(x)
print('flat out shape: ', x.shape)
# x = linear_out(x)
x = d1(x)
print('d1 out shape: ', x.shape)
x = d2(x)
print('d2 out shape: ', x.shape)
x = output(x)
print('final out shape: ', x.shape)
del x

In [None]:
12//5

In [None]:
128*50

In [None]:
# pool = nn.LPPool1d(norm_type=2, kernel_size=2, stride=2)

In [None]:
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 1
x = torch.randn(batch_size, window, embedding_dim) 
print(x.shape)
x = x.transpose(1, 2).contiguous()
print('input shape: ', x.shape)

block_args = {'dropout':0.5,
              'input_dim': 50,
              'dim_feedforward': 64}

depth = 1
dim_cnn = 16
kernel_size = 4
drop = block_args['dropout']
dim_cnn = block_args['input_dim']
dim_feedforward = block_args['dim_feedforward']

conv = _Cnn1(in_channels=1, out_channels=16, 
             kernel_size=kernel_size,dropout=drop)
f = nn.Flatten()

fnet_layers = nn.ModuleList([FNETBLock(input_dim=50, 
                                       dim_feedforward=64, dropout=0.0) for _ in range(depth)])

dense = _Dense(dim_cnn, dim_feedforward*2, drop)
output = nn.Linear(dim_feedforward*2, 1)


x = x
# x = x.unsqueeze(2)
print(x.shape)
x = conv(x)
print('conv out shape:', x.shape)
x = f(x)
print('f out shape: ', x.shape)
for layer in fnet_layers:
    x = layer(x)
    print('fblock out shape: ', x.shape)

x = dense(x)
print('dense out: ', x.shape)
out = output(x)
print('final out shape: ', x.shape)



In [None]:
def scaled_dot_product(q, k, v, mask=None):
    d_k = q.size()[-1]
    attn_logits = torch.matmul(q, k.transpose(-2, -1))
    attn_logits = attn_logits / math.sqrt(d_k)
    if mask is not None:
        attn_logits = attn_logits.masked_fill(mask == 0, -9e15)
    attention = F.softmax(attn_logits, dim=-1)
    values = torch.matmul(attention, v)
    return values, attention

In [None]:
kernels = [10, 8, 6, 5]
for kernel_size in kernels:
    left, right = kernel_size//2, kernel_size//2
    if kernel_size%2==0 :
        print(kernel_size, 'is even number')
        right -= 1
    else:
        print(kernel_size, 'is odd number')
    padding = (left, right, 0, 0)
    print(padding)

# FFT

In [None]:
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 1
x = torch.randn(batch_size, window, embedding_dim) 
print(x.shape)
fft2 = torch.fft.fft2(x).real
two_ffts = torch.fft.fft(torch.fft.fft(x, dim=-1), dim=-2).real
# fft2.real
# two_ffts = torch.fft.fft(torch.fft.fft(x, dim=0), dim=1)
torch.allclose(fft2, two_ffts)
two_ffts.shape

# Conv1d

In [None]:
# [batch_size, sequence_length, embedding_dim]

# convolution_layer = nn.conv1d(in_channels, out_channels, kernel_size)
# in_channels = embedding_dim
# out_channels = arbitrary int
# kernel_size = 2 (I want bigrams)
batch_size = 1024
window = 50 #aka seq length
embedding_dim = 1
x = torch.randn(batch_size, window, embedding_dim) 
print(x.shape)
x = x.transpose(1, 2).contiguous()
print(x.shape)

In [None]:
conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=16, kernel_size=4,stride=1)
feature_map = conv1(x)
feature_map.shape

In [None]:
inp = torch.randn(1024, 16, 47)
# With Learnable Parameters
# m = nn.LayerNorm(input.size()[1:])
# # Without Learnable Parameters
# m = nn.LayerNorm(input.size()[1:], elementwise_affine=False)
# # Normalize over last two dimensions
# m = nn.LayerNorm([10, 10])
# Normalize over last dimension of size 16
inp = inp.permute(0, 2, 1)
print(inp.shape)
m = nn.LayerNorm(16)
# Activating the module
output = m(inp)
output = output.permute(0, 2, 1)
output.shape

In [None]:

m = nn.LayerNorm(16)
output = m(feature_map)

In [None]:
a = torch.randn(1024, 1, 10)  
print(a.size())
a = nn.ZeroPad2d((5,4,0,0))(a)
a = nn.Conv1d(1, 30, kernel_size=10)(a)
print(a.size())
a = nn.ZeroPad2d((3,4,0,0))(a)
a = nn.Conv1d(30, 40, kernel_size=8)(a)
print(a.size())
a = nn.ZeroPad2d((2,3,0,0))(a)
a = nn.Conv1d(40, 50, kernel_size=6)(a)
print(a.size())
a = nn.ZeroPad2d((2,2,0,0))(a)
a = nn.Conv1d(50, 50, kernel_size=5)(a)
print(a.size())
a = nn.ZeroPad2d((2,2,0,0))(a)
a = nn.Conv1d(50, 50, kernel_size=5)(a)
print(a.size())
a = nn.Flatten()(a)
print(a.size())
in_features = a.size()[1]
out_features = 1024
a = nn.Linear(in_features, out_features)(a)
print(a.size())

## ATTENTION

In [None]:
# simple attention layer (from torchnlp) like keras

In [None]:
x = feature_map.permute(0, 1,2)
print(x.shape)
attention = Attention(47)
out, weights = attention(x, x)
out.size()


In [None]:
# scaled attention / multihead attention

In [None]:
# batch_size = 1024
# window = 50 #aka seq length
# embedding_dim = 1
# x = torch.randn(batch_size, window, embedding_dim) 
x = feature_map
x = x.permute(0, 2, 1)
query, key, value =  x, x, x
# multihead_attn = nn.MultiheadAttention(embed_dim=16, num_heads=16)
# attn_output, attn_output_weights = multihead_attn(query, key, value)
attn_output, attn_output_weights = scaled_dot_product(query, key, value)
print('weights shape: ', attn_output_weights.shape, 'attn_output shape: ', attn_output.shape)

# GRU

In [None]:
feature_map = feature_map.permute(0, 2, 1)
feature_map.shape

In [None]:
feature_map = attn_output

In [None]:
# input of shape (batch, seqlength ,input_size)
# output of shape (batch, seqlength, hiddensize*2)
b1 = nn.GRU(input_size=16, hidden_size=64, batch_first=True,
            bidirectional=True,
            dropout=0.5)
b1_out = b1(feature_map)[0]
b1_out.shape

In [None]:
b2 = nn.GRU(128, 256, batch_first=True,
            bidirectional=True,
            dropout=0.5)
b2_out = b2(b1_out)[0]
b2_out.shape

In [None]:
b2_out = b2_out[:, -1, :]
b2_out.shape

# Dense/Linear/FC


In [None]:
class _Dense(nn.Module):

    def __init__(self, in_features, out_features, dropout):
        super(_Dense, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.Dropout(dropout),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return self.linear(x)

In [None]:
x = b2_out[:, -1, :]
print(x.shape)

In [None]:
dense1 = _Dense(512, 128, 0.5)
dense_out1 = dense1(x)
dense_out1.shape

In [None]:
dense2 = _Dense(128, 64, 0.5)
dense_out2 = dense2(dense_out1)
dense_out2.shape

In [None]:
output = nn.Linear(64, 1)
out = output(dense_out2)
out.shape