Based on the EfficientNet implementation in timm: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/efficientnet.py — we will repurpose this architecture for use with 1-dimensional sequence data.

In [1]:
import torch
import numpy as np
import pandas as pd
import os
import h5py
from exabiome.nn.loader import read_dataset, LazySeqDataset
import argparse
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Location of the HDF5 input file that is handed to LazySeqDataset.
path = '/global/homes/a/azaidi/ar122_r202.toy.input.h5'

In [3]:
# Options bundle passed to LazySeqDataset as its first argument.
hparams = argparse.Namespace(
    load=False,
    window=4096,
    step=4096,
    classify=True,
    tgt_tax_lvl="phylum",
    fwd_only=True,
)

In [4]:
# Build the dataset from the input file and the options in hparams;
# keep_open=True is forwarded to LazySeqDataset as-is.
chunks = LazySeqDataset(hparams, path=path, keep_open=True)
# Number of items in the dataset (shown as the cell output).
len(chunks)

19010

In [6]:
def old_pad_seq(seq, length=4096):
    """Right-pad a 1-D sequence with zeros up to ``length`` elements.

    Args:
        seq: 1-D tensor holding the sequence values.
        length: Target number of elements. Defaults to 4096, the same
            value used for ``window`` in this notebook's ``hparams``.

    Returns:
        A new tensor of ``length`` elements (created by ``torch.zeros``, so
        it has the default dtype) when ``seq`` is shorter than ``length``;
        otherwise ``seq`` itself, unchanged. Sequences longer than
        ``length`` are NOT truncated.
    """
    n = len(seq)
    if n >= length:
        return seq
    padded = torch.zeros(length)
    # Copy the real values to the front; the tail stays zero.
    padded[:n] = seq
    return padded

In [7]:
class taxon_ds(Dataset):
    """Map-style dataset that exposes ``(input, label)`` pairs from ``chunks``.

    Each record of ``chunks`` is indexed positionally: element ``1`` is used
    as the input and element ``2`` as the label.

    Args:
        chunks: Indexable collection of records; must support ``len()``.
        transform: Optional callable applied to the input before a leading
            axis is added.
    """

    def __init__(self, chunks, transform=None):
        self.chunks = chunks
        self.transform = transform

    def __len__(self):
        return len(self.chunks)

    def __getitem__(self, idx):
        """Return ``(x.unsqueeze(0), y)`` for the record at ``idx``."""
        # Use the instance's own data rather than a module-level ``chunks``
        # global, and fetch the record once instead of once per field.
        record = self.chunks[idx]
        x, y = record[1], record[2]
        if self.transform is not None:
            x = self.transform(x)
        # Add a leading axis so the input has a channel dimension.
        return (x.unsqueeze(0), y)

In [9]:
%time
ds = taxon_ds(chunks, old_pad_seq)

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 8.34 µs


In [10]:
# Serve the dataset in shuffled batches of 16.
dl = DataLoader(ds, batch_size=16, shuffle=True)
# Number of batches in one pass over the data.
len(dl)

1189

In [11]:
# Pull a single batch and inspect the shapes of its inputs and labels.
batch = next(iter(dl))
batch[0].shape, batch[1].shape

(torch.Size([16, 1, 4096]), torch.Size([16]))

# An Efficientnet has basically three parts: 
**(0) Base (Feet) --> (1) Body --> (2) Head**

Within these three parts -- we are **mainly** only using three tools/units of computation:

(0) Conv1d: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html <br>
(1) BatchNorm1d: https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html <br>
(2) SiLU: https://pytorch.org/docs/stable/generated/torch.nn.SiLU.html <br>

*There are a few other items added as well, which we will see below.*

<br>**Base** (feet):<br>
(0) Conv1d --> (1) BatchNorm1d --> (2) SiLU

**Head**: <br>
(0) Conv1d --> (1) BatchNorm1d --> (2) SiLU --> (3) SelectAdaptivePool1d --> (4) Linear

*the base & head are relatively straightforward -- we'll implement both below:*

In [21]:
def get_conv_bn(in_ch=1, out_ch=2, ks=2, stride=2, padding=None):
    """Build a bias-free ``Conv1d`` followed by ``BatchNorm1d``.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels (also the batch-norm feature count).
        ks: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding. ``None`` (the default) means no
            padding and is translated to ``0``, because ``nn.Conv1d`` has
            no valid ``None`` padding and would fail on the forward pass.

    Returns:
        ``nn.Sequential(Conv1d, BatchNorm1d)``.
    """
    if padding is None:
        padding = 0
    return nn.Sequential(
        # bias=False: the conv is immediately followed by BatchNorm1d.
        nn.Conv1d(in_channels=in_ch, out_channels=out_ch,
                  kernel_size=ks, stride=stride,
                  padding=padding, bias=False),
        nn.BatchNorm1d(num_features=out_ch),
    )

In [76]:
def conv(in_ch, out_ch, ks, stride, padding=0, activation=False):
    """Return a conv + batch-norm block, optionally followed by SiLU.

    The conv/batch-norm pair comes from ``get_conv_bn``. With
    ``activation=True`` that pair is nested inside a new ``nn.Sequential``
    together with an in-place ``nn.SiLU``; otherwise the pair is returned
    as-is.
    """
    block = get_conv_bn(in_ch, out_ch, ks, stride, padding)
    if not activation:
        return block
    return nn.Sequential(block, nn.SiLU(inplace=True))

The `conv` function above wraps `get_conv_bn` and optionally appends the SiLU layer.

In [28]:
# Build a block with in_ch=1, out_ch=2, ks=3, stride=4 plus SiLU, and display it.
conv(1,2,3,4, activation=True)

Sequential(
  (0): Sequential(
    (0): Conv1d(1, 2, kernel_size=(3,), stride=(4,), padding=(None,), bias=False)
    (1): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): SiLU(inplace=True)
)

This was the old base layer fxn

In [29]:
def get_base_layer(in_chans=1, out_chans=32, ks=3, stride=2, padding=1):
    """Build the base (stem) of the network: Conv1d -> BatchNorm1d -> SiLU.

    The convolution carries no bias term and the activation runs in place.
    """
    stem_conv = nn.Conv1d(in_chans, out_chans, ks,
                          stride=stride, padding=padding, bias=False)
    stem_norm = nn.BatchNorm1d(out_chans)
    stem_act = nn.SiLU(inplace=True)
    return nn.Sequential(stem_conv, stem_norm, stem_act)

Now we can just use our conv function to replace that + this will be the building block for the entire model

In [122]:
def get_base_layer(in_ch=1, out_ch=32, ks=3, stride=2, padding=1):
    """Build the base (stem) block via ``conv`` with the activation enabled."""
    return conv(in_ch, out_ch, ks, stride, padding=padding, activation=True)

In [123]:
# Sanity check: run the sampled batch through the base layer and inspect the output shape.
get_base_layer()(batch[0]).shape

torch.Size([16, 32, 2048])

The head had a bit more going on, but we can still simplify it

In [34]:
def get_head_layer(in_chans=320, out_chans=1280, ks=1, stride=1,
              avg_out_feats=10, lin_out_feats=1):
    """Build the head: Conv1d -> BatchNorm1d -> SiLU -> AdaptiveAvgPool1d -> Linear.

    The pool resizes the length axis to ``avg_out_feats`` and the linear
    layer maps that axis to ``lin_out_feats``.

    NOTE(review): there is no flatten between the pool and the linear layer,
    so the linear layer acts on the pooled length axis and the channel axis
    is kept in the output -- confirm this is the intended head shape.
    """
    head_modules = [
        nn.Conv1d(in_chans, out_chans, ks, stride=stride, bias=False),
        nn.BatchNorm1d(out_chans),
        nn.SiLU(inplace=True),
        nn.AdaptiveAvgPool1d(avg_out_feats),
        nn.Linear(avg_out_feats, lin_out_feats),
    ]
    return nn.Sequential(*head_modules)

In [36]:
# Display the modules of the default head.
get_head_layer()

Sequential(
  (0): Conv1d(320, 1280, kernel_size=(1,), stride=(1,), bias=False)
  (1): BatchNorm1d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): SiLU(inplace=True)
  (3): AdaptiveAvgPool1d(output_size=10)
  (4): Linear(in_features=10, out_features=1, bias=True)
)

In [44]:
def get_head_layer(in_chans=320, out_chans=1280, ks=1, stride=1,
              avg_out_feats=200, lin_out_feats=1):
    """Build the head from ``conv`` (with SiLU), an adaptive average pool and a linear layer.

    The pool resizes the length axis to ``avg_out_feats`` and the linear
    layer maps that axis to ``lin_out_feats``.
    """
    features = conv(in_chans, out_chans, ks, stride, activation=True)
    pool = nn.AdaptiveAvgPool1d(output_size=avg_out_feats)
    classifier = nn.Linear(in_features=avg_out_feats,
                           out_features=lin_out_feats)
    return nn.Sequential(features, pool, classifier)

In [45]:
# Display the modules of the head rebuilt on top of `conv`.
get_head_layer()

Sequential(
  (0): Sequential(
    (0): Sequential(
      (0): Conv1d(320, 1280, kernel_size=(1,), stride=(1,), padding=(None,), bias=False)
      (1): BatchNorm1d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): SiLU(inplace=True)
  )
  (1): AdaptiveAvgPool1d(output_size=200)
  (2): Linear(in_features=200, out_features=1, bias=True)
)

Not too much shorter, but better nonetheless

**Body**:<br>
(0) DepthwiseSeparableConv <br>
(1) InvertedResidual (two in a row) <br>
(2) InvertedResidual (two in a row) <br>
(3) InvertedResidual (three in a row) <br>
(4) InvertedResidual (three in a row) <br>
(5) InvertedResidual (three in a row) <br>
(6) InvertedResidual (one) <br>

*ok so what are these layers in the body?*

# DepthwiseSeparable:
(0) Conv1d <br>
(1) BatchNorm1d <br>
(2) SiLU <br>
(3) **Squeeze Excite**<br>
(4) Conv1d <br>
(5) BatchNorm1d <br>
(6) Identity <br>

# InvertedResidual:
(0) Conv1d <br>
(1) BatchNorm1d <br>
(2) SiLU <br>
(3) Conv1d <br>
(4) BatchNorm1d <br>
(5) SiLU <br>
(6) **Squeeze Excite**<br>
(7) Conv1d <br>
(8) BatchNorm1d <br>

**"Squeeze Excite" = Conv1d --> SiLU --> Conv1d**

Let's define our squeeze-excite function. Since it has two conv layers, we'll use tuples for its parameters for now. The parameters in the paper are much more structured for the squeeze-excite layer, but we will keep this optionality in place (for now).

In the paper, the squeeze-excite layer takes the number of filters from 240 --> 10 --> 240. This would be easier to encode into the function below, but would make it harder to tweak these values.

In [47]:
def get_sq_ex(in_ch=(1, 1), out_ch=(2, 2), ks=(2, 2), stride=(2, 2)):
    """Build the "squeeze excite" stack: Conv1d -> SiLU -> Conv1d.

    Every argument is a pair: index 0 configures the first convolution and
    index 1 the second. Both convolutions keep their bias term.
    """
    first_conv = nn.Conv1d(in_ch[0], out_ch[0], ks[0], stride=stride[0])
    second_conv = nn.Conv1d(in_ch[1], out_ch[1], ks[1], stride=stride[1])
    return nn.Sequential(first_conv, nn.SiLU(), second_conv)

In [48]:
# Construct the stack with its default arguments to confirm the function builds, and display it.
get_sq_ex()

Sequential(
  (0): Conv1d(1, 2, kernel_size=(2,), stride=(2,))
  (1): SiLU()
  (2): Conv1d(1, 2, kernel_size=(2,), stride=(2,))
)

The above functions have simplified our work to produce the desired layers -- we have everything we need to create both of the layer types in our model's body

**DepthwiseSeparable**: <br>
(0) conv<br>
(1) get_sq_ex <br>
(2) conv <br>
(3) Identity <br>

**InvertedResidual**: <br>
(0) conv <br>
(1) conv <br>
(2) get_sq_ex <br>
(3) conv <br>

A squeeze-excite unit compresses the number of channels down and then expands it back to the original amount

In [83]:
def get_dep_sep(in_ch, out_ch, ks=3, mid_ch=8):
    """Build the depthwise-separable block: conv -> squeeze excite -> conv -> Identity.

    Args:
        in_ch: Channels entering the block (kept by the first conv).
        out_ch: Channels produced by the final 1x1 conv.
        ks: Kernel size of the first conv.
        mid_ch: Channel count between the two squeeze-excite convolutions.
    """
    first = conv(in_ch=in_ch, out_ch=in_ch, ks=ks, stride=1, activation=True)
    # NOTE(review): ks/stride are not passed here, so get_sq_ex's defaults
    # apply (kernel size 2, stride 2 for both convs), which shortens the
    # sequence -- confirm whether ks=(1,1), stride=(1,1) was intended.
    squeeze_excite = get_sq_ex(in_ch=(in_ch, mid_ch), out_ch=(mid_ch, in_ch))
    pointwise = conv(in_ch=in_ch, out_ch=out_ch, ks=1, stride=1)
    return nn.Sequential(first, squeeze_excite, pointwise, nn.Identity())

In [84]:
# Display the block built for 32 input channels and 16 output channels.
get_dep_sep(32, 16)

Sequential(
  (0): Sequential(
    (0): Sequential(
      (0): Conv1d(32, 32, kernel_size=(3,), stride=(1,), bias=False)
      (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): SiLU(inplace=True)
  )
  (1): Sequential(
    (0): Conv1d(32, 8, kernel_size=(2,), stride=(2,))
    (1): SiLU()
    (2): Conv1d(8, 32, kernel_size=(2,), stride=(2,))
  )
  (2): Sequential(
    (0): Conv1d(32, 16, kernel_size=(1,), stride=(1,), bias=False)
    (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (3): Identity()
)

In [104]:
# Let's make sure data flows through the base layer followed by our
# depthwise-separable layer, and check the resulting shape.
model = nn.Sequential(
    get_base_layer(),
    get_dep_sep(32, 16))
model(batch[0]).shape

torch.Size([16, 16, 511])

In [97]:
# Squeeze-excite stack going 96 -> 4 -> 96 channels, with the default kernel size and stride.
get_sq_ex((96,4),(4,96))

Sequential(
  (0): Conv1d(96, 4, kernel_size=(2,), stride=(2,))
  (1): SiLU()
  (2): Conv1d(4, 96, kernel_size=(2,), stride=(2,))
)

In [113]:
# Hand-assemble conv -> conv -> squeeze excite -> conv after `model`
# and check the output shape on the sampled batch.
nn.Sequential(
    model,
    conv(16,96,1,1, activation=True),
    conv(96,96, ks=3,stride=2,padding=1, activation=True),
    get_sq_ex((96,4), (4,96), ks=(1,1), stride=(1,1)),
    conv(96, 24, 1,1)
)(batch[0]).shape

torch.Size([16, 24, 256])

(1) The first conv layer in the inverted residuals ALWAYS has a kernel size of 1 and stride of 1, with no padding.

(2) For the second conv layer, the kernel size, stride and padding can be different in each layer

(3) The squeeze excite layer always has a kernel size of 1 and stride of 1

(4) The last conv layer always has a stride of 1 and kernel size of 1

In [114]:
def get_inv_res(in_ch, mid_ch, out_ch, sq_ch=4, ks=1, stride=1, padding=1):
    """Build the inverted-residual stack: conv -> conv -> squeeze excite -> conv.

    Args:
        in_ch: Channels entering the block.
        mid_ch: Channels used between the first and last conv.
        out_ch: Channels produced by the last conv.
        sq_ch: Channel count between the two squeeze-excite convolutions.
        ks, stride, padding: Settings of the second conv only; every other
            convolution in the block uses kernel size 1 and stride 1.

    NOTE(review): the result is a plain ``nn.Sequential`` with no skip
    connection -- confirm whether a residual add is meant to be added.
    """
    first = conv(in_ch=in_ch, out_ch=mid_ch, ks=1, stride=1, activation=True)
    second = conv(in_ch=mid_ch, out_ch=mid_ch, ks=ks, stride=stride,
                  padding=padding, activation=True)
    squeeze_excite = get_sq_ex((mid_ch, sq_ch), (sq_ch, mid_ch),
                               ks=(1, 1), stride=(1, 1))
    last = conv(in_ch=mid_ch, out_ch=out_ch, ks=1, stride=1)
    return nn.Sequential(first, second, squeeze_excite, last)

In [118]:
# Same check as the hand-assembled stack, now using get_inv_res after `model`.
nn.Sequential(
    model,
    get_inv_res(16,96,24, 4, ks=3, stride=2,padding=1))(batch[0]).shape

torch.Size([16, 24, 256])

In [119]:
# Chain base layer -> depthwise-separable block -> inverted-residual block
# from scratch and check the output shape on the sampled batch.
nn.Sequential(
    get_base_layer(),
    get_dep_sep(in_ch=32,out_ch=16),
    get_inv_res(in_ch=16, mid_ch=96, out_ch= 24, 
                sq_ch=4, ks=3, stride=2, padding=1)
)(batch[0]).shape

torch.Size([16, 24, 256])

Looks like things are working! :)