# ConvMixer

> ConvMixer model.

Implementation of ConvMixer.  
ConvMixer - ICLR 2022 submission ["Patches Are All You Need?"](https://openreview.net/forum?id=TVHS5Y4dNvM).  
Adapted from [https://github.com/tmp-iclr/convmixer](https://github.com/tmp-iclr/convmixer)  
Home for convmixer: [https://github.com/locuslab/convmixer](https://github.com/locuslab/convmixer)

The purpose of this implementation is to make the model easy to tune.  
For example, you can experiment with the activation function, initialization, etc.  

## Import and create model

The base class for the model is `ConvMixer`; it returns a PyTorch Sequential model.  

In [None]:
from model_constructor import ConvMixer

Now we can create a ConvMixer model:

In [None]:
convmixer_1024_20 = ConvMixer(dim=1024, depth=20)

In [None]:
#collapse_output
convmixer_1024_20

ConvMixer(
  (0): ConvLayer(
    (conv): Conv2d(3, 1024, kernel_size=(7, 7), stride=(7, 7))
    (act_fn): GELU(approximate='none')
    (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): Sequential(
    (0): Residual(
      (fn): ConvLayer(
        (conv): Conv2d(1024, 1024, kernel_size=(9, 9), stride=(1, 1), padding=same, groups=1024)
        (act_fn): GELU(approximate='none')
        (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): ConvLayer(
      (conv): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1))
      (act_fn): GELU(approximate='none')
      (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): Sequential(
    (0): Residual(
      (fn): ConvLayer(
        (conv): Conv2d(1024, 1024, kernel_size=(9, 9), stride=(1, 1), padding=same, groups=1024)
        (act_fn): GELU(approximate='none')
        (bn): BatchNorm2d(10

## Change activation function.

In [None]:
#hide
from torch.nn import Mish

Let's create a model with Mish (imported from `torch.nn`) instead of GELU.

In [None]:
convmixer_1024_20 = ConvMixer(dim=1024, depth=20, act_fn=Mish())

In [None]:
#collapse_output
convmixer_1024_20[0]

ConvLayer(
  (conv): Conv2d(3, 1024, kernel_size=(7, 7), stride=(7, 7))
  (act_fn): Mish()
  (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [None]:
#collapse_output
convmixer_1024_20[1]

Sequential(
  (0): Residual(
    (fn): ConvLayer(
      (conv): Conv2d(1024, 1024, kernel_size=(9, 9), stride=(1, 1), padding=same, groups=1024)
      (act_fn): Mish()
      (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): ConvLayer(
    (conv): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1))
    (act_fn): Mish()
    (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

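## Pre-activation

With `pre_act=True`, the activation function is applied before the convolution. As the outputs below show, this changes the order inside the mixer blocks, while the first (stem) layer keeps the convolution first.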

In [None]:
convmixer_1024_20 = ConvMixer(dim=1024, depth=20, act_fn=Mish(), pre_act=True)

In [None]:
#collapse_output
convmixer_1024_20[0]

ConvLayer(
  (conv): Conv2d(3, 1024, kernel_size=(7, 7), stride=(7, 7))
  (act_fn): Mish()
  (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [None]:
#collapse_output
convmixer_1024_20[1]

Sequential(
  (0): Residual(
    (fn): ConvLayer(
      (act_fn): Mish()
      (conv): Conv2d(1024, 1024, kernel_size=(9, 9), stride=(1, 1), padding=same, groups=1024)
      (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): ConvLayer(
    (act_fn): Mish()
    (conv): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1))
    (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

## BatchNorm before activation.

In [None]:
convmixer_1024_20 = ConvMixer(dim=1024, depth=20, act_fn=Mish(), bn_1st=True)

In [None]:
#collapse_output
convmixer_1024_20[0]

ConvLayer(
  (conv): Conv2d(3, 1024, kernel_size=(7, 7), stride=(7, 7))
  (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act_fn): Mish()
)

In [None]:
#collapse_output
convmixer_1024_20[1]

Sequential(
  (0): Residual(
    (fn): ConvLayer(
      (conv): Conv2d(1024, 1024, kernel_size=(9, 9), stride=(1, 1), padding=same, groups=1024)
      (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): Mish()
    )
  )
  (1): ConvLayer(
    (conv): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1))
    (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act_fn): Mish()
  )
)