In [1]:
!pip3 install git+https://github.com/arogozhnikov/einops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/arogozhnikov/einops
  Cloning https://github.com/arogozhnikov/einops to /tmp/pip-req-build-c5mla3qr
  Running command git clone --filter=blob:none --quiet https://github.com/arogozhnikov/einops /tmp/pip-req-build-c5mla3qr
  Resolved https://github.com/arogozhnikov/einops to commit 32594eb593e45020309612cbe85972537dcbd112
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: einops
  Building wheel for einops (pyproject.toml) ... [?25l[?25hdone
  Created wheel for einops: filename=einops-0.6.0-py3-none-any.whl size=42055 sha256=a78aa546f1f422baf86555f2a3ba38a26de49a2602977ae2c5a85f973badead0
  Stored in directory: /tmp/pip-ephem-wheel-cache-ofxps5wz/wheels/66/07/52/b78abb1f0b12a84e44347dfd583719bc956

# Separable convolutions

![](https://miro.medium.com/v2/resize:fit:4800/format:webp/1*o3mKhG3nHS-1dWa_plCeFw.png)

In [2]:
import torch
from torch import nn

In [14]:
class SeparableConv2d(nn.Module):
  """Depthwise-separable 2D convolution.

  The input first goes through a depthwise convolution (one
  ``kernel_size`` filter per input channel, ``groups=in_channels``) and then
  through a 1x1 pointwise convolution that mixes channels and maps
  ``in_channels`` to ``out_channels``.

  Args:
    in_channels: number of channels of the input tensor.
    out_channels: number of channels produced by the pointwise convolution.
    kernel_size: kernel size of the depthwise convolution (forwarded to
      ``nn.Conv2d``).
    bias: whether both convolutions carry a bias term.
    padding: padding of the depthwise convolution, forwarded to
      ``nn.Conv2d`` (an int, a tuple, or ``'same'`` / ``'valid'``). The
      default ``0`` keeps the original un-padded behaviour, where the
      spatial size shrinks by ``kernel_size - 1``.
  """

  def __init__(self, in_channels, out_channels, kernel_size, bias=False, padding=0):
    super().__init__()
    # groups=in_channels gives every input channel its own spatial filter.
    self.depthwise = nn.Conv2d(
        in_channels,
        in_channels,
        kernel_size,
        padding=padding,
        groups=in_channels,
        bias=bias,
    )
    # The 1x1 convolution is the only place where channels are mixed.
    self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, groups=1, bias=bias)

  def forward(self, x):
    """Apply the depthwise convolution, then the pointwise one."""
    return self.pointwise(self.depthwise(x))

In [15]:
# Smoke test: push one random 3-channel 12x12 image through the layer.
features = torch.rand(1, 3, 12, 12)
out = SeparableConv2d(in_channels=3, out_channels=256, kernel_size=5)
out(features).shape

# TODO: add padding

torch.Size([1, 256, 8, 8])

# R(2+1)D conv

https://www.tensorflow.org/tutorials/video/video_classification

https://paperswithcode.com/method/2-1-d-convolution

![alt text](https://drive.google.com/uc?export=view&id=1DDI_5xclb7wb1V2vtDzgoAKm2psjd1qb)

In [6]:
import torch
from torch import nn as nn

In [19]:
class R2_and1_conv(torch.nn.Module):
  """(2+1)D convolution: a spatial convolution followed by a temporal one.

  The block factorises a 3D convolution into two ``nn.Conv3d`` layers:

  1. ``c1`` uses a ``(1, k_h, k_w)`` kernel, so it only looks at the two
     spatial axes and keeps the channel count at ``in_channels``.
  2. ``c2`` uses a ``(k_t, 1, 1)`` kernel, so it only looks at the temporal
     axis and maps ``in_channels`` to ``out_channels``.

  No padding is applied, so every convolved axis shrinks by ``k - 1``.

  Args:
    in_channels: number of channels of the input.
    out_channels: number of channels of the output.
    kernel_size: either a sequence ``(k_h, k_w, k_t)`` or a single int used
      for all three axes. Note the order: spatial sizes first, temporal
      size last, which differs from ``nn.Conv3d``'s ``(t, h, w)`` order.
  """

  def __init__(self, in_channels, out_channels, kernel_size):
    super().__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size

    # Accept a bare int the same way nn.Conv3d does; previously this raised
    # TypeError because the int was indexed.
    if isinstance(kernel_size, int):
      k_h = k_w = k_t = kernel_size
    else:
      k_h, k_w, k_t = kernel_size[0], kernel_size[1], kernel_size[2]

    # first step: [c, t, h, w] -> [c, t, h1, w1]
    self.c1 = nn.Conv3d(in_channels, in_channels, (1, k_h, k_w))
    # second step: [c, t, h1, w1] -> [c2, t2, h1, w1]
    self.c2 = nn.Conv3d(in_channels, out_channels, (k_t, 1, 1))

  def forward(self, x):
    """Run the spatial convolution, then the temporal one, on ``x`` ([c, t, h, w])."""
    return self.c2(self.c1(x))

In [7]:
# One single-channel clip laid out as [c, t, h, w]: 10 frames of 5x5 pixels.
x = torch.rand(1, 10, 5, 5)
conv3d = nn.Conv3d(1, 1, 3)

In [8]:
# Reference result: the plain Conv3d (kernel_size=3) applied to x.
out_x = conv3d(x)
out_x.shape # [1, 8, 3, 3]

torch.Size([1, 8, 3, 3])

In [20]:
# Factorised (2+1)D convolution on the same clip, with two output channels.
out_x2 = R2_and1_conv(in_channels=1, out_channels=2, kernel_size=[3, 3, 3])(x)
out_x2.shape  # [2, 8, 3, 3]

torch.Size([2, 8, 3, 3])

# Temporal attention

[GLTR](https://openaccess.thecvf.com/content_ICCV_2019/papers/Li_Global-Local_Temporal_Representations_for_Video_Person_Re-Identification_ICCV_2019_paper.pdf)

![alt text](https://drive.google.com/uc?export=view&id=1k2G22YyTg_bmY9HBlYFkfyaHcapEQYeN)


In [3]:
T = 10  # number of frames in the sequence
d = 20  # size of the per-frame feature vector
input_features = torch.rand(d, T)

In [21]:
class GLRT(nn.Module):
  """Dilated temporal pyramid over a sequence of frame features.

  (The class name is kept for compatibility; the method linked in this
  notebook is spelled GLTR.)

  ``dtp`` holds one ``nn.Conv1d`` per dilation rate. Every branch uses
  ``padding='same'``, so the temporal length is preserved, and the branch
  outputs are concatenated on the channel axis. The output therefore has
  ``len(dilation_rates) * in_channels`` channels.

  ``convs_b_c`` (two depthwise Conv2d -> BatchNorm2d -> ReLU stacks) is
  constructed but not used by ``forward``.

  Args:
    in_channels: feature dimension ``d`` of every frame.
    dilation_rates: dilation of each temporal convolution branch.
    kernel_size: kernel size shared by all convolutions in the module.
  """

  def __init__(self, in_channels=20, dilation_rates=(1, 2, 4), kernel_size=3):
    super().__init__()

    self.dtp = nn.ModuleList([
        nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, dilation=dr, padding='same')
        for dr in dilation_rates
    ])

    # The original referenced an undefined name `k` here, which only worked
    # when a stale global of that name was left in the kernel.
    self.convs_b_c = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size, groups=in_channels, padding='same'),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
        )
        for _ in range(2)
    ])

  def forward(self, f):
    """Concatenate the dilated-branch outputs along the channel axis.

    Args:
      f: features of shape ``(d, T)`` or, batched, ``(B, d, T)``.

    Returns:
      Tensor of shape ``(B, len(dilation_rates) * d, T)``; ``B`` is 1 for
      un-batched input.
    """
    # Only add the batch axis when the caller did not supply one.
    if f.dim() == 2:
      f = f.unsqueeze(0)
    return torch.cat([conv(f) for conv in self.dtp], dim=1)

In [22]:
# The three dilated branches are stacked on the channel axis (3 * 20 = 60).
GLRT()(input_features).shape # [1, 60, 10]; the earlier note "60, 1" is presumably the target shape after pooling over time - TODO confirm

torch.Size([1, 60, 10])

# Spatial-temporal attention

![alt text](https://drive.google.com/uc?export=view&id=1ehQclFDue6eG50OkywOOErMVkca5hzDw)

![alt text](https://drive.google.com/uc?export=view&id=1ehQclFDue6eG50OkywOOErMVkca5hzDw)

![alt text](https://drive.google.com/uc?export=view&id=1vW6rAKoF37rjPAylhZEOANk2L0EMc_dL)