In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
x = torch.rand(1,10,3,224,224) # B T C H W 

In [3]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5) # input channels, output channel, kernel size
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)      # input channels, output channel, kernel size
        self.fc1 = nn.Linear(16 * 5 * 5, 120) #  16*5*5 5,5,16
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x))) # c. h. w. 16, 5, 5 -> 16, 10, 10
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x)) 
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# 16 x 1 x 1 
# 16 x 5
net = Net()

In [4]:
net

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [11]:
x = torch.rand(4, 3, 30, 128, 128) # Batch, Channels, Time, Height, Width

In [22]:
class Net(nn.Module):
    def __init__(self, num_classes, kernel_size):
        super().__init__()
        self.conv1 = nn.Conv3d(3, 16, kernel_size, padding='same')
        # 5x5x5x3x16 = 1200 (bias)
        self.conv2 = nn.Conv3d(16, 32, kernel_size, padding='same')
        self.conv3 = nn.Conv3d(32, 64, kernel_size, padding='same')
        self.conv4 = nn.Conv3d(64, 128, kernel_size, padding='same')
        self.conv5 = nn.Conv3d(128, 256, kernel_size, padding='same')
        self.conv6 = nn.Conv3d(256, 512, kernel_size, padding='same')
        # 16M 
        self.maxpool2d = nn.MaxPool3d((1,3,3))
        self.relu = nn.ReLU()
        self.prediction_head = nn.Linear(31457280, num_classes)
        # 31457280*7
        # 5x5x5x256x512 
        # 16M
        
    def forward(self, x):
        x = self.conv1(x)
        x = torch.flatten(x)
        x = self.prediction_head(x)
        return x


net = Net(7)

In [27]:
model_parameters = filter(lambda p: p.requires_grad, net.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])

In [26]:
import numpy as np

In [29]:
net

Net(
  (conv1): Conv3d(3, 16, kernel_size=(5, 5, 5), stride=(1, 1, 1), padding=same)
  (conv2): Conv3d(16, 32, kernel_size=(5, 5, 5), stride=(1, 1, 1), padding=same)
  (conv3): Conv3d(32, 64, kernel_size=(5, 5, 5), stride=(1, 1, 1), padding=same)
  (conv4): Conv3d(64, 128, kernel_size=(5, 5, 5), stride=(1, 1, 1), padding=same)
  (conv5): Conv3d(128, 256, kernel_size=(5, 5, 5), stride=(1, 1, 1), padding=same)
  (conv6): Conv3d(256, 512, kernel_size=(5, 5, 5), stride=(1, 1, 1), padding=same)
  (maxpool2d): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 3, 3), padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (prediction_head): Linear(in_features=31457280, out_features=7, bias=True)
)