In [1]:
# Standard library first, then third-party packages.
import os

# Restrict this process to the GPU with index 1. The variable is set before
# `torch` is imported so it is already in place whenever CUDA is initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import torch
import torch.nn as nn
from torchinfo import summary

# The PyTorchVideo Module Zoo

In [2]:
from pytorchvideo.models import x3d

# Build an X3D network with the PyTorchVideo builder and bind it to `model`.
model = x3d.create_x3d(
    model_num_class=2,     # two output classes
    input_channel=3,       # 3-channel input clips
    input_crop_size=160,   # spatial crop size handed to the builder
    input_clip_length=4,   # clip length (frames) handed to the builder
)

In [3]:
from pytorchvideo.models import slowfast

# TODO: left by the original author; the pending work is not described.
# Build a depth-50 SlowFast network with a 2-class head and bind it to `model`.
model = slowfast.create_slowfast(
    model_depth=50,
    model_num_class=2,
    input_channels=(3, 3),  # one entry per pathway (presumably slow, fast)
    activation=nn.ReLU,
    norm=nn.BatchNorm3d,
)

In [4]:
from pytorchvideo.models import r2plus1d

# Build a depth-50 R(2+1)D network with a 2-class head and bind it to `model`.
model = r2plus1d.create_r2plus1d(
    model_depth=50,
    model_num_class=2,
    input_channel=3,      # 3-channel input clips
    activation=nn.ReLU,   # activation layer class handed to the builder
    norm=nn.BatchNorm3d,  # normalisation layer class handed to the builder
)

In [5]:
from pytorchvideo.models import csn

# Build a depth-50 CSN network with a 2-class head and bind it to `model`.
model = csn.create_csn(
    model_num_class=2,
    model_depth=50,
    input_channel=3,      # 3-channel input clips
    activation=nn.ReLU,   # activation layer class handed to the builder
    norm=nn.BatchNorm3d,  # normalisation layer class handed to the builder
)

In [12]:
from pytorchvideo.models import resnet

# Build a depth-50 3D ResNet and bind it to `model`.
# The head has a single output unit (model_num_class=1).
model = resnet.create_resnet(
    model_num_class=1,
    model_depth=50,
    input_channel=3,      # 3-channel input clips
    activation=nn.ReLU,   # activation layer class handed to the builder
    norm=nn.BatchNorm3d,  # normalisation layer class handed to the builder
)

In [13]:
from torchinfo import summary

batch_size = 4

# Summarise whichever network is currently bound to `model`, using an input of
# shape (batch_size, 3, 16, 224, 224).
# An earlier variant of this cell used an input of (batch_size, 3, 8, 256, 256)
# and additionally reported the "mult_adds" and "trainable" columns.
clip_shape = (batch_size, 3, 16, 224, 224)
summary_columns = ["input_size", "output_size", "num_params", "kernel_size"]

# depth=50 is torchinfo's display nesting depth for the table.
summayer = summary(
    model,
    input_size=clip_shape,
    col_names=summary_columns,
    depth=50,
)

print(summayer)


Layer (type:depth-idx)                             Input Shape               Output Shape              Param #                   Kernel Shape
Net                                                [4, 3, 16, 224, 224]      [4, 1]                    --                        --
├─ModuleList: 1-1                                  --                        --                        --                        --
│    └─ResNetBasicStem: 2-1                        [4, 3, 16, 224, 224]      [4, 64, 16, 56, 56]       --                        --
│    │    └─Conv3d: 3-1                            [4, 3, 16, 224, 224]      [4, 64, 16, 112, 112]     28,224                    [3, 7, 7]
│    │    └─BatchNorm3d: 3-2                       [4, 64, 16, 112, 112]     [4, 64, 16, 112, 112]     128                       --
│    │    └─ReLU: 3-3                              [4, 64, 16, 112, 112]     [4, 64, 16, 112, 112]     --                        --
│    │    └─MaxPool3d: 3-4                         [4, 64, 

# The TorchHub Module Zoo

In [8]:
# Load the `x3d_s` entry point from the PyTorchVideo hub repository,
# passing pretrained=False through to it, and bind the result to `model`.
model_name = "x3d_s"
model = torch.hub.load(
    repo_or_dir="facebookresearch/pytorchvideo",
    model=model_name,
    pretrained=False,
)

Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main


In [9]:
from torchvision.models import video

# Build torchvision's R3D-18 video network and bind it to `model`.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing it.
model = video.r3d_18(
    pretrained=False,
)

In [10]:
# Summarise the network currently bound to `model` on an input of shape
# (batch_size, 3, 8, 256, 256). `batch_size` and `summary` come from earlier cells.
clip_shape = (batch_size, 3, 8, 256, 256)
result = summary(model, input_size=clip_shape)

print(result)

Layer (type:depth-idx)                   Output Shape              Param #
VideoResNet                              [4, 400]                  --
├─BasicStem: 1-1                         [4, 64, 8, 128, 128]      --
│    └─Conv3d: 2-1                       [4, 64, 8, 128, 128]      28,224
│    └─BatchNorm3d: 2-2                  [4, 64, 8, 128, 128]      128
│    └─ReLU: 2-3                         [4, 64, 8, 128, 128]      --
├─Sequential: 1-2                        [4, 64, 8, 128, 128]      --
│    └─BasicBlock: 2-4                   [4, 64, 8, 128, 128]      --
│    │    └─Sequential: 3-1              [4, 64, 8, 128, 128]      110,720
│    │    └─Sequential: 3-2              [4, 64, 8, 128, 128]      110,720
│    │    └─ReLU: 3-3                    [4, 64, 8, 128, 128]      --
│    └─BasicBlock: 2-5                   [4, 64, 8, 128, 128]      --
│    │    └─Sequential: 3-4              [4, 64, 8, 128, 128]      110,720
│    │    └─Sequential: 3-5              [4, 64, 8, 128, 128]    

In [11]:
import pytorchvideo.layers as layers

# Smoke test for the Swish layer: pass an all-zero 5-D tensor through it and
# display the shape of the result.
swish = layers.swish.Swish()
input_tensor = torch.zeros((2, 256, 4, 14, 14))
output = swish(input_tensor)

output.shape

torch.Size([2, 256, 4, 14, 14])