In [1]:
import torch 
import torch.nn as nn
import os 

# Restrict this kernel to the GPU with index 1 by setting CUDA_VISIBLE_DEVICES.
# NOTE(review): this presumably has to run before CUDA is first initialised
# in the process to take effect, so keep it in the first cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from torchinfo import summary

# The PyTorchVideo Model Zoo

In [7]:
from pytorchvideo.models import x3d

# X3D classifier with a 2-class head, configured for 3-channel clips of
# 4 frames at a 160-pixel crop size.
model = x3d.create_x3d(
    input_channel=3,
    input_clip_length=4,
    input_crop_size=160,
    model_num_class=2,
)

In [3]:
from pytorchvideo.models import slowfast

# TODO: this model is not yet wired into the summary cell.
# NOTE(review): SlowFast presumably expects one input tensor per pathway, so
# the single-tensor `input_size` used by the summary cell may not apply
# here — confirm before summarising this model.
model = slowfast.create_slowfast(
    model_depth=50,
    model_num_class=2,
    input_channels=(3, 3),
    norm=nn.BatchNorm3d,
    activation=nn.ReLU,
)

In [4]:
from pytorchvideo.models import r2plus1d

# R(2+1)D network: depth 50, 3-channel input, 2 output classes,
# with BatchNorm3d normalisation and ReLU activations.
model = r2plus1d.create_r2plus1d(
    model_depth=50,
    model_num_class=2,
    input_channel=3,
    activation=nn.ReLU,
    norm=nn.BatchNorm3d,
)

In [5]:
from pytorchvideo.models import csn

# CSN network: depth 50, 3-channel input, 2 output classes,
# with BatchNorm3d normalisation and ReLU activations.
model = csn.create_csn(
    model_depth=50,
    model_num_class=2,
    input_channel=3,
    activation=nn.ReLU,
    norm=nn.BatchNorm3d,
)

In [6]:
from pytorchvideo.models import resnet

# Video ResNet: depth 152, 3-channel input, 2 output classes,
# with BatchNorm3d normalisation and ReLU activations.
model = resnet.create_resnet(
    model_depth=152,
    model_num_class=2,
    input_channel=3,
    activation=nn.ReLU,
    norm=nn.BatchNorm3d,
)

In [8]:
from torchinfo import summary

batch_size = 4

# Summarise whichever network is currently bound to `model` (set by one of the
# cells above). The input is presumably laid out as
# (batch, channels, frames, height, width) — confirm against the model.
# "mult_adds" and "trainable" can be appended to `col_names` for extra columns.
summayer = summary(
    model,
    input_size=(batch_size, 3, 8, 256, 256),
    col_names=[
        "input_size",
        "output_size",
        "num_params",
        "kernel_size",
    ],
)

print(summayer)


Layer (type:depth-idx)                                       Input Shape               Output Shape              Param #                   Kernel Shape
Net                                                          [4, 3, 8, 256, 256]       [4, 2]                    --                        --
├─ModuleList: 1-1                                            --                        --                        --                        --
│    └─ResNetBasicStem: 2-1                                  [4, 3, 8, 256, 256]       [4, 24, 8, 128, 128]      --                        --
│    │    └─Conv2plus1d: 3-1                                 [4, 3, 8, 256, 256]       [4, 24, 8, 128, 128]      768                       --
│    │    └─BatchNorm3d: 3-2                                 [4, 24, 8, 128, 128]      [4, 24, 8, 128, 128]      48                        --
│    │    └─ReLU: 3-3                                        [4, 24, 8, 128, 128]      [4, 24, 8, 128, 128]      --                       

# The Torch Hub Model Zoo

In [8]:
# Load the `x3d_s` entry point from the facebookresearch/pytorchvideo hub
# repository without pretrained weights. The first call downloads the repo
# archive, so network access is required.
model_name = "x3d_s"
model = torch.hub.load(
    "facebookresearch/pytorchvideo",
    model_name,
    pretrained=False,
)

Downloading: "https://github.com/facebookresearch/pytorchvideo/archive/main.zip" to /root/.cache/torch/hub/main.zip


In [9]:
from torchvision.models import video

# Randomly initialised 3D ResNet-18 from torchvision.
# The factory loads no weights when called without arguments, both under the
# legacy `pretrained=` signature and under the newer `weights=` signature, so
# the deprecated `pretrained=False` keyword (which triggers a warning on
# torchvision >= 0.13) is omitted.
model = video.r3d_18()

KeyboardInterrupt: 

In [None]:
# Relies on `summary`, `model` and `batch_size` defined by earlier cells.
result = summary(
    model,
    input_size=(batch_size, 3, 8, 256, 256),
)

print(result)

Layer (type:depth-idx)                   Output Shape              Param #
VideoResNet                              [16, 400]                 --
├─BasicStem: 1-1                         [16, 64, 8, 128, 128]     --
│    └─Conv3d: 2-1                       [16, 64, 8, 128, 128]     28,224
│    └─BatchNorm3d: 2-2                  [16, 64, 8, 128, 128]     128
│    └─ReLU: 2-3                         [16, 64, 8, 128, 128]     --
├─Sequential: 1-2                        [16, 64, 8, 128, 128]     --
│    └─BasicBlock: 2-4                   [16, 64, 8, 128, 128]     --
│    │    └─Sequential: 3-1              [16, 64, 8, 128, 128]     110,720
│    │    └─Sequential: 3-2              [16, 64, 8, 128, 128]     110,720
│    │    └─ReLU: 3-3                    [16, 64, 8, 128, 128]     --
│    └─BasicBlock: 2-5                   [16, 64, 8, 128, 128]     --
│    │    └─Sequential: 3-4              [16, 64, 8, 128, 128]     110,720
│    │    └─Sequential: 3-5              [16, 64, 8, 128, 128]   