## Imports

In [2]:
import os
import torch
import torch.nn as nn
import wandb
import numpy as np

from torchvision import transforms
from mmcv_csn import ResNet3dCSN
from cls_head import ClassifierHead
from cls_autoencoder import EncoderDecoder
from scheduler import GradualWarmupScheduler
from mmaction.datasets import build_dataset



In [None]:
os.chdir('../')

In [None]:
try:
    device = 'mps' if torch.backends.mps.is_available() else 'cpu'
except:
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
work_dir = 'work_dirs/wlasl-dataset/'
batch_size = 2

os.makedirs(work_dir, exist_ok=True)

In [None]:
# Building the datasets
train_dataset = build_dataset(train_cfg)
test_dataset = build_dataset(test_cfg)

# Setting up dataloaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                    batch_size=batch_size,
                                    shuffle=True,
                                    num_workers=4,
                                    pin_memory=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                    batch_size=1,
                                    shuffle=True,
                                    num_workers=4,
                                    pin_memory=True)

In [None]:
import torch.nn.functional as F

class PoseEncoder(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(PoseEncoder, self).__init__()
        self.fc1 = nn.Linear(in_channels, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, 512)
        self.fc5 = nn.Linear(512, 1024)
        self.fc6 = nn.Linear(1024, 1024)
        self.fc7 = nn.Linear(1024, out_channels)
        self.dropout = nn.Dropout(0.5)

        
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc3(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc4(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc5(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc6(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc7(x)
        x = F.relu(x)

        return x

In [48]:
a = torch.tensor([])
a.shape

torch.Size([0])

In [49]:
x = torch.rand(2048, 4, 7, 7)

In [40]:
y = torch.flatten(x)

In [46]:
z = torch.concat((z,y), dim=0)

In [47]:
z.shape

torch.Size([1204224])

In [None]:
import torch.nn.functional as F

class MultiModalNeck(nn.Module):
    def __init__(self):
        super(MultiModalNeck, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))

    def forward(self,
                rgb=None,
                depth=None,
                flow=None,
                face=None,
                left_hand=None,
                right_hand=None,
                pose=None):
        
        out = torch.tensor([])
        
        if rgb is not None:
            rgb = torch.flatten(self.avg_pool(rgb))
            out = torch.concat((out, rgb), dim=0)
        
        if depth is not None:
            depth = torch.flatten(self.avg_pool(depth))
            out = torch.concat((out, depth), dim=0)

        if flow is not None:
            flow = torch.flatten(self.avg_pool(flow))
            out = torch.concat((out, flow), dim=0)

        if face is not None:
            face = torch.flatten(self.avg_pool(face))
            out = torch.concat((out, face), dim=0)

        if left_hand is not None:
            left_hand = torch.flatten(self.avg_pool(left_hand))
            out = torch.concat((out, left_hand), dim=0)

        if right_hand is not None:
            right_hand = torch.flatten(self.avg_pool(right_hand))
            out = torch.concat((out, right_hand), dim=0)
            
        if pose is not None:
            out = torch.concat((out, pose), dim=0)
            
        return pose

In [1]:
# Create a CSN model for rgb
rgb_encoder = ResNet3dCSN(
    pretrained2d=False,
    # pretrained=None,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True
)

# Create a CSN model for flow
flow_encoder = ResNet3dCSN(
    pretrained2d=False,
    # pretrained=None,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True
)

# Create a CSN model for depth
depth_encoder = ResNet3dCSN(
    pretrained2d=False,
    # pretrained=None,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True
)

# Create a CSN model for left hand
lhand_encoder = ResNet3dCSN(
    pretrained2d=False,
    # pretrained=None,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True
)

# Create a CSN model for right hand
rhand_encoder = ResNet3dCSN(
    pretrained2d=False,
    # pretrained=None,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True
)

# Create a CSN model for face
face_encoder = ResNet3dCSN(
    pretrained2d=False,
    # pretrained=None,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True
)

# Add the dimensions here
pose_encoder = PoseEncoder(# dimensions go here)

rgb_encoder.init_weights()
depth_encoder.init_weights()
flow_encoder.init_weights()
rhand_encoder.init_weights()
lhand_encoder.init_weights()
face_encoder.init_weights()

neck = MultiModalNeck()

# TODO: Classification Head change in_features

head = ClassifierHead(num_classes=400,
                 in_features=2048,
                 dropout_ratio=0.5,
                 init_std=0.01)

head.init_weights()

model = EncoderDecoder(encoder, decoder, reconstruct_head)

NameError: name 'ResNet3dCSN' is not defined

In [24]:
import torch.nn.functional as F

class PoseEncoder(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(PoseEncoder, self).__init__()
        self.fc1 = nn.Linear(in_channels, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, 512)
        self.fc5 = nn.Linear(512, 1024)
        self.fc6 = nn.Linear(1024, 1024)
        self.fc7 = nn.Linear(1024, out_channels)
        self.dropout = nn.Dropout(0.5)

        
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc3(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc4(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc5(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc6(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc7(x)
        x = F.relu(x)

        return x

In [25]:
poseencoder = PoseEncoder(2, 4)
x = torch.tensor([1.0,2.0])

In [26]:
poseencoder(x)

tensor([0.0000, 0.0000, 0.0026, 0.0408], grad_fn=<ReluBackward0>)