In [1]:
import torchvision.transforms as transforms
from dataset import MultiViewDataset, SelfSupervisedDataset
from torchvision.models.video import R2Plus1D_18_Weights, r2plus1d_18
import torch
from torchsummary import summary

In [2]:
transformAug = transforms.Compose(
    [
        transforms.RandomAffine(degrees=(0, 0), translate=(0.1, 0.1), scale=(0.9, 1)),
        transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
        transforms.RandomRotation(degrees=5),
        transforms.ColorJitter(brightness=0.5, saturation=0.5, contrast=0.5),
        transforms.RandomHorizontalFlip(),
    ]
)

In [3]:
transforms_model = R2Plus1D_18_Weights.KINETICS400_V1.transforms()

In [4]:
path = "/work3/s194572/SoccerData/mvfouls/"
start_frame = 0
end_frame = 125
fps = 25
split = "Train"
num_views = 4
dataset = MultiViewDataset(
    path=path,
    start=start_frame,
    end=end_frame,
    fps=fps,
    split=split,
    num_views=num_views,
    transform=transformAug,
    transform_model=transforms_model,
)

dataset_async_view = SelfSupervisedDataset(
    path=path,
    start=start_frame,
    end=end_frame,
    fps=fps,
    split=split,
    num_views=num_views,
    transform=transformAug,
    transform_model=transforms_model,
    semi_type='time'
)
print(len(dataset_async_view))

/work3/s194572/SoccerData/mvfouls/Train/annotations.json
/work3/s194572/SoccerData/mvfouls/Train/annotations.json
20


In [5]:
data_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=2, shuffle=True, pin_memory=True)
data_loader_async = torch.utils.data.DataLoader(dataset=dataset_async_view, batch_size=2, shuffle=True, pin_memory=True)

In [6]:
target_offence_severity, target_action, mvclips, target_action_idx = next(iter(data_loader))



In [10]:
mvclips.shape, target_action_idx, target_offence_severity

NameError: name 'mvclips' is not defined

In [6]:
mvclips, target = next(iter(data_loader_async))
print(mvclips.shape)
print(target.shape)
print(target)



49 111
1 63
torch.Size([2, 2, 3, 62, 112, 112])
torch.Size([2])
tensor([0., 1.])


In [15]:
model = r2plus1d_18(weights=R2Plus1D_18_Weights.DEFAULT)
model.fc = torch.nn.Sequential()
lifting_net = torch.nn.Sequential()


In [16]:
feat_dim = 512

In [17]:
B, V, C, D, H, W = mvclips.shape

In [20]:
from utils import batch_tensor, unbatch_tensor

In [21]:
mvclips.shape

torch.Size([2, 2, 3, 125, 112, 112])

In [22]:
# move the viewpoint dimension to the batch dimension
# so that we can pass it through the model
tmp0 = batch_tensor(mvclips, dim=1, squeeze=True)
tmp0.shape


torch.Size([4, 3, 125, 112, 112])

In [32]:
summary(model, (4,3,125,112,112))

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [23]:
tmp1 = model(tmp0)
tmp1.shape


torch.Size([4, 512])

In [24]:
# reshape the output to the original shape
aux = unbatch_tensor(tmp1, batch_size=B, dim=1, unsqueeze=True)
aux.shape

torch.Size([2, 2, 512])

In [29]:
aux = lifting_net(aux)
aux.shape
aux = aux.view(-1, feat_dim*2)
aux.shape

torch.Size([2, 1024])

In [17]:
# mean pooling
pooled_view = torch.mean(aux, dim=1)
pooled_view.shape

torch.Size([2, 512])

In [18]:
# I don't know why they didn't use RELU here
fc_inter = torch.nn.Sequential(
    torch.nn.LayerNorm(feat_dim),
    torch.nn.Linear(feat_dim, feat_dim),
    torch.nn.Linear(feat_dim, feat_dim),
)
fc_offence = torch.nn.Sequential(
    torch.nn.LayerNorm(feat_dim),
    torch.nn.Linear(feat_dim, feat_dim),
    torch.nn.Linear(feat_dim, 4),
)
fc_action = torch.nn.Sequential(
    torch.nn.LayerNorm(feat_dim),
    torch.nn.Linear(feat_dim, feat_dim),
    torch.nn.Linear(feat_dim, 8),
)

In [19]:
inter = fc_inter(pooled_view)
pred_action = fc_action(inter)
pred_offence = fc_offence(inter)
pred_action, pred_offence

(tensor([[ 0.5827,  0.3194, -0.3739, -0.0878,  0.1025,  0.5530, -0.2489, -0.4822],
         [-0.6568, -0.3731,  0.2011,  0.0999, -0.1626, -0.3944, -0.0091,  0.2515]],
        grad_fn=<AddmmBackward0>),
 tensor([[-0.1725,  0.5863, -0.0718,  0.4702],
         [-0.0640, -0.2364,  0.1644, -0.1343]], grad_fn=<AddmmBackward0>))