In [48]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
import time
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_video
from torchvision import transforms


In [14]:
from google.colab import drive
drive.mount('/content/drive')
print("Drive mounted")

Drive mounted


In [49]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32
num_classes = 11 # crowd11 classes for movement
num_epochs = 100
learning_rate = 0.001
file_path = '/content/drive/MyDrive/CABR/data/'

In [50]:
class_dict_motion = {
    0: 'gas_free',
    1: 'gas_jammed',
    2: 'laminar_flow',
    3: 'turbulent_flow',
    4: 'crossing_flow',
    5: 'merging_flow',
    6: 'diverging_flow',
    7: 'static_calm',
    8: 'static_agitated',
    9: 'interacting_crowd',
    10: 'no_crowd'
}

In [19]:
class AppearanceBranch(nn.Module):
    def __init__(self):
        super(AppearanceBranch, self).__init__()
        
        self.branch_appearance = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5),
            nn.Conv2d(96, 256, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5),
            nn.Conv2d(256, 384, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Flatten()
        )
    
    def forward(self, x):
        out_appearance = self.branch_appearance(x)
        return out_appearance

In [20]:
class MotionBranch(nn.Module):
    def __init__(self):
        super(MotionBranch, self).__init__()
        
        self.branch_motion = nn.Sequential(
            nn.Conv2d(20, 96, kernel_size=7, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5),
            nn.Conv2d(96, 256, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5),
            nn.Conv2d(256, 384, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Flatten()
        )
    
    def forward(self, x):
        out_motion = self.branch_motion(x)
        return out_motion

In [21]:
class DeeplyLearnedAttributes(nn.Module):
    def __init__(self):
        super(DeeplyLearnedAttributes, self).__init__()
        
        self.appearance_branch = AppearanceBranch()
        self.motion_branch = MotionBranch()
        self.concat = nn.Linear(4096*2, 51)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x_appearance, x_motion):
        out_appearance = self.appearance_branch(x_appearance)
        out_motion = self.motion_branch(x_motion)
        out = torch.cat((out_appearance, out_motion), dim=1)
        out = self.concat(out)
        out = self.sigmoid(out)
        return out

model = DeeplyLearnedAttributes()

print(model)

DeeplyLearnedAttributes(
  (appearance_branch): AppearanceBranch(
    (branch_appearance): Sequential(
      (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
      (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(2, 2))
      (5): ReLU()
      (6): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (7): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
      (8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1))
      (9): ReLU()
      (10): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1))
      (11): ReLU()
      (12): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1))
      (13): ReLU()
      (14): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (15): Flatten(start_dim=1, end_dim=-1)
    )
  )
  (motion_branch): MotionBranch(
    (branch_motio

In [22]:
class C3D(nn.Module):
    def __init__(self, num_classes=487):
        super(C3D, self).__init__()

        self.conv1 = nn.Conv3d(3, 64, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.conv2 = nn.Conv3d(64, 128, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv3a = nn.Conv3d(128, 256, kernel_size=3, padding=1)
        self.conv3b = nn.Conv3d(256, 256, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv4a = nn.Conv3d(256, 512, kernel_size=3, padding=1)
        self.conv4b = nn.Conv3d(512, 512, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv5a = nn.Conv3d(512, 512, kernel_size=3, padding=1)
        self.conv5b = nn.Conv3d(512, 512, kernel_size=3, padding=1)
        self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.fc6 = nn.Linear(8192, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, num_classes)

        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)

        x = F.relu(self.conv2(x))
        x = self.pool2(x)

        x = F.relu(self.conv3a(x))
        x = F.relu(self.conv3b(x))
        x = self.pool3(x)

        x = F.relu(self.conv4a(x))
        x = F.relu(self.conv4b(x))
        x = self.pool4(x)

        x = F.relu(self.conv5a(x))
        x = F.relu(self.conv5b(x))
        x = self.pool5(x)

        x = x.view(x.size(0), -1)

        x = F.relu(self.fc6(x))
        x = self.dropout(x)

        x = F.relu(self.fc7(x))
        x = self.dropout(x)

        x = self.fc8(x)

        return F.softmax(x, dim=1)

In [23]:
def get_model(input_shape=(3, 16, 112, 112), summary=False, num_classes=487):
    model = C3D(num_classes=num_classes)
    if summary:
        print(model)
    return model

get_model()

C3D(
  (conv1): Conv3d(3, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool1): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3a): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv3b): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool3): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv4a): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv4b): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool4): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv5a): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=

In [24]:
# linking appearance and motion branch

class DeeplyLearnedAttributesModified(nn.Module):
    def __init__(self):
        super(DeeplyLearnedAttributesModified, self).__init__()
        
        self.appearance_branch = AppearanceBranch()
        self.motion_branch = C3D()
        self.concat = nn.Linear(4096*2, 51)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x_appearance, x_motion):
        out_appearance = self.appearance_branch(x_appearance)
        out_motion = self.motion_branch(x_motion)
        out = torch.cat((out_appearance, out_motion), dim=1)
        out = self.concat(out)
        out = self.sigmoid(out)
        return out

In [25]:
new_model_obj = DeeplyLearnedAttributesModified()
print(new_model_obj)

DeeplyLearnedAttributesModified(
  (appearance_branch): AppearanceBranch(
    (branch_appearance): Sequential(
      (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
      (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(2, 2))
      (5): ReLU()
      (6): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (7): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
      (8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1))
      (9): ReLU()
      (10): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1))
      (11): ReLU()
      (12): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1))
      (13): ReLU()
      (14): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (15): Flatten(start_dim=1, end_dim=-1)
    )
  )
  (motion_branch): C3D(
    (conv1): Conv3

In [26]:
class MotionMapsDataset(Dataset):
    def __init__(self, file_paths, transform=None):
        self.file_paths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        video_path = self.file_paths[idx]
        frames, frame_rate = read_video(video_path)
        if self.transform:
            frames = [self.transform(frame) for frame in frames]
        frames = torch.stack(frames)
        label = self._extract_label(video_path)
        return frames, label

    def _extract_label(self, video_path):
        return int(video_path.split('/')[-2])

In [27]:
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
])
dataset = MotionMapsDataset(file_paths=file_path, transform=transform)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [28]:
model.parameters(), lr=0.001)odel = C3D().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(m

In [45]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    
    for inputs, labels in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    epoch_loss = running_loss / total_samples
    epoch_accuracy = correct_predictions / total_samples * 100.0

    print(f"Epoch [{epoch + 1}/{num_epochs}]")

Epoch [1/100]
Epoch [2/100]
Epoch [3/100]
Epoch [4/100]
Epoch [5/100]
Epoch [6/100]
Epoch [7/100]
Epoch [8/100]
Epoch [9/100]
Epoch [10/100]
Epoch [11/100]
Epoch [12/100]
Epoch [13/100]
Epoch [14/100]
Epoch [15/100]
Epoch [16/100]
Epoch [17/100]
Epoch [18/100]
Epoch [19/100]
Epoch [20/100]
Epoch [21/100]
Epoch [22/100]
Epoch [23/100]
Epoch [24/100]
Epoch [25/100]
Epoch [26/100]
Epoch [27/100]
Epoch [28/100]
Epoch [29/100]
Epoch [30/100]
Epoch [31/100]
Epoch [32/100]
Epoch [33/100]
Epoch [34/100]
Epoch [35/100]
Epoch [36/100]
Epoch [37/100]
Epoch [38/100]
Epoch [39/100]
Epoch [40/100]
Epoch [41/100]
Epoch [42/100]
Epoch [43/100]
Epoch [44/100]
Epoch [45/100]
Epoch [46/100]
Epoch [47/100]
Epoch [48/100]
Epoch [49/100]
Epoch [50/100]
Epoch [51/100]
Epoch [52/100]
Epoch [53/100]
Epoch [54/100]
Epoch [55/100]
Epoch [56/100]
Epoch [57/100]
Epoch [58/100]
Epoch [59/100]
Epoch [60/100]
Epoch [61/100]
Epoch [62/100]
Epoch [63/100]
Epoch [64/100]
Epoch [65/100]
Epoch [66/100]
Epoch [67/100]
Epoc

In [51]:
test_file = '000125733.npy'
inference_data = file_path + 'test/' + test_file

In [52]:
model.eval()

video_path = inference_data
frame = MotionMapsDataset(file_paths=video_path, transform=transform)
frames = DataLoader(frame, batch_size=1, shuffle=True)

with torch.no_grad():
    outputs = model(frames.unsqueeze(0))

probabilities = torch.softmax(outputs, dim=1)
predicted_class = torch.argmax(probabilities, dim=1).item()

print(f"Predicted class: {predicted_class}, Predicted Class Label: {class_dict_motion[predicted_class]}")

Predicted class: 2, Predicted Class Label: laminar_flow


In [53]:
test_file = '055271257.npy'
inference_data = file_path + 'test/' + test_file

In [54]:
model.eval()

video_path = inference_data
frame = MotionMapsDataset(file_paths=video_path, transform=transform)
frames = DataLoader(frame, batch_size=1, shuffle=True)

with torch.no_grad():
    outputs = model(frames.unsqueeze(0))

probabilities = torch.softmax(outputs, dim=1)
predicted_class = torch.argmax(probabilities, dim=1).item()

print(f"Predicted class: {predicted_class}, Predicted Class Label: {class_dict_motion[predicted_class]}")

Predicted class: 2, Predicted Class Label: laminar_flow
