In [1]:
import os
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim as optim
from torchinfo import summary

import dataloader as dataloader
import utils

import time
import importlib

In [2]:
importlib.reload(dataloader)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
class CNNLSTM(nn.Module):
    def __init__(self, num_classes):
        super(CNNLSTM, self).__init__()

        self.conv1 = nn.Conv3d(3, 32, kernel_size=(3, 3, 3), padding=(2, 2, 2))
        self.conv2 = nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=(2, 2, 2))
        
        self.pool = nn.MaxPool3d(kernel_size=(5, 5, 5))

        # Calculate input size for LSTM
        self.input_size = 64 * 1 * 1
        
        self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=512, num_layers=1, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        batch_size, C, H, W, timesteps = x.size()

        c_in = x
        
        c_out = self.pool(torch.relu(self.conv1(c_in)))
        c_out = self.pool(torch.relu(self.conv2(c_out)))
        
        r_in = c_out.view(batch_size, timesteps, -1)
        
        r_out, (h_n, h_c) = self.lstm(r_in)
        r_out = self.fc(r_out[:, -1, :])
        
        return r_out


In [6]:
# Example usage with dummy data

## model initialization
model = CNNLSTM(num_classes=5).to(device)
print(model)

## Generate random dummy input data within the defined shape
dummy_data = torch.randn(2, 3, 112, 112, 16).to(device)  # Batch size, channels, height, width, frames

## Forward pass through the model
output = model(dummy_data)

# Print the output shape
print("Output shape:", output.shape)

CNNLSTM(
  (conv1): Conv3d(3, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(2, 2, 2))
  (conv2): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(2, 2, 2))
  (pool): MaxPool3d(kernel_size=(5, 5, 5), stride=(5, 5, 5), padding=0, dilation=1, ceil_mode=False)
  (lstm): LSTM(64, 512, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=1024, out_features=5, bias=True)
)
Output shape: torch.Size([2, 5])


In [7]:
# Print model summary
summary(model)

Layer (type:depth-idx)                   Param #
CNNLSTM                                  --
├─Conv3d: 1-1                            2,624
├─Conv3d: 1-2                            55,360
├─MaxPool3d: 1-3                         --
├─LSTM: 1-4                              2,367,488
├─Linear: 1-5                            5,125
Total params: 2,430,597
Trainable params: 2,430,597
Non-trainable params: 0

In [8]:
## initialize dataloader

# Define your transform (data pre-processing) fınction
# Define your dataset with transform
transform = transforms.Compose([
    dataloader.myUCF5Preprocessing(output_size=(112, 112))
])


# Define your dataset
dataset = dataloader.myUCF5Loader(root_dir='UCF5', transform=transform)


# Split the dataset
train_set, val_set, test_set = dataloader.split_dataset(dataset)


# Create data loaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=2, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=1, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False)


In [9]:
# Training loop


# hyper-params
num_epochs = 3
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# dont forget to send model to device 
model.to(device) 

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device, dtype=torch.float32), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)

    # Validation loop
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device, dtype=torch.float32), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {running_loss / len(train_loader.dataset):.4f}, "
          f"Val Loss: {val_loss / len(val_loader.dataset):.4f}, "
          f"Val Acc: {(100 * correct / total):.2f}%")

# Testing loop
model.eval()
test_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device, dtype=torch.float32), targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

print(f"Test Loss: {test_loss / len(test_loader.dataset):.4f}, "
      f"Test Acc: {(100 * correct / total):.2f}%")




Epoch [1/3], Train Loss: 1.6576, Val Loss: 1.6240, Val Acc: 10.00%
Epoch [2/3], Train Loss: 1.6073, Val Loss: 1.6444, Val Acc: 10.00%
Epoch [3/3], Train Loss: 1.6032, Val Loss: 1.5843, Val Acc: 20.00%
Test Loss: 1.5254, Test Acc: 30.00%
