In [29]:
# -------------------------------
# 0. Imports and Setup
# -------------------------------
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import cv2

In [30]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [31]:
# -------------------------------
# 1. Data Loading and Frame Extraction
# -------------------------------
def load_annotations(csv_path):
    df = pd.read_csv(csv_path)
    label_map = {'truthful': 0, 'deceptive': 1}
    df['label'] = df['class'].map(label_map)
    return df

def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while len(frames) < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, resize)
        frames.append(frame)
    cap.release()
    return frames


In [32]:
# -------------------------------
# 2. Feature Extractor (MobileNetV2)
# -------------------------------
class FeatureExtractor:
    def __init__(self, device='cuda'):
        self.device = device
        mobilenet = models.mobilenet_v2(pretrained=True)
        self.model = mobilenet.features
        self.model.eval().to(device)

        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def extract(self, video_path, max_frames=30):
        frames = extract_frames(video_path, max_frames)
        features = []
        for frame in frames:
            input_tensor = self.transform(frame).unsqueeze(0).to(self.device)
            with torch.no_grad():
                feat = self.model(input_tensor)
                feat = torch.mean(feat, dim=[2, 3])
            features.append(feat.squeeze(0).cpu().numpy())
        return np.stack(features)

def cache_features(df, video_dir, cache_dir, max_frames=30):
    os.makedirs(cache_dir, exist_ok=True)
    extractor = FeatureExtractor(device)

    for idx, row in tqdm(df.iterrows(), total=len(df)):
        video_name = row['id']
        cache_path = os.path.join(cache_dir, video_name.replace('.mp4', '.npy'))

        if not os.path.exists(cache_path):
            folder = 'Truthful' if row['label'] == 0 else 'Deceptive'
            video_path = os.path.join(video_dir, folder, video_name)
            try:
                features = extractor.extract(video_path, max_frames)
                np.save(cache_path, features)
            except Exception as e:
                print(f"Error processing {video_name}: {str(e)}")

In [33]:
# -------------------------------
# 3. Dataset and DataLoader
# -------------------------------
class VideoDataset(Dataset):
    def __init__(self, df, cache_dir, max_frames=30):
        self.df = df.reset_index(drop=True)
        self.cache_dir = cache_dir
        self.max_frames = max_frames

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        video_name = row['id'].replace('.mp4', '.npy')
        features_path = os.path.join(self.cache_dir, video_name)

        try:
            features = np.load(features_path)
        except:
            features = np.zeros((self.max_frames, 1280))

        if len(features) > self.max_frames:
            features = features[:self.max_frames]
        elif len(features) < self.max_frames:
            pad = np.zeros((self.max_frames - len(features), features.shape[1]))
            features = np.vstack([features, pad])

        return torch.FloatTensor(features), torch.tensor(row['label'], dtype=torch.float32)

def collate_fn(batch):
    features, labels = zip(*batch)
    features = torch.stack(features)
    labels = torch.stack(labels)
    lengths = torch.tensor([len(f) for f in features], dtype=torch.long)
    return features, labels, lengths

In [38]:
# # -------------------------------
# # 4. Model: CNN + BLSTM
# # -------------------------------
# class EnhancedCNNBLSTM(nn.Module):
#     def __init__(self, input_size=1280, hidden_size=512, num_layers=3, dropout=0.5):
#         super().__init__()
#         self.feature_reducer = nn.Sequential(
#             nn.Linear(input_size, 512),
#             nn.ReLU()
#         )

#         self.blstm = nn.LSTM(
#             input_size=512,
#             hidden_size=hidden_size,
#             num_layers=num_layers,
#             bidirectional=True,
#             batch_first=True,
#             dropout=dropout
#         )
#         self.lstm_norm = nn.LayerNorm(hidden_size * 2)

#         self.attention = nn.Sequential(
#             nn.Linear(hidden_size * 2, hidden_size),
#             nn.Tanh(),
#             nn.Linear(hidden_size, 1, bias=False)
#         )

#         self.temporal_cnn = nn.Sequential(
#             nn.Conv1d(hidden_size * 2, hidden_size, kernel_size=5, padding=2),
#             nn.BatchNorm1d(hidden_size),
#             nn.GELU(),
#             nn.Dropout(dropout),
#             nn.Conv1d(hidden_size, hidden_size // 2, kernel_size=3, padding=1),
#             nn.BatchNorm1d(hidden_size // 2),
#             nn.GELU(),
#             nn.AdaptiveAvgPool1d(1)
#         )

#         self.classifier = nn.Sequential(
#             nn.Linear(hidden_size // 2, 256),
#             nn.GELU(),
#             nn.Dropout(dropout),
#             nn.LayerNorm(256),
#             nn.Linear(256, 1)
#         )

#     def forward(self, x, lengths):
#         x = self.feature_reducer(x)
#         packed = nn.utils.rnn.pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
#         packed_out, _ = self.blstm(packed)
#         out, _ = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)

#         attn_weights = F.softmax(self.attention(out), dim=1)
#         out = torch.sum(attn_weights * out, dim=1)
#         out = self.lstm_norm(out)

#         out = out.unsqueeze(-1)
#         out = self.temporal_cnn(out).squeeze(-1)

#         return self.classifier(out).squeeze(1)

In [47]:
class CNN_BiLSTM(nn.Module):
    def __init__(self, input_dim, cnn_out_channels, cnn_kernel_size, lstm_hidden_size,
                 lstm_num_layers, fc_hidden_size, dropout_rate, bidirectional=True):
        super(CNN_BiLSTM, self).__init__()
        
        # 1D CNN expects input of shape (batch, channels=input_dim, sequence_len)
        self.cnn = nn.Conv1d(in_channels=input_dim,
                             out_channels=cnn_out_channels,
                             kernel_size=cnn_kernel_size,
                             padding=cnn_kernel_size // 2)

        self.lstm = nn.LSTM(input_size=cnn_out_channels,
                            hidden_size=lstm_hidden_size,
                            num_layers=lstm_num_layers,
                            batch_first=True,
                            bidirectional=bidirectional)

        self.bn = nn.BatchNorm1d(cnn_out_channels)

        lstm_output_dim = lstm_hidden_size * (2 if bidirectional else 1)
        
        self.fc1 = nn.Linear(lstm_output_dim, fc_hidden_size)
        self.fc2 = nn.Linear(fc_hidden_size, 1)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, lengths):
        # x: (batch_size, seq_len, input_dim)
        x = x.permute(0, 2, 1)  # (batch_size, input_dim, seq_len)
        x = self.bn(self.cnn(x))  # (batch_size, cnn_out_channels, seq_len)
        x = x.permute(0, 2, 1)  # (batch_size, seq_len, cnn_out_channels)

        # Optional: Adjust lengths if CNN changes time dim
        # Skipped here because padding preserves length

        packed = nn.utils.rnn.pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_out, _ = self.lstm(packed)
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)

        out, _ = torch.max(lstm_out, dim=1)  # (batch_size, lstm_output_dim)

        out = self.dropout(self.fc1(out))
        out = self.fc2(out).squeeze(1)     # (batch_size,)
        return out

In [40]:
# -------------------------------
# 5. Training & Evaluation
# -------------------------------
def train_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss = 0
    for features, labels, lengths in loader:
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(features, lengths)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

def evaluate(model, loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for features, labels, lengths in loader:
            features = features.to(device)
            outputs = model(features, lengths)
            preds = torch.sigmoid(outputs).cpu().numpy() > 0.5
            all_preds.extend(preds)
            all_labels.extend(labels.numpy())
    return {
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds),
        'recall': recall_score(all_labels, all_preds),
        'f1': f1_score(all_labels, all_preds)
    }

In [54]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.8, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        pt = torch.exp(-BCE_loss)
        focal_loss = self.alpha * (1-pt)**self.gamma * BCE_loss
        return focal_loss.mean()

In [71]:
# -------------------------------
# 6. Full Pipeline Entry Point
# -------------------------------
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load and split dataset
    annotations = load_annotations("data/annotations.csv")
    train_df, test_df = train_test_split(annotations, test_size=0.2, stratify=annotations['label'], random_state=42)
    train_df, val_df = train_test_split(train_df, test_size=0.125, stratify=train_df['label'], random_state=42)

    video_dir = "data/Clips"
    cache_dir = "cached_features"
    max_frames = 30

    # Cache features if not already
    print("Caching features...")
    cache_features(train_df, video_dir, cache_dir, max_frames)
    cache_features(val_df, video_dir, cache_dir, max_frames)
    cache_features(test_df, video_dir, cache_dir, max_frames)

    # DataLoaders
    train_loader = DataLoader(VideoDataset(train_df, cache_dir, max_frames), batch_size=8, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(VideoDataset(val_df, cache_dir, max_frames), batch_size=8, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(VideoDataset(test_df, cache_dir, max_frames), batch_size=16, shuffle=False, collate_fn=collate_fn)

    # Initialize CNN-BiLSTM model
    model = CNN_BiLSTM(
        input_dim=1280,
        cnn_out_channels=128,
        cnn_kernel_size=5,
        lstm_hidden_size=256,
        lstm_num_layers=2,
        fc_hidden_size=128,
        dropout_rate=0.5,
        bidirectional=True
    ).to(device)

    # Optimizer with weight decay
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=5e-5,           # Reduced from 1e-4 (prevent overshooting)
        weight_decay=1e-5
    )

    # Optional: compute class imbalance for pos_weight
    pos_weight = torch.tensor([
        len(train_df[train_df['label']==0]) / 
        len(train_df[train_df['label']==1])
    ], device=device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    # criterion = FocalLoss()

    # Learning rate scheduler
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer,
        T_0=10,            # Reset every 10 epochs
        eta_min=1e-6        # Min learning rate
    )

    # Training loop
    for epoch in range(25):
        loss = train_epoch(model, train_loader, optimizer, criterion)
        scheduler.step()
        metrics = evaluate(model, val_loader)
        print(f"Epoch {epoch+1}: Loss={loss:.4f} | Val Acc={metrics['accuracy']:.4f} | F1={metrics['f1']:.4f}")

    # Final test evaluation
    print("Final Evaluation on Test Set:")
    test_metrics = evaluate(model, test_loader)
    print(test_metrics)



Caching features...


100%|██████████| 84/84 [00:00<00:00, 21460.77it/s]
100%|██████████| 12/12 [00:00<00:00, 7926.24it/s]
100%|██████████| 25/25 [00:00<00:00, 12496.44it/s]


Epoch 1: Loss=0.6842 | Val Acc=0.5000 | F1=0.6667
Epoch 2: Loss=0.6582 | Val Acc=0.5833 | F1=0.7059
Epoch 3: Loss=0.6357 | Val Acc=0.7500 | F1=0.7692
Epoch 4: Loss=0.5955 | Val Acc=0.7500 | F1=0.7692
Epoch 5: Loss=0.5591 | Val Acc=0.7500 | F1=0.7692
Epoch 6: Loss=0.4946 | Val Acc=0.7500 | F1=0.7692
Epoch 7: Loss=0.4760 | Val Acc=0.7500 | F1=0.7692
Epoch 8: Loss=0.4267 | Val Acc=0.7500 | F1=0.7692
Epoch 9: Loss=0.4245 | Val Acc=0.7500 | F1=0.7692
Epoch 10: Loss=0.4028 | Val Acc=0.7500 | F1=0.7692
Epoch 11: Loss=0.3491 | Val Acc=0.8333 | F1=0.8571
Epoch 12: Loss=0.2887 | Val Acc=0.9167 | F1=0.9091
Epoch 13: Loss=0.2153 | Val Acc=0.9167 | F1=0.9091
Epoch 14: Loss=0.1624 | Val Acc=0.7500 | F1=0.7692
Epoch 15: Loss=0.1452 | Val Acc=0.8333 | F1=0.8333
Epoch 16: Loss=0.1474 | Val Acc=0.9167 | F1=0.9091
Epoch 17: Loss=0.0851 | Val Acc=0.8333 | F1=0.8333
Epoch 18: Loss=0.1499 | Val Acc=0.8333 | F1=0.8333
Epoch 19: Loss=0.0706 | Val Acc=0.9167 | F1=0.9091
Epoch 20: Loss=0.0540 | Val Acc=0.9167 |