In [2]:
# -------------------------------
# 0. Imports and Setup
# -------------------------------
import os
import warnings

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [3]:
# Select the compute device once: CUDA when PyTorch reports a usable GPU, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [4]:
# -------------------------------
# 1. Data Loading and Frame Extraction
# -------------------------------
def load_annotations(csv_path):
    """Read the annotation CSV and attach an integer ``label`` column.

    Args:
        csv_path: Path to a CSV file with a ``class`` column whose values are
            ``truthful`` or ``deceptive``. Case and surrounding whitespace are
            ignored when matching.

    Returns:
        pandas.DataFrame: The CSV contents plus an int64 ``label`` column
        (0 = truthful, 1 = deceptive). The ``class`` column is left untouched.

    Raises:
        ValueError: If any ``class`` value is missing or is not one of the two
            known classes. Without this check such rows would receive a NaN
            label and only fail later, far from the cause.
    """
    df = pd.read_csv(csv_path)
    label_map = {'truthful': 0, 'deceptive': 1}

    # Match on a normalised copy so "Truthful" or " deceptive " still map correctly.
    normalized = df['class'].astype(str).str.strip().str.lower()
    labels = normalized.map(label_map)

    unknown = labels.isna()
    if unknown.any():
        bad_values = sorted(df.loc[unknown, 'class'].astype(str).unique())
        raise ValueError(f"Unrecognised class value(s) in {csv_path}: {bad_values}")

    df['label'] = labels.astype('int64')
    return df

def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read up to ``max_frames`` leading frames of a video as resized RGB images.

    Frames are taken consecutively from the start of the video; no temporal
    sub-sampling is performed.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Upper bound on the number of frames returned.
        resize: Target size passed to ``cv2.resize`` (OpenCV's ``dsize``,
            i.e. ``(width, height)``).

    Returns:
        list: One image per decoded frame, in order. The list is shorter than
        ``max_frames`` when the video ends early and empty when nothing could
        be read (for example when the file cannot be opened).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes to BGR; the downstream transform expects RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the capture handle even if colour conversion or resizing raises.
        cap.release()
    return frames


In [5]:
# -------------------------------
# 2. Feature Extractor (MobileNetV2)
# -------------------------------
class FeatureExtractor:
    """Per-frame feature extractor built on the convolutional trunk of MobileNetV2.

    Only ``mobilenet.features`` is kept (the classifier head is dropped); its
    output maps are averaged over the two spatial dimensions to give one
    feature vector per frame.
    """

    def __init__(self, device='cuda'):
        """Load the ImageNet-pretrained backbone and build the frame transform.

        Args:
            device: Device (string or ``torch.device``) the backbone is moved to
                and on which frames are processed.
        """
        self.device = device

        # Newer torchvision deprecates ``pretrained=True`` in favour of ``weights=``.
        # IMAGENET1K_V1 is the checkpoint ``pretrained=True`` used to load, so the
        # extracted features (and any existing cache) stay comparable.
        weights_enum = getattr(models, 'MobileNet_V2_Weights', None)
        if weights_enum is not None:
            mobilenet = models.mobilenet_v2(weights=weights_enum.IMAGENET1K_V1)
        else:
            mobilenet = models.mobilenet_v2(pretrained=True)
        self.model = mobilenet.features
        self.model.eval().to(device)

        # Frames arrive as arrays from extract_frames; normalise with the usual
        # ImageNet channel statistics.
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def extract(self, video_path, max_frames=30):
        """Compute one pooled feature vector per frame of a video.

        Args:
            video_path: Path of the video to read.
            max_frames: Maximum number of leading frames to process.

        Returns:
            numpy.ndarray: Array of shape ``(n_frames, channels)`` where
            ``n_frames <= max_frames``.

        Raises:
            ValueError: If no frame could be read from ``video_path``.
        """
        frames = extract_frames(video_path, max_frames)
        if not frames:
            raise ValueError(f"No frames could be read from {video_path}")

        # One batched forward pass instead of one pass per frame; the model is in
        # eval mode, so batching does not change the per-frame result.
        batch = torch.stack([self.transform(frame) for frame in frames]).to(self.device)
        with torch.no_grad():
            feature_maps = self.model(batch)
            pooled = torch.mean(feature_maps, dim=[2, 3])  # global average pool over H, W
        return pooled.cpu().numpy()

def cache_features(df, video_dir, cache_dir, max_frames=30):
    """Extract and store per-video features for every row of ``df`` not yet cached.

    For each row the video is looked up under ``video_dir/Truthful`` when its
    ``label`` is 0 and under ``video_dir/Deceptive`` otherwise. Features are
    written to ``cache_dir`` under the video's ``id`` with ``.mp4`` replaced by
    ``.npy``. Rows whose cache file already exists are skipped. A failure on one
    video is printed and does not stop the remaining videos.

    Args:
        df: DataFrame with ``id`` (video file name) and ``label`` columns.
        video_dir: Root directory holding the ``Truthful``/``Deceptive`` folders.
        cache_dir: Output directory; created if it does not exist.
        max_frames: Maximum number of frames processed per video.
    """
    os.makedirs(cache_dir, exist_ok=True)
    extractor = FeatureExtractor(device)

    for _, record in tqdm(df.iterrows(), total=len(df)):
        clip_name = record['id']
        target = os.path.join(cache_dir, clip_name.replace('.mp4', '.npy'))
        if os.path.exists(target):
            continue  # features for this clip were cached by an earlier run

        subdir = 'Truthful' if record['label'] == 0 else 'Deceptive'
        source = os.path.join(video_dir, subdir, clip_name)
        try:
            np.save(target, extractor.extract(source, max_frames))
        except Exception as e:
            print(f"Error processing {clip_name}: {str(e)}")

In [6]:
# -------------------------------
# 3. Dataset and DataLoader
# -------------------------------
class VideoDataset(Dataset):
    """Dataset of cached per-frame feature sequences with binary labels.

    Every item is a ``(features, label)`` pair. ``features`` is a float32 tensor
    with exactly ``max_frames`` rows: longer sequences are truncated and shorter
    ones are padded with all-zero rows at the end.
    """

    def __init__(self, df, cache_dir, max_frames=30):
        """
        Args:
            df: DataFrame with ``id`` (video file name) and ``label`` columns.
            cache_dir: Directory containing the cached ``.npy`` feature files.
            max_frames: Fixed sequence length of every returned item.
        """
        self.df = df.reset_index(drop=True)
        self.cache_dir = cache_dir
        self.max_frames = max_frames

    def __len__(self):
        """Number of videos in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, truncate or pad the cached features of the ``idx``-th video.

        Returns:
            tuple: ``(features, label)`` as float32 tensors; ``features`` has
            shape ``(max_frames, feature_dim)`` and ``label`` is a scalar.
        """
        row = self.df.iloc[idx]
        features_path = os.path.join(self.cache_dir, row['id'].replace('.mp4', '.npy'))

        try:
            features = np.load(features_path)
        except (OSError, ValueError, EOFError) as exc:
            # Keep the best-effort zero fallback for a missing or unreadable cache
            # file, but say so. A bare ``except`` would also hide KeyboardInterrupt
            # and genuine programming errors.
            warnings.warn(
                f"Could not load cached features from {features_path} ({exc}); "
                "using an all-zero sequence instead.",
                RuntimeWarning,
            )
            # 1280 matches the feature width the rest of the notebook uses (input_dim=1280).
            features = np.zeros((self.max_frames, 1280), dtype=np.float32)

        n_frames = len(features)
        if n_frames > self.max_frames:
            features = features[:self.max_frames]
        elif n_frames < self.max_frames:
            pad = np.zeros((self.max_frames - n_frames, features.shape[1]), dtype=features.dtype)
            features = np.vstack([features, pad])

        return (torch.as_tensor(features, dtype=torch.float32),
                torch.tensor(row['label'], dtype=torch.float32))

def collate_fn(batch):
    """Stack dataset items into a batch and recover each sequence's true length.

    The dataset pads every sequence to a fixed number of frames with all-zero
    rows at the end, so the length of a stacked tensor is always the padded
    length. The true length is recovered here as the position of the last frame
    with any non-zero component, so that packing downstream can skip the padding.

    Args:
        batch: Iterable of ``(features, label)`` tensor pairs; all ``features``
            tensors must share one shape.

    Returns:
        tuple: ``(features, labels, lengths)``. ``features`` and ``labels`` are
        the stacked inputs and ``lengths`` is a ``torch.long`` tensor with one
        entry per item. A sequence that is entirely zero is reported with
        length 1, because ``pack_padded_sequence`` rejects zero-length sequences.
    """
    features, labels = zip(*batch)
    features = torch.stack(features)
    labels = torch.stack(labels)

    if features.dim() < 3 or features.size(1) == 0:
        # No (frames, feature) layout to inspect: report the stacked length unchanged.
        lengths = torch.tensor([len(f) for f in features], dtype=torch.long)
        return features, labels, lengths

    has_signal = (features != 0).any(dim=-1)                    # (batch, frames)
    frame_numbers = torch.arange(1, features.size(1) + 1, device=features.device)
    # The largest 1-based index of a non-zero frame is the unpadded length
    # (0 when every frame is zero).
    lengths = (has_signal.long() * frame_numbers).max(dim=1).values
    lengths = lengths.clamp(min=1)
    return features, labels, lengths

In [7]:
# # -------------------------------
# # 4. Model: CNN + BLSTM
# # -------------------------------
# class EnhancedCNNBLSTM(nn.Module):
#     def __init__(self, input_size=1280, hidden_size=512, num_layers=3, dropout=0.5):
#         super().__init__()
#         self.feature_reducer = nn.Sequential(
#             nn.Linear(input_size, 512),
#             nn.ReLU()
#         )

#         self.blstm = nn.LSTM(
#             input_size=512,
#             hidden_size=hidden_size,
#             num_layers=num_layers,
#             bidirectional=True,
#             batch_first=True,
#             dropout=dropout
#         )
#         self.lstm_norm = nn.LayerNorm(hidden_size * 2)

#         self.attention = nn.Sequential(
#             nn.Linear(hidden_size * 2, hidden_size),
#             nn.Tanh(),
#             nn.Linear(hidden_size, 1, bias=False)
#         )

#         self.temporal_cnn = nn.Sequential(
#             nn.Conv1d(hidden_size * 2, hidden_size, kernel_size=5, padding=2),
#             nn.BatchNorm1d(hidden_size),
#             nn.GELU(),
#             nn.Dropout(dropout),
#             nn.Conv1d(hidden_size, hidden_size // 2, kernel_size=3, padding=1),
#             nn.BatchNorm1d(hidden_size // 2),
#             nn.GELU(),
#             nn.AdaptiveAvgPool1d(1)
#         )

#         self.classifier = nn.Sequential(
#             nn.Linear(hidden_size // 2, 256),
#             nn.GELU(),
#             nn.Dropout(dropout),
#             nn.LayerNorm(256),
#             nn.Linear(256, 1)
#         )

#     def forward(self, x, lengths):
#         x = self.feature_reducer(x)
#         packed = nn.utils.rnn.pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
#         packed_out, _ = self.blstm(packed)
#         out, _ = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)

#         attn_weights = F.softmax(self.attention(out), dim=1)
#         out = torch.sum(attn_weights * out, dim=1)
#         out = self.lstm_norm(out)

#         out = out.unsqueeze(-1)
#         out = self.temporal_cnn(out).squeeze(-1)

#         return self.classifier(out).squeeze(1)

In [8]:
class CNN_BiLSTM(nn.Module):
    """Temporal 1D-CNN followed by a (bi)LSTM and a two-layer head for binary classification.

    The network consumes a sequence of per-frame feature vectors and returns one
    raw logit per sequence (no sigmoid is applied).
    """

    def __init__(self, input_dim, cnn_out_channels, cnn_kernel_size, lstm_hidden_size,
                 lstm_num_layers, fc_hidden_size, dropout_rate, bidirectional=True):
        """
        Args:
            input_dim: Size of each per-frame feature vector.
            cnn_out_channels: Number of output channels of the temporal convolution.
            cnn_kernel_size: Kernel width of the temporal convolution. Only odd
                widths keep the sequence length unchanged with the padding used here.
            lstm_hidden_size: Hidden size of each LSTM direction.
            lstm_num_layers: Number of stacked LSTM layers.
            fc_hidden_size: Width of the hidden fully connected layer.
            dropout_rate: Dropout probability applied after the first FC layer.
            bidirectional: Whether the LSTM runs in both directions.
        """
        super().__init__()

        # Conv1d expects (batch, channels=input_dim, sequence_len).
        self.cnn = nn.Conv1d(in_channels=input_dim,
                             out_channels=cnn_out_channels,
                             kernel_size=cnn_kernel_size,
                             padding=cnn_kernel_size // 2)

        self.lstm = nn.LSTM(input_size=cnn_out_channels,
                            hidden_size=lstm_hidden_size,
                            num_layers=lstm_num_layers,
                            batch_first=True,
                            bidirectional=bidirectional)

        self.bn = nn.BatchNorm1d(cnn_out_channels)

        lstm_output_dim = lstm_hidden_size * (2 if bidirectional else 1)

        self.fc1 = nn.Linear(lstm_output_dim, fc_hidden_size)
        self.fc2 = nn.Linear(fc_hidden_size, 1)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, lengths):
        """Compute one logit per sequence.

        Args:
            x: Tensor of shape ``(batch_size, seq_len, input_dim)``.
            lengths: Integer tensor with the number of valid (unpadded) time
                steps of each sequence; every entry must be at least 1.

        Returns:
            torch.Tensor: Logits of shape ``(batch_size,)``.
        """
        x = x.permute(0, 2, 1)    # (batch_size, input_dim, seq_len)
        x = self.bn(self.cnn(x))  # (batch_size, cnn_out_channels, seq_len)
        x = x.permute(0, 2, 1)    # (batch_size, seq_len, cnn_out_channels)

        lengths = lengths.cpu()   # pack_padded_sequence requires CPU lengths
        packed = nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        packed_out, _ = self.lstm(packed)
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)

        # pad_packed_sequence fills steps beyond each sequence's length with zeros.
        # Mask them out so the temporal max-pool only sees real LSTM outputs;
        # otherwise a padded zero would override any all-negative feature.
        steps = torch.arange(lstm_out.size(1), device=lstm_out.device)
        pad_mask = steps.unsqueeze(0) >= lengths.to(lstm_out.device).unsqueeze(1)
        lstm_out = lstm_out.masked_fill(pad_mask.unsqueeze(-1), float('-inf'))

        out, _ = torch.max(lstm_out, dim=1)  # (batch_size, lstm_output_dim)

        out = self.dropout(self.fc1(out))
        out = self.fc2(out).squeeze(1)       # (batch_size,)
        return out

In [9]:
# -------------------------------
# 5. Training & Evaluation
# -------------------------------
def train_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader``.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a module-level ``device`` variable.

    Args:
        model: Module called as ``model(features, lengths)``.
        loader: Iterable yielding ``(features, labels, lengths)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.

    Returns:
        float: Mean of the per-batch losses, or NaN when ``loader`` yields no
        batches.
    """
    model.train()
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    total_loss = 0.0
    n_batches = 0
    for features, labels, lengths in loader:
        features, labels = features.to(run_device), labels.to(run_device)
        optimizer.zero_grad()
        outputs = model(features, lengths)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        n_batches += 1

    # Count batches while iterating: avoids dividing by zero on an empty loader
    # and works for iterables that do not define __len__.
    return total_loss / n_batches if n_batches else float('nan')

def evaluate(model, loader):
    """Compute binary classification metrics of ``model`` over ``loader``.

    Logits are passed through a sigmoid and thresholded at 0.5. Batches are
    moved to the device the model's parameters live on, so the function does
    not depend on a module-level ``device`` variable.

    Args:
        model: Module called as ``model(features, lengths)`` returning logits.
        loader: Iterable yielding ``(features, labels, lengths)`` batches with
            0/1 labels.

    Returns:
        dict: ``accuracy``, ``precision``, ``recall`` and ``f1``. Precision,
        recall and F1 are reported as 0 when their denominator is zero (for
        example when no positive is predicted).
    """
    model.eval()
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    all_preds, all_labels = [], []
    with torch.no_grad():
        for features, labels, lengths in loader:
            outputs = model(features.to(run_device), lengths)
            preds = (torch.sigmoid(outputs) > 0.5).long().reshape(-1).cpu()
            # Store plain ints on both sides so the metric functions compare like with like.
            all_preds.extend(preds.tolist())
            all_labels.extend(labels.long().reshape(-1).cpu().tolist())

    # zero_division=0 yields the same value as the default but without a warning
    # for every epoch in which one class is never predicted.
    return {
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds, zero_division=0),
        'recall': recall_score(all_labels, all_preds, zero_division=0),
        'f1': f1_score(all_labels, all_preds, zero_division=0)
    }

In [14]:
# Install Optuna into the running kernel's environment (%pip rather than !pip),
# pinned to the version this notebook was last executed with.
%pip install optuna==4.3.0

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
   ---------------------------------------- 0.0/386.6 kB ? eta -:--:--
   ------- -------------------------------- 71.7/386.6 kB 1.9 MB/s eta 0:00:01
   ---------------------------------------- 386.6/386.6 kB 4.8 MB/s eta 0:00:00
Downloading alembic-1.15.2-py3-none-any.whl (231 kB)
   ---------------------------------------- 0.0/231.9 kB ? eta -:--:--
   ---------------------------------------- 231.9/231.9 kB 7.2 MB/s eta 0:00:00
Downloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading mako-1.3.10-py3-none-any.whl (78 kB)
   ------------------------

In [15]:
import optuna
from optuna.trial import TrialState

def objective(trial):
    """Optuna objective: train a CNN_BiLSTM briefly and return its validation F1.

    The function reads ``train_df``, ``val_df``, ``cache_dir``, ``max_frames``
    and ``device`` from module scope. The validation loader and the loss are
    built here, so the objective does not rely on ``val_loader``/``criterion``
    objects that are only created later in the notebook.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters and to
            report intermediate values.

    Returns:
        float: Validation F1-score after the last tuning epoch.

    Raises:
        optuna.exceptions.TrialPruned: When the pruner decides to stop the trial.
    """
    # Define hyperparameters to optimize
    params = {
        'cnn_out_channels': trial.suggest_categorical('cnn_out_channels', [64, 128, 256]),
        'cnn_kernel_size': trial.suggest_int('cnn_kernel_size', 3, 7, step=2),
        'lstm_hidden_size': trial.suggest_categorical('lstm_hidden_size', [128, 256, 512]),
        'lstm_num_layers': trial.suggest_int('lstm_num_layers', 1, 3),
        'fc_hidden_size': trial.suggest_categorical('fc_hidden_size', [64, 128, 256]),
        'dropout_rate': trial.suggest_float('dropout_rate', 0.3, 0.7, step=0.1),
        'lr': trial.suggest_float('lr', 1e-5, 1e-4, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [8, 16, 32])
    }

    # Create model with trial parameters
    model = CNN_BiLSTM(
        input_dim=1280,
        cnn_out_channels=params['cnn_out_channels'],
        cnn_kernel_size=params['cnn_kernel_size'],
        lstm_hidden_size=params['lstm_hidden_size'],
        lstm_num_layers=params['lstm_num_layers'],
        fc_hidden_size=params['fc_hidden_size'],
        dropout_rate=params['dropout_rate'],
        bidirectional=True
    ).to(device)

    # Data loaders: the training loader uses the trial's batch size. The validation
    # batch size does not influence the metrics, so it simply reuses it.
    train_loader = DataLoader(
        VideoDataset(train_df, cache_dir, max_frames),
        batch_size=params['batch_size'],
        shuffle=True,
        collate_fn=collate_fn
    )
    val_loader = DataLoader(
        VideoDataset(val_df, cache_dir, max_frames),
        batch_size=params['batch_size'],
        shuffle=False,
        collate_fn=collate_fn
    )

    # Class-balanced loss: weight positives by the negative/positive ratio of the training split.
    n_negative = int((train_df['label'] == 0).sum())
    n_positive = int((train_df['label'] == 1).sum())
    pos_weight = torch.tensor([n_negative / max(n_positive, 1)], device=device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=params['lr'],
        weight_decay=1e-5
    )

    # Training loop (shortened for tuning)
    for epoch in range(10):  # Fewer epochs for tuning
        train_epoch(model, train_loader, optimizer, criterion)
        metrics = evaluate(model, val_loader)

        # Report intermediate results
        trial.report(metrics['f1'], epoch)

        # Handle pruning (early stopping)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return metrics['f1']

def run_hyperparameter_tuning(n_trials=30, timeout=3600, seed=None):
    """Run an Optuna study that maximises the value returned by ``objective``.

    Args:
        n_trials: Maximum number of trials to run.
        timeout: Time budget in seconds passed to ``study.optimize``.
        seed: Optional seed for the sampler. When given, a seeded ``TPESampler``
            is used so the sequence of suggested hyperparameters is repeatable;
            when ``None``, Optuna's default sampler is used.

    Returns:
        dict: Hyperparameters of the best trial (``study.best_params``).
    """
    sampler = optuna.samplers.TPESampler(seed=seed) if seed is not None else None
    study = optuna.create_study(
        direction='maximize',
        sampler=sampler,
        pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=3)
    )
    study.optimize(objective, n_trials=n_trials, timeout=timeout)

    print("Best trial:")
    trial = study.best_trial
    print(f"  F1-score: {trial.value}")
    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    return study.best_params

In [21]:
# -------------------------------
# 6. Full Pipeline Entry Point
# -------------------------------
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Seed the libraries that drive weight initialisation and shuffling so a
    # fresh "Restart & Run All" follows the same training trajectory.
    SEED = 42
    torch.manual_seed(SEED)
    np.random.seed(SEED)

    # Load and split dataset: 80/20 train+val/test, then 87.5/12.5 train/val, both stratified.
    annotations = load_annotations("data/annotations.csv")
    train_df, test_df = train_test_split(annotations, test_size=0.2, stratify=annotations['label'], random_state=SEED)
    train_df, val_df = train_test_split(train_df, test_size=0.125, stratify=train_df['label'], random_state=SEED)

    video_dir = "data/Clips"
    cache_dir = "cached_features"
    max_frames = 30

    # Cache features if not already
    print("Caching features...")
    for split_df in (train_df, val_df, test_df):
        cache_features(split_df, video_dir, cache_dir, max_frames)

    # The loss and the evaluation loaders are created BEFORE tuning: a tuning
    # objective that looks up `criterion` / `val_loader` at module scope would
    # otherwise hit a NameError on a fresh kernel.
    pos_weight = torch.tensor([
        len(train_df[train_df['label'] == 0]) /
        max(len(train_df[train_df['label'] == 1]), 1)
    ], device=device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # Evaluation batch size does not affect the metrics, so it is fixed rather than tuned.
    EVAL_BATCH_SIZE = 16
    val_loader = DataLoader(
        VideoDataset(val_df, cache_dir, max_frames),
        batch_size=EVAL_BATCH_SIZE,
        shuffle=False,
        collate_fn=collate_fn
    )
    test_loader = DataLoader(
        VideoDataset(test_df, cache_dir, max_frames),
        batch_size=EVAL_BATCH_SIZE,
        shuffle=False,
        collate_fn=collate_fn
    )

    # Run hyperparameter tuning first
    print("Starting hyperparameter optimization...")
    best_params = run_hyperparameter_tuning()

    # Now run main training with best params
    print("\nTraining final model with best parameters...")
    train_loader = DataLoader(
        VideoDataset(train_df, cache_dir, max_frames),
        batch_size=best_params['batch_size'],
        shuffle=True,
        collate_fn=collate_fn
    )

    # Build the final model once, from the best trial's hyperparameters.
    model = CNN_BiLSTM(
        input_dim=1280,
        cnn_out_channels=best_params['cnn_out_channels'],
        cnn_kernel_size=best_params['cnn_kernel_size'],
        lstm_hidden_size=best_params['lstm_hidden_size'],
        lstm_num_layers=best_params['lstm_num_layers'],
        fc_hidden_size=best_params['fc_hidden_size'],
        dropout_rate=best_params['dropout_rate'],
        bidirectional=True
    ).to(device)

    # Optimizer with weight decay
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=best_params['lr'],
        weight_decay=1e-5
    )

    # Learning rate scheduler
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer,
        T_0=10,            # Reset every 10 epochs
        eta_min=1e-6        # Min learning rate
    )

    # Training loop
    for epoch in range(20):
        loss = train_epoch(model, train_loader, optimizer, criterion)
        scheduler.step()
        metrics = evaluate(model, val_loader)
        print(f"Epoch {epoch+1}: Loss={loss:.4f} | Val Acc={metrics['accuracy']:.4f} | F1={metrics['f1']:.4f}")

    # Final test evaluation
    print("Final Evaluation on Test Set:")
    test_metrics = evaluate(model, test_loader)
    print(test_metrics)



Caching features...


100%|██████████| 84/84 [00:00<00:00, 16669.26it/s]
100%|██████████| 12/12 [00:00<00:00, 5994.00it/s]
100%|██████████| 25/25 [00:00<00:00, 12453.40it/s]
[I 2025-05-07 11:59:24,409] A new study created in memory with name: no-name-e3e70e3b-17f5-45d3-9126-f6fcb8ad8a76


Starting hyperparameter optimization...


[I 2025-05-07 11:59:26,846] Trial 0 finished with value: 0.2857142857142857 and parameters: {'cnn_out_channels': 256, 'cnn_kernel_size': 3, 'lstm_hidden_size': 128, 'lstm_num_layers': 1, 'fc_hidden_size': 256, 'dropout_rate': 0.5, 'lr': 1.1691403620341307e-05, 'batch_size': 32}. Best is trial 0 with value: 0.2857142857142857.
[I 2025-05-07 11:59:32,182] Trial 1 finished with value: 0.7692307692307692 and parameters: {'cnn_out_channels': 64, 'cnn_kernel_size': 5, 'lstm_hidden_size': 256, 'lstm_num_layers': 2, 'fc_hidden_size': 256, 'dropout_rate': 0.7, 'lr': 3.0246073668859985e-05, 'batch_size': 32}. Best is trial 1 with value: 0.7692307692307692.
[I 2025-05-07 11:59:36,762] Trial 2 finished with value: 0.7692307692307692 and parameters: {'cnn_out_channels': 64, 'cnn_kernel_size': 3, 'lstm_hidden_size': 128, 'lstm_num_layers': 2, 'fc_hidden_size': 128, 'dropout_rate': 0.7, 'lr': 6.0527705831770484e-05, 'batch_size': 16}. Best is trial 1 with value: 0.7692307692307692.
[I 2025-05-07 11:5

Best trial:
  F1-score: 0.8333333333333334
  Params: 
    cnn_out_channels: 256
    cnn_kernel_size: 7
    lstm_hidden_size: 256
    lstm_num_layers: 1
    fc_hidden_size: 256
    dropout_rate: 0.5
    lr: 2.8179533049874422e-05
    batch_size: 8

Training final model with best parameters...
Epoch 1: Loss=0.6802 | Val Acc=0.5833 | F1=0.2857
Epoch 2: Loss=0.6432 | Val Acc=0.6667 | F1=0.6000
Epoch 3: Loss=0.5922 | Val Acc=0.7500 | F1=0.7273
Epoch 4: Loss=0.5572 | Val Acc=0.7500 | F1=0.7692
Epoch 5: Loss=0.5274 | Val Acc=0.7500 | F1=0.7692
Epoch 6: Loss=0.4974 | Val Acc=0.7500 | F1=0.7692
Epoch 7: Loss=0.4786 | Val Acc=0.7500 | F1=0.7692
Epoch 8: Loss=0.4692 | Val Acc=0.7500 | F1=0.7692
Epoch 9: Loss=0.4484 | Val Acc=0.7500 | F1=0.7692
Epoch 10: Loss=0.4506 | Val Acc=0.7500 | F1=0.7692
Epoch 11: Loss=0.4874 | Val Acc=0.7500 | F1=0.7692
Epoch 12: Loss=0.4272 | Val Acc=0.7500 | F1=0.7692
Epoch 13: Loss=0.3883 | Val Acc=0.7500 | F1=0.7692
Epoch 14: Loss=0.3367 | Val Acc=0.7500 | F1=0.7692
Ep

In [12]:
# Print the class balance of each split to confirm that stratification held.
split_reports = [
    ("Train set distribution:", train_df),
    ("\nValidation set distribution:", val_df),
    ("\nTest set distribution:", test_df),
]
for heading, split_frame in split_reports:
    print(heading)
    print(split_frame['class'].value_counts())

Train set distribution:
class
truthful     42
deceptive    42
Name: count, dtype: int64

Validation set distribution:
class
truthful     6
deceptive    6
Name: count, dtype: int64

Test set distribution:
class
deceptive    13
truthful     12
Name: count, dtype: int64
