Mohammed elidrissi laoukili
* subjet  : video analysis

In [None]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
# %%
"""
TimeSformer for Video Emotion Recognition using Hugging Face Transformers
This approach uses pretrained TimeSformer from Facebook Research via HuggingFace
"""
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import cv2
from datasets import load_dataset
import tempfile
from transformers import TimesformerModel, TimesformerConfig

# %%
# ========================
# 1. DATASET CLASS
# ========================

class SyntheticEmotionsDataset(Dataset):
    """Dataset wrapper for aadityaubhat/synthetic-emotions"""
    def __init__(self, split='train', num_frames=8, frame_size=224, transform=None):
        self.num_frames = num_frames
        self.frame_size = frame_size
        self.transform = transform
        
        print(f"Loading dataset (split: {split})...")
        dataset = load_dataset("aadityaubhat/synthetic-emotions", split=split)
        self.dataset = dataset
        
        self.samples = []
        self.emotion_counts = {}
        
        labels = dataset['label'] if 'label' in dataset.column_names else dataset['emotion']
        
        for idx, emotion in enumerate(labels):
            self.samples.append((idx, emotion))
            self.emotion_counts[emotion] = self.emotion_counts.get(emotion, 0) + 1
        
        self.emotions = sorted(list(set([s[1] for s in self.samples])))
        self.emotion_to_idx = {emotion: idx for idx, emotion in enumerate(self.emotions)}
        
        print(f"Loaded {len(self.samples)} videos")
        print(f"Emotions: {self.emotions}")
        print(f"Distribution: {self.emotion_counts}")
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        sample_idx, emotion = self.samples[idx]
        
        try:
            item = self.dataset[sample_idx]
            video_data = item['video']
            frames = self.load_video(video_data)
        except Exception as e:
            frames = torch.zeros(self.num_frames, 3, self.frame_size, self.frame_size)
        
        if self.transform:
            frames = self.transform(frames)
        
        label = self.emotion_to_idx[emotion]
        return frames, label
    
    def load_video(self, video_data):
        """Load and process video"""
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
            if isinstance(video_data, dict) and 'bytes' in video_data:
                tmp.write(video_data['bytes'])
            elif isinstance(video_data, bytes):
                tmp.write(video_data)
            tmp_path = tmp.name
        
        frames = self.extract_frames(tmp_path)
        
        try:
            os.unlink(tmp_path)
        except:
            pass
        
        return frames
    
    def extract_frames(self, video_path):
        """Extract frames from video"""
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        if total_frames == 0:
            cap.release()
            # Return shape: [num_frames, 3, H, W]
            return torch.randn(self.num_frames, 3, self.frame_size, self.frame_size)
        
        frame_indices = np.linspace(0, total_frames - 1, self.num_frames, dtype=int)
        
        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = cv2.resize(frame, (self.frame_size, self.frame_size))
                frame = torch.from_numpy(frame).permute(2, 0, 1).float() / 255.0
                frames.append(frame)
        
        cap.release()
        
        while len(frames) < self.num_frames:
            if frames:
                frames.append(frames[-1].clone())
            else:
                frames.append(torch.zeros(3, self.frame_size, self.frame_size))
        
        # Stack to [num_frames, 3, H, W]
        frames = torch.stack(frames[:self.num_frames])
        return frames


# %%
# ========================
# 2. DATA AUGMENTATION
# ========================

class VideoTransform:
    """Video augmentation"""
    def __init__(self, mode='train'):
        self.mode = mode
        
    def __call__(self, frames):
        """frames: [T, C, H, W]"""
        if self.mode == 'train':
            # Random horizontal flip
            if torch.rand(1) > 0.5:
                frames = torch.flip(frames, dims=[3])
            
            # Brightness
            brightness = 0.8 + torch.rand(1) * 0.4
            frames = torch.clamp(frames * brightness, 0, 1)
        
        # Normalize (ImageNet stats)
        mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        frames = (frames - mean) / std
        
        return frames


# %%
# ========================
# 3. TIMESFORMER MODEL (HUGGING FACE)
# ========================

class TimeSformerForEmotionRecognition(nn.Module):
    """
    TimeSformer model using Hugging Face Transformers
    - Uses pretrained weights from Facebook Research
    - Implements divided space-time attention
    """
    def __init__(self, num_classes=10, num_frames=8, pretrained=True):
        super().__init__()
        
        print("\nüèóÔ∏è  Building TimeSformer model...")
        
        if pretrained:
            print("   Loading pretrained TimeSformer from Facebook Research...")
            # Load pretrained TimeSformer (base model)
            self.timesformer = TimesformerModel.from_pretrained(
                "facebook/timesformer-base-finetuned-k400",
                ignore_mismatched_sizes=True
            )
            print("   ‚úì Pretrained weights loaded from Kinetics-400")
        else:
            print("   Initializing TimeSformer from scratch...")
            config = TimesformerConfig(
                image_size=224,
                patch_size=16,
                num_channels=3,
                num_frames=num_frames,
                hidden_size=768,
                num_hidden_layers=12,
                num_attention_heads=12,
                intermediate_size=3072,
                attention_type="divided_space_time"
            )
            self.timesformer = TimesformerModel(config)
            print("   ‚úì Model initialized from scratch")
        
        # Get hidden size
        hidden_size = self.timesformer.config.hidden_size
        
        # Classification head
        self.classifier = nn.Sequential(
            nn.LayerNorm(hidden_size),
            nn.Dropout(0.3),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_size // 2, num_classes)
        )
        
        print(f"   Architecture:")
        print(f"   - Attention Type: Divided Space-Time")
        print(f"   - Hidden Size: {hidden_size}")
        print(f"   - Transformer Layers: {self.timesformer.config.num_hidden_layers}")
        print(f"   - Attention Heads: {self.timesformer.config.num_attention_heads}")
        print(f"   - Output Classes: {num_classes}")
    
    def forward(self, pixel_values):
        """
        Args:
            pixel_values: [batch_size, num_frames, num_channels, height, width]
        """
        # HuggingFace TimeSformer expects: [batch_size, num_frames, num_channels, height, width]
        # Our dataloader gives: [batch_size, num_frames, num_channels, height, width]
        # So we DON'T need to permute - it's already correct!
        
        # Get TimeSformer outputs
        outputs = self.timesformer(pixel_values)
        
        # Use the [CLS] token representation
        sequence_output = outputs.last_hidden_state
        cls_token = sequence_output[:, 0]  # [batch_size, hidden_size]
        
        # Classification
        logits = self.classifier(cls_token)
        
        return logits


# %%
# ========================
# 4. TRAINING FUNCTIONS
# ========================

def train_epoch(model, loader, criterion, optimizer, device, grad_clip=1.0):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    pbar = tqdm(loader, desc='Training')
    for videos, labels in pbar:
        videos = videos.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(videos)
        loss = criterion(outputs, labels)
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        pbar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'acc': f'{100.*correct/total:.2f}%'
        })
    
    return total_loss / len(loader), correct / total


def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for videos, labels in tqdm(loader, desc='Validating'):
            videos = videos.to(device)
            labels = labels.to(device)
            
            outputs = model(videos)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item()
            _, predicted = outputs.max(1)
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    
    return total_loss / len(loader), acc, f1, all_preds, all_labels


# %%
# ========================
# 5. MAIN SCRIPT
# ========================

def main():
    print("="*80)
    print("TIMESFORMER VIDEO EMOTION RECOGNITION")
    print("Using Hugging Face Transformers + Facebook Research Pretrained Model")
    print("="*80)
    
    config = {
        'num_frames': 8,
        'frame_size': 224,
        'batch_size': 2,
        'num_epochs': 5,
        'learning_rate': 3e-5,
        'weight_decay': 0.01,
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'use_pretrained': True,  # Use pretrained weights
        'train_split': 0.6,
        'val_split': 0.2,
        'test_split': 0.2
    }
    
    print(f"\nConfiguration:")
    for k, v in config.items():
        print(f"  {k}: {v}")
    print("="*80)
    
    # Set seeds
    torch.manual_seed(42)
    np.random.seed(42)
    
    # Load dataset
    print("\nüìÅ Loading dataset...")
    train_dataset = SyntheticEmotionsDataset(
        split='train',
        num_frames=config['num_frames'],
        frame_size=config['frame_size'],
        transform=VideoTransform(mode='train')
    )
    
    val_dataset = SyntheticEmotionsDataset(
        split='train',
        num_frames=config['num_frames'],
        frame_size=config['frame_size'],
        transform=VideoTransform(mode='val')
    )
    
    # Split data
    dataset_size = len(train_dataset)
    indices = list(range(dataset_size))
    np.random.shuffle(indices)
    
    train_size = int(config['train_split'] * dataset_size)
    val_size = int(config['val_split'] * dataset_size)
    
    train_indices = indices[:train_size]
    val_indices = indices[train_size:train_size+val_size]
    test_indices = indices[train_size+val_size:]
    
    print(f"\nüìä Splits: Train={len(train_indices)}, Val={len(val_indices)}, Test={len(test_indices)}")
    
    train_subset = Subset(train_dataset, train_indices)
    val_subset = Subset(val_dataset, val_indices)
    test_subset = Subset(val_dataset, test_indices)
    
    train_loader = DataLoader(train_subset, batch_size=config['batch_size'], 
                             shuffle=True, num_workers=0, drop_last=True)
    val_loader = DataLoader(val_subset, batch_size=config['batch_size'], 
                           shuffle=False, num_workers=0)
    test_loader = DataLoader(test_subset, batch_size=config['batch_size'], 
                            shuffle=False, num_workers=0)
    
    # Create model
    model = TimeSformerForEmotionRecognition(
        num_classes=len(train_dataset.emotions),
        num_frames=config['num_frames'],
        pretrained=config['use_pretrained']
    ).to(config['device'])
    
    total_params = sum(p.numel() for p in model.parameters()) / 1e6
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6
    print(f"\nüìà Parameters: {total_params:.2f}M total, {trainable_params:.2f}M trainable")
    
    # Training setup
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.AdamW(model.parameters(), lr=config['learning_rate'], 
                           weight_decay=config['weight_decay'])
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config['num_epochs'])
    
    # Training loop
    best_val_acc = 0
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []
    
    print("\n" + "="*80)
    print("üöÄ STARTING TRAINING")
    print("="*80)
    
    for epoch in range(config['num_epochs']):
        print(f"\nüìÖ Epoch {epoch+1}/{config['num_epochs']}")
        
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, config['device']
        )
        
        val_loss, val_acc, val_f1, _, _ = validate(
            model, val_loader, criterion, config['device']
        )
        
        scheduler.step()
        
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)
        
        print(f"Train: Loss={train_loss:.4f}, Acc={train_acc:.4f}")
        print(f"Val:   Loss={val_loss:.4f}, Acc={val_acc:.4f}, F1={val_f1:.4f}")
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_timesformer_hf.pth')
            print(f"‚úÖ New best model saved!")
    
    # Test evaluation
    print("\n" + "="*80)
    print("üéØ FINAL TEST EVALUATION")
    print("="*80)
    
    model.load_state_dict(torch.load('best_timesformer_hf.pth'))
    test_loss, test_acc, test_f1, test_preds, test_labels = validate(
        model, test_loader, criterion, config['device']
    )
    
    print(f"\nüìä Test Results:")
    print(f"  Accuracy: {test_acc:.4f} ({test_acc*100:.2f}%)")
    print(f"  F1-Score: {test_f1:.4f}")
    
    print("\n" + "="*80)
    print("‚úÖ TRAINING COMPLETE!")
    print("="*80)


if __name__ == '__main__':
    main()

2026-01-02 11:26:21.102929: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767353181.324237      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767353181.387863      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

TIMESFORMER VIDEO EMOTION RECOGNITION
Using Hugging Face Transformers + Facebook Research Pretrained Model

Configuration:
  num_frames: 8
  frame_size: 224
  batch_size: 2
  num_epochs: 5
  learning_rate: 3e-05
  weight_decay: 0.01
  device: cuda
  use_pretrained: True
  train_split: 0.6
  val_split: 0.2
  test_split: 0.2

üìÅ Loading dataset...
Loading dataset (split: train)...


README.md: 0.00B [00:00, ?B/s]

Resolving data files:   0%|          | 0/101 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/101 [00:00<?, ?files/s]

train/006_20250111_1454_Joyful Laughter (‚Ä¶):   0%|          | 0.00/3.53M [00:00<?, ?B/s]

train/003_20250111_1450_Joyful Leap_simp(‚Ä¶):   0%|          | 0.00/4.06M [00:00<?, ?B/s]

train/005_20250111_1457_Joyful Laughter (‚Ä¶):   0%|          | 0.00/4.16M [00:00<?, ?B/s]

train/010_20250111_1459_Joyful Balloon R(‚Ä¶):   0%|          | 0.00/3.67M [00:00<?, ?B/s]

train/004_20250111_1450_Warm Wisdom Smil(‚Ä¶):   0%|          | 0.00/3.89M [00:00<?, ?B/s]

train/015_20250111_1503_Matriarch's Ster(‚Ä¶):   0%|          | 0.00/4.42M [00:00<?, ?B/s]

train/011_20250111_1500_Fired Up!_simple(‚Ä¶):   0%|          | 0.00/3.48M [00:00<?, ?B/s]

train/008_20250111_1456_Joyful Dance Mov(‚Ä¶):   0%|          | 0.00/4.31M [00:00<?, ?B/s]

train/013_20250111_1502_Intense Confront(‚Ä¶):   0%|          | 0.00/3.89M [00:00<?, ?B/s]

train/001_20250107_2153_Joyful Laughter_(‚Ä¶):   0%|          | 0.00/3.00M [00:00<?, ?B/s]

train/016_20250111_1504_Intense Frustrat(‚Ä¶):   0%|          | 0.00/2.80M [00:00<?, ?B/s]

train/007_20250111_1455_Father's Joyful (‚Ä¶):   0%|          | 0.00/3.38M [00:00<?, ?B/s]

train/002_20250111_1449_Joyful Celebrati(‚Ä¶):   0%|          | 0.00/6.03M [00:00<?, ?B/s]

train/012_20250111_1500_Frustrated Refle(‚Ä¶):   0%|          | 0.00/2.58M [00:00<?, ?B/s]

train/014_20250111_1503_Angry Boy's Stom(‚Ä¶):   0%|          | 0.00/4.78M [00:00<?, ?B/s]

train/009_20250111_1458_Grandfather's Jo(‚Ä¶):   0%|          | 0.00/3.84M [00:00<?, ?B/s]

train/017_20250111_1506_Passionate Kitch(‚Ä¶):   0%|          | 0.00/2.98M [00:00<?, ?B/s]

train/018_20250111_1506_Frustrated Homew(‚Ä¶):   0%|          | 0.00/2.48M [00:00<?, ?B/s]

train/019_20250111_1519_Desert Boy's Sho(‚Ä¶):   0%|          | 0.00/3.72M [00:00<?, ?B/s]

train/020_20250111_1520_Intense Confront(‚Ä¶):   0%|          | 0.00/2.87M [00:00<?, ?B/s]

train/021_20250119_0051_Lonely Tears_sim(‚Ä¶):   0%|          | 0.00/2.17M [00:00<?, ?B/s]

train/022_20250111_1522_Reflective Momen(‚Ä¶):   0%|          | 0.00/4.25M [00:00<?, ?B/s]

train/024_20250111_1523_Heartfelt Solitu(‚Ä¶):   0%|          | 0.00/2.77M [00:00<?, ?B/s]

train/023_20250111_1523_Lonely Park Thou(‚Ä¶):   0%|          | 0.00/2.45M [00:00<?, ?B/s]

train/025_20250111_1525_Tearful Solitude(‚Ä¶):   0%|          | 0.00/2.72M [00:00<?, ?B/s]

train/026_20250111_1525_Raw Emotional Te(‚Ä¶):   0%|          | 0.00/3.15M [00:00<?, ?B/s]

train/027_20250111_1526_Mother's Comfort(‚Ä¶):   0%|          | 0.00/2.54M [00:00<?, ?B/s]

train/029_20250111_1531_Desolate Gaze_si(‚Ä¶):   0%|          | 0.00/2.81M [00:00<?, ?B/s]

train/028_20250111_1527_Melancholic Cont(‚Ä¶):   0%|          | 0.00/3.39M [00:00<?, ?B/s]

train/030_20250111_1528_Thai Girl's Tear(‚Ä¶):   0%|          | 0.00/3.71M [00:00<?, ?B/s]

train/031_20250111_1529_Fearful Retreat_(‚Ä¶):   0%|          | 0.00/3.10M [00:00<?, ?B/s]

train/032_20250111_1531_Surprised Wonder(‚Ä¶):   0%|          | 0.00/2.98M [00:00<?, ?B/s]

train/033_20250111_1534_Icelandic Fearfu(‚Ä¶):   0%|          | 0.00/3.13M [00:00<?, ?B/s]

train/034_20250111_1532_Hiding in Fear_s(‚Ä¶):   0%|          | 0.00/3.35M [00:00<?, ?B/s]

train/037_20250111_1540_Desperate Scream(‚Ä¶):   0%|          | 0.00/3.18M [00:00<?, ?B/s]

train/036_20250111_1539_Defensive Tensio(‚Ä¶):   0%|          | 0.00/3.62M [00:00<?, ?B/s]

train/035_20250111_1538_Trembling in Ten(‚Ä¶):   0%|          | 0.00/2.83M [00:00<?, ?B/s]

train/038_20250111_1540_Teenager's Tense(‚Ä¶):   0%|          | 0.00/3.04M [00:00<?, ?B/s]

train/039_20250111_1541_Moment of Terror(‚Ä¶):   0%|          | 0.00/2.64M [00:00<?, ?B/s]

train/040_20250111_1542_Protective Embra(‚Ä¶):   0%|          | 0.00/3.55M [00:00<?, ?B/s]

train/041_20250111_1930_Joyful Gift Disc(‚Ä¶):   0%|          | 0.00/3.85M [00:00<?, ?B/s]

train/042_20250111_1931_Irish Astonishme(‚Ä¶):   0%|          | 0.00/3.65M [00:00<?, ?B/s]

train/043_20250111_1933_Unexpected Surpr(‚Ä¶):   0%|          | 0.00/4.49M [00:00<?, ?B/s]

train/044_20250111_1934_Joyful Surprise_(‚Ä¶):   0%|          | 0.00/3.50M [00:00<?, ?B/s]

train/045_20250112_1548_Surprised Discov(‚Ä¶):   0%|          | 0.00/3.86M [00:00<?, ?B/s]

train/046_20250112_1549_Joyous Kite Adve(‚Ä¶):   0%|          | 0.00/4.98M [00:00<?, ?B/s]

train/047_20250112_1551_Joyful Surprise (‚Ä¶):   0%|          | 0.00/4.84M [00:00<?, ?B/s]

train/048_20250112_1552_Unexpected Book (‚Ä¶):   0%|          | 0.00/2.48M [00:00<?, ?B/s]

train/050_20250112_1556_Sudden Realizati(‚Ä¶):   0%|          | 0.00/2.44M [00:00<?, ?B/s]

train/051_20250112_1608_Teen's Disgusted(‚Ä¶):   0%|          | 0.00/2.60M [00:00<?, ?B/s]

train/052_20250112_1610_Expressive Displ(‚Ä¶):   0%|          | 0.00/4.03M [00:00<?, ?B/s]

train/049_20250112_1555_Shocked City Gir(‚Ä¶):   0%|          | 0.00/4.66M [00:00<?, ?B/s]

train/053_20250112_1612_Unpleasant Smell(‚Ä¶):   0%|          | 0.00/3.30M [00:00<?, ?B/s]

train/055_20250112_1644_Boy Rejects Curr(‚Ä¶):   0%|          | 0.00/2.76M [00:00<?, ?B/s]

train/054_20250112_1621_Latina's Yuck Fa(‚Ä¶):   0%|          | 0.00/4.07M [00:00<?, ?B/s]

train/056_20250112_1646_Pensive Middle E(‚Ä¶):   0%|          | 0.00/2.54M [00:00<?, ?B/s]

train/057_20250112_1651_Stomach Pain Str(‚Ä¶):   0%|          | 0.00/3.16M [00:00<?, ?B/s]

train/058_20250112_1654_Unexpected Kitch(‚Ä¶):   0%|          | 0.00/4.09M [00:00<?, ?B/s]

train/059_20250112_1656_Vietnamese Marke(‚Ä¶):   0%|          | 0.00/3.31M [00:00<?, ?B/s]

train/062_20250112_1704_Mother's Warm Em(‚Ä¶):   0%|          | 0.00/3.73M [00:00<?, ?B/s]

train/060_20250112_1657_Grandfather's Ge(‚Ä¶):   0%|          | 0.00/2.81M [00:00<?, ?B/s]

train/061_20250112_1701_Joyful Park Mome(‚Ä¶):   0%|          | 0.00/4.24M [00:00<?, ?B/s]

train/063_20250112_1708_Shy Teen's Confi(‚Ä¶):   0%|          | 0.00/3.17M [00:00<?, ?B/s]

train/064_20250112_1709_Warm Grandmother(‚Ä¶):   0%|          | 0.00/2.97M [00:00<?, ?B/s]

train/065_20250112_1714_Warm Embrace_sim(‚Ä¶):   0%|          | 0.00/1.78M [00:00<?, ?B/s]

train/066_20250112_1724_Tender Father-Ba(‚Ä¶):   0%|          | 0.00/2.76M [00:00<?, ?B/s]

train/067_20250112_1725_Youthful Hope in(‚Ä¶):   0%|          | 0.00/3.49M [00:00<?, ?B/s]

train/068_20250112_1727_Cozy Feline Comp(‚Ä¶):   0%|          | 0.00/4.28M [00:00<?, ?B/s]

train/069_20250112_1729_Joyful Heart Ges(‚Ä¶):   0%|          | 0.00/3.09M [00:00<?, ?B/s]

train/072_20250112_1736_Deep in Thought_(‚Ä¶):   0%|          | 0.00/2.47M [00:00<?, ?B/s]

train/070_20250112_1730_Tender African C(‚Ä¶):   0%|          | 0.00/4.69M [00:00<?, ?B/s]

train/071_20250112_1732_Perplexed in Sun(‚Ä¶):   0%|          | 0.00/2.54M [00:00<?, ?B/s]

train/073_20250112_1739_Elderly Woman's (‚Ä¶):   0%|          | 0.00/2.42M [00:00<?, ?B/s]

train/074_20250112_1740_Confused Shrug E(‚Ä¶):   0%|          | 0.00/2.47M [00:00<?, ?B/s]

train/075_20250112_1742_Lost in Tokyo_si(‚Ä¶):   0%|          | 0.00/4.57M [00:00<?, ?B/s]

train/076_20250112_1749_Pointing with Cu(‚Ä¶):   0%|          | 0.00/2.78M [00:00<?, ?B/s]

train/077_20250112_1750_Contemplative Re(‚Ä¶):   0%|          | 0.00/2.53M [00:00<?, ?B/s]

train/078_20250112_1802_Perplexed Mother(‚Ä¶):   0%|          | 0.00/2.71M [00:00<?, ?B/s]

train/079_20250112_1805_Teen's Upside-Do(‚Ä¶):   0%|          | 0.00/3.20M [00:00<?, ?B/s]

train/082_20250113_2340_Contemplative Mo(‚Ä¶):   0%|          | 0.00/3.18M [00:00<?, ?B/s]

train/080_20250113_2338_Confused Man's E(‚Ä¶):   0%|          | 0.00/2.63M [00:00<?, ?B/s]

train/081_20250113_2339_Tranquil Tea Mom(‚Ä¶):   0%|          | 0.00/2.96M [00:00<?, ?B/s]

train/083_20250113_2342_Confident Young (‚Ä¶):   0%|          | 0.00/1.51M [00:00<?, ?B/s]

train/084_20250113_2344_Late-Night Study(‚Ä¶):   0%|          | 0.00/2.28M [00:00<?, ?B/s]

train/086_20250113_2348_Peaceful Contemp(‚Ä¶):   0%|          | 0.00/2.88M [00:00<?, ?B/s]

train/085_20250113_2344_City Stroll Sere(‚Ä¶):   0%|          | 0.00/2.83M [00:00<?, ?B/s]

train/087_20250113_2349_Reflective Momen(‚Ä¶):   0%|          | 0.00/2.34M [00:00<?, ?B/s]

train/088_20250113_2350_Serene Shoelace (‚Ä¶):   0%|          | 0.00/3.69M [00:00<?, ?B/s]

train/089_20250113_2352_Impatient Market(‚Ä¶):   0%|          | 0.00/4.27M [00:00<?, ?B/s]

train/090_20250113_2352_Hispanic Mother (‚Ä¶):   0%|          | 0.00/2.31M [00:00<?, ?B/s]

train/091_20250113_2353_Tears of Joy_sim(‚Ä¶):   0%|          | 0.00/3.31M [00:00<?, ?B/s]

train/093_20250113_2355_Endearing Nervou(‚Ä¶):   0%|          | 0.00/2.60M [00:00<?, ?B/s]

train/094_20250113_2358_Emotional Releas(‚Ä¶):   0%|          | 0.00/2.94M [00:00<?, ?B/s]

train/095_20250113_2359_Surprised to Joy(‚Ä¶):   0%|          | 0.00/4.16M [00:00<?, ?B/s]

train/092_20250113_2354_Frustration in S(‚Ä¶):   0%|          | 0.00/3.41M [00:00<?, ?B/s]

train/096_20250114_0002_Playful Sass Unl(‚Ä¶):   0%|          | 0.00/2.80M [00:00<?, ?B/s]

train/097_20250114_0003_Joyful Laughter (‚Ä¶):   0%|          | 0.00/3.38M [00:00<?, ?B/s]

train/098_20250114_0004_Confused Amuseme(‚Ä¶):   0%|          | 0.00/2.43M [00:00<?, ?B/s]

train/099_20250114_0005_From Frown to Sm(‚Ä¶):   0%|          | 0.00/4.53M [00:00<?, ?B/s]

train/100_20250114_0008_Joyful Reconcili(‚Ä¶):   0%|          | 0.00/2.88M [00:00<?, ?B/s]

metadata.csv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/100 [00:00<?, ? examples/s]

Loaded 100 videos
Emotions: ['Anger', 'Confusion', 'Disgust', 'Fear', 'Happiness and Joy', 'Love and Affection', 'Mixed Emotions', 'Neutral/Everyday', 'Sadness', 'Surprise']
Distribution: {'Happiness and Joy': 10, 'Anger': 10, 'Sadness': 10, 'Fear': 10, 'Surprise': 10, 'Disgust': 10, 'Love and Affection': 10, 'Confusion': 10, 'Neutral/Everyday': 10, 'Mixed Emotions': 10}
Loading dataset (split: train)...


Resolving data files:   0%|          | 0/101 [00:00<?, ?it/s]

Loaded 100 videos
Emotions: ['Anger', 'Confusion', 'Disgust', 'Fear', 'Happiness and Joy', 'Love and Affection', 'Mixed Emotions', 'Neutral/Everyday', 'Sadness', 'Surprise']
Distribution: {'Happiness and Joy': 10, 'Anger': 10, 'Sadness': 10, 'Fear': 10, 'Surprise': 10, 'Disgust': 10, 'Love and Affection': 10, 'Confusion': 10, 'Neutral/Everyday': 10, 'Mixed Emotions': 10}

üìä Splits: Train=60, Val=20, Test=20

üèóÔ∏è  Building TimeSformer model...
   Loading pretrained TimeSformer from Facebook Research...


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/486M [00:00<?, ?B/s]

   ‚úì Pretrained weights loaded from Kinetics-400
   Architecture:
   - Attention Type: Divided Space-Time
   - Hidden Size: 768
   - Transformer Layers: 12
   - Attention Heads: 12
   - Output Classes: 10


model.safetensors:   0%|          | 0.00/486M [00:00<?, ?B/s]


üìà Parameters: 121.56M total, 121.56M trainable

üöÄ STARTING TRAINING

üìÖ Epoch 1/5



Training:   0%|          | 0/30 [00:00<?, ?it/s][A
Training:   0%|          | 0/30 [00:02<?, ?it/s, loss=2.1303, acc=50.00%][A
Training:   3%|‚ñé         | 1/30 [00:02<01:04,  2.22s/it, loss=2.1303, acc=50.00%][A
Training:   3%|‚ñé         | 1/30 [00:02<01:04,  2.22s/it, loss=2.4167, acc=25.00%][A
Training:   7%|‚ñã         | 2/30 [00:02<00:37,  1.35s/it, loss=2.4167, acc=25.00%][A
Training:   7%|‚ñã         | 2/30 [00:03<00:37,  1.35s/it, loss=2.5039, acc=16.67%][A
Training:  10%|‚ñà         | 3/30 [00:03<00:28,  1.07s/it, loss=2.5039, acc=16.67%][A
Training:  10%|‚ñà         | 3/30 [00:04<00:28,  1.07s/it, loss=2.3094, acc=12.50%][A
Training:  13%|‚ñà‚ñé        | 4/30 [00:04<00:24,  1.07it/s, loss=2.3094, acc=12.50%][A
Training:  13%|‚ñà‚ñé        | 4/30 [00:05<00:24,  1.07it/s, loss=1.8268, acc=30.00%][A
Training:  17%|‚ñà‚ñã        | 5/30 [00:05<00:21,  1.16it/s, loss=1.8268, acc=30.00%][A
Training:  17%|‚ñà‚ñã        | 5/30 [00:05<00:21,  1.16it/s, loss=2.7146, acc=25.

Train: Loss=2.3417, Acc=0.1000
Val:   Loss=2.6034, Acc=0.0000, F1=0.0000

üìÖ Epoch 2/5


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [00:22<00:00,  1.35it/s, loss=2.2493, acc=10.00%]
Validating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:02<00:00,  4.12it/s]


Train: Loss=2.3132, Acc=0.1000
Val:   Loss=2.6096, Acc=0.0000, F1=0.0000

üìÖ Epoch 3/5


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [00:22<00:00,  1.32it/s, loss=2.2749, acc=6.67%] 
Validating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:02<00:00,  3.98it/s]


Train: Loss=2.3505, Acc=0.0667
Val:   Loss=2.5072, Acc=0.0500, F1=0.0048
‚úÖ New best model saved!

üìÖ Epoch 4/5


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [00:22<00:00,  1.31it/s, loss=2.3169, acc=6.67%]
Validating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:02<00:00,  3.95it/s]


Train: Loss=2.3334, Acc=0.0667
Val:   Loss=2.5483, Acc=0.0500, F1=0.0048

üìÖ Epoch 5/5


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [00:23<00:00,  1.28it/s, loss=2.1693, acc=15.00%]
Validating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:02<00:00,  3.82it/s]


Train: Loss=2.3106, Acc=0.1500
Val:   Loss=2.5442, Acc=0.0500, F1=0.0048

üéØ FINAL TEST EVALUATION


Validating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:02<00:00,  3.82it/s]


üìä Test Results:
  Accuracy: 0.0500 (5.00%)
  F1-Score: 0.0048

‚úÖ TRAINING COMPLETE!



