Mohammed elidrissi laoukili
* subjet  : video analysis

In [2]:
# %%
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import cv2
import pandas as pd
from datasets import load_dataset
import tempfile
import os

# %%
# ========================
# 1. HUGGING FACE DATASET CLASS
# ========================

class SyntheticEmotionsDataset(Dataset):
    """Dataset wrapper for aadityaubhat/synthetic-emotions from Hugging Face"""
    def __init__(self, split='train', num_frames=16, frame_size=224, transform=None):
        self.num_frames = num_frames
        self.frame_size = frame_size
        self.transform = transform
        self.split = split
        
        print(f"Loading synthetic-emotions dataset (split: {split})...")
        
        try:
            dataset = load_dataset("aadityaubhat/synthetic-emotions", split=split)
            if "video" in dataset.features:
                dataset = dataset.cast_column("video", dataset.features["video"])
            self.dataset = dataset
        except Exception as e:
            print(f"Error loading dataset: {e}")
            raise
        
        self.samples = []
        self.emotion_counts = {}
        
        labels = None
        if 'label' in dataset.column_names:
            labels = dataset['label']
        elif 'emotion' in dataset.column_names:
            labels = dataset['emotion']
        else:
            raise ValueError("Dataset has no label/emotion column")

        for idx, emotion in enumerate(labels):
            self.samples.append((idx, emotion))
            self.emotion_counts[emotion] = self.emotion_counts.get(emotion, 0) + 1
        
        self.emotions = sorted(list(set([s[1] for s in self.samples])))
        self.emotion_to_idx = {emotion: idx for idx, emotion in enumerate(self.emotions)}
        
        print(f"Loaded {len(self.samples)} videos")
        print(f"Emotions found: {self.emotions}")
        print(f"Emotion distribution: {self.emotion_counts}")
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        sample_idx, emotion = self.samples[idx]

        try:
            item = self.dataset[sample_idx]
            video_data = item['video']
            frames = self.load_video_from_hf(video_data)
        except Exception as e:
            frames = torch.zeros(3, self.num_frames, self.frame_size, self.frame_size)

        if self.transform:
            frames = self.transform(frames)

        label = self.emotion_to_idx[emotion]
        return frames, label
    
    def load_video_from_hf(self, video_data):
        """Load video from Hugging Face dataset item"""
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
            if isinstance(video_data, dict) and 'bytes' in video_data:
                tmp.write(video_data['bytes'])
            elif isinstance(video_data, bytes):
                tmp.write(video_data)
            else:
                video_path = video_data.get('path', video_data)
                if os.path.exists(video_path):
                    with open(video_path, 'rb') as f:
                        tmp.write(f.read())
            tmp_path = tmp.name
        
        frames = self.load_video_from_path(tmp_path)
        
        try:
            os.unlink(tmp_path)
        except:
            pass
        
        return frames
    
    def load_video_from_path(self, video_path):
        """Load video and sample frames uniformly"""
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        if total_frames == 0:
            cap.release()
            return torch.randn(3, self.num_frames, self.frame_size, self.frame_size)
        
        frame_indices = np.linspace(0, total_frames - 1, self.num_frames, dtype=int)
        
        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = cv2.resize(frame, (self.frame_size, self.frame_size))
                frames.append(frame)
        
        cap.release()
        
        while len(frames) < self.num_frames:
            if frames:
                frames.append(frames[-1].copy())
            else:
                frames.append(np.zeros((self.frame_size, self.frame_size, 3), dtype=np.uint8))
        
        frames = np.stack(frames[:self.num_frames])
        frames = torch.from_numpy(frames).permute(3, 0, 1, 2).float()
        frames = frames / 255.0
        
        return frames

# %%
# ========================
# 2. DATA AUGMENTATION
# ========================

class VideoAugmentation:
    """Data augmentation for video frames"""
    def __init__(self, mode='train'):
        self.mode = mode
        
    def __call__(self, frames):
        """frames: tensor of shape [C, T, H, W]"""
        if self.mode == 'train':
            if torch.rand(1) > 0.5:
                frames = torch.flip(frames, dims=[3])
            
            brightness_factor = 0.8 + torch.rand(1) * 0.4
            frames = torch.clamp(frames * brightness_factor, 0, 1)
            
            c, t, h, w = frames.shape
            crop_size = int(h * (0.8 + torch.rand(1) * 0.2))
            top = torch.randint(0, h - crop_size + 1, (1,)).item()
            left = torch.randint(0, w - crop_size + 1, (1,)).item()
            frames = frames[:, :, top:top+crop_size, left:left+crop_size]
            frames = torch.nn.functional.interpolate(
                frames.permute(1, 0, 2, 3),
                size=(h, w),
                mode='bilinear',
                align_corners=False
            ).permute(1, 0, 2, 3)
        
        mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1, 1)
        frames = (frames - mean) / std
        
        return frames

# %%
# ========================
# 3. TIMESFORMER MODEL (FIXED - NO IN-PLACE OPERATIONS)
# ========================

class PatchEmbed(nn.Module):
    """Convert video to patch embeddings"""
    def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = (img_size // patch_size) ** 2
        
        self.projection = nn.Conv2d(
            in_channels, embed_dim,
            kernel_size=patch_size,
            stride=patch_size
        )
    
    def forward(self, x):
        B, C, T, H, W = x.shape
        x = x.permute(0, 2, 1, 3, 4)
        x = x.reshape(B * T, C, H, W)
        x = self.projection(x)
        x = x.flatten(2)
        x = x.transpose(1, 2)
        x = x.reshape(B, T, self.num_patches, -1)
        return x


class DividedSpaceTimeAttention(nn.Module):
    """Divided Space-Time Attention (FIXED - No in-place operations)"""
    def __init__(self, dim, num_heads=12, qkv_bias=True, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5
        
        self.qkv_spatial = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop_spatial = nn.Dropout(attn_drop)
        self.proj_spatial = nn.Linear(dim, dim)
        self.proj_drop_spatial = nn.Dropout(proj_drop)
        
        self.qkv_temporal = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop_temporal = nn.Dropout(attn_drop)
        self.proj_temporal = nn.Linear(dim, dim)
        self.proj_drop_temporal = nn.Dropout(proj_drop)
    
    def forward(self, x):
        B, T, N, D = x.shape
        
        # Temporal attention on CLS tokens
        cls_tokens = x[:, :, 0, :].clone()  # Clone to avoid in-place issues
        qkv = self.qkv_temporal(cls_tokens).reshape(B, T, 3, self.num_heads, D // self.num_heads)
        qkv = qkv.permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]
        
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop_temporal(attn)
        
        cls_tokens_out = (attn @ v).transpose(1, 2).reshape(B, T, D)
        cls_tokens_out = self.proj_temporal(cls_tokens_out)
        cls_tokens_out = self.proj_drop_temporal(cls_tokens_out)
        
        # FIXED: Create new tensor instead of in-place operation
        x_new = x.clone()
        x_new[:, :, 0, :] = x[:, :, 0, :] + cls_tokens_out
        
        # Spatial attention
        x_spatial = x_new.reshape(B * T, N, D)
        qkv = self.qkv_spatial(x_spatial).reshape(B * T, N, 3, self.num_heads, D // self.num_heads)
        qkv = qkv.permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]
        
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop_spatial(attn)
        
        x_spatial = (attn @ v).transpose(1, 2).reshape(B * T, N, D)
        x_spatial = self.proj_spatial(x_spatial)
        x_spatial = self.proj_drop_spatial(x_spatial)
        
        # FIXED: Add residual without in-place operation
        x_out = x_new.reshape(B * T, N, D) + x_spatial
        x_out = x_out.reshape(B, T, N, D)
        
        return x_out


class TransformerBlock(nn.Module):
    """Transformer block with divided space-time attention"""
    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = DividedSpaceTimeAttention(dim, num_heads, qkv_bias, attn_drop, drop)
        self.norm2 = nn.LayerNorm(dim)
        
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Linear(dim, mlp_hidden_dim),
            nn.GELU(),
            nn.Dropout(drop),
            nn.Linear(mlp_hidden_dim, dim),
            nn.Dropout(drop)
        )
    
    def forward(self, x):
        x = x + self.attn(self.norm1(x))
        x = x + self.mlp(self.norm2(x))
        return x


class TimeSformer(nn.Module):
    """TimeSformer for video emotion recognition"""
    def __init__(
        self,
        img_size=224,
        patch_size=16,
        in_channels=3,
        num_classes=7,
        embed_dim=384,
        depth=6,
        num_heads=6,
        mlp_ratio=4.,
        qkv_bias=True,
        drop_rate=0.1,
        attn_drop_rate=0.1
    ):
        super().__init__()
        
        self.num_classes = num_classes
        self.embed_dim = embed_dim
        
        self.patch_embed = PatchEmbed(img_size, patch_size, in_channels, embed_dim)
        num_patches = self.patch_embed.num_patches
        
        self.cls_token = nn.Parameter(torch.zeros(1, 1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, 1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)
        
        self.blocks = nn.ModuleList([
            TransformerBlock(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                drop=drop_rate,
                attn_drop=attn_drop_rate
            )
            for _ in range(depth)
        ])
        
        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)
        
        nn.init.trunc_normal_(self.pos_embed, std=0.02)
        nn.init.trunc_normal_(self.cls_token, std=0.02)
        self.apply(self._init_weights)
    
    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            nn.init.trunc_normal_(m.weight, std=0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
    
    def forward(self, x):
        B, C, T, H, W = x.shape
        x = self.patch_embed(x)
        cls_tokens = self.cls_token.expand(B, T, -1, -1)
        x = torch.cat([cls_tokens, x], dim=2)
        x = x + self.pos_embed
        x = self.pos_drop(x)
        
        for block in self.blocks:
            x = block(x)
        
        x = self.norm(x)
        x = x.mean(dim=[1, 2])
        x = self.head(x)
        
        return x

# %%
# ========================
# 4. TRAINING FUNCTIONS
# ========================

def train_epoch(model, loader, criterion, optimizer, device):
    """Train for one epoch"""
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    pbar = tqdm(loader, desc='Training')
    for videos, labels in pbar:
        try:
            videos = videos.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            
            optimizer.zero_grad()
            outputs = model(videos)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
            pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{100.*correct/total:.2f}%'
            })
            
        except Exception as e:
            print(f"\nError in training batch: {e}")
            import traceback
            traceback.print_exc()
            continue
    
    return total_loss / len(loader), correct / total


def validate(model, loader, criterion, device):
    """Validate the model"""
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        pbar = tqdm(loader, desc='Validating')
        for videos, labels in pbar:
            try:
                videos = videos.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
                
                outputs = model(videos)
                loss = criterion(outputs, labels)
                
                total_loss += loss.item()
                _, predicted = outputs.max(1)
                
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                
                pbar.set_postfix({'loss': f'{loss.item():.4f}'})
                
            except Exception as e:
                print(f"\nError in validation batch: {e}")
                import traceback
                traceback.print_exc()
                continue
    
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    
    return total_loss / len(loader), accuracy, f1, all_preds, all_labels


def plot_confusion_matrix(y_true, y_pred, class_names):
    """Plot confusion matrix"""
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    plt.close()
    print("Confusion matrix saved to 'confusion_matrix.png'")


# %%
# ========================
# 5. MAIN TRAINING SCRIPT
# ========================

def main():
    config = {
        'num_frames': 8,
        'frame_size': 224,
        'patch_size': 16,
        'embed_dim': 384,
        'depth': 6,
        'num_heads': 6,
        'batch_size': 4,
        'num_epochs': 5,
        'learning_rate': 3e-4,
        'weight_decay': 0.05,
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'train_split': 0.7,
        'val_split': 0.15,
        'test_split': 0.15
    }
    
    print(f"Using device: {config['device']}")
    if config['device'] == 'cpu':
        print("WARNING: Training on CPU will be very slow!")
    print("="*60)
    
    torch.manual_seed(42)
    np.random.seed(42)
    
    print("\nLoading dataset...")
    full_dataset = SyntheticEmotionsDataset(
        split='train',
        num_frames=config['num_frames'],
        frame_size=config['frame_size'],
        transform=VideoAugmentation(mode='train')
    )
    
    config['num_classes'] = len(full_dataset.emotions)
    print(f"Number of emotion classes: {config['num_classes']}")
    
    dataset_size = len(full_dataset)
    indices = list(range(dataset_size))
    np.random.shuffle(indices)
    
    train_size = int(config['train_split'] * dataset_size)
    val_size = int(config['val_split'] * dataset_size)
    
    train_indices = indices[:train_size]
    val_indices = indices[train_size:train_size+val_size]
    test_indices = indices[train_size+val_size:]
    
    print(f"\nDataset splits:")
    print(f"  Train: {len(train_indices)} videos")
    print(f"  Val: {len(val_indices)} videos")
    print(f"  Test: {len(test_indices)} videos")
    
    from torch.utils.data import Subset
    train_dataset = Subset(full_dataset, train_indices)
    val_dataset = Subset(full_dataset, val_indices)
    test_dataset = Subset(full_dataset, test_indices)
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=0,
        pin_memory=False,
        drop_last=False
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=0,
        pin_memory=False,
        drop_last=False
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=0,
        pin_memory=False,
        drop_last=False
    )
    
    print("\nCreating model...")
    model = TimeSformer(
        img_size=config['frame_size'],
        patch_size=config['patch_size'],
        num_classes=config['num_classes'],
        embed_dim=config['embed_dim'],
        depth=config['depth'],
        num_heads=config['num_heads']
    ).to(config['device'])
    
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()) / 1e6:.2f}M")
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(
        model.parameters(),
        lr=config['learning_rate'],
        weight_decay=config['weight_decay']
    )
    
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=config['num_epochs']
    )
    
    best_val_acc = 0
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []
    
    print("\n" + "="*60)
    print("STARTING TRAINING")
    print("="*60)
    
    try:
        for epoch in range(config['num_epochs']):
            print(f"\nEpoch {epoch+1}/{config['num_epochs']}")
            print("-" * 60)
            
            train_loss, train_acc = train_epoch(
                model, train_loader, criterion, optimizer, config['device']
            )
            
            val_loss, val_acc, val_f1, _, _ = validate(
                model, val_loader, criterion, config['device']
            )
            
            scheduler.step()
            
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            train_accs.append(train_acc)
            val_accs.append(val_acc)
            
            print(f"\nEpoch {epoch+1} Summary:")
            print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
            print(f"  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")
            
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), 'best_model.pth')
                print(f"  ✓ Saved best model with val_acc: {val_acc:.4f}")
    
    except KeyboardInterrupt:
        print("\n\nTraining interrupted by user!")
    except Exception as e:
        print(f"\n\nError during training: {e}")
        import traceback
        traceback.print_exc()
        return
    
    if len(train_losses) > 0:
        plt.figure(figsize=(12, 4))
        
        plt.subplot(1, 2, 1)
        plt.plot(train_losses, label='Train Loss', marker='o')
        plt.plot(val_losses, label='Val Loss', marker='s')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.title('Training and Validation Loss')
        plt.grid(True)
        
        plt.subplot(1, 2, 2)
        plt.plot(train_accs, label='Train Acc', marker='o')
        plt.plot(val_accs, label='Val Acc', marker='s')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.title('Training and Validation Accuracy')
        plt.grid(True)
        
        plt.tight_layout()
        plt.savefig('training_curves.png', dpi=150)
        plt.close()
        print("\nTraining curves saved to 'training_curves.png'")
    
    print("\n" + "="*60)
    print("FINAL TEST EVALUATION")
    print("="*60)
    
    try:
        model.load_state_dict(torch.load('best_model.pth'))
        test_loss, test_acc, test_f1, test_preds, test_labels = validate(
            model, test_loader, criterion, config['device']
        )
        
        print(f"\nTest Loss: {test_loss:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")
        print(f"Test F1-Score: {test_f1:.4f}")
        
        # Get unique classes in test set
        unique_test_classes = sorted(list(set(test_labels)))
        test_class_names = [full_dataset.emotions[i] for i in unique_test_classes]
        
        print("\nClassification Report:")
        print(f"Note: Only {len(unique_test_classes)} out of {len(full_dataset.emotions)} classes present in test set")
        print(classification_report(
            test_labels, 
            test_preds,
            labels=unique_test_classes,
            target_names=test_class_names,
            digits=4,
            zero_division=0
        ))
        
        plot_confusion_matrix(
            test_labels,
            test_preds,
            test_class_names
        )
        
        print("\n" + "="*60)
        print("TRAINING COMPLETE!")
        print("="*60)
        print(f"Best validation accuracy: {best_val_acc:.4f}")
        print(f"Final test accuracy: {test_acc:.4f}")
        
    except FileNotFoundError:
        print("\nNo saved model found. Training may not have completed successfully.")
    except Exception as e:
        print(f"\nError during testing: {e}")
        import traceback
        traceback.print_exc()


if __name__ == '__main__':
    main()

Using device: cpu

Loading dataset...
Loading synthetic-emotions dataset (split: train)...
Loaded 100 videos
Emotions found: ['Anger', 'Confusion', 'Disgust', 'Fear', 'Happiness and Joy', 'Love and Affection', 'Mixed Emotions', 'Neutral/Everyday', 'Sadness', 'Surprise']
Emotion distribution: {'Happiness and Joy': 10, 'Anger': 10, 'Sadness': 10, 'Fear': 10, 'Surprise': 10, 'Disgust': 10, 'Love and Affection': 10, 'Confusion': 10, 'Neutral/Everyday': 10, 'Mixed Emotions': 10}
Number of emotion classes: 10

Dataset splits:
  Train: 70 videos
  Val: 15 videos
  Test: 15 videos

Creating model...
Model parameters: 14.57M

STARTING TRAINING

Epoch 1/5
------------------------------------------------------------


Training: 100%|██████████| 18/18 [02:58<00:00,  9.89s/it, loss=2.9118, acc=7.14%]
Validating: 100%|██████████| 4/4 [00:08<00:00,  2.10s/it, loss=1.9317]



Epoch 1 Summary:
  Train Loss: 2.7621, Train Acc: 0.0714
  Val Loss: 2.4241, Val Acc: 0.1333, Val F1: 0.0314
  ✓ Saved best model with val_acc: 0.1333

Epoch 2/5
------------------------------------------------------------


Training: 100%|██████████| 18/18 [02:31<00:00,  8.40s/it, loss=2.3811, acc=10.00%]
Validating: 100%|██████████| 4/4 [00:08<00:00,  2.05s/it, loss=2.3834]



Epoch 2 Summary:
  Train Loss: 2.4733, Train Acc: 0.1000
  Val Loss: 2.3749, Val Acc: 0.0000, Val F1: 0.0000

Epoch 3/5
------------------------------------------------------------


Training: 100%|██████████| 18/18 [02:12<00:00,  7.35s/it, loss=2.1770, acc=10.00%]
Validating: 100%|██████████| 4/4 [00:08<00:00,  2.15s/it, loss=2.1806]



Epoch 3 Summary:
  Train Loss: 2.3585, Train Acc: 0.1000
  Val Loss: 2.3619, Val Acc: 0.1333, Val F1: 0.0314

Epoch 4/5
------------------------------------------------------------


Training: 100%|██████████| 18/18 [02:28<00:00,  8.22s/it, loss=2.5264, acc=7.14%]
Validating: 100%|██████████| 4/4 [00:09<00:00,  2.45s/it, loss=2.2900]



Epoch 4 Summary:
  Train Loss: 2.3429, Train Acc: 0.0714
  Val Loss: 2.3919, Val Acc: 0.0667, Val F1: 0.0083

Epoch 5/5
------------------------------------------------------------


Training: 100%|██████████| 18/18 [02:28<00:00,  8.26s/it, loss=2.3123, acc=11.43%]
Validating: 100%|██████████| 4/4 [00:08<00:00,  2.05s/it, loss=2.2934]



Epoch 5 Summary:
  Train Loss: 2.3037, Train Acc: 0.1143
  Val Loss: 2.3757, Val Acc: 0.1333, Val F1: 0.0314

Training curves saved to 'training_curves.png'

FINAL TEST EVALUATION


Validating: 100%|██████████| 4/4 [00:09<00:00,  2.37s/it, loss=2.9783]



Test Loss: 2.5039
Test Accuracy: 0.0000
Test F1-Score: 0.0000

Classification Report:
Note: Only 8 out of 10 classes present in test set
                    precision    recall  f1-score   support

             Anger     0.0000    0.0000    0.0000       1.0
         Confusion     0.0000    0.0000    0.0000       2.0
           Disgust     0.0000    0.0000    0.0000       2.0
 Happiness and Joy     0.0000    0.0000    0.0000       1.0
Love and Affection     0.0000    0.0000    0.0000       1.0
    Mixed Emotions     0.0000    0.0000    0.0000       2.0
  Neutral/Everyday     0.0000    0.0000    0.0000       3.0
           Sadness     0.0000    0.0000    0.0000       3.0

         micro avg     0.0000    0.0000    0.0000      15.0
         macro avg     0.0000    0.0000    0.0000      15.0
      weighted avg     0.0000    0.0000    0.0000      15.0

Confusion matrix saved to 'confusion_matrix.png'

TRAINING COMPLETE!
Best validation accuracy: 0.1333
Final test accuracy: 0.0000
