<a href="https://colab.research.google.com/github/Youngstg/Test_Multimodal/blob/main/TestBERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MULTIMODAL MUSIC EMOTION CLASSIFICATION
Part 1: BERT-based Lyrics Classification dengan 5-Fold Cross Validation

Dataset: MIREX Emotion Dataset dari Kaggle
Modalitas: Lirik (Text)
Model: BERT (bert-base-uncased)

# 1. INSTALASI DAN IMPORT LIBRARY

In [None]:
# Install required packages
!pip install -q kagglehub transformers torch scikit-learn pandas numpy

import os
import json
import pandas as pd
import numpy as np
import re
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')

# Set random seeds untuk reproducibility
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

set_seed(42)

# Check GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


# 2. DOWNLOAD DAN LOAD DATASET

In [None]:
import kagglehub

# Download dataset
path = kagglehub.dataset_download("imsparsh/multimodal-mirex-emotion-dataset")
print("Path to dataset files:", path)

# Explore dataset structure
print("\n=== Dataset Structure ===")
for root, dirs, files in os.walk(path):
    level = root.replace(path, '').count(os.sep)
    indent = ' ' * 2 * level
    print(f'{indent}{os.path.basename(root)}/')
    subindent = ' ' * 2 * (level + 1)
    for file in files[:5]:
        print(f'{subindent}{file}')
    if len(files) > 5:
        print(f'{subindent}... and {len(files)-5} more files')

Using Colab cache for faster access to the 'multimodal-mirex-emotion-dataset' dataset.
Path to dataset files: /kaggle/input/multimodal-mirex-emotion-dataset

=== Dataset Structure ===
multimodal-mirex-emotion-dataset/
  README.txt
  dataset/
    clusters.txt
    categories.txt
    split-by-categories-audio.bat
    split-by-categories-lyrics.bat
    dataset info.html
    ... and 2 more files
    Audio/
      326.mp3
      149.mp3
      898.mp3
      011.mp3
      434.mp3
      ... and 898 more files
    MIDIs/
      552.mid
      197.mid
      019.mid
      662.mid
      773.mid
      ... and 191 more files
    Lyrics/
      559.txt
      557.txt
      361.txt
      812.txt
      245.txt
      ... and 759 more files


# 3. LOAD DAN PREPROCESSING DATA

In [None]:
def load_mirex_dataset(dataset_path):
    """
    Load MIREX dataset berdasarkan struktur:
    - categories.txt: emotion labels (satu per baris, index = line number)
    - clusters.txt: cluster labels (satu per baris, index = line number)
    - Lyrics/*.txt: lyric files (filename = song number)

    Format sebenarnya:
    - categories.txt: baris ke-i = emotion untuk song ke-i
    - clusters.txt: baris ke-i = CLUSTER untuk song ke-i (GUNAKAN INI!)
    """

    # 1. Load categories (emotion labels) - for reference only
    categories_path = os.path.join(dataset_path, 'dataset', 'categories.txt')
    emotion_labels = []

    print("\n--- Loading categories.txt (reference only) ---")

    if os.path.exists(categories_path):
        with open(categories_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
            emotion_labels = [line.strip() for line in lines if line.strip()]

        print(f"✓ Loaded {len(emotion_labels)} emotion labels")
        print(f"  Unique emotions: {sorted(set(emotion_labels))}")

    # 2. Load clusters - USE THIS AS LABELS!
    clusters_path = os.path.join(dataset_path, 'dataset', 'clusters.txt')
    cluster_labels = []

    print("\n--- Loading clusters.txt (USING THIS AS LABELS) ---")

    if os.path.exists(clusters_path):
        with open(clusters_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
            cluster_labels = [line.strip() for line in lines if line.strip()]

        unique_clusters = sorted(set(cluster_labels))
        print(f"✓ Loaded {len(cluster_labels)} cluster labels")
        print(f"  Unique clusters: {unique_clusters}")
        print(f"  Number of clusters: {len(unique_clusters)}")

        # Show distribution
        from collections import Counter
        cluster_counts = Counter(cluster_labels)
        print(f"\n  Cluster distribution:")
        for cluster, count in sorted(cluster_counts.items()):
            print(f"    {cluster}: {count} songs")
    else:
        print("❌ clusters.txt not found!")
        return pd.DataFrame()

    # 3. Create song_id to cluster mapping
    song_cluster_map = {}

    # Map based on index
    for idx in range(len(cluster_labels)):
        # Song IDs might be 0-indexed or 1-indexed, we'll try both
        song_id_0 = str(idx).zfill(3)  # 000, 001, 002...
        song_id_1 = str(idx + 1).zfill(3)  # 001, 002, 003...

        song_cluster_map[song_id_0] = cluster_labels[idx]
        song_cluster_map[song_id_1] = cluster_labels[idx]

    print(f"\n✓ Created mappings for {len(song_cluster_map)} potential song IDs")

    # 4. Load lyrics
    lyrics_dir = os.path.join(dataset_path, 'dataset', 'Lyrics')
    data = []

    if os.path.exists(lyrics_dir):
        lyrics_files = [f for f in os.listdir(lyrics_dir) if f.endswith('.txt')]
        print(f"\n--- Loading lyrics ---")
        print(f"✓ Found {len(lyrics_files)} lyric files")

        # Sample some filenames to understand naming
        print(f"  Sample filenames: {sorted(lyrics_files)[:5]}")

        matched = 0
        unmatched = 0
        unmatched_samples = []

        for filename in lyrics_files:
            song_id = filename.replace('.txt', '')

            # Get cluster label
            if song_id not in song_cluster_map:
                unmatched += 1
                if len(unmatched_samples) < 3:
                    unmatched_samples.append(song_id)
                continue

            matched += 1
            cluster = song_cluster_map[song_id]

            # Read lyrics
            lyrics_path = os.path.join(lyrics_dir, filename)
            try:
                with open(lyrics_path, 'r', encoding='utf-8', errors='ignore') as f:
                    lyrics = f.read().strip()

                if lyrics:  # Only add if lyrics not empty
                    data.append({
                        'song_id': song_id,
                        'lyrics': lyrics,
                        'cluster': cluster  # Changed from 'emotion' to 'cluster'
                    })

                    if len(data) <= 3:
                        print(f"  ✓ Loaded: {song_id} - {cluster} ({len(lyrics)} chars)")
            except Exception as e:
                print(f"  ❌ Error reading {filename}: {e}")
                continue

        print(f"\n✓ Successfully matched: {matched} songs")
        if unmatched > 0:
            print(f"⚠️ Unmatched: {unmatched} songs")
            print(f"  Sample unmatched IDs: {unmatched_samples}")
    else:
        print("❌ Lyrics directory not found!")
        return pd.DataFrame()

    df = pd.DataFrame(data)
    print(f"\n{'='*80}")
    print(f"✓ Final dataset: {len(df)} songs with lyrics and clusters")
    print(f"{'='*80}")

    return df

# Load data
print("\n" + "="*80)
print("LOADING MIREX DATASET")
print("="*80)

df = load_mirex_dataset(path)

print(f"\nDataset shape: {df.shape}")

if len(df) > 0:
    print(f"\nFirst few rows:")
    print(df.head())

    print("\nCluster distribution:")
    print(df['cluster'].value_counts())
else:
    print("\n⚠️ ERROR: No data loaded!")
    print("Please check the debug output above to identify the issue.")
    raise ValueError("Failed to load dataset - check debug output above")


LOADING MIREX DATASET

--- Loading categories.txt (reference only) ---
✓ Loaded 903 emotion labels
  Unique emotions: ['Agressive', 'Amiable-good natured', 'Autumnal', 'Bittersweet', 'Boisterous', 'Brooding', 'Campy', 'Cheerful', 'Confident', 'Fiery', 'Fun', 'Humorous', 'Intense', 'Literate', 'Passionate', 'Poignant', 'Rollicking', 'Rousing', 'Rowdy', 'Silly', 'Sweet', 'Tense - Anxious', 'Visceral', 'Volatile', 'Wistful', 'Witty', 'Wry', 'whimsical']

--- Loading clusters.txt (USING THIS AS LABELS) ---
✓ Loaded 903 cluster labels
  Unique clusters: ['Cluster 1', 'Cluster 2', 'Cluster 3', 'Cluster 4', 'Cluster 5']
  Number of clusters: 5

  Cluster distribution:
    Cluster 1: 170 songs
    Cluster 2: 164 songs
    Cluster 3: 215 songs
    Cluster 4: 191 songs
    Cluster 5: 163 songs

✓ Created mappings for 904 potential song IDs

--- Loading lyrics ---
✓ Found 764 lyric files
  Sample filenames: ['001.txt', '003.txt', '004.txt', '007.txt', '008.txt']
  ✓ Loaded: 559 - Cluster 4 (4964

# 4. TEXT PREPROCESSING

In [None]:
def clean_lyrics(text):
    """
    Improved cleaning for lyrics text
    """
    if pd.isna(text):
        return ""

    text = str(text)

    # Remove common noise patterns in lyrics
    # Remove [Chorus], [Verse], etc.
    text = re.sub(r'\[.*?\]', '', text)
    # Remove (x2), (repeat), etc.
    text = re.sub(r'\(.*?\)', '', text)
    # Remove URLs
    text = re.sub(r'http\S+|www\S+', '', text)

    # Lowercase
    text = text.lower()

    # Remove extra whitespace and newlines
    text = ' '.join(text.split())

    # Keep letters, numbers, and basic punctuation
    text = re.sub(r'[^a-z0-9\s.,!?\']', ' ', text)

    # Remove repeated punctuation
    text = re.sub(r'([.,!?])\1+', r'\1', text)

    # Remove extra spaces
    text = ' '.join(text.split())

    return text.strip()

# Apply cleaning
print("\n" + "="*80)
print("PREPROCESSING LYRICS")
print("="*80)

df['lyrics_clean'] = df['lyrics'].apply(clean_lyrics)
print("✓ Cleaning completed!")

# Remove rows with empty lyrics
df = df[df['lyrics_clean'].str.len() > 0].reset_index(drop=True)
print(f"✓ Dataset after removing empty lyrics: {df.shape}")

# Show sample
print("\nSample cleaned lyrics:")
print(df.iloc[0]['lyrics_clean'][:200] + "...")


PREPROCESSING LYRICS
✓ Cleaning completed!
✓ Dataset after removing empty lyrics: (764, 4)

Sample cleaned lyrics:
i remember every little thing as if it happened only yesterday parking by the lake and there was not another car in sight and i never had a girl looking any better than you did and all the kids at sch...


# 5. LABEL ENCODING

In [None]:
print("\n" + "="*80)
print("ENCODING LABELS")
print("="*80)

# Encode cluster labels
label_encoder = LabelEncoder()
df['cluster_encoded'] = label_encoder.fit_transform(df['cluster'])

print(f"✓ Cluster classes: {label_encoder.classes_}")
print(f"✓ Number of clusters: {len(label_encoder.classes_)}")

print("\nClass distribution:")
for cluster, count in df['cluster'].value_counts().items():
    encoded = df[df['cluster'] == cluster]['cluster_encoded'].iloc[0]
    print(f"  {encoded}: {cluster} - {count} samples ({count/len(df)*100:.1f}%)")

num_classes = len(label_encoder.classes_)

# Check class imbalance
min_samples = df['cluster'].value_counts().min()
max_samples = df['cluster'].value_counts().max()
imbalance_ratio = max_samples / min_samples
print(f"\n⚠️ Class imbalance ratio: {imbalance_ratio:.2f}x")
if imbalance_ratio > 3:
    print("  High class imbalance detected! Consider using class weights.")


ENCODING LABELS
✓ Cluster classes: ['Cluster 1' 'Cluster 2' 'Cluster 3' 'Cluster 4' 'Cluster 5']
✓ Number of clusters: 5

Class distribution:
  2: Cluster 3 - 192 samples (25.1%)
  3: Cluster 4 - 173 samples (22.6%)
  1: Cluster 2 - 138 samples (18.1%)
  0: Cluster 1 - 134 samples (17.5%)
  4: Cluster 5 - 127 samples (16.6%)

⚠️ Class imbalance ratio: 1.51x


# 6. DATASET CLASS

In [None]:
class LyricsDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]

        # Tokenize
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }


# 7. MODEL DEFINITION

In [None]:
class BERTEmotionClassifier(nn.Module):
    def __init__(self, num_classes, dropout=0.5):  # Increased dropout
        super(BERTEmotionClassifier, self).__init__()

        # BERT encoder
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # Freeze early BERT layers to prevent overfitting
        for param in self.bert.embeddings.parameters():
            param.requires_grad = False
        for i, layer in enumerate(self.bert.encoder.layer[:8]):  # Freeze first 8 layers
            for param in layer.parameters():
                param.requires_grad = False

        # Simpler classifier head to prevent overfitting
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

        # Layer normalization for stability
        self.layer_norm = nn.LayerNorm(self.bert.config.hidden_size)

    def forward(self, input_ids, attention_mask):
        # BERT encoding
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        # Use [CLS] token representation
        pooled_output = outputs.pooler_output

        # Normalize and classify
        x = self.layer_norm(pooled_output)
        x = self.dropout(x)
        logits = self.fc(x)

        return logits

# 8. TRAINING FUNCTION

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, scheduler, device, accumulation_steps=1):
    model.train()
    total_loss = 0
    predictions = []
    true_labels = []

    optimizer.zero_grad()

    for batch_idx, batch in enumerate(dataloader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # Forward pass
        logits = model(input_ids, attention_mask)
        loss = criterion(logits, labels)

        # Normalize loss for gradient accumulation
        loss = loss / accumulation_steps

        # Backward pass
        loss.backward()

        # Update weights every accumulation_steps
        if (batch_idx + 1) % accumulation_steps == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        total_loss += loss.item() * accumulation_steps

        # Predictions
        preds = torch.argmax(logits, dim=1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

    avg_loss = total_loss / len(dataloader)
    accuracy = accuracy_score(true_labels, predictions)

    return avg_loss, accuracy

def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)

            total_loss += loss.item()

            # Predictions
            preds = torch.argmax(logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    avg_loss = total_loss / len(dataloader)
    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, predictions, average='weighted', zero_division=0
    )

    return avg_loss, accuracy, precision, recall, f1, predictions, true_labels

# 9. 5-FOLD CROSS VALIDATION

In [None]:
# Improved Hyperparameters to prevent overfitting
BATCH_SIZE = 16  # Increased back for stability
MAX_LENGTH = 256  # Shorter sequences
LEARNING_RATE = 3e-5  # Slightly higher LR for better convergence
NUM_EPOCHS = 15  # More epochs
N_FOLDS = 5
WEIGHT_DECAY = 0.01  # L2 regularization
EARLY_STOPPING_PATIENCE = 4  # More patience
ACCUMULATION_STEPS = 2  # Gradient accumulation for larger effective batch

# Initialize tokenizer
print("\n" + "="*80)
print("INITIALIZING BERT TOKENIZER")
print("="*80)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
print("✓ Tokenizer loaded!")

# Prepare data for cross-validation
X = df['lyrics_clean'].values
y = df['cluster_encoded'].values

print(f"\n✓ Total samples: {len(X)}")
print(f"✓ Total clusters: {num_classes}")

# Calculate class weights for imbalanced data
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weights = torch.FloatTensor(class_weights).to(device)
print(f"\n✓ Using class weights to handle imbalance")
print(f"  Class weights: {class_weights.cpu().numpy()}")

# 5-Fold Stratified Cross Validation
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)

print("\n" + "="*80)
print("STARTING 5-FOLD CROSS VALIDATION")
print("="*80)

# Store results
fold_results = []

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f"\n{'='*80}")
    print(f"FOLD {fold + 1}/{N_FOLDS}")
    print(f"{'='*80}")

    # Split data
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    print(f"Train size: {len(X_train)}, Val size: {len(X_val)}")

    # Create datasets
    train_dataset = LyricsDataset(X_train, y_train, tokenizer, MAX_LENGTH)
    val_dataset = LyricsDataset(X_val, y_val, tokenizer, MAX_LENGTH)

    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # Initialize model
    model = BERTEmotionClassifier(num_classes=num_classes)
    model = model.to(device)

    # Loss with class weights and optimizer with weight decay
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Scheduler - cosine annealing for better convergence
    from torch.optim.lr_scheduler import CosineAnnealingLR
    scheduler = CosineAnnealingLR(optimizer, T_max=len(train_loader) * NUM_EPOCHS)

    # Training loop with early stopping
    best_val_f1 = 0
    patience_counter = 0
    training_history = []

    for epoch in range(NUM_EPOCHS):
        print(f"\nEpoch {epoch + 1}/{NUM_EPOCHS}")

        # Train
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, scheduler, device, ACCUMULATION_STEPS
        )

        # Validate
        val_loss, val_acc, val_precision, val_recall, val_f1, _, _ = evaluate(
            model, val_loader, criterion, device
        )

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        # Track history
        training_history.append({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc,
            'val_f1': val_f1
        })

        # Save best model and early stopping
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), f'best_model_fold{fold+1}.pt')
            patience_counter = 0
            print(f"  ✓ New best F1: {best_val_f1:.4f}")
        else:
            patience_counter += 1
            print(f"  No improvement ({patience_counter}/{EARLY_STOPPING_PATIENCE})")

            if patience_counter >= EARLY_STOPPING_PATIENCE:
                print(f"  Early stopping triggered!")
                break

    # Load best model and final evaluation
    model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pt'))
    val_loss, val_acc, val_precision, val_recall, val_f1, predictions, true_labels = evaluate(
        model, val_loader, criterion, device
    )

    print(f"\n{'='*80}")
    print(f"FOLD {fold + 1} FINAL RESULTS:")
    print(f"{'='*80}")
    print(f"Accuracy:  {val_acc:.4f}")
    print(f"Precision: {val_precision:.4f}")
    print(f"Recall:    {val_recall:.4f}")
    print(f"F1-Score:  {val_f1:.4f}")

    # Store results
    fold_results.append({
        'fold': fold + 1,
        'accuracy': val_acc,
        'precision': val_precision,
        'recall': val_recall,
        'f1': val_f1
    })

    # Classification report
    print("\nClassification Report:")
    print(classification_report(
        true_labels, predictions,
        target_names=label_encoder.classes_,
        digits=4,
        zero_division=0
    ))


INITIALIZING BERT TOKENIZER
✓ Tokenizer loaded!

✓ Total samples: 764
✓ Total clusters: 5

✓ Using class weights to handle imbalance
  Class weights: [1.1402985  1.1072464  0.79583335 0.883237   1.2031496 ]

STARTING 5-FOLD CROSS VALIDATION

FOLD 1/5
Train size: 611, Val size: 153

Epoch 1/15
Train Loss: 1.7425, Train Acc: 0.2193
Val Loss: 1.5796, Val Acc: 0.3268, Val F1: 0.2450
  ✓ New best F1: 0.2450

Epoch 2/15
Train Loss: 1.6372, Train Acc: 0.2700
Val Loss: 1.4971, Val Acc: 0.3791, Val F1: 0.3416
  ✓ New best F1: 0.3416

Epoch 3/15
Train Loss: 1.4997, Train Acc: 0.3764
Val Loss: 1.4945, Val Acc: 0.3464, Val F1: 0.3144
  No improvement (1/4)

Epoch 4/15
Train Loss: 1.4277, Train Acc: 0.4173
Val Loss: 1.4333, Val Acc: 0.3856, Val F1: 0.3846
  ✓ New best F1: 0.3846

Epoch 5/15
Train Loss: 1.3153, Train Acc: 0.4566
Val Loss: 1.4241, Val Acc: 0.4314, Val F1: 0.4081
  ✓ New best F1: 0.4081

Epoch 6/15
Train Loss: 1.2427, Train Acc: 0.5221
Val Loss: 1.4755, Val Acc: 0.4118, Val F1: 0.388

# 10. FINAL RESULTS

In [None]:
print("\n" + "="*80)
print("5-FOLD CROSS VALIDATION SUMMARY")
print("="*80)

results_df = pd.DataFrame(fold_results)
print("\nResults per fold:")
print(results_df.to_string(index=False))

print("\n" + "="*80)
print("AVERAGE PERFORMANCE ACROSS ALL FOLDS:")
print("="*80)
print(f"Accuracy:  {results_df['accuracy'].mean():.4f} ± {results_df['accuracy'].std():.4f}")
print(f"Precision: {results_df['precision'].mean():.4f} ± {results_df['precision'].std():.4f}")
print(f"Recall:    {results_df['recall'].mean():.4f} ± {results_df['recall'].std():.4f}")
print(f"F1-Score:  {results_df['f1'].mean():.4f} ± {results_df['f1'].std():.4f}")

# Save results
results_df.to_csv('bert_lyrics_cv_results.csv', index=False)
print("\nResults saved to 'bert_lyrics_cv_results.csv'")

print("\n✅ BERT Lyrics Classification Complete!")
print("\nNext steps:")
print("1. Load audio data for audio modalitas")
print("2. Load MIDI data for MIDI modalitas")
print("3. Implement multimodal fusion")


5-FOLD CROSS VALIDATION SUMMARY

Results per fold:
 fold  accuracy  precision   recall       f1
    1  0.437908   0.440559 0.437908 0.433468
    2  0.529412   0.527305 0.529412 0.526101
    3  0.490196   0.489881 0.490196 0.472347
    4  0.483660   0.515930 0.483660 0.480147
    5  0.427632   0.491996 0.427632 0.433879

AVERAGE PERFORMANCE ACROSS ALL FOLDS:
Accuracy:  0.4738 ± 0.0415
Precision: 0.4931 ± 0.0334
Recall:    0.4738 ± 0.0415
F1-Score:  0.4692 ± 0.0384

Results saved to 'bert_lyrics_cv_results.csv'

✅ BERT Lyrics Classification Complete!

Next steps:
1. Load audio data for audio modalitas
2. Load MIDI data for MIDI modalitas
3. Implement multimodal fusion
