In [None]:
!pip install mlflow pyyaml torch transformers datasets gensim scikit-learn pandas numpy matplotlib seaborn nltk -q

In [None]:
# Cell 2: Import Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import yaml
import json
import mlflow
from mlflow import log_metric, log_param
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import re
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

from collections import Counter
from gensim.models import Word2Vec, FastText
from transformers import BertTokenizer, BertModel
from datasets import load_dataset
from sklearn.metrics import f1_score, hamming_loss, precision_score, recall_score

# Cell 3: Set Random Seeds
# Seed NumPy's global RNG and PyTorch with the same fixed value.
np.random.seed(42)
torch.manual_seed(42)

# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"✓ PyTorch device: {device}")

# Cell 4: Load Configuration
# Everything downstream reads its settings from this parsed YAML mapping.
with open('config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

print("✓ Configuration loaded from config.yaml")
mlflow_cfg = config['mlflow']
print(f"✓ Experiment: {mlflow_cfg['experiment_name']}")

# Cell 5: Initialize MLflow
mlflow.set_tracking_uri(mlflow_cfg['tracking_uri'])

# Look the experiment up by name and create it only when it is missing.
experiment = mlflow.get_experiment_by_name(mlflow_cfg['experiment_name'])
if experiment is not None:
    print(f"✓ Using existing MLflow experiment (ID: {experiment.experiment_id})")
else:
    experiment_id = mlflow.create_experiment(mlflow_cfg['experiment_name'])
    print(f"✓ Created MLflow experiment (ID: {experiment_id})")

mlflow.set_experiment(mlflow_cfg['experiment_name'])

print("\n✓ SNIPPET 1 COMPLETE: Setup & Configuration Ready")


In [None]:
# SNIPPET 2: Data Loading & Preprocessing
# ============================================================================

# Section banner for the data stage.
_rule = "=" * 70
print(f"\n{_rule}")
print("STEP 1: DATA LOADING & PREPROCESSING")
print(_rule)

# Cell 1: Load Dataset
# Loads the "simplified" configuration of GoEmotions through `datasets.load_dataset`.
print("\nLoading GoEmotions dataset...")
dataset = load_dataset("google-research-datasets/go_emotions", "simplified")

print("Dataset splits available: " + str(list(dataset.keys())))

# Cell 2: Preprocessing Function
def preprocess_text(text):
    """Clean ``text`` as directed by ``config['preprocessing']`` and tokenize it.

    The cleaning steps run in a fixed order, each only when its flag in the
    config is truthy: lower-casing, URL removal, then removal of every
    character that is neither an ASCII letter nor whitespace.

    Returns:
        The token list produced by ``word_tokenize`` on the cleaned text.
    """
    options = config['preprocessing']

    # (config flag, transformation) pairs, applied top to bottom.
    cleaning_steps = (
        ('lowercase', lambda s: s.lower()),
        ('remove_urls', lambda s: re.sub(r'http\S+|www\S+|https\S+', '', s)),
        ('remove_special_chars', lambda s: re.sub(r'[^a-zA-Z\s]', '', s)),
    )
    for flag, transform in cleaning_steps:
        if options[flag]:
            text = transform(text)

    return word_tokenize(text)

# Cell 3: Load and Process Data
def load_data(dataset_split, max_samples=None):
    """Turn one dataset split into raw texts, multi-hot labels and token lists.

    Args:
        dataset_split: iterable of examples, each exposing ``'text'`` and
            ``'labels'`` (an iterable of integer label ids).
        max_samples: when truthy, stop after this many examples; a falsy
            value (``None`` or ``0``) means the whole split is consumed.

    Returns:
        ``(texts, labels, tokens_list)`` where ``labels`` is a NumPy array
        built from one 28-slot 0/1 row per example and the two lists are
        aligned with it.
    """
    texts, tokens_list, label_rows = [], [], []

    for position, record in enumerate(dataset_split):
        # Honour the optional cap on the number of examples.
        if max_samples and position >= max_samples:
            break

        raw_text = record['text']
        texts.append(raw_text)
        tokens_list.append(preprocess_text(raw_text))

        # Multi-hot row: slot k is 1 when emotion k is attached to the example.
        multi_hot = [0] * 28
        for emotion_id in record['labels']:
            multi_hot[emotion_id] = 1
        label_rows.append(multi_hot)

    return texts, np.array(label_rows), tokens_list

# Load splits
# Each size comes from config['data']; load_data treats a falsy size as "no limit".
train_texts, train_labels, train_tokens = load_data(
    dataset['train'],
    config['data']['train_size']
)
val_texts, val_labels, val_tokens = load_data(
    dataset['validation'],
    config['data']['val_size']
)
test_texts, test_labels, test_tokens = load_data(
    dataset['test'],
    config['data']['test_size']
)

print(f"\n✓ Loaded data splits:")
print(f"  Train:  {len(train_texts)} samples")
print(f"  Val:    {len(val_texts)} samples")
print(f"  Test:   {len(test_texts)} samples")
print(f"  Labels: {train_labels.shape} (multi-hot encoded)")

# Cell 4: Build Vocabulary
# Word frequencies are counted on the training tokens only.
print("\nBuilding vocabulary...")
word_freq = Counter()
for tokens in train_tokens:
    word_freq.update(tokens)

# Ids 0 and 1 are reserved for padding / unknown words. Kept words are numbered
# AFTER the frequency filter so the ids are contiguous (2 .. len(vocab) - 1);
# numbering before filtering would leave gaps and push ids past len(vocab).
MIN_WORD_FREQ = 2
RESERVED_TOKENS = ('<PAD>', '<UNK>')
vocab = {'<PAD>': 0, '<UNK>': 1}
frequent_words = (
    word for word, freq in word_freq.items()
    if freq >= MIN_WORD_FREQ and word not in RESERVED_TOKENS
)
vocab.update((word, idx) for idx, word in enumerate(frequent_words, start=2))

print(f"✓ Vocabulary size: {len(vocab)} tokens")

# Emotion labels reference
# Position k in this list names label id k of the 28-slot multi-hot vectors.
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval',
    'caring', 'confusion', 'curiosity', 'desire', 'disappointment',
    'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
    'gratitude', 'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief', 'remorse',
    'sadness', 'surprise', 'neutral'
]

print(f"✓ Emotion labels: {len(emotion_labels)} categories")

print("\n✓ SNIPPET 2 COMPLETE: Data Loaded & Preprocessed")


In [None]:
# SNIPPET 3: Dense Embeddings Generation
# ============================================================================

# Section banner for the embedding stage.
_rule = "=" * 70
print(f"\n{_rule}")
print("STEP 2: CREATE DENSE EMBEDDINGS FOR TEXT")
print(_rule)
print("CRITICAL: Convert tokens to dense embedding vectors BEFORE neural network")

# Cell 1: Embedding Generator Class
class DenseEmbeddingGenerator:
    """Train word-embedding models and pool token vectors into one vector per text.

    The three ``train_*`` methods fit a gensim model on ``tokenized_texts``
    using the matching section of ``config['embeddings']``. The remaining
    methods turn token lists into fixed-size dense vectors by pooling the
    vectors of the tokens the model knows.
    """

    # Supported pooling reductions; each is applied along the token axis.
    _POOLERS = {'mean': np.mean, 'max': np.max, 'sum': np.sum}

    def __init__(self, config, vocab, tokenized_texts):
        """Store the config, the vocabulary and the training corpus.

        Args:
            config: parsed configuration; ``config['embeddings']`` holds one
                section per embedding model.
            vocab: token-to-id mapping (stored, not used by this class).
            tokenized_texts: list of token lists used to train the models.
        """
        self.config = config
        self.vocab = vocab
        self.tokenized_texts = tokenized_texts
        # Kept for backward compatibility. Only used as a fallback when an
        # embedding model does not expose its own vector size.
        self.embedding_dim = config['embeddings']['Word2Vec']['vector_size']

    def _train(self, model_cls, config_key, banner):
        """Fit ``model_cls`` on the stored corpus with ``config['embeddings'][config_key]``."""
        print(banner)
        cfg = self.config['embeddings'][config_key]

        model = model_cls(
            sentences=self.tokenized_texts,
            vector_size=cfg['vector_size'],
            window=cfg['window'],
            min_count=cfg['min_count'],
            sg=cfg['sg'],
            workers=cfg['workers'],
            epochs=cfg['epochs']
        )

        print(f"  Vocabulary size: {len(model.wv)}")
        return model, cfg

    def _vector_size(self, embedding_model):
        """Return the model's own vector size, falling back to ``self.embedding_dim``."""
        return getattr(embedding_model.wv, 'vector_size', self.embedding_dim)

    def train_word2vec(self):
        """Train a Word2Vec model; returns ``(model, cfg)``.

        ``cfg['sg']`` selects the algorithm (1 = skip-gram, 0 = CBOW).
        """
        return self._train(Word2Vec, 'Word2Vec', "\n[Word2Vec] Training embedding model...")

    def train_glove(self):
        """Train the model configured under the ``'GloVe'`` key; returns ``(model, cfg)``.

        NOTE: this is NOT the GloVe algorithm. It is a second gensim Word2Vec
        model driven by the 'GloVe' config section (intended to run as CBOW,
        i.e. ``sg=0``).
        """
        return self._train(Word2Vec, 'GloVe', "\n[GloVe] Training embedding model (CBOW)...")

    def train_fasttext(self):
        """Train a FastText model; returns ``(model, cfg)``."""
        return self._train(FastText, 'FastText', "\n[FastText] Training embedding model...")

    def text_to_dense_vector(self, tokens, embedding_model, method='mean'):
        """Pool the vectors of the known tokens into a single dense vector.

        Args:
            tokens: iterable of tokens for one text.
            embedding_model: object whose ``.wv`` supports ``in`` and indexing
                by token.
            method: ``'mean'``, ``'max'`` or ``'sum'``.

        Returns:
            A 1-D array. When no token is known to the model, a float32 zero
            vector whose length is the model's own vector size, so texts
            embedded with models of different sizes never produce rows of
            mismatched length.

        Raises:
            ValueError: if ``method`` is not a supported pooling method
                (previously this silently returned ``None``).
        """
        try:
            pool = self._POOLERS[method]
        except KeyError:
            raise ValueError(
                f"Unknown pooling method {method!r}; expected one of {sorted(self._POOLERS)}"
            ) from None

        word_vectors = embedding_model.wv
        vectors = [word_vectors[token] for token in tokens if token in word_vectors]

        if not vectors:
            return np.zeros(self._vector_size(embedding_model), dtype=np.float32)

        return pool(np.asarray(vectors), axis=0)

    def generate_dense_embeddings(self, texts_tokens, embedding_model, method='mean'):
        """Pool every token list in ``texts_tokens`` into one row of a 2-D array.

        Returns an array with one row per text. An empty input gives an array
        of shape ``(0, vector_size)`` rather than a 1-D empty array.
        """
        dense_embeddings = [
            self.text_to_dense_vector(tokens, embedding_model, method)
            for tokens in tqdm(texts_tokens, desc=f"Converting to dense vectors ({method})")
        ]

        if not dense_embeddings:
            return np.zeros((0, self._vector_size(embedding_model)), dtype=np.float32)

        return np.array(dense_embeddings)

# Cell 2: Initialize Generator
# The generator trains every embedding model on the training tokens only.
generator = DenseEmbeddingGenerator(config, vocab, train_tokens)


def _pool_all_splits(embedding_model):
    """Mean-pool the train, val and test token lists (in that order) with one model."""
    return tuple(
        generator.generate_dense_embeddings(split_tokens, embedding_model, method='mean')
        for split_tokens in (train_tokens, val_tokens, test_tokens)
    )


# Cell 3: Generate Embeddings for Word2Vec, GloVe, FastText
# Each enabled model contributes one entry keyed by its name.
embeddings_data = {}
_emb_cfg = config['embeddings']

if _emb_cfg['Word2Vec']['enabled']:
    w2v_model, w2v_cfg = generator.train_word2vec()
    train_dense_w2v, val_dense_w2v, test_dense_w2v = _pool_all_splits(w2v_model)

    embeddings_data['Word2Vec'] = dict(
        train=train_dense_w2v,
        val=val_dense_w2v,
        test=test_dense_w2v,
        config=w2v_cfg,
        dim=_emb_cfg['Word2Vec']['vector_size'],
    )
    print(f"  ✓ Dense embeddings shape: {train_dense_w2v.shape} (samples, dims)")

if _emb_cfg['GloVe']['enabled']:
    glove_model, glove_cfg = generator.train_glove()
    train_dense_glove, val_dense_glove, test_dense_glove = _pool_all_splits(glove_model)

    embeddings_data['GloVe'] = dict(
        train=train_dense_glove,
        val=val_dense_glove,
        test=test_dense_glove,
        config=glove_cfg,
        dim=_emb_cfg['GloVe']['vector_size'],
    )
    print(f"  ✓ Dense embeddings shape: {train_dense_glove.shape} (samples, dims)")

if _emb_cfg['FastText']['enabled']:
    ft_model, ft_cfg = generator.train_fasttext()
    train_dense_ft, val_dense_ft, test_dense_ft = _pool_all_splits(ft_model)

    embeddings_data['FastText'] = dict(
        train=train_dense_ft,
        val=val_dense_ft,
        test=test_dense_ft,
        config=ft_cfg,
        dim=_emb_cfg['FastText']['vector_size'],
    )
    print(f"  ✓ Dense embeddings shape: {train_dense_ft.shape} (samples, dims)")

# Cell 4: Generate BERT Embeddings
if config['embeddings']['BERT']['enabled']:
    print("\n[BERT] Extracting dense embeddings...")
    bert_name = 'bert-base-uncased'
    bert_tokenizer = BertTokenizer.from_pretrained(bert_name)
    bert_model = BertModel.from_pretrained(bert_name).to(device)
    bert_model.eval()  # inference only: disables dropout

    def get_bert_embeddings(texts, tokenizer, model, batch_size=16):
        """Encode ``texts`` in batches and return the model's pooled sentence vectors.

        The returned rows are ``pooler_output``: per the transformers
        documentation this is the final hidden state of the first ([CLS])
        token after the model's pooling layer (a linear layer followed by
        tanh), not the raw [CLS] hidden state.

        Args:
            texts: sequence of raw strings.
            tokenizer: tokenizer matching ``model``.
            model: the BERT model to run. The passed-in model is the one
                actually used; it is expected to live on the global ``device``.
            batch_size: number of texts encoded per forward pass.

        Returns:
            Array of shape ``(len(texts), model.config.hidden_size)``; an
            empty input yields a ``(0, hidden_size)`` array.
        """
        if len(texts) == 0:
            return np.zeros((0, model.config.hidden_size), dtype=np.float32)

        max_length = config['embeddings']['BERT']['max_length']
        all_embeddings = []

        for i in tqdm(range(0, len(texts), batch_size), desc="BERT embeddings"):
            batch_texts = list(texts[i:i+batch_size])

            encodings = tokenizer(
                batch_texts,
                max_length=max_length,
                padding='max_length',
                truncation=True,
                return_tensors='pt'
            )

            input_ids = encodings['input_ids'].to(device)
            attention_mask = encodings['attention_mask'].to(device)

            # No gradients are needed for feature extraction.
            with torch.no_grad():
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            all_embeddings.append(outputs.pooler_output.cpu().numpy())

        return np.vstack(all_embeddings)

    train_dense_bert = get_bert_embeddings(train_texts, bert_tokenizer, bert_model)
    val_dense_bert = get_bert_embeddings(val_texts, bert_tokenizer, bert_model)
    test_dense_bert = get_bert_embeddings(test_texts, bert_tokenizer, bert_model)

    embeddings_data['BERT'] = {
        'train': train_dense_bert,
        'val': val_dense_bert,
        'test': test_dense_bert,
        'config': {'model_name': bert_name},
        # Read from the loaded model instead of hard-coding 768.
        'dim': bert_model.config.hidden_size
    }
    print(f"  ✓ Dense embeddings shape: {train_dense_bert.shape} (samples, dims)")

print(f"\n✓ STEP 2 COMPLETE: All texts converted to dense embeddings")
print(f"✓ Total embedding models: {len(embeddings_data)}")

In [None]:
# SNIPPET 4: Dataset Preparation
# ============================================================================

# Section banner for the dataset stage.
_rule = "=" * 70
print(f"\n{_rule}")
print("STEP 3: CREATE DATASETS WITH DENSE EMBEDDINGS")
print(_rule)

# Cell 1: Dataset Class
class DenseEmbeddingDataset(Dataset):
    """Map-style dataset pairing precomputed dense vectors with label rows.

    Both NumPy inputs are converted once, at construction time, to float32
    tensors; indexing returns a dict with the keys ``'embedding'`` and
    ``'label'``.
    """

    def __init__(self, dense_embeddings, labels):
        """Convert the NumPy ``dense_embeddings`` and ``labels`` to float tensors."""
        embedding_tensor = torch.from_numpy(dense_embeddings)
        label_tensor = torch.from_numpy(labels)
        self.embeddings = embedding_tensor.float()
        self.labels = label_tensor.float()

    def __len__(self):
        """Number of samples, taken from the embeddings tensor."""
        return len(self.embeddings)

    def __getitem__(self, idx):
        """Return the embedding / label pair stored at ``idx``."""
        return dict(
            embedding=self.embeddings[idx],
            label=self.labels[idx],
        )

# Cell 2: Create Dataloaders
# One train/val/test loader triple per embedding model; only train is shuffled.
print("\nCreating dataloaders...")
dataloaders = {}

for embedding_name, data in embeddings_data.items():
    # Wrap each split's vectors together with the matching label matrix.
    train_dataset = DenseEmbeddingDataset(data['train'], train_labels)
    val_dataset = DenseEmbeddingDataset(data['val'], val_labels)
    test_dataset = DenseEmbeddingDataset(data['test'], test_labels)

    batch_size = config['training']['batch_size']

    loaders = dict(
        train=DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
        val=DataLoader(val_dataset, batch_size=batch_size),
        test=DataLoader(test_dataset, batch_size=batch_size),
    )
    dataloaders[embedding_name] = loaders

    # len() of a DataLoader is its number of batches.
    train_batches, val_batches, test_batches = (
        len(loaders['train']), len(loaders['val']), len(loaders['test'])
    )

    print(f"  ✓ {embedding_name:12} - Train batches: {train_batches}, Val: {val_batches}, Test: {test_batches}")

print(f"\n✓ STEP 3 COMPLETE: Dataloaders Ready")
print(f"  Batch size: {config['training']['batch_size']}")

In [None]:
# SNIPPET 5: Neural Network Architecture
# ============================================================================

# Section banner for the model stage.
_rule = "=" * 70
print(f"\n{_rule}")
print("STEP 4: BUILD NEURAL NETWORK FOR MULTI-LABEL CLASSIFICATION")
print(_rule)
print("Network input: Dense embeddings (fixed size)")
print("Network output: 28 logits (one per emotion label)")

# Cell 1: Neural Network Model
class MultiLabelClassifier(nn.Module):
    """Feed-forward network that maps a dense embedding to one logit per label.

    The layer widths come from ``config['neural_network']['layers']``, except
    that the first width is replaced by ``input_dim`` and the last by
    ``num_labels``. The model therefore always matches the embedding it is fed
    and the label matrix it is trained against, whatever the config lists at
    either end.

    Every layer but the last is followed by optional batch norm, the
    activation and dropout; the last layer returns raw logits (no sigmoid).
    """

    def __init__(self, input_dim, num_labels, config):
        """Build the layer stack.

        Args:
            input_dim: size of the incoming embedding vectors.
            num_labels: number of output logits.
            config: parsed configuration; reads ``layers``, ``batch_norm``,
                ``dropout`` and ``activation`` from ``config['neural_network']``.

        Raises:
            ValueError: if the configured layer list has fewer than two
                entries, i.e. does not describe at least one linear layer.
        """
        super().__init__()
        nn_cfg = config['neural_network']

        # Work on a copy so the shared config is never mutated.
        layers_dims = list(nn_cfg['layers'])
        if len(layers_dims) < 2:
            raise ValueError(
                "config['neural_network']['layers'] needs at least an input and an "
                f"output entry, got {layers_dims!r}"
            )
        layers_dims[0] = input_dim    # Set input dimension dynamically
        layers_dims[-1] = num_labels  # Output width follows the label count

        print(f"  Network architecture: {' -> '.join(map(str, layers_dims))}")

        use_batch_norm = nn_cfg['batch_norm']
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList() if use_batch_norm else None
        self.dropouts = nn.ModuleList()

        last_index = len(layers_dims) - 2  # index of the output layer
        for i, (fan_in, fan_out) in enumerate(zip(layers_dims[:-1], layers_dims[1:])):
            self.layers.append(nn.Linear(fan_in, fan_out))

            # Batch norm and dropout belong to hidden layers only.
            if i < last_index:
                if use_batch_norm:
                    self.batch_norms.append(nn.BatchNorm1d(fan_out))
                self.dropouts.append(nn.Dropout(nn_cfg['dropout']))

        # Any activation name other than 'relu' selects tanh.
        self.activation = nn.ReLU() if nn_cfg['activation'] == 'relu' else nn.Tanh()

    def forward(self, x):
        """Run the network.

        Args:
            x: tensor of shape ``(batch_size, input_dim)``.

        Returns:
            Tensor of shape ``(batch_size, num_labels)`` holding raw logits.
        """
        for i, layer in enumerate(self.layers[:-1]):  # All except last
            x = layer(x)

            if self.batch_norms is not None:
                x = self.batch_norms[i](x)

            x = self.activation(x)
            x = self.dropouts[i](x)

        # Output layer (no activation, no dropout)
        return self.layers[-1](x)

# Cell 2: Display Network Architecture
# Echo the architecture-related settings read from the config.
_nn_cfg = config['neural_network']
print("\n✓ Network Architecture:")
for _label, _key in (
    ("Config layers", 'layers'),
    ("Activation", 'activation'),
    ("Dropout", 'dropout'),
    ("Batch norm", 'batch_norm'),
):
    print(f"  {_label}: {_nn_cfg[_key]}")
print("  Output: 28 logits (multi-label)")
print("  Loss function: BCEWithLogitsLoss")

print("\n✓ STEP 4 COMPLETE: Neural Network Architecture Ready")

In [None]:
# SNIPPET 6: Training & Evaluation Functions
# ============================================================================

# Section banner for the training / evaluation helpers.
_rule = "=" * 70
print(f"\n{_rule}")
print("DEFINING TRAINING & EVALUATION FUNCTIONS")
print(_rule)

# Cell 1: Training Function
def train_model(model, train_loader, optimizer, config, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network called on each batch's ``'embedding'`` tensor.
        train_loader: iterable of dict batches with ``'embedding'`` and ``'label'``.
        optimizer: optimizer stepped once per batch.
        config: accepted for interface compatibility; not read here.
        device: device each batch is moved to.

    Returns:
        The mean of the per-batch ``BCEWithLogitsLoss`` values.
    """
    model.train()
    loss_fn = nn.BCEWithLogitsLoss()
    running_loss = 0

    for batch in tqdm(train_loader, desc="Training", leave=False):
        inputs = batch['embedding'].to(device)
        targets = batch['label'].to(device)

        # Clear stale gradients, then forward -> loss -> backward -> update.
        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(train_loader)

# Cell 2: Evaluation Function
def evaluate_model(model, dataloader, device, threshold=0.5):
    """Score ``model`` on ``dataloader`` without updating it.

    Logits are passed through a sigmoid and a label counts as predicted when
    its probability is strictly greater than ``threshold``.

    Returns:
        Dict with the keys ``loss`` (mean per-batch BCE-with-logits loss),
        ``hamming_loss``, ``micro_f1``, ``macro_f1``, ``micro_precision``
        and ``micro_recall``.
    """
    model.eval()
    loss_fn = nn.BCEWithLogitsLoss()
    loss_sum = 0
    pred_chunks, label_chunks = [], []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating", leave=False):
            inputs = batch['embedding'].to(device)
            targets = batch['label'].to(device)

            logits = model(inputs)
            loss_sum += loss_fn(logits, targets).item()

            # Boolean predictions: sigmoid probability above the threshold.
            pred_chunks.append((torch.sigmoid(logits) > threshold).cpu().numpy())
            label_chunks.append(targets.cpu().numpy())

    # One row per sample across all batches.
    y_pred = np.vstack(pred_chunks)
    y_true = np.vstack(label_chunks)

    micro = dict(average='micro', zero_division=0)
    macro = dict(average='macro', zero_division=0)

    return {
        'loss': loss_sum / len(dataloader),
        'hamming_loss': hamming_loss(y_true, y_pred),
        'micro_f1': f1_score(y_true, y_pred, **micro),
        'macro_f1': f1_score(y_true, y_pred, **macro),
        'micro_precision': precision_score(y_true, y_pred, **micro),
        'micro_recall': recall_score(y_true, y_pred, **micro),
    }

# Recap of the helpers defined in this cell.
for _line in (
    "✓ Functions defined:",
    "  - train_model(): Train for one epoch",
    "  - evaluate_model(): Compute all metrics",
    "\n✓ SNIPPET 6 COMPLETE: Training Functions Ready",
):
    print(_line)

In [None]:
# SNIPPET 7: Training Loop with MLflow Tracking
# ============================================================================

print("\n" + "="*70)
print("STEP 5: TRAIN & COMPARE ALL MODELS WITH MLFLOW TRACKING")
print("="*70)

# Cell 1: Training Loop for All Embeddings
# all_results:        embedding name -> test metrics of its best checkpoint
# training_histories: embedding name -> per-epoch train/val curves
all_results = {}
training_histories = {}

for embedding_name in embeddings_data.keys():
    print(f"\n{'='*70}")
    print(f"Training with {embedding_name} Embeddings")
    print(f"{'='*70}")

    run_name = f"{embedding_name}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}"

    try:
        # The context manager always closes the run and records it as FAILED
        # (instead of FINISHED) when anything inside raises.
        with mlflow.start_run(run_name=run_name):
            # Cell 2: Log Parameters to MLflow
            print("\nLogging parameters to MLflow...")

            input_dim = embeddings_data[embedding_name]['dim']
            num_labels = 28

            run_params = {
                'embedding_model': embedding_name,
                'embedding_dim': input_dim,
                'num_labels': num_labels,
                'num_epochs': config['training']['num_epochs'],
                'batch_size': config['training']['batch_size'],
                'learning_rate': config['training']['learning_rate'],
                'dropout': config['neural_network']['dropout'],
                'batch_norm': config['neural_network']['batch_norm'],
            }
            # Log embedding-specific config
            for key, value in embeddings_data[embedding_name]['config'].items():
                run_params[f'emb_{key}'] = value

            for key, value in run_params.items():
                mlflow.log_param(key, value)

            # Cell 3: Create Model
            model = MultiLabelClassifier(
                input_dim=input_dim,
                num_labels=num_labels,
                config=config
            ).to(device)

            print(f"✓ Model created with input_dim={input_dim}")

            # Cell 4: Setup Optimizer
            learning_rate = config['training']['learning_rate']
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)

            # Cell 5: Get Dataloaders
            train_loader = dataloaders[embedding_name]['train']
            val_loader = dataloaders[embedding_name]['val']
            test_loader = dataloaders[embedding_name]['test']

            # Cell 6: Training Loop with Early Stopping
            # Starting from -inf guarantees the first epoch is always kept, so a
            # checkpoint exists even when validation F1 never rises above 0 and a
            # state left over from a previous embedding can never be reused.
            best_val_f1 = float('-inf')
            best_model_state = None
            patience = 0
            history = {'train_loss': [], 'val_loss': [], 'val_f1': []}

            for epoch in range(config['training']['num_epochs']):
                print(f"\nEpoch {epoch+1}/{config['training']['num_epochs']}")

                # Train
                train_loss = train_model(model, train_loader, optimizer, config, device)

                # Validate
                val_metrics = evaluate_model(model, val_loader, device)

                # Store history
                history['train_loss'].append(train_loss)
                history['val_loss'].append(val_metrics['loss'])
                history['val_f1'].append(val_metrics['micro_f1'])

                # Log to MLflow
                mlflow.log_metric('train_loss', train_loss, step=epoch)
                mlflow.log_metric('val_loss', val_metrics['loss'], step=epoch)
                mlflow.log_metric('val_micro_f1', val_metrics['micro_f1'], step=epoch)
                mlflow.log_metric('val_macro_f1', val_metrics['macro_f1'], step=epoch)

                print(f"  Train Loss: {train_loss:.4f}")
                print(f"  Val Loss: {val_metrics['loss']:.4f}, Micro-F1: {val_metrics['micro_f1']:.4f}")

                # Early stopping
                if val_metrics['micro_f1'] > best_val_f1:
                    best_val_f1 = val_metrics['micro_f1']
                    patience = 0
                    # state_dict() hands back references to the live tensors, and
                    # dict.copy() is shallow, so each tensor must be cloned or the
                    # "best" snapshot would keep changing as training continues.
                    best_model_state = {
                        name: tensor.detach().clone()
                        for name, tensor in model.state_dict().items()
                    }
                    print(f"  ✓ Best model saved (F1: {best_val_f1:.4f})")
                else:
                    patience += 1
                    if patience >= config['training']['early_stopping_patience']:
                        print(f"  Early stopping at epoch {epoch+1}")
                        break

            # Cell 7: Load Best Model & Test
            print("\nEvaluating best model on test set...")
            # No snapshot exists only when zero epochs were run; the freshly
            # initialised model is evaluated in that case.
            if best_model_state is not None:
                model.load_state_dict(best_model_state)

            test_metrics = evaluate_model(model, test_loader, device)

            print(f"\n[{embedding_name}] TEST RESULTS:")
            for metric_name, metric_value in test_metrics.items():
                print(f"  {metric_name}: {metric_value:.4f}")
                mlflow.log_metric(f'test_{metric_name}', metric_value)

            all_results[embedding_name] = test_metrics
            training_histories[embedding_name] = history

            # Cell 8: Save Model Artifact
            model_path = f'{embedding_name}_best_model.pth'
            torch.save(model.state_dict(), model_path)
            mlflow.log_artifact(model_path)
            print(f"  ✓ Model saved: {model_path}")

        print(f"\n✓ {embedding_name} training complete - MLflow run ended")

    except Exception as e:
        # Best effort: report the failure and continue with the next embedding.
        print(f"\n✗ Error training {embedding_name}: {e}")

print(f"\n✓ SNIPPET 7 COMPLETE: All models trained & logged to MLflow")


In [None]:
# SNIPPET 8: Results Comparison & Visualization
# ============================================================================

print("\n" + "="*70)
print("FINAL MODEL COMPARISON")
print("="*70)

# Cell 1: Create Results DataFrame
print("\nCompiling results...")

# Fail early with a clear message instead of writing empty files and then
# hitting a KeyError on a missing metric column further down.
if not all_results:
    raise RuntimeError(
        "No model finished training, so there is nothing to compare. "
        "Check the errors reported by the training step."
    )

# One row per embedding model, one column per test metric.
results_df = pd.DataFrame.from_dict(all_results, orient='index')

print("\n✓ Model Performance Comparison:")
print(results_df.to_string())

# Cell 2: Save Results to CSV
results_csv = 'embedding_comparison_final.csv'
results_df.to_csv(results_csv)
print(f"\n✓ Results saved to: {results_csv}")

# Cell 3: Create Comparison Visualizations
print("\nGenerating comparison plots...")

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

models = list(all_results.keys())
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

# (axis, metric key, title, y label, clamp y axis to [0, 1]?)
panels = [
    (axes[0, 0], 'loss', 'Test Loss (Lower is Better)', 'Loss', False),
    (axes[0, 1], 'hamming_loss', 'Hamming Loss (Lower is Better)', 'Hamming Loss', False),
    (axes[1, 0], 'micro_f1', 'Micro-F1 Score (Higher is Better)', 'Micro-F1', True),
    (axes[1, 1], 'macro_f1', 'Macro-F1 Score (Higher is Better)', 'Macro-F1', True),
]

for ax, metric, title, y_label, unit_range in panels:
    values = [all_results[m][metric] for m in models]

    ax.bar(models, values, color=colors)
    ax.set_title(title, fontweight='bold', fontsize=12)
    ax.set_ylabel(y_label)
    if unit_range:
        ax.set_ylim([0, 1])
    ax.grid(axis='y', alpha=0.3)

    # Write each bar's value just above it.
    for i, height in enumerate(values):
        ax.text(i, height, f'{height:.4f}', ha='center', va='bottom')

plt.tight_layout()
comparison_plot = 'embedding_comparison_final.png'
plt.savefig(comparison_plot, dpi=100, bbox_inches='tight')
plt.close(fig)

print(f"✓ Comparison plot saved: {comparison_plot}")

# Cell 4: Log Final Comparison to MLflow
# The context manager closes the run even if an upload fails, so no run is left
# active to break the next start_run call.
with mlflow.start_run(run_name='final_comparison'):
    mlflow.log_artifact(comparison_plot)
    mlflow.log_artifact(results_csv)

print("✓ Final comparison logged to MLflow")

# Cell 5: Summary Statistics
print("\n" + "="*70)
print("SUMMARY STATISTICS")
print("="*70)

print("\n1. Best Model by Micro-F1 Score:")
best_model = results_df.loc[results_df['micro_f1'].idxmax()]
print(f"   {best_model.name} with Micro-F1: {best_model['micro_f1']:.4f}")

print("\n2. Best Model by Macro-F1 Score:")
best_macro_model = results_df.loc[results_df['macro_f1'].idxmax()]
print(f"   {best_macro_model.name} with Macro-F1: {best_macro_model['macro_f1']:.4f}")

print("\n3. Lowest Hamming Loss:")
best_hamming_model = results_df.loc[results_df['hamming_loss'].idxmin()]
print(f"   {best_hamming_model.name} with Hamming Loss: {best_hamming_model['hamming_loss']:.4f}")

print("\n4. Model Rankings (by Micro-F1):")
ranked = results_df.sort_values('micro_f1', ascending=False)
for rank, (model_name, row) in enumerate(ranked.iterrows(), start=1):
    print(f"   {rank}. {model_name:12} - Micro-F1: {row['micro_f1']:.4f}, Macro-F1: {row['macro_f1']:.4f}")

print("\n✓ SNIPPET 8 COMPLETE: Comparison Done")


In [None]:
# SNIPPET 9: MLflow Tracking Summary
# ============================================================================

print("\n" + "="*70)
print("MLFLOW EXPERIMENT SUMMARY")
print("="*70)

# Cell 1: Get Experiment Info
print(f"\nTracking URI: {config['mlflow']['tracking_uri']}")
print(f"Experiment: {config['mlflow']['experiment_name']}")

# Cell 2: Search Runs
print("\nSearching all runs...")

# search_runs returns a DataFrame with fixed columns (run_id, status,
# start_time, end_time, ...) plus one column per logged value, prefixed with
# "metrics.", "params." or "tags.".
runs = mlflow.search_runs(
    experiment_names=[config['mlflow']['experiment_name']]
)

print(f"\n✓ Total runs: {len(runs)}")

# Cell 3: Display Run Summary
print("\n" + "-"*70)
print("RUN SUMMARY:")
print("-"*70)

for idx, run in runs.iterrows():
    # The run name is stored as the tag "mlflow.runName"; fall back to the run
    # id when the tag is absent.
    run_name = run.get('tags.mlflow.runName')
    if not isinstance(run_name, str):
        run_name = str(run['run_id'])

    if 'final_comparison' in run_name:
        continue

    print(f"\nRun {idx+1}: {run_name}")
    print(f"  Status: {run['status']}")

    # There is no duration column: derive it from the two timestamps, which may
    # be missing for a run that never ended.
    start_time = run.get('start_time')
    end_time = run.get('end_time')
    if pd.notna(start_time) and pd.notna(end_time):
        print(f"  Duration: {(end_time - start_time).total_seconds():.2f}s")
    else:
        print("  Duration: N/A")

    # The embedding name was logged with log_param, so it is a param, not a tag.
    embedding_model = run.get('params.embedding_model')
    if not isinstance(embedding_model, str):
        embedding_model = 'N/A'
    print(f"  Embedding: {embedding_model}")

    # Test metrics exist only for runs that reached the evaluation step.
    test_f1 = run.get('metrics.test_micro_f1')
    if pd.notna(test_f1):
        print(f"  Test Micro-F1: {test_f1:.4f}")

# Cell 4: MLflow UI Command
print("\n" + "="*70)
print("TO VIEW MLflow UI:")
print("="*70)
print(f"\nRun command:")
print(f"  mlflow ui --backend-store-uri {config['mlflow']['tracking_uri']}")
print(f"\nThen open: http://localhost:5000")

print("\n✓ SNIPPET 9 COMPLETE: MLflow Summary Ready")
