# Model Testing: MultinomialNB, Logistic Regression, and SGD Tuning

This notebook tunes RidgeClassifier, MultinomialNB, Logistic Regression, and SGD classifiers for movie genre classification and compares them with the baseline LinearSVC model.

## Objectives
1. Hyperparameter tuning for RidgeClassifier (with cross-validation overfitting checks)
2. Hyperparameter tuning for MultinomialNB (with cross-validation overfitting checks)
3. Hyperparameter tuning for Logistic Regression (with cross-validation overfitting checks)
4. Hyperparameter tuning for SGD (with cross-validation overfitting checks)
5. Comparison with baseline LinearSVC model
6. Comprehensive evaluation metrics
7. Voting Classifier ensemble


In [None]:
# Imports and Setup
import sys
from pathlib import Path
import warnings
# NOTE(review): this silences every warning category for the whole kernel,
# so any warning raised while fitting the models below will not be shown.
warnings.filterwarnings('ignore')

# Add project root to path so the `descriptions` package imports below resolve.
# The root is taken to be the parent of the current working directory
# (presumably the notebook is launched from a sub-folder of the repo — confirm).
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root))

import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.special import expit
from scipy.stats import loguniform, randint, uniform
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, validation_curve
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import (
    f1_score,
    precision_score,
    recall_score,
    hamming_loss,
    jaccard_score,
    make_scorer
)

# Project imports
from descriptions.config import INTERIM_DATA_DIR, MODELS_DIR
from descriptions.dataset import load_interim
from descriptions.modeling.train import prepare_features_and_labels, train_test_split_data
from descriptions.modeling.model import build_model, load_model

# Set style
# Applies a matplotlib style sheet to every figure created after this point.
# NOTE(review): the 'seaborn-v0_8-*' style names are only available in newer
# matplotlib releases — confirm against the project's pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
# Default seaborn colour palette for subsequent plots.
sns.set_palette("husl")

# Marker so the reader can see the setup cell finished.
print("‚úì Imports complete")


## 1. Load and Prepare Data


In [None]:
# Load interim data
print("Loading data...")
data = load_interim()
print(f"‚úì Loaded {len(data)} samples")

# Prepare features and labels.
# Every transformer is passed as None and returned by the helper
# (presumably it fits fresh ones when given None — confirm in
# descriptions.modeling.train).
# NOTE(review): this call runs on the full dataset *before* the train/test
# split below. If the helper fits the vectorizer / feature selector here,
# test-set information leaks into the features — confirm against the helper.
print("\nPreparing features and labels...")
X, y, vectorizer, mlb, normalizer, feature_selector = prepare_features_and_labels(
    data,
    vectorizer=None,
    mlb=None,
    normalizer=None,
    feature_selector=None,
    k_features=4500
)

print(f"‚úì Features shape: {X.shape}")
print(f"‚úì Labels shape: {y.shape}")
print(f"‚úì Number of genres: {len(mlb.classes_)}")
print(f"\nGenres: {list(mlb.classes_)}")

# Split into train/test (use same random_state as training: 42)
print("\nSplitting data into train/test sets...")
X_train, X_test, y_train, y_test = train_test_split_data(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# Use .shape[0] rather than len(): it gives the row count for DataFrames,
# ndarrays and SciPy sparse matrices alike, whereas len() raises TypeError on
# sparse matrices.
print(f"\n‚úì Data split complete:")
print(f"  - Training samples: {X_train.shape[0]}")
print(f"  - Test samples: {X_test.shape[0]}")

# Convert to numpy arrays for sklearn: DataFrames are unwrapped to their
# underlying values, any other container is passed through unchanged.
X_train_array = X_train.values if isinstance(X_train, pd.DataFrame) else X_train
X_test_array = X_test.values if isinstance(X_test, pd.DataFrame) else X_test


## 2. Baseline: LinearSVC Model


In [None]:
# Train baseline LinearSVC model
# Fits the reference model that the tuned classifiers are compared against and
# records its test-set metrics in `baseline_metrics`.
print("Training baseline LinearSVC model...")
baseline_params = {
    'C': 0.1,
    'penalty': 'l2',
    'loss': 'squared_hinge',
    'max_iter': 1000,
    'tol': 1e-3,
    'class_weight': 'balanced',
    'dual': False,
    'random_state': 42
}

baseline_model = build_model(**baseline_params)

start_time = time.time()
baseline_model.fit(X_train_array, y_train)
training_time = time.time() - start_time

print(f"‚úì Baseline model trained in {training_time:.2f} seconds")

# Get predictions
# LinearSVC doesn't have predict_proba, so the decision_function margins are
# squashed through a sigmoid (expit, imported with the other dependencies at the
# top of the notebook). These are scores in (0, 1), not calibrated probabilities.
y_scores_baseline = baseline_model.decision_function(X_test_array)
y_proba_baseline = expit(y_scores_baseline)
y_pred_baseline = baseline_model.predict(X_test_array)

# Evaluate baseline
# zero_division=0 is passed to every precision/recall-derived metric so the F1
# values are computed the same way as the F1 scorers used for tuning.
baseline_metrics = {
    'f1_micro': f1_score(y_test, y_pred_baseline, average='micro', zero_division=0),
    'f1_macro': f1_score(y_test, y_pred_baseline, average='macro', zero_division=0),
    'precision_micro': precision_score(y_test, y_pred_baseline, average='micro', zero_division=0),
    'recall_micro': recall_score(y_test, y_pred_baseline, average='micro', zero_division=0),
    'hamming_loss': hamming_loss(y_test, y_pred_baseline),
    'jaccard_score': jaccard_score(y_test, y_pred_baseline, average='micro', zero_division=0),
}

print("\nüìä BASELINE LINEARSVC MODEL METRICS")
print("=" * 70)
for metric, value in baseline_metrics.items():
    print(f"  {metric:20s}: {value:.4f} ({value * 100:.2f}%)")

In [None]:
# Validation Curve for LinearSVC
# Sweeps the regularization parameter C with 5-fold CV, plots mean train and
# validation F1-micro (with a +/- 1 std band) and reports the train/validation gap
# at the best C and at the baseline C.
print("\n" + "=" * 70)
print("VALIDATION CURVE: LinearSVC (C Parameter)")
print("=" * 70)

# Create base LinearSVC model with same parameters as baseline (except C which we'll vary)
base_svc = LinearSVC(
    penalty='l2',
    loss='squared_hinge',
    max_iter=1000,
    tol=1e-3,
    class_weight='balanced',
    dual=False,
    random_state=42
)
svc_model = OneVsRestClassifier(base_svc)

# Use F1-micro as the scoring metric (same as MultinomialNB)
scorer_svc = make_scorer(f1_score, average='micro', zero_division=0)

# Create a range of C values to test
# Use log scale for better visualization: 20 log-spaced points from 0.001 to 100.
# The baseline C is merged into the grid (union1d returns a sorted, de-duplicated
# array) so that the "gap at baseline C" reported below is measured at exactly
# that value instead of at the nearest neighbouring grid point.
baseline_C = baseline_params['C']
C_range = np.union1d(np.logspace(-3, 2, 20), [baseline_C])

print(f"\nComputing validation curve for C values...")
print(f"  C range: {C_range[0]:.4f} to {C_range[-1]:.4f}")
print(f"  Number of points: {len(C_range)}")
print(f"  CV folds: 5")

# Compute validation curve
# Both returned arrays have one row per C value and one column per CV fold.
train_scores_svc, val_scores_svc = validation_curve(
    svc_model,
    X_train_array,
    y_train,
    param_name='estimator__C',
    param_range=C_range,
    cv=5,
    scoring=scorer_svc,
    n_jobs=-1
)

# Calculate mean and std for train and validation scores
train_mean_svc = np.mean(train_scores_svc, axis=1)
train_std_svc = np.std(train_scores_svc, axis=1)
val_mean_svc = np.mean(val_scores_svc, axis=1)
val_std_svc = np.std(val_scores_svc, axis=1)

# Find best C from validation curve
best_C_idx = np.argmax(val_mean_svc)
best_C_val = C_range[best_C_idx]
best_val_score_svc = val_mean_svc[best_C_idx]

print(f"\n  Best C from validation curve: {best_C_val:.4f}")
print(f"  Best validation score: {best_val_score_svc:.4f}")

# Plot validation curve
fig, ax = plt.subplots(figsize=(10, 6))

# Plot training scores
ax.plot(C_range, train_mean_svc, 'o-', color='#3498db', label='Training Score', linewidth=2, markersize=6)
ax.fill_between(C_range, train_mean_svc - train_std_svc, train_mean_svc + train_std_svc, alpha=0.2, color='#3498db')

# Plot validation scores
ax.plot(C_range, val_mean_svc, 'o-', color='#e74c3c', label='Validation Score', linewidth=2, markersize=6)
ax.fill_between(C_range, val_mean_svc - val_std_svc, val_mean_svc + val_std_svc, alpha=0.2, color='#e74c3c')

# Mark best C
ax.axvline(x=best_C_val, color='#2ecc71', linestyle='--', linewidth=2, label=f'Best C ({best_C_val:.4f})')

# Mark C from baseline model
ax.axvline(x=baseline_C, color='#f39c12', linestyle='--', linewidth=2,
           label=f'Baseline C ({baseline_C:.4f})')

ax.set_xlabel('C (Regularization Parameter)', fontsize=12, fontweight='bold')
ax.set_ylabel('F1-Micro Score', fontsize=12, fontweight='bold')
ax.set_title('LinearSVC Validation Curve: C Parameter', fontsize=14, fontweight='bold', pad=15)
ax.set_xscale('log')  # Log scale for better visualization
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(loc='best', fontsize=10)
ax.set_ylim([0, max(max(train_mean_svc), max(val_mean_svc)) * 1.1])

plt.tight_layout()
plt.show()

print("\n‚úì Validation curve generated")

# Print gap analysis
# baseline_C is a member of C_range, so the nearest-value lookup is an exact hit.
gap_at_best = train_mean_svc[best_C_idx] - val_mean_svc[best_C_idx]
baseline_C_idx = np.argmin(np.abs(C_range - baseline_C))
gap_at_baseline = train_mean_svc[baseline_C_idx] - val_mean_svc[baseline_C_idx]

print(f"\n  Gap at best C ({best_C_val:.4f}): {gap_at_best:.4f} ({gap_at_best * 100:.2f}%)")
print(f"  Gap at baseline C ({baseline_C:.4f}): {gap_at_baseline:.4f} ({gap_at_baseline * 100:.2f}%)")



## 3. RidgeClassifier Hyperparameter Tuning



In [None]:
# RidgeClassifier Hyperparameter Tuning
# Grid-searches the Ridge regularization strength with 5-fold CV, keeps the best
# estimator and compares its mean train score with its mean validation score.
print("=" * 70)
print("HYPERPARAMETER TUNING: RidgeClassifier")
print("=" * 70)

# Scoring: micro-averaged F1, the same metric used for the other models
scorer_ridge = make_scorer(f1_score, average='micro', zero_division=0)

# Search space: 20 log-spaced alphas between 0.01 and 100.
# Alpha is the regularization strength (higher alpha = more regularization),
# i.e. it plays the inverse role of C in LinearSVC.
ridge_param_grid = dict(estimator__alpha=np.logspace(-2, 2, 20))

# One-vs-rest wrapper around a class-balanced RidgeClassifier;
# solver='auto' lets scikit-learn pick the solver.
base_ridge = RidgeClassifier(solver='auto', class_weight='balanced', random_state=42)
ridge_model = OneVsRestClassifier(base_ridge)

_ridge_alphas = ridge_param_grid['estimator__alpha']
print("\nStarting GridSearchCV for RidgeClassifier...")
print(f"  Parameter grid: alpha range from {_ridge_alphas[0]:.4f} to {_ridge_alphas[-1]:.4f}")
print("  Scoring metric: F1-micro")
print("  CV folds: 5")
print("\n  (This may take a few minutes...)")

# return_train_score=True keeps the per-candidate training scores that the
# overfitting check below reads back out of cv_results_.
ridge_grid_search = GridSearchCV(
    ridge_model,
    ridge_param_grid,
    scoring=scorer_ridge,
    cv=5,
    n_jobs=-1,
    verbose=1,
    return_train_score=True,
)

start_time = time.time()
ridge_grid_search.fit(X_train_array, y_train)
tuning_time = time.time() - start_time

print(f"\n‚úì Tuning complete in {tuning_time:.2f} seconds")
print(f"  Best F1-micro (CV): {ridge_grid_search.best_score_:.4f}")
print(f"  Best parameters: {ridge_grid_search.best_params_}")

# Estimator for the winning alpha
ridge_model_tuned = ridge_grid_search.best_estimator_

# Cross-validation overfitting check
print("\n" + "=" * 70)
print("CROSS-VALIDATION OVERFITTING CHECK: RidgeClassifier")
print("=" * 70)

# Everything below is read from the row of cv_results_ that belongs to the
# best candidate.
_ridge_cv = ridge_grid_search.cv_results_
_ridge_best = ridge_grid_search.best_index_

best_cv_score_ridge = ridge_grid_search.best_score_
best_cv_std_ridge = _ridge_cv['std_test_score'][_ridge_best]
best_train_score_ridge = _ridge_cv['mean_train_score'][_ridge_best]
best_train_std_ridge = _ridge_cv['std_train_score'][_ridge_best]

# Overfitting gap = mean train score minus mean validation score
overfitting_gap_ridge = best_train_score_ridge - best_cv_score_ridge

print(f"\n  Validation Score (CV): {best_cv_score_ridge:.4f} ¬± {best_cv_std_ridge:.4f}")
print(f"  Training Score (CV):    {best_train_score_ridge:.4f} ¬± {best_train_std_ridge:.4f}")
print(f"  Overfitting Gap:        {overfitting_gap_ridge:.4f} ({overfitting_gap_ridge * 100:.2f}%)")

# Verdict tiers: above 5% -> warning, above 2% -> caution, otherwise fine
_ridge_verdict = (
    "  ‚ö†Ô∏è  WARNING: Potential overfitting detected (gap > 5%)"
    if overfitting_gap_ridge > 0.05
    else "  ‚ö†Ô∏è  CAUTION: Moderate overfitting gap (2-5%)"
    if overfitting_gap_ridge > 0.02
    else "  ‚úÖ Good generalization (overfitting gap < 2%)"
)
print(_ridge_verdict)



In [None]:
# Validation Curve for RidgeClassifier
# Plots mean train/validation F1-micro against alpha, then scores the tuned
# model on the held-out test set.
print("\n" + "=" * 70)
print("VALIDATION CURVE: RidgeClassifier (Alpha Parameter)")
print("=" * 70)

# The curve covers exactly the alpha grid that ridge_grid_search already
# evaluated with 5-fold CV, the same scorer and return_train_score=True.
# Instead of refitting every (alpha, fold) pair a second time via
# validation_curve, the per-fold scores are read back from cv_results_.
ridge_cv_results = ridge_grid_search.cv_results_
n_folds_ridge = ridge_grid_search.n_splits_
alpha_range_ridge = np.array(
    [params['estimator__alpha'] for params in ridge_cv_results['params']],
    dtype=float,
)

print(f"\nComputing validation curve for alpha values...")
print(f"  Alpha range: {alpha_range_ridge[0]:.4f} to {alpha_range_ridge[-1]:.4f}")
print(f"  Number of points: {len(alpha_range_ridge)}")
print(f"  CV folds: {n_folds_ridge}")

# Assemble score matrices in the layout validation_curve would return:
# one row per alpha, one column per CV fold.
train_scores_ridge = np.column_stack(
    [ridge_cv_results[f'split{fold}_train_score'] for fold in range(n_folds_ridge)]
)
val_scores_ridge = np.column_stack(
    [ridge_cv_results[f'split{fold}_test_score'] for fold in range(n_folds_ridge)]
)

# Calculate mean and std for train and validation scores
train_mean_ridge = np.mean(train_scores_ridge, axis=1)
train_std_ridge = np.std(train_scores_ridge, axis=1)
val_mean_ridge = np.mean(val_scores_ridge, axis=1)
val_std_ridge = np.std(val_scores_ridge, axis=1)

# Find best alpha from validation curve
best_alpha_idx_ridge = np.argmax(val_mean_ridge)
best_alpha_val_ridge = alpha_range_ridge[best_alpha_idx_ridge]
best_val_score_ridge = val_mean_ridge[best_alpha_idx_ridge]

print(f"\n  Best alpha from validation curve: {best_alpha_val_ridge:.4f}")
print(f"  Best validation score: {best_val_score_ridge:.4f}")

# Plot validation curve
fig, ax = plt.subplots(figsize=(10, 6))

# Plot training scores
ax.plot(alpha_range_ridge, train_mean_ridge, 'o-', color='#3498db', label='Training Score', linewidth=2, markersize=6)
ax.fill_between(alpha_range_ridge, train_mean_ridge - train_std_ridge, train_mean_ridge + train_std_ridge, alpha=0.2, color='#3498db')

# Plot validation scores
ax.plot(alpha_range_ridge, val_mean_ridge, 'o-', color='#e74c3c', label='Validation Score', linewidth=2, markersize=6)
ax.fill_between(alpha_range_ridge, val_mean_ridge - val_std_ridge, val_mean_ridge + val_std_ridge, alpha=0.2, color='#e74c3c')

# Mark best alpha
ax.axvline(x=best_alpha_val_ridge, color='#2ecc71', linestyle='--', linewidth=2, label=f'Best Alpha ({best_alpha_val_ridge:.4f})')

# Mark alpha from GridSearchCV if available
if 'estimator__alpha' in ridge_grid_search.best_params_:
    best_alpha_gs = ridge_grid_search.best_params_['estimator__alpha']
    ax.axvline(x=best_alpha_gs, color='#f39c12', linestyle='--', linewidth=2,
               label=f'GridSearch Best ({best_alpha_gs:.4f})')

ax.set_xlabel('Alpha (Regularization Parameter)', fontsize=12, fontweight='bold')
ax.set_ylabel('F1-Micro Score', fontsize=12, fontweight='bold')
ax.set_title('RidgeClassifier Validation Curve: Alpha Parameter', fontsize=14, fontweight='bold', pad=15)
ax.set_xscale('log')  # Log scale for better visualization
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(loc='best', fontsize=10)
ax.set_ylim([0, max(max(train_mean_ridge), max(val_mean_ridge)) * 1.1])

plt.tight_layout()
plt.show()

print("\n‚úì Validation curve generated")

# Print gap analysis
gap_at_best_ridge = train_mean_ridge[best_alpha_idx_ridge] - val_mean_ridge[best_alpha_idx_ridge]
print(f"\n  Gap at best alpha ({best_alpha_val_ridge:.4f}): {gap_at_best_ridge:.4f} ({gap_at_best_ridge * 100:.2f}%)")

# Generate predictions
print("\nGenerating predictions with tuned RidgeClassifier...")
y_pred_ridge = ridge_model_tuned.predict(X_test_array)
print("‚úì Predictions generated")

# Evaluate tuned RidgeClassifier
# zero_division=0 on the F1 calls matches the scorer used during tuning.
ridge_metrics = {
    'f1_micro': f1_score(y_test, y_pred_ridge, average='micro', zero_division=0),
    'f1_macro': f1_score(y_test, y_pred_ridge, average='macro', zero_division=0),
    'precision_micro': precision_score(y_test, y_pred_ridge, average='micro', zero_division=0),
    'recall_micro': recall_score(y_test, y_pred_ridge, average='micro', zero_division=0),
    'hamming_loss': hamming_loss(y_test, y_pred_ridge),
    'jaccard_score': jaccard_score(y_test, y_pred_ridge, average='micro', zero_division=0),
}

print("\nüìä TUNED RIDGECLASSIFIER MODEL METRICS (Test Set)")
print("=" * 70)
for metric, value in ridge_metrics.items():
    print(f"  {metric:20s}: {value:.4f} ({value * 100:.2f}%)")



## 4. MultinomialNB Hyperparameter Tuning


In [None]:
# MultinomialNB Hyperparameter Tuning
# Exhaustively searches the smoothing parameter with 5-fold CV, keeps the best
# estimator and compares its mean train score with its mean validation score.
print("=" * 70)
print("HYPERPARAMETER TUNING: MultinomialNB")
print("=" * 70)

# Define parameter grid for MultinomialNB
nb_param_grid = {
    'estimator__alpha': np.linspace(0.04, 0.02, 10),  # Smoothing parameter (10 values, 0.04 down to 0.02)
    'estimator__fit_prior': [False],  # Whether to learn class prior probabilities
}
# Total number of parameter combinations GridSearchCV will evaluate
n_nb_candidates = int(np.prod([len(values) for values in nb_param_grid.values()]))

# Create base MultinomialNB model
base_nb = MultinomialNB()
nb_model = OneVsRestClassifier(base_nb)

# Use F1-micro as the scoring metric (appropriate for multi-label)
scorer = make_scorer(f1_score, average='micro', zero_division=0)

# The search below is a GridSearchCV (every candidate is evaluated), so the
# progress messages report the grid size rather than a random-search n_iter.
print("\nStarting GridSearchCV for MultinomialNB...")
print(f"  Parameter grid: {nb_param_grid}")
print(f"  Scoring metric: F1-micro")
print(f"  CV folds: 5")
print(f"  Candidates: {n_nb_candidates}")
print("\n  (This may take a few minutes...)")

# GridSearchCV
# NOTE: the variable keeps its historical name `nb_random_search` because other
# cells refer to it, even though the search is exhaustive.
nb_random_search = GridSearchCV(
    estimator=nb_model,
    param_grid=nb_param_grid,
    cv=5,
    scoring=scorer,
    n_jobs=-1,
    verbose=1,
    return_train_score=True  # Enable train scores for overfitting analysis
)

start_time = time.time()
nb_random_search.fit(X_train_array, y_train)
tuning_time = time.time() - start_time

print(f"\n‚úì Tuning complete in {tuning_time:.2f} seconds")
print(f"  Best F1-micro (CV): {nb_random_search.best_score_:.4f}")
print(f"  Best parameters: {nb_random_search.best_params_}")

# Get best model
nb_model_tuned = nb_random_search.best_estimator_

# Cross-validation overfitting check
print("\n" + "=" * 70)
print("CROSS-VALIDATION OVERFITTING CHECK: MultinomialNB")
print("=" * 70)

# Get CV scores from GridSearchCV (validation scores)
best_cv_score = nb_random_search.best_score_
best_cv_std = nb_random_search.cv_results_['std_test_score'][nb_random_search.best_index_]

# Get training scores from GridSearchCV
best_train_score = nb_random_search.cv_results_['mean_train_score'][nb_random_search.best_index_]
best_train_std = nb_random_search.cv_results_['std_train_score'][nb_random_search.best_index_]

# Calculate overfitting gap (mean train score minus mean validation score)
overfitting_gap = best_train_score - best_cv_score

print(f"\n  Validation Score (CV): {best_cv_score:.4f} ¬± {best_cv_std:.4f}")
print(f"  Training Score (CV):    {best_train_score:.4f} ¬± {best_train_std:.4f}")
print(f"  Overfitting Gap:        {overfitting_gap:.4f} ({overfitting_gap * 100:.2f}%)")

if overfitting_gap > 0.05:  # 5% threshold
    print(f"  ‚ö†Ô∏è  WARNING: Potential overfitting detected (gap > 5%)")
elif overfitting_gap > 0.02:  # 2% threshold
    print(f"  ‚ö†Ô∏è  CAUTION: Moderate overfitting gap (2-5%)")
else:
    print(f"  ‚úÖ Good generalization (overfitting gap < 2%)")


In [None]:
# Validation Curve
# Sweeps MultinomialNB's smoothing parameter over a range much wider than the
# tuning grid, plots mean train/validation F1-micro, then scores the tuned model
# on the held-out test set.
print("\n" + "=" * 70)
print("VALIDATION CURVE: MultinomialNB (Alpha Parameter)")
print("=" * 70)

# Create a range of alpha values to test
# Use a wider range for better visualization
alpha_range = np.logspace(-3, 1, 20)  # From 0.001 to 10, 20 points

print(f"\nComputing validation curve for alpha values...")
print(f"  Alpha range: {alpha_range[0]:.4f} to {alpha_range[-1]:.4f}")
print(f"  Number of points: {len(alpha_range)}")
print(f"  CV folds: 5")

# Compute validation curve
# Both returned arrays have one row per alpha and one column per CV fold.
train_scores, val_scores = validation_curve(
    nb_model,
    X_train_array,
    y_train,
    param_name='estimator__alpha',
    param_range=alpha_range,
    cv=5,
    scoring=scorer,
    n_jobs=-1
)

# Calculate mean and std for train and validation scores
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
val_mean = np.mean(val_scores, axis=1)
val_std = np.std(val_scores, axis=1)

# Find best alpha from validation curve
best_alpha_idx = np.argmax(val_mean)
best_alpha_val = alpha_range[best_alpha_idx]
best_val_score = val_mean[best_alpha_idx]

print(f"\n  Best alpha from validation curve: {best_alpha_val:.4f}")
print(f"  Best validation score: {best_val_score:.4f}")

# Plot validation curve
fig, ax = plt.subplots(figsize=(10, 6))

# Plot training scores
ax.plot(alpha_range, train_mean, 'o-', color='#3498db', label='Training Score', linewidth=2, markersize=6)
ax.fill_between(alpha_range, train_mean - train_std, train_mean + train_std, alpha=0.2, color='#3498db')

# Plot validation scores
ax.plot(alpha_range, val_mean, 'o-', color='#e74c3c', label='Validation Score', linewidth=2, markersize=6)
ax.fill_between(alpha_range, val_mean - val_std, val_mean + val_std, alpha=0.2, color='#e74c3c')

# Mark best alpha
ax.axvline(x=best_alpha_val, color='#2ecc71', linestyle='--', linewidth=2, label=f'Best Alpha ({best_alpha_val:.4f})')

# Mark alpha from GridSearchCV if available.
# (`nb_random_search` is a GridSearchCV instance despite its name, so the legend
# entry is labelled accordingly.)
if 'estimator__alpha' in nb_random_search.best_params_:
    best_alpha_rs = nb_random_search.best_params_['estimator__alpha']
    ax.axvline(x=best_alpha_rs, color='#f39c12', linestyle='--', linewidth=2,
               label=f'GridSearch Best ({best_alpha_rs:.4f})')

ax.set_xlabel('Alpha (Smoothing Parameter)', fontsize=12, fontweight='bold')
ax.set_ylabel('F1-Micro Score', fontsize=12, fontweight='bold')
ax.set_title('MultinomialNB Validation Curve: Alpha Parameter', fontsize=14, fontweight='bold', pad=15)
ax.set_xscale('log')  # Log scale for better visualization
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(loc='best', fontsize=10)
ax.set_ylim([0, max(max(train_mean), max(val_mean)) * 1.1])

plt.tight_layout()
plt.show()

print("\n‚úì Validation curve generated")

# Print gap analysis (same report the LinearSVC and RidgeClassifier curves give)
gap_at_best_nb = train_mean[best_alpha_idx] - val_mean[best_alpha_idx]
print(f"\n  Gap at best alpha ({best_alpha_val:.4f}): {gap_at_best_nb:.4f} ({gap_at_best_nb * 100:.2f}%)")

# Generate predictions
# The tuned model comes from the grid search, whose alpha grid is narrower than
# the curve above, so its alpha may differ from the curve's best alpha.
print("\nGenerating predictions with tuned MultinomialNB...")
y_proba_nb = nb_model_tuned.predict_proba(X_test_array)
y_pred_nb = nb_model_tuned.predict(X_test_array)
print("‚úì Predictions generated")

# Evaluate tuned MultinomialNB
# zero_division=0 on the F1 calls matches the scorer used during tuning.
nb_metrics = {
    'f1_micro': f1_score(y_test, y_pred_nb, average='micro', zero_division=0),
    'f1_macro': f1_score(y_test, y_pred_nb, average='macro', zero_division=0),
    'precision_micro': precision_score(y_test, y_pred_nb, average='micro', zero_division=0),
    'recall_micro': recall_score(y_test, y_pred_nb, average='micro', zero_division=0),
    'hamming_loss': hamming_loss(y_test, y_pred_nb),
    'jaccard_score': jaccard_score(y_test, y_pred_nb, average='micro', zero_division=0),
}

print("\nüìä TUNED MULTINOMIALNB MODEL METRICS (Test Set)")
print("=" * 70)
for metric, value in nb_metrics.items():
    print(f"  {metric:20s}: {value:.4f} ({value * 100:.2f}%)")
