# In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.utils.class_weight import compute_class_weight
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (confusion_matrix, classification_report, 
                            f1_score, precision_score, recall_score, 
                            accuracy_score, roc_auc_score, cohen_kappa_score,
                            matthews_corrcoef)
from tqdm import tqdm
import warnings
import json
import time
warnings.filterwarnings('ignore')

# ---------------------------
# 0. Device
# ---------------------------
# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ---------------------------
# 1. Load data
# ---------------------------
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code embedded in the file. Only load .npy files
# that come from a trusted source.
image_array = np.load("images.npy", allow_pickle=True)
labels = np.load("labels.npy", allow_pickle=True)

# Load class names; the number of entries defines the number of classes.
class_names = np.load("class_names.npy", allow_pickle=True)
num_classes = len(class_names)

# Labels are assumed to be integer-encoded already (presumably indices into
# class_names -- TODO confirm); cast to int64 so they can be used with
# np.bincount below and as class targets later on.
y = labels.astype(np.int64)

print(f"Number of classes: {num_classes}")
print(f"Classes: {class_names}")
print(f"Class distribution: {np.bincount(y)}")

# Load pre-computed splits. These are used further down as index arrays into
# image_array / y (see WasteDataset), one array per split.
train_idx = np.load("split_train.npy")
val_idx = np.load("split_val.npy")
test_idx = np.load("split_test.npy")

print(f"Train size: {len(train_idx)}, Val size: {len(val_idx)}, Test size: {len(test_idx)}")

# ---------------------------
# 2. Dataset class
# ---------------------------
class WasteDataset(Dataset):
    """Index-based view over an in-memory image collection.

    ``indices`` selects which entries of ``images`` / ``labels`` belong to
    this split: sample ``k`` of the dataset is entry ``indices[k]`` of the
    backing arrays.
    """

    def __init__(self, images, labels, indices, transform=None):
        self.images, self.labels = images, labels
        self.indices = indices
        self.transform = transform

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the ``idx``-th sample of the split.

        The image is cast to uint8 and, when a transform was supplied, passed
        through it; the label is returned as a plain Python int.
        """
        source_pos = self.indices[idx]
        sample = self.images[source_pos].astype(np.uint8)
        target = int(self.labels[source_pos])
        if not self.transform:
            return sample, target
        return self.transform(sample), target

# ---------------------------
# 3. Model Architecture (Your BetterCNN - UNCHANGED)
# ---------------------------
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by a shortcut connection.

    The shortcut is the identity unless the block downsamples
    (``stride != 1``) or changes the channel count; in that case a 1x1 conv
    followed by batch-norm projects the input so it can be added to the
    main branch.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Main branch: only the first convolution applies the stride.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Shortcut branch: empty Sequential acts as the identity.
        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            self.skip = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.skip = nn.Sequential()

    def forward(self, x):
        """Apply the main branch, add the shortcut, then the final ReLU."""
        main = self.conv1(x)
        main = self.bn1(main)
        main = self.relu(main)
        main = self.bn2(self.conv2(main))
        main += self.skip(x)
        return self.relu(main)

class BetterCNN(nn.Module):
    """Small residual CNN classifier.

    A conv/BN/ReLU stem (64 channels) is followed by three stride-2 residual
    blocks (128, 256, 512 channels), adaptive average pooling to 4x4, and a
    two-layer fully connected head with dropout that outputs ``num_classes``
    logits.
    """

    def __init__(self, num_classes, input_channels=3):
        super().__init__()
        # Stem keeps the spatial resolution of the input.
        self.layer1 = nn.Sequential(
            nn.Conv2d(input_channels, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        # Each residual stage doubles the channels and downsamples (stride 2).
        self.layer2 = ResidualBlock(64, 128, stride=2)
        self.layer3 = ResidualBlock(128, 256, stride=2)
        self.layer4 = ResidualBlock(256, 512, stride=2)
        # Pooling to a fixed 4x4 grid fixes the flattened size at 512*4*4.
        self.pool = nn.AdaptiveAvgPool2d((4, 4))
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(512 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        features = self.pool(x)
        features = features.view(features.size(0), -1)
        hidden = F.relu(self.fc1(features))
        return self.fc2(self.dropout(hidden))

# ---------------------------
# 4. Label Smoothing Loss
# ---------------------------
class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy blended with a uniform-target term (label smoothing).

    The result is ``(1 - epsilon) * nll + epsilon * uniform / n_class`` where
    ``nll`` is the (optionally class-weighted) negative log-likelihood of the
    true class and ``uniform`` is the batch mean of the summed negative
    log-probabilities over all classes. The class ``weight`` is applied to
    the ``nll`` term only.
    """

    def __init__(self, epsilon=0.1, weight=None):
        super().__init__()
        self.epsilon = epsilon
        self.weight = weight

    def forward(self, output, target):
        """Return the scalar smoothed loss for logits ``output`` and class indices ``target``."""
        log_probs = F.log_softmax(output, dim=1)
        class_count = output.size(1)
        uniform_term = -log_probs.sum(dim=1).mean()
        hard_term = F.nll_loss(log_probs, target, weight=self.weight)
        return (1 - self.epsilon) * hard_term + self.epsilon * uniform_term / class_count

# ---------------------------
# 5. Training Function
# ---------------------------
def train_with_config(config, image_array, y, train_idx, val_idx, test_idx, class_names, device):
    """Train a BetterCNN with one hyperparameter configuration and evaluate it.

    Builds the augmentation pipelines, datasets and loaders, trains with a
    class-weighted label-smoothing loss, checkpoints the model whenever the
    smoothed validation accuracy improves, and finally scores the best
    checkpoint on the test split.

    Args:
        config: Dict of hyperparameters. Keys read here: 'name',
            'batch_size', 'learning_rate', 'weight_decay', 'label_smoothing',
            'epochs', 'patience', 'optimizer' ('Adam' or 'AdamW'; any other
            value selects SGD with momentum 0.9), 'scheduler'
            ('ReduceLROnPlateau' or 'CosineAnnealing'; any other value
            selects OneCycleLR), and the augmentation settings 'hflip_prob',
            'vflip_prob', 'rotation_degrees', 'color_jitter' and
            'random_erasing'.
        image_array: Indexable collection of images, indexed by the split
            arrays through WasteDataset.
        y: Integer class labels aligned with ``image_array``.
        train_idx: Indices of the training samples.
        val_idx: Indices of the validation samples.
        test_idx: Indices of the test samples.
        class_names: Sequence of class names; its length sets the number of
            model outputs and position ``i`` names label ``i``.
        device: torch device the model and every batch are moved to.

    Returns:
        Tuple ``(results, model, y_true, y_pred, y_probs)``:
        ``results`` is a dict with the config, the best smoothed validation
        accuracy, final test accuracy, per-class test accuracy, training time
        in minutes, number of epochs run and the per-epoch history;
        ``model`` is the network with the best checkpoint loaded;
        ``y_true`` / ``y_pred`` are arrays of test labels and predictions;
        ``y_probs`` holds the softmax probabilities for each test sample.

    Side effects:
        Writes ``best_model_<config['name']>.pth`` to the working directory
        and prints progress to stdout.
    """
    # Derive the class count from the argument instead of relying on a
    # module-level global, so the function is self-contained.
    num_classes = len(class_names)
    checkpoint_path = f"best_model_{config['name']}.pth"

    print(f"\n{'='*80}")
    print(f"Training Configuration: {config['name']}")
    print(f"{'='*80}")
    for key, value in config.items():
        if key != 'name':
            print(f"  {key:20s}: {value}")
    print(f"{'='*80}\n")

    start_time = time.time()

    # Training pipeline: resize slightly larger than the target, then random
    # crop / flips / rotation / affine / colour jitter. RandomErasing runs
    # last because it operates on the normalized tensor.
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(p=config['hflip_prob']),
        transforms.RandomVerticalFlip(p=config['vflip_prob']),
        transforms.RandomRotation(config['rotation_degrees']),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ColorJitter(brightness=config['color_jitter'],
                               contrast=config['color_jitter'],
                               saturation=config['color_jitter']*0.7),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.RandomErasing(p=config['random_erasing'])
    ])

    # Deterministic pipeline shared by validation and test.
    val_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Create datasets
    train_dataset = WasteDataset(image_array, y, train_idx, transform=train_transform)
    val_dataset = WasteDataset(image_array, y, val_idx, transform=val_transform)
    test_dataset = WasteDataset(image_array, y, test_idx, transform=val_transform)

    # Create dataloaders (only the training loader shuffles)
    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'],
                              shuffle=True, num_workers=0, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=config['batch_size'],
                            num_workers=0, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=config['batch_size'],
                             num_workers=0, pin_memory=True)

    # Create model
    model = BetterCNN(num_classes=num_classes, input_channels=3).to(device)
    print(f"Model parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

    # Class weights, inversely proportional to training-split frequencies.
    # NOTE(review): this assumes every class occurs in the training split;
    # otherwise the weight vector is shorter than the number of model outputs.
    class_weights = compute_class_weight(
        class_weight="balanced",
        classes=np.unique(y[train_idx]),
        y=y[train_idx]
    )
    class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

    # Loss function
    criterion = LabelSmoothingCrossEntropy(epsilon=config['label_smoothing'], weight=class_weights)

    # Optimizer
    if config['optimizer'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'],
                               weight_decay=config['weight_decay'])
    elif config['optimizer'] == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=config['learning_rate'],
                                weight_decay=config['weight_decay'])
    else:  # SGD
        optimizer = optim.SGD(model.parameters(), lr=config['learning_rate'],
                              momentum=0.9, weight_decay=config['weight_decay'])

    # Scheduler. ReduceLROnPlateau and CosineAnnealing step once per epoch;
    # OneCycleLR steps once per batch (see the training loop).
    if config['scheduler'] == 'ReduceLROnPlateau':
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=3, min_lr=1e-7
        )
    elif config['scheduler'] == 'CosineAnnealing':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=config['epochs'], eta_min=1e-6
        )
    else:  # OneCycleLR
        scheduler = optim.lr_scheduler.OneCycleLR(
            optimizer, max_lr=config['learning_rate']*3,
            epochs=config['epochs'], steps_per_epoch=len(train_loader)
        )

    # Training state
    best_val_acc = 0
    best_test_acc = 0
    patience_counter = 0
    checkpoint_saved = False  # True once this run has written checkpoint_path
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []

    for epoch in range(config['epochs']):
        # Training phase
        model.train()
        running_loss, correct, total = 0, 0, 0

        train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{config['epochs']} [Train]", leave=False)
        for X_batch, y_batch in train_loop:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            # Clip the global gradient norm to keep updates bounded.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            if config['scheduler'] == 'OneCycleLR':
                scheduler.step()

            # Weight the batch loss by batch size so the epoch figure is a
            # per-sample mean even when the last batch is smaller.
            running_loss += loss.item() * X_batch.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)

            train_loop.set_postfix({'loss': running_loss/total, 'acc': correct/total})

        train_loss = running_loss / total
        train_acc = correct / total
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        # Validation phase
        model.eval()
        val_loss, correct, total = 0, 0, 0

        val_loop = tqdm(val_loader, desc=f"Epoch {epoch+1}/{config['epochs']} [Val]", leave=False)
        with torch.no_grad():
            for X_batch, y_batch in val_loop:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item() * X_batch.size(0)
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == y_batch).sum().item()
                total += y_batch.size(0)

                val_loop.set_postfix({'loss': val_loss/total, 'acc': correct/total})

        val_loss = val_loss / total
        val_acc = correct / total
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        # Moving average over the last three epochs for stability; until
        # three epochs exist the raw accuracy is used.
        avg_val_acc = np.mean(val_accs[-3:]) if len(val_accs) >= 3 else val_acc

        print(f"Epoch {epoch+1:2d}/{config['epochs']} | "
              f"Train: Loss={train_loss:.4f} Acc={train_acc:.4f} | "
              f"Val: Loss={val_loss:.4f} Acc={val_acc:.4f} AvgAcc={avg_val_acc:.4f}")

        # Step the per-epoch schedulers
        if config['scheduler'] == 'ReduceLROnPlateau':
            scheduler.step(val_acc)
        elif config['scheduler'] == 'CosineAnnealing':
            scheduler.step()

        # Save best model (selection is by the smoothed validation accuracy)
        if avg_val_acc > best_val_acc:
            best_val_acc = avg_val_acc
            patience_counter = 0

            # Test accuracy of the new best model; reported only, it does
            # not influence model selection.
            model.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for X_batch, y_batch in test_loader:
                    X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                    outputs = model(X_batch)
                    _, predicted = torch.max(outputs, 1)
                    correct += (predicted == y_batch).sum().item()
                    total += y_batch.size(0)
            best_test_acc = correct / total

            # 'val_acc' is this epoch's raw accuracy, not the smoothed value.
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch,
                'val_acc': val_acc,
                'test_acc': best_test_acc
            }, checkpoint_path)
            checkpoint_saved = True

            print(f"  ✓ Best model saved! Val={avg_val_acc:.4f}, Test={best_test_acc:.4f}")
        else:
            patience_counter += 1
            if patience_counter >= config['patience']:
                print(f"  ✗ Early stopping at epoch {epoch+1}")
                break

    training_time = (time.time() - start_time) / 60

    # Final test evaluation on the best checkpoint. Only reload a file this
    # run actually wrote: otherwise a stale checkpoint left by an earlier run
    # with the same config name could be scored by mistake.
    if checkpoint_saved:
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        print("  ! No checkpoint was saved in this run; evaluating the final weights instead.")
    model.eval()

    y_true, y_pred, y_probs = [], [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            probs = F.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(y_batch.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_probs = np.array(y_probs)

    final_test_acc = accuracy_score(y_true, y_pred)

    # Per-class accuracy; classes with no test samples are left out.
    per_class_acc = {}
    for i, class_name in enumerate(class_names):
        class_mask = y_true == i
        if class_mask.sum() > 0:
            class_acc = np.mean(y_pred[class_mask] == i)
            per_class_acc[str(class_name)] = float(class_acc)

    # Everything is converted to plain Python types so the dict can be
    # serialized as JSON by the caller.
    results = {
        'config_name': config['name'],
        'config': config,
        'best_val_acc': float(best_val_acc),
        'final_test_acc': float(final_test_acc),
        'per_class_acc': per_class_acc,
        'training_time_min': float(training_time),
        'epochs_trained': len(train_losses),
        'history': {
            'train_loss': [float(x) for x in train_losses],
            'train_acc': [float(x) for x in train_accs],
            'val_loss': [float(x) for x in val_losses],
            'val_acc': [float(x) for x in val_accs]
        }
    }

    print(f"\n{'='*80}")
    print(f"RESULTS for {config['name']}:")
    print(f"  Best Val Acc:  {best_val_acc*100:.2f}%")
    print(f"  Final Test Acc: {final_test_acc*100:.2f}%")
    print(f"  Training Time:  {training_time:.1f} minutes")
    print(f"{'='*80}\n")

    return results, model, y_true, y_pred, y_probs

# ---------------------------
# 6. Configuration 4 ONLY
# ---------------------------
# Hyperparameters consumed by train_with_config(); every key below is read there.
config_4 = {
    'name': 'config_4_less_augment',  # used in the checkpoint and results file names
    'learning_rate': 0.0005,          # base learning rate passed to the optimizer
    'batch_size': 24,                 # shared by the train, val and test loaders
    'optimizer': 'AdamW',             # 'Adam' / 'AdamW'; any other value falls back to SGD
    'scheduler': 'CosineAnnealing',   # CosineAnnealingLR over 'epochs', stepped once per epoch
    'weight_decay': 5e-4,             # weight decay passed to the optimizer
    'label_smoothing': 0.1,           # epsilon of LabelSmoothingCrossEntropy
    'epochs': 20,                     # maximum number of training epochs
    'patience': 12,                   # epochs without a new best smoothed val accuracy before early stop
    'hflip_prob': 0.5,                # probability of RandomHorizontalFlip
    'vflip_prob': 0.2,                # probability of RandomVerticalFlip
    'rotation_degrees': 15,           # range argument of RandomRotation
    'color_jitter': 0.2,              # brightness and contrast jitter; saturation uses 0.7x this value
    'random_erasing': 0.1             # probability of RandomErasing
}

# ---------------------------
# 7. Train Model with Configuration 4
# ---------------------------
banner_rule = '#' * 80
print("\n" + banner_rule)
print("# TRAINING WITH CONFIGURATION 4 ONLY")
print("# Model: BetterCNN (Your Original Architecture)")
print(banner_rule + "\n")

# Run the full train / validate / test cycle for this configuration.
results, model, y_true, y_pred, y_probs = train_with_config(
    config_4,
    image_array, y,
    train_idx, val_idx, test_idx,
    class_names, device,
)

# Persist the metrics and history next to the checkpoint.
results_path = f"results_{config_4['name']}.json"
with open(results_path, 'w') as results_file:
    json.dump(results, results_file, indent=2)
print(f"✓ Saved results to {results_path}")

# ---------------------------
# 8. COMPREHENSIVE EVALUATION METRICS
# ---------------------------
print(f"\n{'#'*80}")
print(f"# COMPREHENSIVE EVALUATION METRICS")
print(f"{'#'*80}\n")

# Overall metrics. Macro averages weight every class equally; weighted
# averages weight each class by its support in y_true.
accuracy = accuracy_score(y_true, y_pred)
precision_macro = precision_score(y_true, y_pred, average='macro', zero_division=0)
precision_weighted = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall_macro = recall_score(y_true, y_pred, average='macro', zero_division=0)
recall_weighted = recall_score(y_true, y_pred, average='weighted', zero_division=0)
f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)
kappa = cohen_kappa_score(y_true, y_pred)
mcc = matthews_corrcoef(y_true, y_pred)

# Print overall metrics
print("="*80)
print("OVERALL METRICS:")
print("="*80)
print(f"Accuracy:              {accuracy*100:.2f}%")
print(f"\nPrecision (Macro):     {precision_macro*100:.2f}%")
print(f"Precision (Weighted):  {precision_weighted*100:.2f}%")
print(f"\nRecall (Macro):        {recall_macro*100:.2f}%")
print(f"Recall (Weighted):     {recall_weighted*100:.2f}%")
print(f"\nF1-Score (Macro):      {f1_macro*100:.2f}%")
print(f"F1-Score (Weighted):   {f1_weighted*100:.2f}%")
print(f"\nCohen's Kappa:         {kappa:.4f}")
print(f"Matthews Corr Coef:    {mcc:.4f}")
print("="*80)

# Pin the label set for every per-class computation. By default scikit-learn
# only reports labels that occur in y_true or y_pred, so a class missing from
# both would make the per-class arrays shorter than class_names and shift
# every later row onto the wrong name (or raise on the length mismatch).
all_labels = np.arange(len(class_names))

# Print per-class metrics
print(f"\n{'='*80}")
print("PER-CLASS METRICS:")
print(f"{'='*80}")
report = classification_report(y_true, y_pred, labels=all_labels,
                               target_names=class_names, digits=4, zero_division=0)
print(f"\n{report}")

# Per-class precision, recall and F1; index i corresponds to class_names[i].
precision_per_class = precision_score(y_true, y_pred, labels=all_labels,
                                      average=None, zero_division=0)
recall_per_class = recall_score(y_true, y_pred, labels=all_labels,
                                average=None, zero_division=0)
f1_per_class = f1_score(y_true, y_pred, labels=all_labels,
                        average=None, zero_division=0)

print(f"\n{'='*80}")
print("DETAILED PER-CLASS BREAKDOWN:")
print(f"{'='*80}")
print(f"{'Class':<20} {'Precision':<12} {'Recall':<12} {'F1-Score':<12} {'Support'}")
print("-"*80)
for i, class_name in enumerate(class_names):
    # Support = number of test samples whose true label is this class.
    support = np.sum(y_true == i)
    print(f"{class_name:<20} {precision_per_class[i]*100:>10.2f}% "
          f"{recall_per_class[i]*100:>10.2f}% "
          f"{f1_per_class[i]*100:>10.2f}% "
          f"{support:>10}")
print("="*80)

# ---------------------------
# 9. CONFUSION MATRIX VISUALIZATION
# ---------------------------
print("\n\nGenerating Confusion Matrix...")

# Pin the label set so the matrix always has one row/column per entry of
# class_names. Without it, a class absent from both y_true and y_pred is
# dropped and the tick labels below no longer line up with the cells.
cm_labels = np.arange(len(class_names))

# Raw counts, and the same matrix normalized over each true-label row.
cm = confusion_matrix(y_true, y_pred, labels=cm_labels)
cm_normalized = confusion_matrix(y_true, y_pred, labels=cm_labels, normalize='true')

# Create figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 8))

# Plot absolute confusion matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names,
            yticklabels=class_names, ax=axes[0], cbar_kws={'label': 'Count'})
axes[0].set_title('Confusion Matrix (Absolute Counts)', fontsize=14, fontweight='bold', pad=20)
axes[0].set_ylabel('True Label', fontsize=12, fontweight='bold')
axes[0].set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
axes[0].tick_params(axis='x', rotation=45)
axes[0].tick_params(axis='y', rotation=0)

# Plot normalized confusion matrix (colour scale fixed to [0, 1])
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Greens', xticklabels=class_names,
            yticklabels=class_names, ax=axes[1], cbar_kws={'label': 'Proportion'},
            vmin=0, vmax=1)
axes[1].set_title('Confusion Matrix (Normalized)', fontsize=14, fontweight='bold', pad=20)
axes[1].set_ylabel('True Label', fontsize=12, fontweight='bold')
axes[1].set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
axes[1].tick_params(axis='x', rotation=45)
axes[1].tick_params(axis='y', rotation=0)

# Save before show(): show() may clear the figure in non-interactive runs.
plt.tight_layout()
plt.savefig('confusion_matrix_config4.png', dpi=300, bbox_inches='tight')
print("✓ Confusion matrix saved to confusion_matrix_config4.png")
plt.show()

# ---------------------------
# 10. TRAINING HISTORY VISUALIZATION
# ---------------------------
print("\nGenerating Training History Plot...")

history = results['history']
epochs = range(1, len(history['train_acc']) + 1)
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

# Left panel: accuracy curves as percentages, with the 70% target line.
ax = axes[0]
for series_key, series_label, series_marker in (
    ('train_acc', 'Train Acc', 'o'),
    ('val_acc', 'Val Acc', 's'),
):
    ax.plot(epochs, [value * 100 for value in history[series_key]],
            label=series_label, linewidth=2, marker=series_marker, markersize=6)
ax.axhline(70, color='red', linestyle='--', linewidth=2, label='Target: 70%')
ax.set_xlabel('Epoch', fontsize=12, fontweight='bold')
ax.set_ylabel('Accuracy (%)', fontsize=12, fontweight='bold')
ax.set_title('Training and Validation Accuracy', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(alpha=0.3)

# Right panel: loss curves.
ax = axes[1]
for series_key, series_label, series_marker, series_color in (
    ('train_loss', 'Train Loss', 'o', 'coral'),
    ('val_loss', 'Val Loss', 's', 'orange'),
):
    ax.plot(epochs, history[series_key],
            label=series_label, linewidth=2, marker=series_marker, markersize=6,
            color=series_color)
ax.set_xlabel('Epoch', fontsize=12, fontweight='bold')
ax.set_ylabel('Loss', fontsize=12, fontweight='bold')
ax.set_title('Training and Validation Loss', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig('training_history_config4.png', dpi=300, bbox_inches='tight')
print("✓ Training history saved to training_history_config4.png")
plt.show()

# ---------------------------
# 11. PER-CLASS PERFORMANCE BAR CHART
# ---------------------------
print("\nGenerating Per-Class Performance Chart...")

fig, ax = plt.subplots(figsize=(14, 8))

x = np.arange(len(class_names))
width = 0.25

# One bar group per class: precision left, recall centred, F1 right.
bar_specs = (
    (-width, precision_per_class, 'Precision', 'steelblue'),
    (0, recall_per_class, 'Recall', 'coral'),
    (width, f1_per_class, 'F1-Score', 'lightgreen'),
)
bars1, bars2, bars3 = [
    ax.bar(x + offset, scores * 100, width, label=series_label,
           color=fill, edgecolor='black')
    for offset, scores, series_label, fill in bar_specs
]

ax.set_xlabel('Class', fontsize=12, fontweight='bold')
ax.set_ylabel('Score (%)', fontsize=12, fontweight='bold')
ax.set_title('Per-Class Precision, Recall, and F1-Score', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(class_names, rotation=45, ha='right')
ax.legend(fontsize=11)
ax.grid(axis='y', alpha=0.3)
# Reference line at 70%.
ax.axhline(70, color='red', linestyle='--', linewidth=2, alpha=0.5)

# Write each bar's value just above its top edge.
for bar in (*bars1, *bars2, *bars3):
    bar_top = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width()/2.
    ax.text(bar_centre, bar_top, f'{bar_top:.1f}%',
            ha='center', va='bottom', fontsize=8)

plt.tight_layout()
plt.savefig('per_class_metrics_config4.png', dpi=300, bbox_inches='tight')
print("✓ Per-class metrics chart saved to per_class_metrics_config4.png")
plt.show()

# ---------------------------
# 12. SUMMARY REPORT
# ---------------------------
hash_rule = '#' * 80
eq_rule = '=' * 80

print(f"\n\n{hash_rule}")
print("# FINAL SUMMARY REPORT - CONFIGURATION 4")
print(f"{hash_rule}\n")

# Echo every hyperparameter except the configuration's own name.
print("Configuration Details:")
print("-" * 80)
for key, value in config_4.items():
    if key == 'name':
        continue
    print(f"  {key:20s}: {value}")

print(f"\n{eq_rule}")
print("PERFORMANCE SUMMARY:")
print(eq_rule)
print(f"Test Accuracy:         {accuracy*100:.2f}%")
print(f"F1-Score (Macro):      {f1_macro*100:.2f}%")
print(f"F1-Score (Weighted):   {f1_weighted*100:.2f}%")
print(f"Precision (Macro):     {precision_macro*100:.2f}%")
print(f"Recall (Macro):        {recall_macro*100:.2f}%")
print(f"Cohen's Kappa:         {kappa:.4f}")
print(f"Matthews Corr Coef:    {mcc:.4f}")
print(f"Training Time:         {results['training_time_min']:.1f} minutes")
print(f"Epochs Trained:        {results['epochs_trained']}")
print(eq_rule)

print(f"\n{hash_rule}")
print("# EVALUATION COMPLETE!")
print(f"{hash_rule}\n")

# List the artifacts written by the earlier sections of this script.
print("Generated Files:")
for artifact in (
    f"results_{config_4['name']}.json",
    "confusion_matrix_config4.png",
    "training_history_config4.png",
    "per_class_metrics_config4.png",
    f"best_model_{config_4['name']}.pth",
):
    print(f"  ✓ {artifact}")