# Baixando dados

In [None]:
# Import the DataLoader class from the get_data_from_db module
from get_data_from_db import DataLoader

# Build the project loader and read the dataset location it reports.
data_loader = DataLoader()
path = data_loader.path
print(f"Dataset path: {path}")

# Fetch both splits up front (test first, then train).
test_data = data_loader.get_test_data()
train_data = data_loader.get_train_data()
splits = (("test", test_data), ("train", train_data))

# Report how many entries each split holds.
for split_name, split_items in splits:
    print(f"Number of {split_name} images: {len(split_items)}")

# Preview each split: the whole collection when it has five entries or fewer,
# otherwise only its first five.
for split_name, split_items in splits:
    preview = split_items if len(split_items) <= 5 else split_items[:5]
    print(f"Sample {split_name} files: {preview}")

O dataset já vem separado em pastas de treino, validação e teste; nesta primeira etapa carregamos apenas os arquivos de treino e de teste.

Obtendo os arquivos de teste e de treino, separados por categoria

In [None]:
# Using the DataLoader to get all category paths and files
import os

# Resolve the class folders inside each split (pure path arithmetic, no I/O yet).
test_normal_path = os.path.join(path, "test", "0_normal")
train_normal_path = os.path.join(path, "train", "0_normal")
train_ulcerative_colitis_path = os.path.join(path, "train", "1_ulcerative_colitis")
train_polyps_path = os.path.join(path, "train", "2_polyps")
train_esophagitis_path = os.path.join(path, "train", "3_esophagitis")

# List each folder; every directory entry is counted as one image.
test_normal_files = os.listdir(test_normal_path)
train_normal_files = os.listdir(train_normal_path)
train_ulcerative_colitis_files = os.listdir(train_ulcerative_colitis_path)
train_polyps_files = os.listdir(train_polyps_path)
train_esophagitis_files = os.listdir(train_esophagitis_path)

# Display name -> listing, in the order the summary is printed.
category_listing = [
    ("Test Normal", test_normal_files),
    ("Train Normal", train_normal_files),
    ("Train Ulcerative Colitis", train_ulcerative_colitis_files),
    ("Train Polyps", train_polyps_files),
    ("Train Esophagitis", train_esophagitis_files),
]

print("Dataset Summary:")
for display_name, listed_files in category_listing:
    print(f"{display_name}: {len(listed_files)} images")

# Totals: the first entry is the only test folder, the rest are train folders.
total_test = len(test_normal_files)
total_train = sum(len(listed_files) for _, listed_files in category_listing[1:])
print(f"\nTotal Test Images: {total_test}")
print(f"Total Train Images: {total_train}")
print(f"Total Dataset: {total_test + total_train} images")

In [None]:
# Complete dataset analysis including validation data
import importlib
import get_data_from_db
importlib.reload(get_data_from_db)
from get_data_from_db import DataLoader

# Fresh loader instance used by the rest of the notebook.
enhanced_loader = DataLoader()

# Category names in label order (index == numeric label).
category_names = ['normal', 'ulcerative_colitis', 'polyps', 'esophagitis']

# Image counts keyed by "<split>_<category>".
counts = enhanced_loader.get_category_counts()
print("Complete Dataset Counts:")
for category, count in counts.items():
    print(f"  {category}: {count} images")

# Aggregate the counts per split; the test split only has a 'normal' entry.
test_total = counts['test_normal']
train_total = sum(counts[f'train_{name}'] for name in category_names)
val_total = sum(counts[f'val_{name}'] for name in category_names)
total_images = test_total + train_total + val_total

print(f"\nDataset Split Summary:")
for split_label, split_total in (
    ("Training", train_total),
    ("Validation", val_total),
    ("Testing", test_total),
    ("Total", total_images),
):
    print(f"  {split_label}: {split_total} images")

# Validation data grouped by category.
validation_by_category = enhanced_loader.get_validation_data()
print(f"\nValidation data categories: {list(validation_by_category.keys())}")

# Validation data as (filename, label) pairs; preview at most five of them.
validation_with_labels = enhanced_loader.get_validation_data_with_labels()
print(f"Total validation samples with labels: {len(validation_with_labels)}")
print("Sample validation data with labels:")
preview_count = min(5, len(validation_with_labels))
for i in range(preview_count):
    filename, label = validation_with_labels[i]
    print(f"  {filename} -> {category_names[label]} (label: {label})")

# Per-category breakdown across the three splits.
print(f"\nCategory Distribution:")
for category in category_names:
    train_count = counts[f'train_{category}']
    val_count = counts[f'val_{category}']
    if category == 'normal':
        test_count = counts['test_normal']
    else:
        test_count = 0
    print(f"  {category.upper()}:")
    print(f"    Train: {train_count}, Val: {val_count}, Test: {test_count}")
    print(f"    Total: {train_count + val_count + test_count}")

# Análise Completa do Dataset

O dataset está organizado em três divisões:

## Estrutura do Dataset:
- **Treino (train/)**: 3.200 imagens (800 por categoria)
- **Validação (val/)**: 2.000 imagens (500 por categoria)  
- **Teste (test/)**: 200 imagens (apenas categoria normal)

## Categorias:
1. **Normal (0)**: Imagens normais do cólon
2. **Ulcerative Colitis (1)**: Colite ulcerativa
3. **Polyps (2)**: Pólipos
4. **Esophagitis (3)**: Esofagite

## Total: 5.400 imagens

**Nota**: O conjunto de teste contém apenas imagens da categoria "normal", enquanto treino e validação contêm todas as 4 categorias balanceadas.

In [None]:
# Example: How to use the DataLoader for machine learning workflows
import numpy as np

def _print_label_distribution(split_name, labels):
    """Print how many samples carry each label and return (unique_labels, counts)."""
    print(f"\nLabel distribution in {split_name}:")
    found_labels, found_counts = np.unique(labels, return_counts=True)
    for label, count in zip(found_labels, found_counts):
        print(f"  Label {label} ({category_names[label]}): {count} samples")
    return found_labels, found_counts


# Pull every split from the loader.
train_data_with_labels = enhanced_loader.get_train_data()
val_data_with_labels = enhanced_loader.get_validation_data_with_labels()
test_data_files = enhanced_loader.get_test_data()  # Only normal images

print("Data for Machine Learning:")
print(f"Training samples: {len(train_data_with_labels)} (with labels)")
print(f"Validation samples: {len(val_data_with_labels)} (with labels)")
print(f"Test samples: {len(test_data_files)} (normal only)")

# Each entry is unpacked as a 2-item pair whose second element is the label.
train_labels = [sample_label for _name, sample_label in train_data_with_labels]
val_labels = [sample_label for _name, sample_label in val_data_with_labels]

unique_labels, train_counts = _print_label_distribution("training", train_labels)
unique_labels, val_counts = _print_label_distribution("validation", val_labels)

# Example: file names and folder locations for one specific category.
train_files_by_category = enhanced_loader.get_train_data_by_category()
normal_train_files = train_files_by_category['normal']
val_files_by_category = enhanced_loader.get_validation_data()
normal_val_files = val_files_by_category['normal']

print(f"\nExample file paths for 'normal' category:")
print(f"  Train path: {enhanced_loader.train_normal_path}")
print(f"  Validation path: {enhanced_loader.validation_normal_path}")
print(f"  Sample files: {normal_train_files[:3]} (train), {normal_val_files[:3]} (val)")

# Modelo de Machine Learning para Classificação

Agora vamos implementar um modelo de deep learning (CNN - Convolutional Neural Network) para classificar as imagens de cólon nas 4 categorias:

1. **Normal (0)**
2. **Ulcerative Colitis (1)**  
3. **Polyps (2)**
4. **Esophagitis (3)**

O modelo será treinado usando:
- **3.200 imagens de treino** (800 por categoria)
- **2.000 imagens de validação** (500 por categoria)
- **200 imagens de teste** (apenas normal - para detecção de anomalias)

In [None]:
# Install required packages
%pip install opencv-python scikit-learn

# Import necessary libraries for machine learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from PIL import Image
import random

# Seed TensorFlow, NumPy and the stdlib RNG (in that order) for reproducibility.
for seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
    seed_fn(42)

# Report the runtime: TensorFlow version and the GPUs it can see.
visible_gpus = tf.config.list_physical_devices('GPU')
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {visible_gpus}")
print(f"Number of GPUs: {len(visible_gpus)}")

In [None]:
# Visualize sample images from each category
def show_sample_images(enhanced_loader, num_samples=2):
    """Display randomly chosen training images, one grid row per category.

    Args:
        enhanced_loader: Object exposing ``train_normal_path``,
            ``train_ulcerative_colitis_path``, ``train_polyps_path`` and
            ``train_esophagitis_path`` (the folders images are drawn from).
        num_samples: Number of images (grid columns) requested per category.
            When a folder holds fewer entries, the remaining cells stay blank.

    Raises:
        ValueError: If ``num_samples`` is smaller than 1.
    """
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1")

    # Insertion order defines the row order of the grid.
    category_paths = {
        'normal': enhanced_loader.train_normal_path,
        'ulcerative_colitis': enhanced_loader.train_ulcerative_colitis_path,
        'polyps': enhanced_loader.train_polyps_path,
        'esophagitis': enhanced_loader.train_esophagitis_path
    }

    # squeeze=False keeps `axes` two-dimensional even with a single column,
    # so axes[i, j] stays valid for num_samples=1.
    fig, axes = plt.subplots(len(category_paths), num_samples,
                             figsize=(12, 16), squeeze=False)

    for i, (category, folder) in enumerate(category_paths.items()):
        files = os.listdir(folder)
        # random.sample raises if asked for more items than are available.
        sample_files = random.sample(files, min(num_samples, len(files)))

        for j in range(num_samples):
            axes[i, j].axis('off')
            if j >= len(sample_files):
                continue  # folder had fewer files than requested: blank cell

            filename = sample_files[j]
            img = load_img(os.path.join(folder, filename), target_size=(224, 224))
            axes[i, j].imshow(img)
            axes[i, j].set_title(f'{category.upper()}\n{filename}')

    plt.tight_layout()
    plt.show()

# Show sample images
show_sample_images(enhanced_loader, num_samples=3)

In [None]:
# Create data generators for training and validation
IMG_SIZE = 224
BATCH_SIZE = 32
NUM_CLASSES = 4

# Data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest'
)

# Only rescaling for validation (no augmentation)
val_datagen = ImageDataGenerator(rescale=1./255)

# Create generators using the directory structure
train_generator = train_datagen.flow_from_directory(
    os.path.join(enhanced_loader.path, 'train'),
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=42
)

# shuffle=False keeps batches aligned with `val_generator.classes`,
# which later cells rely on to build the confusion matrix.
val_generator = val_datagen.flow_from_directory(
    os.path.join(enhanced_loader.path, 'val'),
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
    seed=42
)

print(f"Training generator: {train_generator.samples} samples")
print(f"Validation generator: {val_generator.samples} samples")
print(f"Class indices: {train_generator.class_indices}")
print(f"Number of classes: {train_generator.num_classes}")

# Calculate steps per epoch.
# Training uses floor division: a dropped partial batch is harmless there.
# Validation rounds UP so the trailing partial batch is evaluated too; with
# floor division any remainder would silently be excluded from every
# validation metric and prediction.
STEPS_PER_EPOCH = train_generator.samples // BATCH_SIZE
VALIDATION_STEPS = (val_generator.samples + BATCH_SIZE - 1) // BATCH_SIZE

print(f"Steps per epoch: {STEPS_PER_EPOCH}")
print(f"Validation steps: {VALIDATION_STEPS}")

In [None]:
# Create CNN models
def create_custom_cnn(input_shape=(224, 224, 3), num_classes=4):
    """Build a small CNN classifier from scratch.

    The network stacks four Conv2D -> BatchNormalization -> MaxPooling2D
    blocks (32, 64, 128 and 256 filters) followed by global average pooling
    and a dropout-regularised dense head ending in a softmax layer.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """
    # Declare the input explicitly rather than passing `input_shape` to the
    # first layer, which newer Keras versions warn against in Sequential models.
    model = keras.Sequential([keras.Input(shape=input_shape)])

    # Convolutional feature extractor: one block per filter count.
    for filters in (32, 64, 128, 256):
        model.add(layers.Conv2D(filters, (3, 3), activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(2, 2))

    # Classifier head
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

def create_transfer_learning_model(input_shape=(224, 224, 3), num_classes=4):
    """Build a classifier on top of a frozen, ImageNet-pretrained VGG16.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the final softmax layer.

    Returns:
        Tuple ``(model, base_model)``: the full uncompiled Sequential model and
        the VGG16 backbone it wraps.
    """
    # Convolutional backbone without the original ImageNet classifier.
    # NOTE(review): ImageNet VGG16 weights are normally paired with
    # `vgg16.preprocess_input`; the generators in this notebook rescale by
    # 1/255 instead - worth confirming this is intentional.
    backbone = VGG16(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape
    )

    # Keep the pretrained weights fixed; only the new head is trainable.
    backbone.trainable = False

    # New classification head stacked after the backbone.
    head_layers = [
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(num_classes, activation='softmax'),
    ]
    classifier = keras.Sequential([backbone, *head_layers])

    return classifier, backbone

# Create both models
print("Creating Custom CNN model...")
custom_model = create_custom_cnn()
custom_model.summary()

# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n" + "="*50)
print("Creating Transfer Learning model (VGG16)...")
transfer_model, base_model = create_transfer_learning_model()
transfer_model.summary()

In [None]:
# Compile and train the transfer learning model (VGG16)
# We'll focus on the transfer learning model as it typically performs better

# Upper bound on the number of epochs; EarlyStopping may end training sooner.
EPOCHS = 10

# Compile the model.
# Precision/Recall are passed as explicitly named metric objects so the
# history keys ('precision', 'recall', 'val_precision', 'val_recall') do not
# depend on how a given Keras version resolves metric name strings.
transfer_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ]
)

# Define callbacks
callbacks = [
    # Stop when validation loss stalls and roll back to the best weights.
    EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
        verbose=1
    ),
    # Halve the learning rate after 3 stagnant epochs, down to min_lr.
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=0.0001,
        verbose=1
    )
]

print("Starting training of Transfer Learning Model (VGG16)...")
# Report the actual epoch budget instead of a hard-coded number.
print(f"Training for maximum of {EPOCHS} epochs with early stopping...")

# Train the model
history = transfer_model.fit(
    train_generator,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=val_generator,
    validation_steps=VALIDATION_STEPS,
    callbacks=callbacks,
    verbose=1
)

print("Training completed!")

# Avaliação e Teste do Modelo

Agora vamos avaliar o desempenho do modelo treinado usando métricas detalhadas e visualizações:

1. **Plotar curvas de treinamento** (loss e accuracy)
2. **Avaliar no conjunto de validação** 
3. **Gerar matriz de confusão**
4. **Calcular métricas detalhadas** (precision, recall, F1-score)
5. **Testar predições** em imagens individuais

In [None]:
# Plot training history
def plot_training_history(history):
    """Plot training/validation curves and print the last-epoch metrics.

    Draws three panels: accuracy, loss, and any precision/recall series that
    are present in the history.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``). Must contain
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.

    Raises:
        KeyError: If one of the four required series is missing.
    """
    metrics = history.history

    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    epochs_range = range(len(acc))

    fig, (ax_acc, ax_loss, ax_extra) = plt.subplots(1, 3, figsize=(15, 5))

    # Accuracy panel
    ax_acc.plot(epochs_range, acc, label='Training Accuracy')
    ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
    ax_acc.legend(loc='lower right')
    ax_acc.set_title('Training and Validation Accuracy')
    ax_acc.set_xlabel('Epochs')
    ax_acc.set_ylabel('Accuracy')

    # Loss panel
    ax_loss.plot(epochs_range, loss, label='Training Loss')
    ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
    ax_loss.legend(loc='upper right')
    ax_loss.set_title('Training and Validation Loss')
    ax_loss.set_xlabel('Epochs')
    ax_loss.set_ylabel('Loss')

    # Optional panel: each series is drawn only if it was actually recorded,
    # so a missing 'val_*' key no longer raises.
    optional_series = (
        ('precision', 'Training Precision', '-'),
        ('val_precision', 'Validation Precision', '-'),
        ('recall', 'Training Recall', '--'),
        ('val_recall', 'Validation Recall', '--'),
    )
    drew_optional = False
    for key, label, linestyle in optional_series:
        if key in metrics:
            ax_extra.plot(epochs_range, metrics[key], label=label, linestyle=linestyle)
            drew_optional = True

    # legend() on an empty axes only produces a warning; skip it in that case.
    if drew_optional:
        ax_extra.legend()
    ax_extra.set_title('Training and Validation Metrics')
    ax_extra.set_xlabel('Epochs')
    ax_extra.set_ylabel('Score')

    plt.tight_layout()
    plt.show()

    # Values of the LAST epoch that ran.
    print("Final Training Results:")
    print(f"Training Accuracy: {acc[-1]:.4f}")
    print(f"Validation Accuracy: {val_acc[-1]:.4f}")
    print(f"Training Loss: {loss[-1]:.4f}")
    print(f"Validation Loss: {val_loss[-1]:.4f}")

# Plot the training history
plot_training_history(history)

In [None]:
# Evaluate model on validation set
print("Evaluating model on validation set...")

# Get validation loss and metrics.
# No `steps` cap: the generator is consumed in full, so samples in a trailing
# partial batch are included in the reported metrics.
val_loss, val_accuracy, val_precision, val_recall = transfer_model.evaluate(
    val_generator,
    verbose=1
)

# F1 is undefined when precision and recall are both 0; report 0 in that case
# instead of raising ZeroDivisionError.
precision_recall_sum = val_precision + val_recall
if precision_recall_sum > 0:
    val_f1 = 2 * (val_precision * val_recall) / precision_recall_sum
else:
    val_f1 = 0.0

print(f"\nValidation Results:")
print(f"Loss: {val_loss:.4f}")
print(f"Accuracy: {val_accuracy:.4f}")
print(f"Precision: {val_precision:.4f}")
print(f"Recall: {val_recall:.4f}")
print(f"F1-Score: {val_f1:.4f}")

# Generate predictions for confusion matrix
print("\nGenerating predictions for confusion matrix...")

# Reset the validation generator to ensure we get all samples in order
val_generator.reset()

# Get predictions for every validation sample (again without a `steps` cap).
predictions = transfer_model.predict(val_generator, verbose=1)
predicted_classes = np.argmax(predictions, axis=1)

# Get true labels, in the same order the generator yields them.
true_classes = val_generator.classes[:len(predicted_classes)]

# Fail loudly if any sample was skipped rather than silently truncating labels.
assert len(predicted_classes) == val_generator.samples, (
    f"expected {val_generator.samples} predictions, got {len(predicted_classes)}"
)

# Get class names
class_names = list(val_generator.class_indices.keys())
print(f"Classes: {class_names}")
print(f"Number of predictions: {len(predicted_classes)}")
print(f"Number of true labels: {len(true_classes)}")

In [None]:
# Create and plot confusion matrix
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

def plot_confusion_matrix(true_labels, predicted_labels, class_names, title="Confusion Matrix"):
    """Plot raw and row-normalised confusion matrices side by side.

    Args:
        true_labels: Ground-truth class labels.
        predicted_labels: Predicted class labels, aligned with ``true_labels``.
        class_names: Tick labels for both heatmap axes.
        title: Prefix used in both subplot titles.

    Returns:
        Tuple ``(cm, cm_normalized)``: the raw count matrix and the matrix with
        each row divided by its total (rows without true samples are all zero).
    """
    # Calculate confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels)

    # Create figure with subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Plot raw confusion matrix
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax1)
    ax1.set_title(f'{title} - Raw Counts')
    ax1.set_xlabel('Predicted')
    ax1.set_ylabel('Actual')

    # Normalise each row by its number of true samples. A class that appears
    # only among the predictions has a zero row total; leave that row at 0
    # instead of dividing by zero (which would yield NaN and a warning).
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm.astype('float'),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    )
    sns.heatmap(cm_normalized, annot=True, fmt='.2%', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax2)
    ax2.set_title(f'{title} - Normalized (%)')
    ax2.set_xlabel('Predicted')
    ax2.set_ylabel('Actual')

    plt.tight_layout()
    plt.show()

    return cm, cm_normalized

# Generate confusion matrix
print("Creating Confusion Matrix...")
cm, cm_norm = plot_confusion_matrix(true_classes, predicted_classes, class_names,
                                   "Validation Set Confusion Matrix")

# Print detailed metrics
print("\nDetailed Classification Report:")
print(classification_report(true_classes, predicted_classes, target_names=class_names, digits=4))

In [None]:
# Analyze performance per category
from sklearn.metrics import precision_score, recall_score, f1_score

def analyze_per_category_performance(true_labels, predicted_labels, class_names):
    """Print and return per-class precision, recall, F1 and hit counts.

    Labels are compared against the positions of ``class_names``, i.e. class
    ``i`` is the one named ``class_names[i]``.

    Args:
        true_labels: Ground-truth integer class ids (array-like).
        predicted_labels: Predicted integer class ids, aligned with
            ``true_labels`` (array-like).
        class_names: Class names ordered by class id.

    Returns:
        List with one dict per class holding the keys 'Category', 'Precision',
        'Recall', 'F1-Score', 'Correct', 'Total' and 'Accuracy'.
    """
    # Accept plain lists too: the element-wise comparisons below need arrays.
    true_labels = np.asarray(true_labels)
    predicted_labels = np.asarray(predicted_labels)

    print("Performance Analysis by Category:")
    print("="*60)

    # Pin the label set so index i of every score array is class i even when a
    # class is absent from the data; zero_division=0 reports 0 for such classes
    # instead of emitting warnings.
    score_options = dict(
        labels=np.arange(len(class_names)),
        average=None,
        zero_division=0,
    )
    precision_per_class = precision_score(true_labels, predicted_labels, **score_options)
    recall_per_class = recall_score(true_labels, predicted_labels, **score_options)
    f1_per_class = f1_score(true_labels, predicted_labels, **score_options)

    # Create a summary table
    performance_data = []
    for i, class_name in enumerate(class_names):
        # Count correct and total predictions for this class
        class_mask = (true_labels == i)
        total_actual = int(np.sum(class_mask))
        correct_predictions = int(np.sum(class_mask & (predicted_labels == i)))

        performance_data.append({
            'Category': class_name.upper(),
            'Precision': f"{precision_per_class[i]:.4f}",
            'Recall': f"{recall_per_class[i]:.4f}",
            'F1-Score': f"{f1_per_class[i]:.4f}",
            'Correct': correct_predictions,
            'Total': total_actual,
            'Accuracy': f"{correct_predictions/total_actual:.4f}" if total_actual > 0 else "N/A"
        })

        print(f"{class_name.upper():20} | "
              f"Precision: {precision_per_class[i]:.4f} | "
              f"Recall: {recall_per_class[i]:.4f} | "
              f"F1: {f1_per_class[i]:.4f} | "
              f"Correct: {correct_predictions:3d}/{total_actual:3d}")

    print("="*60)

    # Overall metrics (the averages are unweighted means over the classes).
    n_samples = len(true_labels)
    overall_accuracy = np.sum(true_labels == predicted_labels) / n_samples if n_samples else 0.0
    avg_precision = np.mean(precision_per_class)
    avg_recall = np.mean(recall_per_class)
    avg_f1 = np.mean(f1_per_class)

    print(f"OVERALL METRICS:")
    print(f"Accuracy:  {overall_accuracy:.4f}")
    print(f"Avg Precision: {avg_precision:.4f}")
    print(f"Avg Recall:    {avg_recall:.4f}")
    print(f"Avg F1-Score:  {avg_f1:.4f}")

    return performance_data

# Analyze performance
performance_results = analyze_per_category_performance(true_classes, predicted_classes, class_names)

# Preparação para Validação Cruzada

Agora vamos juntar todos os dados das classes correspondentes (treino, validação e teste) em um conjunto único para depois redistribuí-los em K-fold cross validation.

**Estratégia**:
1. **Juntar todas as imagens por categoria** (normal, ulcerative_colitis, polyps, esophagitis)
2. **Criar um dataset unificado** com todas as imagens e seus labels
3. **Preparar para K-fold** onde cada fold terá uma distribuição balanceada das classes

In [None]:
# Collect all image paths from all splits (train, val, test) by category
import glob
from pathlib import Path

def _list_image_files(folder_path, extensions=('.jpg', '.jpeg', '.png')):
    """Return the sorted full paths of the image files directly in ``folder_path``."""
    return sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        # Skip dot-files and match the extension case-insensitively.
        if not name.startswith('.')
        and os.path.splitext(name)[1].lower() in extensions
    )


def collect_all_images_by_category(enhanced_loader):
    """Collect all image paths from the train, val and test folders by category.

    Every file is listed exactly once and in sorted order. Matching one
    lower-cased extension per file avoids the double counting that separate
    '*.jpg' and '*.JPG' glob patterns produce where matching ignores case, and
    sorting keeps the result independent of directory-listing order.

    Args:
        enhanced_loader: Object whose ``path`` attribute is the dataset root
            containing the 'train', 'val' and 'test' folders.

    Returns:
        Dict mapping 'normal', 'ulcerative_colitis', 'polyps' and 'esophagitis'
        to a list of image paths (train first, then val, then test). Class
        folders that do not exist are skipped.
    """
    # Category mapping for folder names
    category_folders = {
        'normal': '0_normal',
        'ulcerative_colitis': '1_ulcerative_colitis',
        'polyps': '2_polyps',
        'esophagitis': '3_esophagitis'
    }
    all_images_by_category = {category: [] for category in category_folders}
    every_category = tuple(category_folders)

    # (folder name, heading in the log, wording in the per-category line,
    #  categories looked up in that split). Only 'normal' is read from test.
    splits = (
        ('train', 'TRAIN', 'train', every_category),
        ('val', 'VALIDATION', 'validation', every_category),
        ('test', 'TEST', 'test', ('normal',)),
    )

    for position, (split_dir, heading, source, categories) in enumerate(splits):
        # Blank line between sections (a real newline, not a literal "\n").
        separator = "\n" if position else ""
        print(f"{separator}Collecting images from {heading} folder...")

        split_path = os.path.join(enhanced_loader.path, split_dir)
        for category in categories:
            folder_path = os.path.join(split_path, category_folders[category])
            if not os.path.isdir(folder_path):
                continue
            image_files = _list_image_files(folder_path)
            all_images_by_category[category].extend(image_files)
            print(f"  {category}: {len(image_files)} images from {source}")

    return all_images_by_category

# Collect all images
print("="*60)
print("COLLECTING ALL IMAGES BY CATEGORY")
print("="*60)
all_images = collect_all_images_by_category(enhanced_loader)

# Print summary ("\n" instead of "\\n" so a blank line is emitted rather than
# a literal backslash-n).
print("\n" + "="*60)
print("TOTAL IMAGES BY CATEGORY:")
print("="*60)
total_all_images = 0
for category, images in all_images.items():
    print(f"{category.upper():20}: {len(images):4d} images")
    total_all_images += len(images)

print("-" * 60)
print(f"{'TOTAL':20}: {total_all_images:4d} images")
print("="*60)

In [None]:
# Create unified dataset with all images and labels
import pandas as pd
from sklearn.utils import shuffle

def create_unified_dataset(all_images_by_category):
    """Flatten the per-category image lists into one shuffled DataFrame.

    Args:
        all_images_by_category: Dict mapping a category name ('normal',
            'ulcerative_colitis', 'polyps', 'esophagitis') to its image paths.

    Returns:
        DataFrame with the columns 'image_path', 'label' and 'category',
        shuffled with a fixed random state and re-indexed from 0.
    """
    # Numeric label assigned to each category name.
    category_to_label = {
        'normal': 0,
        'ulcerative_colitis': 1,
        'polyps': 2,
        'esophagitis': 3
    }

    print("Creating unified dataset...")

    # Three parallel columns, grown one category at a time.
    path_column, label_column, category_column = [], [], []
    for category_name, category_paths in all_images_by_category.items():
        numeric_label = category_to_label[category_name]
        n_images = len(category_paths)

        path_column += category_paths
        label_column += [numeric_label] * n_images
        category_column += [category_name] * n_images

        print(f"Added {n_images} {category_name} images with label {numeric_label}")

    frame = pd.DataFrame({
        'image_path': path_column,
        'label': label_column,
        'category': category_column
    })

    # Mix the categories (deterministically), then renumber the rows.
    shuffled = shuffle(frame, random_state=42)
    return shuffled.reset_index(drop=True)

# Create unified dataset
unified_dataset = create_unified_dataset(all_images)

# Section separators below use "\n" (not "\\n") so that blank lines are
# printed instead of a literal backslash-n.
print("\n" + "="*60)
print("UNIFIED DATASET CREATED")
print("="*60)
print(f"Total samples: {len(unified_dataset)}")
print(f"Features: {list(unified_dataset.columns)}")

# Show distribution
print("\nLabel distribution in unified dataset:")
label_counts = unified_dataset['label'].value_counts().sort_index()
category_names = ['normal', 'ulcerative_colitis', 'polyps', 'esophagitis']

for label, count in label_counts.items():
    category = category_names[label]
    percentage = (count / len(unified_dataset)) * 100
    print(f"  Label {label} ({category:18}): {count:4d} samples ({percentage:5.1f}%)")

# Show first few samples
print(f"\nFirst 5 samples:")
for i in range(min(5, len(unified_dataset))):
    row = unified_dataset.iloc[i]
    filename = os.path.basename(row['image_path'])
    print(f"  {i+1}. {filename:30} -> Label: {row['label']} ({row['category']})")

print("\n" + "="*60)

In [None]:
# Verify data integrity and prepare for K-fold cross validation
from collections import Counter

def verify_data_integrity(unified_df):
    """Check that every path in the 'image_path' column exists on disk.

    Prints the total, accessible and missing counts, followed by up to ten of
    the missing paths.

    Args:
        unified_df: DataFrame with an 'image_path' column.

    Returns:
        True when no path is missing (including for an empty frame),
        otherwise False.
    """
    print("Verifying data integrity...")

    # Only the path column is needed, so iterate over it directly rather than
    # materialising every row with iterrows(). An empty frame has no paths.
    image_paths = unified_df['image_path'] if len(unified_df) else []
    missing_files = [p for p in image_paths if not os.path.exists(p)]
    accessible_files = len(unified_df) - len(missing_files)

    print(f"Total files: {len(unified_df)}")
    print(f"Accessible files: {accessible_files}")
    print(f"Missing files: {len(missing_files)}")

    if missing_files:
        # "\n" (not "\\n"): emit a blank line, not a literal backslash-n.
        print("\nMissing files:")
        for file_path in missing_files[:10]:  # Show first 10
            print(f"  {file_path}")
        if len(missing_files) > 10:
            print(f"  ... and {len(missing_files) - 10} more")

    return not missing_files

# Verify integrity
print("="*60)
print("DATA INTEGRITY VERIFICATION")
print("="*60)
data_is_valid = verify_data_integrity(unified_dataset)

if data_is_valid:
    print("✅ All data files are accessible!")
else:
    print("❌ Some data files are missing!")

# Show detailed statistics.
# Separators use "\n" (not "\\n") so blank lines are printed instead of a
# literal backslash-n.
print("\n" + "="*60)
print("DETAILED DATASET STATISTICS")
print("="*60)

# Overall statistics
print(f"Dataset shape: {unified_dataset.shape}")
print(f"Total samples: {len(unified_dataset)}")
print(f"Number of features: {len(unified_dataset.columns)}")

# Class balance analysis (one entry per name in `category_names`)
print("\nClass distribution analysis:")
for label in range(len(category_names)):
    subset = unified_dataset[unified_dataset['label'] == label]
    category = category_names[label]
    count = len(subset)
    percentage = (count / len(unified_dataset)) * 100

    print(f"  Class {label} ({category:18}):")
    print(f"    Count: {count:4d} samples ({percentage:5.1f}%)")

    # Show some example file names
    sample_paths = subset['image_path'].head(3).map(os.path.basename).tolist()
    print(f"    Examples: {', '.join(sample_paths)}")

# Check for balanced dataset: ratio of the smallest to the largest class.
label_counts = unified_dataset['label'].value_counts().sort_index()
min_count = label_counts.min()
max_count = label_counts.max()
balance_ratio = min_count / max_count

print(f"\nClass balance analysis:")
print(f"  Minimum class size: {min_count}")
print(f"  Maximum class size: {max_count}")
print(f"  Balance ratio: {balance_ratio:.3f}")

if balance_ratio >= 0.8:
    print("  ✅ Dataset is well balanced")
elif balance_ratio >= 0.6:
    print("  ⚠️ Dataset has moderate imbalance")
else:
    print("  ❌ Dataset is highly imbalanced")

print("\n" + "="*60)
print("READY FOR K-FOLD CROSS VALIDATION!")
print("="*60)

In [None]:
# Setup K-fold Cross Validation
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

def setup_kfold_cv(unified_df, k=5, random_state=42):
    """Split the unified dataset into ``k`` stratified folds.

    Args:
        unified_df: DataFrame with 'image_path' and 'label' columns.
        k: Number of folds.
        random_state: Seed passed to ``StratifiedKFold`` (shuffling enabled).

    Returns:
        Tuple ``(fold_info, skf)``: a list with one dict per fold (1-based fold
        number, train/val indices, paths, labels, label distributions and
        sizes) and the ``StratifiedKFold`` instance that produced the splits.
    """
    print(f"Setting up {k}-Fold Cross Validation...")

    # Image paths stand in for the features; labels drive the stratification.
    X = unified_df['image_path'].values
    y = unified_df['label'].values

    # Create stratified K-fold
    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=random_state)

    # Store fold information
    fold_info = []

    # "\n" (not "\\n") so blank lines are printed, not a literal backslash-n.
    print(f"\nCreating {k} stratified folds...")
    for fold_idx, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        # Get train and validation data for this fold
        X_train_fold = X[train_idx]
        X_val_fold = X[val_idx]
        y_train_fold = y[train_idx]
        y_val_fold = y[val_idx]

        # Calculate class distribution for this fold
        train_dist = Counter(y_train_fold)
        val_dist = Counter(y_val_fold)

        fold_data = {
            'fold': fold_idx + 1,
            'train_indices': train_idx,
            'val_indices': val_idx,
            'train_paths': X_train_fold,
            'val_paths': X_val_fold,
            'train_labels': y_train_fold,
            'val_labels': y_val_fold,
            'train_distribution': train_dist,
            'val_distribution': val_dist,
            'train_size': len(train_idx),
            'val_size': len(val_idx)
        }

        fold_info.append(fold_data)

        print(f"\n  Fold {fold_idx + 1}:")
        print(f"    Train size: {len(train_idx):4d} samples")
        print(f"    Val size:   {len(val_idx):4d} samples")
        print(f"    Train distribution: {dict(train_dist)}")
        print(f"    Val distribution:   {dict(val_dist)}")

    return fold_info, skf

# Setup 5-fold cross validation
k_folds = 5
print("="*70)
print("K-FOLD CROSS VALIDATION SETUP")
print("="*70)

fold_information, stratified_kfold = setup_kfold_cv(unified_dataset, k=k_folds)

# Visualize fold distributions
def plot_fold_distributions(fold_info):
    """Show grouped bar charts of per-class sample counts for every fold.

    The left panel is drawn from each fold's 'train_distribution' and the
    right panel from its 'val_distribution'; classes 0-3 are plotted in the
    order Normal, Ulcerative Colitis, Polyps, Esophagitis.
    """
    fig, (train_ax, val_ax) = plt.subplots(1, 2, figsize=(15, 6))

    fold_labels = [f"Fold {entry['fold']}" for entry in fold_info]
    class_names = ['Normal', 'Ulcerative Colitis', 'Polyps', 'Esophagitis']
    palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
    bar_width = 0.2
    positions = range(len(fold_labels))

    def counts_per_class(dist_key):
        # One list per class, each holding that class's count in every fold
        return [
            [entry[dist_key][class_idx] for entry in fold_info]
            for class_idx in range(4)
        ]

    panels = (
        (train_ax, counts_per_class('train_distribution'),
         'Training Set Distribution Across Folds'),
        (val_ax, counts_per_class('val_distribution'),
         'Validation Set Distribution Across Folds'),
    )

    for axis, per_class_counts, panel_title in panels:
        for slot, (class_name, class_counts) in enumerate(zip(class_names, per_class_counts)):
            shifted = [pos + slot * bar_width for pos in positions]
            axis.bar(shifted, class_counts, bar_width,
                     label=class_name, color=palette[slot], alpha=0.8)

        axis.set_xlabel('Folds')
        axis.set_ylabel('Number of Samples')
        axis.set_title(panel_title)
        # Centre each tick under its group of four bars
        axis.set_xticks([pos + bar_width * 1.5 for pos in positions])
        axis.set_xticklabels(fold_labels)
        axis.legend()
        axis.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Plot distributions
print("\nVisualizing fold distributions...")
plot_fold_distributions(fold_information)

# Summary statistics. The sizes are averaged over every fold rather than read
# from fold 0 only, so the "Average ..." labels describe what is printed.
avg_train_size = sum(f['train_size'] for f in fold_information) / len(fold_information)
avg_val_size = sum(f['val_size'] for f in fold_information) / len(fold_information)
train_share = avg_train_size / (avg_train_size + avg_val_size)

print("\n" + "="*70)
print("CROSS VALIDATION SUMMARY")
print("="*70)
print(f"Number of folds: {k_folds}")
print(f"Total samples: {len(unified_dataset)}")
print(f"Average train size per fold: {avg_train_size:.1f}")
print(f"Average validation size per fold: {avg_val_size:.1f}")
print(f"Train/Val split ratio: {train_share:.1%}/{1 - train_share:.1%}")

print("\n✅ Dataset successfully unified and prepared for K-fold cross validation!")
print("\n📊 You now have all images from train, validation, and test sets")
print("   combined into a single dataset ready for redistribution.")
print("="*70)

In [None]:
# Save unified dataset and fold information for future use
import pickle
import json

def save_kfold_data(unified_df, fold_info, base_path=None):
    """Persist the unified dataset and its K-fold splits to disk.

    Everything is written into ``<base_path>/cross_validation_data``:
    ``unified_dataset.csv``, ``unified_dataset.pkl``, ``kfold_info.json``,
    ``kfold_info.pkl`` and ``dataset_summary.txt``.

    Args:
        unified_df: DataFrame with a 'label' column (values 0-3 are summarised).
        fold_info: List of fold dicts carrying 'fold', 'train_indices',
            'val_indices', 'train_size', 'val_size', 'train_distribution'
            and 'val_distribution'.
        base_path: Parent directory for the output folder; defaults to the
            current working directory.

    Returns:
        Path of the output directory.
    """
    if base_path is None:
        base_path = os.getcwd()

    output_dir = os.path.join(base_path, 'cross_validation_data')
    os.makedirs(output_dir, exist_ok=True)

    # Unified dataset: CSV for portability, pickle to keep the dtypes
    csv_path = os.path.join(output_dir, 'unified_dataset.csv')
    unified_df.to_csv(csv_path, index=False)
    print(f"✅ Unified dataset saved to: {csv_path}")

    pickle_path = os.path.join(output_dir, 'unified_dataset.pkl')
    unified_df.to_pickle(pickle_path)
    print(f"✅ Unified dataset (pickle) saved to: {pickle_path}")

    # JSON cannot encode NumPy integers/arrays, so convert to builtin types.
    fold_info_serializable = [
        {
            'fold': int(fold['fold']),
            'train_indices': fold['train_indices'].tolist(),
            'val_indices': fold['val_indices'].tolist(),
            'train_size': int(fold['train_size']),
            'val_size': int(fold['val_size']),
            'train_distribution': {int(k): int(v) for k, v in fold['train_distribution'].items()},
            'val_distribution': {int(k): int(v) for k, v in fold['val_distribution'].items()},
        }
        for fold in fold_info
    ]

    json_path = os.path.join(output_dir, 'kfold_info.json')
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump({
            'k_folds': int(len(fold_info)),
            'total_samples': int(len(unified_df)),
            'fold_information': fold_info_serializable
        }, f, indent=2)
    print(f"✅ K-fold information saved to: {json_path}")

    # The pickle keeps the original fold dicts untouched (NumPy arrays included)
    fold_pickle_path = os.path.join(output_dir, 'kfold_info.pkl')
    with open(fold_pickle_path, 'wb') as f:
        pickle.dump(fold_info, f)
    print(f"✅ K-fold information (pickle) saved to: {fold_pickle_path}")

    # Human-readable summary. Lines are joined with real newlines; the
    # previous doubled backslashes wrote a literal "\n" and produced a
    # single-line file.
    total = len(unified_df)
    categories = ['normal', 'ulcerative_colitis', 'polyps', 'esophagitis']
    summary_lines = [
        "UNIFIED DATASET SUMMARY",
        "=" * 50,
        f"Creation date: {pd.Timestamp.now()}",
        f"Total samples: {total}",
        f"Number of classes: {len(unified_df['label'].unique())}",
        f"K-fold splits: {len(fold_info)}",
        "",
        "CLASS DISTRIBUTION:",
        "-" * 30,
    ]
    for label, category in enumerate(categories):
        count = int((unified_df['label'] == label).sum())
        # Guard the percentage so an empty frame does not divide by zero
        percentage = (count / total) * 100 if total else 0.0
        summary_lines.append(f"Class {label} ({category:18}): {count:4d} ({percentage:5.1f}%)")

    summary_lines += ["", "FOLD INFORMATION:", "-" * 30]
    summary_lines += [
        f"Fold {fold['fold']}: {fold['train_size']} train, {fold['val_size']} val"
        for fold in fold_info
    ]

    summary_path = os.path.join(output_dir, 'dataset_summary.txt')
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(summary_lines) + "\n")

    print(f"✅ Dataset summary saved to: {summary_path}")

    return output_dir

# Save all data
print("="*70)
print("SAVING UNIFIED DATASET AND K-FOLD INFORMATION")
print("="*70)

output_directory = save_kfold_data(unified_dataset, fold_information)

print(f"\n📁 All files saved to: {output_directory}")

# Show what was created (sorted so the listing is stable between runs)
print("\n📋 Created files:")
for file in sorted(os.listdir(output_directory)):
    file_path = os.path.join(output_directory, file)
    file_size = os.path.getsize(file_path) / (1024*1024)  # Size in MB
    print(f"  • {file:25} ({file_size:.2f} MB)")

print("\n" + "="*70)
print("🎉 DATASET UNIFICATION COMPLETE!")
print("="*70)
print("\n✅ What was accomplished:")
print("   • Combined all images from train, validation, and test sets")
print("   • Created unified dataset with proper labels")
# Report the number of folds actually built instead of a hard-coded 5
print(f"   • Setup stratified {len(fold_information)}-fold cross validation")
print("   • Verified data integrity")
print("   • Saved all data for future use")
print("\n🚀 You're now ready to:")
print("   • Run K-fold cross validation experiments")
print("   • Compare model performance across folds")
print("   • Get more robust performance estimates")
print("   • Ensure your model generalizes well")
print("="*70)

# Validação Cruzada Estratificada - Implementação

Agora vamos implementar a validação cruzada estratificada usando os dados unificados:

## Estratégia:
1. **Treinar o modelo VGG16** em cada um dos 5 folds
2. **Avaliar métricas** para cada fold (accuracy, precision, recall, F1-score)
3. **Coletar resultados** de todos os folds
4. **Calcular estatísticas finais** (média, desvio padrão)
5. **Comparar performance** entre folds para validar robustez

## Benefícios:
- **Estimativa mais robusta** da performance do modelo
- **Redução do overfitting** aos dados de validação
- **Melhor confiança** nos resultados
- **Identificação de variabilidade** na performance

In [None]:
# Create custom data generators for cross-validation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import tempfile
import shutil

def create_fold_generators(fold_data, unified_df, img_size=224, batch_size=32, num_classes=4):
    """Create the training and validation batch generators for one fold.

    Args:
        fold_data: Fold dict providing 'fold', 'train_indices' and 'val_indices'.
        unified_df: DataFrame with 'image_path' and 'label' columns; the fold
            indices are applied positionally (``iloc``).
        img_size: Images are loaded as ``img_size x img_size``.
        batch_size: Number of images per batch.
        num_classes: Width of the one-hot label vectors (default 4, the value
            that was previously hard-coded).

    Returns:
        ``(train_gen, val_gen)``: the training generator shuffles and uses the
        augmenting ImageDataGenerator, the validation generator keeps the
        sample order and only rescales.
    """
    train_indices = fold_data['train_indices']
    val_indices = fold_data['val_indices']

    train_rows = unified_df.iloc[train_indices]
    val_rows = unified_df.iloc[val_indices]
    train_paths = train_rows['image_path'].values
    train_labels = train_rows['label'].values
    val_paths = val_rows['image_path'].values
    val_labels = val_rows['label'].values

    print(f"Fold {fold_data['fold']}:")
    print(f"  Train samples: {len(train_paths)}")
    print(f"  Val samples: {len(val_paths)}")

    # Augmentation is configured for training only
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=False,
        fill_mode='nearest'
    )

    # Validation images are only rescaled; no augmentation ranges are set
    val_datagen = ImageDataGenerator(rescale=1./255)

    class CustomGenerator(tf.keras.utils.Sequence):
        """Batch generator that loads images from disk on demand."""

        def __init__(self, paths, labels, datagen, batch_size, img_size, shuffle=True):
            # Let the Keras base class initialise its own state.
            super().__init__()
            self.paths = paths
            self.labels = labels
            self.datagen = datagen
            self.batch_size = batch_size
            self.img_size = img_size
            self.shuffle = shuffle
            self.indices = np.arange(len(self.paths))
            if self.shuffle:
                np.random.shuffle(self.indices)

        def __len__(self):
            # Ceiling division: the final partial batch is kept, so every
            # sample is seen (floor division silently dropped the tail and
            # returned 0 batches when there were fewer samples than batch_size).
            return (len(self.paths) + self.batch_size - 1) // self.batch_size

        def __getitem__(self, idx):
            batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
            batch_paths = self.paths[batch_indices]
            batch_labels = self.labels[batch_indices]

            batch_images = []
            for path in batch_paths:
                img = load_img(path, target_size=(self.img_size, self.img_size))
                img_array = img_to_array(img)
                # Transform and standardize with whichever datagen this
                # generator was given (augmenting for train, rescale-only for val)
                img_array = self.datagen.random_transform(img_array)
                img_array = self.datagen.standardize(img_array)
                batch_images.append(img_array)

            batch_images = np.array(batch_images)
            batch_labels_categorical = to_categorical(batch_labels, num_classes=num_classes)

            return batch_images, batch_labels_categorical

        def on_epoch_end(self):
            # Reshuffle between epochs so batches differ from epoch to epoch
            if self.shuffle:
                np.random.shuffle(self.indices)

    train_gen = CustomGenerator(train_paths, train_labels, train_datagen, batch_size, img_size, shuffle=True)
    val_gen = CustomGenerator(val_paths, val_labels, val_datagen, batch_size, img_size, shuffle=False)

    return train_gen, val_gen

# Smoke-test the generator factory on the first fold and report the batch counts
print("Testing generator creation...")
test_train_gen, test_val_gen = create_fold_generators(
    fold_data=fold_information[0],
    unified_df=unified_dataset,
)
print("✅ Generators created successfully!")
train_batch_count = len(test_train_gen)
print(f"Train batches per epoch: {train_batch_count}")
val_batch_count = len(test_val_gen)
print(f"Val batches per epoch: {val_batch_count}")
del train_batch_count, val_batch_count

In [None]:
# Implement K-Fold Cross Validation Training
import time
from tensorflow.keras.models import clone_model

def run_cross_validation(fold_info_list, unified_df, epochs=8, verbose=1):
    """Train and evaluate one fresh model per fold.

    Args:
        fold_info_list: List of fold dicts (each needs 'fold' plus whatever
            ``create_fold_generators`` reads from it).
        unified_df: DataFrame passed through to ``create_fold_generators``.
        epochs: Maximum epochs per fold (early stopping may end sooner).
        verbose: Verbosity forwarded to ``model.fit``.

    Returns:
        Dict with four parallel lists: 'fold_metrics' (per-fold result dicts),
        'fold_histories' (``history.history`` dicts), 'fold_models' (the
        trained models) and 'training_times' (seconds per fold).
    """
    cv_results = {
        'fold_metrics': [],
        'fold_histories': [],
        'fold_models': [],
        'training_times': []
    }
    n_folds = len(fold_info_list)

    print("="*80)
    print("STARTING K-FOLD CROSS VALIDATION")
    print("="*80)
    print(f"Number of folds: {n_folds}")
    print(f"Epochs per fold: {epochs}")
    print(f"Total training sessions: {n_folds}")
    print("="*80)

    for fold_data in fold_info_list:
        fold_num = fold_data['fold']
        print(f"\n{'='*20} FOLD {fold_num}/{n_folds} {'='*20}")

        start_time = time.time()

        print("Creating data generators...")
        train_gen, val_gen = create_fold_generators(fold_data, unified_df,
                                                    img_size=224, batch_size=32)

        # A new model per fold so no weights carry over between folds
        print("Creating fresh model...")
        fold_model, _ = create_transfer_learning_model()

        # Metric objects with explicit names: string aliases for precision and
        # recall are not resolvable on every Keras version, and the fixed
        # names keep the keys read below stable from fold to fold.
        fold_model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='categorical_crossentropy',
            metrics=[
                'accuracy',
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall'),
            ]
        )

        fold_callbacks = [
            EarlyStopping(
                monitor='val_loss',
                patience=3,
                restore_best_weights=True,
                verbose=0
            ),
            ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=2,
                min_lr=0.0001,
                verbose=0
            )
        ]

        steps_per_epoch = len(train_gen)
        validation_steps = len(val_gen)

        print(f"Training fold {fold_num}...")
        print(f"  Steps per epoch: {steps_per_epoch}")
        print(f"  Validation steps: {validation_steps}")

        history = fold_model.fit(
            train_gen,
            steps_per_epoch=steps_per_epoch,
            epochs=epochs,
            validation_data=val_gen,
            validation_steps=validation_steps,
            callbacks=fold_callbacks,
            verbose=verbose
        )

        # Read metrics by name rather than by position in the result list
        print(f"Evaluating fold {fold_num}...")
        val_metrics = fold_model.evaluate(
            val_gen, steps=validation_steps, verbose=0, return_dict=True
        )
        val_precision = float(val_metrics['precision'])
        val_recall = float(val_metrics['recall'])
        # F1 is defined as 0 when both precision and recall are 0; the bare
        # formula would raise ZeroDivisionError in that case.
        pr_sum = val_precision + val_recall
        val_f1 = 2 * val_precision * val_recall / pr_sum if pr_sum > 0 else 0.0

        fold_result = {
            'fold': fold_num,
            'val_loss': float(val_metrics['loss']),
            'val_accuracy': float(val_metrics['accuracy']),
            'val_precision': val_precision,
            'val_recall': val_recall,
            'val_f1_score': val_f1,
            'epochs_trained': len(history.history['loss']),
            'best_val_loss': min(history.history['val_loss']),
            'best_val_accuracy': max(history.history['val_accuracy'])
        }

        training_time = time.time() - start_time

        # NOTE(review): every trained model stays referenced here, so memory
        # use grows with the number of folds.
        cv_results['fold_metrics'].append(fold_result)
        cv_results['fold_histories'].append(history.history)
        cv_results['fold_models'].append(fold_model)
        cv_results['training_times'].append(training_time)

        print(f"Fold {fold_num} Results:")
        print(f"  Validation Accuracy: {fold_result['val_accuracy']:.4f}")
        print(f"  Validation Loss: {fold_result['val_loss']:.4f}")
        print(f"  Validation Precision: {fold_result['val_precision']:.4f}")
        print(f"  Validation Recall: {fold_result['val_recall']:.4f}")
        print(f"  Validation F1-Score: {fold_result['val_f1_score']:.4f}")
        print(f"  Training Time: {training_time:.1f}s")
        print(f"  Epochs Trained: {fold_result['epochs_trained']}")

    return cv_results

# Launch the full cross-validation run (one training session per fold)
print("Starting K-Fold Cross Validation...")
cv_results = run_cross_validation(
    fold_info_list=fold_information,
    unified_df=unified_dataset,
    epochs=8,
    verbose=1,
)

In [None]:
# Analyze Cross-Validation Results
def analyze_cv_results(cv_results):
    """Print a per-fold table and summary statistics for a cross-validation run.

    Args:
        cv_results: Dict with 'fold_metrics' (list of dicts carrying 'fold',
            'val_accuracy', 'val_loss', 'val_precision', 'val_recall' and
            'val_f1_score') and 'training_times' (seconds, one per fold).

    Returns:
        Dict keyed by 'accuracy', 'loss', 'precision', 'recall' and
        'f1_score'; each value is a dict with 'mean', 'std', 'min' and 'max'
        as plain floats.

    Raises:
        ValueError: If ``cv_results['fold_metrics']`` is empty.
    """
    fold_metrics = cv_results['fold_metrics']
    training_times = cv_results['training_times']

    if not fold_metrics:
        raise ValueError("cv_results['fold_metrics'] is empty; nothing to analyze")

    print("="*70)
    print("CROSS-VALIDATION RESULTS ANALYSIS")
    print("="*70)

    # Summary name -> key in each fold's result dict (insertion order is the
    # order in which the summary is printed).
    metric_keys = {
        'accuracy': 'val_accuracy',
        'loss': 'val_loss',
        'precision': 'val_precision',
        'recall': 'val_recall',
        'f1_score': 'val_f1_score',
    }

    metrics_stats = {}
    for metric_name, fold_key in metric_keys.items():
        values = [fold[fold_key] for fold in fold_metrics]
        # np.std uses its default (population) normalisation here.
        # Plain floats keep the result JSON-serializable.
        metrics_stats[metric_name] = {
            'mean': float(np.mean(values)),
            'std': float(np.std(values)),
            'min': float(np.min(values)),
            'max': float(np.max(values)),
        }

    print("Individual Fold Results:")
    print("-" * 70)
    print(f"{'Fold':<5} {'Accuracy':<10} {'Loss':<10} {'Precision':<11} {'Recall':<10} {'F1-Score':<10} {'Time(s)':<8}")
    print("-" * 70)

    for fold, fold_time in zip(fold_metrics, training_times):
        print(f"{fold['fold']:<5} {fold['val_accuracy']:<10.4f} {fold['val_loss']:<10.4f} "
              f"{fold['val_precision']:<11.4f} {fold['val_recall']:<10.4f} "
              f"{fold['val_f1_score']:<10.4f} {fold_time:<8.1f}")

    print("-" * 70)

    print("\nSummary Statistics:")
    print("-" * 50)
    for metric_name, stats in metrics_stats.items():
        # NOTE(review): the interval below is mean ± 1.96·std of the fold
        # scores, i.e. the spread of folds, not the standard error of the
        # mean. Confirm this is the quantity intended by "95% CI".
        spread = 1.96 * stats['std']
        print(f"{metric_name.upper():12}:")
        print(f"  Mean ± Std:  {stats['mean']:.4f} ± {stats['std']:.4f}")
        print(f"  Range:       [{stats['min']:.4f}, {stats['max']:.4f}]")
        print(f"  95% CI:      [{stats['mean'] - spread:.4f}, {stats['mean'] + spread:.4f}]")
        print()

    total_time = sum(training_times)
    avg_time = np.mean(training_times)
    print("TRAINING TIME:")
    print(f"  Total time:    {total_time:.1f}s ({total_time/60:.1f}m)")
    print(f"  Average/fold:  {avg_time:.1f}s")
    print(f"  Time range:    [{min(training_times):.1f}s, {max(training_times):.1f}s]")

    return metrics_stats

# Summarise the per-fold metrics and keep the statistics for the later cells
metrics_statistics = analyze_cv_results(cv_results=cv_results)

In [None]:
# Visualize Cross-Validation Results
def plot_cv_results(cv_results, metrics_stats):
    """Draw a 3x3 dashboard summarising a cross-validation run.

    Panels: per-fold metric bars, metric box plots, validation loss per fold,
    accuracy and loss training curves, mean ± std bars, coefficient of
    variation, epochs trained per fold, and mean ± 1.96·std error bars.

    Args:
        cv_results: Dict with 'fold_metrics' (per-fold result dicts) and
            'fold_histories' (per-fold dicts holding 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' sequences).
        metrics_stats: Dict with 'accuracy', 'precision', 'recall' and
            'f1_score' entries, each providing 'mean' and 'std'.
    """
    fold_metrics = cv_results['fold_metrics']
    fold_histories = cv_results['fold_histories']

    folds = [f"Fold {fold['fold']}" for fold in fold_metrics]
    accuracies = [fold['val_accuracy'] for fold in fold_metrics]
    losses = [fold['val_loss'] for fold in fold_metrics]
    precisions = [fold['val_precision'] for fold in fold_metrics]
    recalls = [fold['val_recall'] for fold in fold_metrics]
    f1_scores = [fold['val_f1_score'] for fold in fold_metrics]

    metrics_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
    metric_colors = ['blue', 'green', 'orange', 'red']
    metrics_data = [accuracies, precisions, recalls, f1_scores]

    fig = plt.figure(figsize=(20, 15))

    # 1. Grouped bars of each metric per fold, centred on the fold tick
    ax1 = fig.add_subplot(3, 3, 1)
    x = np.arange(len(folds))
    width = 0.15
    for slot, (name, values, color) in enumerate(zip(metrics_names, metrics_data, metric_colors)):
        offset = (slot - (len(metrics_names) - 1) / 2) * width
        ax1.bar(x + offset, values, width, label=name, alpha=0.8, color=color)
    ax1.set_xlabel('Folds')
    ax1.set_ylabel('Score')
    ax1.set_title('Metrics Across Folds')
    ax1.set_xticks(x)
    ax1.set_xticklabels(folds)
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. Box plot of metrics. Tick labels are set on the axis afterwards so
    # the call does not depend on boxplot's label keyword, whose name has
    # changed between matplotlib releases.
    ax2 = fig.add_subplot(3, 3, 2)
    ax2.boxplot(metrics_data)
    ax2.set_xticklabels(metrics_names)
    ax2.set_title('Metrics Distribution')
    ax2.set_ylabel('Score')
    ax2.grid(True, alpha=0.3)

    # 3. Validation loss per fold
    ax3 = fig.add_subplot(3, 3, 3)
    ax3.bar(folds, losses, color='purple', alpha=0.7)
    ax3.set_title('Validation Loss Across Folds')
    ax3.set_xlabel('Folds')
    ax3.set_ylabel('Loss')
    ax3.grid(True, alpha=0.3)

    curve_colors = ['blue', 'green', 'red', 'purple', 'orange']

    def _plot_curves(ax, train_key, val_key, title, ylabel):
        """Plot train (solid) and validation (dashed) curves for every fold."""
        for i, history in enumerate(fold_histories):
            epochs = range(1, len(history[train_key]) + 1)
            # Colour and line style are passed as keywords: a format string
            # such as 'blue-' is rejected by matplotlib. The modulo keeps
            # runs with more folds than colours from raising IndexError.
            color = curve_colors[i % len(curve_colors)]
            ax.plot(epochs, history[train_key], color=color, linestyle='-',
                    label=f'Fold {i+1} Train', alpha=0.7)
            ax.plot(epochs, history[val_key], color=color, linestyle='--',
                    label=f'Fold {i+1} Val', alpha=0.7)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.grid(True, alpha=0.3)

    # 4 and 5. Training curves for all folds
    _plot_curves(fig.add_subplot(3, 3, 4), 'accuracy', 'val_accuracy',
                 'Training Curves - Accuracy', 'Accuracy')
    _plot_curves(fig.add_subplot(3, 3, 5), 'loss', 'val_loss',
                 'Training Curves - Loss', 'Loss')

    # 6. Mean and standard deviation
    stat_keys = ['accuracy', 'precision', 'recall', 'f1_score']
    means = [metrics_stats[key]['mean'] for key in stat_keys]
    stds = [metrics_stats[key]['std'] for key in stat_keys]

    ax6 = fig.add_subplot(3, 3, 6)
    ax6.bar(metrics_names, means, yerr=stds, capsize=5, alpha=0.7, color=metric_colors)
    ax6.set_title('Mean ± Std of Metrics')
    ax6.set_ylabel('Score')
    ax6.grid(True, alpha=0.3)

    # 7. Stability plot (coefficient of variation)
    ax7 = fig.add_subplot(3, 3, 7)
    cv_coeffs = [std/mean for mean, std in zip(means, stds)]
    ax7.bar(metrics_names, cv_coeffs, alpha=0.7, color='purple')
    ax7.set_title('Coefficient of Variation (Stability)')
    ax7.set_ylabel('CV = Std/Mean')
    ax7.grid(True, alpha=0.3)

    # 8. Epochs trained per fold
    ax8 = fig.add_subplot(3, 3, 8)
    epochs_trained = [fold['epochs_trained'] for fold in fold_metrics]
    ax8.bar(folds, epochs_trained, alpha=0.7, color='brown')
    ax8.set_title('Epochs Trained per Fold')
    ax8.set_xlabel('Folds')
    ax8.set_ylabel('Epochs')
    ax8.grid(True, alpha=0.3)

    # 9. Symmetric error bars of half-width 1.96·std around each mean.
    # NOTE(review): this is the spread of fold scores, not the standard error
    # of the mean. Confirm that is what the title should describe.
    ax9 = fig.add_subplot(3, 3, 9)
    half_widths = [1.96 * std for std in stds]
    ax9.errorbar(range(len(metrics_names)), means, yerr=half_widths,
                 fmt='o', capsize=5, capthick=2, markersize=8)
    ax9.set_xticks(range(len(metrics_names)))
    ax9.set_xticklabels(metrics_names)
    ax9.set_title('95% Confidence Intervals')
    ax9.set_ylabel('Score')
    ax9.grid(True, alpha=0.3)

    fig.tight_layout()
    plt.show()

# Render the cross-validation dashboard from the collected results
print("Creating comprehensive visualizations...")
plot_cv_results(cv_results=cv_results, metrics_stats=metrics_statistics)

In [None]:
# Compare Cross-Validation Results with Original Model and Save Results
def compare_with_original_model(cv_results, original_val_accuracy=None):
    """Print the cross-validated accuracy next to a single-split accuracy.

    Args:
        cv_results: Dict whose 'fold_metrics' entries carry 'val_accuracy'.
        original_val_accuracy: Validation accuracy of the single train/val
            split model; when None only the cross-validation figures are shown.
    """
    fold_accuracies = [fold['val_accuracy'] for fold in cv_results['fold_metrics']]
    cv_accuracy_mean = np.mean(fold_accuracies)
    cv_accuracy_std = np.std(fold_accuracies)
    has_original = original_val_accuracy is not None

    print("="*70)
    print("COMPARISON: CROSS-VALIDATION vs SINGLE SPLIT")
    print("="*70)

    if has_original:
        print("Original Model (Single Split):")
        print(f"  Validation Accuracy: {original_val_accuracy:.4f}")
        print()

    # NOTE(review): the interval is mean ± 1.96·std of the fold accuracies
    # (spread of folds), not the standard error of the mean.
    spread = 1.96 * cv_accuracy_std
    print("Cross-Validation Results:")
    print(f"  Mean Accuracy: {cv_accuracy_mean:.4f} ± {cv_accuracy_std:.4f}")
    print(f"  Confidence Interval (95%): [{cv_accuracy_mean - spread:.4f}, {cv_accuracy_mean + spread:.4f}]")

    if has_original:
        diff = cv_accuracy_mean - original_val_accuracy
        print(f"\nDifference (CV - Original): {diff:+.4f}")

        # Inclusive bounds: with zero spread across folds an identical
        # accuracy must count as consistent, not as a significant difference.
        if abs(diff) <= cv_accuracy_std:
            print("✅ Results are consistent - difference within 1 standard deviation")
        elif abs(diff) <= 2 * cv_accuracy_std:
            print("⚠️ Moderate difference - within 2 standard deviations")
        else:
            print("❌ Significant difference - more than 2 standard deviations")

    print("\n" + "="*70)
    print("CROSS-VALIDATION BENEFITS:")
    print("="*70)
    print("✅ More robust performance estimate")
    print("✅ Reduced overfitting to validation set")
    print("✅ Better understanding of model variability")
    print("✅ More reliable confidence in results")
    print("="*70)

# Save cross-validation results
def save_cv_results(cv_results, metrics_stats, output_dir=None, total_samples=None):
    """Write the cross-validation results to disk.

    Three files are created in ``output_dir``: ``cv_results.json`` (metrics,
    timings and summary), ``cv_results_complete.pkl`` (the whole
    ``cv_results`` dict) and ``cv_summary_report.txt`` (plain-text report).

    Args:
        cv_results: Dict with 'fold_metrics' and 'training_times' (plus any
            other entries, which only go into the pickle).
        metrics_stats: Dict of per-metric dicts with 'mean' and 'std';
            'accuracy' and 'f1_score' must be present.
        output_dir: Target directory; defaults to
            ``<cwd>/cross_validation_results``.
        total_samples: Sample count to print in the report. When omitted, the
            length of the notebook-level ``unified_dataset`` is used if that
            name exists, otherwise "unknown" is written.

    Returns:
        The directory the files were written to.
    """
    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'cross_validation_results')

    os.makedirs(output_dir, exist_ok=True)

    if total_samples is None:
        # Backward-compatible fallback to the global this function used to
        # read unconditionally.
        dataset = globals().get('unified_dataset')
        total_samples = len(dataset) if dataset is not None else 'unknown'

    def _json_default(value):
        """Convert NumPy scalars/arrays, which json cannot encode, to builtins."""
        if hasattr(value, 'tolist'):
            return value.tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    results_to_save = {
        'fold_metrics': cv_results['fold_metrics'],
        'training_times': cv_results['training_times'],
        'summary_statistics': metrics_stats,
        'cv_summary': {
            'num_folds': len(cv_results['fold_metrics']),
            'mean_accuracy': metrics_stats['accuracy']['mean'],
            'std_accuracy': metrics_stats['accuracy']['std'],
            'mean_f1_score': metrics_stats['f1_score']['mean'],
            'std_f1_score': metrics_stats['f1_score']['std'],
            'total_training_time': sum(cv_results['training_times'])
        }
    }

    json_path = os.path.join(output_dir, 'cv_results.json')
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(results_to_save, f, indent=2, default=_json_default)
    print(f"✅ CV results saved to: {json_path}")

    # NOTE(review): this pickles everything in cv_results, including any model
    # objects under 'fold_models'. Confirm the installed Keras version can
    # pickle models, and only load this file from a trusted source.
    pickle_path = os.path.join(output_dir, 'cv_results_complete.pkl')
    with open(pickle_path, 'wb') as f:
        pickle.dump(cv_results, f)
    print(f"✅ Complete CV results (including models) saved to: {pickle_path}")

    # Plain-text report, joined with real newlines (the doubled backslashes
    # previously wrote a literal "\n" and produced a single-line file).
    report_lines = [
        "CROSS-VALIDATION SUMMARY REPORT",
        "=" * 50,
        f"Date: {pd.Timestamp.now()}",
        f"Number of folds: {len(cv_results['fold_metrics'])}",
        f"Total samples: {total_samples}",
        "",
        "PERFORMANCE METRICS:",
        "-" * 30,
    ]
    for metric_name, stats in metrics_stats.items():
        # NOTE(review): mean ± 1.96·std of the fold scores, not the standard
        # error of the mean.
        spread = 1.96 * stats['std']
        report_lines += [
            f"{metric_name.upper()}:",
            f"  Mean: {stats['mean']:.4f}",
            f"  Std:  {stats['std']:.4f}",
            f"  95% CI: [{stats['mean'] - spread:.4f}, {stats['mean'] + spread:.4f}]",
            "",
        ]

    report_lines += ["INDIVIDUAL FOLD RESULTS:", "-" * 30]
    report_lines += [
        f"Fold {fold['fold']}: Acc={fold['val_accuracy']:.4f}, "
        f"F1={fold['val_f1_score']:.4f}, Loss={fold['val_loss']:.4f}"
        for fold in cv_results['fold_metrics']
    ]

    report_path = os.path.join(output_dir, 'cv_summary_report.txt')
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(report_lines) + "\n")

    print(f"✅ Summary report saved to: {report_path}")

    return output_dir

# Compare results with the earlier single-split model. `val_accuracy` is used
# only if it is still defined in the kernel namespace; otherwise None is
# passed and the comparison section is skipped.
# You can update this with the actual value from your previous training
original_accuracy = globals().get('val_accuracy')
compare_with_original_model(cv_results, original_accuracy)

# Save results
print("\nSaving cross-validation results...")
cv_output_dir = save_cv_results(cv_results, metrics_statistics)

print(f"\n📊 Cross-validation complete! Results saved to: {cv_output_dir}")

# Final summary
total_training_time = sum(cv_results['training_times'])
print("\n" + "="*70)
print("🎉 STRATIFIED K-FOLD CROSS-VALIDATION COMPLETE!")
print("="*70)
print(f"✅ Trained and evaluated {len(cv_results['fold_metrics'])} models")
print(f"✅ Mean validation accuracy: {metrics_statistics['accuracy']['mean']:.4f} ± {metrics_statistics['accuracy']['std']:.4f}")
print(f"✅ Mean F1-score: {metrics_statistics['f1_score']['mean']:.4f} ± {metrics_statistics['f1_score']['std']:.4f}")
print(f"✅ Total training time: {total_training_time:.1f}s ({total_training_time/60:.1f}m)")
print("✅ All results and models saved for future analysis")
print("="*70)