In [9]:
import os
from typing import Tuple, List
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, recall_score, f1_score, accuracy_score
from sklearn.datasets import fetch_openml
import pennylane as qml
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import time
import matplotlib.pyplot as plt
import seaborn as sns
from plot_utils import PlotUtils
import csv
from datetime import datetime
#from pen import QuantumClassifier
#from pen_hilbert import QuantumHilbertClassifier
import random
from sklearn.preprocessing import MinMaxScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import KFold # Importar KFold


In [10]:

def prepare_data_multiclass(
    data_dir: str = "data/dataset_v2/Training/",
    image_size: int = 128,
    seed: int = 42
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Carga, redimensiona y etiqueta imágenes para clasificación binaria:
    'tumor' (glioma, meningioma, pituitary) vs 'no_tumor'.
    Realiza downsampling para balancear las clases (todas tendrán el mismo número de imágenes que la clase minoritaria).
    Guarda un log de las imágenes seleccionadas en results/graphics/downsampling_log.txt.
    Devuelve (X, y) como arrays de numpy.
    """
    random.seed(seed)
    
    # === CAMBIO CLAVE: Mapeo de clases para clasificación binaria ===
    # Todas las categorías de tumores se mapean a la etiqueta 0 ('tumor')
    # 'no_tumor' se mapea a la etiqueta 1 ('no_tumor')
    raw_class_map = {
        "glioma_tumor": 0,       # Ahora es 'tumor'
        "meningioma_tumor": 0,   # Ahora es 'tumor'
        "pituitary_tumor": 0,    # Ahora es 'tumor'
        "no_tumor": 1            # Sigue siendo 'no_tumor'
    }
    
    # Agrupar archivos por la nueva etiqueta binaria
    files_by_binary_label = {0: [], 1: []} # 0 para 'tumor', 1 para 'no_tumor'

    # Recopilar todos los archivos y asignar su etiqueta binaria
    for class_name_raw, binary_label in raw_class_map.items():
        class_dir = os.path.join(data_dir, class_name_raw)
        
        if not os.path.exists(class_dir):
            print(f"Advertencia: El directorio {class_dir} no existe. Se saltará esta clase.")
            continue
            
        files = [os.path.join(class_dir, f) for f in os.listdir(class_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
        files_by_binary_label[binary_label].extend(files)
            
    # === Downsampling para balancear las dos clases binarias (0 y 1) ===
    # Encontrar el número de imágenes de la clase minoritaria
    # Considera solo las clases que tienen imágenes
    counts = [len(files) for files in files_by_binary_label.values() if len(files) > 0]
    if not counts:
        raise ValueError(f"No se encontraron imágenes en ningún directorio de clases para generar datos en {data_dir}. Verifique la ruta y los contenidos.")
    
    min_count_binary = min(counts)
    print(f"La clase binaria minoritaria tiene {min_count_binary} imágenes en {data_dir}")
    
    selected_files_log = {} # Usaremos las etiquetas originales en el log para mayor claridad
    final_X, final_y = [], []
    
    for binary_label, files_list in files_by_binary_label.items():
        random.shuffle(files_list)
        selected_for_binary_label = files_list[:min_count_binary] # Seleccionar hasta min_count_binary
        
        # Procesar imágenes y añadir al dataset final
        for f in selected_for_binary_label:
            try:
                img = Image.open(f).convert('L').resize((image_size, image_size))
                final_X.append(np.array(img))
                final_y.append(binary_label)
                
                # Para el log, asociar el archivo con su nombre de clase original y la nueva etiqueta binaria
                original_class_name = os.path.basename(os.path.dirname(f))
                if original_class_name not in selected_files_log:
                    selected_files_log[original_class_name] = []
                selected_files_log[original_class_name].append(f)
                
            except Exception as e:
                print(f"Error al cargar la imagen {f}: {e}")
                continue # Saltar la imagen con error

    # Guardar log (puede ser útil para depuración)
    os.makedirs('results/logs', exist_ok=True)
    log_file_path = os.path.join('results', 'logs', f'downsampling_log_{os.path.basename(os.path.normpath(data_dir))}.txt')
    with open(log_file_path, 'w') as f:
        f.write(f"Downsampling log for data from: {data_dir}\n\n")
        f.write("Binary Class Mapping: 0='Tumor' (glioma, meningioma, pituitary), 1='No Tumor'\n\n")
        for original_class_name, files in selected_files_log.items():
            # Determinar la etiqueta binaria para el log
            current_binary_label = None
            for k, v in raw_class_map.items():
                if k == original_class_name:
                    current_binary_label = v
                    break
            
            f.write(f"Original Class: {original_class_name} (Mapped to Binary Label: {current_binary_label}) - {len(files)} images selected:\n")
            for file in files:
                f.write(f"    {file}\n")
            f.write("\n")
            
    final_X = np.stack(final_X)
    final_y = np.array(final_y)
    
    # Imprimir la distribución final de las clases binarias
    unique_labels, counts_labels = np.unique(final_y, return_counts=True)
    print(f"\nDistribución final de clases binarias en {data_dir}:")
    for label, count in zip(unique_labels, counts_labels):
        label_name = "Tumor" if label == 0 else "No Tumor"
        print(f"  Clase {label} ({label_name}): {count} imágenes")

    return final_X, final_y

# NOTA: En tu clase QuantumClassifier, el n_classes debe cambiar a 2.
# En la sección _build_model, la línea:
# self.model = HybridModel(qlayer, n_classes=4, n_qubits_model=self.n_qubits, input_features=self.pca_features)
# DEBE SER:
# self.model = HybridModel(qlayer, n_classes=2, n_qubits_model=self.n_qubits, input_features=self.pca_features)

# También, si estás usando PlotUtils, asegúrate de que plot_confusion_matrix use
# los nuevos nombres de clases:
# class_names = ['Tumor', 'No Tumor']

In [11]:

class QuantumClassifier:
    def __init__(self, n_qubits=8, pca_features=8, batch_size=16, epochs=20, lr=0.01, layers=3, seed=42):
        self.n_qubits = n_qubits
        self.pca_features = pca_features
        self.batch_size = batch_size
        self.epochs = epochs
        self.lr = lr
        self.layers = layers
        self.seed = seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        self._prepare_data_custom()
        self._build_model()

    def _prepare_data_custom(self):
        X, y_train = prepare_data_multiclass(data_dir="data/dataset_v2/Training/", image_size=256, seed=self.seed)
        X = X.reshape((X.shape[0], -1)) / 255.0  # flatten and normalize
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        pca = PCA(n_components=self.pca_features)
        X_pca = pca.fit_transform(X_scaled)
        
        # Añadir este escalado
        scaler_angle = MinMaxScaler(feature_range=(0, np.pi / 2))
        X_train = scaler_angle.fit_transform(X_pca)

        X_test, y_test = prepare_data_multiclass(data_dir="data/dataset_v2/Testing/", image_size=256, seed=self.seed)
        X_test = X_test.reshape((X_test.shape[0], -1)) / 255.0  # flatten and normalize
        scaler = StandardScaler()
        X_scaled_test = scaler.fit_transform(X_test)
        pca = PCA(n_components=self.pca_features)
        X_pca_test = pca.fit_transform(X_scaled_test)
        
        # Añadir este escalado
        scaler_angle = MinMaxScaler(feature_range=(0, np.pi / 2))
        X_test = scaler_angle.fit_transform(X_pca_test)


        # Usar X_pca_scaled para el split
        #X_train, X_test, y_train, y_test = train_test_split(X_pca_scaled, y, test_size=0.2, random_state=self.seed)
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = np.array(y_train)
        self.y_test = np.array(y_test)

    def _build_model(self):
        #dev = qml.device("default.qubit", wires=self.n_qubits)
        dev = qml.device("lightning.qubit", wires=self.n_qubits) # Sugerido
        def circuit(inputs, weights):
            #for i in range(self.n_qubits):
            #   qml.RY(inputs[i], wires=i)
            #qml.templates.StronglyEntanglingLayers(weights, wires=range(self.n_qubits))
            qml.AngleEmbedding(inputs, wires=range(self.n_qubits), rotation='Y')
            qml.AngleEmbedding(inputs, wires=range(self.n_qubits), rotation='Z')
            # Usa un ansatz más simple y menos capas
            qml.templates.BasicEntanglerLayers(weights, wires=range(self.n_qubits))
            return [qml.expval(qml.PauliZ(i)) for i in range(self.n_qubits)]

        #weight_shapes = {"weights": (self.layers, self.n_qubits, 3)}
        weight_shapes = {"weights": (1, self.n_qubits)}  # <- CORRECTO

        qlayer = qml.qnn.TorchLayer(qml.qnode(dev)(circuit), weight_shapes)

        class HybridModel(nn.Module):
            def __init__(self, qlayer, n_classes=2, n_qubits_model=None, input_features=None):
                super().__init__()
                self.fc_input = nn.Linear(input_features, n_qubits_model)  # <- corregido
                self.qlayer = qlayer
                self.bn1 = nn.BatchNorm1d(n_qubits_model)
                self.hidden1 = nn.Linear(n_qubits_model, 64)
                self.dropout1 = nn.Dropout(0.3)
                self.hidden2 = nn.Linear(64, 32)
                self.dropout2 = nn.Dropout(0.2)
                self.output = nn.Linear(32, n_classes)
                self.relu = nn.ReLU()

            def forward(self, x):
                x = self.fc_input(x)
                x = self.qlayer(x)
                x = self.bn1(x)
                x = self.relu(self.hidden1(x))
                x = self.dropout1(x)
                x = self.relu(self.hidden2(x))
                x = self.dropout2(x)
                return self.output(x)

        self.model = HybridModel(qlayer, n_classes=2, n_qubits_model=self.n_qubits, input_features=self.pca_features)


       

    def train_and_evaluate(self):
        X_train_t = torch.tensor(self.X_train, dtype=torch.float32)
        y_train_t = torch.tensor(self.y_train, dtype=torch.long)
        train_loader = DataLoader(TensorDataset(X_train_t, y_train_t), batch_size=self.batch_size, shuffle=True)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        loss_fn = nn.CrossEntropyLoss()
        scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)
        loss_history = []
        start = time.time()
        for epoch in range(self.epochs):
            for xb, yb in train_loader:
                pred = self.model(xb)
                loss = loss_fn(pred, yb)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            loss_history.append(loss.item())
            print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")
                # <- Aquí va el scheduler
            scheduler.step(loss.item())
            for param_group in optimizer.param_groups:
                print("Current LR:", param_group['lr'])
            
        end = time.time()

        with torch.no_grad():
            X_test_t = torch.tensor(self.X_test, dtype=torch.float32)
            y_test_t = torch.tensor(self.y_test, dtype=torch.long)
            preds = self.model(X_test_t)
            preds_cls = torch.argmax(preds, dim=1)
            #probs = torch.softmax(preds, dim=1)
            #threshold = 0.75  # Solo clasificamos como 'no_tumor' si hay > 90% de confianza

            #preds_cls = torch.where(
            #    probs[:, 1] >= threshold,  # índice 1 es 'no_tumor'
            #    torch.tensor(1),
            #    torch.tensor(0)
            #)
            acc = (preds_cls == y_test_t).float().mean().item()
            cm = confusion_matrix(y_test_t.cpu().numpy(), preds_cls.cpu().numpy())
            recall = recall_score(y_test_t.cpu().numpy(), preds_cls.cpu().numpy(), average='binary')
            f1 = f1_score(y_test_t.cpu().numpy(), preds_cls.cpu().numpy(), average='binary')

        class_names = ['tumor', 'no_tumor']
        PlotUtils.plot_loss(loss_history, save_path='results/graphics/loss_function_pen_bin.png')
        PlotUtils.plot_confusion_matrix(cm, class_names=class_names, save_path='results/graphics/confusion_matrix_pen_bin.png')

        results = {
            'epochs': self.epochs,
            'learning_rate': self.lr,
            'features': self.pca_features,
            'layers': self.layers,
            'batch_size': self.batch_size,
            'loss': float(loss.item()),
            'accuracy': acc,
            'recall': recall,
            'f1_score': f1,
            'confusion_matrix': cm.tolist(),
            'execution_time': end - start
        }
        print(f"Accuracy: {acc:.4f}")
        print("Confusion Matrix:\n", cm)
        print(f"Recall: {recall:.4f}")
        print(f"F1 score: {f1:.4f}")
        return results


In [12]:
class ExperimentRunner:
    def __init__(self, epochs, lr, features, layers, batch_size, seed):
        self.epochs = epochs
        self.lr = lr
        self.features = features
        self.layers = layers
        self.batch_size = batch_size
        self.seed = seed

    def run_and_log(self, results, csv_file):
        duration = results.get('execution_time', None)
        date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        header = [
            'date', 'execution_time', 'epochs', 'learning_rate', 'features', 'layers', 'batch_size',
            'loss', 'accuracy', 'recall', 'f1_score'
        ]
        row = [
            date, f'{duration:.2f}', results['epochs'], results['learning_rate'], results['features'],
            results['layers'], results['batch_size'], results['loss'], results['accuracy'], results['recall'], results['f1_score']
        ]
        file_exists = os.path.isfile(csv_file)
        with open(csv_file, 'a', newline='') as f:
            writer = csv.writer(f)
            if not file_exists:
                writer.writerow(header)
            writer.writerow(row)
        print('Results saved in', csv_file)
        print('Run summary:')
        print(row)


In [13]:
class QuantumRunner(ExperimentRunner):
    def run(self):
        print("\n--- Running QUANTUM QuantumClassifier ---")
        qc = QuantumClassifier(
            n_qubits=self.features,
            pca_features=self.features,
            batch_size=self.batch_size,
            epochs=self.epochs,
            lr=self.lr,
            layers=self.layers,
            seed=self.seed
        )
        unique_classes, counts = np.unique(qc.y_train, return_counts=True)
        print("Distribution of classes in training data (y_train):")
        for cls, count in zip(unique_classes, counts):
            print(f"  Class {cls}: {count} samples")
        unique_classes_y, counts_y = np.unique(qc.y_test, return_counts=True)
        print("Distribution of classes in training data (y_train):")
        for cls, counts_y in zip(unique_classes_y, counts_y):
            print(f"  Class {cls}: {counts_y} samples")
        start_time = time.time()
        results = qc.train_and_evaluate()
        end_time = time.time()
        self.duration = end_time - start_time
        results['execution_time'] = self.duration
        self.run_and_log(results, 'results_log.csv')

In [14]:
# =====================
# CONFIGURATION CONSTANTS
# =====================
MODE = 'quantum'  # Options: 'quantum', 'quantum_hilbert', 'both'
EPOCHS = 30
# CONFIGURACIÓN SUGERIDA
LEARNING_RATE = 0.0005 # O incluso 0.001
FEATURES = 8
LAYERS = 1
BATCH_SIZE = 32
SEED = 42
USE_HILBERT = True  # Only relevant for quantum

In [15]:
QuantumRunner(EPOCHS, LEARNING_RATE, FEATURES, LAYERS, BATCH_SIZE, SEED).run()


--- Running QUANTUM QuantumClassifier ---
La clase binaria minoritaria tiene 2000 imágenes en data/dataset_v2/Training/

Distribución final de clases binarias en data/dataset_v2/Training/:
  Clase 0 (Tumor): 2000 imágenes
  Clase 1 (No Tumor): 2000 imágenes
La clase binaria minoritaria tiene 250 imágenes en data/dataset_v2/Testing/

Distribución final de clases binarias en data/dataset_v2/Testing/:
  Clase 0 (Tumor): 250 imágenes
  Clase 1 (No Tumor): 250 imágenes
Distribution of classes in training data (y_train):
  Class 0: 2000 samples
  Class 1: 2000 samples
Distribution of classes in training data (y_train):
  Class 0: 250 samples
  Class 1: 250 samples
Epoch 1: Loss = 0.7123
Current LR: 0.0005
Epoch 2: Loss = 0.5730
Current LR: 0.0005
Epoch 3: Loss = 0.7318
Current LR: 0.0005
Epoch 4: Loss = 0.6180
Current LR: 0.0005
Epoch 5: Loss = 0.5537
Current LR: 0.0005
Epoch 6: Loss = 0.6777
Current LR: 0.0005
Epoch 7: Loss = 0.6282
Current LR: 0.0005
Epoch 8: Loss = 0.4689
Current LR: 0.0