In [50]:
import os
from typing import Tuple, List
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, recall_score, f1_score
from sklearn.datasets import fetch_openml
import pennylane as qml
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import time
import matplotlib.pyplot as plt
import seaborn as sns
import csv
from datetime import datetime
import random
from sklearn.preprocessing import MinMaxScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau


### Constants 
This section defines important constants and configuration variables used throughout the notebook.
 
- `CLASS_MAP`: A dictionary mapping each tumor class name to a unique integer label. This is used for encoding categorical labels into numerical form for model training and evaluation.
- `CLASS_MAP_NUMBER`: The total number of classes in the classification problem, derived from the length of `CLASS_MAP`.
- `BASE_URL`, `TRAINING_URL`, `TESTING_URL`: Paths to the dataset directories. These are used to load training and testing data.
- `RESULTS_GRAPHICS_URL`, `RESULTS_CSV_URL`: Paths where the results (such as plots and CSV files) will be saved.
 
You can modify these constants to adapt the notebook to different datasets or directory structures.


In [51]:
CLASS_MAP = {
        "glioma_tumor": 0,        
        "pituitary_tumor": 1,
        "no_tumor": 2
    }
CLASS_MAP_NUMBER = len(CLASS_MAP)
BASE_URL= "../data/dataset_multiclase/"
TRAINING_URL= BASE_URL + "Training/"
TESTING_URL= BASE_URL + "Testing/"
RESULTS_GRAPHICS_URL = '../results/graphics/'
RESULTS_CSV_URL = '../results/csv/'
RESULTS_MODELS_URL = '../results/models'

In [52]:
class PlotUtils:
    @staticmethod
    def plot_loss(loss_history, title='Loss function by epochs', save_path=None):
        plt.figure()
        plt.plot(loss_history, marker='o')
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.grid(True)
        if save_path:
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show()

    @staticmethod
    def plot_confusion_matrix(cm, class_names=None, title='Confusion Matrix', save_path=None):
        plt.figure()
        xticks = class_names if class_names is not None else 'auto'
        yticks = class_names if class_names is not None else 'auto'
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=xticks, yticklabels=yticks)
        plt.title(title)
        plt.xlabel('Prediction')
        plt.ylabel('Real')
        if save_path:
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show() 

In [None]:
def prepare_data_multiclass(
    data_dir: str = TRAINING_URL,
    image_size: int = 128, ## Revisar el tamano de la imagen
    seed: int = 42
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Carga, redimensiona y etiqueta imágenes de las 4 clases para clasificación multiclase.
    Devuelve (X, y) como arrays de numpy.
    """
    random.seed(seed)
    class_map = CLASS_MAP
    files_by_class = {}
    for class_name in class_map:
        class_dir = os.path.join(data_dir, class_name)
        files = [os.path.join(class_dir, f) for f in os.listdir(class_dir) if f.lower().endswith('.jpg')]
        files_by_class[class_name] = files
    X, y = [], []
    for class_name, label in class_map.items():
        for f in files_by_class[class_name]:
            img = Image.open(f).convert('L').resize((image_size, image_size)) ### revisar si esto tiene interpolacion! 
            X.append(np.array(img))
            y.append(label)
    X = np.stack(X)
    y = np.array(y)
    return X, y

In [None]:
class QuantumClassifier:
    def __init__(self, n_qubits=8, pca_features=8, batch_size=16, epochs=20, lr=0.01, layers=3, seed=42):
        self.n_qubits = n_qubits
        self.pca_features = pca_features
        self.batch_size = batch_size
        self.epochs = epochs
        self.lr = lr
        self.layers = layers
        self.seed = seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        self._prepare_data_custom()
        self._build_model()

    def prepare_data(self, data_dir):
        X, y = prepare_data_multiclass(data_dir=data_dir, image_size=256, seed=self.seed)
        X = X.reshape((X.shape[0], -1)) / 255.0  # flatten and normalize
        scaler = StandardScaler() ### verificar este metodo 
        X_scaled = scaler.fit_transform(X)
        pca = PCA(n_components=self.pca_features) ######## TODO  probar otros puede ser autoencoding
        X_pca = pca.fit_transform(X_scaled)
        scaler_angle = MinMaxScaler(feature_range=(0, np.pi / 2))
        x_train = scaler_angle.fit_transform(X_pca)
        return x_train, y        

    def _prepare_data_custom(self):
        x_train, y_train =self.prepare_data(TRAINING_URL)
        x_test, y_test =self.prepare_data(TESTING_URL)
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = np.array(y_train)
        self.y_test = np.array(y_test)

    def _build_model(self):
        dev = qml.device("lightning.qubit", wires=self.n_qubits)
        def circuit(inputs, weights): #hacer varias capas cuanticas 
            qml.AngleEmbedding(inputs, wires=range(self.n_qubits), rotation='Y')
            # Encoding aca con cnot verificar si estose esta aplicando 
            # StronglyEntanglerLayers justificar porque se usa uno u otro
            #
            qml.templates.BasicEntanglerLayers(weights, wires=range(self.n_qubits))
            return [qml.expval(qml.PauliZ(i)) for i in range(self.n_qubits)]
        weight_shapes = {"weights": (self.layers, self.n_qubits)}
        qlayer = qml.qnn.TorchLayer(qml.qnode(dev)(circuit), weight_shapes)

        class HybridModel(nn.Module):
            ### Datos para entrenar
            ### Circuito rotaciones 
            ### Circuito 
            ###  Cnots que hcaen entanglement
            ### exval qnn evitar mezclarlos 
            #variational entablement en sandwwich 
            def __init__(self, qlayer, n_classes=CLASS_MAP_NUMBER, n_qubits_model=None, input_features=None):
                super().__init__()
                self.fc_input = nn.Linear(input_features, n_qubits_model)
                self.qlayer = qlayer # Aca agregamos una capa de QML  ### 
                # TODO revisar el QML de pennylane
                self.bn1 = nn.BatchNorm1d(n_qubits_model)
                self.hidden1 = nn.Linear(n_qubits_model, 64)
                self.dropout1 = nn.Dropout(0.3) ##### 
                self.hidden2 = nn.Linear(64, 32)
                self.dropout2 = nn.Dropout(0.2)
                self.hidden3 = nn.Linear(32, 16)
                self.dropout3 = nn.Dropout(0.3)
                self.hidden4 = nn.Linear(16, 8)
                self.dropout4 = nn.Dropout(0.3)
                self.output = nn.Linear(8, n_classes)                
                self.relu = nn.ReLU()
            def forward(self, x):
                x = self.fc_input(x)
                x = self.qlayer(x)
                x = self.bn1(x)
                x = self.relu(self.hidden1(x))
                x = self.dropout1(x)
                x = self.relu(self.hidden2(x))
                x = self.dropout2(x)
                x = self.relu(self.hidden3(x))
                x = self.dropout3(x)
                x = self.relu(self.hidden4(x))
                x = self.dropout4(x)
                return self.output(x)
        self.model = HybridModel(qlayer, n_classes=CLASS_MAP_NUMBER, n_qubits_model=self.n_qubits, input_features=self.pca_features)

    def train_and_evaluate(self):
        x_train_t = torch.tensor(self.x_train, dtype=torch.float32)
        y_train_t = torch.tensor(self.y_train, dtype=torch.long)
        train_loader = DataLoader(TensorDataset(x_train_t, y_train_t), batch_size=self.batch_size, shuffle=True)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        loss_fn = nn.CrossEntropyLoss()
        scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=10, factor=0.8)
        loss_history = []
        start = time.time()
        epoch_results = []
        print(f"---------------EPOCHS--------------------")
        for epoch in range(self.epochs):
            self.model.train() # Set model to training mode
            for xb, yb in train_loader:
                pred = self.model(xb)
                loss = loss_fn(pred, yb)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            loss_history.append(loss.item())

            # Evaluate on test set at the end of each epoch to get accuracy
            self.model.eval()
            with torch.no_grad():
                x_test_t_epoch = torch.tensor(self.x_test, dtype=torch.float32)
                y_test_t_epoch = torch.tensor(self.y_test, dtype=torch.long)
                preds_epoch = self.model(x_test_t_epoch)
                preds_cls_epoch = torch.argmax(preds_epoch, dim=1)
                current_acc = (preds_cls_epoch == y_test_t_epoch).float().mean().item()

            print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}, Accuracy = {current_acc:.4f}")
            scheduler.step(loss.item())
            for param_group in optimizer.param_groups:
                print("Current LR:", param_group['lr'])

            # Store epoch results for later sorting
            epoch_results.append((current_acc, epoch + 1, self.model.state_dict()))

        end = time.time()

        # Sort results by accuracy in descending order and get the top 2
        epoch_results.sort(key=lambda x: x[0], reverse=True)
        top_two_epochs = epoch_results[:2]

        print("\nSaving top 2 models:")
        for acc, ep, state_dict in top_two_epochs:
            self._save_model(state_dict, ep, acc, RESULTS_MODELS_URL)

        # --- The rest of your existing evaluation and logging code ---
        # This part will evaluate the model *after* all epochs are done,
        # which means it reflects the performance of the *last* epoch.
        # The 'acc' variable here will be the accuracy of the last epoch,
        # not necessarily one of the top two saved.
        with torch.no_grad():
            x_test_t = torch.tensor(self.x_test, dtype=torch.float32)
            y_test_t = torch.tensor(self.y_test, dtype=torch.long)
            preds = self.model(x_test_t)
            preds_cls = torch.argmax(preds, dim=1)
            acc = (preds_cls == y_test_t).float().mean().item() # This 'acc' is for the last epoch
            cm = confusion_matrix(y_test_t.cpu().numpy(), preds_cls.cpu().numpy())
            recall = recall_score(y_test_t.cpu().numpy(), preds_cls.cpu().numpy(), average='macro')
            f1 = f1_score(y_test_t.cpu().numpy(), preds_cls.cpu().numpy(), average='macro')

        sorted_items = sorted(CLASS_MAP.items(), key=lambda item: item[1])
        class_names = [name for name, _ in sorted_items]

        PlotUtils.plot_loss(loss_history, save_path=RESULTS_GRAPHICS_URL + 'MC_loss_function_pen.png')
        PlotUtils.plot_confusion_matrix(cm, class_names=class_names, save_path=RESULTS_GRAPHICS_URL +'MC_confusion_matrix_pen.png')

        results = {
            'epochs': self.epochs,
            'learning_rate': self.lr,
            'features': self.pca_features,
            'layers': self.layers,
            'batch_size': self.batch_size,
            'loss': float(loss.item()), # Last epoch's loss
            'accuracy': acc, # Last epoch's accuracy
            'recall': recall,
            'f1_score': f1,
            'confusion_matrix': cm.tolist(),
            'execution_time': end - start
        }
        print(f"Accuracy (last epoch): {acc:.4f}")
        print("Confusion Matrix:\\n", cm)
        print(f"Recall: {recall:.4f}")
        print(f"F1 score: {f1:.4f}")
        return results

    def _save_model(self, model_state_dict, epoch, accuracy, save_dir):
        """
        Saves the model state dictionary to a specified directory
        with a structured filename.
        """
        os.makedirs(save_dir, exist_ok=True)
        filename = f"MC_best_model_epoch_{epoch}_acc_{accuracy:.4f}.pt"
        filepath = os.path.join(save_dir, filename)
        torch.save(model_state_dict, filepath)
        print(f"Model saved: {filepath}")

In [55]:
class ExperimentRunner:
    def __init__(self, epochs, lr, features, layers, batch_size, seed):
        self.epochs = epochs
        self.lr = lr
        self.features = features
        self.layers = layers
        self.batch_size = batch_size
        self.seed = seed

    def csv_log(self, results, csv_file):
        """
        Logs experiment results to a CSV file.

        Args:
            results (dict): Dictionary containing experiment results, including 'execution_time'.
            csv_file (str): Path to the CSV file where results will be saved.
        """
        duration_seconds = results.get('execution_time', None)
        log_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        header = [
            'date', 'execution_time_seconds', 'epochs', 'learning_rate', 'features', 'layers', 'batch_size',
            'loss', 'accuracy', 'recall', 'f1_score'
        ]
        row = [
            log_date, f'{duration_seconds:.2f}' if duration_seconds is not None else '',
            results.get('epochs'), results.get('learning_rate'), results.get('features'),
            results.get('layers'), results.get('batch_size'), results.get('loss'),
            results.get('accuracy'), results.get('recall'), results.get('f1_score')
        ]
        file_exists = os.path.isfile(csv_file)
        with open(csv_file, 'a', newline='') as f:
            writer = csv.writer(f)
            if not file_exists:
                writer.writerow(header)
            writer.writerow(row)
        print(f'Results saved to {csv_file}')
        print('Run summary:')
        print(row)


    def log_class_distribution(self,unique_classes, counts, data_type):
        """
        Logs the distribution of classes in a dataset.

        Args:
            unique_classes (np.ndarray): Array of unique class labels.
            counts (np.ndarray): Array of counts for each unique class.
            data_type (str): A string describing the type of data (e.g., "training data", "test data").
        """
        print(f"Distribution of classes in {data_type}:")
        for cls, count in zip(unique_classes, counts):
            print(f"  Class {cls}: {count} samples")

In [56]:
class QuantumRunner(ExperimentRunner):
    def run(self):
        print("\n--- Running QU⌈ANTUM QuantumClassifier ---")
        qc = QuantumClassifier(
            n_qubits=self.features,
            pca_features=self.features,
            batch_size=self.batch_size,
            epochs=self.epochs,
            lr=self.lr,
            layers=self.layers,
            seed=self.seed
        )
        unique_classes_train, counts_train = np.unique(qc.y_train, return_counts=True)
        self.log_class_distribution(unique_classes_train, counts_train, "training data (y_train)")

        unique_classes_test, counts_test = np.unique(qc.y_test, return_counts=True)
        self.log_class_distribution(unique_classes_test, counts_test, "test data (y_test)")
        # ... existing code ...
        start_time = time.time()
        results = qc.train_and_evaluate()
        end_time = time.time()
        self.duration = end_time - start_time
        results['execution_time'] = self.duration
        self.csv_log(results, RESULTS_CSV_URL+'MC_results_log_.csv')

In [57]:
# =====================
#### CONFIGURATION CONSTANTS
# =====================
MODE = 'quantum'  # Options: 'quantum', 'quantum_hilbert', 'both'
EPOCHS = 4
# CONFIGURACIÓN SUGERIDA
LEARNING_RATE = 0.001 # O incluso 0.001
FEATURES = 8 #numero de qubits width del circuito cuantico
LAYERS = 10 # profundidad del circuito cuantico
BATCH_SIZE = 32
SEED = 42
USE_HILBERT = True  # Only relevant for quantum

In [58]:
QuantumRunner(EPOCHS, LEARNING_RATE, FEATURES, LAYERS, BATCH_SIZE, SEED).run()


--- Running QU⌈ANTUM QuantumClassifier ---
Distribution of classes in training data (y_train):
  Class 0: 2000 samples
  Class 1: 2000 samples
  Class 2: 2000 samples
Distribution of classes in test data (y_test):
  Class 0: 250 samples
  Class 1: 250 samples
  Class 2: 250 samples
---------------EPOCHS--------------------
Epoch 1: Loss = 0.8373, Accuracy = 0.5640
Current LR: 0.001
Epoch 2: Loss = 1.0845, Accuracy = 0.6240
Current LR: 0.001
Epoch 3: Loss = 0.7252, Accuracy = 0.6960
Current LR: 0.001
Epoch 4: Loss = 0.7710, Accuracy = 0.7320
Current LR: 0.001

Saving top 2 models:
Model saved: ../results/models\MC_best_model_epoch_4_acc_0.7320.pt
Model saved: ../results/models\MC_best_model_epoch_3_acc_0.6960.pt
Accuracy (last epoch): 0.7320
Confusion Matrix:\n [[225  20   5]
 [ 49 184  17]
 [ 57  53 140]]
Recall: 0.7320
F1 score: 0.7267
Results saved to ../results/csv/MC_results_log_.csv
Run summary:
['2025-07-15 23:08:52', '199.29', 4, 0.001, 8, 10, 32, 0.7710404396057129, 0.73199999