<a href="https://colab.research.google.com/github/YoshGreen/PROYECTO-FINAL-IA-INF---372-DAT---245/blob/main/PCA_DATASET_CANCER_Mama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# Importaciones necesarias para la evaluación:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score # Importamos f1_score

# --- Preparación de datos ---
ruta = "/content/drive/MyDrive/data/dataset_mama.csv"
try:
    df = pd.read_csv(ruta, sep=";")
except FileNotFoundError:
    print(f"Error: No se encontró el archivo en {ruta}")
    exit()

df = df.drop(columns=["id", "Unnamed: 32"], errors="ignore")
y_no_smote = df["diagnosis"].map({"M": 1, "B": 0})
X_no_smote = df.drop(columns=["diagnosis"])

scaler = StandardScaler()
X_no_smote_scaled = scaler.fit_transform(X_no_smote)

# --- Función de evaluación modificada ---
def evaluar_pca_con_f1(X, y, n_components):
    """
    Aplica PCA y evalúa usando un clasificador SVM, calculando
    Accuracy y F1-score promedio en 5 ejecuciones.
    """
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X)

    accuracies = []
    f1_scores = [] # Lista para almacenar los F1-scores

    for i in range(5):  # 5 ejecuciones
        X_train, X_test, y_train, y_test = train_test_split(
            X_pca, y, test_size=0.2, random_state=i, stratify=y
        )

        model = SVC(kernel="linear")
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        accuracies.append(accuracy_score(y_test, y_pred))
        # Calculamos el F1-score y lo añadimos a la lista
        f1_scores.append(f1_score(y_test, y_pred))

    var_explicada = np.sum(pca.explained_variance_ratio_)
    # Devolvemos 3 valores: Accuracy promedio, Varianza y F1-score promedio
    return np.mean(accuracies), var_explicada, np.mean(f1_scores)

# --- Ejecución del bucle para replicar tu tabla ---

# Añadimos 30 componentes para simular el "dataset original"
componentes = [2, 5, 10, 30]

print("Evaluación de PCA con Accuracy y F1-score:\n")
print(f"{'Componentes':<15} | {'Varianza (%)':<15} | {'Accuracy':<10} | {'F1-score':<10}")
print("-" * 65)

for n in componentes:
    # Capturamos los 3 valores devueltos por la función
    acc, var, f1 = evaluar_pca_con_f1(X_no_smote_scaled, y_no_smote, n)

    # Imprimimos los resultados formateados de manera similar a tu tabla
    print(f"{n:<15} | {var*100:<15.1f} | {acc:<10.4f} | {f1:<10.4f}")


Mounted at /content/drive
Evaluación de PCA con Accuracy y F1-score:

Componentes     | Varianza (%)    | Accuracy   | F1-score  
-----------------------------------------------------------------
2               | 63.2            | 0.9421     | 0.9220    
5               | 84.7            | 0.9596     | 0.9460    
10              | 95.2            | 0.9649     | 0.9518    
30              | 100.0           | 0.9614     | 0.9453    
