In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix, RocCurveDisplay, PrecisionRecallDisplay, ConfusionMatrixDisplay, classification_report
)

# Global reproducibility seed, shared by every stochastic step in this cell.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Default figure size for matplotlib plots.
plt.rcParams['figure.figsize'] = (7, 5)

# Load the breast-cancer dataset and wrap it in labelled pandas containers.
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
# Target encoding noted by the original author: 0 = malignant, 1 = benign.
y = pd.Series(data=data.target, name='target')

# Preview five randomly chosen rows (seeded, so the same rows appear each run).
X.sample(n=5, random_state=RANDOM_STATE)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
204,12.47,18.6,81.09,481.9,0.09965,0.1058,0.08005,0.03821,0.1925,0.06373,...,14.97,24.64,96.05,677.9,0.1426,0.2378,0.2671,0.1015,0.3014,0.0875
70,18.94,21.31,123.6,1130.0,0.09009,0.1029,0.108,0.07951,0.1582,0.05461,...,24.86,26.58,165.9,1866.0,0.1193,0.2336,0.2687,0.1789,0.2551,0.06589
131,15.46,19.48,101.7,748.9,0.1092,0.1223,0.1466,0.08087,0.1931,0.05796,...,19.26,26.0,124.9,1156.0,0.1546,0.2394,0.3791,0.1514,0.2837,0.08019
431,12.4,17.68,81.47,467.8,0.1054,0.1316,0.07741,0.02799,0.1811,0.07102,...,12.88,22.91,89.61,515.8,0.145,0.2629,0.2403,0.0737,0.2556,0.09359
540,11.54,14.44,74.65,402.9,0.09984,0.112,0.06737,0.02594,0.1818,0.06782,...,12.26,19.68,78.78,457.8,0.1345,0.2118,0.1797,0.06918,0.2329,0.08134


In [2]:
from sklearn.decomposition import PCA
# Project the feature matrix onto its first three principal components.
# NOTE(review): X is handed to PCA without standardization here; the recorded
# output shows the first component carrying ~98% of the variance, presumably
# because a few large-magnitude features dominate -- confirm this is intended.
# A later cell repeats the projection behind a StandardScaler.
N_FEATURES = 3  # number of principal components kept
pca = PCA(random_state=RANDOM_STATE, n_components=N_FEATURES)
X_pca = pca.fit_transform(X)

# Report the cumulative and the per-component explained variance ratios.
print(
    f"Componentes: {N_FEATURES} | "
    f"Varianza explicada acumulada: {pca.explained_variance_ratio_.sum():.3f}"
)
print("Varianza por componente:", pca.explained_variance_ratio_.round(3), "\n")

Componentes: 3 | Varianza explicada acumulada: 1.000
Varianza por componente: [0.982 0.016 0.002] 



In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_breast_cancer
import plotly.express as px

# Data: reload the dataset so this cell does not rely on earlier cells.
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data=data.target, name='target')

# Three-component PCA preceded by standardization.
k = 3
pipe = make_pipeline(
    StandardScaler(),
    PCA(random_state=42, n_components=k),
)
X_pca3 = pipe.fit_transform(X)

# Keep a handle on the fitted PCA step (make_pipeline names it 'pca') so its
# explained-variance ratios can be read later.
pca = pipe.named_steps.pca



In [12]:
# Plotting frame: one column per principal component plus a readable class label.
df_pca3 = pd.DataFrame(
    X_pca3,
    columns=[f"PC{i}" for i in range(1, 4)],
).assign(label=y.map({0: "Malignant (0)", 1: "Benign (1)"}))

# 3D scatter of the three components, coloured by class.
fig = px.scatter_3d(
    df_pca3,
    x="PC1",
    y="PC2",
    z="PC3",
    color="label",
    opacity=0.7,
    color_discrete_map={"Malignant (0)": "red", "Benign (1)": "green"},
)
fig.update_traces(marker={"size": 4})
fig.update_layout(
    title="PCA 3D — Breast Cancer",
    legend_title_text="Clase",
    # Each axis title shows the share of variance explained by its component.
    scene={
        f"{axis}axis_title": f"PC{pos + 1} ({pca.explained_variance_ratio_[pos]*100:.1f}% var)"
        for pos, axis in enumerate("xyz")
    },
)

# Display options (inside Jupyter, fig.show() is enough; "browser" opens a tab):
# fig.show()              # inline in the notebook
fig.show("browser")   # or open in the default browser
# fig.write_html("pca_3d.html", auto_open=True)  # save and open as HTML
