# In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt

# === 1. Load the data ===
# Both tables are read as tab-separated text with the first column as index.
# The code below treats the COLUMNS as sample identifiers and transposes so
# that samples become rows.
df_transcriptomic = pd.read_csv("TCGA.BRCA.sampleMap_HiSeqV2_exon", sep="\t", index_col=0)
df_proteomic = pd.read_csv("TCGA.BRCA.sampleMap_RPPA", sep="\t", index_col=0)

# Keep only the samples (columns) that appear in both datasets.
common_patients = df_transcriptomic.columns.intersection(df_proteomic.columns)
if common_patients.empty:
    # Fail early with an explicit message; otherwise the failure would only
    # surface later, when fitting on a matrix with zero rows.
    raise ValueError(
        "No common samples between the transcriptomic and proteomic datasets."
    )

# Subset both tables to the shared samples and transpose: rows = samples.
X_trans = df_transcriptomic[common_patients].T
X_prot = df_proteomic[common_patients].T

# Join both feature matrices side by side (transcriptome + proteome).
X = pd.concat([X_trans, X_prot], axis=1)

# === 2. Impute missing values with the per-feature mean ===
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# === 3. Scale every feature with MinMaxScaler (default settings) ===
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_imputed)

# === 4. PCA to reduce dimensionality to 3 components ===
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X_scaled)

# === 5. DBSCAN sweep with eps from 0.1 to 10.0 ===
# Each entry of `results` is (eps, n_clusters, n_noise, score); score is -1
# when the silhouette is not computed (fewer than two clusters found).
results = []

print("=== DBSCAN sweep eps=0.1 a 10.0 ===")
# Build the grid from integers instead of a float step: np.arange with a
# non-integer step accumulates rounding error and its length is not
# guaranteed. This yields exactly 100 values, 0.1, 0.2, ..., 10.0.
for eps in np.arange(1, 101) / 10:
    db = DBSCAN(eps=eps, min_samples=5)
    labels = db.fit_predict(X_pca)

    # DBSCAN marks noise with the label -1; it is not counted as a cluster.
    n_clusters = len(set(labels.tolist()) - {-1})
    n_noise = int(np.count_nonzero(labels == -1))

    has_multiple_clusters = n_clusters > 1
    if has_multiple_clusters:
        # NOTE(review): noise points (label -1) are passed to silhouette_score
        # together with the clustered points, so they are scored as one more
        # group. Confirm this is intended, or exclude them before scoring.
        score = silhouette_score(X_pca, labels)
    else:
        score = -1

    results.append((eps, n_clusters, n_noise, score))

    if has_multiple_clusters:
        print(f"eps={eps:.1f} | clusters={n_clusters:3d} | noise={n_noise:4d} | silhouette={score:.3f}")

# === 6. (Optional) Visualize how the silhouette evolves with eps ===
# Select the runs where a silhouette was actually computed (n_clusters > 1).
# Filtering on `score > 0` would also discard genuine zero/negative
# silhouette values, not just the -1 placeholder.
eps_vals = [r[0] for r in results if r[1] > 1]
scores = [r[3] for r in results if r[1] > 1]

# Markers keep an isolated valid eps visible even when its neighbours were
# skipped (a lone point draws nothing with a bare line).
plt.plot(eps_vals, scores, marker='o')
plt.xlabel('Epsilon')
plt.ylabel('Silhouette Score')
plt.title('DBSCAN: Silhouette vs Eps')
plt.grid(True)
plt.show()
