In [1]:
import os
# Project root that holds the input files read by later cells (the embeddings
# joblib file and the BRCA csv) and the docs/ output folder. Set the BRCA_DIR
# environment variable to run this notebook on another machine; the original
# author's path remains the default so existing behaviour is unchanged.
cwd = os.environ.get('BRCA_DIR', '/Users/dimitrismarkopoulos/Desktop/BRCA')
os.chdir(cwd)

import joblib

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.metrics import accuracy_score, adjusted_rand_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split

In [2]:
# ---------------------------------
# Load embeddings
# ---------------------------------
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load embedding files produced by this project or a trusted source.
embeddings = joblib.load("embeddings_2d.joblib")

# The t-SNE embedding may be stored under either spelling. Resolve the key
# explicitly so a missing entry fails here with a clear message instead of
# putting None into the dictionary and crashing later inside the KNN loop.
tsne_key = next((key for key in ('tSNE', 't-SNE') if key in embeddings), None)
if tsne_key is None:
    raise KeyError("embeddings_2d.joblib contains neither a 'tSNE' nor a 't-SNE' entry")

nonlinear_embeddings = { 
    # drop PCA
    "UMAP": embeddings['UMAP'],
    "t-SNE": embeddings[tsne_key],
    "Isomap": embeddings['Isomap'],
    "Spectral": embeddings['Spectral']
}
print(f'Load embeddings: {list(nonlinear_embeddings.keys())}')


# ---------------------------------
# Import data
# ---------------------------------
# Column position at which the table is split into X (left) and y_all (right).
# NOTE(review): presumably the first 353 columns are features and the rest are
# label/metadata columns -- confirm against BRCA_data.csv.
N_FEATURE_COLUMNS = 353

data = pd.read_csv('BRCA_data.csv', index_col=0)
X = data.iloc[:, :N_FEATURE_COLUMNS].copy()
y_all = data.iloc[:, N_FEATURE_COLUMNS:].copy()

# Integer-encode the Subtype column (one code per distinct category).
# NOTE(review): .cat.codes encodes missing values as -1, which a classifier
# would treat as its own class -- confirm Subtype has no missing entries.
y = data["Subtype"].astype("category").cat.codes

# Every embedding must have one row per sample in the label vector; check it
# here so a stale or mismatched embeddings file is reported by name.
for emb_name, emb in nonlinear_embeddings.items():
    if len(emb) != len(y):
        raise ValueError(
            f"Embedding '{emb_name}' has {len(emb)} rows but BRCA_data.csv has {len(y)} samples"
        )

Load embeddings: ['UMAP', 't-SNE', 'Isomap', 'Spectral']


In [3]:
# Score each nonlinear embedding by the accuracy of a 5-nearest-neighbour
# classifier predicting the subtype codes on a held-out 20% split. The split
# seed is fixed so every embedding is evaluated on the same partition.
# NOTE(review): the split is not stratified by subtype; with imbalanced classes
# a single 80/20 split can give a noisy estimate -- consider stratify=y.
results = []
for name, X_emb in nonlinear_embeddings.items():
    X_train, X_test, y_train, y_test = train_test_split(
        X_emb, y, test_size=0.2, random_state=42
    )
    knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results.append((name, acc))

# Tabulate the scores with the best-performing method first
results_df = pd.DataFrame(results, columns=["Method", "KNN_Accuracy"])
results_df = results_df.sort_values("KNN_Accuracy", ascending=False)
display(results_df)

# =====================================================
# Plotly Compact Visualization
# =====================================================
# Bar chart of the KNN accuracy per embedding method, shown inline and also
# exported as a standalone HTML page under docs/.

colors = ["#E4572E", "#17BEBB", "#76B041", "#4C6EF5"]  # UMAP, t-SNE, Isomap, Spectral

# results_df is sorted by accuracy, so slicing the palette positionally would
# colour bars by rank rather than by method. Look colours up by method name so
# each method keeps the colour documented above regardless of its rank.
method_colors = dict(zip(["UMAP", "t-SNE", "Isomap", "Spectral"], colors))
bar_colors = [method_colors.get(method, "#888888") for method in results_df["Method"]]

fig = go.Figure(go.Bar(
    x=results_df["Method"],
    y=results_df["KNN_Accuracy"],
    text=[f"{v:.2f}" for v in results_df["KNN_Accuracy"]],
    textposition="outside",
    marker=dict(color=bar_colors, line=dict(color="#333", width=1.2))
))

fig.update_layout(
    title=dict(
        text="KNN Accuracy for Nonlinear Embeddings",
        x=0.5, xanchor="center", font=dict(size=18)
    ),
    xaxis=dict(title="Method", tickfont=dict(size=13)),
    # Accuracy is a proportion, so pin the axis to the full [0, 1] range
    yaxis=dict(title="Accuracy", range=[0, 1], tickfont=dict(size=12), gridcolor="rgba(200,200,200,0.3)"),
    plot_bgcolor="rgba(250,250,250,1)",
    paper_bgcolor="white",
    width=620,
    height=400,
    showlegend=False
)

fig.show()

# write_html does not create missing parent folders; make sure docs/ exists
os.makedirs('docs', exist_ok=True)
fig.write_html('docs/knn_eval_nonlinear.html', include_plotlyjs="cdn")


Unnamed: 0,Method,KNN_Accuracy
1,t-SNE,0.707865
0,UMAP,0.640449
2,Isomap,0.629213
3,Spectral,0.617978
