In [9]:
import pandas as pd
from umap import umap_
import plotly.express as px
from pathlib import Path
from ipywidgets import interact, IntSlider, FloatSlider, Dropdown

In [None]:



# Directory containing the input CSVs: the script's own folder when run as a
# file, otherwise (e.g. in a notebook kernel, where __file__ is undefined) the
# current working directory.
HERE = Path(__file__).parent.resolve() if "__file__" in globals() else Path().resolve()

# header=None -> columns get integer labels; index_col=0 -> the first CSV column
# becomes the row index (presumably the sample identifier -- confirm against the files).
df_turkic = pd.read_csv(HERE / "turkicDefAncientScaledG25.csv",
                        index_col=0, header=None)
df_all    = pd.read_csv(HERE / "allAncientScaledG25.csv",
                        index_col=0, header=None)

# Tag each frame with the class used to colour the scatter plot.
df_turkic['label'] = 'Turkic'
df_all['label']    = 'Other'

# Stack both frames; ignore_index=True discards the original row index.
df_combined = pd.concat([df_turkic, df_all], ignore_index=True)

# Every column except the class tag is a numeric feature fed to UMAP.
feature_cols = [col for col in df_combined.columns if col != 'label']

# De-duplicate on the feature columns only. Comparing the 'label' column as well
# would treat a row present in BOTH files as two distinct rows (Turkic vs Other)
# and embed it twice. keep='first' retains the 'Turkic' copy because df_turkic
# is concatenated first.
df = (df_combined
        .drop_duplicates(subset=feature_cols, keep='first')
        .reset_index(drop=True))

# Feature matrix and the aligned class labels consumed by the UMAP cell.
X = df[feature_cols].values
y = df['label'].values



In [None]:
def run_umap(n_neighbors=15, min_dist=0.1, metric='euclidean', random_state=0):
    """Fit a 2-D UMAP embedding of the notebook-level matrix ``X`` and plot it.

    Parameters
    ----------
    n_neighbors : int
        Forwarded to ``UMAP(n_neighbors=...)``.
    min_dist : float
        Forwarded to ``UMAP(min_dist=...)``.
    metric : str
        Forwarded to ``UMAP(metric=...)``.
    random_state : int or None
        Forwarded to ``UMAP(random_state=...)`` so that repeated calls with the
        same arguments yield the same embedding. Pass ``None`` to leave the
        embedding unseeded.

    Returns
    -------
    None
        The figure is shown via ``fig.show()``; nothing is returned.

    Notes
    -----
    Reads the globals ``X`` (feature matrix) and ``y`` (labels aligned with the
    rows of ``X``) defined earlier in the notebook.
    """
    reducer = umap_.UMAP(
        n_components=2,
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        metric=metric,
        # Previously accepted but never forwarded, which made every run differ.
        random_state=random_state,
    )
    embedding = reducer.fit_transform(X)

    emb_df = pd.DataFrame(embedding, columns=['UMAP1', 'UMAP2'])
    emb_df['label'] = y

    fig = px.scatter(
        emb_df,
        x="UMAP1",
        y="UMAP2",
        color="label",
        color_discrete_map={"Turkic": "red", "Other": "blue"},
        # "Other" is listed first so the "Turkic" trace is added last and drawn on top.
        category_orders={"label": ["Other", "Turkic"]},
        title=f"UMAP | n_neighbors={n_neighbors}, min_dist={min_dist}, metric={metric}",
    )
    fig.update_traces(marker=dict(size=6, opacity=0.8))
    fig.show()



# Interactive controls for run_umap. continuous_update=False makes each slider
# fire only when the handle is released; with the default (True) every
# intermediate value during a drag would trigger a complete UMAP fit.
interact(
    run_umap,
    n_neighbors=IntSlider(value=15, min=5, max=100, step=1,
                          continuous_update=False),
    min_dist=FloatSlider(value=0.1, min=0.0, max=1.0, step=0.05,
                         continuous_update=False),
    metric=Dropdown(options=['euclidean', 'cosine', 'manhattan'], value='euclidean')
);


interactive(children=(IntSlider(value=15, description='n_neighbors', min=5), FloatSlider(value=0.1, descriptio…