In [1]:
import sys
# Put the parent directory on the import path — presumably so the project-local
# `topomap` package imported further down resolves when running from this folder.
sys.path.append('../')
import pandas as pd 

Code to recompute the metrics and figures from the Topomap++ paper.


# Compute Metrics

The following cells can take a while to run.


In [74]:
import numpy as np
from topomap.test_utils import Compare_MSTs
import pandas as pd

# Names of all datasets evaluated in this notebook.
datasets = [
    'Iris', 'Mfeat', 'Seeds', 'MNIST',
    'BIGANN', 'Urban', 'LLM', 'StreetAware',
]

def compute_metrics(datasets, alphas, Ls, Rs):
    """Compare approximate MSTs against the exact MST over a parameter grid.

    For every (dataset, alpha, L, R) combination this loads
    ``../data/msts/mst_{dataset}.npy`` and
    ``../data/msts/AMST_{dataset}_R_{R}_L_{L}_A_{int(10*alpha)}.npy``, hands
    both arrays to ``Compare_MSTs`` (``distance='bottleneck'``, plot saved
    under ``../images/`` and not shown), and tags the returned result with the
    parameters that produced it.

    Parameters
    ----------
    datasets : iterable of str
        Dataset names as they appear in the ``.npy`` file names.
    alphas : iterable of float
        Alpha values; each is encoded in the file name as ``int(10 * alpha)``.
    Ls : iterable
        Values for the ``L`` ("complexity") parameter.
    Rs : iterable
        Values for the ``R`` ("graph degree") parameter.

    Returns
    -------
    pandas.DataFrame
        One row per combination, in nested iteration order (datasets outermost,
        Rs innermost), with the extra columns ``dataset``, ``alpha``, ``L``
        and ``R``. Empty if any of the inputs is empty.
    """
    rows = []
    # Lazily enumerate the full grid; same order as four nested loops.
    grid = (
        (name, alpha, complexity, graph_degree)
        for name in datasets
        for alpha in alphas
        for complexity in Ls
        for graph_degree in Rs
    )
    for name, alpha, complexity, graph_degree in grid:
        alpha_tag = int(10*alpha)
        exact_path = f'../data/msts/mst_{name}.npy'
        approx_path = f'../data/msts/AMST_{name}_R_{graph_degree}_L_{complexity}_A_{alpha_tag}.npy'
        plot_path = f'../images/Comparison_mst_{name}_R_{graph_degree}_L_{complexity}A_{alpha_tag}.png'

        # NOTE(review): allow_pickle=True can execute code stored in the file;
        # only load .npy files that come from a trusted source.
        exact_mst = np.load(exact_path, allow_pickle=True)
        approx_mst = np.load(approx_path, allow_pickle=True)

        row = Compare_MSTs(
            exact_mst,
            approx_mst,
            dataset_name=f'{name} dataset, R={graph_degree} L={complexity} A={alpha}',
            distance = 'bottleneck',
            show_plot=False,
            path_save_plot=plot_path,
        )
        # Record which configuration this result belongs to.
        for column, setting in (('dataset', name),
                                ('alpha', alpha),
                                ('L', complexity),
                                ('R', graph_degree)):
            row[column] = setting
        rows.append(row)
    return pd.DataFrame(rows)
# Alpha sweep: alpha from 1.0 to 1.5 with L and R both fixed at 100.
df = compute_metrics(
    datasets=['Iris','Mfeat','Seeds','MNIST', 'BIGANN','Urban','LLM','StreetAware'],
    alphas=[1.0,1.1,1.2,1.3,1.4,1.5],
    Ls=[100],
    Rs=[100],
)
display(df)
# Persist the results so the plotting cells can reload them without recomputing.
df.to_csv('../data/comparisons/without_wasserstein_alpha.csv', index=False)



This function is not available.
ModuleNotFoundError: No module named 'matplotlib'.



In [None]:
# Alpha sweep: alpha from 1.0 to 1.5 with L and R both fixed at 100.
# NOTE(review): same arguments and output file as the call that follows the
# definition of compute_metrics; running both recomputes the sweep twice.
df = compute_metrics(
    datasets=['Iris','Mfeat','Seeds','MNIST', 'BIGANN','Urban','LLM','StreetAware'],
    alphas=[1.0,1.1,1.2,1.3,1.4,1.5],
    Ls=[100],
    Rs=[100],
)
display(df)
df.to_csv('../data/comparisons/without_wasserstein_alpha.csv', index=False)

In [None]:
# Graph-degree sweep: R from 60 to 90 with alpha=1.3 and L=100.
df = compute_metrics(
    datasets=['Iris','Mfeat','Seeds','MNIST', 'BIGANN','Urban','LLM','StreetAware'],
    alphas=[1.3],
    Ls=[100],
    Rs=[60,70,80,90],
)
display(df)
df.to_csv('../data/comparisons/without_wasserstein_graph_degrees.csv', index=False)

In [None]:
# Complexity sweep: L from 120 to 200 with alpha=1.3 and R=100.
df = compute_metrics(
    datasets=['Iris','Mfeat','Seeds','MNIST', 'BIGANN','Urban','LLM','StreetAware'],
    alphas=[1.3],
    Ls=[120,140,160,180,200],
    Rs=[100],
)
display(df)
df.to_csv('../data/comparisons/without_wasserstein_complexity.csv', index=False)

In [2]:
# Timing tables, one per sweep: df1 = alpha, df2 = complexity (L), df3 = graph degree (R).
df1, df2, df3 = (
    pd.read_csv('../data/comparisons/time_AMST_alpha.csv'),
    pd.read_csv('../data/comparisons/time_AMST_complexity.csv'),
    pd.read_csv('../data/comparisons/time_AMST_graph_degrees.csv'),
)

In [114]:
# Baseline timing rows (L=100, R=100, alpha=1.3) taken from the alpha sweep.
# They are appended later to the L and R timing tables.
# alpha is a float parsed from CSV, so it is matched with a small tolerance:
# an exact `==` would silently give an empty frame if the stored value
# differs from 1.3 by a rounding error.
df_missing_times = df1[
    (df1["L"] == 100)
    & (df1["R"] == 100)
    & ((df1["alpha"] - 1.3).abs() < 1e-9)
]


In [3]:
# Subset of datasets shown in the figures.
Filterd_datasets = [
    'BIGANN',
    'LLM',
    'MNIST',
    'StreetAware',
]
# Alpha-sweep timings restricted to that subset.
filtered_df1 = df1.loc[df1["dataset"].isin(Filterd_datasets)]

# Analyzing Alpha

## Time

In [140]:
import plotly.express as px

# One stacked panel per dataset: time as a function of alpha.
fig = px.line(
    filtered_df1,
    x="alpha",
    y="time",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes (matches=None), show tick labels on every panel and drop
# the per-panel axis titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='Time (s)',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.05, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20})
# Keep only the text after the last "=" in each annotation and enlarge it.
# This runs after add_annotation, so the shared y label gets the same font size.
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 80, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-alpha-time.png", scale=3)
fig.show()




## RWE

In [55]:
# Reload the alpha-sweep metrics and plot the magnitude of the weight error.
df_alpha_comparisons = pd.read_csv('../data/comparisons/without_wasserstein_alpha.csv')
df_alpha_comparisons['Total weight error'] = df_alpha_comparisons['Total weight error'].abs()
# Keep only the datasets shown in the figures.
filtered_df_alpha_comparisons = df_alpha_comparisons.loc[
    df_alpha_comparisons["dataset"].isin(Filterd_datasets)
]

In [74]:
# One stacked panel per dataset: absolute total weight error as a function of alpha.
fig = px.line(
    filtered_df_alpha_comparisons,
    x="alpha",
    y="Total weight error",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes, show tick labels on every panel, drop per-panel titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='RWE',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.1, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20})
# Keep only the text after the last "=" in each annotation and enlarge it
# (applies to the y label added above as well).
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 120, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-alpha-RWE.png", scale=3)
fig.show()



## Bottleneck

In [80]:
# One stacked panel per dataset: bottleneck distance as a function of alpha.
fig = px.line(
    filtered_df_alpha_comparisons,
    x="alpha",
    y="Bottleneck distance",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes, show tick labels on every panel, drop per-panel titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='Bottleneck distance',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.075, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20})
# Keep only the text after the last "=" in each annotation and enlarge it
# (applies to the y label added above as well).
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 100, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-alpha-Bottleneck_distance.png", scale=3)
fig.show()

# Complexity (L)

In [45]:
# Reload the complexity-sweep metrics; only the magnitude of the weight error is kept.
df_complexity_comparisons = pd.read_csv('../data/comparisons/without_wasserstein_complexity.csv')
df_complexity_comparisons['Total weight error'] = (
    df_complexity_comparisons['Total weight error'].abs()
)

In [112]:
def rename(value):
    """Map a raw dataset identifier to the display name used in this notebook.

    Parameters
    ----------
    value : str
        One of the raw identifiers listed in the table below.

    Returns
    -------
    str
        The corresponding display name.

    Raises
    ------
    KeyError
        If ``value`` is not one of the known raw identifiers.
    """
    display_names = dict(
        iris="Iris",
        street_aware="StreetAware",
        mfeat="Mfeat",
        seeds="Seeds",
        mnist="MNIST",
        sift_learn="BIGANN",
        llm="LLM",
        urban="Urban",
    )
    return display_names[value]


In [47]:
# Metrics for the baseline configuration, read from a separate file and tagged
# with L=100, alpha=1.3, R=100 so they can be appended to the sweep tables.
df_missing = (
    pd.read_csv('../data/comparisons/comparisons_wassertein_complete_final.csv')
    .assign(L=100, alpha=1.3, R=100)
    .drop(columns='Wasserstein distance')
)
# Convert raw dataset identifiers to the display names used elsewhere.
df_missing['dataset'] = df_missing['dataset'].apply(rename)


In [48]:
# Append the baseline rows to the complexity sweep and order by dataset, then L.
df_complexity_comparisons = (
    pd.concat([df_complexity_comparisons, df_missing])
    .sort_values(by=['dataset', 'L'])
)
# The appended rows have not been through abs() yet, so apply it again.
df_complexity_comparisons['Total weight error'] = (
    df_complexity_comparisons['Total weight error'].abs()
)

In [49]:
# Complexity-sweep metrics restricted to the datasets shown in the figures.
filtered_df_complexity_comparisons = df_complexity_comparisons.loc[
    df_complexity_comparisons["dataset"].isin(Filterd_datasets)
]

In [115]:
# Append the baseline timings to the complexity sweep and order by dataset, then L.
df2 = pd.concat([df2, df_missing_times]).sort_values(by=['dataset', 'L'])

In [116]:
# Complexity-sweep timings restricted to the datasets shown in the figures.
filtered_df2 = df2.loc[df2["dataset"].isin(Filterd_datasets)]

## Time

In [138]:
import plotly.express as px

# One stacked panel per dataset: time as a function of L.
fig = px.line(
    filtered_df2,
    x="L",
    y="time",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes, show tick labels on every panel, drop per-panel titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='Time (s)',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.04, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20})
# Keep only the text after the last "=" in each annotation and enlarge it
# (applies to the y label added above as well).
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 70, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-L-time.png", scale=3)
fig.show()


## RWE

In [87]:

# One stacked panel per dataset: absolute total weight error as a function of L.
fig = px.line(
    filtered_df_complexity_comparisons,
    x="L",
    y="Total weight error",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes, show tick labels on every panel, drop per-panel titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='RWE',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.07, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20})
# Keep only the text after the last "=" in each annotation and enlarge it
# (applies to the y label added above as well).
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 100, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-L-RWE.png", scale=3)
fig.show()




## Bottleneck

In [89]:
# One stacked panel per dataset: bottleneck distance as a function of L.
fig = px.line(
    filtered_df_complexity_comparisons,
    x="L",
    y="Bottleneck distance",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes, show tick labels on every panel, drop per-panel titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='Bottleneck distance',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.08, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20})
# Keep only the text after the last "=" in each annotation and enlarge it
# (applies to the y label added above as well).
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 100, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-L-Bottleneck_distance.png", scale=3)
fig.show()



# Graph Degree 

## Time

In [119]:
# Append the baseline timings (L=100, R=100, alpha=1.3) to the graph-degree
# sweep BEFORE filtering. In the previous cell order the filter came first, so
# a top-to-bottom run produced a `filtered_df3` without the R=100 rows.
# NOTE: re-running this cell appends the baseline rows to df3 again.
df3 = pd.concat([df3, df_missing_times])
df3.sort_values(by = ['dataset','R'],inplace=True)

# Graph-degree timings restricted to the datasets shown in the figures.
filtered_df3 = df3[df3["dataset"].isin(Filterd_datasets)]

In [123]:
# Distinct R values present in the timing table, in ascending order.
R_tick_vals = df3['R'].unique()
R_tick_vals = sorted(R_tick_vals)

In [139]:
import plotly.express as px

# One stacked panel per dataset: time as a function of R.
fig = px.line(
    filtered_df3,
    x="R",
    y="time",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes, show tick labels on every panel, drop per-panel titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='Time (s)',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.07, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
# Larger fonts, and x ticks every 10 starting from 60.
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20}, tick0=60, dtick=10)
# Keep only the text after the last "=" in each annotation and enlarge it
# (applies to the y label added above as well).
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 85, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-R-time.png", scale=3)
fig.show()


## RWE

In [11]:
# Reload the graph-degree-sweep metrics; only the magnitude of the weight error is kept.
df_DEGREE_comparisons = pd.read_csv('../data/comparisons/without_wasserstein_graph_degrees.csv')
df_DEGREE_comparisons['Total weight error'] = (
    df_DEGREE_comparisons['Total weight error'].abs()
)

In [14]:
# Metrics for the baseline configuration, read from a separate file and tagged
# with L=100, alpha=1.3, R=100 so they can be appended to the sweep tables.
df_missing = (
    pd.read_csv('../data/comparisons/comparisons_wassertein_complete_final.csv')
    .assign(L=100, alpha=1.3, R=100)
    .drop(columns='Wasserstein distance')
)
# Convert raw dataset identifiers to the display names used elsewhere.
df_missing['dataset'] = df_missing['dataset'].apply(rename)


In [15]:
# Append the baseline rows to the graph-degree sweep and order by dataset, then R.
df_DEGREE_comparisons = pd.concat([df_DEGREE_comparisons, df_missing])
df_DEGREE_comparisons.sort_values(by = ['dataset','R'],inplace=True)
# The sweep rows were made non-negative when they were loaded, but the rows
# coming from df_missing were not. Take the absolute value again so the R=100
# point is on the same footing as the others (the complexity section does the
# same after its concat).
df_DEGREE_comparisons['Total weight error'] = df_DEGREE_comparisons['Total weight error'].abs()

In [16]:
# Graph-degree-sweep metrics restricted to the datasets shown in the figures.
filtered_df_DEGREE_comparisons = df_DEGREE_comparisons.loc[
    df_DEGREE_comparisons["dataset"].isin(Filterd_datasets)
]

In [131]:
import plotly.express as px

# One stacked panel per dataset: total weight error as a function of R.
fig = px.line(
    filtered_df_DEGREE_comparisons,
    x="R",
    y="Total weight error",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes, show tick labels on every panel, drop per-panel titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='RWE',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.1, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
# Larger fonts, and x ticks every 10 starting from 60.
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20}, tick0=60, dtick=10)
# Keep only the text after the last "=" in each annotation and enlarge it
# (applies to the y label added above as well).
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 130, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-R-RWE.png", scale=3)
fig.show()


## Bottleneck


In [132]:
import plotly.express as px

# One stacked panel per dataset: bottleneck distance as a function of R.
fig = px.line(
    filtered_df_DEGREE_comparisons,
    x="R",
    y="Bottleneck distance",
    facet_col="dataset",
    facet_col_wrap=1,
    markers=True,
    line_shape='spline',
)

# Unlink the y-axes, show tick labels on every panel, drop per-panel titles.
fig.for_each_yaxis(
    lambda axis: axis.update(title=None, matches=None, showticklabels=True, tickfont={'size': 20})
)
# Single rotated y label placed to the left of the panels.
fig.add_annotation(
    text='Bottleneck distance',
    textangle=-90,
    showarrow=False,
    xref='paper', x=-0.08, xanchor='center',
    yref='paper', y=0.5, yanchor='middle',
)
# Larger fonts, and x ticks every 10 starting from 60.
fig.update_xaxes(title_font={'size': 20}, tickfont={'size': 20}, tick0=60, dtick=10)
# Keep only the text after the last "=" in each annotation and enlarge it
# (applies to the y label added above as well).
fig.for_each_annotation(
    lambda note: note.update(font=dict(size=20), text=note.text.rsplit("=", 1)[-1])
)
fig.update_layout(
    width=1200,
    height=600,
    margin={'t': 25, 'l': 100, 'r': 25, 'b': 25},
)

fig.write_image("../images/updated_Impact-R-Bottleneck_distance.png", scale=3)
fig.show()

