# Generate results comparing lines and geodesics between $q^{\text{ref}}$ and $q$

In [20]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

import wandb

Import results from wandb directly.
We fetch the runs of the project through the wandb API. They correspond to the wandb Table available at:

https://wandb.ai/bioshape-lab/line_vs_geodesic/table?workspace=user-ninamiolane

In [19]:
# Open the wandb public API and list the runs of the project.
api = wandb.Api()
runs = api.runs("bioshape-lab/line_vs_geodesic")

summary_list = []  # per run: logged output keys/values (metrics)
config_list = []  # per run: hyperparameters
name_list = []  # per run: human-readable run name

for run in runs:
    # Output metrics are stored in .summary; going through ._json_dict
    # omits large files.
    summary_list.append(run.summary._json_dict)

    # Hyperparameters are stored in .config; keys starting with "_" are
    # special values and are dropped.
    config_list.append(
        {key: value for key, value in run.config.items() if not key.startswith("_")}
    )

    name_list.append(run.name)

In [22]:
# Turn each list of per-run records into its own dataframe.
summary_df, config_df, name_df = (
    pd.DataFrame(records) for records in (summary_list, config_list, name_list)
)
print(f"lengths: {len(summary_df)}, {len(config_df)}, {len(name_df)}")

# Place the three frames side by side, so that each row describes one run.
df = pd.concat([summary_df, config_df, name_df], axis=1)
print(f"total length: {len(df)}")

lengths: 119, 119, 119
total length: 119


In [23]:
# Preview the first rows of the combined dataframe.
df.head()

Unnamed: 0,geodesic_dist_per_vertex,offset_line,offset_geodesic,diff_seq_duration,geodesic_duration,linear_dist_per_vertex,relative_diff_seq_per_time_and_vertex,geodesic_regression_duration_per_vertex,geodesic_dist,noisy_vertices,...,linear_regression_duration,n_faces,n_steps,n_times,diameter,n_vertices,dataset_name,noise_factor,subdivisions,0
0,,,,,,,,,,,...,,1280,20,5,162000,642,synthetic,1.0,3,run_1bsy3km0
1,348134200.0,"{'size': 153021, '_type': 'object3D-file', 'sh...",{'path': 'media/object3D/offset_geodesic_0_91a...,-1961.550399,1961.550707,774.310523,-3936.902424,1.008941,56397740000.0,{'path': 'media/object3D/noisy_vertices_0_fd41...,...,0.000123,320,20,10,5400,162,synthetic,1.0,2,run_c5ewczah
2,332652500.0,{'path': 'media/object3D/offset_line_0_77653d8...,{'path': 'media/object3D/offset_geodesic_0_a98...,-1010.430251,1010.430487,709.417759,-5285.012651,0.879026,53889710000.0,{'path': 'media/object3D/noisy_vertices_0_46a5...,...,0.000161,320,20,5,5400,162,synthetic,1.0,2,run_uu30semj
3,41995.97,{'path': 'media/object3D/offset_line_0_22b199f...,"{'_type': 'object3D-file', 'sha256': 'f4523db7...",-462.152686,462.153007,48.163727,-3421.244763,0.450125,1763831.0,{'path': 'media/object3D/noisy_vertices_0_4d88...,...,9.5e-05,80,20,10,180,42,synthetic,1.0,1,run_76bkw2k3
4,34383.86,{'path': 'media/object3D/offset_line_0_d132c2b...,{'path': 'media/object3D/offset_geodesic_0_0c0...,-236.24728,236.247516,49.628512,-4780.686607,0.674877,1444122.0,"{'size': 4006, '_type': 'object3D-file', 'sha2...",...,0.000124,80,20,5,180,42,synthetic,1.0,1,run_mvg1ftc9


The columns of the dataframe are the variables that we have logged into wandb:

In [24]:
# List the column labels of the combined dataframe.
df.columns

Index([               'geodesic_dist_per_vertex',
                                   'offset_line',
                               'offset_geodesic',
                             'diff_seq_duration',
                             'geodesic_duration',
                        'linear_dist_per_vertex',
         'relative_diff_seq_per_time_and_vertex',
       'geodesic_regression_duration_per_vertex',
                                 'geodesic_dist',
                                'noisy_vertices',
                            'relative_diff_dist',
                        'relative_diff_duration',
         'linear_regression_duration_per_vertex',
                                 'diff_duration',
                          'diff_dist_per_vertex',
                      'diff_duration_per_vertex',
                  'geodesic_regression_duration',
                    'relative_diff_seq_duration',
             'diff_seq_per_time_vertex_diameter',
                                        '_wandb',


# Exploration Plots: Histograms

We define human-readable explanations of the different variables, to make better plots.

In [152]:
# Text shown on axes and legends, keyed by the name of the dataframe column.
COL_TO_TEXT = dict(
    line_duration="Time to compute a line",
    diff_seq_per_time_vertex_diameter="Mesh distance per vertex [% diameter]",
    diff_seq_duration_per_time_and_vertex="Time difference [secs] per mesh and vertex",
    relative_diff_seq_duration="Time difference per vertex [% line]",
    relative_diff_seq_duration_per_vertex="Time difference per vertex [% line]",
    noise_factor="Deformation factor, Steps",
    n_steps="Number of steps",
    n_vertices="Number of vertices",
)

We can plot histograms of some of these variables to get intuition of their values.

Here, we plot how long it takes to compute one line between two meshes, per vertex and per mesh of the sequence. We see that it takes less than 1 microsecond per vertex and per mesh, i.e. less than 5 milliseconds to compute a line between two meshes when the sequence has 5 meshes and each mesh has 1000 vertices.

In [37]:
# Duration of a line computation divided by the number of
# (time step, vertex) pairs of the sequence.
line_duration_per_time_vertex = df["line_duration"].div(
    df["n_vertices"].mul(df["n_times"])
)
average = line_duration_per_time_vertex.mean()
print("Average:", average)

fig = px.histogram(line_duration_per_time_vertex, nbins=10)
# Ending the cell with update_layout displays the updated figure.
fig.update_layout(
    width=500,
    height=300,
    showlegend=False,
    title="Histogram of Line Duration per Time Vertex",
    xaxis_title="Line Duration per Time Vertex",
    yaxis_title="Count",
)

Average: 4.99521484367263e-07


We compare the time taken to compute a line versus a geodesic between two meshes.

In [38]:
# Distribution of the duration difference between the two computations,
# per time step and vertex, over all runs.
fig = px.histogram(
    df["diff_seq_duration_per_time_and_vertex"],
    nbins=10,
)
# Ending the cell with update_layout displays the updated figure.
fig.update_layout(
    width=500,
    height=300,
    showlegend=False,
    title="Histogram of Difference of Duration per Time and Vertex",
    xaxis_title="Difference of Duration per Time and Vertex",
    yaxis_title="Count",
)

## Histograms

We write a function that can plot more sophisticated histograms, by grouping according to a given variable `by`.

In [39]:
def hist_diff_seq_per_time_vertex_diameter_group(by="n_steps"):
    """Plot overlaid histograms of the mesh distance, one per group.

    The rows of the module-level dataframe ``df`` are grouped by the column
    ``by``. For every group, the values of the column
    ``diff_seq_per_time_vertex_diameter`` are drawn as one semi-transparent
    histogram trace, and the figure is displayed.

    Parameters
    ----------
    by : str
        Name of the column of ``df`` used to group the rows.
        Optional, default: "n_steps".
    """
    column = "diff_seq_per_time_vertex_diameter"
    fig = go.Figure()

    for name, group in df.groupby(by)[column]:
        # The values are scaled by 100 so that they match the "[% diameter]"
        # unit of the title, as done in the other plots of this notebook.
        fig.add_trace(
            go.Histogram(x=group * 100, opacity=0.5, name=f"{by} = {name}")
        )

    fig.update_layout(
        title=COL_TO_TEXT[column],
        yaxis_title="Count",
        # Draw the traces on top of each other: with the default bar mode the
        # groups are placed side by side and the opacity has no purpose.
        barmode="overlay",
        showlegend=True,
        width=600,
        height=400,
    )

    fig.show()

In [40]:
# One histogram per value of the number of steps.
hist_diff_seq_per_time_vertex_diameter_group(by="n_steps")

In [41]:
# One histogram per value of the number of vertices.
hist_diff_seq_per_time_vertex_diameter_group(by="n_vertices")

In [42]:
# One histogram per value of the noise factor.
hist_diff_seq_per_time_vertex_diameter_group(by="noise_factor")

## Scatter plots

We write code to generate the main plot for the corresponding section of the paper.

In [161]:
fontsize = 15


def diff_scatter_grouped(
    colored_by="noise_factor",
    marked_by="n_steps",
    diff_seq="relative_diff_seq_duration",
):
    """Scatter a duration difference against the mesh distance, for all runs.

    The x values are the column ``diff_seq_per_time_vertex_diameter`` of the
    module-level dataframe ``df`` multiplied by 100. The y values are the
    absolute value of the column ``diff_seq`` divided by ``n_vertices``.
    The figure is displayed and then written to
    ``line_vs_geodesic_colored_by_{colored_by}.svg``. The file name depends
    on ``colored_by`` only, so calls sharing it overwrite the same file.

    Parameters
    ----------
    colored_by : str
        Column of ``df`` whose values set the marker colors.
        Optional, default: "noise_factor".
    marked_by : str
        Column of ``df`` whose values set the marker symbols.
        Optional, default: "n_steps".
    diff_seq : str
        Column of ``df`` plotted on the y axis.
        Optional, default: "relative_diff_seq_duration".
    """
    # Short column names for the legend title. Legend entries combine the
    # color value and the symbol value, so the title names both variables.
    legend_names = {
        "noise_factor": "Deformation factor",
        "n_steps": "Steps",
        "n_vertices": "Vertices",
    }
    legend_title = ", ".join(
        legend_names.get(column, column) for column in (colored_by, marked_by)
    )

    x = df["diff_seq_per_time_vertex_diameter"] * 100
    y = np.abs(df[diff_seq]) / df["n_vertices"]

    # At most five distinct values get an explicit symbol.
    value_to_symbol = dict(
        zip(df[marked_by].unique(), ["square", "x", "cross", "diamond", "star"])
    )

    if colored_by == "n_steps":
        color_discrete_sequence = px.colors.sequential.Plasma_r
    else:
        color_discrete_sequence = px.colors.sequential.Viridis_r

    fig = px.scatter(
        df,
        x=x,
        y=y,
        # Strings make the colors discrete, one legend entry per value.
        color=[str(value) for value in df[colored_by].values],
        color_discrete_sequence=color_discrete_sequence,
        symbol=list(df[marked_by].values),
        symbol_map=value_to_symbol,
    )

    text_font = dict(family="CMU", size=fontsize)
    fig.update_layout(
        xaxis_title=dict(
            text=COL_TO_TEXT["diff_seq_per_time_vertex_diameter"],
            font=text_font,
        ),
        # Fall back to the column name when no readable text is defined.
        yaxis_title=dict(text=COL_TO_TEXT.get(diff_seq, diff_seq), font=text_font),
        title_font=text_font,
        xaxis=dict(tickfont=text_font),
        yaxis=dict(tickfont=text_font),
        legend=dict(font=text_font, title=legend_title),
        width=650,
        height=370,
    )

    fig.update_traces(marker=dict(size=9, opacity=0.9))
    # Show first, so that the figure is displayed even if the export fails.
    fig.show()
    pio.write_image(fig, f"line_vs_geodesic_colored_by_{colored_by}.svg")

In [164]:
# Colors follow the noise factor; marker symbols follow the number of vertices.
diff_scatter_grouped(colored_by="noise_factor", marked_by="n_vertices")

In [160]:
#diff_scatter_grouped(colored_by="n_vertices", marked_by="n_steps")

In [163]:
# Colors follow the number of steps; marker symbols follow the number of vertices.
diff_scatter_grouped(colored_by="n_steps", marked_by="n_vertices")

In [154]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


def line_plot(y_var, y):
    """Scatter grouped values against the noise factor, with linear fits.

    One point is drawn per entry of ``y``. Points are colored by number of
    vertices and marked by number of steps. For every number of vertices, a
    linear regression of the values on the noise factor is fitted, its R2
    score is printed, and the fitted line is added with the color of the
    corresponding points. Only the regression lines are listed in the legend.
    The figure is displayed and then written to
    ``{y_var}_vs_deformation_factor.svg``.

    Parameters
    ----------
    y_var : str
        Name of the series ``y`` and key of ``COL_TO_TEXT`` giving the
        y-axis title.
    y : pandas.Series
        Values to plot, named ``y_var``, whose index levels include
        "noise_factor", "n_vertices" and "n_steps".
    """
    data = y.reset_index()
    marked_by = "n_steps"

    # String labels make the colors discrete: with a numeric color column,
    # Plotly Express uses a continuous color axis and ignores discrete colors.
    vertex_labels = data["n_vertices"].astype(str)
    palette = [
        px.colors.sequential.Plasma_r[-1],
        px.colors.sequential.Plasma_r[5],
        px.colors.sequential.Plasma_r[0],
    ]
    # Shared by the points and the regression lines. Labels beyond the
    # palette length get no entry and use the default colors.
    label_to_color = dict(zip(vertex_labels.unique(), palette))

    value_to_symbol = dict(
        zip(df[marked_by].unique(), ["square", "x", "cross", "diamond", "star"])
    )

    fig = px.scatter(
        x=data["noise_factor"],
        y=data[y_var],
        color=vertex_labels,
        color_discrete_map=label_to_color,
        symbol=list(data[marked_by].values),
        symbol_map=value_to_symbol,
    )
    # Keep every scatter trace out of the legend: it lists the fits only.
    fig.update_traces(showlegend=False, legendgroup="scatter")

    text_font = dict(family="CMU", size=fontsize)
    fig.update_layout(
        title="Line Plot",
        title_font=text_font,
        xaxis_title=dict(text="Deformation Factor", font=text_font),
        yaxis_title=dict(text=COL_TO_TEXT[y_var], font=text_font),
        xaxis=dict(tickfont=text_font),
        yaxis=dict(tickfont=text_font),
        width=600,
        height=350,
    )

    for label in vertex_labels.unique():
        group = data[vertex_labels == label]

        # scikit-learn expects the features as a 2D array.
        xx = group["noise_factor"].to_numpy().reshape(-1, 1)
        yy = group[y_var].to_numpy()
        regression_model = LinearRegression().fit(xx, yy)
        regression_y = regression_model.predict(xx)

        r_squared = r2_score(yy, regression_y)
        print(r_squared)

        fig.add_trace(
            go.Scatter(
                x=xx.ravel(),
                y=regression_y,
                mode="lines",
                name=f"n_vertices: {label} (R2={r_squared:.3f})",
                line=dict(color=label_to_color.get(label)),
                showlegend=True,
                legendgroup="regression",
            )
        )

    fig.show()
    pio.write_image(fig, f"{y_var}_vs_deformation_factor.svg")

In [155]:
# Mean mesh distance of every (noise_factor, n_vertices, n_steps) group,
# multiplied by 100 before plotting.
y_var = "diff_seq_per_time_vertex_diameter"
y = df.groupby(["noise_factor", "n_vertices", "n_steps"])[y_var].mean().mul(100)

line_plot(y_var, y)

0.9950065676323988
0.9989705990628883
0.9995409046915548


In [157]:
y_var = "relative_diff_seq_duration_per_vertex"
# Relative duration difference divided by the number of vertices, stored as a
# new column of the dataframe.
# NOTE(review): diff_scatter_grouped takes the absolute value of the
# difference before dividing, while no absolute value is taken here; confirm
# that averaging signed values is intended.
df[y_var] = df["relative_diff_seq_duration"].div(df["n_vertices"])
y = df.groupby(["noise_factor", "n_vertices", "n_steps"])[y_var].mean()

line_plot(y_var, y)

0.03652100935614144
0.11291463972399463
0.0002951048435867465
