# Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import curve_fit
from scipy.stats import norm
import pandas as pd

import matplotlib.ticker

import glob, re

# Matplotlib Configuration

In [None]:
# `scienceplots` is never referenced by name; it is imported only for its
# side effect (presumably registering the 'science' style used below --
# confirm against the package documentation).
import scienceplots  # noqa: F401

plt.style.use('science')
# Render figures at 300 dpi.
plt.rcParams.update({'figure.dpi': 300})
# plt.rcParams['axes.fontsize'] = 8 

# Input

In [None]:
# Load every saved result array from the test directory, keyed by the first
# run of digits found in its path (converted to int).
tests = {
    int(re.findall(r'\d+', path)[0]): np.load(path)
    for path in glob.glob('results/path_length_test/*.npy')
}

# Plots

In [None]:
# Histogram of the path lengths recorded for one network size, overlaid with
# a normal distribution fitted to the same samples.
size = 4000

# Column 0 of the result array holds the values plotted as "Path length".
hist_data = tests[size][:, 0]

# One bar per distinct value, with its number of occurrences as the height.
labels, counts = np.unique(hist_data, return_counts=True)

fig, ax = plt.subplots()

ax.bar(labels, counts, align='center', width=1.0, edgecolor='black')
ax.set_xlabel("Path length", fontsize=8)
ax.set_ylabel("Counts", fontsize=8)

# Maximum-likelihood normal fit to the raw samples.
mu, std = norm.fit(hist_data)

# Evaluate the fitted density over the x-range the bars already occupy.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 1000)
p = norm.pdf(x, mu, std)

# Bars are 1.0 wide, so density * sample count is on the same scale as the
# bar heights (assuming the distinct values are spaced one unit apart).
ax.plot(x, p * len(hist_data), color='orange')

# Raw f-string: "\m" and "\s" are not valid Python escapes, so without the
# `r` prefix this literal triggers a SyntaxWarning on recent Python versions.
ax.legend(
    [rf"$\mu = {mu:0.2f}, \sigma = {std:0.2f}$"],
    loc=2,
    prop={'size': 6},
)

# Derive the file name from `size` so changing the size above does not
# overwrite the figure of another size.
fig.savefig(f"figures/path_length_{size}.eps", format="eps")

# Table

In [None]:
# Network sizes to analyse. Defined here because this cell (and the curve fit
# below) use `keys` before the plotting cell that originally introduced it,
# which raised NameError when the notebook was run top to bottom.
keys = [10, 25, 50, 100, 200, 400, 1000, 2000, 4000]

# One row per network size, built from column 0 of each result array.
# Building a rectangular float array requires every size to have the same
# number of samples.
path_lengths = np.array([tests[key][:, 0] for key in keys], dtype=np.float64)

In [None]:
# Mean of each row of `path_lengths` (one value per network size).
means = path_lengths.mean(axis=1)

In [None]:
def _log2_model(x, a, b):
    """Return a * log2(x) + b, the model fitted to the mean path lengths."""
    return a * np.log2(x) + b


# Least-squares fit of the model to (network size, mean) pairs;
# w[0] is the slope `a`, w[1] the intercept `b`. The covariance is discarded.
w, _ = curve_fit(_log2_model, keys, means)

In [None]:
# Box plot of the path-length spread for each network size, overlaid with
# the fitted curve w[0] * log2(N) + w[1].
keys = [10, 25, 50, 100, 200, 400, 1000, 2000, 4000]
paths = [tests[i][:, 0] for i in keys]

fig, ax = plt.subplots()

# Boxes sit at their network size on the x-axis. Widths scale with the
# position so the boxes look similar in width once the axis is logarithmic.
# Whiskers span the 1st-99th percentiles; outliers are hidden.
bp = ax.boxplot(
    paths,
    positions=keys,
    widths=[k / 8 for k in keys],
    whis=(1, 99),
    showfliers=False,
    showmeans=True,
    meanprops=dict(marker=".", ms=4),
)
ax.set_xscale("log")

# Evaluate the fitted model over the plotted range of sizes. The intercept is
# ADDED: the model given to curve_fit is a*log2(x) + b, and the legend label
# below prints the sign of w[1] accordingly.
x = np.linspace(min(keys), max(keys))
y = w[0] * np.log2(x) + w[1]

line = ax.plot(x, y)

ax.set_ylabel("Path length", fontsize=8)
ax.set_xlabel("Number of nodes", fontsize=8)
# Plain numbers (10, 100, 1000) instead of powers of ten on the log axis.
ax.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())

# Build the label outside the f-string: reusing the same quote character
# inside an f-string expression is a syntax error before Python 3.12, and the
# raw prefix keeps "\log" from being treated as an (invalid) escape sequence.
sign = "+" if w[1] > 0 else "-"
line_label = rf"{w[0]:.2f}$\log_2N$ {sign} {abs(w[1]):.2f}"

ax.legend(
    [line[0], bp['medians'][0], bp['means'][0]],
    [line_label, 'median', 'mean'],
    prop={'size': 6},
)

fig.savefig("figures/path_length_spread.eps", dpi=2000)

In [None]:
# Summary table: one row per network size in `keys`, one column per statistic
# of the corresponding path-length samples in `paths`.
df = pd.DataFrame([], index=keys)

# Percentile columns, each named after its percentile level.
for col, q in (("0.1", 0.1), ("1", 1), ("50", 50), ("99", 99), ("99.9", 99.9)):
    df[col] = np.percentile(paths, q, axis=1)

# NOTE: the column name intentionally keeps its trailing space, as before.
df["mean "] = np.mean(paths, axis=1)
# Reference column: half the base-2 logarithm of the network size.
df["logx"] = np.log2(keys) * 0.5

In [None]:
# Show the table with every value truncated (not rounded) to three decimal
# places. The result is only displayed; `df` itself is left unchanged.
np.trunc(1000 * df) / 1000

In [None]:
# Preview the LaTeX rendering of the table; with no target given the LaTeX
# source is returned as a string rather than written to disk.
df.to_latex()

In [None]:
# Write the table as LaTeX to 'path_table.tex' in the working directory,
# formatting floats with two decimal places.
df.to_latex(buf='path_table.tex', float_format="%.2f")