In [None]:
# %load_ext profila

In [None]:
import zarr
from scipy.spatial.distance import squareform
import numpy as np
import anjl
import pandas as pd
import plotly.express as px

## Small

In [None]:
# Load the "small" benchmark distances from the zipped zarr store.
small = zarr.load("../data/small/dist.zarr.zip")
# scipy's squareform converts between condensed-vector and square-matrix form;
# presumably this goes condensed -> square here — TODO confirm the stored format.
small_D = squareform(small)
# Show the resulting shape as the cell output.
small_D.shape

In [None]:
# Untimed run of the canonical implementation on the small matrix.
# Presumably this doubles as a warm-up (e.g. JIT compilation) before the
# timing cells — TODO confirm.
small_Z = anjl.canonical_nj(small_D)

In [None]:
# Untimed run of the rapid implementation with gc=None.
small_Z_r = anjl.rapid_nj(small_D, gc=None)

In [None]:
# Untimed run of the rapid implementation with gc=1.
# NOTE(review): this rebinds `small_Z_r`, so the gc=None result from the
# previous cell is no longer reachable by name.
small_Z_r = anjl.rapid_nj(small_D, gc=1)

In [None]:
%%timeit -r100 -n1
# 100 repeats of one call each: canonical implementation, small matrix.
anjl.canonical_nj(small_D)

In [None]:
%%timeit -r100 -n1
# Same timing setup: rapid implementation with gc=None.
anjl.rapid_nj(small_D, gc=None)

In [None]:
%%timeit -r100 -n1
# Same timing setup: rapid implementation with gc=100.
anjl.rapid_nj(small_D, gc=100)

## Medium

In [None]:
# Load the "medium" benchmark distances from the zipped zarr store.
medium = zarr.load("../data/medium/dist.zarr.zip")
# scipy's squareform converts between condensed-vector and square-matrix form;
# presumably this goes condensed -> square here — TODO confirm the stored format.
medium_D = squareform(medium)
# Show the resulting shape as the cell output.
medium_D.shape

In [None]:
# Untimed run of the canonical implementation on the medium matrix.
medium_Z = anjl.canonical_nj(medium_D)

In [None]:
# Untimed run of the rapid implementation. `gc` is left at its default here,
# whereas the medium timing cells always pass it explicitly.
medium_Z_r = anjl.rapid_nj(medium_D)

In [None]:
%%timeit -r50 -n1
# 50 repeats of one call each: canonical implementation, medium matrix.
anjl.canonical_nj(medium_D)

In [None]:
%%timeit -r30 -n1
# 30 repeats of one call each: rapid implementation with gc=None.
anjl.rapid_nj(medium_D, gc=None)

In [None]:
%%timeit -r30 -n1
# 30 repeats of one call each: rapid implementation with gc=10.
anjl.rapid_nj(medium_D, gc=10)

In [None]:
%%timeit -r30 -n1
# 30 repeats of one call each: rapid implementation with gc=100.
anjl.rapid_nj(medium_D, gc=100)

In [None]:
%%timeit -r20 -n1
# 20 repeats of one call each: rapid implementation with gc=1.
anjl.rapid_nj(medium_D, gc=1)

In [None]:
# %%profila
# anjl.canonical_nj(medium_D)

In [None]:
# %%profila
# anjl.rapid_nj(medium_D, gc=100)

## Large

In [None]:
# Load the "large" benchmark distances from the zipped zarr store.
large = zarr.load("../data/large/dist.zarr.zip")
# The squareform result is indexed along axis 0 and axis 1 further down, i.e.
# it is used as a 2-D matrix. The trailing comment is a disabled slice that
# would restrict the matrix to its first 3000 rows and columns.
large_D = squareform(large)  # [:3000, :3000]
# Show the resulting shape as the cell output.
large_D.shape

In [None]:
# Display the matrix itself for a quick visual check.
large_D

In [None]:
# Draw a random permutation of the row/column indices of `large_D`.
# The generator is seeded so that the shuffled ordering, and every run that
# consumes `large_D_shuffled`, is reproducible across kernel restarts.
shuffle = np.random.default_rng(seed=42).permutation(large_D.shape[0])
# Show the permutation as the cell output.
shuffle

In [None]:
# Reorder rows first, then columns, using the same permutation for both so
# that rows and columns stay consistently paired.
large_D_shuffled = np.take(np.take(large_D, shuffle, axis=0), shuffle, axis=1)
# Show the shuffled matrix as the cell output.
large_D_shuffled

In [None]:
# %%profila
# large_Z = anjl.canonical_nj(large_D_shuffled)

In [None]:
# %%profila
# large_Z = anjl.rapid_nj(large_D_shuffled, gc=100)

In [None]:
# Canonical implementation on the shuffled matrix with diagnostics=True.
# The call is unpacked into four values: the result plus timing, searched and
# visited sequences, which are later tabulated against an iteration index.
# Exact semantics of these sequences are defined by anjl — TODO confirm.
large_Z, timings_canonical, searched_canonical, visited_canonical = anjl.canonical_nj(
    large_D_shuffled,
    # progress=tqdm,
    # progress_options=dict(desc="Compute neighbour-joining tree"),
    diagnostics=True,
)

In [None]:
# Rapid implementation with gc=100, diagnostics kept under *_rapid_gc100 names.
# NOTE(review): `large_Z` is rebound here and in the following cells, so only
# the last run's result remains reachable by that name.
large_Z, timings_rapid_gc100, searched_rapid_gc100, visited_rapid_gc100 = anjl.rapid_nj(
    large_D_shuffled,
    # progress=tqdm,
    # progress_options=dict(desc="Compute neighbour-joining tree"),
    diagnostics=True,
    gc=100,
)

In [None]:
# Rapid implementation with gc=10, diagnostics kept under *_rapid_gc10 names.
large_Z, timings_rapid_gc10, searched_rapid_gc10, visited_rapid_gc10 = anjl.rapid_nj(
    large_D_shuffled,
    # progress=tqdm,
    # progress_options=dict(desc="Compute neighbour-joining tree"),
    diagnostics=True,
    gc=10,
)

In [None]:
# Rapid implementation with gc=None, diagnostics kept under *_rapid_nogc names.
large_Z, timings_rapid_nogc, searched_rapid_nogc, visited_rapid_nogc = anjl.rapid_nj(
    large_D_shuffled,
    # progress=tqdm,
    # progress_options=dict(desc="Compute neighbour-joining tree"),
    diagnostics=True,
    gc=None,
)

In [None]:
def _diagnostics_frame(timings, searched, visited, algorithm):
    """Tabulate one algorithm's diagnostics.

    Columns, in order: ``time``, ``searched``, ``visited``, ``iteration``
    (0 .. len(timings) - 1) and ``algorithm`` (the label repeated on every row).
    """
    return pd.DataFrame(
        {
            "time": timings,
            "searched": searched,
            "visited": visited,
            "iteration": np.arange(len(timings)),
            "algorithm": algorithm,
        }
    )


# One frame per run, labelled so the runs can be told apart once combined.
df_canonical = _diagnostics_frame(
    timings_canonical, searched_canonical, visited_canonical, "canonical"
)
df_rapid_gc100 = _diagnostics_frame(
    timings_rapid_gc100, searched_rapid_gc100, visited_rapid_gc100, "rapid_gc100"
)
df_rapid_gc10 = _diagnostics_frame(
    timings_rapid_gc10, searched_rapid_gc10, visited_rapid_gc10, "rapid_gc10"
)
df_rapid_nogc = _diagnostics_frame(
    timings_rapid_nogc, searched_rapid_nogc, visited_rapid_nogc, "rapid_nogc"
)

In [None]:
# Stack the per-algorithm frames row-wise into one long-format table; the
# "algorithm" column tells the runs apart in the plots.
df_diagnostics = pd.concat(
    objs=(df_rapid_gc100, df_rapid_gc10, df_rapid_nogc, df_canonical),
    axis="index",
)

In [None]:
# "time" against iteration, one line per algorithm.
px.line(
    data_frame=df_diagnostics,
    template="simple_white",
    color="algorithm",
    x="iteration",
    y="time",
)

In [None]:
# "searched" against iteration, one line per algorithm.
px.line(
    data_frame=df_diagnostics,
    template="simple_white",
    color="algorithm",
    x="iteration",
    y="searched",
)

In [None]:
# "visited" against iteration, one line per algorithm.
px.line(
    data_frame=df_diagnostics,
    template="simple_white",
    color="algorithm",
    x="iteration",
    y="visited",
)

In [None]:
%%timeit -r2 -n1
# 2 repeats of one call each: canonical implementation, large matrix.
# NOTE(review): these timing cells use `large_D`, not `large_D_shuffled` as
# the diagnostics runs do — confirm that is intended.
anjl.canonical_nj(large_D)

In [None]:
%%timeit -r2 -n1
# Same timing setup: rapid implementation with gc=10.
anjl.rapid_nj(large_D, gc=10)

In [None]:
%%timeit -r2 -n1
# Same timing setup: rapid implementation with gc=100.
anjl.rapid_nj(large_D, gc=100)

In [None]:
%%timeit -r2 -n1
# Same timing setup: rapid implementation with gc=200.
anjl.rapid_nj(large_D, gc=200)

In [None]:
%%timeit -r2 -n1
# Same timing setup: rapid implementation with gc=None.
anjl.rapid_nj(large_D, gc=None)

In [None]:
%%timeit -r3 -n1
# 3 repeats of one call each: argsort every row of the shuffled matrix.
# Presumably a baseline for the sorting cost inside rapid_nj — TODO confirm.
np.argsort(large_D_shuffled, axis=1)

In [None]:
%%timeit -r1000 -n1
# 1000 repeats of one call each: argsort a single row (row 0) for comparison.
np.argsort(large_D_shuffled[0])

In [None]:
import anjl._canonical
import anjl._rapid

In [None]:
# anjl._canonical._canonical_nj_search.inspect_types()

In [None]:
# anjl._rapid._rapid_search.inspect_types()