In [79]:
import pandas as pd
from pathlib import Path
import altair as alt

# --- Plot styling -----------------------------------------------------
# Opacity applied to both the error-bar and the point marks.
OPACITY = 0.7
# Colour range handed to the "Method" colour scale.
COLORS = ["steelblue", "#7D3C98", "chartreuse", "#F4D03F", "red", "#D35400"]

# --- Metric configuration ---------------------------------------------
# Axis title for each metric column.
Y_LABELS = {
    "entropy_with_missing": "Entropy (H)",
    "entropy_without_missing": "Entropy (H) without missing",
    "fvi": "FVI",
    "mpd": "MPD",
    "fvo": "FVO",
}
# Metric columns that get a panel. "entropy_with_missing" has a label above
# but is currently left out of the figure.
METRICS = ["entropy_without_missing", "fvi", "mpd", "fvo"]

# Methods whose rows are drawn with error bars by the plotting cell.
rand_methods = ["random_family", "random_genus", "random"]

# --- Data -------------------------------------------------------------
# Experiment 195 settings (author's notes):
#   Frame     = all languages in GB
#   k         = range from 5 to 500, steps of 5
#   distances = all languages in GB
# The "method" column is renamed to "Method" so the legend title looks nice.
df = pd.read_csv("experiments/experiment-195.csv").rename(
    columns={"method": "Method"}
)

def build_metric_panel(metric: str, data: pd.DataFrame) -> "alt.LayerChart":
    """Build one panel: mean points layered over stdev error bars.

    Args:
        metric: Name of the column in ``data`` to plot. It must also be a key
            of ``Y_LABELS``, which supplies the y-axis title.
        data: Frame containing the columns ``k``, ``Method`` and ``metric``.

    Returns:
        A layered chart with the error-bar layer below the point layer.

    Raises:
        KeyError: If ``metric`` has no entry in ``Y_LABELS``.
    """
    y_title = Y_LABELS[metric]

    # Both layers use the same x and colour encodings. Building them once
    # keeps the layers from silently drifting apart when one is edited.
    x_enc = alt.X("k", title="Sample size")
    legend = alt.Legend(
        orient="none",  # position is given explicitly via legendX / legendY
        legendX=130,
        legendY=-40,
        direction="horizontal",
        titleAnchor="middle",
    )
    color_enc = alt.Color("Method", legend=legend).scale(range=COLORS)

    # Error bars are drawn only for the methods listed in rand_methods.
    # The raw metric is passed in; the error-bar mark summarises it itself
    # (extent="stdev").
    err_bars = (
        alt.Chart(data[data["Method"].isin(rand_methods)])
        .mark_errorbar(extent="stdev", opacity=OPACITY)
        .encode(x=x_enc, y=alt.Y(metric, title=y_title), color=color_enc)
    )

    # Points show the mean of the metric per sample size for every method.
    points = (
        alt.Chart(data)
        .mark_point(filled=True, opacity=OPACITY)
        .encode(
            x=x_enc,
            y=alt.Y(f"mean({metric})", title=y_title),
            color=color_enc,
        )
    )
    return err_bars + points


# One layered panel per metric, in the order given by METRICS.
plots = [build_metric_panel(metric, df) for metric in METRICS]

In [80]:
# Arrange the last four panels in a 2x2 grid.
# Indexing from the end picks the same panels, in the same order, as the
# earlier pop()-based version, but leaves `plots` intact so this cell can be
# re-run without an IndexError and later cells can still use the list.
# NOTE(review): the panels end up in reverse METRICS order (last metric is
# top-left) — confirm this layout is intended.
top = plots[-1] | plots[-2]
bottom = plots[-3] | plots[-4]

combined = alt.vconcat(top, bottom)

In [81]:
# Export the combined figure to a PDF file (path is relative to the working
# directory).
pdf_path = "combined-195-pretty.pdf"
combined.save(pdf_path)

In [83]:
# Zoomed-in view of entropy (without missing values) for small samples only.
MAX_K_ZOOM = 100  # largest sample size included in this plot

plot2 = (
    alt.Chart(df[df["k"] <= MAX_K_ZOOM])
    .mark_circle()
    .encode(
        x=alt.X("k", title="Sample size (k)"),
        y=alt.Y("entropy_without_missing", title="Entropy"),
        # Reuse the notebook-wide palette instead of repeating the literal
        # list, so every figure keeps the same method-to-colour mapping.
        color=alt.Color("Method").scale(range=COLORS),
    )
)
plot2