In [None]:
from pathlib import Path

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import seaborn as sns

from matplotlib.backends.backend_pdf import PdfPages
from pandas_profiling import ProfileReport

In [None]:
# Read the collected experiment results and discard the "Unnamed: 0" column
# (presumably an index saved along with the data - TODO confirm).
df = pd.read_csv("experiments/all_results.csv").drop(columns="Unnamed: 0")
df.head()

In [None]:
# Distinct values of the three experiment factors found in the results.
ss_methods, nets, protocols = (
    df[column].unique()
    for column in ("selection_metric", "network", "protocol")
)

ss_methods, nets, protocols

## Visualisations

In [None]:
# Handle to matplotlib's "jet" colormap, kept at notebook level for the plotting cells.
jet = plt.get_cmap("jet")

def plot_series(
    series: pd.DataFrame, ax: plt.Axes, x: str, y: str, label: str
) -> None:
    """Scatter the per-``x`` mean of ``y`` onto ``ax``.

    Rows of ``series`` are grouped by the column ``x``; the mean of column ``y``
    within each group becomes one point drawn at that group's ``x`` value.

    Args:
        series: Data to summarise; must contain the columns ``x`` and ``y``.
        ax: Axes that receives the scatter plot.
        x: Name of the grouping column (horizontal axis).
        y: Name of the column that is averaged (vertical axis).
        label: Legend label attached to the plotted points.
    """
    # Only the group means are drawn, so a single groupby pass is enough.
    avg = series.groupby(x)[y].mean()
    ax.scatter(x=avg.index, y=avg.values, label=label, alpha=0.8)


In [None]:
# Columns shown on the horizontal / vertical axis of the scatter plot.
x = "mi_value"
y = "gain"

# Narrow the results down to a single protocol ...
proto = protocols[0]
proto_df = df.loc[df["protocol"] == proto].drop("protocol", axis=1)

# ... and to a single network.
# NOTE(review): positional pick - which network this is depends on the order of
# `nets`, and it fails if fewer than four networks are present; confirm the intent.
net = nets[3]
net_proto_df = proto_df.loc[proto_df["network"] == net].drop("network", axis=1)

fig, ax = plt.subplots(nrows=1, ncols=1)

# One scatter series per seed-selection method.
for ssm in ss_methods:
    ssm_net_proto_df = net_proto_df.loc[net_proto_df["selection_metric"] == ssm]
    ssm_net_proto_df = ssm_net_proto_df.drop("selection_metric", axis=1)
    plot_series(series=ssm_net_proto_df, ax=ax, x=x, y=y, label=ssm)

# Legend goes outside the axes, to the right of the plot.
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
ax.set_xlabel(x)
ax.set_ylabel(y)
fig.suptitle(f"protocole: {proto}, network: {net}");

In [None]:
# One panel per distinct mi_value of the network/protocol slice in `net_proto_df`.
mi_values = net_proto_df["mi_value"].unique()

# squeeze=False keeps the returned grid 2-D even for a single panel; taking its
# only row makes `ax` an indexable 1-D array of Axes in every case (plain
# plt.subplots would hand back a bare Axes object when ncols == 1).
fig, ax = plt.subplots(nrows=1, ncols=len(mi_values), squeeze=False)
ax = ax[0]
fig.tight_layout()

for mi_ax, mi in zip(ax, mi_values):
    mi_net_proto_df = net_proto_df.loc[net_proto_df["mi_value"] == mi]
    for ssm in ss_methods:
        ssm_net_proto_df = mi_net_proto_df.loc[mi_net_proto_df["selection_metric"] == ssm]
        plot_series(series=ssm_net_proto_df, ax=mi_ax, x=x, y=y, label=ssm)
    # the panel is restricted to one mi_value, so that value is its only tick
    mi_ax.set_xticks([mi])

# Shared decorations: legend right of the last panel, y label on the first
# panel, x label on the middle one.
ax[-1].legend(loc="center left", bbox_to_anchor=(1, 0.5))
ax[0].set_ylabel(y)
ax[len(ax) // 2].set_xlabel(x)
fig.suptitle(f"protocole: {proto}, network: {net}");


## Heatmaps

In [None]:
def plot_heatmap(
    vis_df: pd.DataFrame,
    heatmap_ax: plt.Axes,
    bar_ax: plt.Axes,
    vrange: tuple = (0, 100),
    cmap: str = "RdYlGn",
) -> None:
    """Draw ``vis_df`` as an annotated seaborn heatmap.

    Args:
        vis_df: Table to draw. Its index and column labels are rounded to two
            decimals for the tick labels, so both must be numeric.
        heatmap_ax: Axes that receives the heatmap itself.
        bar_ax: Axes that receives the colour bar.
        vrange: ``(vmin, vmax)`` limits of the colour scale.
        cmap: Name of the colormap handed to seaborn.
    """
    sns.heatmap(
        vis_df,
        ax=heatmap_ax,
        cbar_ax=bar_ax,
        cmap=cmap,
        vmin=vrange[0],
        vmax=vrange[1],
        annot=True,
        annot_kws={"size": 9},
        fmt=".0f",  # cell annotations are printed without decimals
        yticklabels=vis_df.index.to_numpy().round(2),
        xticklabels=vis_df.columns.to_numpy().round(2),
        # documented seaborn parameter for the width of the cell separators
        linewidths=0.5,
    )
    # flip the vertical axis once the heatmap has been drawn
    heatmap_ax.invert_yaxis()

### Single plot

In [None]:
# Pivot layout of each heatmap: rows = x, columns = y, cell value = z.
x = "mi_value"
y = "seeding_budget"
z = "gain"

# Slice of the results that is visualised.
net = "aucs"
ssm = "greedy"

# One heatmap panel per protocol plus a narrow last column for the shared
# colour bar; the column count follows the data instead of assuming exactly
# two protocols.
proto_names = df["protocol"].unique()
fig, ax = plt.subplots(
    nrows=1,
    ncols=len(proto_names) + 1,
    figsize=(10, 4),
    gridspec_kw={"width_ratios": [49] * len(proto_names) + [2]},
)
fig.tight_layout(pad=0.5, rect=(0.05, 0.05, 0.95, 0.95))
title = f"{z} achieved by {ssm} s.s. method on {net} net; protocols from left:"

for idx, proto in enumerate(proto_names):
    df_plotted = df.loc[
        (df["network"] == net) &
        (df["protocol"] == proto) &
        (df["selection_metric"] == ssm)
    ]
    df_plotted = pd.pivot_table(df_plotted, index=x, columns=y, values=z)
    if df_plotted.empty:
        # no rows for this network / protocol / method combination - nothing to draw
        continue
    plot_heatmap(df_plotted, ax[idx], ax[-1])
    title += f" {proto}"

fig.suptitle(title);

### Bulk plot (PDF)

In [None]:
# define values to visualise as well as attributes of plots
x = "mi_value"
y = "seeding_budget"
Z = {
    "gain": {"vrange": (0, 100), "cmap": "RdYlGn"},
    "diffusion_len": {"vrange": (0, df["diffusion_len"].max()), "cmap": "BuPu"},
}

# canvas layout: one heatmap panel per protocol followed by a narrow column
# that holds the shared colour bar
panel_widths = [49] * len(protocols) + [2]

# build the path of the PDF that collects all visualisations
workdir = Path(".")
workdir.mkdir(exist_ok=True)
pdf_path = workdir.joinpath(f"heatmaps_{'_'.join(Z)}_by_{x}_{y}.pdf")

# the context manager closes the PDF even when rendering a page raises,
# so an interrupted run does not leave the file open
with PdfPages(pdf_path) as pdf:

    for net in sorted(nets):

        # a dummy plot that contains just name of processed network
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 4))
        ax.set_visible(False)
        fig.suptitle(f"{net} network - results", x=0.5, y=.5, fontsize=15)
        fig.savefig(pdf, format='pdf')
        plt.close(fig)

        for ssm in sorted(ss_methods):

            for z, z_attrs in Z.items():
                print(f"processing: {net}, {ssm}, {z}")

                # prepare canvas - one panel per protocol, then the colour bar
                fig, ax = plt.subplots(
                    nrows=1,
                    ncols=len(panel_widths),
                    figsize=(12, 4),
                    gridspec_kw={'width_ratios': panel_widths}
                )
                fig.tight_layout(pad=0.5, rect=(0.1, 0.1, 0.9, 0.9))
                title = (
                    f"{z} achieved by {ssm} s.s. method on {net} network; "
                    "protocols from left:"
                )

                # prepare and plot heatmap for each proto
                for idx, proto in enumerate(protocols):
                    df_plot = df.loc[
                        (df["network"] == net) &
                        (df["protocol"] == proto) &
                        (df["selection_metric"] == ssm)
                    ]
                    df_plot = pd.pivot_table(df_plot, index=x, columns=y, values=z)
                    if len(df_plot) == 0:  # greedy wasn't evaluated for all nets
                        continue
                    plot_heatmap(
                        df_plot, ax[idx], ax[-1], z_attrs["vrange"], z_attrs["cmap"]
                    )
                    title += f" {proto}"

                # add title and save plot to pdf; closing the figure keeps
                # memory flat across the many pages
                fig.suptitle(title)
                fig.savefig(pdf, format='pdf')
                plt.close(fig)