In [None]:
# Stretch the notebook container to the full browser width so wide figures fit.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import pandas as pd
import seaborn as sns

In [None]:
import sys, os

In [None]:
from datawand.parametrization import ParamHelper

# Instantiate the parameter helper with the pipeline JSON path and the
# process arguments; every later `ph.get(...)` call reads from this object.
ph = ParamHelper(
    "../pipelines/GcnProject.json",
    sys.argv,
)

# 1. Collect accuracy results

In [None]:
# Scalar experiment settings, fetched from the parameter helper in one pass.
# Note that the "split_type" parameter is stored under the name `cut_type`.
input_prefix, dataset_id, label_type, cut_type = (
    ph.get(param_name)
    for param_name in ("experiment_dir", "dataset_id", "label_type", "split_type")
)

In [None]:
# Value lists for the three swept parameters (one entry per experiment setting).
top_k_vals = ph.get("top_k_vals")
time_frame_vals = ph.get("time_frame_vals")
train_ratio_vals = ph.get("train_ratio_vals")

In [None]:
# Load one acc.csv per (top_k, time_frame, train_ratio) combination and stack
# them into a single frame.
#
# The per-run frames are collected in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies the accumulated rows on every
# iteration, and concatenating onto an empty object-dtype frame is deprecated
# in recent pandas and can upcast the numeric accuracy columns.
accuracy_columns = ["predictor","top_k","time_frame","train_ratio","train_acc","test_acc","val_acc"]
accuracy_frames = []
for k in top_k_vals:
    for delta in time_frame_vals:
        for cut in train_ratio_vals:
            # The train ratio appears twice in the path: in the run folder and in the split folder.
            acc_file_path = "%s/data/%s/k%i_t%i_r%.2f/%s_%.2f/%s/acc.csv" % (input_prefix,dataset_id,k,delta,cut,cut_type,cut,label_type)
            # The files have no header row; column names are supplied here.
            tmp_df = pd.read_csv(acc_file_path,sep=";",names=["predictor","train_acc","test_acc","val_acc"])
            # Tag every row with the configuration it came from.
            tmp_df["top_k"] = k
            tmp_df["time_frame"] = delta
            tmp_df["train_ratio"] = cut
            accuracy_frames.append(tmp_df)
if accuracy_frames:
    # ignore_index=True yields a fresh 0..n-1 index; the column selection
    # restores the documented column order.
    accuracy_df = pd.concat(accuracy_frames, ignore_index=True)[accuracy_columns]
else:
    # No parameter combinations configured: keep an empty frame with the expected schema.
    accuracy_df = pd.DataFrame(columns=accuracy_columns)

In [None]:
# Preview the first ten collected rows as a sanity check.
accuracy_df.head(n=10)

# 2. Visualization

In [None]:
import matplotlib.pyplot as plt

In [None]:
%matplotlib inline

## I. The effect of top_k and time_frame on accuracy

In [None]:
def factorplot_by_sets(df,x_col,hue_col="top_k"):
    """Draw train, test and validation accuracy as three side-by-side bar charts.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "train_acc", "test_acc" and "val_acc" as
        well as the columns named by `x_col` and `hue_col`.
    x_col : str
        Column placed on the x axis of every panel.
    hue_col : str, optional
        Column used to group/colour the bars (default "top_k").

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    # Each panel pairs an accuracy column with the title that describes it,
    # so the "Train" panel really shows train_acc and "Test" shows test_acc.
    panels = (
        ("train_acc", "Train"),
        ("test_acc", "Test"),
        ("val_acc", "Validation"),
    )
    f, axes = plt.subplots(1, len(panels), figsize=(20, 5), sharex=True)
    for ax, (acc_col, title) in zip(axes, panels):
        # Axes-level barplot draws straight onto the given subplot; no
        # throwaway FacetGrid figure is created, so nothing needs closing.
        sns.barplot(x=x_col, y=acc_col, hue=hue_col, data=df, palette="muted", ax=ax)
        ax.set_title(title)
        # Fixed y range keeps the three panels (and separate calls) comparable.
        ax.set_ylim(0,0.9)

### a.) Accuracy of GCN

In [None]:
# GCN rows only: time_frame on the x axis, bars grouped by the default hue (top_k).
factorplot_by_sets(
    accuracy_df.loc[accuracy_df["predictor"] == "gcn"],
    x_col="time_frame",
)

### b.) Accuracy of random predictor

In [None]:
# Random-predictor rows only: time_frame on the x axis, bars grouped by the default hue (top_k).
factorplot_by_sets(
    accuracy_df.loc[accuracy_df["predictor"] == "rnd"],
    x_col="time_frame",
)

### c.) Accuracy of partial weighted random predictor

In [None]:
# Partial weighted random predictor rows only: time_frame on the x axis,
# bars grouped by the default hue (top_k).
factorplot_by_sets(
    accuracy_df.loc[accuracy_df["predictor"] == "part_w_rnd"],
    x_col="time_frame",
)

## II. Performance of random predictors vs. GCN

Here I only show performance for time_frame=60 seconds; the train, test and validation panels are all drawn.

In [None]:
# Compare predictors against each other, restricted to rows with time_frame == 60;
# bars are grouped by the default hue (top_k).
factorplot_by_sets(
    accuracy_df.loc[accuracy_df["time_frame"] == 60],
    x_col="predictor",
)