In [None]:
import glob
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import seaborn as sns
from matplotlib.lines import Line2D
from matplotlib.ticker import StrMethodFormatter
from pathlib import Path

sys.path.append("../src/")

from graph_metrics import F1_score, matthews_correlation_coefficient, false_discovery_rate

In [None]:
# Parameter grid of the benchmark; the loading cell turns every combination
# into a file-name pattern.
GRIDS = list(range(4, 11))  # grid side lengths 4..10
Ts = [10, 150]  # values stored in the "Time Samples" column
SIGMAS = [1.0]
# Nine density levels (0.5 is not part of the sweep).
DENSITIES = [
    0.1, 0.2, 0.3, 0.4,
    0.6, 0.7, 0.8, 0.9, 1.0,
]
# LaTeX tick labels k/9, one per entry of DENSITIES and in the same order.
DENSITY_LABELS = [f"$\\frac{{{numerator}}}{{9}}$" for numerator in range(1, 10)]
MIN_VALUE = 0.1
MODE = False

In [None]:
# Root of the benchmark inputs. Every *_results_root below is a file-name
# prefix (ending in "r_") inside this directory.
data_root = "/path/to/benchmark/data/castle/"
# Every *_csv_filename is a suffix that the loading cell appends to the
# user's home directory to locate the cached results table.
_csv_dir = "/data/castle/"

# Baseline methods.
VAR_results_root = data_root + "VAR_graphs_results/r_"
VAR_csv_filename = _csv_dir + "VAR_graphs_results.csv"
PC_results_root = data_root + "PC_results/r_"
PC_csv_filename = _csv_dir + "PC_results.csv"
PCMCI_results_root = data_root + "PCMCI_results/r_"
PCMCI_csv_filename = _csv_dir + "PCMCI_results.csv"
DYNOTEARS_results_root = data_root + "DYNOT_results/r_"
DYNOTEARS_csv_filename = _csv_dir + "DYNOT_results.csv"

# CaStLe variants of the same methods.
castlePC_results_root = data_root + "PC_CaStLe_results/r_"
castlePC_csv_filename = _csv_dir + "PC_CaStLe_results.csv"
castlePCMCI_results_root = data_root + "PCMCI_CaStLe_results/r_"
castlePCMCI_csv_filename = _csv_dir + "PCMCI_CaStLe_results.csv"
castleVAR_results_root = data_root + "VAR_CaStLe_results/r_"
castleVAR_csv_filename = _csv_dir + "VAR_CaStLe_results.csv"
castleDYNOTEARS_results_root = data_root + "DYNOT_CaStLe_results/r_"
castleDYNOTEARS_csv_filename = _csv_dir + "DYNOT_CaStLe_results.csv"

In [None]:
# Registry of benchmarked methods: name -> [results glob prefix, CSV cache suffix].
# The loop below appends to each list:
#   index 2: the per-replicate results DataFrame;
#   index 3: (r_incomplete, r_complete, corrupted) -- only when the table had
#            to be rebuilt from the raw result files.
method_meta_data = {
    "VAR": [VAR_results_root, VAR_csv_filename],
    "CaStLe-VAR": [castleVAR_results_root, castleVAR_csv_filename],
    "PC": [PC_results_root, PC_csv_filename],
    "PCMCI": [PCMCI_results_root, PCMCI_csv_filename],
    "CaStLe-PC": [castlePC_results_root, castlePC_csv_filename],
    "CaStLe-PCMCI": [castlePCMCI_results_root, castlePCMCI_csv_filename],
    "DYNOTEARS": [DYNOTEARS_results_root, DYNOTEARS_csv_filename],
    "CaStLe-DYNOTEARS": [castleDYNOTEARS_results_root, castleDYNOTEARS_csv_filename],
}

SAVE_DATA = True  # write a rebuilt table back to its CSV cache
LOG_DATA_COMPLETED = True  # audit the replicate count of the input data
LOG_RESULTS_COMPLETED = True  # audit the replicate count of each method's results
EXPECTED_REPLICATES = 30  # files expected per parameter combination

# The input data does not depend on the method, so it is audited once (by the
# first method that needs a rebuild) and the lists are kept outside the loop
# so that later cells can still read them.
data_incomplete = []
data_complete = []
data_audit_done = False

for method in method_meta_data:
    print(method)
    results_root, csv_filename = method_meta_data[method][:2]
    csv_path = str(Path.home()) + csv_filename

    # Fast path: reuse the cached table. Any failure to read it (missing file,
    # parse error, ...) deliberately falls through to a rebuild.
    try:
        with open(csv_path, "rb") as f:
            results_df = pd.read_csv(f, index_col=0)
    except Exception as e:
        print("Exception encountered in method {}: {}".format(method, e))
    else:
        method_meta_data[method].append(results_df)
        continue

    # Slow path: rebuild the table from the per-replicate result files.
    audit_data_now = LOG_DATA_COMPLETED and not data_audit_done
    r_incomplete = []
    r_complete = []
    corrupted = []
    results_dict_list = []

    for grid_size in GRIDS:
        for T in Ts:
            for sigma in SIGMAS:
                for density in DENSITIES:
                    data_name = (
                        f"{grid_size}x{grid_size}_{T}T_{sigma}sigma_"
                        f"{density}density_{MIN_VALUE}minval_wMode-{MODE}_*"
                    )
                    DATA_PATH = data_root + data_name
                    RESULTS_PATH = results_root + data_name
                    result_files = glob.glob(RESULTS_PATH)

                    for filename in result_files:
                        replicate_num = filename.split("_")[-1].split(".")[0]
                        try:
                            # NOTE(security): allow_pickle=True unpickles arbitrary
                            # objects -- only load result files from a trusted source.
                            with open(filename, "rb") as npyfile:
                                (
                                    _coefficients,
                                    _reconstructed_graph,
                                    _true_graph_metrics,
                                    _recon_graph_metrics,
                                    _equivalence_matrix,
                                    f1_result,
                                ) = np.load(npyfile, allow_pickle=True)
                        except pickle.UnpicklingError:
                            print("Corrupted File Found!", filename)
                            # Record the path, not the (already closed) file object.
                            corrupted.append(filename)
                            continue
                        results_dict_list.append(
                            {
                                "Method": method,
                                "CaStLed": "CaStLed" if method.split("-")[0] == "CaStLe" else "Not CaStLed",
                                "Replicate ID": replicate_num,
                                "Grid Size": grid_size,
                                "Time Samples": T,
                                "$\\sigma$": sigma,
                                "Density": density,
                                "$F_1$ Score": f1_result[0],
                                "Precision": f1_result[1],
                                "Recall": f1_result[2],
                                "TP": f1_result[3],
                                "FP": f1_result[4],
                                "FN": f1_result[5],
                                "TN": f1_result[6],
                            }
                        )

                    if audit_data_now:
                        n_data_files = len(glob.glob(DATA_PATH))
                        if n_data_files != EXPECTED_REPLICATES:
                            data_incomplete.append((DATA_PATH, n_data_files))
                        else:
                            data_complete.append(DATA_PATH)
                    if LOG_RESULTS_COMPLETED:
                        if len(result_files) != EXPECTED_REPLICATES:
                            r_incomplete.append((RESULTS_PATH, len(result_files), grid_size, T, sigma, density))
                        else:
                            r_complete.append(RESULTS_PATH)

    data_audit_done = data_audit_done or audit_data_now

    results_df = pd.DataFrame(results_dict_list)
    if results_df.empty:
        # No readable result file: the confusion-matrix columns do not exist,
        # so the derived metrics below cannot be computed.
        print("No result files found for method {}; derived metrics skipped.".format(method))
    else:
        results_df["Matthews Correlation Coefficient"] = results_df.apply(
            lambda row: matthews_correlation_coefficient(row["TP"], row["FP"], row["FN"], row["TN"]), axis=1
        )
        results_df["False Discovery Rate"] = results_df.apply(
            lambda row: false_discovery_rate(row["FP"], row["TP"]), axis=1
        )
        positives = results_df["TP"] + results_df["FN"]
        negatives = results_df["FP"] + results_df["TN"]
        results_df["True Positive Rate"] = results_df["TP"] / positives
        results_df["False Positive Rate"] = results_df["FP"] / negatives
        results_df["False Negative Rate"] = results_df["FN"] / positives
        results_df["True Negative Rate"] = results_df["TN"] / negatives

    method_meta_data[method].append(results_df)
    method_meta_data[method].append((r_incomplete, r_complete, corrupted))

    # Cache only non-empty tables and, when the results audit is enabled,
    # only complete ones, so a partial run is never mistaken for a final one.
    results_are_complete = (not LOG_RESULTS_COMPLETED) or len(r_incomplete) == 0
    if SAVE_DATA and results_are_complete and not results_df.empty:
        os.makedirs(os.path.dirname(csv_path), exist_ok=True)
        with open(csv_path, "wb") as f:
            results_df.to_csv(f)

In [None]:
# Print the results audit of every method. The audit tuple
# (r_incomplete, r_complete, corrupted) sits at index 3 and exists only for
# methods whose table was rebuilt from raw files; cached methods have none.
for m, meta in method_meta_data.items():
    print(m)
    if len(meta) < 4:
        continue
    incomplete_m, complete_m, corrupted_m = meta[3]
    print("r_incomplete:{}, r_complete:{}, corrupted:{}".format(len(incomplete_m), len(complete_m), len(corrupted_m)))
    if len(incomplete_m) > 0:
        # (number of files found, glob pattern) for every incomplete combination.
        print([(r[1], r[0]) for r in incomplete_m])
    if len(corrupted_m) > 0:
        print("Corrupted files:")
        for f in corrupted_m:
            print(f)

In [None]:
# Stack the per-method tables (index 2 of every registry entry) into one frame.
dataframes = [meta[2] for meta in method_meta_data.values()]
complete_results = pd.concat(dataframes, ignore_index=True)
# Display names used in the figures; a single vectorised replace instead of
# two row-wise apply passes over the whole frame.
complete_results["Method"] = complete_results["Method"].replace(
    {"VAR": "Sparse VAR", "CaStLe-VAR": "CaStLe-Sparse VAR"}
)
complete_results

In [None]:
# Spot check: DYNOTEARS rows on the 10x10 grid with 150 time samples.
is_dynotears = complete_results["Method"] == "DYNOTEARS"
is_grid_10 = complete_results["Grid Size"] == 10
is_t_150 = complete_results["Time Samples"] == 150
complete_results[is_dynotears & is_grid_10 & is_t_150]

In [None]:
# Spot check for rows labelled "alt-CaStLe-PC".
# NOTE(review): no method with this name is registered in method_meta_data, so
# this is presumably empty unless a cached CSV contains such rows -- confirm.
is_alt_castle_pc = complete_results["Method"] == "alt-CaStLe-PC"
complete_results[is_alt_castle_pc]

In [None]:
# Spot check: DYNOTEARS rows on the 10x10 grid with 10 time samples.
dynotears_10x10_short = (
    (complete_results["Method"] == "DYNOTEARS")
    & (complete_results["Grid Size"] == 10)
    & (complete_results["Time Samples"] == 10)
)
complete_results[dynotears_10x10_short]

In [None]:
# Number of parameter combinations with an incomplete / complete set of input
# data files. The lists may not exist when no table had to be rebuilt.
try:
    print(len(data_incomplete), len(data_complete))
except NameError:
    print("Input-data audit not available (no results table was rebuilt).")

In [None]:
# Add the "Implementation" column used as line style and express MCC and F1
# as percentages. The scaling mutates complete_results in place, so it is
# guarded: "Implementation" only exists once this cell has run on the current
# frame, which makes re-running the cell a no-op instead of another x100.
if "Implementation" not in complete_results.columns:
    complete_results["Implementation"] = complete_results["CaStLed"]
    complete_results["Matthews Correlation Coefficient"] = complete_results["Matthews Correlation Coefficient"] * 100
    complete_results["$F_1$ Score"] = complete_results["$F_1$ Score"] * 100

In [None]:
# Spot check: rows of the "CaStLe-PC-stable" method, if any are present.
is_pc_stable = complete_results["Method"].eq("CaStLe-PC-stable")
complete_results[is_pc_stable]

In [None]:
# Presentation-sized plot elements with 1.5x fonts for the figures below.
sns.set_context(context="talk", font_scale=1.5)

In [None]:
# MCC against grid size at density 0.4, one panel per time-series length.
data = complete_results[complete_results["Density"] == 0.4]
hue_order = ["CaStLe-Sparse VAR", "Sparse VAR", "CaStLe-PC", "PC", "CaStLe-PCMCI", "PCMCI", "CaStLe-DYNOTEARS", "DYNOTEARS"]
style_order = ["CaStLed", "Not CaStLed"]
# Every colour is repeated twice so that each (CaStLe variant, base method)
# pair of hue_order shares one colour; trimmed to one colour per hue level.
palette = list(np.repeat(np.array(sns.color_palette("bright")), 2, axis=0))[: len(hue_order)]
g = sns.relplot(data=data[data["Time Samples"].isin([10, 150])], x="Grid Size", y="Matthews Correlation Coefficient",
                kind="line", col="Time Samples", hue="Method", hue_order=hue_order, palette=palette, style="Implementation", style_order=style_order, marker="o", aspect=6.5/3.0)
g.set_ylabels("Matthews\nCorrelation Coefficient")

# Rebuild the legend: rename the "Method" header, keep the headers, the base
# methods and the two style entries, and drop the per-method "CaStLe-..."
# entries (they share the colour of their base method).
handles, labels = g.axes[0][0].get_legend_handles_labels()
new_handles = []
new_labels = []
for handle, label in zip(handles, labels):
    if "Method" in label:
        label = "Parent-Identification Phase"
    if "CaStLed" in label or "CaStLe" not in label:
        new_handles.append(handle)
        new_labels.append(label)
g._legend.remove()
g.figure.legend(new_handles, new_labels,bbox_to_anchor=(1.05, 1), frameon=False)

# Pin the ticks to the grid sizes that were actually plotted before labelling
# them; labelling the automatic tick positions can truncate non-integer
# positions to wrong or duplicated "NxN" labels.
grid_ticks = sorted(data["Grid Size"].unique())
for ax in g.axes.flat:
    ax.set_xticks(grid_ticks)
    ax.set_xticklabels(["{}x{}".format(int(size), int(size)) for size in grid_ticks])
    ax.yaxis.set_major_formatter(StrMethodFormatter(u"{x:.0f}%"))

In [None]:
# One two-panel MCC figure (one panel per time-series length) per density.
hue_order = [
    'Sparse VAR', 'CaStLe-Sparse VAR', 'PC', 'PCMCI', 'CaStLe-PC',
    'CaStLe-PC-stable', 'CaStLe-PCMCI', 'DYNOTEARS', 'CaStLe-DYNOTEARS',
]
style_order = ["CaStLed", "Not CaStLed"]
unique_methods = ['CaStLe-PC-stable']
grid_sizes = complete_results["Grid Size"].unique()
for d in [0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9, 1.0]:
    data = complete_results[complete_results["Density"] == d]

    # Colours: a CaStLe variant shares the colour of its base method; methods
    # listed in unique_methods get the last colour of the "bright" palette.
    bright = sns.color_palette("bright")
    unique_color = bright[-1]
    base_color_map = {
        'Sparse VAR': bright[0],
        'PC': bright[1],
        'PCMCI': bright[2],
        'DYNOTEARS': bright[3],
    }
    palette = []
    for method in hue_order:
        if method in unique_methods:
            palette.append(unique_color)
            continue
        base_method = method.replace("CaStLe-", "").split('-')[0]
        palette.append(base_color_map.get(base_method, sns.color_palette("pastel")[0]))

    both_lengths = data[data["Time Samples"].isin([10, 150])]
    g = sns.relplot(data=both_lengths, x="Grid Size", y="Matthews Correlation Coefficient",
                    kind="line", col="Time Samples", hue="Method", hue_order=hue_order, palette=palette,
                    style="Implementation", style_order=style_order, marker="o", aspect=6.5/3.0)
    g.set_ylabels("Matthews\nCorrelation Coefficient")

    # Legend: one entry per base method (the CaStLe prefix is stripped and
    # duplicates are skipped), in hue_order.
    handles, labels = g.axes[0][0].get_legend_handles_labels()
    new_handles, new_labels = [], []
    for method in hue_order:
        if method not in labels:
            continue
        if "CaStLe-PC-stable" in method:
            new_label = "PC-Stable-Single"
        else:
            new_label = method.replace("CaStLe-", "")
        if new_label in new_labels:
            continue
        new_handles.append(handles[labels.index(method)])
        new_labels.append(new_label)

    # Invisible lines carry the two section headers of the legend.
    style_handles = [handles[labels.index(name)] for name in ('Not CaStLed', 'CaStLed')]
    new_labels = ['Parent-Identification Phase', *new_labels, 'Implementation', 'Not CaStLed', 'CaStLed']
    new_handles = [
        Line2D([0], [0], color='none'),
        *new_handles,
        Line2D([0], [0], color='none'),
        *style_handles,
    ]

    g._legend.remove()
    g.figure.legend(handles=new_handles, labels=new_labels, bbox_to_anchor=(1.05, 1), frameon=False, title=None)

    percent_format = u"{x:.0f}%"
    for ax in g.axes.flat:
        ax.set_xticks(grid_sizes)
        ax.set_xticklabels([f"{size}x{size}" for size in grid_sizes])
        # Only 0%, 50% and 100% are shown on the y axis.
        ax.set_yticks([0, 50, 100])
        ax.yaxis.set_major_formatter(StrMethodFormatter(percent_format))

    # Densities below 0.5 map to d*10, the others to d*10 - 1.
    if d < 0.5:
        sparsity_d = int(d*10)
    else:
        sparsity_d = int(d*10-1)
    plt.suptitle(r"Sparsity $d={}$".format(sparsity_d), y=1.05)

In [None]:
# All densities in one figure: one row of panels per sparsity index, one
# column per time-series length.
def _sparsity_index(row):
    """Return density*10 for densities below 0.5 and density*10 - 1 otherwise."""
    density = row["Density"]
    if density < 0.5:
        return int(density*10)
    return int(density*10-1)


def _method_color(method, base_colors, stable_color, fallback):
    """Pick the line colour of a method: CaStLe variants reuse their base colour."""
    if "CaStLe-PC-stable" in method:
        return stable_color
    if "CaStLe-" in method:
        base_method = method.replace("CaStLe-", "").split('-')[0]
        return base_colors.get(base_method, fallback)
    return base_colors.get(method, fallback)


data = complete_results.copy()
data["Density"] = data.apply(_sparsity_index, axis=1)

grid_sizes = complete_results["Grid Size"].unique()

hue_order = ["CaStLe-Sparse VAR", "Sparse VAR", "CaStLe-PC", "PC", "CaStLe-PCMCI", "PCMCI", "CaStLe-PC-stable", "CaStLe-DYNOTEARS", "DYNOTEARS"]
style_order = ["CaStLed", "Not CaStLed"]

bright_colors = sns.color_palette("bright")
base_color_map = dict(zip(['Sparse VAR', 'PC', 'PCMCI', 'DYNOTEARS'], bright_colors[:4]))
unique_color = bright_colors[-1]
palette = [
    _method_color(name, base_color_map, unique_color, sns.color_palette("pastel")[0])
    for name in hue_order
]

g = sns.relplot(
    data=data[data["Time Samples"].isin([10, 150])],
    x="Grid Size", y="Matthews Correlation Coefficient",
    kind="line", col="Time Samples", row="Density",
    hue="Method", hue_order=hue_order, palette=palette,
    style="Implementation", style_order=style_order,
    marker="o", aspect=6.5/3.0,
)
g.set_ylabels("Matthews\nCorrelation Coefficient")

grid_tick_labels = [f"{size}x{size}" for size in grid_sizes]
for ax in g.axes.flat:
    ax.set_xticks(grid_sizes)
    ax.set_xticklabels(grid_tick_labels)
    # Only 0%, 50% and 100% are shown on the y axis.
    ax.set_yticks([0, 50, 100])
    ax.yaxis.set_major_formatter(StrMethodFormatter(u"{x:.0f}%"))

# Legend: one entry per base method (CaStLe prefix stripped, duplicates
# skipped), followed by the two implementation styles.
handles, labels = g.axes[0][0].get_legend_handles_labels()
new_handles, new_labels = [], []
for method in hue_order:
    if method not in labels:
        continue
    new_label = "PC-Stable-Single" if "CaStLe-PC-stable" in method else method.replace("CaStLe-", "")
    if new_label not in new_labels:
        new_handles.append(handles[labels.index(method)])
        new_labels.append(new_label)

# Invisible lines carry the two section headers.
new_labels = ['Parent-Identification Phase'] + new_labels + ['Implementation', 'Not CaStLed', 'CaStLed']
style_handles = [handles[labels.index('Not CaStLed')], handles[labels.index('CaStLed')]]
new_handles = [Line2D([0], [0], color='none')] + new_handles + [Line2D([0], [0], color='none')] + style_handles

g._legend.remove()
g.figure.legend(handles=new_handles, labels=new_labels, bbox_to_anchor=(1.05, 1), frameon=False, title=None)

In [None]:
# NOTE(review): presumably a deliberate stop so that "Run All" halts before the
# cells below, which read `results_df` (the table of the last method processed
# by the loading loop) rather than `complete_results` -- confirm and consider
# moving those cells to a separate notebook.
raise KeyboardInterrupt()

In [None]:
# Box plots of MCC per density, coloured by time-series length, one panel per
# grid size. (The handle is called g_f1 although the metric shown is MCC.)
# x/y/hue are passed by keyword: recent seaborn releases no longer accept them
# positionally. catplot creates its own figure, so no plt.figure() is needed.
# `order` pins the box order to DENSITIES so DENSITY_LABELS line up.
g_f1 = sns.catplot(
    data=results_df,
    x="Density",
    y="Matthews Correlation Coefficient",
    hue="Time Samples",
    order=DENSITIES,
    col="Grid Size",
    col_wrap=3,
    kind="box",
)
g_f1.set_xticklabels(DENSITY_LABELS)
for ax in g_f1.axes.flat:
    # col_wrap hides the x tick labels of inner panels; show them everywhere.
    ax.tick_params(axis="x", labelbottom=True, labelrotation=0)
plt.tight_layout()

In [None]:
# F1 score per grid size as box plots, with each box's median written on it.
f1_by_grid = results_df.groupby(['Grid Size'])['$F_1$ Score']
f1_grid_medians = f1_by_grid.median().round(2)
# Small upward shift of the text relative to the median line.
vertical_offset = results_df['$F_1$ Score'].median() * 0.02

g_grids_f1 = sns.boxplot(x="Grid Size", y="$F_1$ Score", data=results_df, color="slategrey")
median_text_style = dict(horizontalalignment='center', size='x-small', color='w', weight='semibold')
for xtick, median in enumerate(f1_grid_medians):
    g_grids_f1.text(xtick, median + vertical_offset, median, **median_text_style)
fig = g_grids_f1.get_figure()

In [None]:
# MCC per grid size as box plots, with each box's median written on it.
mcc_column = "Matthews Correlation Coefficient"
mcc_grid_medians = results_df.groupby(['Grid Size'])[mcc_column].median().round(2)
# Small upward shift of the text relative to the median line.
vertical_offset = results_df[mcc_column].median() * 0.05

g_grids_mcc = sns.boxplot(data=results_df, x="Grid Size", y=mcc_column, color="slategrey")
for xtick, median in enumerate(mcc_grid_medians):
    g_grids_mcc.text(
        xtick,
        median + vertical_offset,
        median,
        horizontalalignment='center',
        size='x-small',
        color='w',
        weight='semibold',
    )
fig = g_grids_mcc.get_figure()

In [None]:
# Fix some params to show fewer datapoints
data = results_df[(results_df["$\sigma$"] == 1.0) & (results_df["Density"] == 0.3) & (results_df["Time Samples"] == 1000)].copy()
f1_grid_medians = data.groupby(['Grid Size'])['$F_1$ Score'].median().round(2)
f1_vertical_offset = data['$F_1$ Score'].median() * -0.022 # offset from median for display
mcc_grid_medians = data.groupby(['Grid Size'])["Matthews Correlation Coefficient"].median().round(2)
mcc_vertical_offset = data["Matthews Correlation Coefficient"].median() * -0.022 # offset from median for display

fig = plt.figure(figsize=(15,5))

plt.subplot(1, 2, 1)
g_grids_f1 = sns.boxplot(data=data, x="Grid Size", y="$F_1$ Score", color="slategrey", notch=True)
for xtick, median in enumerate(f1_grid_medians):
    g_grids_f1.text(xtick, median + f1_vertical_offset, median, 
            horizontalalignment='center', size='x-small', color='w', weight='semibold')
ax = g_grids_f1.axes
new_labels = ["{}x{}".format(label.get_text(), label.get_text()) for label in ax.get_xticklabels()]
ax.set_xticklabels(new_labels)

plt.subplot(1, 2, 2)
g_grids_mcc = sns.boxplot(data=data, x="Grid Size", y="Matthews Correlation Coefficient", color="slategrey", notch=True)
for xtick, median in enumerate(mcc_grid_medians):
    g_grids_mcc.text(xtick, median + mcc_vertical_offset, median, 
            horizontalalignment='center', size='x-small', color='w', weight='semibold')
ax = g_grids_mcc.axes
new_labels = ["{}x{}".format(label.get_text(), label.get_text()) for label in ax.get_xticklabels()]
ax.set_xticklabels(new_labels)
plt.tight_layout()

In [None]:
# MCC per time-series length for one fixed parameter slice
# (sigma = 1.0, density = 0.3, 10x10 grid), with the medians written on the boxes.
fig, t_axis = plt.subplots(figsize=(16, 9))
# r"$\sigma$" is the same column name as before, without the invalid "\s" escape.
data = results_df[(results_df[r"$\sigma$"] == 1.0) & (results_df["Density"] == 0.3) & (results_df["Grid Size"] == 10)].copy()
mcc_t_medians = data.groupby(['Time Samples'])["Matthews Correlation Coefficient"].median().round(2)
vertical_offset = data["Matthews Correlation Coefficient"].median() * 0.01 # offset from median for display

g_t_mcc = sns.boxplot(data=data, x="Time Samples", y="Matthews Correlation Coefficient", color="slategrey", notch=True, ax=t_axis)
for xtick, median in enumerate(mcc_t_medians):
    # `size` and `fontsize` are aliases of the same text property and must not
    # both be passed; only the explicit fontsize=12 is kept.
    g_t_mcc.text(xtick, median + vertical_offset, median,
            horizontalalignment='center', color='w', weight='semibold', fontsize=12)
g_t_mcc.tick_params(axis="x", labelrotation=45)
fig = g_t_mcc.get_figure()

In [None]:
# MCC per noise level sigma as box plots, with the medians written on the boxes.
# r"$\sigma$" is the same column name as before, written without the invalid
# "\s" escape sequence that newer Python versions warn about.
mcc_sigma_medians = results_df.groupby([r'$\sigma$'])["Matthews Correlation Coefficient"].median().round(2)
vertical_offset = results_df["Matthews Correlation Coefficient"].median() * 0.03 # offset from median for display

g_sigma_mcc = sns.boxplot(data=results_df, x=r"$\sigma$", y="Matthews Correlation Coefficient", color="slategrey", notch=False)
for xtick, median in enumerate(mcc_sigma_medians):
    g_sigma_mcc.text(xtick, median + vertical_offset, median,
            horizontalalignment='center', size='x-small', color='w', weight='semibold')
plt.xlabel("Sigma (St. Dev. of added Gaussian noise per timestep)")
plt.ylabel("Matthews Correlation Coefficient");
fig = g_sigma_mcc.get_figure()

In [None]:
# F1 score per density as box plots, one panel per (grid size, time-series
# length). FacetGrid creates its own figure, so the extra empty plt.figure()
# is gone; `fig` now refers to the figure that holds the plot.
g_f1 = sns.FacetGrid(results_df, col="Time Samples", row="Grid Size")
g_f1.map(sns.boxplot, "Density", "$F_1$ Score", order=DENSITIES);
g_f1.set_titles("{row_var}={row_name}\n{col_var}={col_name}")
g_f1.set_xticklabels(DENSITY_LABELS)
for ax in g_f1.axes.flat:
    # Show the x tick labels on every panel, not only on the bottom row.
    ax.tick_params(axis="x", labelbottom=True, labelrotation=0)
fig = g_f1.figure
plt.tight_layout()

In [None]:
# MCC per density as box plots, one panel per (grid size, time-series length).
g_mcc = (
    sns.FacetGrid(results_df, row="Grid Size", col="Time Samples")
    .map(sns.boxplot, "Density", "Matthews Correlation Coefficient", order=DENSITIES)
    .set_titles("{row_var}={row_name}\n{col_var}={col_name}")
    .set_ylabels("MCC")
    .set_xticklabels(DENSITY_LABELS)
)
# Show the x tick labels on every panel, not only on the bottom row.
for ax in g_mcc.axes.flat:
    ax.tick_params(axis="x", labelbottom=True, labelrotation=0)
plt.tight_layout()

In [None]:
# False discovery rate per density, restricted to the runs with 10 time
# samples; one panel per grid size.
short_series_results = results_df[results_df["Time Samples"] == 10]
g_fdr = (
    sns.FacetGrid(short_series_results, row="Grid Size", col="Time Samples")
    .map(sns.boxplot, "Density", "False Discovery Rate", order=DENSITIES)
    .set_titles("{row_var}={row_name}\n{col_var}={col_name}")
    .set_xticklabels(DENSITY_LABELS)
)
# Show the x tick labels on every panel, not only on the bottom row.
for ax in g_fdr.axes.flat:
    ax.tick_params(axis="x", labelbottom=True, labelrotation=0)
plt.tight_layout()

In [None]:
# True positive rate per density, one panel per (grid size, time-series length).
g_tpr = (
    sns.FacetGrid(results_df, row="Grid Size", col="Time Samples")
    .map(sns.boxplot, "Density", "True Positive Rate", order=DENSITIES)
    .set_titles("{row_var}={row_name}\n{col_var}={col_name}")
    .set_xticklabels(DENSITY_LABELS)
)
# Show the x tick labels on every panel, not only on the bottom row.
for ax in g_tpr.axes.flat:
    ax.tick_params(axis="x", labelbottom=True, labelrotation=0)
plt.tight_layout()

In [None]:
# False positive rate per density, one panel per (grid size, time-series length).
g_fpr = (
    sns.FacetGrid(results_df, row="Grid Size", col="Time Samples")
    .map(sns.boxplot, "Density", "False Positive Rate", order=DENSITIES)
    .set_titles("{row_var}={row_name}\n{col_var}={col_name}")
    .set_xticklabels(DENSITY_LABELS)
)
# Show the x tick labels on every panel, not only on the bottom row.
for ax in g_fpr.axes.flat:
    ax.tick_params(axis="x", labelbottom=True, labelrotation=0)
plt.tight_layout()

In [None]:
# True negative rate per density, one panel per (grid size, time-series length).
g_tnr = (
    sns.FacetGrid(results_df, row="Grid Size", col="Time Samples")
    .map(sns.boxplot, "Density", "True Negative Rate", order=DENSITIES)
    .set_titles("{row_var}={row_name}\n{col_var}={col_name}")
    .set_xticklabels(DENSITY_LABELS)
)
# Show the x tick labels on every panel, not only on the bottom row.
for ax in g_tnr.axes.flat:
    ax.tick_params(axis="x", labelbottom=True, labelrotation=0)
plt.tight_layout()

In [None]:
# False negative rate per density, one panel per (grid size, time-series length).
g_fnr = (
    sns.FacetGrid(results_df, row="Grid Size", col="Time Samples")
    .map(sns.boxplot, "Density", "False Negative Rate", order=DENSITIES)
    .set_titles("{row_var}={row_name}\n{col_var}={col_name}")
    .set_xticklabels(DENSITY_LABELS)
)
# Show the x tick labels on every panel, not only on the bottom row.
for ax in g_fnr.axes.flat:
    ax.tick_params(axis="x", labelbottom=True, labelrotation=0)
plt.tight_layout()

In [None]:
# MCC against sigma as one line per density, one panel per (grid size,
# time-series length), for a selection of time-series lengths.
selected_lengths = results_df[results_df["Time Samples"].isin([150, 350, 575, 775, 1000])]
g_mcc_lines = sns.FacetGrid(selected_lengths, col="Time Samples", row="Grid Size", hue="Density", legend_out=True, palette="magma_r")
# r"$\sigma$" is the same column name as before, written without the invalid
# "\s" escape sequence that newer Python versions warn about.
g_mcc_lines.map(sns.lineplot, r"$\sigma$", "Matthews Correlation Coefficient", marker='o');
g_mcc_lines.set_titles("{row_var}={row_name}\n{col_var}={col_name}")
for ax in g_mcc_lines.axes.flat:
    # Show the x tick labels on every panel, not only on the bottom row.
    ax.tick_params(axis="x", labelbottom=True)
g_mcc_lines.set_ylabels("MCC")
plt.tight_layout()
g_mcc_lines.add_legend(labels=DENSITY_LABELS)
sns.move_legend(g_mcc_lines, "lower center", ncol=10, bbox_to_anchor=(0.5, -0.05), frameon=True)

In [None]:
# Median MCC per sigma, rounded to two decimals. r'$\sigma$' is the same
# column name as before, without the invalid "\s" escape sequence.
test_medians = results_df.groupby([r'$\sigma$'])["Matthews Correlation Coefficient"].median().round(2)
test_medians

In [None]:
# Independent copy of the rows with sigma == 0.5, used by the cells below.
# r"$\sigma$" is the same column name as before, without the invalid "\s" escape.
filtered_results_df = results_df[results_df[r"$\sigma$"] == 0.5].copy()
filtered_results_df

In [None]:
# MCC against time-series length, one line per grid size and one panel per
# density (three panels per row).
g_mcc_density = sns.FacetGrid(
    filtered_results_df,
    col="Density",
    col_wrap=3,
    hue="Grid Size",
    palette="magma_r",
)
g_mcc_density.map(sns.lineplot, "Time Samples", "Matthews Correlation Coefficient", markers=True, ci=None);
g_mcc_density.add_legend()
g_mcc_density.set_ylabels("MCC")
# Panels and DENSITY_LABELS are paired by position.
for ax, density in zip(g_mcc_density.axes.flat, DENSITY_LABELS):
    ax.tick_params(labelbottom=True)
    ax.set_title("Density = " + density)
sns.move_legend(g_mcc_density, "lower center", ncol=10, bbox_to_anchor=(0.5, -0.1), frameon=True)
plt.tight_layout()

In [None]:
# Time samples per unit of grid side length. Plain column arithmetic gives the
# same values as the former row-wise apply and also works on an empty frame.
filtered_results_df["T/grid"] = filtered_results_df["Time Samples"] / filtered_results_df["Grid Size"]

# MCC against T/grid, one line per time-series length, one panel per density.
g_test = sns.FacetGrid(filtered_results_df, hue="Time Samples", col="Density", col_wrap=5)
g_test.map(sns.lineplot, "T/grid", "Matthews Correlation Coefficient", markers=True, ci=None);
g_test.add_legend()

In [None]:
# For every (grid size, time samples, density) group: the fraction of rows
# whose MCC reaches a threshold, for thresholds from `starting` to `ending`.
starting = 0.5
step = 0.1
ending = 0.9

# Thresholds are generated from an integer step count and include `ending`.
# With a float step, np.arange includes or drops the end point depending on
# rounding error, so the previous range was fragile.
n_steps = int(round((ending - starting) / step))
thresholds = [round(starting + i * step, 1) for i in range(n_steps + 1)]

grouped_mcc = filtered_results_df.groupby(["Grid Size", "Time Samples", "Density"])["Matthews Correlation Coefficient"]

# One frame per threshold, concatenated once at the end instead of growing a
# DataFrame inside the loop.
threshold_frames = []
for threshold in thresholds:
    new_frame = grouped_mcc.apply(lambda c: (c >= threshold).sum()/len(c))
    new_frame = new_frame.reset_index().rename(columns={"Matthews Correlation Coefficient": "Probability of Exceeding Threshold"})
    new_frame["MCC Threshold"] = threshold
    threshold_frames.append(new_frame)

prob_of_success = pd.concat(threshold_frames, ignore_index=True)
prob_of_success

In [None]:
# Linear fits of the exceedance probability against grid size at density 0.6:
# one panel per MCC threshold, one colour per time-series length.
with sns.axes_style("whitegrid"):
    density_06_rows = prob_of_success[prob_of_success["Density"] == 0.6]
    g_prob_of_success = sns.lmplot(
        data=density_06_rows,
        x="Grid Size",
        y="Probability of Exceeding Threshold",
        col="MCC Threshold",
        hue="Time Samples",
        col_wrap=2,
        palette="magma_r",
    )
    for ax in g_prob_of_success.axes.flat:
        # Show x tick labels on every panel and leave a margin around [0, 1].
        ax.tick_params(axis="x", labelbottom=True)
        ax.set(ylim=(-0.1, 1.1))
    sns.move_legend(g_prob_of_success, "upper left", bbox_to_anchor=(.62, .28), frameon=True, ncol=2)
    plt.tight_layout()