In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches
plt.rcParams['figure.figsize'] = [15, 5]
import numpy as np
import seaborn as sns
import scipy.stats

import networkx as nx

import sys
sys.path.append('../')
from data_opener import open_dataset_and_ground_truth

In [None]:
def _as_variable_name(value):
    """Leave strings and missing values untouched; render anything else as the string of its integer value."""
    if pd.isnull(value) or isinstance(value, str):
        return value
    return str(int(value))

df_results = pd.read_csv("natural_stats.csv")
# NaN turns integer columns into floats: make sure that variable names are strings again.
for name_column in ("target", "association_chosen"):
    df_results[name_column] = df_results[name_column].apply(_as_variable_name)

In [None]:
# Long identifiers -> short display names. Each replacement is applied to the whole frame,
# one after the other, in the order listed here.
dataset_display_names = {
    "SynthNonlin/7ts2h": "7ts2h",
    "fMRI_processed_by_Nauta/returns/our_selection": "fMRI",
    "TestCLIM_N-5_T-250/returns": "CLIM",
    "FinanceCPT/returns/our_selection": "Finance",
    "VARProcess/returns": "VARProcess",
    "VARProcessNoCorr/returns": "VARProcessNoCorr",
    "VARLarge/returns": "VARLarge",
    "VARSmall/returns": "VARSmall",
}
for long_name, short_name in dataset_display_names.items():
    df_results = df_results.replace(to_replace=long_name, value=short_name)

Type 1 error: rejecting a null hypothesis that is actually true (false positive)
Type 2 error: failing to reject a null hypothesis that is actually false (false negative)

    
What would be TP, TN, FP, FN?
- Look at association with residuals, not just highest association.
- Change correlation to p-values
- Add within the algorithm 4 columns at each step: TP, TN, FP, FN counts on associations because it is too cumbersome to do it afterward.

So, what do I need to show?
- Step by step, how many inclusion errors have been made by the highest association?
  - More precisely: selecting a variable that isn't causal  => AKA False Positive w.r.t. the selection
  - Same question when restricting to data where the algorithm has made no wrong decision beforehand
  - Same question when restricting to data where the algorithm hasn't normally terminated
  - Same question when adjusting for the number of causal variables remaining to be selected
- Step by step, how many TP,FP,FN,TN with the association ?
  - Same question when restricting to data where the algorithm has made no wrong decision beforehand
  - Same question when restricting to data where the algorithm hasn't normally terminated
- Step by step, how many true positives and false positives have been made by the stopping criterion?
  - Same question when restricting to data where the algorithm has made no wrong decision beforehand
  - Same question when restricting to data where the algorithm has normally terminated
- Performance of final model vs Performance of causal model
  - Same question but for each number of selected variables

### ARDL - VARProcess

In [None]:
# I want to show that the training is long enough.
# show the distribution of the difference between two timesteps
# Narrow the results down to one configuration, one column at a time.
configuration = {
    "dataset": "VARSmall",
    "model": "ARDL",
    "association": "Pearson",
    "stopping_criterion": "f-test",
}
df = df_results
for column, wanted in configuration.items():
    df = df[df[column] == wanted]

In [None]:
# Distinct values found in the "model" column of the full results table.
df_results["model"].unique()

## Trained model statistics along algorithm

In [None]:
## I want to show the evolution of several indicators of the models at different iterations
# For every (filename, target) group of `df`, take the difference of each training metric
# between consecutive steps, then plot it per step with a 99% confidence interval.
metrics_name_list = ["train_sse","train_llh","train_aic", "train_rmse", "train_R2"]
# Metrics for which an increase is an improvement; for all the others a decrease is.
higher_is_better = {"train_llh","test_llh","train_R2","test_R2"}

newdf = []
for metric_name in metrics_name_list:
    for _, group in df.groupby(["filename","target"]):
        differenciated_metric = np.diff(group.sort_values("step")[metric_name])
        for i, element in enumerate(differenciated_metric):
            # the i-th difference is plotted at x = i + 2
            newdf.append({"new - old":element, "metric":metric_name,"Vars in new model":i+2})
newdf = pd.DataFrame(newdf)

# col_order pins the facet order, so each axis is looked up by metric name below
# instead of relying on the position of the facet.
g = sns.FacetGrid(data=newdf,col="metric", col_order=metrics_name_list, sharey=False)
g.map_dataframe(sns.lineplot,x="Vars in new model",y="new - old",errorbar=('ci', 99))

# Horizontal extent of the zero line and of the green zone: 2..10 at least,
# widened when some run has more steps than that.
x_start = 2
x_end = max(10, int(newdf["Vars in new model"].max()))

for metric_name, ax in g.axes_dict.items():
    ax.plot([x_start,x_end],[0,0],c="black")
    if metric_name in higher_is_better:
        # improvement is a positive difference: shade from 0 up to the top of the axis
        maxval = ax.get_ylim()[1]
        r = matplotlib.patches.Rectangle((x_start,0),x_end-x_start,maxval,alpha=0.2,facecolor="green")
    else:
        # improvement is a negative difference: shade from the bottom of the axis up to 0
        minval = ax.get_ylim()[0]
        r = matplotlib.patches.Rectangle((x_start,minval),x_end-x_start,-minval,alpha=0.2,facecolor="green")
    ax.add_patch(r)

g.figure.subplots_adjust(top=0.72)
_=g.figure.suptitle("Difference between consecutive algorithm steps.\nThe new model is better than the previous model if the difference is in the green zone.")

In [None]:
## I want to show the evolution of several indicators of the models at different iterations
# For every (filename, target) group of `df`, take the difference of each test metric
# between consecutive steps, then plot it per step with a 99% confidence interval.
metrics_name_list = ["test_sse", "test_rmse", "test_R2"]
# Metrics for which an increase is an improvement; for all the others a decrease is.
higher_is_better = {"train_llh","test_llh","train_R2","test_R2"}

newdf = []
for metric_name in metrics_name_list:
    for _, group in df.groupby(["filename","target"]):
        differenciated_metric = np.diff(group.sort_values("step")[metric_name])
        for i, element in enumerate(differenciated_metric):
            # the i-th difference is plotted at x = i + 2
            newdf.append({"new - old":element, "metric":metric_name,"Vars in new model":i+2})
newdf = pd.DataFrame(newdf)

# col_order pins the facet order, so each axis is looked up by metric name below
# instead of relying on the position of the facet.
g = sns.FacetGrid(data=newdf,col="metric", col_order=metrics_name_list, sharey=False)
g.map_dataframe(sns.lineplot,x="Vars in new model",y="new - old",errorbar=('ci', 99))

# Horizontal extent of the zero line and of the green zone: 2..10 at least,
# widened when some run has more steps than that.
x_start = 2
x_end = max(10, int(newdf["Vars in new model"].max()))

for metric_name, ax in g.axes_dict.items():
    ax.plot([x_start,x_end],[0,0],c="black")
    if metric_name in higher_is_better:
        # improvement is a positive difference: shade from 0 up to the top of the axis
        maxval = ax.get_ylim()[1]
        r = matplotlib.patches.Rectangle((x_start,0),x_end-x_start,maxval,alpha=0.2,facecolor="green")
    else:
        # improvement is a negative difference: shade from the bottom of the axis up to 0
        minval = ax.get_ylim()[0]
        r = matplotlib.patches.Rectangle((x_start,minval),x_end-x_start,-minval,alpha=0.2,facecolor="green")
    ax.add_patch(r)

g.figure.subplots_adjust(top=0.72)
_=g.figure.suptitle("Difference between consecutive algorithm steps.\nThe new model is better than the previous model if the difference is in the green zone.")

In [None]:
# Column labels of the currently selected results frame.
df.columns

## Time spent at different levels of the algorithm

In [None]:
# Distribution plot of the "totaltime" column; the title goes on the grid's single (top-left) axis.
g = sns.displot(df["totaltime"])
_ = g.axes[0, 0].set_title("Distribution of the execution time (s) of the algorithm.")

In [None]:
# Two line plots against "step": the "previous_model_time" column and the "associations_time" column.
# The panels sit on a 1x2 grid so that no empty panel is left between them.
fig, (ax_model_time, ax_assoc_time) = plt.subplots(1, 2)
g = sns.lineplot(df,x="step",y="previous_model_time",ax=ax_model_time)
ax = ax_assoc_time
g = sns.lineplot(df,x="step",y="associations_time",ax=ax)

## Final itemset results

In [None]:
# overall TP, FP, TN, FN on the final result of the algorithm

# keep only the rows flagged as the last model (timestep of natural termination)
df1 = df[df["current_is_last_model"].notnull()]
df1 = df1[df1["current_is_last_model"]]
# column-wise totals of the four final counts
df1[["final_TP","final_FP","final_TN","final_FN"]].sum()

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Totals of the final confusion counts over the rows kept in df1.
# int() keeps the counts integral even if pandas summed the columns as floats.
TP,FP,TN,FN = (int(total) for total in df1[["final_TP","final_FP","final_TN","final_FN"]].sum(axis=0))

# Fill the matrix directly in the layout ConfusionMatrixDisplay expects:
# rows are the true label, columns the predicted label, ordered (Non-causal, Causal).
# Building it from the counts keeps it 2x2 even when one of the classes never occurs.
cm = np.array([[TN, FP],
               [FN, TP]])
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=["Non-causal","Causal"])
disp.plot()

In [None]:
## Final rmse of selected model vs rmse of causal model

# keep only the rows flagged as the last model (timestep of natural termination)
df1 = df[df["current_is_last_model"].notnull()]
df1 = df1[df1["current_is_last_model"]]

g = sns.scatterplot(df1,x="test_rmse",y="causal_test_rmse")
# remember the automatic limits, draw the y = x diagonal across them, then restore the limits
xlim = g.axes.get_xlim()
ylim = g.axes.get_ylim()
mini = min(xlim[0],ylim[0])
maxi = max(xlim[1],ylim[1])
g.axes.plot([mini,maxi],[mini,maxi],color="black")
g.axes.set_xlim(xlim)
g.axes.set_ylim(ylim)
g.axes.set_title("test RMSE of selected model vs test RMSE of Causal Model")

# one-sided paired Wilcoxon signed-rank test (alternative: selected < causal)
rmse_test = scipy.stats.wilcoxon(df1["test_rmse"],df1["causal_test_rmse"],alternative="less")
print("Using wilcoxon signed rank test:")
print("p-value of test H0: rmse of selected model >= rmse causal model: ",rmse_test.pvalue)

In [None]:
## Final r2 of selected model vs r2 of causal model

# keep only the rows flagged as the last model (timestep of natural termination)
df1 = df[df["current_is_last_model"].notnull()]
df1 = df1[df1["current_is_last_model"]]

g = sns.scatterplot(df1,x="test_R2",y="causal_test_R2")
# remember the automatic limits, draw the y = x diagonal across them, then restore the limits
xlim = g.axes.get_xlim()
ylim = g.axes.get_ylim()
mini = min(xlim[0],ylim[0])
maxi = max(xlim[1],ylim[1])
g.axes.plot([mini,maxi],[mini,maxi],color="black")
g.axes.set_xlim(xlim)
g.axes.set_ylim(ylim)
g.axes.set_title("test R2 of selected model vs test R2 of Causal Model")

# one-sided paired Wilcoxon signed-rank test (alternative: selected < causal)
r2_test = scipy.stats.wilcoxon(df1["test_R2"],df1["causal_test_R2"],alternative="less")
print("Using wilcoxon signed rank test:")
print("p-value of test H0: r2 of selected model >= r2 causal model: ",r2_test.pvalue)

## Choices along algorithm

In [None]:
# number of good and bad inclusions by step of the algo
# only include variables that are actually part of the returned set.

#select only timesteps before natural termination
# .eq(False) keeps the rows whose flag is False and drops both True and missing values.
# It is used instead of `~` because, on an object-dtype column of Python booleans
# (what read_csv produces when the column has missing values), `~` is a bitwise
# integer inversion (True -> -2, False -> -1) and not a logical negation.
df1 = df[df["should_have_stopped"].eq(False)]
#exclude timestep of termination since the variable is said non significant by stopping criterion
df1 = df1[df1["current_is_last_model"].eq(False)]

# aggregate by causal type; "train_aic" only serves as the column being counted
df1[["step","chosen_in_ground_truth","train_aic"]].groupby(["step","chosen_in_ground_truth"]).count().rename(columns={'train_aic':"Count"})

In [None]:
# overall TP, FP, TN, FN of association as the algo progress

#select only timesteps before natural termination
# .eq(False) keeps the rows whose flag is False and drops both True and missing values.
# It is used instead of `~` because, on an object-dtype column of Python booleans
# (what read_csv produces when the column has missing values), `~` is a bitwise
# integer inversion (True -> -2, False -> -1) and not a logical negation.
df1 = df[df["should_have_stopped"].eq(False)]
# here we include associations computed from the last trained model residuals (which doesnt lead to new variable selection).
# to only get associations where chosen variable is significant according to the stopping criterion, add line below:
# df1 = df1[df1["current_is_last_model"].eq(False)]

#aggregate TP, TN, FP, FN
print("TP: causal variable is detected as correlated with the residuals")
print("FP: noncausal variable is detected as correlated with the residuals")
df1[["step","associations_TP","associations_FP","associations_TN","associations_FN"]].groupby(["step"]).sum()


In [None]:
# overall TP, FP, TN, FN of the stopping criterion
#(TP means that the selected variable was causal and the stopping criterion said to continue)
#(FP means that the selected variable wasn't causal but the stopping criterion said to continue)
#(FN means that the selected variable was causal but the stopping criterion said to stop)
#(TN means that the selected variable wasn't causal and the stopping criterion said to stop)

# The criterion decides to stop when the stopping metric reaches this value.
# NOTE(review): presumably the metric is a p-value and this is the significance level - confirm.
STOPPING_THRESHOLD = 0.05

#select only timesteps before natural termination
# .eq(False) keeps the rows whose flag is False and drops both True and missing values.
# It is used instead of `~` because, on an object-dtype column of Python booleans
# (what read_csv produces when the column has missing values), `~` is a bitwise
# integer inversion (True -> -2, False -> -1) and not a logical negation.
df1 = df[df["should_have_stopped"].eq(False)]
#group by chosen_in_ground_truth, create column of stopping decision
# .assign returns a new frame instead of writing into a filtered slice of df
df1 = df1.assign(decide_to_stop=df1["stopping_metric"]>=STOPPING_THRESHOLD)
# "train_aic" only serves as the column being counted
res=df1[["chosen_in_ground_truth","decide_to_stop","train_aic"]].groupby(["chosen_in_ground_truth","decide_to_stop"]).count()
res = res.reset_index()
# (variable is causal, criterion says stop) -> outcome label
dico = {(True,False):"TP",(True,True):"FN",(False,False):"FP",(False,True):"TN"}
res["category"] = [dico[(bool(is_causal),bool(stops))]
                   for is_causal,stops in zip(res["chosen_in_ground_truth"],res["decide_to_stop"])]

print("Decisions by the stopping criterion.")
print("TP: the selected variable is causal, and the stopping criterion says that its inclusion is significant.")
print("FP: the selected variable isn't causal and the stopping criterion says that its inclusion is significant.")
res[["category","train_aic"]].rename(columns={"train_aic":"count"})

## Causal graph construction along algorithm

Individual example only

In [None]:
#get some dataset and file arbitrarily
dataset_name = "VARSmall/returns"
filename = df["filename"].unique()[0]

df1 = df[df["filename"]==filename]
#select before natural termination
# .eq(False) keeps the rows whose flag is False and drops both True and missing values.
# It is used instead of `~` because, on an object-dtype column of Python booleans
# (what read_csv produces when the column has missing values), `~` is a bitwise
# integer inversion (True -> -2, False -> -1) and not a logical negation.
df1 = df1[df1["should_have_stopped"].eq(False)]
#exclude timestep of termination since the variable is said non significant by stopping criterion
df1 = df1[df1["current_is_last_model"].eq(False)]
#relevant columns
df1=df1[["dataset","filename","target","step","association_chosen","chosen_in_ground_truth"]]

In [None]:
# Load the ground truth of the example file; the first returned value is not used.
# NOTE(review): causaldict is later indexed by target name and tested for membership of the
# chosen variable, so it presumably maps each target to its causal variables - confirm.
_, var_names, causaldict = open_dataset_and_ground_truth(dataset_name, filename, rootdir="../")

In [None]:
# One graph per algorithm step: stepG[0] only holds the targets (as self-loops); every later
# graph is a copy of the previous one with the edges chosen at that step added.
stepG = []
targetlist = df1["target"].unique()
#initial step
G = nx.DiGraph()
for target in targetlist:
    G.add_edge(target,target, color="black")
stepG.append(G)
#successive steps
for step in sorted(df1["step"].unique()):
    df_step = df1[df1["step"]==step]
    G=G.copy()
    # edges inherited from earlier steps are thin; the ones added below are thick
    nx.set_edge_attributes(G,1,'width')
    for index,row in df_step.iterrows():
        color = "r" if row["association_chosen"] not in causaldict[row["target"]] else "g"  # color by causal status
        width = 4
        G.add_edge(row["association_chosen"],row["target"],color=color, width=width)
    stepG.append(G)

# Ground-truth graph: one edge from every cause to its target.
referenceG = nx.DiGraph()
for target in targetlist:
    referenceG.add_node(target)
    for cause in causaldict[target]:
        referenceG.add_edge(cause,target)
# Variables selected by the algorithm but absent from the ground truth must also be nodes of
# referenceG: the node layout used for drawing is computed from referenceG, and a node
# without a position cannot be drawn. (Adding them to G was a no-op: G already has them.)
referenceG.add_nodes_from(G.nodes)

In [None]:
# Default figure size applied to the figures created after this cell.
plt.rcParams['figure.figsize'] = [15, 5]

In [None]:
# Draw the ground-truth graph followed by the graph reached after each step.
pos=nx.circular_layout(referenceG)
connectionstyle='arc3, rad = 0.1'
nbgraph = len(stepG)
# Panels needed: 1 (ground truth) + len(stepG)-1 (step graphs) = nbgraph.
# Two rows, with the number of columns rounded up so that an odd panel count still fits.
nrows = 2
ncols = max(1,(nbgraph+1)//2)

plt.subplot(nrows,ncols,1)
nx.draw(referenceG,pos=pos, connectionstyle=connectionstyle,
        labels={node: node for node in referenceG.nodes()})
plt.title("Ground truth")

for i in range(len(stepG)-1):
    plt.subplot(nrows,ncols,2+i)
    # per-edge colours and widths, both listed in the graph's edge order
    colors = list(nx.get_edge_attributes(stepG[i+1],'color').values())
    labels = {node: node for node in stepG[i+1].nodes()}
    width = list(nx.get_edge_attributes(stepG[i+1],'width').values())
    nx.draw(stepG[i+1],pos=pos,
            edge_color = colors,
            labels=labels,
            width = width,
            connectionstyle=connectionstyle)
    plt.title("Iteration "+str(i+1))
plt.savefig("temp.svg")

# Plot comparative statistics

In [None]:
# All configurations evaluated on the "VARSmall" dataset.
df = df_results[df_results["dataset"]=="VARSmall"]

#select only timestep of natural termination
df1 = df[df["current_is_last_model"].notnull()]
df1 = df1[df1["current_is_last_model"]]

# Label of each configuration: "<association>,<stopping criterion>".
# .assign returns a new frame instead of writing into a filtered slice of df.
df1 = df1.assign(**{"assoc,stopping": df1['association']+","+df1["stopping_criterion"]})

sns.boxplot(df1, x="assoc,stopping" , y="test_R2")

In [None]:
# "final_precision" of the final models, one box per "assoc,stopping" label.
sns.boxplot(data=df1, x="assoc,stopping", y="final_precision")

In [None]:
# "final_recall" of the final models, one box per "assoc,stopping" label.
sns.boxplot(data=df1, x="assoc,stopping", y="final_recall")

In [None]:
# "test_rmse" of the final models, one box per "assoc,stopping" label.
sns.boxplot(data=df1, x="assoc,stopping", y="test_rmse")