# Run 'SimulationDataRun.ipynb' on all simulation datasets to get results before running this code.

# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn import svm
from sklearn.metrics import roc_curve,auc
from sklearn.metrics import RocCurveDisplay
from sklearn.model_selection import StratifiedKFold

import random

# In [None]:
# Directory prefix used when locating simulation result files.
simDataDir = "nonlinear/"

# Indices of the simulation iterations (1 through 20).
iterIdxs = list(range(1, 21))

# Per-iteration accumulators for the MGM results, plus the shared grid of
# 100 evenly spaced false-positive-rate values on [0, 1].
tprsMGM = []
aucsMGM = []
meanFprMGM = np.linspace(0, 1, 100)

# True label list: 50 ones followed by 50 zeros.
trueLabelDictionary = [1] * 50 + [0] * 50

# Feature names F1..F50 and F51..F100, kept as sets.
trueFeatureList = {"F%d" % featureIdx for featureIdx in range(1, 51)}
falseFeatureList = {"F%d" % featureIdx for featureIdx in range(51, 101)}

# Create the figure first: the per-run ROC curves drawn inside the loop below
# are plotted onto `ax`, so it has to exist before the first iteration.
fig, ax = plt.subplots(figsize=(10, 8))

for iterVal in iterIdxs:

    # Load the result data.
    # NOTE(review): this path does not depend on iterVal, so every iteration
    # reads the same file -- confirm whether a per-run file name was intended.
    resultFile = "XYData_likelihood_vals.txt"
    # A multi-character separator is handled by pandas' Python parsing engine;
    # requesting it explicitly avoids the ParserWarning about the fallback.
    result = pd.read_csv(
        resultFile,
        index_col=None,
        sep="---",
        engine="python",
        names=["Feature1", "Feature2", "Likelihood"],
    )

    # For every row that pairs "Y" with another feature and has a positive
    # likelihood, keep the other feature of the pair.
    selectedList = []
    for _, row in result.iterrows():
        feature1 = row["Feature1"]
        feature2 = row["Feature2"]
        likelihood = row["Likelihood"]
        if feature1 == "Y" and likelihood > 0:
            selectedList.append(feature2)
        elif feature2 == "Y" and likelihood > 0:
            selectedList.append(feature1)
    selectedSetMGM = set(selectedList)

    # Turn the binary selection into scores: selected features get a random
    # value in [0.9, 1.0], all others a random value in [0.0, 0.1]. The true
    # features are scored first and the false features second, which matches
    # the layout of trueLabelDictionary (ones first, then zeros).
    predictedLabelListMGM = [
        random.uniform(0.9, 1.0) if feature in selectedSetMGM else random.uniform(0.0, 0.1)
        for feature in list(trueFeatureList) + list(falseFeatureList)
    ]

    # Calculate AUC and draw this run's ROC curve as a faint grey line.
    viz = RocCurveDisplay.from_predictions(
        trueLabelDictionary,
        predictedLabelListMGM,
        color="grey",
        label=None,
        alpha=0.3,
        lw=1,
        ax=ax,
    )
    # Resample the curve onto the shared FPR grid so runs can be averaged.
    interpTprMGM = np.interp(meanFprMGM, viz.fpr, viz.tpr)
    interpTprMGM[0] = 0.0
    # Store result
    tprsMGM.append(interpTprMGM)
    aucsMGM.append(viz.roc_auc)

# Calculate standard deviation and mean
meanTprMGM = np.mean(tprsMGM, axis=0)
meanTprMGM[-1] = 1.0
meanAucMGM = auc(meanFprMGM, meanTprMGM)
stdAucMGM = np.std(aucsMGM)

# Mean ROC curve across all runs.
ax.plot(
    meanFprMGM,
    meanTprMGM,
    color="grey",
    label=r"Average ROC (AUC = %0.2f) from causalMGM" % (meanAucMGM),
    lw=2,
)

# Shaded band of +/- one standard deviation around the mean, clipped to [0, 1].
stdTprMGM = np.std(tprsMGM, axis=0)
tprsUpperMGM = np.minimum(meanTprMGM + stdTprMGM, 1)
tprsLowerMGM = np.maximum(meanTprMGM - stdTprMGM, 0)
ax.fill_between(
    meanFprMGM,
    tprsLowerMGM,
    tprsUpperMGM,
    color="grey",
    alpha=0.1,
)
# Degenerate line that only provides a legend entry for the per-run curves;
# the run count is taken from iterIdxs so the legend matches what is plotted.
ax.plot(
    [0, 0],
    [0, 0],
    lw=1,
    color="grey",
    label="%d ROCs (Run 1 to Run %d) from causalMGM" % (len(iterIdxs), len(iterIdxs)),
    alpha=0.2,
)




# Per-iteration accumulators for the DAG-DeepVASE results and the shared grid
# of false-positive-rate values used to average the ROC curves.
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)

# Variables V1..V20 are labelled 1 and V21..V40 are labelled 0. Both lists are
# the same for every iteration, so they are built once.
dnnFeatureNames = ["V" + str(i) for i in range(1, 41)]
labelList = [1] * 20 + [0] * 20

for iterVal in iterIdxs:

    resultFileName = "DNN_selected_associations_40p_" + str(iterVal) + "iter.csv"

    DNNDF = pd.read_csv(simDataDir + resultFileName, index_col=0)
    # Set of selected names for constant-time membership tests.
    selectedFeatures = set(DNNDF["Feature1"].values.tolist())

    # Selected variables get a random score in [0.9, 1.0], all others a random
    # score in [0.0, 0.1]; the order follows dnnFeatureNames / labelList.
    predictedLabelList = [
        random.uniform(0.9, 1.0) if featureName in selectedFeatures else random.uniform(0.0, 0.1)
        for featureName in dnnFeatureNames
    ]

    # Draw this run's ROC curve as a faint red line on the shared axes.
    viz = RocCurveDisplay.from_predictions(
        labelList,
        predictedLabelList,
        color="red",
        label=None,
        alpha=0.3,
        lw=1,
        ax=ax,
    )
    # Resample the curve onto the shared FPR grid so runs can be averaged.
    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    aucs.append(viz.roc_auc)


mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)

# Mean ROC curve across all runs.
ax.plot(
    mean_fpr,
    mean_tpr,
    color="red",
    label=r"Average ROC (AUC = %0.2f) from DAG-DeepVASE" % (mean_auc),
    lw=2,
)

# Shaded band of +/- one standard deviation around the mean, clipped to [0, 1].
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(
    mean_fpr,
    tprs_lower,
    tprs_upper,
    color="red",
    alpha=0.1,
)

# Degenerate artists that only contribute legend entries.
ax.fill_between([0], [0], [0], color="black", label=r"$\pm$ 1 std. dev.",)
# The run count is taken from iterIdxs so the legend matches what is plotted.
ax.plot(
    [0, 0],
    [0, 0],
    lw=1,
    color="red",
    label="%d ROCs (Run 1 to Run %d) from DAG-DeepVASE" % (len(iterIdxs), len(iterIdxs)),
    alpha=0.2,
)
# Diagonal reference line.
ax.plot([0, 1], [0, 1], linestyle="--", lw=2, color="black", label="Random guess", alpha=0.8)

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],)
plt.xlabel("False positive rate", fontsize=20)
plt.ylabel("True positive rate", fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend(loc="lower right", fontsize=12, frameon=False)
# plt.savefig("AverageAUCROC50True50FalseFrom20RepetitionsPartialNonlinearSimulationData.svg")
plt.show()

