In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
import warnings
from scipy.stats import spearmanr
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

In [11]:
def spearman_analysis(
    S,
    data_dir="~/GitHub/Block_Importance_Quantification/examples/data/simulation/",
    results_dir=None,
    n_blocks=8,
    block_size=32,
    r2_threshold=0.9,
):
    """Print Spearman rank agreement between block-importance scores and ground truth.

    For scenario ``S`` the function loads the training design matrix, the
    response and the coefficient matrix ``beta``, derives four ground-truth
    block rankings from them, and correlates each with the matching table of
    per-run scores stored in the results directory:

    * knock-in:  ``-RMSE / IQR(y)`` when predicting ``y`` from one block alone
      (``x_b' beta_bb x_b``); negated so that a better fit ranks higher.
    * knock-out: ``RMSE / IQR(y)`` when predicting ``y`` with that block's
      diagonal sub-matrix of ``beta`` set to zero.
    * VarGrad max / mean: max and mean of ``|beta_bb|``, rounded to 3 decimals.

    Only runs (rows) whose value in the first column of ``r2.csv`` exceeds
    ``r2_threshold`` are used. For every measure the mean and standard
    deviation of the per-run Spearman coefficients are printed.

    Parameters
    ----------
    S : str
        Scenario suffix used in ``y_train_<S>.csv``, ``beta_<S>.csv`` and in
        the default results directory ``results_<S>``.
    data_dir : str or path-like, optional
        Directory holding ``X_train.csv``, ``y_train_<S>.csv`` and
        ``beta_<S>.csv``. ``~`` is expanded.
    results_dir : str or path-like, optional
        Directory holding ``r2.csv``, ``MI_knock_out.csv``,
        ``MI_knock_in.csv``, ``vargrad_max.csv`` and ``vargrad_mean.csv``.
        Defaults to ``results_<S>`` relative to the working directory.
    n_blocks : int, optional
        Number of consecutive feature blocks (default 8).
    block_size : int, optional
        Number of features per block (default 32).
    r2_threshold : float, optional
        Runs with R^2 at or below this value are discarded (default 0.9).

    Returns
    -------
    None
        Results are printed, one line per measure.

    Raises
    ------
    ValueError
        If the response does not have exactly one column, or if ``beta`` is
        not square / does not match the number of features / is too small for
        the requested block layout.
    """
    from pathlib import Path  # local import: not provided by the notebook's import cell

    data_dir = Path(data_dir).expanduser()
    results_dir = Path("results_" + S) if results_dir is None else Path(results_dir).expanduser()

    def read_data(name):
        # The first CSV column is dropped on load; everything else is used as
        # a plain float array so that matrix products never depend on pandas
        # label alignment.
        return pd.read_csv(data_dir / name).iloc[:, 1:].to_numpy(dtype=float)

    X_train = read_data("X_train.csv")
    y_2d = read_data("y_train_" + S + ".csv")
    beta = read_data("beta_" + S + ".csv")

    if y_2d.shape[1] != 1:
        raise ValueError("expected a single response column, got %d" % y_2d.shape[1])
    y_train = y_2d[:, 0]

    n_features = X_train.shape[1]
    if beta.shape != (n_features, n_features):
        raise ValueError(
            "beta has shape %s but X_train has %d features" % (beta.shape, n_features)
        )
    if n_blocks * block_size > n_features:
        raise ValueError(
            "%d blocks of size %d need more than the %d available features"
            % (n_blocks, block_size, n_features)
        )

    # Keep only the runs whose R^2 clears the threshold.
    r2 = pd.read_csv(results_dir / "r2.csv").iloc[:, 0]
    kept_runs = np.flatnonzero(r2.to_numpy() > r2_threshold)

    def read_scores(name):
        return pd.read_csv(results_dir / name).iloc[kept_runs, :]

    KO = read_scores("MI_knock_out.csv")
    KI = read_scores("MI_knock_in.csv")
    V_max = read_scores("vargrad_max.csv")
    V_mean = read_scores("vargrad_mean.csv")

    block_slices = [
        slice(b * block_size, (b + 1) * block_size) for b in range(n_blocks)
    ]

    def quad_form(X, B):
        # Row-wise x' B x. Equals diag(X @ B @ X.T) without materialising the
        # n-by-n product.
        return np.einsum("ij,ij->i", X @ B, X)

    iqr = np.quantile(y_train, 0.75) - np.quantile(y_train, 0.25)

    def rmse_iqr(pred):
        # RMSE is computed with NumPy on purpose: the `squared=False` keyword
        # of sklearn's mean_squared_error was deprecated in scikit-learn 1.4
        # and removed in 1.6.
        return np.sqrt(np.mean((y_train - pred) ** 2)) / iqr

    # Ground truth knock-in: predict from a single block only.
    rmseiqr_KI = [
        -1 * rmse_iqr(quad_form(X_train[:, sl], beta[sl, sl])) for sl in block_slices
    ]

    # Ground truth knock-out: predict with one diagonal block of beta zeroed.
    rmseiqr_KO = []
    for sl in block_slices:
        beta_without_block = beta.copy()
        beta_without_block[sl, sl] = 0
        rmseiqr_KO.append(rmse_iqr(quad_form(X_train, beta_without_block)))

    # Ground truth for VarGrad: mean / max absolute coefficient per block.
    # NOTE: rounding to 3 decimals can introduce ties between blocks, which
    # caps the Spearman coefficient a measure can reach.
    abs_blocks = [np.abs(beta[sl, sl]) for sl in block_slices]
    beta_summary = pd.DataFrame(
        {
            "mean": [np.mean(block) for block in abs_blocks],
            "max": [np.max(block) for block in abs_blocks],
        }
    ).round(3)

    def corr_summary(metric, measure):
        """Mean and std of the per-run Spearman correlation with `measure`."""
        rhos = [spearmanr(run_scores, measure)[0] for run_scores in metric.to_numpy()]
        if not rhos:
            # No run passed the R^2 filter.
            return float("nan"), float("nan")
        # Plain floats keep the printed tuple identical across NumPy versions.
        return float(np.mean(rhos)), float(np.std(rhos))

    print("MI knock in:", corr_summary(KI, rmseiqr_KI))
    print("MI knock out:", corr_summary(KO, rmseiqr_KO))

    print("VG max:", corr_summary(V_max, beta_summary["max"]))
    print("VG mean:", corr_summary(V_mean, beta_summary["mean"]))


In [12]:
# Scenario "S1a": prints (mean, std) of the per-run Spearman correlations for each measure.
spearman_analysis("S1a")

MI knock in: (0.9869047619047617, 0.011845088536983529)
MI knock out: (0.9904761904761905, 0.013883218797250681)
VG max: (0.9726190476190475, 0.01729266553134992)
VG mean: (0.9833333333333332, 0.01325658181626192)


In [13]:
# Scenario "S1b": prints (mean, std) of the per-run Spearman correlations for each measure.
# NOTE(review): "VG max" comes out as 0.5774 (= 1/sqrt(3)) with ~0 spread. With 8 blocks
# that is exactly the Spearman value obtained when the reference has seven tied entries
# and one extreme that the measure also ranks as extreme -- presumably the rounded
# max|beta| ground truth is tied across seven blocks here; confirm before interpreting.
spearman_analysis("S1b")

MI knock in: (0.9833333333333334, 0.020062261364705576)
MI knock out: (0.9845238095238095, 0.017292665531349897)
VG max: (0.5773502691896258, 1.1102230246251565e-16)
VG mean: (0.9523809523809522, 0.02916059217599022)


In [14]:
# Scenario "S1c": prints (mean, std) of the per-run Spearman correlations for each measure.
spearman_analysis("S1c")

MI knock in: (0.8148148148148151, 0.026978939225358618)
MI knock out: (0.8492063492063493, 0.038880789567986906)
VG max: (0.925925925925926, 0.0249576220424778)
VG mean: (0.9597546098342258, 0.02030613569606609)


In [15]:
# Scenario "S2a": prints (mean, std) of the per-run Spearman correlations for each measure.
spearman_analysis("S2a")

MI knock in: (0.9952380952380953, 0.00952380952380949)
MI knock out: (0.9738095238095237, 0.016666666666666653)
VG max: (0.9760654034593061, 0.01672394060407837)
VG mean: (0.9738095238095239, 0.01666666666666665)


In [16]:
# Scenario "S2b": prints (mean, std) of the per-run Spearman correlations for each measure.
# NOTE(review): "VG max" comes out as 0.5774 (= 1/sqrt(3)) with ~0 spread. With 8 blocks
# that is exactly the Spearman value obtained when the reference has seven tied entries
# and one extreme that the measure also ranks as extreme -- presumably the rounded
# max|beta| ground truth is tied across seven blocks here; confirm before interpreting.
spearman_analysis("S2b")

MI knock in: (0.9880952380952379, 0.011904761904761862)
MI knock out: (0.9976190476190476, 0.007142857142857117)
VG max: (0.5773502691896258, 1.1102230246251565e-16)
VG mean: (0.9547619047619046, 0.021162367660275207)


In [17]:
# Scenario "S2c": prints (mean, std) of the per-run Spearman correlations for each measure.
# NOTE(review): "VG mean" is negative here (about -0.40), unlike every other scenario in
# this notebook -- check the mean|beta| ground truth for this scenario before reporting.
spearman_analysis("S2c")

MI knock in: (0.8922305764411029, 0.045286493733314934)
MI knock out: (0.8947368421052632, 0.06384831680630074)
VG max: (0.9083049702194768, 0.017917334028643258)
VG mean: (-0.396477325654442, 0.03601163045724992)
