In [2]:
import numpy as np
from src.run_shap import load_shap_vals
import pickle
from tqdm import tqdm
from src.utils import token_segments
from src.utils import legacy_get_dataset_info
from transformers import AutoTokenizer
import shap
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from matplotlib import rcParams

  from .autonotebook import tqdm as notebook_tqdm


## Kendalls tau

In [3]:
results = {}
tab_scale_factor = 1
rows = []
for ds_name in [
    "prod_sent",
    "kick",
    "jigsaw",
    "wine",
    "fake",
    "imdb_genre",
    "channel",
    "airbnb",
    "salary",
]:  #
    for text_model_code in [
        "disbert",
        "bert",
        "drob",
        "deberta",
    ]:
        # summary_plot(ds_name)
        di = get_dataset_info(ds_name)
        tab_pre = f"_sf{tab_scale_factor}" if tab_scale_factor != 2 else ""
        vals = []
        for name in [
            "ensemble_25",
            "ensemble_50",
            "ensemble_75",
            "stack",
            "all_text",
        ]:
            filepath = f"../models/shap_vals_{text_model_code}{tab_pre}/{ds_name}/summed_shap_vals_{name}.pkl"
            with open(filepath, "rb") as f:
                grouped_shap_vals = pickle.load(f)
            abs_ft = np.sum(np.abs(grouped_shap_vals), axis=0)
            cols = [f"(Tab) {col}" for col in di.tab_cols] + [
                f"(Text) {col}" for col in di.text_cols
            ]
            vals.append(abs_ft)

        name = "all_text_baseline"
        col_name_filepath = f"../models/shap_vals_{text_model_code}{tab_pre}/{ds_name}/col_names_shap_vals_all_text_baseline.pkl"
        colon_filepath = f"../models/shap_vals_{text_model_code}{tab_pre}/{ds_name}/colon_shap_vals_all_text_baseline.pkl"
        fts_filepath = f"../models/shap_vals_{text_model_code}{tab_pre}/{ds_name}/summed_shap_vals_all_text_baseline.pkl"

        with open(col_name_filepath, "rb") as f:
            grouped_col_name_shap_vals = pickle.load(f)
        with open(colon_filepath, "rb") as f:
            grouped_colon_shap_vals = pickle.load(f)
        with open(fts_filepath, "rb") as f:
            grouped_shap_vals = pickle.load(f)
        val_only = (
            grouped_shap_vals - grouped_col_name_shap_vals - grouped_colon_shap_vals
        )
        grouped_template = np.mean(
            grouped_shap_vals - val_only, axis=2, keepdims=True)

        cols = [f"(Tab) {col}" for col in di.tab_cols] + [
            f"(Text) {col}" for col in di.text_cols
        ]
        abs_ft = np.sum(np.abs(grouped_shap_vals), axis=0)
        vals.append(abs_ft)
        rows.append([ds_name, text_model_code, vals])
pre_tau_df = pd.DataFrame(
    rows,
    columns=["ds_name", "text_model", "vals"],
)
# unranked_df_no_template.to_csv("unranked_df_no_template.csv", index=False)

No model type specified for prod_sent. (This is fine during dataset creation)
No model type specified for kick. (This is fine during dataset creation)
No model type specified for jigsaw. (This is fine during dataset creation)
No model type specified for wine. (This is fine during dataset creation)
No model type specified for fake. (This is fine during dataset creation)
No model type specified for imdb_genre. (This is fine during dataset creation)
No model type specified for channel. (This is fine during dataset creation)
No model type specified for airbnb. (This is fine during dataset creation)
No model type specified for salary. (This is fine during dataset creation)


In [4]:
# Now for the whole tau_df
from scipy import stats

methods = [
    "ensemble_25",
    "ensemble_50",
    "ensemble_75",
    "stack",
    "all_text",
    "all_text_baseline",
]
method_comparions = []
for row in pre_tau_df.itertuples():
    for i, method in enumerate(methods):
        for j, method2 in enumerate(methods):
            if i < j:
                taus = []
                p_values = []
                vals = np.stack(row.vals, axis=2)
                for instance in vals:
                    tau, p_value = stats.kendalltau(
                        instance[:, i], instance[:, j])
                    taus.append(tau)
                    p_values.append(p_value)
                method_comparions.append(
                    [
                        row.ds_name,
                        row.text_model,
                        method + " vs " + method2,
                        np.mean(taus),
                        np.std(taus),
                        taus,
                        p_values,
                    ]
                )

In [5]:
tau_df = pd.DataFrame(
    method_comparions,
    columns=[
        "ds_name",
        "text_model",
        "methodA_vs_methodB",
        "mean_tau",
        "std_tau",
        "taus",
        "p_values",
    ],
)
tau_df.to_csv("tau_df.csv", index=False)


In [6]:
all_comparsions = {}
for row in tau_df.itertuples():
    # Exclude the following:
    # 1. ds_name == "channel" and method in ["all_text_baseline", "all_text"]
    # 2. ds_name == "salary" and method == "stack"
    # 3. ds_name == "wine" and method == "stack"
    # 4. ds_name == "prod_sent"
    if row.ds_name == "channel" and "all_text" in row.methodA_vs_methodB:
        continue
    if row.ds_name in ("salary", "wine") and "stack" in row.methodA_vs_methodB:
        continue
    if row.ds_name == "prod_sent":
        continue
    if row.methodA_vs_methodB not in all_comparsions:
        all_comparsions[row.methodA_vs_methodB] = []
    all_comparsions[row.methodA_vs_methodB].extend(row.taus)


In [7]:
all_comparsions_summary = {}
for key, val in all_comparsions.items():
    all_comparsions_summary[key] = [np.mean(val), np.std(val), len(val)]

In [8]:
summary_df = pd.DataFrame(all_comparsions_summary).T
summary_df.columns = ["mean_tau", "std_tau", "n"]

In [9]:
summary_df["mean (std)"] = summary_df.apply(
    lambda row: f"{row.mean_tau:.2f} ({row.std_tau:.2f})", axis=1
)


In [10]:
summary_df

Unnamed: 0,mean_tau,std_tau,n,mean (std)
ensemble_25 vs ensemble_50,0.827074,0.16945,3200.0,0.83 (0.17)
ensemble_25 vs ensemble_75,0.692781,0.239508,3200.0,0.69 (0.24)
ensemble_25 vs stack,0.628466,0.254488,2400.0,0.63 (0.25)
ensemble_25 vs all_text,0.361467,0.380843,2800.0,0.36 (0.38)
ensemble_25 vs all_text_baseline,0.297973,0.326997,2800.0,0.30 (0.33)
ensemble_50 vs ensemble_75,0.852899,0.156338,3200.0,0.85 (0.16)
ensemble_50 vs stack,0.620746,0.285503,2400.0,0.62 (0.29)
ensemble_50 vs all_text,0.450483,0.371557,2800.0,0.45 (0.37)
ensemble_50 vs all_text_baseline,0.382227,0.335375,2800.0,0.38 (0.34)
ensemble_75 vs stack,0.551946,0.306982,2400.0,0.55 (0.31)


## Kendalls tau, split by text models

In [11]:
results = {}
tab_scale_factor = 1
rows = []
for ds_name in [
    "prod_sent",
    "kick",
    "jigsaw",
    "wine",
    "fake",
    "imdb_genre",
    "channel",
    "airbnb",
    "salary",
]:  #
    for name in [
        "ensemble_25",
        "ensemble_50",
        "ensemble_75",
        "stack",
        "all_text",
    ]:
        # summary_plot(ds_name)
        di = get_dataset_info(ds_name)
        tab_pre = f"_sf{tab_scale_factor}" if tab_scale_factor != 2 else ""
        vals = []
        for text_model_code in [
            "disbert",
            "bert",
            "drob",
            "deberta",
        ]:
            filepath = f"../models/shap_vals_{text_model_code}{tab_pre}/{ds_name}/summed_shap_vals_{name}.pkl"
            with open(filepath, "rb") as f:
                grouped_shap_vals = pickle.load(f)
            abs_ft = np.sum(np.abs(grouped_shap_vals), axis=0)
            cols = [f"(Tab) {col}" for col in di.tab_cols] + [
                f"(Text) {col}" for col in di.text_cols
            ]
            vals.append(abs_ft)
        rows.append([ds_name, name, vals])

    name = "all_text_baseline"
    for text_model_code in [
        "disbert",
        "bert",
        "drob",
        "deberta",
    ]:
        col_name_filepath = f"../models/shap_vals_{text_model_code}{tab_pre}/{ds_name}/col_names_shap_vals_all_text_baseline.pkl"
        colon_filepath = f"../models/shap_vals_{text_model_code}{tab_pre}/{ds_name}/colon_shap_vals_all_text_baseline.pkl"
        fts_filepath = f"../models/shap_vals_{text_model_code}{tab_pre}/{ds_name}/summed_shap_vals_all_text_baseline.pkl"

        with open(col_name_filepath, "rb") as f:
            grouped_col_name_shap_vals = pickle.load(f)
        with open(colon_filepath, "rb") as f:
            grouped_colon_shap_vals = pickle.load(f)
        with open(fts_filepath, "rb") as f:
            grouped_shap_vals = pickle.load(f)
        val_only = (
            grouped_shap_vals - grouped_col_name_shap_vals - grouped_colon_shap_vals
        )
        grouped_template = np.mean(
            grouped_shap_vals - val_only, axis=2, keepdims=True)

        cols = [f"(Tab) {col}" for col in di.tab_cols] + [
            f"(Text) {col}" for col in di.text_cols
        ]
        abs_ft = np.sum(np.abs(grouped_shap_vals), axis=0)
        vals.append(abs_ft)
    rows.append([ds_name, name, vals])
pre_tau_df2 = pd.DataFrame(
    rows,
    columns=["ds_name", "method", "vals"],
)
# unranked_df_no_template.to_csv("unranked_df_no_template.csv", index=False)

No model type specified for prod_sent. (This is fine during dataset creation)
No model type specified for kick. (This is fine during dataset creation)
No model type specified for jigsaw. (This is fine during dataset creation)
No model type specified for wine. (This is fine during dataset creation)
No model type specified for fake. (This is fine during dataset creation)
No model type specified for imdb_genre. (This is fine during dataset creation)
No model type specified for channel. (This is fine during dataset creation)
No model type specified for airbnb. (This is fine during dataset creation)
No model type specified for salary. (This is fine during dataset creation)


In [12]:
np.stack(pre_tau_df2.iloc[0].vals).shape

(4, 100, 2)

In [13]:
pre_tau_df2


Unnamed: 0,ds_name,method,vals
0,prod_sent,ensemble_25,"[[[0.5008984502820633, 0.274237389327027], [0...."
1,prod_sent,ensemble_50,"[[[0.3339323001880409, 0.548474778654053], [0...."
2,prod_sent,ensemble_75,"[[[0.16696615009402102, 0.8227121679810816], [..."
3,prod_sent,stack,"[[[0.6967018983048695, 0.021057132623985314], ..."
4,prod_sent,all_text,"[[[0.6826972063363064, 0.18024434113060123], [..."
5,prod_sent,all_text_baseline,"[[[0.6826972063363064, 0.18024434113060123], [..."
6,kick,ensemble_25,"[[[0.13595617524406073, 0.0, 0.008954340475157..."
7,kick,ensemble_50,"[[[0.09063745016270844, 0.0, 0.003443258992492..."
8,kick,ensemble_75,"[[[0.001484591677663449, 0.001484591677663449,..."
9,kick,stack,"[[[0.08750044316429603, 0.0005075893585239008,..."


In [14]:
# Now for the whole tau_df
from scipy import stats

text_models = [
    "disbert",
    "bert",
    "drob",
    "deberta",
]
model_comparions = []
for row in pre_tau_df2.itertuples():
    for i, model in enumerate(text_models):
        for j, model2 in enumerate(text_models):
            if i < j:
                taus = []
                p_values = []
                vals = np.stack(row.vals, axis=2)
                for instance in vals:
                    tau, p_value = stats.kendalltau(
                        instance[:, i], instance[:, j])
                    taus.append(tau)
                    p_values.append(p_value)
                model_comparions.append(
                    [
                        row.ds_name,
                        row.method,
                        model + " vs " + model2,
                        np.mean(taus),
                        np.std(taus),
                        taus,
                        p_values,
                    ]
                )

KeyboardInterrupt: 

In [19]:
tau_df2 = pd.DataFrame(
    model_comparions,
    columns=[
        "ds_name",
        "method",
        "modelA_vs_modelB",
        "mean_tau",
        "std_tau",
        "taus",
        "p_values",
    ],
)
tau_df2.to_csv("tau_df_model.csv", index=False)


In [20]:
tau_df2


Unnamed: 0,ds_name,method,modelA_vs_modelB,mean_tau,std_tau,taus,p_values
0,prod_sent,ensemble_25,disbert vs bert,1.000000,0.000000,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
1,prod_sent,ensemble_25,disbert vs drob,1.000000,0.000000,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
2,prod_sent,ensemble_25,disbert vs deberta,1.000000,0.000000,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
3,prod_sent,ensemble_25,bert vs drob,1.000000,0.000000,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
4,prod_sent,ensemble_25,bert vs deberta,1.000000,0.000000,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
...,...,...,...,...,...,...,...
319,salary,all_text_baseline,disbert vs drob,0.417333,0.302673,"[0.4666666666666666, 0.2, 0.06666666666666665,...","[0.2722222222222222, 0.7194444444444444, 1.0, ..."
320,salary,all_text_baseline,disbert vs deberta,0.525333,0.261497,"[0.7333333333333333, 0.6, 0.3333333333333333, ...","[0.05555555555555555, 0.1361111111111111, 0.46..."
321,salary,all_text_baseline,bert vs drob,0.346667,0.285657,"[0.2, 0.3333333333333333, -0.4666666666666666,...","[0.7194444444444444, 0.46944444444444444, 0.27..."
322,salary,all_text_baseline,bert vs deberta,0.569333,0.242839,"[0.7333333333333333, 0.4666666666666666, -0.2,...","[0.05555555555555555, 0.2722222222222222, 0.71..."


In [21]:
# Exclude the following from tau_df2:
# 1. ds_name == "channel" and method in ["all_text_baseline", "all_text"]
mask1 = (tau_df2["ds_name"] == "channel") & (
    tau_df2["method"].isin(["all_text_baseline", "all_text"])
)

# 2. ds_name == "salary" and method == "stack"
mask2 = (tau_df2["ds_name"] == "salary") & (tau_df2["method"] == "stack")

# 3. ds_name == "wine" and method == "stack"
mask3 = (tau_df2["ds_name"] == "wine") & (tau_df2["method"] == "stack")

# 4. ds_name == "prod_sent"
mask4 = tau_df2["ds_name"] == "prod_sent"
exclude_mask = mask1 | mask2 | mask3 | mask4
tau_df2 = tau_df2.loc[~exclude_mask]


all_comparsions = {}
for row in tau_df2.itertuples():
    if row.modelA_vs_modelB not in all_comparsions:
        all_comparsions[row.modelA_vs_modelB] = []
    all_comparsions[row.modelA_vs_modelB].extend(row.taus)

In [22]:
pd.DataFrame(all_comparsions).describe()

Unnamed: 0,disbert vs bert,disbert vs drob,disbert vs deberta,bert vs drob,bert vs deberta,drob vs deberta
count,4400.0,4400.0,4400.0,4400.0,4400.0,4400.0
mean,0.747003,0.729292,0.699818,0.714736,0.689424,0.689842
std,0.267213,0.285095,0.284459,0.291593,0.283449,0.301491
min,-0.610738,-1.0,-0.716349,-0.712569,-0.8,-0.650727
25%,0.611111,0.6,0.6,0.6,0.581729,0.6
50%,0.8,0.8,0.782674,0.8,0.763763,0.777778
75%,0.947772,0.949686,0.931034,0.941176,0.925394,0.928571
max,1.0,1.0,1.0,1.0,1.0,1.0


In [24]:
all_comparsions_summary = {}
for key, val in all_comparsions.items():
    all_comparsions_summary[key] = [np.mean(val), np.std(val), len(val)]
summary_df = pd.DataFrame(all_comparsions_summary).T
summary_df.columns = ["mean_tau", "std_tau", "n"]
summary_df["mean (std)"] = summary_df.apply(
    lambda row: f"{row.mean_tau:.2f} ({row.std_tau:.2f})", axis=1
)


In [26]:
summary_df

Unnamed: 0,mean_tau,std_tau,n,mean (std)
disbert vs bert,0.747003,0.267183,4400.0,0.75 (0.27)
disbert vs drob,0.729292,0.285062,4400.0,0.73 (0.29)
disbert vs deberta,0.699818,0.284427,4400.0,0.70 (0.28)
bert vs drob,0.714736,0.29156,4400.0,0.71 (0.29)
bert vs deberta,0.689424,0.283417,4400.0,0.69 (0.28)
drob vs deberta,0.689842,0.301456,4400.0,0.69 (0.30)
