In [1]:
# Import libraries
import os
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import random
import json
from typing import Union
import numpy as np
import itertools
from disagreement import Disagreement
import joblib
import re

In [2]:
from compactor.MaxNonActivatedCompactor import MaxNonActivatedCompactor
from compactor.ActivatedCompactor import ActivatedCompactor
from compactor.MaxCompactor import MaxCompactor

In [1]:
!ls /storage/scratch/e17-fyp-xai/projects/e17-4yp-using-machine-learning-in-high-stake-settings/code/new/model_outputs/artifacts/lgbm_classifier_numl_100_md_10_lr_0.05/

lgbm_classifier_numl_100_md_10_lr_0.05_fold_1_2016-01-07.sav
lgbm_classifier_numl_100_md_10_lr_0.05_fold_2_2015-09-09.sav
lgbm_classifier_numl_100_md_10_lr_0.05_fold_3_2015-05-12.sav
lgbm_classifier_numl_100_md_10_lr_0.05_fold_4_2015-01-12.sav
lgbm_classifier_numl_100_md_10_lr_0.05_fold_5_2014-09-14.sav
lgbm_classifier_numl_100_md_10_lr_0.05_fold_6_2014-05-17.sav
test_prediction_fold_1_2016-01-07.csv
test_prediction_fold_2_2015-09-09.csv
test_prediction_fold_3_2015-05-12.csv
test_prediction_fold_4_2015-01-12.csv
test_prediction_fold_5_2014-09-14.csv
test_prediction_fold_6_2014-05-17.csv


In [3]:
# --- Per-run knobs: edit these together so they describe the same model and fold ---
model_path = "lgbm_classifier_numl_100_md_10_lr_0.05/" # Change
model_name = "lgbm_classifier_numl_100_md_10_lr_0.05_fold_4_2015-01-12.sav" # Change
fold = 'fold4' # Change
model_type = 'lgbm' # Types: nn, xgb, lgbm, lr, rf # Change
# Label written to the 'Model' column of the results table.
model_type_save = 'LGBM'

# --- Fixed locations ---
root = "/storage/scratch/e17-fyp-xai/projects/e17-4yp-using-machine-learning-in-high-stake-settings/code/new/model_outputs/"
processed_data_path = "/storage/scratch/e17-fyp-xai/projects/e17-4yp-using-machine-learning-in-high-stake-settings/code/processed_data/processed_final_data_latest.csv"

# --- Derived locations (every directory string keeps its trailing slash) ---
xai_root = f"{root}xai/2024/"
art_root = f"{root}artifacts/"
analysis_path = f"{root}analysis/2024/"
json_file_path = f"{xai_root}{model_path}all_exp.json"
save_path = f"{analysis_path}{model_path}agreement_levels_all_explanations/{fold}/"

In [4]:
# Change directory and import
# The working directory is switched to the project's code folder *before* the
# bare-name imports below; presumably that is what makes them resolvable —
# confirm if sys.path is configured some other way.
# NOTE(review): this folder sits under projects/mad_v3/, while the model-output
# paths configured earlier do not — confirm both refer to the intended checkout.
os.chdir("/storage/scratch/e17-fyp-xai/projects/mad_v3/e17-4yp-using-machine-learning-in-high-stake-settings/code/")
import config
import helper as hp
import data_processor as dp
import feature_engineer as fe
# Categorical column names; read as a global by disagreement_average when it
# builds the compactors.
categorical_cols = config.CATEGORICAL_COLS

2024-03-02 12:57:43.722167: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-02 12:57:43.769897: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-02 12:57:43.769949: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-02 12:57:43.771307: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-02 12:57:43.779267: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [6]:
# Helper functions
# Function to save images
def save_image(caption, path):
    """Save the current matplotlib figure as ``<path>/<caption>.png``.

    Args:
        caption: File name without extension.
        path: Target directory; it is joined to the file name with a "/".
    """
    plt.savefig(f'{path}/{caption}.png')

def make_directory(path):
    """Create directory ``path``, including missing parents, if it is absent.

    Calling it again for an existing directory is a no-op. ``exist_ok=True``
    replaces the earlier "check, then create" sequence, which could fail when
    another process created the directory between the two steps.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

In [18]:
# Make sure the fold-specific output folder exists before the results CSV is written into it.
make_directory(save_path)

In [5]:
# Read dataframe to get actual label and for the categorical columns.
# This table is reused twice further down: to rebuild the encoded feature order,
# and as `raw_data` for disagreement_average, which looks up each project's
# categorical values by 'Project ID'.
processed = pd.read_csv(processed_data_path)
# Bare last expression: displays the available column names.
processed.columns

Index(['Unnamed: 0', 'Project ID', 'Project Posted Date', 'Project Type',
       'Project Posted Month', 'Project Subject Category Tree', 'Project Cost',
       'Project Subject Subcategory Tree', 'Project Grade Level Category',
       'Project Resource Category', 'School Metro Type', 'School State',
       'Teacher Project Posted Sequence', 'Label', 'Teacher Success Rate',
       'Teacher Success Rate Imputed', 'School City Success Rate',
       'School City Success Rate Imputed', 'School Success Rate',
       'School Success Rate Imputed', 'School County Success Rate',
       'School County Success Rate Imputed', 'Project Count in State',
       'Project Need Statement Length', 'School City',
       'Project Need Statement', 'Resource Vendor Name', 'Teacher Prefix',
       'Project Short Description Length', 'School County',
       'Project Count in County', 'Project Title', 'Project Essay',
       'Resource Cost Percentage', 'Project Essay Length',
       'School Percentage Free Lun

In [6]:
# Load explanations
# A context manager closes the file handle as soon as parsing finishes; the
# earlier json.load(open(...)) form left it open until garbage collection.
with open(json_file_path) as explanations_file:
    explanations = json.load(explanations_file)

In [7]:
# Names of the explanation configurations stored for the selected fold.
xai_keys_list = [*explanations[fold]]
print(xai_keys_list)

['lime_fs_auto_sai_True_nums_1000', 'lime_fs_auto_sai_False_nums_5000', 'treeshap_alg_auto_tr_None', 'treeshap_alg_deep_tr_100']


In [8]:
# Set n features values (the k used for the "top 10%" / "top 20%" comparisons)
first_method_explanations = explanations[fold][xai_keys_list[0]]
top_keys = list(first_method_explanations['top'].keys())
bottom_keys = list(first_method_explanations['bottom'].keys())

# Length of one uncompressed explanation, read from the first 'top' project.
n_features_uncompressed = len(first_method_explanations['top'][top_keys[0]])
# Hard-coded count used for the compacted explanations.
# NOTE(review): presumably the number of features left once one-hot columns are
# compacted — confirm, and derive it instead of hard-coding if possible.
n_features_compressed = 31

top_10_perc_uncompressed = round(n_features_uncompressed * 0.1)
top_20_perc_uncompressed = round(n_features_uncompressed * 0.2)
top_10_perc_compressed = round(n_features_compressed * 0.1)
top_20_perc_compressed = round(n_features_compressed * 0.2)
print(top_10_perc_uncompressed, top_20_perc_uncompressed, top_10_perc_compressed, top_20_perc_compressed)

61 122 3 6


In [9]:
# Get feature names in order.
# The processed table is pushed through the project's own helpers so that the
# encoded column order can be read off the resulting folds (see feature_names_list).
# NOTE(review): presumably this mirrors the preprocessing used when the model was
# trained — confirm, since the feature names are later paired with the model's
# importance values by position.
raw_data_og = processed.copy(deep=True)  # deep copy: the helpers are not handed `processed` itself
data = dp.set_data_types_to_datetime(raw_data_og, ["Project Posted Date"])
data = hp.filter_dataset_by_date(data)
# Drop the listed text/location columns and 'Unnamed: 0' (presumably a saved CSV
# index); errors='ignore' tolerates names that are not present.
data = data.drop(['School City', 'Project Need Statement', 'School County', 'Project Title',
                  'Project Essay', 'Project Short Description', 'Unnamed: 0'], axis=1, errors='ignore')
print(data.columns)
data_1 = dp.encode_data(data, config.CATEGORICAL_COLS)
data_folds, training_features_count = fe.split_data_folds(data_1)

Index(['Project ID', 'Project Posted Date', 'Project Type',
       'Project Posted Month', 'Project Subject Category Tree', 'Project Cost',
       'Project Subject Subcategory Tree', 'Project Grade Level Category',
       'Project Resource Category', 'School Metro Type', 'School State',
       'Teacher Project Posted Sequence', 'Label', 'Teacher Success Rate',
       'Teacher Success Rate Imputed', 'School City Success Rate',
       'School City Success Rate Imputed', 'School Success Rate',
       'School Success Rate Imputed', 'School County Success Rate',
       'School County Success Rate Imputed', 'Project Count in State',
       'Project Need Statement Length', 'Resource Vendor Name',
       'Teacher Prefix', 'Project Short Description Length',
       'Project Count in County', 'Resource Cost Percentage',
       'Project Essay Length', 'School Percentage Free Lunch', 'Resource Cost',
       'Resource Cost Imputed', 'Project Count in City',
       'School Percentage Free Lunch Impu

In [10]:
# Feature names in column order, read from the first fold's test matrix with
# the 'Project ID' column removed when present.
# NOTE(review): assumes every fold shares this column order — confirm.
first_fold_x_test = data_folds[0].get("x_test")
feature_names_list = (
    first_fold_x_test
    .drop(columns=['Project ID'], errors='ignore')
    .keys()
    .to_list()
)
print(len(feature_names_list))

['Project Posted Month', 'Project Cost', 'Teacher Project Posted Sequence', 'Teacher Success Rate', 'Teacher Success Rate Imputed', 'School City Success Rate', 'School City Success Rate Imputed', 'School Success Rate', 'School Success Rate Imputed', 'School County Success Rate', 'School County Success Rate Imputed', 'Project Count in State', 'Project Need Statement Length', 'Project Short Description Length', 'Project Count in County', 'Resource Cost Percentage', 'Project Essay Length', 'School Percentage Free Lunch', 'Resource Cost', 'Resource Cost Imputed', 'Project Count in City', 'School Percentage Free Lunch Imputed', 'Project Type_Professional Development', 'Project Type_Student-Led', 'Project Type_Teacher-Led', 'Project Subject Category Tree_Applied Learning', 'Project Subject Category Tree_Applied Learning, Health & Sports', 'Project Subject Category Tree_Applied Learning, History & Civics', 'Project Subject Category Tree_Applied Learning, Literacy & Language', 'Project Subject

In [11]:
def _activated_feature_names(project_rows) -> dict:
    """Map each categorical column to ``"<column>_<value>"`` for one project.

    ``project_rows`` holds the raw-data row(s) of a single project; the value is
    taken from the first row. Raises IndexError when ``project_rows`` is empty,
    i.e. when the project id was not found in the raw data.
    """
    return {
        feature: f"{feature}_{project_rows[feature].values[0]}"
        for feature in categorical_cols
    }


def disagreement_average(explanations1: dict, explanations2: dict, both_local: bool, k: int, features_F: list, method = None, raw_data = None, debug = False) -> dict:
    """Average the four agreement metrics over every project in ``explanations1``.

    For each project a ``Disagreement`` object is built, optionally compacted,
    and queried with ``get_disagreement(k, features_F)``; the per-project values
    are summed and divided by the number of projects.

    Args:
        explanations1: Local explanations keyed by project id (always local).
        explanations2: When ``both_local`` is True, local explanations keyed by
            the same project ids. Otherwise a single global explanation that is
            reused for every project (callers in this notebook pass an array).
        both_local: Selects how ``Disagreement`` is constructed. Note that in
            the global case the global explanation is passed as the FIRST
            argument, so "Explanation 1" in the debug output is the global one.
        k: Forwarded to ``get_disagreement``; also the slice length for debug prints.
        features_F: Forwarded to ``get_disagreement``; callers here pass None.
        method: ``"max"``, ``"activated"`` or ``"maxnonactivated"`` to compact
            features first. Any other value, or an activated variant without
            ``raw_data``, leaves the explanations uncompacted.
        raw_data: DataFrame with a 'Project ID' column and the categorical
            columns; needed by the two activated variants.
        debug: Print intermediate state for every project.

    Returns:
        dict with keys 'feature_agreement', 'rank_agreement', 'sign_agreement'
        and 'signed_rank_agreement', each the mean rounded to 3 decimals.

    Raises:
        ValueError: If ``explanations1`` is empty, or a metric comes back as None.
        IndexError: If an activated variant is requested and a project id is
            missing from ``raw_data``.
    """
    n_projects = len(explanations1)
    if n_projects == 0:
        # Fail up front with a clear message instead of a ZeroDivisionError
        # in the final averaging step.
        raise ValueError("explanations1 is empty; there is nothing to average.")

    disagreement_mean = {'feature_agreement': 0.0,
                         'rank_agreement': 0.0,
                         'sign_agreement': 0.0,
                         'signed_rank_agreement': 0.0}

    needs_activated_features = method in ("activated", "maxnonactivated") and raw_data is not None
    candidate_rows = None
    if needs_activated_features:
        # Shrink the raw table once to the projects being compared, so the
        # per-project lookup below does not rescan the full table every time.
        candidate_rows = raw_data[raw_data['Project ID'].isin(list(explanations1))]

    # explanations_1 is always local
    # explanations_2 can either be local or global
    for project_id, local_explanation in explanations1.items():
        if debug:
            print(f"******************Project ID: {project_id}************************")
            print("Explanation 1 - always local - length: ", len(local_explanation))
            print("Explanation 2 - length: ", len(explanations2))
            print("Explanation 1 - always local: ", local_explanation)
            print("Explanation 2: ", explanations2)

        # Initialize disagreement calculation
        if both_local:
            disagreement_calc = Disagreement(local_explanation, explanations2[project_id])
        else:
            disagreement_calc = Disagreement(explanations2, local_explanation)

        if debug:
            print("Initial Explanation 1 \n", disagreement_calc.sorted_explanation1[:k])
            print("Initial Explanation 2 \n", disagreement_calc.sorted_explanation2[:k])

        if method == "max":
            disagreement_calc.compact_features(MaxCompactor(categorical_cols))
        elif needs_activated_features:
            # One row lookup per project, shared by all categorical columns.
            project_rows = candidate_rows[candidate_rows['Project ID'] == project_id]
            activated_features = _activated_feature_names(project_rows)
            if method == "activated":
                compactor = ActivatedCompactor(activated_features)
            else:
                compactor = MaxNonActivatedCompactor(activated_features)
            disagreement_calc.compact_features(compactor)

        if debug:
            print("After Compact Explanation 1 \n",disagreement_calc.sorted_explanation1[:k])
            print("After Compact Explanation 2 \n",disagreement_calc.sorted_explanation2[:k])

        disagreement = disagreement_calc.get_disagreement(k, features_F)

        if debug:
            print("Disagreement : ", disagreement)

        for key in disagreement_mean:
            if disagreement[key] is None:
                raise ValueError(f"Disagreement metric '{key}' is None for project '{project_id}'.")
            disagreement_mean[key] += disagreement[key]

        if debug:
            print("****************************************")

    disagreement_mean = {
        key: round(total / n_projects, 3)
        for key, total in disagreement_mean.items()
    }

    if debug:
        print("*********************************************")
        print(disagreement_mean)
        print("*********************************************")

    return disagreement_mean

In [13]:
def load_model_and_feat(model_path, model_name, model_type):
    """Return a saved model's global importance values and matching feature names.

    Args:
        model_path: Directory prefix; concatenated directly with ``model_name``,
            so it must end with a path separator.
        model_name: File name of the saved model.
        model_type: 'nn', 'lgbm', 'lr', or anything else (treated as an
            estimator exposing ``feature_importances_`` and ``feature_names_in_``).

    Returns:
        ``(importance, feat_names)``:
        - 'nn': ``(None, None)``. No global importance is extracted for neural
          networks, so the model file is not loaded at all. (The earlier code
          tried to load it through ``keras``, which this notebook never
          imports, and then discarded the model.)
        - 'lgbm': ``feature_importances_`` and the notebook-level
          ``feature_names_list``.
        - 'lr': ``coef_[0]`` and ``feature_names_in_``.
        - otherwise: ``feature_importances_`` and ``feature_names_in_``.
    """
    if model_type == 'nn':
        return None, None

    model_file_path = f'{model_path}{model_name}'
    # NOTE: joblib.load unpickles the file and can execute arbitrary code;
    # only load artifacts from a trusted location.
    model = joblib.load(model_file_path)

    # Load the feature importance array
    if model_type == 'lr':
        importance = model.coef_[0]
    else:
        importance = model.feature_importances_

    # Get the feature names
    if model_type == 'lgbm':
        # Names come from the notebook's encoded column order rather than from
        # the model object.
        feat_names = feature_names_list
    else:
        feat_names = model.feature_names_in_

    return importance, feat_names

In [14]:
# Rank all features by the magnitude of their global importance
def select_top_features(importance, feat_names):
    """Build a Feature/Importance table ordered by descending absolute importance.

    No rows are dropped and nothing is plotted: every feature is returned, with
    its signed importance value and its original positional index label.

    Args:
        importance: Sequence of importance values (may be negative).
        feat_names: Sequence of feature names of the same length.

    Returns:
        DataFrame with columns "Feature" and "Importance".
    """
    frame = pd.DataFrame({"Feature": feat_names, "Importance": importance})
    order_by_magnitude = frame["Importance"].abs().sort_values(ascending=False).index
    return frame.reindex(order_by_magnitude)

In [15]:
# Global explanation: the model's own importance values, ordered by magnitude.
global_importance, feat_names = load_model_and_feat(
    model_path=art_root + model_path,
    model_name=model_name,
    model_type=model_type,
)

global_explanation = select_top_features(importance=global_importance, feat_names=feat_names)

['Project Posted Month', 'Project Cost', 'Teacher Project Posted Sequence', 'Teacher Success Rate', 'Teacher Success Rate Imputed', 'School City Success Rate', 'School City Success Rate Imputed', 'School Success Rate', 'School Success Rate Imputed', 'School County Success Rate', 'School County Success Rate Imputed', 'Project Count in State', 'Project Need Statement Length', 'Project Short Description Length', 'Project Count in County', 'Resource Cost Percentage', 'Project Essay Length', 'School Percentage Free Lunch', 'Resource Cost', 'Resource Cost Imputed', 'Project Count in City', 'School Percentage Free Lunch Imputed', 'Project Type_Professional Development', 'Project Type_Student-Led', 'Project Type_Teacher-Led', 'Project Subject Category Tree_Applied Learning', 'Project Subject Category Tree_Applied Learning, Health & Sports', 'Project Subject Category Tree_Applied Learning, History & Civics', 'Project Subject Category Tree_Applied Learning, Literacy & Language', 'Project Subject

In [16]:
# Create empty df - three identifying columns, then one column per
# (metric, top-k share) combination: Feature(10%), Feature(20%), Rank(10%), ...
df = pd.DataFrame(columns=[
    'Explanations', 'Compactor', 'Model',
    *(f'{metric}({share})'
      for metric in ('Feature', 'Rank', 'Sign', 'SignedRank')
      for share in ('10%', '20%')),
])

Local vs local

In [17]:
# One entry per compactor variant:
# ('Compactor' label, method, raw_data, k for top 10%, k for top 20%).
# The uncompacted run uses the uncompressed k values and passes no raw data.
local_compactor_settings = [
    ('No compactor', None, None, top_10_perc_uncompressed, top_20_perc_uncompressed),
    ('Max compactor', 'max', processed, top_10_perc_compressed, top_20_perc_compressed),
    ('Activated compactor', 'activated', processed, top_10_perc_compressed, top_20_perc_compressed),
    ('Non-activated max compactor', 'maxnonactivated', processed, top_10_perc_compressed, top_20_perc_compressed),
]

# (results-column prefix, key in the dict returned by disagreement_average)
local_metric_columns = [
    ('Feature', 'feature_agreement'),
    ('Rank', 'rank_agreement'),
    ('Sign', 'sign_agreement'),
    ('SignedRank', 'signed_rank_agreement'),
]

# Merge each method's 'top' and 'bottom' project explanations once, instead of
# once per pair.
local_combined_explanations = {
    xai_key: explanations[fold][xai_key]['top'] | explanations[fold][xai_key]['bottom']
    for xai_key in xai_keys_list
}

for key_pair in itertools.combinations(xai_keys_list, 2):
    exp_combined_1 = local_combined_explanations[key_pair[0]]
    exp_combined_2 = local_combined_explanations[key_pair[1]]

    # Rows are collected first and appended only after every variant of this
    # pair has finished, so an interrupted pair leaves no partial rows behind.
    pair_rows = []
    for compactor_label, compactor_method, compactor_raw_data, k_top_10, k_top_20 in local_compactor_settings:
        agreement_top_10 = disagreement_average(exp_combined_1, exp_combined_2, True, k_top_10, None,
                                                compactor_method, compactor_raw_data)
        agreement_top_20 = disagreement_average(exp_combined_1, exp_combined_2, True, k_top_20, None,
                                                compactor_method, compactor_raw_data)

        result_row = {'Explanations': f'{key_pair[0]}_vs_{key_pair[1]}',
                      'Compactor': compactor_label,
                      'Model': model_type_save}
        for column_prefix, metric_key in local_metric_columns:
            result_row[f'{column_prefix}(10%)'] = agreement_top_10[metric_key]
            result_row[f'{column_prefix}(20%)'] = agreement_top_20[metric_key]
        pair_rows.append(result_row)

    # Add to dataframe
    for result_row in pair_rows:
        df.loc[len(df)] = result_row


{'feature_agreement': 0.189, 'rank_agreement': 0.019, 'sign_agreement': 0.136, 'signed_rank_agreement': 0.018}
{'feature_agreement': 0.858, 'rank_agreement': 0.598, 'sign_agreement': 0.641, 'signed_rank_agreement': 0.484}
{'feature_agreement': 0.654, 'rank_agreement': 0.525, 'sign_agreement': 0.64, 'signed_rank_agreement': 0.515}
{'feature_agreement': 0.857, 'rank_agreement': 0.598, 'sign_agreement': 0.64, 'signed_rank_agreement': 0.485}
{'feature_agreement': 0.13, 'rank_agreement': 0.016, 'sign_agreement': 0.121, 'signed_rank_agreement': 0.016}
{'feature_agreement': 0.374, 'rank_agreement': 0.316, 'sign_agreement': 0.342, 'signed_rank_agreement': 0.302}


KeyboardInterrupt: 

Global vs local

In [18]:
# One entry per compactor variant:
# ('Compactor' label, method, raw_data, k for top 10%, k for top 20%).
# The uncompacted run uses the uncompressed k values and passes no raw data.
global_compactor_settings = [
    ('No compactor', None, None, top_10_perc_uncompressed, top_20_perc_uncompressed),
    ('Max compactor', 'max', processed, top_10_perc_compressed, top_20_perc_compressed),
    ('Activated compactor', 'activated', processed, top_10_perc_compressed, top_20_perc_compressed),
    ('Non-activated max compactor', 'maxnonactivated', processed, top_10_perc_compressed, top_20_perc_compressed),
]

# (results-column prefix, key in the dict returned by disagreement_average)
global_metric_columns = [
    ('Feature', 'feature_agreement'),
    ('Rank', 'rank_agreement'),
    ('Sign', 'sign_agreement'),
    ('SignedRank', 'signed_rank_agreement'),
]

for local_key in xai_keys_list:
    exp_combined = explanations[fold][local_key]['top'] | explanations[fold][local_key]['bottom']

    # Rows are collected first and appended only after every variant for this
    # method has finished, so an interrupted run leaves no partial rows behind.
    method_rows = []
    for compactor_label, compactor_method, compactor_raw_data, k_top_10, k_top_20 in global_compactor_settings:
        # `.values` is re-read for every call on purpose, so each call gets
        # its own array as in the original cell.
        agreement_top_10 = disagreement_average(exp_combined, global_explanation.values, False, k_top_10, None,
                                                compactor_method, compactor_raw_data)
        agreement_top_20 = disagreement_average(exp_combined, global_explanation.values, False, k_top_20, None,
                                                compactor_method, compactor_raw_data)

        result_row = {'Explanations': f'Global_vs_{local_key}',
                      'Compactor': compactor_label,
                      'Model': model_type_save}
        for column_prefix, metric_key in global_metric_columns:
            result_row[f'{column_prefix}(10%)'] = agreement_top_10[metric_key]
            result_row[f'{column_prefix}(20%)'] = agreement_top_20[metric_key]
        method_rows.append(result_row)

    # Add to dataframe
    for result_row in method_rows:
        df.loc[len(df)] = result_row



{'feature_agreement': 0.14, 'rank_agreement': 0.002, 'sign_agreement': 0.076, 'signed_rank_agreement': 0.001}
{'feature_agreement': 0.0, 'rank_agreement': 0.0, 'sign_agreement': 0.0, 'signed_rank_agreement': 0.0}
{'feature_agreement': 0.008, 'rank_agreement': 0.002, 'sign_agreement': 0.002, 'signed_rank_agreement': 0.0}
{'feature_agreement': 0.0, 'rank_agreement': 0.0, 'sign_agreement': 0.0, 'signed_rank_agreement': 0.0}


In [20]:
# Save df as csv (the row index is written as the first column)
output_csv_path = f'{save_path}agreement_all_exp_{model_type}_{fold}.csv'
df.to_csv(output_csv_path)