### Exp_3_MAS_explains_BB


In [1]:
# import required packages
import pandas as pd
import numpy as np

import joblib

import api_calls
from api_calls import *

import seaborn as sns
from sklearn.metrics import precision_recall_fscore_support, classification_report, confusion_matrix, plot_confusion_matrix

#from pandas import json_normalize
import warnings
import requests
from requests import get
import time

from mycbr_py_api import MyCBRRestApi as mycbr

In [2]:
warnings.filterwarnings("ignore")

# Name of the concept used by the current CBR project
concept = 'case'

# Connection settings for the myCBR REST API
server = 'localhost'
#server = 'user@hv-6066.idi.ntnu.no'
port = '8080'
base_url = 'http://{}:{}/'.format(server, port)

headers = {'Content-type': 'application/json'}

obj = mycbr(base_url)

# Confidence score of each CBR agent, indexed by class (values calculated in
# exp_2). The threshold was chosen manually according to system performance.
confidence = [0.62, 0.57, 0.33]
conf_tresh = 0.5

In [3]:
# Load the training cases (the first CSV column is the index) and the test samples
train = pd.read_csv('./data/train_matched.csv', index_col=0)
test = pd.read_csv('./data/test.csv')

# Separate the feature columns from the 'Target' label
train_X = train.drop(columns=['Target'])
train_y = train['Target']
test_X = test.drop(columns=['Target'])
test_y = test['Target']

### Load black-box model

In [4]:
# Load the stored black-box model from disk; its predict() is used by comparison().
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
bb_model = joblib.load("./model/MLP_oversampled.pkl")

### Build a 3-agent CBR system

In [5]:
# Remove the existing cases of this concept before re-populating the case bases
delete_instances_from_concept(concept)

# Split the training samples by target class
train_class0 = train[train['Target'] == 0]
train_class1 = train[train['Target'] == 1]
train_class2 = train[train['Target'] == 2]

# Load each class into its own case base
add_cases_from_df(train_class0, concept, 'cb_class0')
add_cases_from_df(train_class1, concept, 'cb_class1')
add_cases_from_df(train_class2, concept, 'cb_class2')

cases deleted from case: True


In [6]:
# Read the SHAP values used as attribute weights in the amalgamation functions
shap_c0 = pd.read_csv('./shap_values/c0.csv', index_col=[0])
shap_c1 = pd.read_csv('./shap_values/c1.csv', index_col=[0])
shap_c2 = pd.read_csv('./shap_values/c2.csv', index_col=[0])

# The 'Matched' attribute is weighted this many times the largest SHAP value
matched_booster = 15


def _with_matched_weight(shap_df, booster):
    """Return `shap_df` with an extra 'Matched' row.

    The new row's 'shap' value is `booster` times the largest value in
    `shap_df` (which must therefore hold a single column).
    """
    matched_row = pd.DataFrame({'shap': shap_df.max().item() * booster}, index=['Matched'])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    return pd.concat([shap_df, matched_row])


shap_c0 = _with_matched_weight(shap_c0, matched_booster)
shap_c1 = _with_matched_weight(shap_c1, matched_booster)
shap_c2 = _with_matched_weight(shap_c2, matched_booster)

# Cast the weight maps to double-quoted strings for creating JSON objects (for newAmalgamationFunc)
str_map0 = str(shap_c0.to_dict()['shap']).replace("'",'"')
str_map1 = str(shap_c1.to_dict()['shap']).replace("'",'"')
str_map2 = str(shap_c2.to_dict()['shap']).replace("'",'"')

# Set one weighted-sum amalgamation function per case base
newAmalgamationFunc('case','amal_func_class0', 'WEIGHTED_SUM', str_map0)
newAmalgamationFunc('case','amal_func_class1', 'WEIGHTED_SUM', str_map1)
newAmalgamationFunc('case','amal_func_class2', 'WEIGHTED_SUM', str_map2)

<Response [200]>

In [7]:
def retrieve_cases(df):
    """Fetch the closest case of each class and stack them into one frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Query result providing the columns ``caseID_c0..2`` and
        ``similarity_c0..2``; only the entry at row label 0 is read.

    Returns
    -------
    pandas.DataFrame
        The cases retrieved from ``cb_class0``, ``cb_class1`` and
        ``cb_class2`` (in that order), each extended with ``Class``,
        ``Similarity`` and an empty ``Support`` column. The index is reset,
        so the previous index is kept as an ``index`` column.
    """
    retrieved = []
    for label in range(3):
        case = obj.getCaseByCaseID(
            df['caseID_c{}'.format(label)][0], concept, 'cb_class{}'.format(label)
        )
        case['Class'] = label
        case['Similarity'] = df['similarity_c{}'.format(label)][0]
        case['Support'] = None
        retrieved.append(case)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    return pd.concat(retrieved).reset_index()


# row is a test sample (without Target)
def comparison(row):
    """Pick the case that explains the black-box prediction for one sample.

    The black-box model and the multi-agent CBR system both classify `row`.
    If they disagree and the confidence of the agent for the CBR-predicted
    class exceeds `conf_tresh`, the CBR system's case is returned as a
    'Contrastive' explanation; otherwise the case of the black-box class is
    returned as a 'Supportive' one.

    Parameters
    ----------
    row : pandas.Series
        A test sample without the 'Target' value. It is not modified.

    Returns
    -------
    pandas.Series
        The selected case with 'Support' set and the black-box prediction
        stored under 'bb'; the helper 'index' entry is removed.
    """
    # Prediction of the black-box model
    bb_pred = bb_model.predict([row])[0]

    # Work on a copy so the caller's sample does not gain a 'Matched' entry
    query = row.copy()
    query['Matched'] = calculateMatched(query)

    # Query the sample in the 3 case bases and fetch the closest case of each
    cbr_sim = query_all_cbr_systems(concept, query, k=1)
    cases = retrieve_cases(cbr_sim)
    # Prediction of the CBR system
    cbr_pred = get_class_from_cbr_results(cbr_sim.loc[0])

    if bb_pred != cbr_pred and confidence[cbr_pred] > conf_tresh:
        chosen_class, support = cbr_pred, 'Contrastive'
    else:
        chosen_class, support = bb_pred, 'Supportive'

    # retrieve_cases() resets the index, so row label k holds the class-k case.
    # Copy before writing so the assignment never targets a slice of `cases`.
    exp_case = cases.loc[chosen_class].copy()
    exp_case['Support'] = support
    exp_case['bb'] = bb_pred

    return exp_case.drop(['index'])

In [8]:
# Message templates for the per-query report
query_header = '------------------- Query_{} -------------------'
bb_message = 'The Black-box predicts class {}'
cbr_message = 'The Multi-Agent CBR system returns "{}" with a similarity score {:.3f} as a {} case.\n\n'

# Explain every test sample and report which case was returned for it
for idx in range(len(test)):
    exp = comparison(test_X.loc[idx])
    print(query_header.format(idx))
    print(bb_message.format(exp.bb))
    print(cbr_message.format(exp.caseID, exp.Similarity, exp.Support.upper()))

------------------- Query_0 -------------------
The Black-box predicts class 1
The Multi-Agent CBR system returns "case_9" with a similarity score 0.911 as a CONTRASTIVE case.


------------------- Query_1 -------------------
The Black-box predicts class 2
The Multi-Agent CBR system returns "case_46" with a similarity score 0.898 as a CONTRASTIVE case.


------------------- Query_2 -------------------
The Black-box predicts class 2
The Multi-Agent CBR system returns "case_67" with a similarity score 0.901 as a CONTRASTIVE case.


------------------- Query_3 -------------------
The Black-box predicts class 2
The Multi-Agent CBR system returns "case_2" with a similarity score 0.908 as a CONTRASTIVE case.


------------------- Query_4 -------------------
The Black-box predicts class 2
The Multi-Agent CBR system returns "case_28" with a similarity score 0.917 as a SUPPORTIVE case.


------------------- Query_5 -------------------
The Black-box predicts class 1
The Multi-Agent CBR system re

### Comparing the query with the most similar case

In [33]:
# Label of the test sample to inspect
idx = 16

exp = comparison(test_X.loc[idx])
# Drop the bookkeeping entries of the returned case and turn it into a one-row frame
exp2 = pd.DataFrame(exp.drop(['Matched','caseID','Class','Support'])).T
q = pd.DataFrame(test_X.loc[idx]).T
q = q.astype(float)
exp2 = exp2.astype(float)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
res = pd.concat([q, exp2])
res.index = ['query','similar case']

print('Attributes that differ between the query and the most similar case')
# diff() is 0 where both rows agree; those cells become NaN and their columns are dropped
res[res.diff() != 0].dropna(axis=1)

Attributes that differ between the query and the most similar case


Unnamed: 0,t4,t9,t13,t15,t41,t46,t51,t55,t63,t64,t66,t73,t100
query,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
similar case,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0


In [34]:
print('Attributes that are the same for the query and the most similar case')
# Compare the two rows directly. The previous filter `res.diff() != 1` also kept
# attributes whose difference is -1 (query 1, case 0) and listed them as identical.
res.loc[:, res.loc['query'] == res.loc['similar case']]

Attributes that are the same for the query and the most similar case


Unnamed: 0,t0,t1,t2,t3,t5,t6,t7,t8,t10,t11,...,t91,t92,t93,t94,t95,t96,t97,t98,t99,t101
query,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0
similar case,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0


In [37]:
# Heading for the heatmap of `res` drawn in the following cell
print('Complete comparison of query and the most similar case')


Complete comparison of query and the most similar case


In [None]:
# Wide, short figure: `res` has two rows and one column per attribute
sns.set(rc={'figure.figsize': (30, 2)})
# 'Similarity' and 'bb' are not attributes of the sample, so leave them out
heatmap_data = res.drop(columns=['Similarity', 'bb'])
sns.heatmap(heatmap_data, cmap='plasma', cbar=False)

<AxesSubplot:>