Skip to content

amaa11/MIP

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

7 Commits
 
 

Repository files navigation

Modified Informative Position (MIP) is a post-hoc method applied to list of important features generated by any XAI method to consider the collinearity issue among the features.

It is the practical implementation of the paper "Characterizing the Contribution of Dependent Features in XAI Methods" published in IEEE Journal of Biomedical and Health Informatics

Please cite as below if you use the method.

@article{salih2024characterizing, title={Characterizing the contribution of dependent features in XAI methods}, author={Salih, Ahmed M and Galazzo, Ilaria Boscolo and Raisi-Estabragh, Zahra and Petersen, Steffen E and Menegaz, Gloria and Radeva, Petia}, journal={IEEE Journal of Biomedical and Health Informatics}, year={2024}, publisher={IEEE} }

X_train = path
X_test = path
y_train = path
y_test = path
Columns_list = list(X_train.columns.values)
#########################################
dicts = dict()
model_1= LogisticRegression(C=0.1)                                ## defin a model
model_1.fit(X_train, y_train.values.ravel())                      ## fit the data to the model
ypred_test = model_1.predict (X_test)                             ## predict test data
    
#prepare the data for shap
masker = shap.maskers.Independent(data = X_train)
    
# fit to shap
explainer = shap.LinearExplainer(model_1, masker = masker)
    
# apply shap to test data
shap_values = explainer.shap_values(X_test)
    
# the follwoing steps to represents the SHAP outcome (informative predictors) as datafram
vals= np.abs(shap_values).mean(0)
SHAP_out = pd.DataFrame(list(zip(X_train.columns,vals)),columns=['SHAP','feature_importance_vals'])
SHAP_out.sort_values(by=['feature_importance_vals'],ascending=False,inplace=True)
SHAP_out.reset_index(inplace=True, drop=True)
SHAP_out.index = np.arange(1, len(SHAP_out) + 1)


    
while X_train.shape[1] != 1 :
    print('Iteration: ', X_train.shape[1])                          ## number of features in each iteration
    model= LogisticRegression(C=0.1)                                ## defin a model
    model.fit(X_train, y_train.values.ravel())                      ## fit the data to the model
    ypred_test = model.predict (X_test)                             ## predict test data
    
    #prepare the data for shap
    masker = shap.maskers.Independent(data = X_train)
    
    # fit shap
    explainer = shap.LinearExplainer(model, masker = masker)
    
    # apply shap to test data
    shap_values = explainer.shap_values(X_test)
    
    # the follwoing steps to represents the SHAP outcome (informative predictors) as datafram
    vals= np.abs(shap_values).mean(0)
    feature_importance = pd.DataFrame(list(zip(X_train.columns,vals)),columns=['Features','feature_importance_vals'])
    feature_importance.sort_values(by=['feature_importance_vals'],ascending=False,inplace=True)
    feature_importance.reset_index(inplace=True, drop=True)
    feature_importance.index = np.arange(1, len(feature_importance) + 1)
    #print(feature_importance)
    FEATURES = feature_importance[['Features']]
    #print(FEATURES)
    print('=====================')
    for i in range(FEATURES.shape[0]):
        #print(FEATURES.iloc[i]['Features'], end = " ")
        #print(FEATURES.iloc[i].name, end = " ")
        Frac = FEATURES.iloc[i].name/X_train.shape[1]
        #print(Frac)
        #for key in dicts:
        if FEATURES.iloc[i]['Features'] in dicts:
            dicts[FEATURES.iloc[i]['Features']].append(Frac)
        else:
            dicts[FEATURES.iloc[i]['Features']] = [Frac]
        
        #dicts[FEATURES.iloc[i]['Features']] = FEATURES.iloc[i].name/X_train.shape[1]
    
    
    # identify the most informative predictor
    Top_feature = feature_importance.iloc[0]['Features']
    
    # remove the most informative predictor from train and test data
    X_train.drop([Top_feature], axis=1, inplace=True)
    X_train.reset_index(inplace=True, drop=True)
    X_test.drop([Top_feature], axis=1, inplace=True)
    X_test.reset_index(inplace=True, drop=True)
out = {k: [sum(dicts[k])] for k in dicts.keys()}
out = pd.DataFrame(out.items(), columns=['Modified SHAP', 'Score'])
out['Score'] = out['Score'].str[0]
out.sort_values(by=['Score'], inplace=True)
out.reset_index(inplace=True, drop=True)
out.index = np.arange(1, len(out) + 1)
out = pd.concat([out, SHAP_out[['SHAP']]], axis=1)

And then print the new order along side with the initial order and the standard deviation

print(out)
print(out['Score'].std())

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published