# iSee Explainer Template (Example)

## Step 1: Load sample model, data and additional information

We support models implemented with different frameworks, such as scikit-learn, TensorFlow, PyTorch and others. You can load your model using the approach recommended by your framework.

In [None]:
# LOADING MODEL #
#################
import urllib.request

#######
# TODO:
# Load your model file here. 
# You may do so via google drive
#######
urllib.request.urlretrieve("https://github.com/XCBRChallenge/2023-utils/raw/main/INCOME.pkl", "INCOME.pkl")

#######
# TODO:
# Choose how to load your model. 
# Comment the rest of options
#######

# For scikit-learn
import joblib

# For tensorflow
#import tensorflow as tf

#For Pytorch
#import torch

# NOTE(security): joblib/pickle files can execute arbitrary code while being
# loaded. Only load model files that come from a source you trust.
# The "with" block guarantees the file handle is closed even if loading fails.
with open("INCOME.pkl", "rb") as model_file:
    # For scikit-learn
    model = joblib.load(model_file)

    # For tensorflow
    #model=tf.keras.models.load_model(model_file)

    #For Pytorch
    #model=torch.load(model_file)

    # For different implementations, please make sure the model object can be loaded with joblib
    # and that it has a "predict" function for consistency

    #model=joblib.load(model_file)
    #predic_func=model.predict

Note: persisting and loading models with pickle/joblib has security and maintainability limitations. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
# LOADING DATA #
################


#######
# TODO:
# You can load your data from a .csv file using numpy or pandas functions
# IMPORTANT: your data file must have a header row
#######
import urllib.request  # imported here as well so this cell can run on its own

import pandas as pd

urllib.request.urlretrieve("https://github.com/XCBRChallenge/2023-utils/raw/main/INCOME_data.csv", "INCOME_data.csv")

# header=0: the first row of the file holds the column names
data = pd.read_csv("INCOME_data.csv", header=0)


#######
# (OPTIONAL):
# If you have to do any processing of the data, please do it here. 
# But keep in mind that when you upload the data file for the explainer 
# to the iSee platform, the data should be already processed.
#######


It's possible that you need to know certain characteristics of the model to execute the explainer. You will be able to access them from the configuration file of the model. We provide some examples of the file structure so you can refer to the information you need from the explainer. You will need to define the characteristics of your model following these examples. **Please keep in mind that the order of the features should match the order expected by your model.**


In [None]:
# CONFIG FILE EXAMPLES #
########################


# TABULAR DATA
{
    
  "attributes": {
      
      "target_names": [ "Feature_3" ],  # Contains the name of the target feature/s

      "features": { # Dictionary where the keys are the feature names, and the values contain information about that feature
          
            "Feature_1": {
              "data_type": "numerical",   # For continuous numerical values, the data_type must be "numerical"
              "min": 0,
              "max": 1,
              "min_raw": 13,
              "max_raw": 84
            },

            "Feature_2": {
              "data_type": "numerical",
              "min": 0,    # minimum value expected by the model 
              "max": 1,    # maximum value expected by the model
              "min_raw": 10,  # If data was normalized, we can use these attributes to denormalize it in case we need to 
              "max_raw": 32   # It's also possible to denormalize using mean and standard deviation. Please refer to the image data example

            },

            "Feature_3": {
                "data_type": "categorical",   # For categorical variables
                "values": [ 0, 1 ],   # The encoded values for the categories as expected by the model
                "values_raw": [ "No", "Yes" ]   # The real names of the categories. This is useful to create better explanations
           }
      
      }
      
  }

},


# IMAGE DATA

{
  "attributes": {

    "target_names": [ "label" ], # Contains the name of the target feature/s 

    "features": {   # Dictionary where the keys are the feature names, and the values contain information about that feature
        
        "image": {  # For images, the name of the main feature will always be "image"
            "data_type": "image", 
            "shape": [320, 320, 3], # This is the shape expected by the model (not including the batch size)
            "shape_raw": [320, 320, 3], # This is the actual shape of the raw image. In some cases, the raw image has a different shape than the one fed to the model
            "mean_raw": 45.46098,   # If data was normalized, we can use these attributes to denormalize it in case we need to 
            "std_raw": 50.87204     # It's also possible to denormalize using min and max values. Please refer to the tabular data example.
        },

        "label": {
          "data_type": "categorical", # For categorical variables
          "values": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], # The encoded values for the categories as expected by the model
          "values_raw": [ "Zero", "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine" ]  # The real names of the categories. This is useful to create better explanations
      }
    }

  }

},

# TEXT DATA
{
  "attributes": {
      
      "target_names": [ "target" ],

      "features": {
          
        "text": { # For text, the name of the main feature will always be "text". Currently no additional information is given.
            "data_type":"text"
        }, 
    
        "target": {
          "data_type": "categorical", # For categorical variables
          "values": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ], # The encoded values for the categories as expected by the model
          "values_raw": [ "atheism", "graphics", "ms-windows.misc", "pc.hardware", "mac.hardware", "x", 
                         "misc.forsale", "autos", "motorcycles", "baseball", "hockey", "crypt", "electronics", 
                         "med", "space", "christian", "guns", "mideast", "politics.misc", "religion.misc" ] # The real names of the categories. This is useful to create better explanations
        } 
      }
    }
},

# TIME SERIES DATA
# This format is very similar to the tabular data example, but with a special attribute: "window_size".

{
    
  "attributes": {
      
      "target_names": [ "Feature_3" ],  # Contains the name of the target feature/s

      "window_size": 14,  # The number of data instances per time window

      "features": { # Dictionary where the keys are the feature names, and the values contain information about that feature
          
            "Feature_0": {
              "data_type": "time"   # To identify the time-related feature
            },

            "Feature_1": {
              "data_type": "numerical",   # For continuous numerical values, the data_type must be "numerical"
              "min": 0,
              "max": 1,
              "min_raw": 13,
              "max_raw": 84
            },

            "Feature_2": {
              "data_type": "numerical",
              "min": 0,    # minimum value expected by the model 
              "max": 1,    # maximum value expected by the model
              "min_raw": 10,  # If data was normalized, we can use these attributes to denormalize it in case we need to 
              "max_raw": 32   # It's also possible to denormalize using mean and standard deviation. Please refer to the image data example

            },

            "Feature_3": {
                "data_type": "categorical",   # For categorical variables
                "values": [ 0, 1 ],   # The encoded values for the categories as expected by the model
                "values_raw": [ "No", "Yes" ]   # The real names of the categories. This is useful to create better explanations
           }
      
      }
      
  }

}


#######
# TODO:
# Describe your model configuration
# REMEMBER: feature order is important!
#######

# Configuration of the sample INCOME model.
# The feature entries follow the order expected by the model; "loan_status"
# is the target (see "target_names").
model_info = {
    "attributes": {
        "features": {
            "annual_inc": {
                "data_type": "numerical",
                "max": 1,
                "max_raw": 700000,
                "min": 0,
                "min_raw": 3600,
            },
            "home_ownership": {
                "data_type": "categorical",
                "values": [0, 1, 2, 3],
                # "OWN" was previously misspelled as "OEN", which leaked into
                # the denormalized explanation shown to the user.
                "values_raw": ["RENT", "OWN", "MORTGAGE", "ANY"],
            },
            "installment": {
                "data_type": "numerical",
                "max": 1,
                "max_raw": 1474.75,
                "min": 0,
                "min_raw": 32.47,
            },
            "int_rate": {
                "data_type": "numerical",
                "max": 1,
                "max_raw": 30.79,
                "min": 0,
                "min_raw": 5.31,
            },
            "loan_amnt": {
                "data_type": "numerical",
                "max": 1,
                "max_raw": 40000,
                "min": 0,
                "min_raw": 1000,
            },
            "loan_status": {
                "data_type": "categorical",
                "values": [0, 1],
                "values_raw": ["Rejected", "Accepted"],
            },
            "purpose": {
                "data_type": "categorical",
                "values": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
                "values_raw": ["major purchase", "other", "home improvement", "debt consolidation", "house", "credit card", "car", "medical", "vacation", "small business", "moving", "renewable energy"],
            },
            "term": {
                "data_type": "categorical",
                "values": [0, 1],
                "values_raw": ["36 months", "60 months"],
            },
            "total_pymnt": {
                "data_type": "numerical",
                "max": 1,
                "max_raw": 44881.66051,
                "min": 0,
                "min_raw": 41.62,
            },
            "total_rec_int": {
                "data_type": "numerical",
                "max": 1,
                "max_raw": 7036.9,
                "min": 0,
                "min_raw": 0,
            },
            "verification_status": {
                "data_type": "categorical",
                "values": [0, 1, 2],
                "values_raw": ["Source Verified", "Not Verified", "Verified"],
            },
        },
        "target_names": ["loan_status"],
    }
}

## Step 2: Develop your explainer

In this step, we ask you to write the code for your explainer. The code will be included in the *explain()* function, but you can define your own helper functions as well. You can also refer to our helper functions at https://github.com/isee4xai/iSeeExplainerLibrary/blob/dev/utils.

In [None]:
!pip install NICEx

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting NICEx
  Downloading NICEx-0.2.3-py3-none-any.whl (10 kB)
Installing collected packages: NICEx
Successfully installed NICEx-0.2.3


In [None]:
import numpy as np
from nice import NICE

In [None]:
# Utility function from https://github.com/isee4xai/iSeeExplainerLibrary/blob/dev/utils

def denormalize_dataframe(df,model_info):
    """Return a copy of ``df`` with model-encoded values mapped back to raw values.

    Every column of ``df`` must have an entry in
    ``model_info["attributes"]["features"]``. Depending on that entry:

    * ``numerical`` with ``min``/``max``/``min_raw``/``max_raw``: the column is
      rescaled from ``[min, max]`` to ``[min_raw, max_raw]``.
    * ``numerical`` with ``mean_raw``/``std_raw``: the column is multiplied by
      ``std_raw`` and shifted by ``mean_raw``.
    * ``categorical`` with ``values``/``values_raw``: each value is converted
      to ``int`` and used as an index into ``values_raw``. If any value cannot
      be converted, the column is left unchanged (best effort).
    * ``categorical`` with ``value``/``ohe_feature`` (one-hot column): the
      column is dropped when its first value is 0, otherwise it is filled with
      ``value`` and renamed to ``ohe_feature``.

    Columns of any other kind are returned untouched.

    Args:
        df (pandas.DataFrame): Data in the encoding expected by the model.
        model_info (dict): Model configuration dictionary.

    Returns:
        pandas.DataFrame: A new, denormalized DataFrame; ``df`` is not modified.

    Raises:
        KeyError: If a column of ``df`` is not described in ``model_info``.
    """
    denorm_df = df.copy()
    features_info = model_info["attributes"]["features"]

    # Iterate over a snapshot of the column names: the one-hot branch below
    # may drop or rename columns while we loop.
    for feature in list(denorm_df.columns):
        feature_dict = features_info[feature]
        data_type = feature_dict["data_type"]

        if data_type == "numerical":
            if all(key in feature_dict for key in ("min", "max", "min_raw", "max_raw")):
                nmin = feature_dict["min"]
                nmax = feature_dict["max"]
                min_raw = feature_dict["min_raw"]
                max_raw = feature_dict["max_raw"]
                denorm_df[feature] = (
                    ((denorm_df[feature] - nmin) / (nmax - nmin)) * (max_raw - min_raw) + min_raw
                )
            elif "mean_raw" in feature_dict and "std_raw" in feature_dict:
                mean = np.array(feature_dict["mean_raw"])
                std = np.array(feature_dict["std_raw"])
                denorm_df[feature] = (denorm_df[feature] * std) + mean

        elif data_type == "categorical":
            if "values" in feature_dict and "values_raw" in feature_dict:
                values_raw = feature_dict["values_raw"]
                try:
                    denorm_df[feature] = denorm_df[feature].apply(
                        lambda code: values_raw[int(code)]
                    )
                except (ValueError, TypeError, IndexError, KeyError, OverflowError):
                    # Best effort: a value that cannot be mapped to a raw
                    # label leaves the whole column in its encoded form.
                    pass
            elif "value" in feature_dict and "ohe_feature" in feature_dict:
                if denorm_df[feature].values[0] == 0:
                    denorm_df = denorm_df.drop(feature, axis=1)
                else:
                    denorm_df[feature] = feature_dict["value"]
                    denorm_df = denorm_df.rename(columns={feature: feature_dict["ohe_feature"]})

    return denorm_df

In [None]:
# The explain() function takes the following parameters:

    # Parameters:

    # model (Object): the model object as described above
    #
    # model_info (dict): the dictionary with the model information as described above
    #
    # data (Pandas.DataFrame or numpy.array, optional): the training data used by the model. 
    #                                                   Please avoid data processing inside the explain function
    #
    # params_json (dict, optional): dictionary containing additional parameters that may be needed for execution.
    #                               e.g. { "n_steps":50, "batch_size": 100}
    #                               Please assign default values for these parameters in your code whenever possible
    #
    # instance (-, optional): data point to be explained. The format will depend on the input expected by the model

    # Returns: dictionary containing the explanation and its type/format. Currently, we accept the following formats:
              # type:"html" - > string with html code
              # type:"image" - > base64 encoded image (you can refer to our helper functions)
              # type:"dict" - > dictionary/JSON object
              # type:"text" - > plain text (string)
                
def explain(model, model_info, data=None, params_json=None, instance=None):
    """Generate a NICE counterfactual for ``instance`` and return it as an HTML table.

    Args:
        model: Model object; its ``predict_proba`` method is used for predictions.
        model_info (dict): Model configuration. ``attributes.target_names`` and
            ``attributes.features`` are read.
        data (pandas.DataFrame): Training data, including the target column.
            Required by this explainer even though the signature defaults to None.
        params_json (dict, optional): Extra settings. Recognised keys:
            ``optimization_criteria`` ("sparsity", "proximity" or
            "plausibility"; defaults to "sparsity") and ``desired_class``
            (index of the wanted counterfactual class; defaults to "other").
            Invalid values are ignored and the defaults are used.
        instance: Data point to explain, in the format accepted by
            ``model.predict_proba``. Required.

    Returns:
        dict: ``{"type": "html", "explanation": <html table>}`` where the table
        shows the original instance and the counterfactual, denormalized.

    Raises:
        ValueError: If ``data`` or ``instance`` is not provided.
    """

    #######
    # TODO:
    # YOUR EXPLAINER CODE HERE
    #######

    if data is None or instance is None:
        raise ValueError("explain() requires both 'data' and 'instance' for this explainer")
    if params_json is None:
        # The parameter is optional: fall back to the defaults below.
        params_json = {}

    # getting necessary data from model info
    target_name = model_info["attributes"]["target_names"][0]
    features = model_info["attributes"]["features"]
    feature_names = list(features.keys())
    feature_names.remove(target_name)

    train_features = data.drop([target_name], axis=1, inplace=False)
    X = train_features.values
    y = data.loc[:, target_name].values

    # Positions must refer to the columns of X (target already removed);
    # indexing into data.columns would be off by one for every categorical
    # column located after the target column.
    categorical_features = [
        train_features.columns.get_loc(feature)
        for feature in feature_names
        if features[feature]["data_type"] == "categorical"
    ]

    # Getting prediction for instance
    pred_func = model.predict_proba
    instance_pred = np.array(pred_func(instance)[0])

    # Getting parameters from json
    optimization_criteria = "sparsity"
    desired_class = "other"
    if "optimization_criteria" in params_json and params_json["optimization_criteria"] in ["sparsity", "proximity", "plausibility"]:
        optimization_criteria = params_json["optimization_criteria"]
    if "desired_class" in params_json:
        try:
            u_class = int(params_json["desired_class"])
        except (TypeError, ValueError):
            # Not convertible to an integer: keep the default "other".
            u_class = None
        if u_class is not None and 0 <= u_class < instance_pred.shape[-1]:
            desired_class = [u_class]

    # Generate counterfactuals
    NICE_res = NICE(pred_func, X, categorical_features, y_train=y, optimization=optimization_criteria)
    CF = NICE_res.explain(instance, target_class=desired_class)[0]

    # Append the predicted class index to each row so it shows up as the target column
    instance_row = np.append(instance, np.argmax(instance_pred))
    cf_row = np.array(list(CF) + [np.argmax(pred_func([CF])[0])])

    df = pd.DataFrame(data=np.array([instance_row, cf_row]),
                      index=["Original Instance", "Counterfactual"],
                      columns=feature_names + [target_name])

    # denormalizing for a more interpretable output
    # using one of the utility functions from the iSee repo
    df_denorm = denormalize_dataframe(df, model_info)

    return {"type": "html", "explanation": df_denorm.to_html()}

You can test your explain() function in the following cell by changing the parameters below.

In [None]:
  #######
  # TODO:
  # Test your explainer by replacing these values 
  # with your own values according to the description above
  #######

# Placeholder assignment: replace the right-hand side with your own training data.
data = data
params_json = {"optimization_criteria": "proximity"}
# First row of the data without the target column, as a 2-D array with one row.
instance = data.iloc[[0]].drop(columns=["loan_status"]).to_numpy()

output = explain(model, model_info, data=data, params_json=params_json, instance=instance)
print(output)

{'type': 'html', 'explanation': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>annual_inc</th>\n      <th>home_ownership</th>\n      <th>installment</th>\n      <th>int_rate</th>\n      <th>loan_amnt</th>\n      <th>purpose</th>\n      <th>term</th>\n      <th>total_pymnt</th>\n      <th>total_rec_int</th>\n      <th>verification_status</th>\n      <th>loan_status</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Original Instance</th>\n      <td>200020.512821</td>\n      <td>RENT</td>\n      <td>147.95242</td>\n      <td>5.31</td>\n      <td>15892.857143</td>\n      <td>major purchase</td>\n      <td>60 months</td>\n      <td>2127.802241</td>\n      <td>0.0</td>\n      <td>Source Verified</td>\n      <td>Rejected</td>\n    </tr>\n    <tr>\n      <th>Counterfactual</th>\n      <td>200020.512821</td>\n      <td>RENT</td>\n      <td>147.95242</td>\n      <td>5.31</td>\n      <td>15892.857143</td>\n      <td>majo

In [None]:
# Render the HTML string stored under output["explanation"] in the notebook.
# The HTML object is the cell's last expression, so the notebook displays it.
import IPython
IPython.display.HTML(data=output["explanation"])

Unnamed: 0,annual_inc,home_ownership,installment,int_rate,loan_amnt,purpose,term,total_pymnt,total_rec_int,verification_status,loan_status
Original Instance,200020.512821,RENT,147.95242,5.31,15892.857143,major purchase,60 months,2127.802241,0.0,Source Verified,Rejected
Counterfactual,200020.512821,RENT,147.95242,5.31,15892.857143,major purchase,60 months,2127.802241,0.0,Source Verified,Accepted


## Step 3: Add documentation for your explainer

The last step is to write the documentation for your explainer. The documentation of the explainers is available in the *get()* method. Below is an example of the expected format for the documentation.

In [None]:
# DESCRIPTION EXAMPLE #
#######################

def get(self):
    """Return an example documentation dictionary for an explainer.

    The dictionary holds the method description, the meaning of the ``id``
    argument, one entry per ``params_json`` parameter, a description of the
    output and a ``meta`` section.
    """
    method_description = (
        "Displays the SHAP interaction values of a feature. Only supports scikit-learn-based models. This method accepts 2 argument: "
        "the model 'id', and the 'params' JSON with the configuration parameters of the method. "
    )

    # One entry like this for each of the parameters of your explainer in params_json
    feature_param = {
        "description": "Name of the feature which will be used to calculate the SHAP interaction values. Defaults to the feature with the highest average SHAP value.",
        # possible types: string, int, float, array, dict.
        "type": "string",
        # default value of the parameter
        "default": None,
        # use when values are within a range e.g [0,1] or when there is a
        # fixed set of accepted values e.g ['mean','median','mode']
        "range": [],
        # Whether the parameter is required for the explainer to execute.
        # Please try to include default values in your code so parameters
        # aren't strictly required.
        "required": False,
    }

    output_description = {
        "bar_plot": "The bar plot shows the SHAP interaction values with the other features for the selected feature."
    }

    meta = {
        "supportsAPI": False,  # ignore
        "needsData": True,  # explainer needs training data
    }

    documentation = {}
    documentation["_method_description"] = method_description
    documentation["id"] = "Identifier of the ML model that was stored locally."
    documentation["params"] = {"feature": feature_param}
    documentation["output_description"] = output_description
    documentation["meta"] = meta
    return documentation



Finally, include the description of your explainer in your own *get()* method

In [None]:
  #######
  # TODO:
  # Describe your explainer in the documentation
  # Fill in the dictionary below with your values
  # or replace the values accordingly
  #######

def get(self):
    """Return the documentation dictionary of the NICE counterfactual explainer.

    The dictionary describes the method, the ``id`` and ``instance``
    arguments, each supported entry of ``params``, the produced output and
    the ``meta`` flags.
    """
    return {
    # Adjacent literals are concatenated: each piece must carry its own
    # separating space, otherwise sentences run together.
    "_method_description": "NICE is an algorithm to generate Counterfactual Explanations for heterogeneous tabular data. "
                            "NICE exploits information from a nearest instance to speed up the search process and guarantee that an explanation will be found. Accepts the following arguments: " 
                            "the 'id' string, the 'instance', and the 'params' dictionary (optional) containing the configuration parameters of the explainer."
                            " These arguments are described below.",
    "id": "Identifier of the ML model that was stored locally.",
    "instance": "Row with the feature values of an instance (not including the target class).",
    "params": { 
            "desired_class": {
                "description": "Integer representing the index of the desired counterfactual class. Defaults to string 'other', which will look for any different class.",
                "type":"int",
                "default": None,
                "range":None,
                "required":False
                },
            "optimization_criteria":{
                "description": "The counterfactual criteria to optimize.",
                "type":"string",
                "default": "sparsity",
                "range":["sparsity","proximity","plausibility"],
                "required":False
                } 
            },
    "output_description":{
            "html_table": "An html page containing a table with the original instance compared against the generated counterfactual."
            },
    "meta":{
            "supportsAPI":False,
            "needsData": True,  # explainer needs training data
            "needsMin&Max": False

        }
    }