In [14]:
# this cell is here to suppress annoying progress bars that DiCE generates with tqdm
import tqdm

def tqdm_replacement(iterable_object, *args, **kwargs):
    """Stand-in for ``tqdm.tqdm`` that draws no progress bar.

    Extra positional and keyword arguments are accepted and ignored;
    the iterable is handed back unchanged.
    """
    return iterable_object

# Replace the `tqdm` attribute of the tqdm module with the pass-through function.
# NOTE(review): presumably this only silences DiCE if it runs before `dice_ml` is imported — confirm.
tqdm.tqdm = tqdm_replacement

import warnings
# Blanket filter: no category or module is given, so every warning is ignored.
warnings.filterwarnings("ignore")

In [15]:
import numpy as np
import pandas as pd
import dice_ml

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml

from lime.lime_tabular import LimeTabularExplainer

# Fixed seed for NumPy's global random number generator.
# NOTE(review): the train/test split and the random forest are created without `random_state`,
# so they presumably draw from this global generator — confirm before relying on reproducibility.
seed = 42
np.random.seed(seed)

In [16]:
# Fetch OpenML dataset 31 as a feature frame plus a target series.
credit_X, credit_y = fetch_openml(data_id=31, parser="auto", as_frame=True, return_X_y=True)

feature_names = [
    'duration',
    'credit_amount',
    'installment_commitment',
    'residence_since',
    'age',
    'existing_credits',
    'num_dependents',
]

# only keep the features of feature_names in dataset (original column order is preserved)
unused_columns = [column for column in credit_X.columns if column not in feature_names]
credit_X = credit_X.drop(columns=unused_columns)

# 80/20 split, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    credit_X,
    credit_y,
    test_size=0.2,
    stratify=credit_y,
)

In [17]:
# Fit a class-weighted random forest on the training split (fit() returns the estimator itself).
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    class_weight='balanced',
).fit(X_train, y_train)

# Accuracy on the held-out split.
test_predictions = model.predict(X_test)
print("Accuracy: ", accuracy_score(y_test, test_predictions))

Accuracy:  0.7


In [18]:
# just an instance with bad credit rating
# Take an independent copy of the row: change_feature() later assigns into `instance`,
# and without the copy that assignment could alias (or write back into) X_test.
instance = X_test.iloc[2].copy()
print(model.predict([instance]))
instance

['bad']


duration                     48
credit_amount             12204
installment_commitment        2
residence_since               2
age                          48
existing_credits              1
num_dependents                1
Name: 615, dtype: int64

In [19]:
%%html
<style>
    .lime {
        background-color: white;
        }
</style>

### Setup explainers

In [20]:
# LIME explainer built from the training rows as a plain array.
lime_explainer = LimeTabularExplainer(
    X_train.to_numpy(),
    feature_names=feature_names,
    class_names=['bad', 'good'],
)


def predict_fn(x):
    """Return the classifier's class probabilities for the rows in ``x``."""
    # `model` is looked up at call time, so a re-fitted model is picked up automatically.
    return model.predict_proba(x)

In [21]:
# DiCE data wrapper: features and target are joined into one frame, every name in
# feature_names is declared continuous, and 'class' is named as the outcome column.
d = dice_ml.Data(dataframe=pd.concat([credit_X, credit_y], axis=1), continuous_features=feature_names, outcome_name='class')
# DiCE model wrapper around the scikit-learn classifier held in `model`.
m = dice_ml.Model(model=model, backend='sklearn')

# Counterfactual generator configured with method='random'.
dice_explainer = dice_ml.Dice(d, m, method='random')

In [22]:
def generate_counterfactuals(instance: pd.Series, amount: int = 3, features_to_vary=None):
    '''
    Returns a list of counterfactuals as dictionaries where the entries show differences to the input instance.
    Of course, the output class is also different.

    Parameters:
        instance: feature values of the instance to explain.
        amount: number of counterfactuals requested from DiCE (passed as ``total_CFs``).
        features_to_vary: names of the features DiCE is allowed to change. When omitted,
            ['duration', 'credit_amount', 'installment_commitment', 'existing_credits'] is used.

    Returns:
        One dict per counterfactual containing only the features whose value differs from
        ``instance``; the 'class' outcome entry is removed. An empty list is returned when
        the DiCE result carries no counterfactual frame.
    '''
    if features_to_vary is None:
        features_to_vary = ['duration', 'credit_amount', 'installment_commitment', 'existing_credits']

    exp = dice_explainer.generate_counterfactuals(instance.to_frame().T,
                                                  total_CFs=amount,
                                                  desired_class='opposite',
                                                  features_to_vary=features_to_vary)
    final_cfs_df = exp.cf_examples_list[0].final_cfs_df
    # Defensive guard: avoid an AttributeError if no counterfactual frame was produced.
    if final_cfs_df is None:
        return []

    original_values = instance.to_dict()
    cfs_list = []
    for _, row in final_cfs_df.iterrows():
        # Keep only entries that differ from the original instance; drop the outcome column.
        changes = {
            feature: value
            for feature, value in row.to_dict().items()
            if feature != 'class'
            and not (feature in original_values and value == original_values[feature])
        }
        cfs_list.append(changes)
    return cfs_list

In [23]:
def generate_lime_explanation(instance: pd.Series, num_features: int = 3):
    '''
    Returns a list of feature weights of the lime explanation for the input instance.

    Parameters:
        instance: feature values of the instance to explain.
        num_features: maximum number of features to include in the explanation.

    Returns:
        The result of ``Explanation.as_list()`` for the explained instance.
    '''
    # The explainer was built from a plain array (X_train.to_numpy()), so hand it a plain
    # 1d array as well instead of a label-indexed Series; this avoids depending on
    # integer-position lookups into a Series whose index holds feature names.
    data_row = np.asarray(instance)
    expl = lime_explainer.explain_instance(data_row, predict_fn, num_features=num_features)
    return expl.as_list()

In [24]:
import openai
import json

# load and set our api key
# The context manager closes the key file right after reading; strip() removes any
# surrounding whitespace (including a trailing "\r\n") so the key is clean on every platform.
with open("key.txt", "r") as key_file:
    openai.api_key = key_file.read().strip()

# Conversation log sent to the chat API; each entry is a dict with "role" and "content".
message_history = []


def append_message(message, role="user"):
    """Append one chat turn to ``message_history``.

    ``message`` is formatted to text and stored under ``role`` (default "user").
    """
    entry = {"role": role, "content": format(message)}
    message_history.append(entry)
    
def append_function_call(name, arguments="{}"):
    """Record an assistant turn that is a function call rather than text.

    The entry has no content (``None``) and carries the called function's
    ``name`` and its ``arguments`` string under "function_call".
    """
    call_record = {"name": name, "arguments": arguments}
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "function_call": call_record,
    }
    message_history.append(assistant_turn)
    
# Function specifications offered to the chat model; this list is passed as `functions=`
# to openai.ChatCompletion.create in chat() and handle_function_call().
# Each "name" must match a key of functions_map so the call can be dispatched.
functions = [
    # No-argument function: dispatched to explain_with_lime via functions_map.
    {
        "name": "explain_instance",
        "description": '''Explains the current instance or decision to the user.
            Returns a list of the top features that where most important in the decision of the model.
            This function should be called when the user asks questions such as "Why was my loan rejected?",
            "Why did the model decide this?", "What were the most important features for this decision?" or "Can you explain this to me?".
            It has no paramters.''',
        "parameters": {"type": "object", "properties": {}}
    },
    # Two required arguments; `feature_name` is restricted to the entries of feature_names.
    # `new_value` is declared as a string here; change_feature converts it with float().
    {
        "name": "change_feature",
        "description": '''Changes a feature of the current instance that is explained.''',
        "parameters": {
            "type": "object",
            "properties": {
                "feature_name": {
                    "type": "string",
                    "enum": feature_names,
                    "description": "The name of the feature that will be changed."
                },
                "new_value": {
                    "type": "string",
                    "description": "The new value of the feature."
                }
            },
            "required": ["feature_name", "new_value"]
        }
    },
    # No-argument function: dispatched to explain_with_counterfactuals via functions_map.
    {
        "name": "generate_counterfactuals",
        "description": '''Generates potential scenarios where the models decision is different from the current decision.
            So for example if the customers asks what they could do to get a good credit rating, this method should be called.
            Should be called when the user asks questions such as "How can I get the loan?", "What can I do in order to get the loan?"
            or "What would need to happen for me to get a good credit rating?".''',
        "parameters": {"type": "object", "properties": {}}
    }
]

def explain_with_lime():
    """Queue a prompt asking the chat model to explain the current rating via LIME.

    Reads the global ``instance``, predicts its rating, collects the top five LIME
    feature weights, and appends the resulting instruction to the message history.
    """
    credit_rating = model.predict([instance])[0]

    # inject 'positive' for positive weight and vice versa, so GPT understands better
    explanation = [
        (entry[0], ('positive with ' if entry[1] >= 0 else 'negative with ') + str(entry[1]))
        for entry in generate_lime_explanation(instance, 5)
    ]

    message = f'''Here is a list with features and their weight in the decision for the rating: {explanation}.
    Positive weights indicate a good rating, meaning loan acceptance. Negative weights indicate a bad rating, meaning loan rejection.
    The model gave the user a {credit_rating} rating. Explain the models decision for lay users and keep it short.
    Focus on the most important feature'''
    append_message(message)
    
def explain_with_counterfactuals():
    """Queue a prompt asking the chat model to present one counterfactual.

    Reads the global ``instance``, predicts its rating, requests a single
    counterfactual, and appends the resulting instruction to the message history.
    """
    credit_rating = model.predict([instance])[0]
    # Each counterfactual dict is rendered as text; several would be joined with ' or '.
    cfs_string = ' or '.join(map(str, generate_counterfactuals(instance, amount=1)))
    message = f'''The credit rating is '{credit_rating}'. The users features are {str(instance.to_dict())}.
    In order to not get that credit rating, the user should make the following changes: {cfs_string}.
    If the user changed the features like that, the credit rating would definetly change.
    Explain this to the user in simple terms and keep it short.'''
    append_message(message)
    
def change_feature(feature_name, new_value):
    """Set one feature of the global ``instance`` to a new numeric value.

    Parameters:
        feature_name: name of the feature to change; must be in ``feature_names``.
        new_value: the new value, as supplied by the chat model (text or number).

    Instead of raising, an instruction for the chat model is appended to the message
    history when the feature is unknown or the value is not a finite number; in that
    case ``instance`` is left unchanged.
    """
    import math  # local import: only needed for the finiteness check

    if feature_name not in feature_names:
        #feature does not exist
        append_message("Tell the user that the feature does not exist", "system")
        return

    # The value comes from model-generated arguments, so it may not be numeric at all.
    try:
        numeric_value = float(new_value)
    except (TypeError, ValueError):
        numeric_value = None
    if numeric_value is None or not math.isfinite(numeric_value):
        append_message("Tell the user that the new value must be a number", "system")
        return

    instance[feature_name] = numeric_value
    append_message(f"The users features are now {instance.to_dict()}. Inform the user about the feature change in a very short sentence")

# Dispatch table: function name announced to the chat model -> local handler.
functions_map = dict(
    explain_instance=explain_with_lime,
    change_feature=change_feature,
    generate_counterfactuals=explain_with_counterfactuals,
)

def handle_function_call(fc):
    """Execute a function call requested by the chat model and fetch its follow-up answer.

    Parameters:
        fc: mapping with the keys "name" and "arguments" (a JSON string).

    The call is recorded in the message history, the matching handler from
    ``functions_map`` is run with the decoded arguments, and a second completion is
    requested with function calling disabled. If the name is unknown or the arguments
    are not a JSON object, no handler runs; an instruction for the chat model is
    appended instead so the conversation continues without an exception.
    """
    name = fc["name"]
    raw_arguments = fc["arguments"]
    append_function_call(name, raw_arguments)

    function = functions_map.get(name)
    # The arguments are model-generated text and may not be valid JSON.
    try:
        arguments = json.loads(raw_arguments)
    except (TypeError, ValueError):
        arguments = None

    if function is None or not isinstance(arguments, dict):
        append_message("Tell the user that the request could not be processed and ask them to rephrase it", "system")
    else:
        # An empty dict expands to no keyword arguments, so no special case is needed.
        function(**arguments)

    # in case of a function call, the model is queried behind the scenes but it should not call another function
    next_completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=message_history,
        functions=functions,
        function_call="none"
    )
    handle_completion(next_completion)

def handle_completion(completion):
    """Process one chat completion: run a requested function call or show the answer.

    Parameters:
        completion: API response; the first choice's message is converted to a dict.

    A message without content is treated as a function call and forwarded to
    ``handle_function_call``. Otherwise the text is stored in the message history as
    an assistant turn and printed.
    """
    message = completion.choices[0].message.to_dict()
    content = message["content"]

    # function call (identity check: None is a singleton)
    if content is None:
        handle_function_call(message["function_call"])
        return

    # normal answer
    append_message(content, role="assistant")
    print(content)
        
def chat(inp, role="user"):
    """Send one message to the chat model and handle its reply.

    ``inp`` is appended to the message history under ``role``; the full history is
    then submitted with function calling set to "auto" and the response is passed
    to ``handle_completion``.
    """
    append_message(inp, role)
    request = dict(
        model="gpt-3.5-turbo-0613",
        messages=message_history,
        functions=functions,
        function_call="auto",
    )
    handle_completion(openai.ChatCompletion.create(**request))
    
# describe the bot functionality
# The training-set size is taken from X_train so the prompt cannot drift from the actual split.
# The instance's feature values are embedded as they are at the time this cell runs.
bot_description = f'''You are an explanation system for a credit scoring AI model. You explain the decisions and classifications of the model.
The model gives a customer a credit rating. The rating can either be 'good' or 'bad'.
'good' means the customers loan request is accepted, 'bad' means the loan is rejected.
The model uses a Random Forest Classifier to decide whether the person has a good or bad credit rating.
The model was trained on {len(X_train)} instances.
The features the model uses in the classifying the customers credit/loan risk:
duration: Duration of the loan/credit in months.
credit_amount: The amount of the credit.
installment_commitment: Installment rate in percentage of disposable income.
residence_since: How long the customer has lived at his current residence in years.
age: Age of the customer in years.
existing_credits: Number of existing credits at this bank.
num_dependents: Number of people the customer is liable to provide maintenance for.

When the users asks questions about the model, you can either call an explanation function or elaborate on previous explanations.
Only answer questions about the model, its decisions and the customers features it is using!
Do not answer questions related to anything else!
When asked about a different topic, remind the user to ask questions about the model.
The users features are {instance.to_dict()}'''

# Inject "past" dialogue into the api. The intention is to "teach" the model how it should answer the users questions.
message_history = []
seed_dialogue = [
    (bot_description, "user"),
    ("OK", "assistant"),
    ("How big is the sun?", "user"),
    ("Please ask questions about the model.", "assistant"),
    ("How do I use a bottle opener?", "user"),
    ("Please ask questions about the model.", "assistant"),
]
for seed_text, seed_role in seed_dialogue:
    append_message(seed_text, seed_role)

In [25]:
# Run five chat turns: read a question, echo it, then print the model's reply.
for turn in range(5):
    user_input = input("> ")
    print("User: ", user_input, end="\n\n")
    chat(user_input)
    print()

User:  Why was my loan rejected?

The model rejected your loan request primarily because of the duration of the loan. The longer the duration, the more likely it is for the loan to be rejected.

User:  What impacted the decision negatively?

The factors that had a negative impact on the decision were the duration of the loan and the credit amount. A longer duration and a higher credit amount resulted in a higher chance of loan rejection.

User:  Was my age important?

Yes, your age also played a role in the decision. The model considered your age and it had a positive impact, meaning that being older increased the likelihood of getting a good credit rating. However, it was not the most influential factor in this case.

User:  What can I do to get the loan?

To improve your credit rating and increase the chances of getting the loan, you can consider reducing the duration of the loan to 4 months and lowering the credit amount to 7191. These changes would have a positive impact on your cr

In [27]:
message_history

[{'role': 'user',
  'content': "You are an explanation system for a credit scoring AI model. You explain the decisions and classifications of the model.\nThe model gives a customer a credit rating. The rating can either be 'good' or 'bad'.\n'good' means the customers loan request is accepted, 'bad' means the loan is rejected.\nThe model uses a Random Forest Classifier to decide whether the person has a good or bad credit rating.\nThe model was trained on 800 instances.\nThe features the model uses in the classifying the customers credit/loan risk:\nduration: Duration of the loan/credit in months.\ncredit_amount: The amount of the credit.\ninstallment_commitment: Installment rate in percentage of disposable income.\nresidence_since: How long the customer has lived at his current residence in years.\nage: Age of the customer in years.\nexisting_credits: Number of existing credits at this bank.\nnum_dependents: Number of people the customer is liable to provide maintenance for.\n\nWhen th