# Tell Me a Story! - SHAPStories Example

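This notebook shows an example of how to generate SHAPstories using large language models such as GPT or Gemini. To run it, you will need an API key for the model provider you use: the cells below use Gemini, while a GPT model would require an OpenAI API key instead.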

In [1]:
#!git clone https://github.com/ADMAntwerp/XAIstories.git

#!pip install -r XAIstories/requirements.txt

In [2]:
import os
import pickle
import time

import numpy as np
import pandas as pd
import shap

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from shapStories.llm_wrappers import GeminiAPI
from shapStories.stories import SHAPstory

  from .autonotebook import tqdm as notebook_tqdm


## FIFA Example

### Load Pre-Trained Model and Data

In [3]:
# Load Pre-Trained Model

fifa_model_path = "models/RF_model_FIFA.pkl"

# NOTE: pickle.load can execute arbitrary code; only load model files from a
# trusted source. The context manager guarantees the file handle is closed.
with open(fifa_model_path, "rb") as fifa_model_file:
    fifa_model = pickle.load(fifa_model_file)

# Load Data and Split

fifa_data = pd.read_csv("data/FIFA_2018_Statistics.csv")

# Only the int64 columns are used as model features.
feature_names = [col for col in fifa_data.columns if fifa_data[col].dtype == np.int64]
fifa_x = fifa_data[feature_names]

# Binary target: True when the team had the "Man of the Match".
fifa_y = (fifa_data["Man of the Match"] == "Yes")

# A fixed random_state makes the split (and everything derived from it)
# reproducible under Restart & Run All.
# NOTE(review): the pickled model's own training split is not visible here, so
# the test rows may overlap with its training data - confirm before trusting
# the pre-trained model's test accuracy.
fifa_x_train, fifa_x_test, fifa_y_train, fifa_y_test = train_test_split(
    fifa_x, fifa_y, test_size=0.2, random_state=42
)

### Train and Compare Accuracy of Various Models

In [4]:
# Pre-Trained (Random Forest) Accuracy
pre_trained_predictions = fifa_model.predict(fifa_x_test)
pre_trained_accuracy = accuracy_score(fifa_y_test, pre_trained_predictions)
print("Pre-Trained Accuracy:", pre_trained_accuracy)

# Train an SVM
# probability=True enables predict_proba, which scikit-learn fits with an
# internal cross-validation that shuffles the data; pinning random_state keeps
# those probability estimates identical between runs.
fifa_svm_model = SVC(kernel="linear", probability=True, random_state=42)
fifa_svm_model.fit(fifa_x_train, fifa_y_train)

svm_predictions = fifa_svm_model.predict(fifa_x_test)

svm_accuracy = accuracy_score(fifa_y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)

Pre-Trained Accuracy: 1.0
SVM Accuracy: 0.7692307692307693


### Manually Created Descriptions

In [5]:
# One human-readable description per feature column, listed in the same order
# as the columns of fifa_x (the pairing below is purely positional).
feature_desc = [
    "Number of goals scored by the team during the match.",
    "Percentage of ball possession by the team during the match.",
    "Number of attempts or shots taken by the team.",
    "Number of shots that were on target.",
    "Number of shots that went off target.",
    "Number of shots that were blocked by the opponent.",
    "Number of corner kicks taken by the team.",
    "Number of times the team was caught offside.",
    "Number of free kicks taken by the team.",
    "Number of saves made by the team's goalkeeper.",
    "Percentage of passes that successfully reached a teammate.",
    "Total number of passes made by the team.",
    "Total distance covered by the team's players during the match, in kilometers.",
    "Number of fouls committed by the team.",
    "Number of yellow cards received by the team.",
    "Number of yellow-red cards received by the team.",
    "Number of red cards received by the team.",
    "Number of goals scored by the team during the penalty shootout.",
]

# Two-column lookup table (feature name -> description) handed to SHAPstory.
fifa_desc_df = pd.DataFrame(
    {
        "feature_name": fifa_x.columns.tolist(),
        "feature_desc": feature_desc,
    }
)

In [None]:
# Natural-language fragments describing the prediction task; they are passed
# to SHAPstory together with the feature descriptions.
fifa_dataset_description = """whether a football team will have the "Man of the Match" winner in a FIFA Worldcup match, based on the team's statistics """

fifa_input_description = "the match"

fifa_target_description = """one of the team's players will be the "Man of the Match" winner"""

# Read the key from the environment so a secret is never saved in the notebook.
# Falls back to an empty string (the previous placeholder value) when the
# GEMINI_API_KEY variable is not set.
api_key = os.environ.get("GEMINI_API_KEY", "")

In [7]:
# Name of the Gemini model used to write the stories; change it here to try another one.
GEMINI_MODEL_NAME = "gemini-2.0-flash-lite"

# LLM wrapper object that is handed to SHAPstory below.
llm = GeminiAPI(api_key, GEMINI_MODEL_NAME)

### Generate Stories for Both the Pre-Trained Random Forest and the SVM

In [8]:
# Print the signature and docstring of generate_stories before calling it below.
help(SHAPstory.generate_stories)

Help on function generate_stories in module shapStories.stories:

generate_stories(self, model, x, y, tree=True)
    Generates SHAPstories for each instance in the given data.
    
    Parameters:
    -----------
    model : object
        A trained model which supports SHAP explanations.
    x : DataFrame
        The input data.
    y : Series or array-like
        The true labels for the input data.
    temp : float, default=0.2
        The temperature setting for the GPT model.
    tree : bool, default=True
        Boolean indicating if the model is tree-based. If True, TreeExplainer will be 
        used for SHAP explanations, else KernelExplainer will be used.
    
    Returns:
    --------
    list of str
        A list containing the generated SHAPstories for each instance.



In [9]:
def _generate_stories_with_retry(story, model, x, y, *, max_attempts=3, base_delay=5.0, **kwargs):
    """Call ``story.generate_stories`` and retry when the LLM backend is temporarily unavailable.

    Parameters
    ----------
    story : SHAPstory
        The story generator to call.
    model, x, y
        Forwarded unchanged to ``story.generate_stories``.
    max_attempts : int, default=3
        Total number of attempts before the last error is re-raised.
    base_delay : float, default=5.0
        Seconds to wait after the first failure; doubled after each further failure.
    **kwargs
        Extra keyword arguments for ``generate_stories`` (e.g. ``tree=False``).

    Returns
    -------
    Whatever ``story.generate_stories`` returns.

    Raises
    ------
    Exception
        Any non-transient error immediately, or the last transient error once
        ``max_attempts`` is exhausted.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            return story.generate_stories(model, x, y, **kwargs)
        except Exception as exc:
            # Assumes the LLM client's error exposes an integer `code` attribute
            # holding the HTTP status (429 = rate limited, 503 = overloaded)
            # - TODO confirm against the client library in use.
            is_transient = getattr(exc, "code", None) in (429, 503)
            if not is_transient or attempt == max_attempts:
                raise
            delay = base_delay * 2 ** (attempt - 1)
            print(f"LLM backend unavailable ({exc}); retrying in {delay:.0f}s ...")
            time.sleep(delay)


fifaStory = SHAPstory(fifa_desc_df, fifa_dataset_description,
                      fifa_input_description, fifa_target_description, llm)

# Explain the same handful of test instances with both models.
fifa_x_sample = fifa_x_test.head()
fifa_y_sample = fifa_y_test.head()

rf_stories = _generate_stories_with_retry(fifaStory, fifa_model, fifa_x_sample, fifa_y_sample)

print("RF Story")
print(rf_stories[0])

print()

# tree=False: the SVM is not tree-based, so it must not use the tree explainer.
svm_stories = _generate_stories_with_retry(fifaStory, fifa_svm_model, fifa_x_sample, fifa_y_sample, tree=False)

print("SVM story")

print(svm_stories[0])

ServerError: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The model is overloaded. Please try again later.', 'status': 'UNAVAILABLE'}}