<a href="https://colab.research.google.com/github/gianluigilopardo/xai_text_practice/blob/main/xai_finbert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
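# Install the required packages if they are not already installed.
# Uncomment the following lines to install them (the Anchors cell also needs
# spaCy and its small English model):
# %pip install -q transformers torch lime shap anchor-exp spacy
# !python -m spacy download en_core_web_sm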

In [2]:
# Import necessary libraries
import torch
from transformers import pipeline
import numpy as np
import spacy

# For LIME
from lime.lime_text import LimeTextExplainer

# For SHAP
import shap

# For Anchors
from anchor import anchor_text

In [3]:
# Load the FinBERT sentiment-classification pipeline
# https://huggingface.co/ProsusAI/finbert
# The device is picked at runtime: the first CUDA GPU (0) when one is available,
# otherwise the CPU (-1), so the notebook also runs on CPU-only runtimes.
finbert_pipeline = pipeline(
    'text-classification',
    model='ProsusAI/finbert',
    tokenizer='ProsusAI/finbert',
    # Return a score for every class instead of only the top one.
    # NOTE(review): recent transformers releases deprecate this flag in favour
    # of `top_k=None`; switch once the minimum supported version is settled.
    return_all_scores=True,
    device=0 if torch.cuda.is_available() else -1
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
# Class labels, listed in the order of their numeric indices
class_names = ['positive', 'negative', 'neutral']
# Lookup tables in both directions, derived from class_names so the three
# definitions cannot drift apart
label_to_index = {name: idx for idx, name in enumerate(class_names)}
index_to_label = dict(enumerate(class_names))

# Function to get model predictions (probabilities)
def predict_proba(texts):
    """Return FinBERT class probabilities for one or more texts.

    Args:
        texts: A single string, or any iterable of strings (list, tuple,
            NumPy array, ...).

    Returns:
        np.ndarray of shape (n_texts, len(class_names)). Column order follows
        ``label_to_index``. Empty input yields an array of shape
        (0, len(class_names)) without calling the model.

    Raises:
        KeyError: If the pipeline reports a label that is not a key of
            ``label_to_index``.
    """
    # A bare string is a one-element batch; iterating it would split it
    # into characters.
    if isinstance(texts, str):
        texts = [texts]
    # Explainers may pass tuples or NumPy arrays of strings; hand the pipeline
    # a plain list so the input is unambiguously treated as a batch.
    texts = list(texts)

    n_classes = len(class_names)
    if not texts:
        return np.zeros((0, n_classes))

    probs = []
    for result in finbert_pipeline(texts):
        # One probability slot per class; a label the pipeline does not
        # report keeps probability 0.
        prob = [0] * n_classes
        for item in result:
            prob[label_to_index[item['label'].lower()]] = item['score']
        probs.append(prob)
    return np.array(probs)

In [5]:
# Example sentence to classify and explain in the cells below
# https://huggingface.co/ProsusAI/finbert?text=The+company%27s+stock+price+increased+by+5%25+after+the+positive+earnings+report.
text = "The company's stock price increased by 5% after the positive earnings report."

# Score a one-element batch; `probs` then holds a single row of class probabilities
probs = predict_proba([text])
# Index of the most probable class in that single row
predicted_index = probs[0].argmax()
predicted_label = index_to_label[predicted_index]
print(f"Predicted label: {predicted_label}")
print(f"Predicted probs: {probs}")

Predicted label: positive
Predicted probs: [[0.95633453 0.01633848 0.027327  ]]


In [6]:
# ======================================
# LIME Explanation
# ======================================

# Initialize the LIME text explainer.
# LIME fits its local surrogate on randomly perturbed copies of the text, so a
# fixed seed is passed to make the reported weights reproducible across runs.
explainer = LimeTextExplainer(class_names=class_names, random_state=42)

# Explain the instance: request the 10 most influential words for the
# predicted class only
exp = explainer.explain_instance(
    text_instance=text,
    classifier_fn=predict_proba,
    num_features=10,
    labels=[predicted_index]
)

# Display the explanation for the predicted class as (word, weight) pairs
print("\nLIME Explanation:")
print(f"Explanation for class '{predicted_label}':")
for feature, weight in exp.as_list(label=predicted_index):
    print(f"{feature}: {weight:.4f}")


LIME Explanation:
Explanation for class 'positive':
increased: 0.4239
positive: 0.1951
price: -0.0586
5: 0.0480
company: 0.0339
earnings: 0.0282
report: -0.0259
the: -0.0167
by: 0.0166
stock: -0.0151


In [7]:
# ======================================
# SHAP Explanation
# ======================================

# Text masker built with SHAP's defaults (no tokenizer is passed in)
masker = shap.maskers.Text()

# Wrap the FinBERT pipeline in a SHAP explainer that perturbs text through the masker
explainer_shap = shap.Explainer(finbert_pipeline, masker=masker)

# Explain a batch containing only the sample text
shap_values = explainer_shap([text])

# Render the attribution for the first (and only) text in the batch
print("\nSHAP Explanation:")
shap.plots.text(shap_values[0])

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



SHAP Explanation:


In [8]:
# ======================================
# Anchors Explanation
# ======================================

# Load the spaCy English pipeline; it is handed to AnchorText below as `nlp`.
# The 'en_core_web_sm' model is a separate download
# (`python -m spacy download en_core_web_sm`); this call fails if it is missing.
nlp = spacy.load('en_core_web_sm')

# Define a prediction function that returns class labels
def predict_label(texts):
    """Return the predicted class index for each text.

    Args:
        texts: Texts to classify; forwarded unchanged to ``predict_proba``.

    Returns:
        1-D NumPy integer array holding one class index per text (empty for
        empty input). An array is returned rather than a list so that an
        element-wise comparison such as ``predict_label(x) == label`` yields a
        boolean array, which is how Anchor consumes the classifier output.
    """
    # atleast_2d keeps the row-wise argmax valid even if predict_proba returns
    # a 1-D or empty result.
    probs = np.atleast_2d(predict_proba(texts))
    if probs.size == 0:
        return np.empty(0, dtype=np.intp)
    return probs.argmax(axis=1)

# Build the AnchorText explainer around the spaCy pipeline and the class names
anchor_explainer = anchor_text.AnchorText(
    nlp=nlp,
    class_names=class_names,
    # NOTE(review): per the anchor-exp README, True perturbs by replacing words
    # with an UNK token and False by substituting other words; confirm for the
    # installed version.
    use_unk_distribution=False
)

# Search for an anchor: a set of words that, when kept, preserves the model's
# prediction on at least `threshold` of the perturbed samples
anchor_exp = anchor_explainer.explain_instance(
    text,
    classifier_fn=predict_label,  # returns class indices, not probabilities
    threshold=0.95,
    # NOTE(review): exact semantics of use_proba depend on the anchor-exp
    # version; confirm before changing it.
    use_proba=False
)

# Report the anchor words together with their precision and coverage
anchor_report = (
    "\nAnchors Explanation:",
    f"Anchor: {anchor_exp.names()}",
    f"Precision: {anchor_exp.precision()}",
    f"Coverage: {anchor_exp.coverage()}",
)
print(*anchor_report, sep="\n")

  to_pred = torch.tensor([encoded], device=self.device)



Anchors Explanation:
Anchor: ['increased', '%']
Precision: 0.9618320610687023
Coverage: 0.0
