In [None]:
from transformers import AutoModelForSequenceClassification, pipeline, AutoTokenizer
import shap

## Load Dataset

In [None]:
# Load the "test_unhcr" dataset through the data catalog.
# NOTE(review): `catalog` is not defined or imported in any visible cell;
# presumably it is injected by the project's notebook environment (e.g. a
# Kedro-managed kernel) — TODO confirm, otherwise this cell fails on a fresh kernel.
unhcr_data = catalog.load("test_unhcr")

In [None]:
# Preview the loaded dataset via `.head()` as a quick sanity check of the load step.
unhcr_data.head()

## Create Transformer Pipeline

Download tokenizer

In [None]:
def _download_tokenizer(tokenizer_identifier: str = "unhcr/hatespeech-detection"):
    """
    Fetch the pretrained tokenizer of a Hugging Face model repository.

    With the default argument this is the tokenizer published alongside
    https://huggingface.co/unhcr/hatespeech-detection

    Parameters
    ----------
    tokenizer_identifier : str
        Name of the Hugging Face repository whose tokenizer should be loaded

    Returns
    -------
    tokenizer
        The object returned by ``AutoTokenizer.from_pretrained``
    """
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_identifier)
    return tokenizer

Download model

In [None]:
def _download_classifier(classifier_identifier: str = "unhcr/hatespeech-detection"):
    """
    Fetch the pretrained sequence-classification model of a Hugging Face repository.

    With the default argument this is the model published at
    https://huggingface.co/unhcr/hatespeech-detection

    Parameters
    ----------
    classifier_identifier : str
        Name of the Hugging Face repository whose model should be loaded

    Returns
    -------
    model
        The object returned by
        ``AutoModelForSequenceClassification.from_pretrained``

    Dict
        Mapping from integer class id to human-readable label

    """
    # Class ids and their labels; the reverse mapping is derived from this one.
    id2label = {
        0: "Normal",
        1: "Offensive",
        2: "Hate speech",
    }
    label2id = {label: class_id for class_id, label in id2label.items()}

    # TODO: add num_labels to config
    model = AutoModelForSequenceClassification.from_pretrained(
        classifier_identifier,
        num_labels=len(id2label),
        id2label=id2label,
        label2id=label2id,
    )

    return model, id2label

Create pipeline

In [None]:
# Hardware toggle: True selects device -1 (CPU), False selects device 0 (first GPU).
local = True

tokenizer = _download_tokenizer()
classifier, id2label = _download_classifier()

# top_k=3 requests a score for each of the three classes; the tokenizer options
# (truncation / padding / max_length) are handed over through the pipeline constructor.
inference_pipeline = pipeline(
    task="text-classification",
    model=classifier,
    tokenizer=tokenizer,
    device=0 if not local else -1,
    top_k=3,
    truncation=True,
    padding=True,
    max_length=128,
)

## SHAP Analysis

The code below takes a long time to run. It is advised to run it on a GPU or a large CPU.

For the purposes of this example, SHAP values are only calculated for two text samples.

In [None]:
# Wrap the pipeline in a SHAP explainer, using the pipeline's own tokenizer as masker.
explainer = shap.Explainer(
    inference_pipeline,
    masker=inference_pipeline.tokenizer,
)

# Explain only the first two entries of the "text" column to keep the run time manageable.
shap_values = explainer(unhcr_data["text"][:2])

### Sentence-level: Text

In [None]:
# Render the SHAP text plot for the first explained sample
shap.plots.text(shap_values[0])

### Sentence-level: Waterfall

In [None]:
# The waterfall plot accepts a single one-dimensional explanation. For this
# three-class pipeline `shap_values[0]` still holds one column of attributions
# per class, so select the class of interest (index 2 = "Hate speech") for the
# first sample before plotting.
shap.plots.waterfall(shap_values[0, :, 2])

### Most Important Features for the Hate Speech Class

In [None]:
# Average the attributions of class index 2 ("Hate speech") over the explained
# samples and display at most 15 features, using the flipped argsort ordering.
shap.plots.bar(
    shap_values[:, :, 2].mean(0),
    max_display=15,
    order=shap.Explanation.argsort.flip,
)