# Importing libraries

In [3]:
import sys
import subprocess
import os
import numpy as np
import pandas as pd
import re
import seaborn as sns
import torch
import matplotlib.pyplot as plt
from collections import Counter
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, AlbertTokenizer
import shap
from lime.lime_text import LimeTextExplainer
from scipy.stats import pearsonr
from scipy.spatial.distance import jensenshannon
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

# Setting paths 

In [4]:
# Locations of the fine-tuned model, the cleaned dataset, and the output folder
MODEL_DIR = "./indicbert_hate_model_v2_final"
DATA_PATH = "./hindi_hatespeech_cleaned.csv"
RESULTS_DIR = "./indicbert_explainability"
model_path = MODEL_DIR  # alias of MODEL_DIR

# Create the output folder up front; exist_ok makes re-running this cell safe
os.makedirs(RESULTS_DIR, exist_ok=True)

# Echo the configuration, one path per line
print(
    f"\nModel directory: {MODEL_DIR}",
    f"Results directory: {RESULTS_DIR}",
    f"Data path: {DATA_PATH}",
    sep="\n",
)


Model directory: ./indicbert_hate_model_v2_final
Results directory: ./indicbert_explainability
Data path: ./hindi_hatespeech_cleaned.csv


# Loading model and tokenizer

In [44]:
# Use the GPU when torch can see one, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading model from: {MODEL_DIR}")

# The tokenizer comes from the base "ai4bharat/indic-bert" checkpoint, while the
# classifier weights come from the local fine-tuned directory.
tokenizer_hf = AlbertTokenizer.from_pretrained("ai4bharat/indic-bert")

# Load the classifier, move it to the chosen device and switch it to eval mode
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).to(device).eval()

print("Model and tokenizer loaded successfully.")


Loading model from: ./indicbert_hate_model_v2_final
Model and tokenizer loaded successfully.


In [45]:
# Hindi-friendly word-level tokenizer for the explainability steps.
# It makes the explainers assign importance to whole words rather than to
# individual characters.
def expl_tokenizer(text: str):
    """
    Split text into word-level tokens for explainability.

    - Non-string input is first converted with str().
    - Tokens are the whitespace-separated chunks of the text, which works for
      Hindi as well as code-mixed text.
    - Punctuation stays attached to the word it touches.
    """
    as_text = text if isinstance(text, str) else str(text)
    return as_text.split()

# Loading the cleaned dataset

In [6]:
# Read the cleaned dataset from DATA_PATH and show a quick preview
df = pd.read_csv(DATA_PATH)

# Status line, first rows, then the column index (one item per line)
print("Dataset loaded.", df.head(), df.columns, sep="\n")

Dataset loaded.
                                                text  \
0  ‡§µ‡§ï‡•ç‡§§, ‡§á‡§®‡•ç‡§∏‡§æ‡§® ‡§î‡§∞ ‡§á‡§Ç‡§ó‡•ç‡§≤‡•à‡§Ç‡§° ‡§ï‡§æ ‡§Æ‡•å‡§∏‡§Æ ‡§Ü‡§™‡§ï‡•ã ‡§ï‡§≠‡•Ä ‡§≠‡•Ä ‡§ß...   
1  #‡§ï‡§æ‡§Ç‡§ó‡•ç‡§∞‡•á‡§∏ ‡§ï‡•á ‡§á‡§∏ #‡§ï‡§Æ‡•Ä‡§®‡•á ‡§ï‡•Ä #‡§ï‡§∞‡§§‡•Ç‡§§ ‡§ï‡•ã ‡§¶‡•á‡§ñ‡§ø‡§è ‡§¶‡•á‡§∂ ...   
2  ‡§™‡§æ‡§ï‡§ø‡§∏‡•ç‡§§‡§æ‡§® ‡§ï‡•ã ‡§´‡•á‡§ï‡§®‡§æ ‡§•‡§æ ‡§´‡•á‡§ï‡§æ ‡§ó‡§Ø‡§æ‡•§ ‡§ú‡•ã ‡§π‡§æ‡§∞ ‡§ï‡§∞ ‡§≠‡•Ä ‡§¶...   
3  ‡§ú‡•ã ‡§∂‡§¨‡•ç‡§¶ ‡§§‡•Ç‡§Æ ‡§Ü‡§ú ‡§ï‡§ø‡§∏‡•Ä ‡§î‡§∞ ‡§î‡§∞‡§§ ‡§ï‡•á ‡§≤‡§ø‡§è ‡§Ø‡•Ç‡§ú ‡§ï‡§∞ ‡§∞‡§π‡•á ‡§µ...   
4  ‡§®‡•á‡§§‡§æ ‡§ú‡•Ä ‡§π‡§Æ ‡§∏‡§Æ‡§æ‡§ú‡§µ‡§æ‡§¶‡•Ä ‡§∏‡§ø‡§™‡§æ‡§π‡•Ä ‡§π‡§Æ‡•á‡§∂‡§æ ‡§Ü‡§™‡§ï‡•á ‡§∏‡§æ‡§• ‡§π‡•à ‡§Ü...   

                                        cleaned_text  labels  
0  ‡§µ‡§ï‡•ç‡§§, ‡§á‡§®‡•ç‡§∏‡§æ‡§® ‡§î‡§∞ ‡§á‡§Ç‡§ó‡•ç‡§≤‡•à‡§Ç‡§° ‡§ï‡§æ ‡§Æ‡•å‡§∏‡§Æ ‡§Ü‡§™‡§ï‡•ã ‡§ï‡§≠‡•Ä ‡§≠‡•Ä ‡§ß...       0  
1  ‡§ï‡§æ‡§Ç‡§ó‡•ç‡§∞‡•á‡§∏ ‡§ï‡•á ‡§á‡§∏ ‡§ï‡§Æ‡•Ä‡§®‡•á ‡§ï‡•Ä ‡§ï‡§∞‡§§‡•Ç‡§§ ‡§ï‡•ã ‡§¶‡•á‡

In [7]:
# Columns used for SHAP/LIME: the cleaned text is what gets explained
TEXT_COL, LABEL_COL = "cleaned_text", "labels"

# Materialise both columns as plain Python lists
texts, labels = (df[column].tolist() for column in (TEXT_COL, LABEL_COL))

print(f"Total samples: {len(texts)}")

Total samples: 14127


# Selecting samples for SHAP/LIME

In [8]:
# Defining a function to select a subset of the cleaned dataset for SHAP/LIME analysis
def get_explainability_subset(df, n_samples, random_seed=42,
                              text_col="cleaned_text", label_col="labels",
                              min_len=20, max_len=200):
    """
    Select a class-balanced subset for SHAP/LIME, similar to the HEARTS framework.

    Steps:
    - Keep only moderate-length texts (more than min_len and fewer than
      max_len characters; both bounds are exclusive)
    - Draw the same number of hate (label 1) and non-hate (label 0) rows
    - Shuffle the result; every random step uses random_seed for reproducibility

    Parameters
    ----------
    df : pd.DataFrame
        Source data. It is only read, never modified.
    n_samples : int
        Target subset size. Each class contributes n_samples // 2 rows (an odd
        value therefore rounds down), or fewer when a class does not have
        enough moderate-length texts.
    random_seed : int
        Seed passed to every DataFrame.sample call.
    text_col, label_col : str
        Names of the text and label columns.
    min_len, max_len : int
        Exclusive character-length bounds for a text to be eligible.

    Returns
    -------
    pd.DataFrame
        Columns [text_col, label_col] with a fresh 0..n-1 index.
    """
    # Character length of each text; non-string cells are measured via str()
    lengths = df[text_col].astype(str).str.len()

    # Filtering to medium-length texts (avoids very short/noisy and very long rows)
    df_mid = df[(lengths > min_len) & (lengths < max_len)]

    # Number of rows requested from each class
    per_class = n_samples // 2

    # Hate (1) first, then non-hate (0); each draw is capped by what is available
    class_samples = []
    for label_value in (1, 0):
        pool = df_mid[df_mid[label_col] == label_value]
        class_samples.append(
            pool.sample(n=min(per_class, len(pool)), random_state=random_seed)
        )

    # Shuffle so the two classes are interleaved, then keep only the needed columns
    subset = pd.concat(class_samples).sample(frac=1, random_state=random_seed)
    subset = subset[[text_col, label_col]].reset_index(drop=True)

    print(f"Selected subset size: {len(subset)}")
    print("Hate:", (subset[label_col] == 1).sum(),
          "Non-hate:", (subset[label_col] == 0).sum())

    return subset

In [9]:
# Adding predicted labels to the explainability subset
def add_model_predictions(df, model, tokenizer):
    """
    Return a copy of the sampled dataframe with model predictions attached.

    Each text in the "cleaned_text" column is tokenized on its own (truncated
    to 128 tokens), passed through the model, and the arg-max class of the
    logits is stored.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain "cleaned_text" and "labels" columns. The input frame is
        not modified; a copy is returned.
    model :
        Callable that accepts the tokenizer output as keyword arguments and
        returns an object exposing `.logits`; must also provide `.to(device)`
        and `.eval()`.
    tokenizer :
        Callable returning a mapping of tensors when called with
        return_tensors="pt".

    Returns
    -------
    pd.DataFrame
        The copy with four extra columns: predicted_label, actual_label,
        dataset_name and categorisation.
    """
    # Work on a copy so the caller's frame does not silently gain columns
    df = df.copy()

    # Device placement and eval mode are loop-invariant: do them once
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    all_preds = []
    with torch.no_grad():
        for text in df["cleaned_text"]:
            enc = tokenizer(
                str(text),  # guard against non-string cells
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=128
            )
            enc = {k: v.to(device) for k, v in enc.items()}

            logits = model(**enc).logits
            all_preds.append(int(torch.argmax(logits, dim=-1).cpu().item()))

    df["predicted_label"] = all_preds
    df["actual_label"] = df["labels"]  # duplicate of labels, kept for consistency
    df["dataset_name"] = "HASOC"       # or any name you prefer
    df["categorisation"] = df["labels"].map({0: "non-hate", 1: "hate"})

    return df

# SHAP Analysis

In [49]:
# Defining helper function
def predict_proba(text_list, batch_size=32):
    """
    Wrapper that takes a list of texts and returns an (N, 2) numpy array
    with probabilities for [non-hate, hate].

    Uses the notebook-level `tokenizer_hf`, `model` and `device`.

    Parameters
    ----------
    text_list : str or sequence of str
        A single text or a collection of texts. Items are converted with
        str() so numpy arrays and tuples of texts are accepted too.
    batch_size : int
        Number of texts per forward pass. Callers such as LIME can hand over
        many perturbed texts in a single call; chunking keeps memory bounded.

    Returns
    -------
    np.ndarray
        Softmax probabilities, one row per input text. For empty input an
        array of shape (0, model.config.num_labels) is returned.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    if isinstance(text_list, str):
        text_list = [text_list]
    text_list = [str(t) for t in text_list]

    # Nothing to score: the tokenizer cannot encode an empty batch
    if not text_list:
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    model.to(device)
    model.eval()

    prob_chunks = []
    with torch.no_grad():
        for start in range(0, len(text_list), batch_size):
            enc = tokenizer_hf(
                text_list[start:start + batch_size],
                padding=True,
                truncation=True,
                max_length=128,
                return_tensors="pt",
            )
            enc = {k: v.to(device) for k, v in enc.items()}

            outputs = model(**enc)
            prob_chunks.append(torch.softmax(outputs.logits, dim=-1).cpu().numpy())

    return np.concatenate(prob_chunks, axis=0)

In [50]:
# Wrap the already-loaded model and tokenizer in a Hugging Face pipeline.
# device=0 selects the first GPU when CUDA is available, -1 selects the CPU.
pipe = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer_hf,
    device=(0 if torch.cuda.is_available() else -1),
    return_all_scores=True,
)

Device set to use cuda:0


In [66]:
# Defining a function for SHAP analysis
import re
import shap

def shap_analysis(sampled_data: pd.DataFrame, text_col: str = "cleaned_text"):
    """
    Computes token-level SHAP values for each sentence in sampled_data
    using the fine-tuned IndicBERT hate-speech model (via the notebook-level
    `pipe` pipeline).

    Parameters
    ----------
    sampled_data : pd.DataFrame
        Rows to explain. Optional columns: "predicted_label" (class whose SHAP
        values are kept; recomputed with predict_proba when absent) and
        "labels" (stored as true_label; None when absent).
    text_col : str
        Name of the column holding the text to explain.

    Returns
    -------
    pd.DataFrame
        One row per (sentence, token) with columns sentence_id, token,
        value_shap, sentence, true_label and predicted_label.
    """

    # 1) SHAP text masker - configured with a REGEX string, not a Python function
    # NOTE(review): confirm how the installed shap version interprets a string
    # tokenizer (token pattern vs. separator pattern); the token list below
    # assumes each SHAP value lines up with one whitespace-delimited word.
    masker = shap.maskers.Text(tokenizer=r"\S+")

    # 2) SHAP explainer on top of the existing HF pipeline
    explainer = shap.Explainer(pipe, masker)

    results = []

    for idx, row in sampled_data.iterrows():
        text = str(row[text_col])  # guard against non-string cells

        # Run SHAP on this single sentence
        shap_values = explainer([text])  # shape: (1, tokens, classes)

        # Use stored predicted_label if present, else recompute
        if "predicted_label" in row:
            label_index = int(row["predicted_label"])
        else:
            probs = predict_proba([text])
            label_index = int(np.argmax(probs, axis=-1)[0])

        true_label = int(row["labels"]) if "labels" in row else None

        print(
            f"Row {idx} | True: {true_label} | Pred: {label_index} "
            f"| Text: {text[:60]}..."
        )

        # Take SHAP values for the explained class
        specific_shap_values = shap_values.values[0, :, label_index]

        # Extract word tokens with the same regex that was given to the masker
        tokens = re.findall(r"\S+", text)

        # zip() below stops at the shorter sequence, so surface any mismatch
        if len(tokens) != len(specific_shap_values):
            print(
                f"Warning: row {idx} has {len(tokens)} tokens but "
                f"{len(specific_shap_values)} SHAP values; unmatched entries are dropped."
            )

        for token, value in zip(tokens, specific_shap_values):
            results.append(
                {
                    "sentence_id": idx,
                    "token": token,
                    "value_shap": float(value),
                    "sentence": text,
                    "true_label": true_label,
                    "predicted_label": label_index,
                }
            )

    return pd.DataFrame(results)

# LIME Analysis

In [67]:
# Defining a LIME analysis function
def lime_analysis_indicbert(
    sampled_df: pd.DataFrame,
    text_col="cleaned_text",
    true_col="true_label",
    pred_col="predicted_label",
    save_path=None,
    random_state=42,
):
    """
    Compute token-level LIME weights for every sentence in sampled_df.

    For each row the explanation is requested for the row's predicted class
    (pred_col), so the LIME weights refer to the same class as the SHAP values
    computed for that row.

    Parameters
    ----------
    sampled_df : pd.DataFrame
        Rows to explain; must contain text_col, true_col and pred_col.
    text_col, true_col, pred_col : str
        Column names for the text, the true label and the predicted label.
    save_path : str or None
        When given, the result is also written to this CSV path.
    random_state : int or None
        Seed for LIME's perturbation sampling, for reproducible weights.

    Returns
    -------
    pd.DataFrame
        One row per (sentence, token) with columns sentence_id, token,
        value_lime, sentence, true_label and predicted_label. The columns are
        present even when no row could be explained.
    """
    columns = [
        "sentence_id", "token", "value_lime",
        "sentence", "true_label", "predicted_label",
    ]
    results = []

    # Infer the number of classes from one prediction; skipped for empty input,
    # in which case the loop below does not run either.
    explainer = None
    num_classes = 0
    if len(sampled_df) > 0:
        example = str(sampled_df[text_col].iloc[0])
        num_classes = predict_proba([example]).shape[1]
        explainer = LimeTextExplainer(
            class_names=[f"LABEL_{i}" for i in range(num_classes)],
            split_expression=expl_tokenizer,  # Using Hindi-friendly tokenizer
            random_state=random_state,
        )

    for idx, row in sampled_df.iterrows():
        sentence = str(row[text_col])  # guard against non-string cells
        tokens = expl_tokenizer(sentence)

        if len(tokens) == 0:
            print(f"‚ö†Ô∏è Row {idx} skipped (no tokens)")
            continue

        target_label = int(row[pred_col])
        if not 0 <= target_label < num_classes:
            print(
                f"Row {idx} skipped (predicted label {target_label} "
                f"is outside 0..{num_classes - 1})"
            )
            continue

        # Ask LIME for the predicted class explicitly. Without `labels`, only
        # LIME's default label is explained and other predictions would end up
        # with an explanation for the wrong class.
        exp = explainer.explain_instance(
            sentence,
            predict_proba,
            labels=(target_label,),
            num_features=len(tokens),
            num_samples=1000
        )

        token_value_dict = dict(exp.as_list(label=target_label))

        print(f"Row {idx} | True: {row[true_col]} | Pred: {row[pred_col]} | Text: {sentence[:60]}...")

        # Tokens LIME did not return a weight for are recorded with 0.0
        for tok in tokens:
            results.append({
                "sentence_id": idx,
                "token": tok,
                "value_lime": float(token_value_dict.get(tok, 0.0)),
                "sentence": sentence,
                "true_label": int(row[true_col]),
                "predicted_label": int(row[pred_col])
            })

    lime_df = pd.DataFrame(results, columns=columns)
    if save_path:
        lime_df.to_csv(save_path, index=False)
        print(f"‚úÖ Saved LIME results to: {save_path}")

    return lime_df

# Comparing SHAP/LIME

In [68]:
# Defining a function to merge SHAP/LIME
def merge_shap_lime(results_dir: str,
                    shap_filename: str = "shap_results_indicbert_v2.csv",
                    lime_filename: str = "lime_results_indicbert_v2.csv") -> pd.DataFrame:
    """
    Load SHAP and LIME token-level results and merge them per token.

    Rows are matched on (sentence_id, token) plus the occurrence number of the
    token inside its sentence, so a word that appears several times in one
    sentence is paired first-with-first, second-with-second, and so on.
    A plain join on (sentence_id, token) would pair every SHAP occurrence with
    every LIME occurrence and inflate the row count.

    Parameters
    ----------
    results_dir : str
        Directory where the SHAP / LIME CSVs are stored.
    shap_filename : str
        File name of SHAP CSV.
    lime_filename : str
        File name of LIME CSV.

    Returns
    -------
    merged_df : pd.DataFrame
        DataFrame with columns:
        ['sentence_id', 'token', 'value_shap', 'value_lime', ...meta columns from SHAP...]
    """
    shap_path = os.path.join(results_dir, shap_filename)
    lime_path = os.path.join(results_dir, lime_filename)

    # Read tokens as text so purely numeric tokens keep their original spelling
    shap_df = pd.read_csv(shap_path, dtype={"token": str})
    lime_df = pd.read_csv(lime_path, dtype={"token": str})

    print("SHAP shape:", shap_df.shape)
    print("LIME shape:", lime_df.shape)

    keys = ["sentence_id", "token"]
    occurrence = "_token_occurrence"  # temporary helper column, dropped below

    # Number repeated tokens within each sentence: 0 for the first occurrence,
    # 1 for the second, ... (dropna=False keeps tokens parsed as missing values)
    shap_keyed = shap_df.assign(
        **{occurrence: shap_df.groupby(keys, dropna=False).cumcount()}
    )
    lime_keyed = lime_df[keys + ["value_lime"]].assign(
        **{occurrence: lime_df.groupby(keys, dropna=False).cumcount()}
    )

    # Inner-join on sentence_id + token + occurrence; keep meta columns from SHAP side
    merged_df = pd.merge(
        shap_keyed,
        lime_keyed,
        on=keys + [occurrence],
        how="inner"
    ).drop(columns=occurrence)

    print("\nMerged shape:", merged_df.shape)
    return merged_df

# Confidence Scores

In [69]:
# Defining a function to compute confidence scores for SHAP and LIME analysis
def compute_shap_lime_similarity(
    merged_df: pd.DataFrame,
    results_dir: str,
    out_name: str = "similarity_confidence_indicbert_v2.csv"
) -> pd.DataFrame:
    """
    Given merged SHAP+LIME token importances, compute agreement metrics and
    confidence scores per sentence, then save to CSV.

    Per sentence the function computes the cosine similarity and Pearson
    correlation of the two importance vectors and the Jensen-Shannon divergence
    of their normalised absolute values. The confidence score is the mean of
    cosine, Pearson (undefined values counted as 0) and (1 - JS divergence);
    it is binned into Low (<= 0.3), Medium (<= 0.6) and High.

    Sentences are skipped when fewer than two tokens with finite values remain
    or when either importance vector is all zeros.

    Parameters
    ----------
    merged_df : pd.DataFrame
        Output of merge_shap_lime().
    results_dir : str
        Directory where the similarity CSV will be saved (created if missing).
    out_name : str
        File name for the similarity + confidence CSV.

    Returns
    -------
    similarity_df : pd.DataFrame
        One row per sentence_id with cosine, Pearson, JS and confidence_score/level.
        Empty (and nothing is saved) when no sentence could be scored.
    """

    def _to_prob(x: np.ndarray) -> np.ndarray:
        """Turning signed importance into a probability distribution (for JS)."""
        x = np.abs(x.astype(float))
        if x.sum() == 0:
            return np.ones_like(x) / len(x)
        return x / x.sum()

    def _js_divergence(p: np.ndarray, q: np.ndarray) -> float:
        """Jensen-Shannon divergence between two discrete distributions.

        Uses the natural logarithm, so the value lies in [0, ln 2].
        """
        m = 0.5 * (p + q)

        def _kl(a, b):
            # Terms with a == 0 contribute nothing; b is the mixture m here
            mask = (a > 0) & (b > 0)
            return np.sum(a[mask] * np.log(a[mask] / b[mask]))

        return 0.5 * _kl(p, m) + 0.5 * _kl(q, m)

    similarity_rows = []

    for sid, group in merged_df.groupby("sentence_id"):
        shap_vals = group["value_shap"].to_numpy(dtype=float)
        lime_vals = group["value_lime"].to_numpy(dtype=float)

        # Drop token pairs with NaN/inf on either side; they would make the
        # similarity computations fail or return NaN
        finite = np.isfinite(shap_vals) & np.isfinite(lime_vals)
        shap_vals, lime_vals = shap_vals[finite], lime_vals[finite]

        # Skipping degenerate cases
        if len(shap_vals) < 2 or np.all(shap_vals == 0) or np.all(lime_vals == 0):
            continue

        # Cosine similarity of the two importance vectors
        cos = float(
            cosine_similarity(
                shap_vals.reshape(1, -1),
                lime_vals.reshape(1, -1)
            )[0, 0]
        )

        # Pearson correlation (undefined when either vector is constant)
        if np.std(shap_vals) == 0 or np.std(lime_vals) == 0:
            pearson = np.nan
        else:
            pearson = float(np.corrcoef(shap_vals, lime_vals)[0, 1])

        # JS divergence on |values|
        p = _to_prob(shap_vals)
        q = _to_prob(lime_vals)
        js = float(_js_divergence(p, q))

        # Labels are constant within a sentence, so the first row is representative
        row0 = group.iloc[0]
        similarity_rows.append({
            "sentence_id": sid,
            "true_label": row0.get("true_label", np.nan),
            "predicted_label": row0.get("predicted_label", np.nan),
            "cosine_similarity": cos,
            "pearson_correlation": pearson,
            "js_divergence": js,
        })

    similarity_df = pd.DataFrame(similarity_rows)
    print("Similarity rows:", len(similarity_df))

    if len(similarity_df) > 0:
        # Filling NaNs in Pearson before combining
        similarity_df["pearson_correlation"] = similarity_df["pearson_correlation"].fillna(0.0)

        similarity_df["confidence_score"] = (
            similarity_df["cosine_similarity"]
            + similarity_df["pearson_correlation"]
            + (1 - similarity_df["js_divergence"])
        ) / 3.0

        similarity_df["confidence_level"] = pd.cut(
            similarity_df["confidence_score"],
            bins=[-np.inf, 0.3, 0.6, np.inf],
            labels=["Low", "Medium", "High"]
        )

        print("\nConfidence distribution:")
        print(similarity_df["confidence_level"].value_counts().sort_index())

        avg_cosine = similarity_df["cosine_similarity"].mean()
        avg_pearson = similarity_df["pearson_correlation"].mean()
        avg_js = similarity_df["js_divergence"].mean()

        print("\nAverage agreement metrics:")
        print(f"  Cosine similarity:   {avg_cosine:.3f}")
        print(f"  Pearson correlation: {avg_pearson:.3f}")
        print(f"  JS divergence:       {avg_js:.3f}")

        # Make sure the target directory exists before writing
        if results_dir:
            os.makedirs(results_dir, exist_ok=True)
        out_csv = os.path.join(results_dir, out_name)
        similarity_df.to_csv(out_csv, index=False)
        print(f"\n‚úÖ Saved similarity + confidence to: {out_csv}")
    else:
        print("‚ö†Ô∏è No overlapping tokens between SHAP and LIME; similarity_df is empty.")

    return similarity_df

# Explainability Analysis

In [70]:
# Defining a function for explainability analysis on a given number of samples
def explainability_analysis(df, num_samples, shap_results_file_name, lime_results_file_name, explainability_results_file_name):
    """
    Run the full SHAP + LIME pipeline on a balanced sample of df.

    Steps: select the subset, attach model predictions, compute and save SHAP
    values, compute and save LIME weights, merge both, and compute/save the
    agreement (confidence) scores. All files are written into RESULTS_DIR.

    Parameters
    ----------
    df : pd.DataFrame
        Full cleaned dataset.
    num_samples : int
        Number of rows to sample (split evenly across the two classes).
    shap_results_file_name, lime_results_file_name, explainability_results_file_name : str
        File names (inside RESULTS_DIR) for the SHAP, LIME and similarity CSVs.

    Returns
    -------
    None
    """
    # Creating a balanced subset of the dataset for the analysis
    explain_subset = get_explainability_subset(df, num_samples)

    # Attaching model predictions to the subset. The notebook's tokenizer is
    # `tokenizer_hf`; no global named `tokenizer` is defined in the visible cells.
    explain_subset = add_model_predictions(explain_subset, model, tokenizer_hf)
    print(explain_subset.head())

    # Running SHAP on the selected subset of data
    shap_results = shap_analysis(explain_subset, text_col="cleaned_text")

    print("\nSHAP results (head):")
    print(shap_results.head())

    shap_csv_path = os.path.join(RESULTS_DIR, shap_results_file_name)
    shap_results.to_csv(shap_csv_path, index=False)
    print(f"‚úÖ Saved SHAP token-level results to: {shap_csv_path}")

    # Applying the LIME analysis function to the selected subset of the dataset
    lime_results = lime_analysis_indicbert(
        sampled_df=explain_subset,
        text_col="cleaned_text",
        true_col="labels",
        pred_col="predicted_label",
        save_path=os.path.join(RESULTS_DIR, lime_results_file_name),
    )

    print(lime_results.head())

    # Merging the SHAP/LIME results (read back from the CSVs written above)
    merged_file = merge_shap_lime(
        RESULTS_DIR,
        shap_filename=shap_results_file_name,
        lime_filename=lime_results_file_name,
    )

    # Calculating SHAP/LIME confidence scores (also saved to RESULTS_DIR)
    compute_shap_lime_similarity(
        merged_df=merged_file,
        results_dir=RESULTS_DIR,
        out_name=explainability_results_file_name,
    )

    return

# For 10 samples

In [71]:
# Running the explainability analysis on 10 samples
explainability_analysis(
    df,
    num_samples=10,
    shap_results_file_name="shap_results_10",
    lime_results_file_name="lime_results_10",
    explainability_results_file_name="explainability_results_10",
)

Selected subset size: 10
Hate: 5 Non-hate: 5
                                        cleaned_text  labels  predicted_label  \
0  udan-4: ‡§Ø‡•á ‡§π‡•à‡§Ç 78 ‡§®‡§è ‡§∞‡•Ç‡§ü ‡§ú‡§π‡§æ‡§Ç ‡§π‡§µ‡§æ‡§à ‡§∏‡•á‡§µ‡§æ ‡§ï‡•ã ‡§Æ‡§ø‡§≤...       0                0   
1  '‡§Æ‡§ú‡§π‡§¨ ‡§®‡§π‡•Ä‡§Ç ‡§∏‡§ø‡§ñ‡§æ‡§§‡§æ ‡§Ü‡§™‡§∏ ‡§Æ‡•á‡§Ç ‡§¨‡•à‡§∞ ‡§∞‡§ñ‡§®‡§æ' ... ‡§Ø‡§π ‡§≤‡§æ‡§á...       1                1   
2  ‡§Ö‡§Æ‡•á‡§∞‡§ø‡§ï‡•Ä ‡§∂‡•ã‡§ß‡§ï‡§∞‡•ç‡§§‡§æ‡§ì‡§Ç ‡§ï‡§æ ‡§¶‡§æ‡§µ‡§æ: ‡§¨‡§ø‡§≤‡•ç‡§≤‡§ø‡§Ø‡•ã‡§Ç ‡§ï‡•ã ‡§¶‡•Ä ‡§ú‡§æ...       0                0   
3  ‡§ï‡§æ‡§Ç‡§ó‡•ç‡§∞‡•á‡§∏‡•Ä ‡§ö‡§Æ‡§ö‡•á - ‡§∞‡§æ‡§π‡•Å‡§≤ ‡§ó‡§æ‡§Ç‡§ß‡•Ä ‡§π‡§∞ ‡§Æ‡•Å‡§¶‡•ç‡§¶‡•á ‡§™‡§∞ ‡§Æ‡•ã‡§¶‡•Ä...       1                1   
4  ‡§™‡•ç‡§∞‡§∂‡§æ‡§Ç‡§§ ‡§≠‡•Ç‡§∑‡§£ ‡§Ö‡§µ‡§Æ‡§æ‡§®‡§®‡§æ ‡§Æ‡§æ‡§Æ‡§≤‡•á ‡§Æ‡•á‡§Ç ‡§∏‡•Å‡§™‡•ç‡§∞‡•Ä‡§Æ ‡§ï‡•ã‡§∞‡•ç‡§ü ‡§Ü...       0                0   

   actual_label dataset_name categorisation  
0             0        HASOC       non-hate  
1             1        HASOC  

# For 20 samples

In [74]:
# Running the explainability analysis on 20 samples
explainability_analysis(
    df,
    num_samples=20,
    shap_results_file_name="shap_results_20",
    lime_results_file_name="lime_results_20",
    explainability_results_file_name="explainability_results_20",
)

Selected subset size: 20
Hate: 10 Non-hate: 10
                                        cleaned_text  labels  predicted_label  \
0  ‡§ï‡§æ‡§Ç‡§ó‡•ç‡§∞‡•á‡§∏‡•Ä ‡§ö‡§Æ‡§ö‡•á - ‡§∞‡§æ‡§π‡•Å‡§≤ ‡§ó‡§æ‡§Ç‡§ß‡•Ä ‡§π‡§∞ ‡§Æ‡•Å‡§¶‡•ç‡§¶‡•á ‡§™‡§∞ ‡§Æ‡•ã‡§¶‡•Ä...       1                1   
1      ‡§§‡•Ç ‡§ï‡§π‡§æ ‡§Ö‡§™‡§®‡§æ ‡§ù‡•ã‡§™‡§°‡§º‡§æ ‡§Æ‡§∞‡§µ‡§æ ‡§∞‡§π‡•Ä ‡§π‡•à ‡§Æ‡§æ‡§¶‡§∞‡§ö‡•ã‡§¶ ‡§ï‡•Ç‡§§‡•ç‡§§‡•Ä       0                0   
2  ‡§Ü‡§™ ‡§è‡§ï ‡§∏‡§æ‡§≤ ‡§§‡§ï ‡§¨‡§æ‡§π‡§∞ ‡§®‡§π‡•Ä‡§Ç ‡§ú‡§æ ‡§∏‡§ï‡§§‡•á, ‡§ê‡§∏‡•á ‡§π‡•Ä ‡§ï‡•Å‡§≤ 21 ...       0                0   
3  '‡§Æ‡§ú‡§π‡§¨ ‡§®‡§π‡•Ä‡§Ç ‡§∏‡§ø‡§ñ‡§æ‡§§‡§æ ‡§Ü‡§™‡§∏ ‡§Æ‡•á‡§Ç ‡§¨‡•à‡§∞ ‡§∞‡§ñ‡§®‡§æ' ... ‡§Ø‡§π ‡§≤‡§æ‡§á...       1                1   
4  ‡§Ø‡§π ‡§π‡§∞‡§æ‡§Æ‡§ø ‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§Æ‡•Å‡§∏‡§≤‡§Æ‡§æ‡§® ‡§ï‡•ã ‡§§‡§æ‡§≤‡§ø‡§¨‡§æ‡§®‡•Ä ‡§ï‡§π ‡§∞‡§π‡•Ä ‡§π‡•à ...       1                1   

   actual_label dataset_name categorisation  
0             1        HASOC           hate  
1             0        HAS

# For 50 samples

In [73]:
# Running the explainability analysis on 50 samples
explainability_analysis(
    df,
    num_samples=50,
    shap_results_file_name="shap_results_50",
    lime_results_file_name="lime_results_50",
    explainability_results_file_name="explainability_results_50",
)


Selected subset size: 50
Hate: 25 Non-hate: 25
                                        cleaned_text  labels  predicted_label  \
0  ‡§ú‡§¨ ‡§§‡§ï ‡§ó‡•ã‡§∞‡§ñ‡§™‡•Å‡§∞ ‡§Æ‡•á‡§Ç ‡§¨‡•Ä‡§ú‡•á‡§™‡•Ä ‡§ï‡§æ ‡§∂‡§æ‡§∏‡§® ‡§π‡•à ‡§§‡§¨ ‡§§‡§ï ‡§ó‡•ã‡§∞‡§ñ...       1                1   
1  ‡§ï‡§Ç‡§ó‡§®‡§æ ‡§ï‡•ã ‡§Ö‡§ó‡§∞ ‡§Æ‡•Å‡§ñ‡•ç‡§Ø‡§Æ‡§Ç‡§§‡•ç‡§∞‡•Ä ‡§ï‡•ã '‡§§‡•Ç' ‡§¨‡•ã‡§≤‡§®‡•á ‡§™‡§∞ y+ ‡§∏...       0                0   
2  ‡§Ü‡§™ ‡§è‡§ï ‡§∏‡§æ‡§≤ ‡§§‡§ï ‡§¨‡§æ‡§π‡§∞ ‡§®‡§π‡•Ä‡§Ç ‡§ú‡§æ ‡§∏‡§ï‡§§‡•á, ‡§ê‡§∏‡•á ‡§π‡•Ä ‡§ï‡•Å‡§≤ 21 ...       0                0   
3  ‡§î‡§∞ ‡§ú‡§ø‡§∏‡§ï‡§æ ‡§Ö‡§∞‡•ç‡§• ‡§∏‡§Æ‡§ù ‡§Æ‡•á‡§Ç ‡§Ü‡§Ø‡§æ, ‡§â‡§∏‡§Æ‡•á ‡§™‡§§‡§æ ‡§ö‡§≤‡§æ ‡§π‡•ã‡§ó‡§æ ‡§ï...       0                0   
4  ‡§π‡§ø‡§Ç‡§¶‡•Å‡§∏‡•ç‡§§‡§æ‡§® ‡§ï‡•á ‡§Ö‡§Ç‡§¶‡§∞ ‡§π‡§ø‡§Ç‡§¶‡•Å‡§ì‡§Ç ‡§ï‡§æ ‡§π‡•Ä ‡§ß‡§∞‡•ç‡§Æ‡§™‡§∞‡§ø‡§µ‡§∞‡•ç‡§§‡§® ...       1                1   

   actual_label dataset_name categorisation  
0             1        HASOC           hate  
1             0        HAS

Row 17 | True: 0 | Pred: 0 | Text: ‡§Ö‡§ñ‡§¨‡§æ‡§∞ ‡§ï‡•Ä ‡§∏‡•Å‡§∞‡•ç‡§ñ‡•Ä - ‡§ï‡•ã‡§∞‡•ç‡§ü ‡§ï‡•Ä ‡§ï‡§∏‡•å‡§ü‡•Ä ‡§™‡§∞ ‡§ñ‡§∞‡§æ ‡§∏‡§æ‡§¨‡§ø‡§§ ‡§π‡•Å‡§Ü pm ‡§ï‡•á‡§Ö‡§∞ ‡§´‡§£...
Row 18 | True: 0 | Pred: 0 | Text: ‡§®‡•Ä‡§§‡•Ä‡§∂ ‡§ï‡•Å‡§Æ‡§æ‡§∞ ‡§®‡•á ‡§ú‡•á‡§°‡•Ä‡§Ø‡•Ç ‡§ï‡•á ‡§µ‡§∞‡•ç‡§ö‡•Å‡§Ö‡§≤ ‡§∏‡§Æ‡•ç‡§Æ‡•á‡§≤‡§® ‡§Æ‡•á‡§Ç ‡§∏‡•Å‡§∂‡§æ‡§Ç‡§§ ‡§∏‡§ø‡§Ç‡§π ‡§∞‡§æ‡§ú...
Row 19 | True: 1 | Pred: 0 | Text: ‡§∏‡§Ç‡§ú‡§Ø ‡§∏‡§ø‡§Ç‡§π ‡§Æ‡§æ‡§¶‡§∞‡§ö‡•ã‡§¶ ‡§ï‡•Ä ‡§î‡§≤‡§æ‡§¶ ‡§π‡•à ‡§∏‡§æ‡§≤‡§æ ‡§ï‡•Å‡§§‡•ç‡§§‡§æ ‡§π‡•à...
Row 20 | True: 1 | Pred: 1 | Text: ‡§Ü‡§§‡§Ç‡§ï‡§µ‡§æ‡§¶‡•Ä ‡§π‡§Æ‡§≤‡•á ‡§ï‡•á ‡§Ü‡§∞‡•ã‡§™‡•Ä ‡§∏‡§æ‡§ß‡•ç‡§µ‡•Ä ‡§™‡•ç‡§∞‡§ú‡•ç‡§û‡§æ_‡§†‡§æ‡§ï‡•Å‡§∞ ‡§ï‡§æ ‡§∏‡§Ç‡§∏‡§¶ ‡§™‡§π‡•Å‡§Ç‡§ö‡§®‡§æ ...
Row 21 | True: 1 | Pred: 1 | Text: ‡§≠‡§æ‡§∞‡§§ ‡§Æ‡•á‡§Ç ‡§Æ‡•Å‡§∏‡•ç‡§≤‡§ø‡§Æ‡•ã‡§Ç ‡§¶‡•ç‡§µ‡§æ‡§∞‡§æ ‡§∏‡•ã‡§∂‡§≤ ‡§°‡§ø‡§∏‡•ç‡§ü‡•á‡§Ç‡§∏‡§ø‡§Ç‡§ó ‡§ï‡•ã ‡§´‡•â‡§≤‡•ã ‡§®‡§π‡•Ä‡§Ç ‡§ï‡§ø‡§Ø‡§æ...
Row 22 | True: 1 | Pred: 1 | Text: ‡§Ü‡§ú ‡§∏‡§∞‡§ï‡§æ‡§∞ ‡§ï‡

# For 100 samples

In [75]:
# Running the explainability analysis on 100 samples
explainability_analysis(
    df,
    num_samples=100,
    shap_results_file_name="shap_results_100",
    lime_results_file_name="lime_results_100",
    explainability_results_file_name="explainability_results_100",
)

Selected subset size: 100
Hate: 50 Non-hate: 50
                                        cleaned_text  labels  predicted_label  \
0  ‡§¶‡•á‡§∂ ‡§Æ‡•á‡§Ç ‡§ï‡•ã‡§µ‡§ø‡§°-19 ‡§∏‡•á ‡§∏‡•ç‚Äç‡§µ‡§∏‡•ç‚Äç‡§• ‡§π‡•ã‡§®‡•á ‡§µ‡§æ‡§≤‡•ã‡§Ç ‡§ï‡•Ä ‡§¶‡§∞ ...       0                0   
1  udan-4: ‡§Ø‡•á ‡§π‡•à‡§Ç 78 ‡§®‡§è ‡§∞‡•Ç‡§ü ‡§ú‡§π‡§æ‡§Ç ‡§π‡§µ‡§æ‡§à ‡§∏‡•á‡§µ‡§æ ‡§ï‡•ã ‡§Æ‡§ø‡§≤...       0                0   
2  ‡§î‡§∞ ‡§ú‡§ø‡§∏‡§ï‡§æ ‡§Ö‡§∞‡•ç‡§• ‡§∏‡§Æ‡§ù ‡§Æ‡•á‡§Ç ‡§Ü‡§Ø‡§æ, ‡§â‡§∏‡§Æ‡•á ‡§™‡§§‡§æ ‡§ö‡§≤‡§æ ‡§π‡•ã‡§ó‡§æ ‡§ï...       0                0   
3  ‡§ü‡•Ä‡§µ‡•Ä ‡§™‡§∞ ‡§ß‡§æ‡§∞‡•ç‡§Æ‡§ø‡§ï ‡§µ‡§ø‡§µ‡§æ‡§¶‡•ã‡§Ç ‡§™‡§∞ ‡§¨‡§π‡§∏ ‡§¶‡§ø‡§ñ‡§æ‡§®‡•á ‡§µ‡§æ‡§≤‡•á ‡§™‡§§‡•ç...       1                1   
4  aap to pura family ko le.dube bhaiya ji ....bu...       1                1   

   actual_label dataset_name categorisation  
0             0        HASOC       non-hate  
1             0        HASOC       non-hate  
2             0        HASOC       non-hate  
3            

Row 76 | True: 0 | Pred: 0 | Text: ‡§ï‡§Æ‡§≤‡§®‡§æ‡§• ‡§∏‡§∞‡§ï‡§æ‡§∞ ‡§®‡•á ‡§∏‡§æ‡§≤ 2018 ‡§Æ‡•á‡§Ç ‡§ï‡§≤‡•á‡§ï‡•ç‡§ü‡§∞ ‡§™‡§¶‡§®‡§æ‡§Æ ‡§ï‡•ã ‡§¨‡§¶‡§≤‡§®‡•á ‡§ï‡•á ‡§≤‡§ø‡§è ‡§™...
Row 77 | True: 1 | Pred: 1 | Text: ‡§¨‡•Ä‡§ú‡•á‡§™‡•Ä ‡§ï‡•á ‡§ñ‡§ø‡§≤‡§æ‡§´ ‡§≤‡§ø‡§ñ‡§®‡§æ (‡§≠‡•å‡§Ç‡§ï‡§®‡§æ) ‡§∂‡•Å‡§∞‡•Ç ‡§ï‡§∞‡•ã‡•§ ‡§ï‡•Å‡§õ ‡§¶‡§ø‡§® ‡§¨‡§æ‡§¶ ‡§µ‡•á ‡§Ü‡§™‡§∏‡•á...
Row 78 | True: 0 | Pred: 0 | Text: ‡§≠‡§æ‡§∞‡§§ ‡§®‡•á ‡§™‡§ø‡§õ‡§≤‡•á 24 ‡§ò‡§Ç‡§ü‡•ã‡§Ç ‡§Æ‡•á‡§Ç 65,081 ‡§∞‡§ø‡§ï‡§µ‡§∞‡•Ä (‡§†‡•Ä‡§ï ‡§π‡•ã‡§®‡•á ‡§ï‡•Ä ‡§¶‡§∞) ‡§¶‡§∞...
Row 79 | True: 0 | Pred: 1 | Text: ‡§µ‡•ã ‡§ï‡•Å‡§õ ‡§≤‡•ã‡§ó ‡§ú‡•ã ‡§Æ‡•Å‡§≤‡•ç‡§≤‡•á ‡§π‡•à ‡§µ‡•ã ‡§∞‡§ø‡§∂‡•ç‡§§‡•á ‡§Æ‡•á‡§Ç ‡§§‡•á‡§∞‡•á ‡§¨‡§æ‡§™ ‡§≤‡§ó‡§§‡•á ‡§π‡•à ‡§∏‡§Æ‡§ù‡§æ ...
Row 80 | True: 0 | Pred: 0 | Text: ‡§≠‡•á‡§®‡§ö‡•ã‡§¶ ‡§ú‡§π‡§æ‡§Å ‡§¶‡§ø‡§ñ‡§æ‡§Ø‡§æ ‡§ú‡§æ ‡§∞‡§π‡§æ ‡§µ‡§π‡§æ‡§Ç ‡§ï‡•ç‡§Ø‡•ã‡§Ç ‡§®‡§π‡•Ä‡§Ç ‡§¨‡•ã‡§≤ ‡§∞‡§π‡§æ ‡§ó‡§æ‡§Ç‡§° ‡§´‡§ü‡§§‡•Ä ...
Row 81 | True: 0 | Pred: 1 | Text: ‡§Æ‡§Ç‡§¶‡§ø‡§∞ ‡§Æ‡•á‡§

Row 51 | True: 1 | Pred: 1 | Text: ‡§¶‡•á‡§ñ ‡§≤‡•ã ‡§π‡§ø‡§®‡•ç‡§¶‡•Å‡§∏‡•ç‡§§‡§æ ‡§ï‡•á ‡§≤‡•ã‡§ó‡•ã ‡§Ü‡§ú ‡§§‡§ø‡§∞‡§Ç‡§ó‡§æ ‡§ú‡§≤‡§æ ‡§∞‡§π‡•á ‡§π‡•à ‡§∏‡§æ‡§≤‡•á ‡§ï‡§≤ ‡§≤‡•ã‡§ó‡•ã ...
Row 52 | True: 1 | Pred: 1 | Text: ‡§π‡§ø‡§Ç‡§¶‡•Å‡§∏‡•ç‡§§‡§æ‡§® ‡§ï‡•á ‡§Ö‡§Ç‡§¶‡§∞ ‡§π‡§ø‡§Ç‡§¶‡•Å‡§ì‡§Ç ‡§ï‡§æ ‡§π‡•Ä ‡§ß‡§∞‡•ç‡§Æ‡§™‡§∞‡§ø‡§µ‡§∞‡•ç‡§§‡§® ‡§π‡§ø‡§Ç‡§¶‡•Ç ‡§¨‡•á‡§ü‡§ø‡§Ø‡•ã‡§Ç ...
Row 53 | True: 1 | Pred: 1 | Text: ‡§Ö‡§≠‡§ø‡§®‡•á‡§§‡•ç‡§∞‡•Ä ‡§ú‡§æ‡§Ø‡§∞‡§æ ‡§®‡•á "‡§á‡§∏‡•ç‡§≤‡§æ‡§Æ ‡§Æ‡•á‡§Ç ‡§Ö‡§≠‡§ø‡§®‡§Ø ‡§π‡§∞‡§æ‡§Æ" ‡§¨‡§§‡§æ ‡§´‡§ø‡§≤‡•ç‡§Æ‡•á‡§Ç ‡§õ‡•ã‡§°‡§º‡•Ä...
Row 54 | True: 1 | Pred: 1 | Text: ‡§Ø‡§π ‡§π‡§∞‡§æ‡§Æ‡§ø ‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§Æ‡•Å‡§∏‡§≤‡§Æ‡§æ‡§® ‡§ï‡•ã ‡§§‡§æ‡§≤‡§ø‡§¨‡§æ‡§®‡•Ä ‡§ï‡§π ‡§∞‡§π‡•Ä ‡§π‡•à ‡§á‡§∏‡§ï‡•ã ‡§∏‡§π‡•Ä ‡§ú‡§µ‡§æ‡§¨ ...
Row 55 | True: 0 | Pred: 0 | Text: biharelection: ‡§¨‡§ø‡§π‡§æ‡§∞ ‡§ö‡•Å‡§®‡§æ‡§µ ‡§ï‡•á ‡§≤‡§ø‡§è rjd ‡§ï‡§æ ‡§π‡•á‡§°‡§ï‡•ç‡§µ‡§æ‡§∞‡•ç‡§ü‡§∞ ‡§¨‡§®‡§æ rim...
Row 56 | True: 1 | Pred: 1 | Text: ‡§π‡§ø‡§Ç‡§¶‡•Å‡§ì