In [1]:
import torch
import numpy as np
import pandas as pd
import shap
import lime
import lime.lime_text
from transformers import AutoTokenizer, AutoModelForTokenClassification
from datasets import load_dataset
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


# Load the fine-tuned NER model and tokenizer


In [2]:
def load_model_and_tokenizer(model_name):
    """Load a token-classification model together with its tokenizer.

    Both objects are created with ``from_pretrained`` from the same
    identifier; the tokenizer is loaded first, then the model.

    Args:
        model_name: Identifier handed unchanged to both ``from_pretrained``
            calls.

    Returns:
        A ``(model, tokenizer)`` tuple -- the model comes first.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    return AutoModelForTokenClassification.from_pretrained(model_name), loaded_tokenizer

# Prepare NER model for prediction


In [3]:
class NERModel:
    """Prediction wrapper pairing a token-classification model with its tokenizer.

    The two objects are stored as ``self.model`` and ``self.tokenizer`` exactly
    as passed in.
    """

    def __init__(self, model, tokenizer):
        """Store the model and tokenizer used by :meth:`predict`."""
        self.model = model
        self.tokenizer = tokenizer

    def predict(self, texts):
        """Return the arg-max label id for every token position.

        Args:
            texts: Input handed to the tokenizer (padded and truncated,
                returned as PyTorch tensors).

        Returns:
            A NumPy array holding ``argmax(logits, dim=2)``. This assumes the
            model's logits are laid out as (batch, sequence, labels), which is
            what the ``dim=2`` reduction implies. These are label ids, not
            probabilities.
        """
        inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

        # Keep the inputs on the same device as the model when the model
        # exposes one; otherwise leave them where the tokenizer put them.
        device = getattr(self.model, "device", None)
        if device is not None:
            inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

        # Pure inference: no need to record an autograd graph.
        with torch.no_grad():
            outputs = self.model(**inputs)

        predictions = torch.argmax(outputs.logits, dim=2)
        # .cpu() is a no-op for CPU tensors and required before .numpy()
        # when the logits live on an accelerator.
        return predictions.detach().cpu().numpy()

# Initialize LIME Explainer


In [4]:
def initialize_lime_explainer(class_names):
    """Create a LIME text explainer labelled with ``class_names``.

    Args:
        class_names: Forwarded as the ``class_names`` keyword of
            ``lime.lime_text.LimeTextExplainer``.

    Returns:
        The newly constructed ``LimeTextExplainer``.
    """
    explainer_cls = lime.lime_text.LimeTextExplainer
    text_explainer = explainer_cls(class_names=class_names)
    return text_explainer



# Explain a single instance using LIME


In [5]:
def explain_instance(text, model_predict, explainer, num_features=10):
    """Run ``explainer.explain_instance`` on a single text.

    Args:
        text: The text to explain. A non-string sequence of tokens is joined
            with single spaces first, because the explainer is handed a
            plain string.
        model_predict: Prediction callable forwarded to the explainer.
        explainer: Object exposing ``explain_instance(text, fn, num_features=...)``.
        num_features: Maximum number of features in the explanation
            (defaults to 10, the value that used to be hard-coded).

    Returns:
        Whatever ``explainer.explain_instance`` returns.
    """
    if not isinstance(text, str):
        # Dataset rows are often pre-split word lists; turn them into text.
        text = " ".join(str(token) for token in text)
    return explainer.explain_instance(text, model_predict, num_features=num_features)

# Visualize LIME explanation


In [6]:
def visualize_lime_explanation(explanation, sample_text):
    """Plot a LIME explanation as a horizontal bar chart of word weights.

    The previous version plotted ``explanation.as_map()`` directly, which
    maps each label id to a list of ``(feature_id, weight)`` tuples and so
    cannot be used as bar heights. This version reads the ``(word, weight)``
    pairs from ``as_list`` for the first explained label instead.

    Args:
        explanation: LIME explanation exposing ``available_labels()`` and
            ``as_list(label=...)``.
        sample_text: The explained text; only used in the figure title.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    explained_labels = explanation.available_labels()
    word_weights = explanation.as_list(label=explained_labels[0]) if explained_labels else []

    # Split the pairs into parallel lists; both stay empty when nothing was explained.
    words = [word for word, _ in word_weights]
    weights = [weight for _, weight in word_weights]

    plt.figure()
    plt.barh(words, weights, color='skyblue')
    plt.title(f'LIME Explanation for: "{sample_text}"')
    plt.xlabel('Weight')
    plt.show()

# Generate SHAP explanations


In [7]:
def shap_explanation(model, tokenizer, texts):
    """Compute SHAP values for ``texts``.

    A ``shap.Explainer`` is built from ``model`` and ``tokenizer`` (passed
    positionally, in that order) and then applied to ``texts``.

    Args:
        model: First argument to ``shap.Explainer``.
        tokenizer: Second argument to ``shap.Explainer``.
        texts: Inputs the explainer is called on.

    Returns:
        The result of calling the explainer on ``texts``.
    """
    return shap.Explainer(model, tokenizer)(texts)

# Visualize SHAP values


In [8]:
def visualize_shap_values(shap_values):
    """Render the SHAP text plot for the first entry of ``shap_values``.

    ``shap.initjs()`` is called first, then ``shap.plots.text`` is given
    ``shap_values[0]``. Nothing is returned.
    """
    shap.initjs()
    render_text_plot = shap.plots.text
    render_text_plot(shap_values[0])

In [9]:
# Identify difficult cases: keep only the texts that contain a given keyword
def analyze_difficult_cases(texts, keyword):
    """Collect every entry of ``texts`` that contains ``keyword``.

    Containment is tested with ``in``, so its meaning follows the entry's
    type (substring for strings, membership for lists).

    Args:
        texts: Iterable of entries to scan.
        keyword: Value looked for inside each entry.

    Returns:
        A new list with the matching entries, in their original order.
    """
    matches = []
    for candidate in texts:
        if keyword in candidate:
            matches.append(candidate)
    return matches

# Main Execution Flow


In [None]:
if __name__ == "__main__":
    model_name = "your_model_name"  # Replace with your trained model name
    dataset_name = "your_dataset_name"  # Replace with your dataset name
    class_names = ['O', 'B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-Price', 'I-Price']

    # Load model and dataset
    model, tokenizer = load_model_and_tokenizer(model_name)
    dataset = load_dataset(dataset_name, split='validation')
    # Assuming your dataset has a 'tokens' field. LIME and SHAP both perturb
    # plain strings, so rows that are token lists are joined into sentences;
    # rows that are already strings are kept as they are.
    texts = [
        row if isinstance(row, str) else " ".join(str(token) for token in row)
        for row in dataset['tokens']
    ]

    # Initialize NER model and LIME explainer
    ner_model = NERModel(model, tokenizer)
    explainer = initialize_lime_explainer(class_names)

    def sentence_class_probabilities(batch, batch_size=32):
        """Return one class-probability row per input text.

        LIME needs a function mapping a list of strings to an
        (n_samples, n_classes) probability matrix, whereas
        ``NERModel.predict`` returns arg-max label ids per token. Here the
        per-token softmax is averaged over the non-padding positions of each
        text, which gives one distribution per text (rows sum to 1).
        The texts are processed in chunks of ``batch_size`` because LIME
        sends all of its perturbed samples in a single call.
        """
        batch = [str(item) for item in batch]  # SHAP passes NumPy string arrays
        pooled_chunks = []
        for start in range(0, len(batch), batch_size):
            inputs = ner_model.tokenizer(
                batch[start:start + batch_size],
                padding=True, truncation=True, return_tensors="pt",
            )
            with torch.no_grad():
                logits = ner_model.model(**inputs).logits
            probs = torch.softmax(logits, dim=-1)
            # Zero out padding positions before averaging over the sequence.
            mask = inputs["attention_mask"].unsqueeze(-1).to(probs.dtype)
            pooled_chunks.append((probs * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1))
        if not pooled_chunks:
            return np.empty((0, len(class_names)))
        return torch.cat(pooled_chunks).cpu().numpy()

    # Explain a sample instance
    sample_text = texts[0]  # Change index for different samples
    explanation = explain_instance(sample_text, sentence_class_probabilities, explainer)

    # Visualize LIME explanation
    visualize_lime_explanation(explanation, sample_text)

    # SHAP analysis for a batch of texts. The string -> probabilities callable
    # is passed in place of the raw torch model, which cannot be called on
    # the masked strings SHAP generates; the tokenizer acts as the text masker.
    shap_values = shap_explanation(sentence_class_probabilities, tokenizer, texts[:10])  # Analyze first 10 texts

    # Visualize SHAP values for the first text
    visualize_shap_values(shap_values)

    # Analyze difficult cases (substring match on the joined sentences)
    difficult_texts = analyze_difficult_cases(texts, "ambiguous")  # Customize keyword as needed

    # Generate and print a report on model decision-making
    report = {
        "num_difficult_cases": len(difficult_texts),
        "examples": difficult_texts
    }

    print("Model Decision-Making Report:")
    print(report)