<a href="https://colab.research.google.com/github/anonymouaccount/From-Theory-to-Evaluation-An-Experimental-Survey-on-Explainable-Artificial-Intelligence/blob/main/Text_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text **Data**

# **DeepLIFT**

In [None]:
!pip install deeplift

In [None]:
!pip install captum

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from captum.attr import DeepLift
import torch
import torch.nn as nn
import torch.optim as optim
import zipfile
import re
from IPython.core.display import display, HTML

# Mount Google Drive so the dataset archive stored there can be read from this runtime.
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime —
# confirm before running this cell anywhere else.
from google.colab import drive
drive.mount('/content/drive')

# Define paths (all three are consumed by the unzip / read_csv steps further down this cell)
zip_path = '/content/drive/MyDrive/Reviews.zip'  # Zip archive passed to unzip_data()
extract_to = '/content/Reviews'  # Directory the archive is extracted into
data_path = os.path.join(extract_to, 'IMDB Dataset.csv')  # CSV inside extract_to that pd.read_csv() loads

# Extract the zip file
def unzip_data(zip_path, extract_to):
    """Extract every member of a zip archive into a directory.

    Args:
        zip_path: Path of the ``.zip`` archive to read.
        extract_to: Destination directory; it is created (including missing
            parents) when it does not exist yet.

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    # exist_ok=True replaces the separate os.path.exists() check: one call,
    # and no window in which the directory can appear between check and create.
    os.makedirs(extract_to, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_to)

# Unzip the dataset
unzip_data(zip_path, extract_to)

# Load dataset
df = pd.read_csv(data_path)

# Convert sentiment labels to binary (positive -> 1, negative -> 0)
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

# Series.map() turns every label missing from the mapping into NaN without
# complaining; fail here with a clear message instead of deep inside fit().
if df['sentiment'].isna().any():
    raise ValueError("Unexpected sentiment label(s): expected only 'positive' or 'negative'")

# Split data (80% train / 20% test, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(df['review'], df['sentiment'], test_size=0.2, random_state=42)

# Convert text to numerical features; the vocabulary is learned on the training split only
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train Random Forest Classifier
# n_jobs=-1 builds the trees on all available cores; random_state still pins the result.
clf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
clf.fit(X_train_tfidf, y_train)

# Predictions on the held-out split
y_pred = clf.predict(X_test_tfidf)
print(f'Accuracy: {accuracy_score(y_test, y_pred):.4f}')
print(classification_report(y_test, y_pred))

# Define a simple neural network for DeepLift explanation
class SentimentNN(nn.Module):
    """Two-layer feed-forward classifier used as the network DeepLift explains.

    Architecture: ``Linear(input_dim, 50) -> ReLU -> Linear(50, 2)``.
    ``forward`` returns the raw output of the second linear layer (no softmax).

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 50)
        # The activation is a registered module instead of a functional
        # ``torch.relu`` call: Captum's DeepLift works through module hooks,
        # so a functional non-linearity is invisible to it and the attribution
        # rule for the ReLU would not be applied. nn.ReLU has no parameters,
        # so the state_dict keys are unchanged.
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(50, 2)

    def forward(self, x):
        """Map a batch of feature vectors to two class scores per sample."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Seed torch so weight initialisation (and therefore the attributions and the
# noise-based metrics computed later) is repeatable on Restart & Run All.
torch.manual_seed(42)

# Convert TF-IDF to PyTorch tensors.
# Casting the sparse matrix to float32 *before* densifying avoids materialising
# a float64 dense copy first; the resulting values are the same float32 numbers.
X_train_torch = torch.from_numpy(X_train_tfidf.astype(np.float32).toarray())
y_train_torch = torch.tensor(y_train.values, dtype=torch.long)
X_test_torch = torch.from_numpy(X_test_tfidf.astype(np.float32).toarray())
y_test_torch = torch.tensor(y_test.values, dtype=torch.long)

# Initialize the model
input_dim = X_train_tfidf.shape[1]
model = SentimentNN(input_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the neural network (full-batch gradient steps).
# The network is fitted on the TRAINING split; the previous version fitted it on
# X_test/y_test, i.e. on the very samples that are explained and evaluated below.
epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

# Switch to inference mode before the model is handed to the explainer
model.eval()

# DeepLift explanation
deep_lift = DeepLift(model)

def explain_instance(instance, target_class=1):
    """
    Compute DeepLift attributions for one raw review.

    - instance: The review text; it is vectorised with the module-level
      `vectorizer` before being passed to the explainer.
    - target_class: Output index to attribute (0 = negative, 1 = positive).

    Returns a flat NumPy array of attribution values.
    """
    dense_features = vectorizer.transform([instance]).toarray()
    model_input = torch.tensor(dense_features, dtype=torch.float32)
    raw_attributions = deep_lift.attribute(model_input, target=target_class)
    # Detach from the autograd graph before converting to NumPy
    as_numpy = raw_attributions.detach().numpy()
    return as_numpy.flatten()

import html  # stdlib; escapes review text before it is embedded in HTML

# Explain a sample review
sample_review = X_test.iloc[0]
# NOTE(review): the target class comes from the Random Forest (`clf`) while the
# attributions are computed on the neural network (`model`) — confirm this is intended.
sample_prediction = clf.predict(vectorizer.transform([sample_review]))[0]  # Get prediction
explanation = explain_instance(sample_review, target_class=int(sample_prediction))  # Convert to int

# Get words from vectorizer
words = np.array(vectorizer.get_feature_names_out())

# Scale by attribution MAGNITUDE so colour opacity tracks importance for both
# signs. (Min-max scaling mapped the most negative attribution to opacity 0,
# making the strongest "blue" word invisible, and divided by zero when all
# attributions were equal.)
abs_explanation = np.abs(explanation)
max_abs_attribution = abs_explanation.max() if abs_explanation.size else 0.0
if max_abs_attribution > 0:
    normalized_importance = abs_explanation / max_abs_attribution
else:
    normalized_importance = np.zeros(abs_explanation.shape, dtype=float)

# Highlight words in the review
word_list = words.tolist()
word_importance = dict(zip(word_list, normalized_importance))
# One dict lookup per word instead of a linear list.index() scan over the vocabulary
word_index = {vocab_word: idx for idx, vocab_word in enumerate(word_list)}
review_words = sample_review.split()
highlighted_review = []
for word in review_words:
    clean_word = re.sub(r'[^a-zA-Z]', '', word).lower()
    # The review is external text: escape it so markup inside it is shown, not interpreted
    safe_word = html.escape(word)
    feature_idx = word_index.get(clean_word)
    if feature_idx is not None:
        intensity = float(normalized_importance[feature_idx])  # Normalized importance (0 to 1)
        if explanation[feature_idx] > 0:
            color = f'rgba(255, 0, 0, {intensity:.3f})'  # Red for positive attributions
        else:
            color = f'rgba(0, 0, 255, {intensity:.3f})'  # Blue for zero / negative attributions
        highlighted_review.append(f'<span style="background-color: {color};"> {safe_word} </span>')
    else:
        highlighted_review.append(safe_word)

highlighted_text = ' '.join(highlighted_review)
display(HTML(f'<p style="font-size:16px;">{highlighted_text}</p>'))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Accuracy: 0.8493
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      4961
           1       0.86      0.84      0.85      5039

    accuracy                           0.85     10000
   macro avg       0.85      0.85      0.85     10000
weighted avg       0.85      0.85      0.85     10000

Epoch 1/10, Loss: 0.6933
Epoch 2/10, Loss: 0.6915
Epoch 3/10, Loss: 0.6895
Epoch 4/10, Loss: 0.6869
Epoch 5/10, Loss: 0.6840
Epoch 6/10, Loss: 0.6809
Epoch 7/10, Loss: 0.6776
Epoch 8/10, Loss: 0.6742
Epoch 9/10, Loss: 0.6706
Epoch 10/10, Loss: 0.6668


               activations. The hooks and attributes will be removed
            after the attribution is finished


In [None]:
# Evaluation metrics
def fidelity_score(model, instance_tensor, attributions, target_class, percentile=50):
    """Check whether the prediction survives removal of the low-attribution features.

    Features whose attribution magnitude lies below the given percentile of the
    NON-ZERO magnitudes are set to 0 in a copy of the input; the score is 1 when
    the model's argmax prediction is unchanged and 0 otherwise.

    The threshold is taken over non-zero attributions only. When most entries
    are exactly zero, a percentile over all entries is 0, so the strict
    ``< threshold`` mask selects nothing and the score is trivially 1.

    Args:
        model: Callable mapping a ``(1, n_features)`` tensor to class scores.
        instance_tensor: Input of shape ``(1, n_features)``; it is not modified.
        attributions: Flat array-like with one attribution per feature.
        target_class: Unused; kept for a uniform metric signature.
        percentile: Percentile (0-100) of non-zero magnitudes used as the cut-off.

    Returns:
        int: 1 if original and masked predictions agree, else 0.
    """
    magnitudes = np.abs(np.asarray(attributions)).ravel()
    active = magnitudes[magnitudes > 0]

    masked_tensor = instance_tensor.clone()
    if active.size:
        threshold = np.percentile(active, percentile)
        low_importance = torch.as_tensor(magnitudes < threshold, device=instance_tensor.device)
        masked_tensor[:, low_importance] = 0

    # Only predictions are needed here, so skip building the autograd graph
    with torch.no_grad():
        original_pred = model(instance_tensor).argmax(dim=1).item()
        masked_pred = model(masked_tensor).argmax(dim=1).item()
    return int(original_pred == masked_pred)

def sparsity_score(attributions):
    """Return the fraction of attribution entries that are exactly zero.

    1.0 means every entry is zero, 0.0 means none is.
    """
    total_entries = len(attributions)
    nonzero_entries = np.count_nonzero(attributions)
    return 1.0 - (nonzero_entries / total_entries)

def stability_score(model, instance_tensor, attributions, target_class, noise_std=0.01):
    """Measure how much the attributions move when Gaussian noise is added to the input.

    The input is perturbed with noise of standard deviation ``noise_std``,
    re-explained with the module-level ``deep_lift`` explainer, and the score is
    ``1 - ||original - perturbed|| / ||original||`` (1 means identical
    attributions). ``model`` is not referenced inside this function.
    """
    perturbed_instance = instance_tensor + torch.randn_like(instance_tensor) * noise_std
    perturbed_attr = deep_lift.attribute(perturbed_instance, target=target_class)
    perturbed_attr = perturbed_attr.detach().numpy().flatten()

    drift = np.linalg.norm(attributions - perturbed_attr)
    reference_norm = np.linalg.norm(attributions)
    return 1 - (drift / reference_norm)

def robustness_score(model, instance_tensor, attributions, target_class, noise_std=0.1):
    """Return 1 if the model's argmax prediction survives Gaussian input noise, else 0.

    Noise with standard deviation ``noise_std`` is added to ``instance_tensor``;
    ``attributions`` and ``target_class`` are not referenced.
    """
    def predicted_class(batch):
        # Index of the highest-scoring class for a single-row batch
        return model(batch).argmax(dim=1).item()

    perturbed_instance = instance_tensor + torch.randn_like(instance_tensor) * noise_std
    clean_label = predicted_class(instance_tensor)
    noisy_label = predicted_class(perturbed_instance)
    return int(clean_label == noisy_label)

# Explain the first held-out review and score the explanation
sample_review = X_test.iloc[0]
sample_tfidf = vectorizer.transform([sample_review])  # vectorise once, reuse for both models
sample_prediction = clf.predict(sample_tfidf)[0]
instance_tensor = torch.tensor(sample_tfidf.toarray(), dtype=torch.float32)
predicted_class = int(sample_prediction)
explanation = explain_instance(sample_review, target_class=predicted_class)

# The three model-based metrics take the same positional arguments
metric_args = (model, instance_tensor, explanation, predicted_class)
fidelity = fidelity_score(*metric_args)
sparsity = sparsity_score(explanation)
stability = stability_score(*metric_args)
robustness = robustness_score(*metric_args)

print(f'Fidelity: {fidelity}')
print(f'Sparsity: {sparsity:.4f}')
print(f'Stability: {stability:.4f}')
print(f'Robustness: {robustness:.4f}')


Fidelity: 1
Sparsity: 0.9826
Stability: 0.0940
Robustness: 1.0000


# **LIME**

In [None]:
!pip install lime

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from lime.lime_text import LimeTextExplainer
import torch
import torch.nn as nn
import torch.optim as optim
import zipfile
import re
from IPython.core.display import display, HTML

# Mount Google Drive so the dataset archive stored there can be read from this runtime.
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime —
# confirm before running this cell anywhere else.
from google.colab import drive
drive.mount('/content/drive')

# Define paths (all three are consumed by the unzip / read_csv steps further down this cell)
zip_path = '/content/drive/MyDrive/Reviews.zip'  # Zip archive passed to unzip_data()
extract_to = '/content/Reviews'  # Directory the archive is extracted into
data_path = os.path.join(extract_to, 'IMDB Dataset.csv')  # CSV inside extract_to that pd.read_csv() loads

# Extract the zip file
def unzip_data(zip_path, extract_to):
    """Extract every member of a zip archive into a directory.

    Args:
        zip_path: Path of the ``.zip`` archive to read.
        extract_to: Destination directory; it is created (including missing
            parents) when it does not exist yet.

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    # exist_ok=True replaces the separate os.path.exists() check: one call,
    # and no window in which the directory can appear between check and create.
    os.makedirs(extract_to, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_to)

# Unzip the dataset
unzip_data(zip_path, extract_to)

# Load dataset
df = pd.read_csv(data_path)

# Convert sentiment labels to binary (positive -> 1, negative -> 0)
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

# Series.map() turns every label missing from the mapping into NaN without
# complaining; fail here with a clear message instead of deep inside fit().
if df['sentiment'].isna().any():
    raise ValueError("Unexpected sentiment label(s): expected only 'positive' or 'negative'")

# Split data (80% train / 20% test, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(df['review'], df['sentiment'], test_size=0.2, random_state=42)

# Convert text to numerical features; the vocabulary is learned on the training split only
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train Random Forest Classifier
# n_jobs=-1 builds the trees on all available cores; random_state still pins the result.
clf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
clf.fit(X_train_tfidf, y_train)

# Predictions on the held-out split
y_pred = clf.predict(X_test_tfidf)
print(f'Accuracy: {accuracy_score(y_test, y_pred):.4f}')
print(classification_report(y_test, y_pred))

# Define a function for LIME to process text correctly
def classifier_fn(texts):
    """Prediction callback handed to LIME.

    Vectorises the given raw texts with the module-level `vectorizer` and
    returns the `predict_proba` output of the module-level `clf` for them.
    """
    tfidf_features = vectorizer.transform(texts)
    return clf.predict_proba(tfidf_features)

# Initialize LIME
# Seed the explainer: LIME explains by sampling random perturbations of the
# text, so without a fixed random_state the weights (and every metric derived
# from them below) change on each Restart & Run All.
explainer = LimeTextExplainer(class_names=['negative', 'positive'], random_state=42)

def explain_instance(instance, num_features=10):
    """Explain a single review using LIME.

    Args:
        instance: Raw review text.
        num_features: Maximum number of words to include in the explanation
            (defaults to 10, the value that was previously hard-coded).

    Returns:
        The explanation's ``as_list()`` result: ``(word, weight)`` pairs.
    """
    exp = explainer.explain_instance(instance, classifier_fn, num_features=num_features)
    return exp.as_list()

import html  # stdlib; escapes review text before it is embedded in HTML

# Test LIME explanation on a sample review
sample_review = X_test.iloc[0]
sample_prediction = clf.predict(vectorizer.transform([sample_review]))[0]  # Get prediction
explanation = explain_instance(sample_review)

# Normalize importance scores for better color scaling.
# default=0.0 keeps an empty explanation from raising in max().
max_importance = max((abs(value) for _, value in explanation), default=0.0)

# Index the weights by LOWER-CASED word. The review words below are lower-cased
# before lookup, while LIME tokens may keep their original casing, so a direct
# `key == clean_word` comparison misses capitalised words. If two casings of the
# same word collide, the weight with the larger magnitude is kept.
weight_by_word = {}
for key, value in explanation:
    lowered_key = key.lower()
    if lowered_key not in weight_by_word or abs(value) > abs(weight_by_word[lowered_key]):
        weight_by_word[lowered_key] = value

# Highlight words in the review
review_words = sample_review.split()
highlighted_review = []
for word in review_words:
    clean_word = re.sub(r'[^a-zA-Z]', '', word).lower()
    # The review is external text: escape it so markup inside it is shown, not interpreted
    safe_word = html.escape(word)
    weight = weight_by_word.get(clean_word)
    if weight is None:
        highlighted_review.append(safe_word)
        continue
    # Guard against an all-zero explanation (would otherwise divide by zero)
    intensity = abs(weight) / max_importance if max_importance > 0 else 0.0
    color = f'rgba(255, 0, 0, {intensity})' if weight > 0 else f'rgba(0, 0, 255, {intensity})'
    highlighted_review.append(f'<span style="background-color: {color};"> {safe_word} </span>')

highlighted_text = ' '.join(highlighted_review)
display(HTML(f'<p style="font-size:16px;">{highlighted_text}</p>'))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Accuracy: 0.8493
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      4961
           1       0.86      0.84      0.85      5039

    accuracy                           0.85     10000
   macro avg       0.85      0.85      0.85     10000
weighted avg       0.85      0.85      0.85     10000



In [None]:
# XAI Evaluation Metrics
def fidelity_score(model, instance, explanation, target_class, top_k=5):
    """Check whether the prediction survives removal of the top explanation words.

    The first ``top_k`` words of ``explanation`` are deleted from the text and
    the score is 1 when ``model`` predicts the same label for the original and
    the masked text, 0 otherwise.

    Words are removed case-insensitively on word boundaries. The earlier
    ``word.lower() not in important_features`` test compared a lower-cased,
    punctuation-carrying token ("Great!" -> "great!") with the explanation
    words as returned by LIME, so capitalised or punctuated occurrences were
    left in the "masked" text.

    Args:
        model: Classifier exposing ``predict`` on vectorised text.
        instance: Raw review text.
        explanation: ``(word, weight)`` pairs, most important first.
        target_class: Unused; kept for a uniform metric signature.
        top_k: Number of leading explanation words to remove (default 5).

    Returns:
        int: 1 if original and masked predictions agree, else 0.
    """
    important_features = {word.lower() for word, _ in explanation[:top_k] if word}

    if important_features:
        # Longest alternatives first so a longer word is never pre-empted by a shorter one
        alternatives = '|'.join(
            re.escape(word) for word in sorted(important_features, key=len, reverse=True)
        )
        masked_instance = re.sub(r'\b(?:' + alternatives + r')\b', '', instance, flags=re.IGNORECASE)
    else:
        masked_instance = instance

    original_pred = model.predict(vectorizer.transform([instance]))[0]
    masked_pred = model.predict(vectorizer.transform([masked_instance]))[0]
    return int(original_pred == masked_pred)

def sparsity_score(explanation, n_features=5000):
    """Return the share of the feature space NOT used by the explanation.

    Args:
        explanation: Sequence of explanation entries (e.g. ``(word, weight)`` pairs).
        n_features: Size of the feature space to normalise by. Defaults to
            5000, the value that was previously hard-coded here.

    Returns:
        float: ``1 - len(explanation) / n_features``.

    Raises:
        ValueError: If ``n_features`` is not positive.
    """
    if n_features <= 0:
        raise ValueError("n_features must be a positive number")
    return 1.0 - (len(explanation) / n_features)  # Normalize by feature space size

def stability_score(model, instance, explanation, target_class, noise_std=0.01):
    """Measure how much the LIME weights change when the text is lightly perturbed.

    The text is perturbed by appending five words drawn at random (with
    replacement) from the text itself. Both versions are explained with
    ``explain_instance`` and the score is
    ``1 - ||original - perturbed|| / ||original||``.

    The two weight vectors are aligned BY WORD (a word missing from one
    explanation counts as weight 0). Comparing the raw ``as_list()`` values by
    position is wrong whenever the two explanations rank or select different
    words, and fails outright when they differ in length.

    ``model``, ``explanation``, ``target_class`` and ``noise_std`` are not
    used; they are kept for a uniform metric signature.

    Returns:
        float: 1.0 for identical explanations; lower (possibly negative) the
        more the weights move. When the original explanation has zero norm the
        result is 1.0 if the perturbed one is all-zero too, else 0.0.
    """
    tokens = instance.split()
    if tokens:
        perturbed_instance = instance + ' ' + ' '.join(np.random.choice(tokens, size=5))
    else:
        # Nothing to sample from; np.random.choice would raise on an empty list
        perturbed_instance = instance

    original_exp = dict(explain_instance(instance))
    perturbed_exp = dict(explain_instance(perturbed_instance))

    # Shared, deterministic word order so position i means the same word in both vectors
    vocabulary = sorted(set(original_exp) | set(perturbed_exp))
    original_vec = np.array([original_exp.get(word, 0.0) for word in vocabulary], dtype=float)
    perturbed_vec = np.array([perturbed_exp.get(word, 0.0) for word in vocabulary], dtype=float)

    reference_norm = np.linalg.norm(original_vec)
    if reference_norm == 0:
        return 1.0 if not np.any(perturbed_vec) else 0.0
    return 1 - (np.linalg.norm(original_vec - perturbed_vec) / reference_norm)

def robustness_score(model, instance, explanation, target_class, noise_std=0.1):
    """Return 1 if the predicted label survives a light text perturbation, else 0.

    The perturbation appends five words sampled at random (with replacement)
    from the text itself. ``explanation``, ``target_class`` and ``noise_std``
    are not referenced.
    """
    def predict_label(text):
        # Vectorise one raw text and return the model's label for it
        return model.predict(vectorizer.transform([text]))[0]

    extra_words = np.random.choice(instance.split(), size=5)
    perturbed_instance = instance + ' ' + ' '.join(extra_words)
    label_before = predict_label(instance)
    label_after = predict_label(perturbed_instance)
    return int(label_before == label_after)

# Score the LIME explanation of the sample review on the four XAI metrics
# (the three model-based metrics share one positional-argument tuple)
metric_args = (clf, sample_review, explanation, sample_prediction)
fidelity = fidelity_score(*metric_args)
sparsity = sparsity_score(explanation)
stability = stability_score(*metric_args)
robustness = robustness_score(*metric_args)

print(f'Fidelity: {fidelity}')
print(f'Sparsity: {sparsity:.4f}')
print(f'Stability: {stability:.4f}')
print(f'Robustness: {robustness:.4f}')


Fidelity: 1
Sparsity: 0.9980
Stability: 0.5757
Robustness: 1.0000
