# Sentiment Analysis with Explanations

This notebook demonstrates a refactored sentiment-analysis workflow with LIME and SHAP explanations.
The code has been split into separate modules for better maintainability, and the notebook includes fidelity testing of the generated explanations.

## 1. Setup and Dependencies

In [None]:
# Install required packages
!pip install -q lime datasets transformers accelerate bitsandbytes sentencepiece shap matplotlib seaborn

# Import necessary modules
import logging
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

## 2. Import Refactored Modules

In [None]:
# Import our custom modules
from config import ExplanationConfig, DatasetConfig, SentimentLabel
from sentiment_analyzer import SentimentAnalyzer
from data_handler import DataHandler
from explanation_generator import ExplanationGenerator
from explanation_tester import ExplanationTester

## 3. Initialize Components

In [None]:
# Sentiment model wrapper; report what it says about the device it runs on
analyzer = SentimentAnalyzer()
device_info = analyzer.get_device_info()
print(f"Device: {device_info}")

# Data access object; report what it says about the dataset it serves
data_handler = DataHandler()
dataset_info = data_handler.get_dataset_info()
print(f"Dataset info: {dataset_info}")

# The explanation generator is constructed around the analyzer ...
explainer = ExplanationGenerator(analyzer)

# ... and the tester receives both the analyzer and the generator
tester = ExplanationTester(analyzer, explainer)

## 4. Basic Sentiment Analysis

In [None]:
# Fetch a few test documents together with their reference labels.
# NOTE(review): (0, 3) is presumably a start/end range into the test split — confirm in DataHandler.
sample_texts = data_handler.get_test_texts(0, 3)
sample_labels = data_handler.get_test_labels(0, 3)

print("Sample predictions:")
for sample_no, (doc, gold_label) in enumerate(zip(sample_texts, sample_labels)):
    # Ask the model for its label first, then for the probabilities of this one document
    predicted = analyzer.predict_single(doc)
    doc_probs = analyzer.predict_proba([doc])[0]

    # Emit the whole per-sample report in one call, one item per line
    print(
        f"\nSample {sample_no}:",
        f"Text: {doc[:100]}...",
        f"True label: {gold_label}",
        f"Prediction: {predicted}",
        f"Probabilities [positive, negative]: {doc_probs.round(3)}",
        sep="\n",
    )

## 5. LIME Explanations

In [None]:
# Choose the test document that the LIME and SHAP cells will explain
explanation_index = 22
sample_data = data_handler.get_test_sample(explanation_index)

print(f"Generating LIME explanation for document {explanation_index}")
doc_text = sample_data['text']
print(f"Text: {doc_text[:200]}...")
doc_label = sample_data['label']
print(f"True label: {doc_label}")

# Summary is requested with (text, document index, true label), in that order
explanation_summary = explainer.get_explanation_summary(doc_text, explanation_index, doc_label)

# Report the three summary fields this notebook cares about
print(
    f"\nPrediction: {explanation_summary['prediction']}",
    f"Probabilities: {explanation_summary['probabilities']}",
    f"LIME explanation: {explanation_summary['lime_explanation']}",
    sep="\n",
)

In [None]:
# Build a LIME explanation for the selected document and render it inline,
# with the text view switched on
text_to_explain = sample_data['text']
lime_explanation = explainer.explain_instance_lime(text_to_explain)
lime_explanation.show_in_notebook(text=True)

## 6. SHAP Explanations

In [None]:
# Texts handed to the SHAP explainer as background data
background_texts = data_handler.get_test_texts(0, 20)

print(f"Generating SHAP explanation for document {explanation_index}")
target_text = sample_data['text']
shap_explanation = explainer.explain_instance_shap(target_text, background_texts)

# A None result is reported as "not available"; anything else counts as success
if shap_explanation is None:
    print("SHAP explanation not available")
else:
    print("SHAP explanation generated successfully")
    # Optional visualization hook (would need `import shap` first):
    # shap.plots.text(shap_explanation)

## 7. Submodular Pick Explanations

In [None]:
# Candidate documents for the submodular pick
sample_texts_for_sp = data_handler.get_test_texts(0, 10)
candidate_count = len(sample_texts_for_sp)
print(f"Generating submodular explanations for {candidate_count} texts")

# Keyword settings forwarded unchanged to the explanation generator
sp_settings = {
    "sample_size": 3,
    "num_features": 6,
    "num_explanations": 2,
}
sp_obj = explainer.generate_submodular_explanations(sample_texts_for_sp, **sp_settings)

picked_count = len(sp_obj.sp_explanations)
print(f"Generated {picked_count} submodular explanations")

In [None]:
# Build one pyplot figure per picked explanation, then show them all
figures = []
for picked in sp_obj.sp_explanations:
    figures.append(picked.as_pyplot_figure())
plt.show()

In [None]:
# Same plots as above, but each one is drawn explicitly for the first label
# that the explanation reports as available
labeled_figures = []
for picked in sp_obj.sp_explanations:
    first_label = picked.available_labels()[0]
    labeled_figures.append(picked.as_pyplot_figure(label=first_label))
plt.show()

## 8. Explanation Fidelity Testing

In [None]:
# Pool of documents offered to the fidelity tester
test_texts = data_handler.get_test_texts(0, 20)
pool_size = len(test_texts)
print(f"Testing explanation fidelity on {pool_size} samples...")

# NOTE(review): sample_size=10 presumably limits how many of the pooled texts are tested — confirm in ExplanationTester
fidelity_results = tester.test_batch_fidelity(test_texts, sample_size=10)

# Turn the results into a report and print it
report = tester.generate_fidelity_report(fidelity_results)
print(report)

In [None]:
# Walk through the first three per-test records and print their fields
print("Detailed fidelity results for first 3 tests:")
leading_details = fidelity_results.details[:3]
for test_no, entry in enumerate(leading_details, start=1):
    # Long text fields are cut to their first 100 characters for display
    print(
        f"\nTest {test_no}:",
        f"  Original text: {entry['original_text'][:100]}...",
        f"  Original prediction: {entry['original_prediction']}",
        f"  Supporting fidelity: {entry['supporting_fidelity']}",
        f"  Contrary fidelity: {entry['contrary_fidelity']}",
        f"  Top features: {entry['top_features']}",
        f"  Supporting text: {entry['supporting_text'][:100]}...",
        f"  Contrary text: {entry['contrary_text'][:100]}...",
        sep="\n",
    )

## 9. Visualization and Analysis

In [None]:
# Create fidelity visualization: a bar chart of the aggregate fidelity scores
# next to overlaid histograms of the per-test probability differences.
import matplotlib.pyplot as plt
import numpy as np

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# --- Left panel: aggregate fidelity scores ---
fidelity_types = ['Supporting', 'Contrary', 'Average']
fidelity_scores = [
    fidelity_results.supporting_fidelity,
    fidelity_results.contrary_fidelity,
    fidelity_results.average_fidelity
]

bars = ax1.bar(fidelity_types, fidelity_scores, color=['green', 'red', 'blue'])
ax1.set_title('Explanation Fidelity Scores')
ax1.set_ylabel('Fidelity Score')
# Leave headroom above 1.0: each value label is drawn just above its bar, so
# with a hard upper limit of 1.0 the label of a score near 1 would land outside
# the axes and collide with the title.
ax1.set_ylim(0, 1.1)

# Add value labels on bars
for bar, score in zip(bars, fidelity_scores):
    ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
             f'{score:.3f}', ha='center', va='bottom')

# --- Right panel: probability differences distribution ---
supporting_diffs = [detail['supporting_prob_diff'] for detail in fidelity_results.details]
contrary_diffs = [detail['contrary_prob_diff'] for detail in fidelity_results.details]

# Use one set of bin edges for both histograms. With `bins=10` passed to each
# call separately, every series gets its own edges over its own value range, so
# the overlaid bars have different widths/positions and cannot be compared.
shared_bins = np.histogram_bin_edges(supporting_diffs + contrary_diffs, bins=10)

ax2.hist(supporting_diffs, alpha=0.7, label='Supporting', bins=shared_bins)
ax2.hist(contrary_diffs, alpha=0.7, label='Contrary', bins=shared_bins)
ax2.set_title('Probability Difference Distribution')
ax2.set_xlabel('Probability Difference')
ax2.set_ylabel('Frequency')
ax2.legend()

plt.tight_layout()
plt.show()

## 10. Summary and Conclusions

In [None]:
# Headline numbers gathered from the earlier cells
print("=== Analysis Summary ===")
print(f"Total samples analyzed: {len(sample_texts)}")
print(f"Explanations generated: {len(sp_obj.sp_explanations)}")
print(f"Fidelity tests conducted: {fidelity_results.num_tests}")
overall_fidelity = fidelity_results.average_fidelity
print(f"Overall explanation quality: {overall_fidelity:.3f}")

# Quality bands, checked from the highest floor downwards; a score that meets
# no floor falls through to the LOW message
quality_bands = (
    (0.8, "✅ Explanation quality is HIGH"),
    (0.6, "⚠️ Explanation quality is MODERATE"),
)
verdict = next(
    (message for floor, message in quality_bands if overall_fidelity >= floor),
    "❌ Explanation quality is LOW",
)
print(verdict)

# Closing list of what the refactoring is said to have improved
print("\nKey improvements in this refactored version:")
improvement_bullets = (
    "- Modular architecture with separate concerns",
    "- Proper error handling and logging",
    "- Configuration management with enums",
    "- Comprehensive testing framework",
    "- Support for both LIME and SHAP explanations",
    "- Fidelity testing for explanation quality",
)
for bullet in improvement_bullets:
    print(bullet)