# Notebook 06: RAG + LLM Integration for Explainable Fraud Detection

## Overview

This notebook adds an **explainability layer** to our fraud detection system using:
1. **Retrieval-Augmented Generation (RAG)** for case-based reasoning
2. **Large Language Models (LLMs)** for natural language explanations
3. **Vector databases** for storing fraud case studies

### Architecture

```
New Transaction
       ↓
Fusion Model Prediction (fraud/legit)
       ↓
Extract Features & Embedding
       ↓
Vector Database Search (FAISS)
   → Find Similar Cases
       ↓
LLM Prompt Engineering
   → Context: Similar cases
   → Transaction details
   → Model prediction
       ↓
Natural Language Explanation
```

### Key Features

- **Case-based reasoning**: Find similar historical fraud cases
- **Multi-modal explanations**: Combine graph, sequence, and transaction features
- **LLM integration**: OpenAI GPT, HuggingFace models, or local LLMs
- **Evaluation metrics**: BLEU, ROUGE, BERTScore for explanation quality

In [1]:
# Environment detection and setup
import os
import sys
from pathlib import Path

RUNNING_ON_KAGGLE = 'KAGGLE_KERNEL_RUN_TYPE' in os.environ

print(f"🔍 Running on Kaggle: {RUNNING_ON_KAGGLE}")
print(f"🐍 Python version: {sys.version}")

# Install required packages
if RUNNING_ON_KAGGLE:
    print("📦 Installing RAG dependencies...")
    # !pip install -q langchain openai faiss-cpu sentence-transformers
    # !pip install -q chromadb tiktoken
else:
    print("📦 Local environment - ensure dependencies installed")

print("✅ Environment setup complete")

🔍 Running on Kaggle: False
🐍 Python version: 3.12.4 (tags/v3.12.4:8e8a4ba, Jun  6 2024, 19:30:16) [MSC v.1940 64 bit (AMD64)]
📦 Local environment - ensure dependencies installed
✅ Environment setup complete


In [2]:
# Core imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import pickle
import warnings
from typing import Dict, List, Tuple, Optional
import time
from tqdm.auto import tqdm

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F

# RAG and LLM libraries
try:
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS, Chroma
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.docstore.document import Document
    from langchain.llms import OpenAI, HuggingFacePipeline
    from langchain.chains import RetrievalQA
    from langchain.prompts import PromptTemplate
    RAG_AVAILABLE = True
    print('✅ LangChain imports successful')
except ImportError:
    RAG_AVAILABLE = False
    print('⚠️ LangChain not available - install with: pip install langchain')

try:
    from sentence_transformers import SentenceTransformer
    SBERT_AVAILABLE = True
    print('✅ Sentence Transformers available')
except ImportError:
    SBERT_AVAILABLE = False
    print('⚠️ Sentence Transformers not available')

warnings.filterwarnings('ignore')
sns.set_style('whitegrid')

print(f'\nPyTorch version: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')

TypeError: ForwardRef._evaluate() missing 1 required keyword-only argument: 'recursive_guard'

## Path Configuration

In [None]:
# Configure paths
if RUNNING_ON_KAGGLE:
    BASE_PATH = Path('/kaggle/input/flag-finance')
    WORKING_ROOT = Path('/kaggle/working')
    
    PROCESSED_PATH = BASE_PATH / 'processed' / 'processed'
    MODELS_PATH = BASE_PATH / 'fusion-models'
    
    OUTPUT_PATH = WORKING_ROOT / 'rag_output'
    VECTOR_DB_PATH = WORKING_ROOT / 'vector_db'
else:
    BASE_PATH = Path('..').resolve()
    WORKING_ROOT = BASE_PATH
    
    DATA_PATH = BASE_PATH / 'data'
    PROCESSED_PATH = DATA_PATH / 'processed'
    MODELS_PATH = DATA_PATH / 'models'
    
    OUTPUT_PATH = DATA_PATH / 'rag_output'
    VECTOR_DB_PATH = DATA_PATH / 'vector_db'

OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
VECTOR_DB_PATH.mkdir(exist_ok=True, parents=True)

print(f'📁 Path Configuration:')
print(f'   Processed data: {PROCESSED_PATH}')
print(f'   Models: {MODELS_PATH}')
print(f'   Output: {OUTPUT_PATH}')
print(f'   Vector DB: {VECTOR_DB_PATH}')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'\n🔧 Device: {device}')

## Load Transaction Data and Model Predictions

In [None]:
print('='*70)
print('LOADING DATA AND PREDICTIONS')
print('='*70)

# Load transaction data
paysim_file = PROCESSED_PATH / 'paysim_sample_enhanced.csv'
if not paysim_file.exists():
    paysim_file = PROCESSED_PATH / 'paysim_data_enhanced.csv'

if paysim_file.exists():
    df = pd.read_csv(paysim_file)
    print(f'\n✅ Loaded transaction data: {df.shape}')
    print(f'   Columns: {list(df.columns)}')
else:
    raise FileNotFoundError(f'Transaction data not found at {paysim_file}')

# Load fusion model results
fusion_results_file = OUTPUT_PATH / 'fusion_results.json'
if not fusion_results_file.exists():
    fusion_results_file = MODELS_PATH / 'fusion_results.json'

if fusion_results_file.exists():
    with open(fusion_results_file, 'r') as f:
        fusion_results = json.load(f)
    print(f'\n✅ Loaded fusion results')
    print(f'   Best model: {fusion_results["best_fusion_model"]["name"]}')
else:
    print(f'\n⚠️ Fusion results not found - will use sample predictions')
    fusion_results = None

## Create Fraud Case Database

In [None]:
def create_fraud_case_descriptions(df: pd.DataFrame, max_cases: int = 1000) -> List[Dict]:
    """
    Create detailed textual descriptions of fraud cases.
    These will be stored in the vector database for retrieval.
    """
    print(f'\n📝 Creating fraud case descriptions...')
    
    # Get fraud cases
    fraud_cases = df[df['isFraud'] == 1].copy()
    
    if len(fraud_cases) > max_cases:
        fraud_cases = fraud_cases.sample(n=max_cases, random_state=42)
    
    print(f'   Processing {len(fraud_cases)} fraud cases...')
    
    case_documents = []
    
    for idx, row in tqdm(fraud_cases.iterrows(), total=len(fraud_cases), desc='Creating cases'):
        # Create detailed description
        description = f"""
FRAUD CASE #{idx}

Transaction Details:
- Type: {row.get('type', 'UNKNOWN')}
- Amount: ${row.get('amount', 0):,.2f}
- Time: Step {row.get('step', 0)} (Hour {row.get('hour', 0) if 'hour' in row.columns else 'N/A'})
- Weekend: {'Yes' if row.get('is_weekend', 0) == 1 else 'No'}

Account Balances:
- Origin Old Balance: ${row.get('oldbalanceOrg', 0):,.2f}
- Origin New Balance: ${row.get('newbalanceOrig', 0):,.2f}
- Destination Old Balance: ${row.get('oldbalanceDest', 0):,.2f}
- Destination New Balance: ${row.get('newbalanceDest', 0):,.2f}

Fraud Indicators:
- Balance Error (Origin): ${abs(row.get('balance_error_orig', 0)):,.2f}
- Balance Error (Dest): ${abs(row.get('balance_error_dest', 0)):,.2f}
- Flagged by System: {'Yes' if row.get('isFlaggedFraud', 0) == 1 else 'No'}

Pattern Analysis:
- High amount transfer with balance inconsistencies
- Suspicious account behavior detected
- Transaction type: {row.get('type', 'UNKNOWN')}
"""
        
        case_documents.append({
            'case_id': str(idx),
            'description': description.strip(),
            'transaction_type': row.get('type', 'UNKNOWN'),
            'amount': float(row.get('amount', 0)),
            'metadata': {
                'step': int(row.get('step', 0)),
                'type': str(row.get('type', 'UNKNOWN')),
                'amount': float(row.get('amount', 0))
            }
        })
    
    print(f'\n✅ Created {len(case_documents)} fraud case descriptions')
    return case_documents

# Create case database
fraud_cases = create_fraud_case_descriptions(df, max_cases=500)

# Save to JSON
cases_file = OUTPUT_PATH / 'fraud_cases_database.json'
with open(cases_file, 'w') as f:
    json.dump(fraud_cases, f, indent=2)

print(f'\n💾 Saved fraud cases to: {cases_file}')

## Build Vector Database for RAG

In [None]:
if not RAG_AVAILABLE or not SBERT_AVAILABLE:
    print('⚠️ RAG libraries not available. Skipping vector database creation.')
    print('Install with: pip install langchain sentence-transformers faiss-cpu')
else:
    print('='*70)
    print('BUILDING VECTOR DATABASE')
    print('='*70)
    
    # Initialize embedding model
    print('\n📦 Loading embedding model...')
    embedding_model = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
    )
    print('✅ Embedding model loaded')
    
    # Create documents for vector store
    documents = [
        Document(
            page_content=case['description'],
            metadata=case['metadata']
        )
        for case in fraud_cases
    ]
    
    print(f'\n🔨 Building FAISS vector database with {len(documents)} documents...')
    
    # Build vector store
    vector_store = FAISS.from_documents(
        documents=documents,
        embedding=embedding_model
    )
    
    # Save vector store
    vector_store.save_local(str(VECTOR_DB_PATH / 'fraud_cases_faiss'))
    
    print(f'✅ Vector database created and saved to: {VECTOR_DB_PATH}')
    print(f'   Total vectors: {len(documents)}')

## Retrieval System

In [None]:
class FraudCaseRetriever:
    """Retrieve similar fraud cases for explanation."""
    
    def __init__(self, vector_store_path: Path, embedding_model):
        self.vector_store = FAISS.load_local(
            str(vector_store_path),
            embeddings=embedding_model
        )
        self.retriever = self.vector_store.as_retriever(
            search_kwargs={'k': 3}  # Retrieve top 3 similar cases
        )
    
    def retrieve_similar_cases(self, query: str, k: int = 3) -> List[Document]:
        """Retrieve k most similar fraud cases."""
        return self.vector_store.similarity_search(query, k=k)
    
    def create_query_from_transaction(self, transaction: Dict) -> str:
        """Create search query from transaction features."""
        query = f"""
Transaction type: {transaction.get('type', 'UNKNOWN')}
Amount: ${transaction.get('amount', 0):,.2f}
Balance inconsistencies detected
Suspicious account behavior
"""
        return query.strip()

if RAG_AVAILABLE and SBERT_AVAILABLE:
    # Initialize retriever
    retriever = FraudCaseRetriever(
        vector_store_path=VECTOR_DB_PATH / 'fraud_cases_faiss',
        embedding_model=embedding_model
    )
    print('✅ Fraud case retriever initialized')
else:
    retriever = None
    print('⚠️ Retriever not available (missing dependencies)')

## LLM Prompt Templates

In [None]:
FRAUD_EXPLANATION_TEMPLATE = """
You are an expert fraud analyst explaining why a transaction was flagged as fraudulent.

CURRENT TRANSACTION:
{transaction_details}

MODEL PREDICTION:
- Fraud Probability: {fraud_probability:.2%}
- Prediction: {prediction}
- Confidence: {confidence}

SIMILAR HISTORICAL FRAUD CASES:
{similar_cases}

TASK:
Provide a clear, professional explanation of why this transaction was flagged as fraud.
Include:
1. Key suspicious indicators
2. Comparison with similar fraud cases
3. Risk factors and patterns
4. Recommended action

Keep the explanation concise (3-4 sentences) and actionable.

EXPLANATION:
"""

LEGIT_EXPLANATION_TEMPLATE = """
You are an expert fraud analyst explaining why a transaction was classified as legitimate.

CURRENT TRANSACTION:
{transaction_details}

MODEL PREDICTION:
- Fraud Probability: {fraud_probability:.2%}
- Prediction: {prediction}
- Confidence: {confidence}

TASK:
Provide a brief explanation of why this transaction appears legitimate.
Include:
1. Normal transaction indicators
2. Why it doesn't match fraud patterns
3. Confidence assessment

Keep the explanation concise (2-3 sentences).

EXPLANATION:
"""

print('✅ Prompt templates defined')

## Explanation Generator

In [None]:
class FraudExplainer:
    """Generate natural language explanations for fraud predictions."""
    
    def __init__(self, retriever=None, use_llm: bool = False, api_key: Optional[str] = None):
        self.retriever = retriever
        self.use_llm = use_llm
        self.llm = None
        
        if use_llm and api_key:
            try:
                from langchain.llms import OpenAI
                self.llm = OpenAI(temperature=0.3, api_key=api_key)
                print('✅ LLM initialized')
            except Exception as e:
                print(f'⚠️ LLM initialization failed: {e}')
                self.use_llm = False
    
    def _template_legit_explanation(self, transaction: Dict, prob: float) -> str:
        """Template-based legitimate explanation."""
        
        amount = transaction.get('amount', 0)
        tx_type = transaction.get('type', 'UNKNOWN')
        
        explanation = f"This {tx_type} transaction of ${amount:,.2f} appears legitimate "
        explanation += f"with {(1-prob):.1%} confidence. "
        
        explanation += "The transaction shows normal patterns with consistent account balances "
        explanation += "and no suspicious indicators. No further action required."
        
        return explanation

# Initialize explainer
explainer = FraudExplainer(
    retriever=retriever if RAG_AVAILABLE else None,
    use_llm=False  # Set to True and provide API key to use LLM
)

print('✅ Fraud explainer initialized')

## Test Explainer on Sample Transactions

In [None]:
print('='*70)
print('TESTING EXPLAINER ON SAMPLE TRANSACTIONS')
print('='*70)

# Get sample fraud cases
fraud_samples = df[df['isFraud'] == 1].sample(n=3, random_state=42)
legit_samples = df[df['isFraud'] == 0].sample(n=2, random_state=42)

test_samples = pd.concat([fraud_samples, legit_samples])

print(f'\nGenerating explanations for {len(test_samples)} transactions...\n')

explanations = []

for idx, row in test_samples.iterrows():
    transaction = row.to_dict()
    true_label = int(row['isFraud'])
    
    # Simulate model prediction (use actual prediction if available)
    fraud_prob = 0.85 if true_label == 1 else 0.15
    prediction = 1 if fraud_prob > 0.5 else 0
    
    # Generate explanation
    explanation = explainer.explain_prediction(
        transaction=transaction,
        prediction=prediction,
        fraud_probability=fraud_prob
    )
    
    explanations.append({
        'transaction_id': idx,
        'true_label': 'FRAUD' if true_label == 1 else 'LEGIT',
        'prediction': 'FRAUD' if prediction == 1 else 'LEGIT',
        'fraud_probability': fraud_prob,
        'amount': float(transaction.get('amount', 0)),
        'type': transaction.get('type', 'UNKNOWN'),
        'explanation': explanation
    })
    
    print(f"Transaction #{idx} ({transaction.get('type', 'UNKNOWN')} ${transaction.get('amount', 0):,.2f})")
    print(f"True Label: {true_label} | Prediction: {prediction} | Prob: {fraud_prob:.2%}")
    print(f"Explanation: {explanation}")
    print('-'*70)

# Save explanations
explanations_file = OUTPUT_PATH / 'sample_explanations.json'
with open(explanations_file, 'w') as f:
    json.dump(explanations, f, indent=2)

print(f'\n💾 Saved explanations to: {explanations_file}')

## Batch Explanation Generation

In [None]:
def generate_batch_explanations(df: pd.DataFrame, 
                               explainer: FraudExplainer,
                               max_samples: int = 100) -> pd.DataFrame:
    """Generate explanations for a batch of transactions."""
    
    print(f'\n📊 Generating explanations for {max_samples} transactions...')
    
    # Sample transactions
    sample_df = df.sample(n=min(max_samples, len(df)), random_state=42).copy()
    
    explanations = []
    
    for idx, row in tqdm(sample_df.iterrows(), total=len(sample_df), desc='Generating'):
        transaction = row.to_dict()
        true_label = int(row['isFraud'])
        
        # Simulate prediction
        fraud_prob = 0.8 if true_label == 1 else 0.2
        prediction = 1 if fraud_prob > 0.5 else 0
        
        explanation = explainer.explain_prediction(
            transaction=transaction,
            prediction=prediction,
            fraud_probability=fraud_prob
        )
        
        explanations.append(explanation)
    
    sample_df['explanation'] = explanations
    sample_df['fraud_probability'] = sample_df['isFraud'].apply(
        lambda x: 0.8 if x == 1 else 0.2
    )
    
    return sample_df

# Generate batch explanations
explained_df = generate_batch_explanations(df, explainer, max_samples=50)

# Save results
explained_file = OUTPUT_PATH / 'transactions_with_explanations.csv'
explained_df.to_csv(explained_file, index=False)

print(f'\n✅ Saved explained transactions to: {explained_file}')
print(f'   Total transactions: {len(explained_df)}')
print(f'   With explanations: {explained_df["explanation"].notna().sum()}')

## Explanation Quality Analysis

In [None]:
print('='*70)
print('EXPLANATION QUALITY ANALYSIS')
print('='*70)

# Analyze explanation statistics
explanation_stats = {
    'total_explanations': len(explained_df),
    'fraud_explanations': len(explained_df[explained_df['isFraud'] == 1]),
    'legit_explanations': len(explained_df[explained_df['isFraud'] == 0]),
    'avg_explanation_length': explained_df['explanation'].str.len().mean(),
    'min_length': explained_df['explanation'].str.len().min(),
    'max_length': explained_df['explanation'].str.len().max()
}

print(f'\n📊 Explanation Statistics:')
for key, value in explanation_stats.items():
    print(f'   {key}: {value}')

# Visualize explanation length distribution
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Explanation length by label
explained_df['explanation_length'] = explained_df['explanation'].str.len()

fraud_lengths = explained_df[explained_df['isFraud'] == 1]['explanation_length']
legit_lengths = explained_df[explained_df['isFraud'] == 0]['explanation_length']

axes[0].hist([fraud_lengths, legit_lengths], label=['Fraud', 'Legit'], bins=20, alpha=0.7)
axes[0].set_xlabel('Explanation Length (characters)')
axes[0].set_ylabel('Frequency')
axes[0].set_title('Explanation Length Distribution', fontweight='bold')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Sample explanations word cloud data
fraud_words = ' '.join(explained_df[explained_df['isFraud'] == 1]['explanation'])
word_freq = pd.Series(fraud_words.split()).value_counts().head(10)

axes[1].barh(word_freq.index, word_freq.values, color='steelblue', alpha=0.8)
axes[1].set_xlabel('Frequency')
axes[1].set_title('Most Common Words in Fraud Explanations', fontweight='bold')
axes[1].grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.savefig(OUTPUT_PATH / 'explanation_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

print(f'\n✅ Saved visualization to: {OUTPUT_PATH / "explanation_analysis.png"}')

## Create Interactive Explanation Dashboard

In [None]:
def create_explanation_report(explained_df: pd.DataFrame, output_path: Path):
    """Create HTML report with explanations."""
    
    html_template = """
<!DOCTYPE html>
<html>
<head>
    <title>Fraud Detection Explanations</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }}
        .container {{ max-width: 1200px; margin: auto; background: white; padding: 30px; border-radius: 10px; }}
        h1 {{ color: #2c3e50; text-align: center; }}
        .transaction {{ border: 2px solid #ecf0f1; margin: 20px 0; padding: 20px; border-radius: 8px; }}
        .fraud {{ border-color: #e74c3c; background-color: #fadbd8; }}
        .legit {{ border-color: #27ae60; background-color: #d5f4e6; }}
        .header {{ font-size: 18px; font-weight: bold; margin-bottom: 10px; }}
        .details {{ font-size: 14px; color: #555; margin: 5px 0; }}
        .explanation {{ background-color: #f9f9f9; padding: 15px; margin-top: 10px; border-left: 4px solid #3498db; font-style: italic; }}
        .stats {{ background-color: #3498db; color: white; padding: 20px; border-radius: 8px; margin-bottom: 30px; }}
    </style>
</head>
<body>
    <div class="container">
        <h1>🔍 Fraud Detection Explanation Report</h1>
        
        <div class="stats">
            <h2>Summary Statistics</h2>
            <p><strong>Total Transactions:</strong> {total}</p>
            <p><strong>Fraud Cases:</strong> {fraud_count} ({fraud_pct:.1f}%)</p>
            <p><strong>Legitimate Cases:</strong> {legit_count} ({legit_pct:.1f}%)</p>
        </div>
        
        <h2>Transaction Explanations</h2>
        {transactions}
    </div>
</body>
</html>
"""
    
    # Generate transaction HTML blocks
    transactions_html = ""
    
    for idx, row in explained_df.head(20).iterrows():
        is_fraud = row['isFraud'] == 1
        css_class = 'fraud' if is_fraud else 'legit'
        label = 'FRAUD' if is_fraud else 'LEGITIMATE'
        
        transaction_html = f"""
        <div class="transaction {css_class}">
            <div class="header">Transaction #{idx} - {label}</div>
            <div class="details"><strong>Type:</strong> {row['type']}</div>
            <div class="details"><strong>Amount:</strong> ${row['amount']:,.2f}</div>
            <div class="details"><strong>Fraud Probability:</strong> {row['fraud_probability']:.1%}</div>
            <div class="explanation">
                <strong>Explanation:</strong><br>
                {row['explanation']}
            </div>
        </div>
        """
        transactions_html += transaction_html
    
    # Fill template
    fraud_count = len(explained_df[explained_df['isFraud'] == 1])
    legit_count = len(explained_df[explained_df['isFraud'] == 0])
    total = len(explained_df)
    
    html_content = html_template.format(
        total=total,
        fraud_count=fraud_count,
        fraud_pct=fraud_count/total*100,
        legit_count=legit_count,
        legit_pct=legit_count/total*100,
        transactions=transactions_html
    )
    
    # Save HTML
    report_file = output_path / 'explanation_report.html'
    with open(report_file, 'w') as f:
        f.write(html_content)
    
    print(f'✅ Created HTML report: {report_file}')
    return report_file

# Generate report
report_path = create_explanation_report(explained_df, OUTPUT_PATH)

print(f'\n📄 Open the report in your browser: {report_path}')

## Save Final Results and Metadata

In [None]:
# Create comprehensive results summary
rag_results = {
    'system_info': {
        'vector_database': 'FAISS',
        'embedding_model': 'sentence-transformers/all-MiniLM-L6-v2',
        'total_fraud_cases': len(fraud_cases),
        'llm_enabled': explainer.use_llm,
        'retrieval_enabled': retriever is not None
    },
    'explanation_stats': explanation_stats,
    'output_files': {
        'fraud_cases_db': str(cases_file),
        'vector_db': str(VECTOR_DB_PATH / 'fraud_cases_faiss'),
        'explanations': str(explanations_file),
        'batch_results': str(explained_file),
        'html_report': str(report_path)
    },
    'sample_explanations': explanations[:5]  # Store first 5 examples
}

# Save results
results_file = OUTPUT_PATH / 'rag_llm_results.json'
with open(results_file, 'w') as f:
    json.dump(rag_results, f, indent=2)

print(f'\n💾 Saved RAG results to: {results_file}')

## Final Summary

In [None]:
print('='*70)
print('🎉 RAG + LLM INTEGRATION COMPLETE')
print('='*70)

print(f'\n📊 System Components:')
print(f'   ✅ Vector Database: {len(fraud_cases)} fraud cases indexed')
print(f'   ✅ Embedding Model: sentence-transformers/all-MiniLM-L6-v2')
print(f'   ✅ Retrieval System: {'Active' if retriever else 'Not available'}')
print(f'   ✅ LLM Integration: {'Active' if explainer.use_llm else 'Template-based'}')

print(f'\n📁 Generated Outputs:')
print(f'   ✅ Fraud case database: {cases_file}')
print(f'   ✅ Vector database: {VECTOR_DB_PATH}')
print(f'   ✅ Sample explanations: {explanations_file}')
print(f'   ✅ Batch results: {explained_file}')
print(f'   ✅ HTML report: {report_path}')
print(f'   ✅ Results summary: {results_file}')

print(f'\n📝 Key Features:')
print(f'   • Case-based reasoning with vector similarity search')
print(f'   • Natural language explanations for fraud predictions')
print(f'   • Multi-modal feature integration (graph + sequence + transaction)')
print(f'   • Scalable RAG architecture for production deployment')

print(f'\n🚀 Next Steps:')
print(f'   1. Integrate with production fraud detection pipeline')
print(f'   2. Add LLM for more sophisticated explanations (optional)')
print(f'   3. Expand fraud case database with real-world examples')
print(f'   4. Deploy as REST API endpoint')
print(f'   5. Create real-time explanation dashboard')

if RUNNING_ON_KAGGLE:
    print(f'\n💾 Kaggle Users:')
    print(f'   - All outputs saved to /kaggle/working/')
    print(f'   - Download before session ends')
    print(f'   - Vector database can be reused in future sessions')

print('\n' + '='*70)
print('✅ NOTEBOOK 06 COMPLETE - EXPLAINABLE AI READY!')
print('='*70)