In [None]:
### 1. Configuración Inicial de Colab

In [None]:
# 1. Instalar dependencias principales
!pip install transformers torch accelerate
!pip install langchain langchain-community
!pip install chromadb sentence-transformers
!pip install fastapi uvicorn pyngrok
!pip install gradio pandas numpy
!pip install shap lime-tabular
!pip install arxiv requests beautifulsoup4

In [None]:
### 2. Importaciones y Configuración

In [None]:
import os
import pandas as pd
import numpy as np
from typing import List, Dict, Any, Optional
import json
import re
from datetime import datetime

In [None]:
# LangChain y RAG
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.schema import Document

So far so good: the main dependencies were installed correctly.

In [None]:
# Transformers para LLM
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

In [None]:
# Explicabilidad
!pip install shap
!pip install lime

import shap
from lime.lime_text import LimeTextExplainer

In [None]:
# API y Frontend
import gradio as gr
from pyngrok import ngrok

In [None]:
# Utilidades
import requests
from bs4 import BeautifulSoup
import arxiv
import warnings
warnings.filterwarnings('ignore')

## Fase 1: Preparación de Datos Científicos

### 3. Descarga de Corpus Científico

In [None]:
class ScientificDataCollector:
    """Builds the raw corpus: ArXiv abstracts plus a handful of hand-written sample texts."""

    def __init__(self):
        # Lookup of suspicious wording -> conventional term.
        self.tortured_phrases = {
            "bosom malignancy": "breast cancer",
            "corpus luteum": "corpus luteum",  # already the correct term
            "fake example": "real term",
        }
        self.documents = []

    def collect_arxiv_papers(self, query: str, max_results: int = 20):
        """Query ArXiv for papers matching `query` within the q-bio categories.

        Args:
            query: Free-text search expression; ``AND cat:q-bio*`` is appended to it.
            max_results: Upper bound passed to the ArXiv search.

        Returns:
            A list with one dict per result holding title, author names, abstract,
            PDF URL, publication date (``YYYY-MM-DD``) and categories.
        """
        client = arxiv.Client()
        search = arxiv.Search(
            query=f"{query} AND cat:q-bio*",
            max_results=max_results,
            sort_by=arxiv.SortCriterion.Relevance,
        )

        return [
            {
                'title': entry.title,
                'authors': [person.name for person in entry.authors],
                'abstract': entry.summary,
                'url': entry.pdf_url,
                'published': entry.published.strftime('%Y-%m-%d'),
                'categories': entry.categories,
            }
            for entry in client.results(search)
        ]

    def create_synthetic_contaminated_data(self):
        """Return six sample texts: three labelled contaminated followed by three clean ones."""
        contaminated = (
            "The study of bosom malignancy has shown significant progress in recent years. Traditional treatments for bosom malignancy include chemotherapy and radiation.",
            "Machine learning models can help detect fake patterns in medical imaging, improving diagnostic accuracy.",
            "Recent research in artificial intelligence applications for medical diagnosis shows promising results.",
        )
        clean = (
            "Breast cancer research has advanced significantly with new immunotherapy approaches. Clinical trials show improved survival rates.",
            "Deep learning algorithms demonstrate high accuracy in medical image analysis for early disease detection.",
            "Natural language processing techniques are being applied to clinical notes for better patient care.",
        )

        # Contaminated samples come first, then the clean ones, as a fresh list.
        return [*contaminated, *clean]


In [None]:
# Instantiate the data collector
collector = ScientificDataCollector()

In [None]:
# ArXiv data (biomedicine): fetch up to 15 papers for the query below
print("🔄 Descargando papers de ArXiv...")
arxiv_papers = collector.collect_arxiv_papers("cancer detection machine learning", 15)

In [None]:
# Synthetic sample texts (contaminated ones first, then clean ones)
synthetic_data = collector.create_synthetic_contaminated_data()

In [None]:
# Report how many items each source produced
print(f"✅ Recolectados {len(arxiv_papers)} papers de ArXiv")
print(f"✅ Creados {len(synthetic_data)} documentos sintéticos")

In [None]:
### 4. Procesamiento y Chunking

class DocumentPreprocessor:
    """Wraps raw papers and sample texts as LangChain ``Document`` objects and chunks them."""

    # Substrings that mark a synthetic text as contaminated (compared in lower case).
    CONTAMINATION_MARKERS = ('bosom malignancy', 'fake')

    def __init__(self, chunk_size: int = 500, chunk_overlap: int = 50):
        """Create the text splitter.

        Args:
            chunk_size: Maximum chunk length handed to the splitter (default 500).
            chunk_overlap: Overlap between consecutive chunks (default 50).
        """
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
        )

    def process_arxiv_papers(self, papers: List[Dict]) -> List[Document]:
        """Convert ArXiv paper dicts into LangChain documents.

        Args:
            papers: Dicts with the keys 'title', 'abstract', 'authors', 'url',
                'published' and 'categories'.

        Returns:
            One Document per paper; the content is the title followed by the abstract.
        """
        documents = []

        for paper in papers:
            # Combine title and abstract into a single searchable text
            content = f"Title: {paper['title']}\n\nAbstract: {paper['abstract']}"

            # List-valued fields are flattened to comma-separated strings in the metadata
            doc = Document(
                page_content=content,
                metadata={
                    'source': 'arxiv',
                    'title': paper['title'],
                    'authors': ', '.join(paper['authors']),
                    'url': paper['url'],
                    'published': paper['published'],
                    'categories': ', '.join(paper['categories'])
                }
            )
            documents.append(doc)

        return documents

    def process_synthetic_data(self, texts: List[str]) -> List[Document]:
        """Wrap synthetic texts as documents and flag the contaminated ones.

        A text is flagged when it contains any of ``CONTAMINATION_MARKERS``. The match
        ignores case so that a marker at the start of a sentence is still detected.
        """
        documents = []

        for i, text in enumerate(texts):
            lowered = text.lower()
            doc = Document(
                page_content=text,
                metadata={
                    'source': 'synthetic',
                    'doc_id': f'synthetic_{i}',
                    'contaminated': any(marker in lowered for marker in self.CONTAMINATION_MARKERS)
                }
            )
            documents.append(doc)

        return documents

    def chunk_documents(self, documents: List[Document]) -> List[Document]:
        """Split the documents into chunks with the configured splitter."""
        return self.text_splitter.split_documents(documents)


In [None]:
# Instantiate the document preprocessor
preprocessor = DocumentPreprocessor()

In [None]:
# Wrap both corpora as LangChain documents
print("🔄 Procesando documentos...")
arxiv_docs = preprocessor.process_arxiv_papers(arxiv_papers)
synthetic_docs = preprocessor.process_synthetic_data(synthetic_data)

In [None]:
# Merge both corpora, split them into chunks, and report the totals
all_documents = arxiv_docs + synthetic_docs
chunked_docs = preprocessor.chunk_documents(all_documents)

print(f"✅ Total documentos: {len(all_documents)}")
print(f"✅ Total chunks: {len(chunked_docs)}")

## Fase 2: Base de Datos Vectorial

### 5. Creación de Embeddings y ChromaDB

In [None]:
class VectorDatabase:
    """Holds the embedding model and the Chroma vector store built from it."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Load the embedding model on CUDA when available, otherwise on CPU."""
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs={'device': device}
        )
        # Stays None until create_vectorstore() has been called.
        self.vectorstore = None

    def create_vectorstore(self, documents: List[Document], persist_directory: str = "./chroma_db"):
        """Embed `documents` into a Chroma store, keep it on the instance, and return it."""
        print("🔄 Creando embeddings y base vectorial...")

        store = Chroma.from_documents(
            documents=documents,
            embedding=self.embeddings,
            persist_directory=persist_directory
        )
        self.vectorstore = store

        print(f"✅ Base vectorial creada con {len(documents)} documentos")
        return store

    def similarity_search(self, query: str, k: int = 5) -> List[Document]:
        """Return the `k` stored documents most similar to `query`.

        Raises:
            ValueError: If the vector store has not been created yet.
        """
        if self.vectorstore:
            return self.vectorstore.similarity_search(query, k=k)

        raise ValueError("Vectorstore no inicializado")

In [None]:
# Create the vector store from the chunked documents
vector_db = VectorDatabase()
vectorstore = vector_db.create_vectorstore(chunked_docs)

## Fase 3: Sistema LLM y RAG

### 6. Configuración del Modelo de Lenguaje

In [None]:
class LLMManager:
    """Loads a Hugging Face text-generation pipeline and produces completions with it."""

    def __init__(self, model_name: str = "microsoft/DialoGPT-medium"):
        """Remember the model name and load the pipeline right away.

        The default is a comparatively light model so it fits the free Colab tier.
        """
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self.pipeline = None
        self.setup_model()

    def setup_model(self):
        """Build the text-generation pipeline (device 0 when CUDA is available, else CPU)."""
        print(f"🔄 Cargando modelo {self.model_name}...")

        # No total-length cap is fixed here: generate_response() passes an explicit
        # budget of new tokens on every call, and setting both would conflict.
        self.pipeline = pipeline(
            "text-generation",
            model=self.model_name,
            tokenizer=self.model_name,
            device=0 if torch.cuda.is_available() else -1,
            do_sample=True,
            temperature=0.7
        )

        print("✅ Modelo cargado correctamente")

    def generate_response(self, prompt: str, max_length: int = 300) -> str:
        """Generate a completion for `prompt` and return only the newly generated text.

        Args:
            prompt: Full prompt text, including any retrieved context.
            max_length: Maximum number of tokens to generate. It is forwarded as
                ``max_new_tokens``, so the prompt length does not eat into the budget.
                A total-length cap would leave no room to generate once the prompt
                alone exceeds it, which is the normal case for RAG prompts.

        Returns:
            The generated continuation with surrounding whitespace stripped.

        Raises:
            ValueError: If the pipeline has not been initialised.
        """
        if self.pipeline is None:
            raise ValueError("Pipeline no inicializado")

        outputs = self.pipeline(
            prompt,
            max_new_tokens=max_length,
            num_return_sequences=1,
            # Reuse EOS as the padding token
            pad_token_id=self.pipeline.tokenizer.eos_token_id
        )

        generated_text = outputs[0]['generated_text']

        # The pipeline echoes the prompt before the continuation; drop that prefix.
        if generated_text.startswith(prompt):
            generated_text = generated_text[len(prompt):]

        return generated_text.strip()


In [None]:
# Initialise the LLM (the constructor loads the model pipeline)
llm_manager = LLMManager()

### 7. Sistema RAG Básico

In [None]:
class RAGSystem:
    """Retrieval-augmented generation: fetch similar chunks, build a prompt, query the LLM."""

    def __init__(self, vector_db: VectorDatabase, llm_manager: LLMManager):
        self.tortured_phrases = {
            "bosom malignancy": "breast cancer",
            "fake patterns": "actual patterns",
            "artificial results": "research results",
        }
        self.llm_manager = llm_manager
        self.vector_db = vector_db

    def retrieve_context(self, query: str, k: int = 3) -> List[Document]:
        """Return the `k` chunks most similar to `query` from the vector database."""
        hits = self.vector_db.similarity_search(query, k=k)
        return hits

    def create_prompt(self, query: str, context_docs: List[Document]) -> str:
        """Assemble the LLM prompt: instruction, numbered context documents, question."""
        numbered = []
        for position, doc in enumerate(context_docs, start=1):
            numbered.append(f"Document {position}: {doc.page_content}")
        context = "\n\n".join(numbered)

        return (
            "Based on the following scientific documents, please answer the question accurately.\n"
            "\n"
            "Context:\n"
            f"{context}\n"
            "\n"
            f"Question: {query}\n"
            "\n"
            "Answer:"
        )

    def generate_rag_response(self, query: str) -> Dict[str, Any]:
        """Run retrieval, prompting and generation for `query`.

        Returns:
            Dict with the query, the generated response, the retrieved documents,
            their metadata ('sources') and the exact prompt that was used.
        """
        # Retrieve -> prompt -> generate
        context_docs = self.retrieve_context(query)
        prompt = self.create_prompt(query, context_docs)
        answer = self.llm_manager.generate_response(prompt)

        sources = []
        for doc in context_docs:
            sources.append(doc.metadata)

        return {
            'query': query,
            'response': answer,
            'context_docs': context_docs,
            'sources': sources,
            'prompt_used': prompt,
        }

# Initialise the RAG system
rag_system = RAGSystem(vector_db, llm_manager)


## Fase 4: Componente XAI (Explicabilidad)

### 8. Detector de Hallucinations y Tortured Phrases

In [None]:
class FactChecker:
    """Flags tortured phrases in a response and scores how well the sources support it."""

    # Word tokenizer for the overlap check: punctuation is ignored, so "cancer." and
    # "cancer" count as the same word.
    _WORD_RE = re.compile(r"\w+")

    def __init__(self):
        # Known tortured phrases with their correction, detection confidence and rationale.
        self.tortured_phrases = {
            "bosom malignancy": {
                "correct": "breast cancer",
                "confidence": 0.95,
                "explanation": "Término médico no estándar que puede indicar contenido generado automáticamente"
            },
            "fake patterns": {
                "correct": "patterns",
                "confidence": 0.85,
                "explanation": "Uso de 'fake' puede indicar contenido poco confiable"
            }
        }

        self.medical_terms_validation = {
            "breast cancer": True,
            "machine learning": True,
            "bosom malignancy": False
        }

    @classmethod
    def _tokenize(cls, text: str) -> set:
        """Return the set of lower-cased word tokens in `text`."""
        return set(cls._WORD_RE.findall(text.lower()))

    def detect_tortured_phrases(self, text: str) -> List[Dict]:
        """Find known tortured phrases in `text`, ignoring case.

        Returns:
            One dict per matched phrase with its first position, severity ('high' when
            the confidence exceeds 0.9, otherwise 'medium'), suggested correction,
            explanation and confidence.
        """
        detections = []
        lowered = text.lower()  # lower-case once instead of once per phrase

        for phrase, info in self.tortured_phrases.items():
            position = lowered.find(phrase.lower())
            if position == -1:
                continue

            detections.append({
                'phrase': phrase,
                'position': position,
                'severity': 'high' if info['confidence'] > 0.9 else 'medium',
                'suggested_correction': info['correct'],
                'explanation': info['explanation'],
                'confidence': info['confidence']
            })

        return detections

    def validate_against_sources(self, response: str, source_docs: List["Document"]) -> Dict:
        """Measure how much of the response vocabulary also appears in the sources.

        Args:
            response: Generated answer text.
            source_docs: Objects exposing a ``page_content`` string.

        Returns:
            Dict with the overlap ratio (0 for an empty response), whether it exceeds
            the 0.3 support threshold, the unsupported words, and an explanation.
        """
        source_text = " ".join([doc.page_content for doc in source_docs])

        # Simple lexical overlap on word tokens
        response_words = self._tokenize(response)
        source_words = self._tokenize(source_text)

        overlap = response_words.intersection(source_words)
        overlap_ratio = len(overlap) / len(response_words) if response_words else 0

        validation = {
            'overlap_ratio': overlap_ratio,
            'supported': overlap_ratio > 0.3,  # simple threshold
            'unsupported_words': response_words - source_words,
            'explanation': f"La respuesta tiene {overlap_ratio:.2%} de palabras respaldadas por las fuentes"
        }

        return validation

    def comprehensive_check(self, response: str, source_docs: List["Document"]) -> Dict:
        """Run both checks and combine them into a reliability score in [0, 1].

        The score starts at 1.0, loses ``confidence * 0.3`` per detected phrase and a
        further 0.4 when the sources do not support the response; it is floored at 0.
        The response counts as reliable when the score is strictly above 0.6.
        """
        tortured_detections = self.detect_tortured_phrases(response)
        source_validation = self.validate_against_sources(response, source_docs)

        reliability_score = 1.0

        # Penalise each tortured phrase in proportion to its detection confidence
        for detection in tortured_detections:
            penalty = detection['confidence'] * 0.3
            reliability_score -= penalty

        # Penalise weak support from the sources
        if not source_validation['supported']:
            reliability_score -= 0.4

        reliability_score = max(0.0, reliability_score)

        return {
            'tortured_phrases': tortured_detections,
            'source_validation': source_validation,
            'reliability_score': reliability_score,
            'is_reliable': reliability_score > 0.6,
            'warnings': self._generate_warnings(tortured_detections, source_validation)
        }

    def _generate_warnings(self, tortured_detections: List, source_validation: Dict) -> List[str]:
        """Build the user-facing warning messages for the given check results."""
        # Named `messages` so the imported `warnings` module is not shadowed.
        messages = []

        if tortured_detections:
            messages.append(f"⚠️ Detectadas {len(tortured_detections)} frases problemáticas")

        if not source_validation['supported']:
            messages.append("⚠️ Respuesta poco respaldada por fuentes originales")

        return messages

# Initialise the fact checker
fact_checker = FactChecker()

### 9. Sistema de Explicabilidad con SHAP/LIME


In [None]:
class ExplainabilityEngine:
    """Explains which sources shaped a response and what drives its reliability score."""

    # Word tokenizer for the influence score: punctuation is ignored, so "cancer," and
    # "cancer" count as the same word.
    _WORD_RE = re.compile(r"\w+")

    def __init__(self, rag_system: "RAGSystem"):
        self.rag_system = rag_system
        self.lime_explainer = LimeTextExplainer(class_names=['reliable', 'unreliable'])

    @classmethod
    def _tokenize(cls, text: str) -> set:
        """Return the set of lower-cased word tokens in `text`."""
        return set(cls._WORD_RE.findall(text.lower()))

    def explain_response_sources(self, query: str, response: str, source_docs: List["Document"]) -> Dict:
        """Estimate how much each source contributed to the response.

        Influence is the share of response words that also occur in the source.

        Args:
            query: The user question (not used by the current scoring).
            response: Generated answer text.
            source_docs: Objects exposing ``page_content`` and a ``metadata`` dict.

        Returns:
            Dict with per-source explanations sorted by descending influence, the most
            influential one (None when there are no sources) and a one-line summary.
        """
        explanations = []

        # The response vocabulary is the same for every source, so compute it once.
        response_words = self._tokenize(response)

        for i, doc in enumerate(source_docs):
            doc_words = self._tokenize(doc.page_content)
            common_words = doc_words.intersection(response_words)
            influence_score = len(common_words) / len(response_words) if response_words else 0

            explanation = {
                'source_id': i,
                'title': doc.metadata.get('title', f'Document {i+1}'),
                'influence_score': influence_score,
                # Sorted so the (up to) 10 reported words are the same on every run;
                # slicing a bare set would give an arbitrary selection.
                'common_concepts': sorted(common_words)[:10],
                'metadata': doc.metadata
            }
            explanations.append(explanation)

        # Order by influence, highest first
        explanations.sort(key=lambda x: x['influence_score'], reverse=True)

        return {
            'source_influences': explanations,
            'most_influential': explanations[0] if explanations else None,
            'explanation_summary': self._create_influence_summary(explanations)
        }

    def explain_reliability_factors(self, fact_check_result: Dict) -> Dict:
        """List the factors behind the reliability score of a fact-check result.

        A 'tortured_phrases' factor is included only when phrases were detected; a
        'source_support' factor is always included.
        """
        factors = []

        # Factor: tortured phrases
        if fact_check_result['tortured_phrases']:
            factor = {
                'type': 'tortured_phrases',
                'impact': 'negative',
                'weight': 0.3,
                'description': f"Detectadas {len(fact_check_result['tortured_phrases'])} frases problemáticas",
                'details': fact_check_result['tortured_phrases']
            }
            factors.append(factor)

        # Factor: source support
        source_support = fact_check_result['source_validation']['supported']
        factor = {
            'type': 'source_support',
            'impact': 'positive' if source_support else 'negative',
            'weight': 0.4,
            'description': f"Respaldo de fuentes: {'Alto' if source_support else 'Bajo'}",
            'details': fact_check_result['source_validation']
        }
        factors.append(factor)

        return {
            'reliability_factors': factors,
            'overall_score': fact_check_result['reliability_score'],
            'recommendation': self._get_reliability_recommendation(fact_check_result['reliability_score'])
        }

    def _create_influence_summary(self, explanations: List[Dict]) -> str:
        """Summarise the top-ranked source; expects `explanations` sorted by influence."""
        if not explanations:
            return "No se encontraron fuentes influyentes"

        top_source = explanations[0]
        return f"La fuente más influyente es '{top_source['title']}' con {top_source['influence_score']:.2%} de influencia"

    def _get_reliability_recommendation(self, score: float) -> str:
        """Map a reliability score to a recommendation (bands: > 0.8, > 0.6, otherwise)."""
        if score > 0.8:
            return "✅ Alta confiabilidad - Información respaldada por fuentes"
        elif score > 0.6:
            return "⚠️ Confiabilidad moderada - Verificar información importante"
        else:
            return "❌ Baja confiabilidad - Consultar fuentes adicionales"

# Initialise the explainability engine
explainability_engine = ExplainabilityEngine(rag_system)

## Fase 5: Sistema Integrado XAI-RAG

### 10. Sistema Principal

In [None]:
class FactCheckXAIRAG:
    """Facade that chains RAG generation, fact checking and explanation for one query."""

    def __init__(self, rag_system: RAGSystem, fact_checker: FactChecker,
                 explainability_engine: ExplainabilityEngine):
        self.explainability_engine = explainability_engine
        self.fact_checker = fact_checker
        self.rag_system = rag_system

    def process_query(self, query: str) -> Dict[str, Any]:
        """Answer `query` and attach the fact-check report, explanations and run metadata."""
        print(f"🔄 Procesando: {query}")

        # Step 1: retrieval-augmented answer
        rag_result = self.rag_system.generate_rag_response(query)
        answer = rag_result['response']
        context_docs = rag_result['context_docs']

        # Step 2: fact check the answer against the retrieved documents
        fact_check_result = self.fact_checker.comprehensive_check(answer, context_docs)

        # Step 3: explanations for source influence and reliability
        engine = self.explainability_engine
        source_explanation = engine.explain_response_sources(query, answer, context_docs)
        reliability_explanation = engine.explain_reliability_factors(fact_check_result)

        # Step 4: bundle everything into a single result
        explanations = {
            'source_influence': source_explanation,
            'reliability_factors': reliability_explanation,
        }
        metadata = {
            'timestamp': datetime.now().isoformat(),
            'model_used': self.rag_system.llm_manager.model_name,
            'total_sources': len(context_docs),
        }

        return {
            'query': query,
            'response': answer,
            'sources': rag_result['sources'],
            'fact_check': fact_check_result,
            'explanations': explanations,
            'metadata': metadata,
        }

    def format_response_for_display(self, result: Dict) -> str:
        """Render a `process_query` result as Markdown text for the user."""
        fact_check = result['fact_check']
        lines = [f"**Respuesta:**\n{result['response']}\n"]

        # Warnings section, only when there are any
        alerts = fact_check['warnings']
        if alerts:
            lines.append("**⚠️ Advertencias:**")
            lines.extend(f"- {alert}" for alert in alerts)
            lines.append("")

        # Reliability score and recommendation
        score = fact_check['reliability_score']
        advice = result['explanations']['reliability_factors']['recommendation']
        lines += [f"**Confiabilidad:** {score:.2%}", f"{advice}\n"]

        # Most influential source, when available
        top_source = result['explanations']['source_influence']['most_influential']
        if top_source:
            lines += [
                f"**Fuente principal:** {top_source['title']}",
                f"**Influencia:** {top_source['influence_score']:.2%}\n",
            ]

        # Detected tortured phrases with their suggested corrections
        flagged = fact_check['tortured_phrases']
        if flagged:
            lines.append("**🔍 Frases problemáticas detectadas:**")
            for item in flagged:
                lines.append(f"- '{item['phrase']}' → Sugerencia: '{item['suggested_correction']}'")
                lines.append(f"  {item['explanation']}")
            lines.append("")

        return "\n".join(lines)

# Initialise the complete system
xai_rag_system = FactCheckXAIRAG(rag_system, fact_checker, explainability_engine)

## Fase 6: Interfaz de Usuario con Gradio

### 11. Interfaz Web Interactiva

In [None]:
def create_gradio_interface():
    """Build the Gradio Blocks interface for the system and return it (without launching)."""

    def process_user_query(query, show_detailed_analysis):
        """Handle a submitted question.

        Returns a pair (formatted response, detailed analysis); the second item is an
        empty string unless the detailed analysis was requested. Any exception is
        reported to the user as an error message instead of being raised.
        """
        if not query.strip():
            return "Por favor, ingresa una pregunta.", ""

        try:
            # Run the query through the XAI-RAG system (module-level `xai_rag_system`)
            result = xai_rag_system.process_query(query)

            # Formatted response for display
            formatted_response = xai_rag_system.format_response_for_display(result)

            # Detailed analysis, only when requested
            detailed_analysis = ""
            if show_detailed_analysis:
                detailed_analysis = format_detailed_analysis(result)

            return formatted_response, detailed_analysis

        except Exception as e:
            return f"❌ Error procesando la consulta: {str(e)}", ""

    def format_detailed_analysis(result):
        """Render the sources, source influences and reliability factors as Markdown."""
        analysis = []

        analysis.append("## 📊 Análisis Detallado\n")

        # Source information
        analysis.append("### 📚 Fuentes Utilizadas:")
        for i, source in enumerate(result['sources']):
            analysis.append(f"{i+1}. **{source.get('title', 'Sin título')}**")
            if 'authors' in source:
                analysis.append(f"   - Autores: {source['authors']}")
            if 'published' in source:
                analysis.append(f"   - Publicado: {source['published']}")
            analysis.append("")

        # Source influence
        source_influences = result['explanations']['source_influence']['source_influences']
        analysis.append("### 🎯 Influencia de Fuentes:")
        for influence in source_influences[:3]:  # Top 3
            analysis.append(f"- **{influence['title']}**: {influence['influence_score']:.2%} influencia")

        # Reliability factors
        analysis.append("\n### 🔍 Factores de Confiabilidad:")
        reliability_factors = result['explanations']['reliability_factors']['reliability_factors']
        for factor in reliability_factors:
            icon = "✅" if factor['impact'] == 'positive' else "❌"
            analysis.append(f"{icon} **{factor['type']}**: {factor['description']}")

        return "\n".join(analysis)

    # Build the interface
    with gr.Blocks(title="FactCheck XAI-RAG", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # 🔬 FactCheck XAI-RAG
        ### Sistema de Verificación de Hechos Explicable para Preguntas Científicas

        Este sistema utiliza RAG (Retrieval-Augmented Generation) con explicabilidad (XAI) para:
        - ✅ Responder preguntas científicas con fuentes
        - 🔍 Detectar información potencialmente incorrecta
        - 📊 Explicar el proceso de generación de respuestas
        - ⚠️ Alertar sobre contenido poco confiable
        """)

        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="💬 Tu pregunta científica",
                    placeholder="Ej: ¿Qué avances recientes hay en machine learning para detección de cáncer?",
                    lines=3
                )

                show_detailed = gr.Checkbox(
                    label="📊 Mostrar análisis detallado",
                    value=False
                )

                submit_btn = gr.Button("🔍 Analizar", variant="primary")

            with gr.Column(scale=1):
                gr.Markdown("""
                **💡 Ejemplos de preguntas:**
                - "¿Qué es la bosom malignancy?"
                - "¿Cómo funciona machine learning en medicina?"
                - "¿Cuáles son los tratamientos para breast cancer?"
                """)

        with gr.Row():
            with gr.Column():
                response_output = gr.Markdown(label="📋 Respuesta y Análisis")

            with gr.Column():
                # Hidden by default; visibility follows the checkbox (see toggle_detailed)
                detailed_output = gr.Markdown(label="📊 Análisis Detallado", visible=False)

        # Show/hide the detailed analysis panel
        def toggle_detailed(show_detailed):
            return gr.update(visible=show_detailed)

        show_detailed.change(
            fn=toggle_detailed,
            inputs=[show_detailed],
            outputs=[detailed_output]
        )

        # Process the query when the button is clicked
        submit_btn.click(
            fn=process_user_query,
            inputs=[query_input, show_detailed],
            outputs=[response_output, detailed_output]
        )

        # Interactive examples: each entry is [question, show detailed analysis]
        gr.Examples(
            examples=[
                ["¿Qué avances hay en machine learning para detección de bosom malignancy?", True],
                ["¿Cómo funciona la inteligencia artificial en medicina?", False],
                ["¿Cuáles son los síntomas del breast cancer?", True]
            ],
            inputs=[query_input, show_detailed]
        )

    return interface


In [None]:
# Build the web interface (this cell only creates it; it does not call launch)
print("🚀 Creando interfaz web...")
interface = create_gradio_interface()

## Fase 7: Lanzamiento y Pruebas

### 12. Configuración Final y Lanzamiento

In [None]:
# Configurar ngrok para acceso público (opcional)
def setup_public_access(port: int = 7860):
    """Open an ngrok tunnel to the local web server (optional).

    Args:
        port: Local port to expose. Defaults to 7860, the port the interface is
            launched on.

    Returns:
        Whatever ``ngrok.connect`` returns for the tunnel, or None when the tunnel
        could not be opened (the reason is printed, not raised).
    """
    try:
        # Take the auth token from the environment so it is never pasted into the notebook.
        auth_token = os.environ.get("NGROK_AUTHTOKEN")
        if auth_token:
            ngrok.set_auth_token(auth_token)

        # Open the tunnel
        public_url = ngrok.connect(port)
        print(f"🌐 URL pública: {public_url}")
        return public_url
    except Exception as e:
        # Best effort: public access is optional, so report and carry on.
        print(f"⚠️ No se pudo configurar acceso público: {e}")
        return None


In [None]:
# Función principal de lanzamiento
def launch_system():
    """Check that every component exists, then launch the web interface.

    Returns:
        False when at least one component is missing or None (the interface is not
        launched); True once ``interface.launch`` has returned.
    """
    print("🚀 Iniciando FactCheck XAI-RAG...")

    # Resolve the components through globals() so that one whose cell never ran (or
    # failed) is reported as ❌ below instead of aborting this function with NameError.
    namespace = globals()
    components_status = {
        'Vector Database': namespace.get('vectorstore') is not None,
        'LLM Manager': getattr(namespace.get('llm_manager'), 'pipeline', None) is not None,
        'RAG System': namespace.get('rag_system') is not None,
        'Fact Checker': namespace.get('fact_checker') is not None,
        'XAI Engine': namespace.get('explainability_engine') is not None,
        'Main System': namespace.get('xai_rag_system') is not None
    }

    print("\n📋 Estado de componentes:")
    for component, status in components_status.items():
        status_icon = "✅" if status else "❌"
        print(f"{status_icon} {component}")

    if not all(components_status.values()):
        print("❌ Error: Algunos componentes no están inicializados")
        return False

    # Configure public access (optional)
    # public_url = setup_public_access()

    # Launch the interface
    print("\n🌐 Lanzando interfaz web...")
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # creates a temporary public link
        debug=True
    )

    return True


In [None]:
# Función de pruebas rápidas
def run_quick_tests():
    """Run three smoke-test queries through the full system and print a short report.

    A failure in one query is printed and does not stop the remaining ones.
    """
    print("🧪 Ejecutando pruebas rápidas...\n")

    queries = (
        "¿Qué es la bosom malignancy?",  # should trigger the tortured-phrase detector
        "¿Cómo funciona machine learning en medicina?",  # ordinary question
        "¿Cuáles son los tratamientos para breast cancer?",  # valid medical question
    )

    number = 0
    for query in queries:
        number += 1
        print(f"🔬 Prueba {number}: {query}")
        try:
            outcome = xai_rag_system.process_query(query)

            # Key figures only: answer preview, score, warning and source counts
            print(f"   📝 Respuesta: {outcome['response'][:100]}...")
            check = outcome['fact_check']
            print(f"   🎯 Confiabilidad: {check['reliability_score']:.2%}")
            print(f"   ⚠️ Advertencias: {len(check['warnings'])}")
            print(f"   📚 Fuentes: {len(outcome['sources'])}")

            flagged = check['tortured_phrases']
            if flagged:
                print(f"   🔍 Tortured phrases: {len(flagged)}")

            print()

        except Exception as e:
            print(f"   ❌ Error: {e}\n")

    print("✅ Pruebas completadas")


In [None]:
# Run the smoke tests first
run_quick_tests()

In [None]:
# Launch the system; the banner is printed only after launch_system() returns True
if launch_system():
    print("""
    🎉 ¡Sistema FactCheck XAI-RAG lanzado exitosamente!

    📱 Usa la interfaz web para:
    - Hacer preguntas científicas
    - Ver explicaciones detalladas
    - Identificar información poco confiable

    🔬 Características principales activas:
    ✅ Detección de tortured phrases
    ✅ Verificación contra fuentes
    ✅ Explicabilidad de respuestas
    ✅ Scoring de confiabilidad
    ✅ Interfaz intuitiva
    """)
else:
    print("❌ Error al lanzar el sistema")
