In [1]:
!pip install fastapi uvicorn nest_asyncio pyngrok transformers

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [6]:
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from pyngrok import ngrok
import uvicorn
from transformers import pipeline, AutoTokenizer
import nest_asyncio
import time
import logging
import os
import torch
from typing import List, Optional
import asyncio
from concurrent.futures import ThreadPoolExecutor
import gc

# Configuraciones
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Evitar warnings

# Logging mejorado
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('/content/resumen_server.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Modelos Pydantic
class Texto(BaseModel):
    contenido: str = Field(..., min_length=10, max_length=10000)

class TextosBatch(BaseModel):
    contenidos: List[str] = Field(..., min_items=1, max_items=20)

class ResumenResponse(BaseModel):
    resumen: str
    tiempo_procesamiento: float
    tokens_procesados: int

class ResumenesBatchResponse(BaseModel):
    resumenes: List[str]
    tiempo_total: float
    articulos_procesados: int

# Configuración de la aplicación
app = FastAPI(
    title="Servicio de Resumen Optimizado",
    description="API optimizada para resúmenes de noticias sobre cambio climático",
    version="2.0.0"
)

# CORS mejorado
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Pool de threads para procesamiento paralelo
executor = ThreadPoolExecutor(max_workers=4)

class OptimizedSummarizer:
    def __init__(self):
        self.model_name = "facebook/bart-large-cnn"
        self.summarizer = None
        self.tokenizer = None
        self.device = 0 if torch.cuda.is_available() else -1  # GPU si disponible
        self.max_input_length = 1024
        self.load_model()

    def load_model(self):
        try:
            logger.info("🔄 Cargando modelo de resumen...")
            start_time = time.time()

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            self.summarizer = pipeline(
                "summarization",
                model=self.model_name,
                tokenizer=self.tokenizer,
                device=self.device,
                framework="pt",
                clean_up_tokenization_spaces=True
            )

            load_time = time.time() - start_time
            logger.info(f"✅ Modelo cargado en {load_time:.2f} segundos usando {'GPU' if self.device != -1 else 'CPU'}")

        except Exception as e:
            logger.error(f"❌ Error cargando modelo: {e}")
            raise

    def preprocess_text(self, texto: str) -> str:
        texto = texto.strip()
        lineas = [linea.strip() for linea in texto.split('\n') if len(linea.strip()) > 20]
        texto_limpio = ' '.join(lineas)
        tokens = self.tokenizer.encode(texto_limpio, truncation=True, max_length=self.max_input_length)
        return self.tokenizer.decode(tokens, skip_special_tokens=True)

    def generate_summary(self, texto: str):
        try:
            start_time = time.time()
            texto = self.preprocess_text(texto)
            tokens = self.tokenizer.encode(texto)
            resumen = self.summarizer(
                texto,
                max_length=130,
                min_length=30,
                do_sample=False
            )[0]['summary_text']
            resumen_final = self.postprocess_summary(resumen)
            return {
                "resumen": resumen_final,
                "tiempo_procesamiento": time.time() - start_time,  # Fixed key name
                "tokens_procesados": len(tokens)  # Fixed key name
            }
        except Exception as e:
            logger.error(f"Error generando resumen: {e}")
            return {
                "resumen": f"Error al generar resumen: {str(e)}",
                "tiempo_procesamiento": 0,
                "tokens_procesados": 0
            }

    def postprocess_summary(self, resumen: str) -> str:
        if not resumen.endswith('.'):
            resumen += '.'
        if resumen:
            resumen = resumen[0].upper() + resumen[1:]
        return resumen

    def generate_batch_summaries(self, textos: List[str]) -> List[dict]:
        futures = [executor.submit(self.generate_summary, texto) for texto in textos]
        resultados = []
        for future in futures:
            try:
                resultados.append(future.result(timeout=30))
            except Exception as e:
                logger.error(f"Error en batch processing: {e}")
                resultados.append({
                    "resumen": f"Error: {str(e)}",
                    "tiempo_procesamiento": 0,
                    "tokens_procesados": 0
                })
        return resultados


# Instancia global del resumidor
logger.info("🚀 Inicializando servicio de resumen...")
summarizer_service = OptimizedSummarizer()

@app.get("/")
async def root():
    """Endpoint de salud"""
    return {
        "status": "active",
        "service": "Servicio de Resumen Optimizado",
        "version": "2.0.0",
        "model": "facebook/bart-large-cnn",
        "timestamp": time.time()
    }

@app.get("/health")
async def health_check():
    """Check de salud detallado"""
    try:
        # Test rápido del modelo
        test_result = summarizer_service.generate_summary(
            "Esta es una prueba del sistema de resumen automático para verificar que funciona correctamente."
        )

        return {
            "status": "healthy",
            "model_loaded": summarizer_service.summarizer is not None,
            "test_time": test_result["tiempo_procesamiento"],
            "memory_usage": f"{torch.cuda.memory_allocated() / 1024**2:.1f}MB" if torch.cuda.is_available() else "CPU mode"
        }
    except Exception as e:
        return {"status": "unhealthy", "error": str(e)}

@app.post("/resumir", response_model=ResumenResponse)
async def resumir(texto: Texto):
    """Endpoint principal para resumir un texto individual"""
    try:
        logger.info(f"📝 Procesando texto de {len(texto.contenido)} caracteres")

        # Generar resumen
        resultado = summarizer_service.generate_summary(texto.contenido)
        return ResumenResponse(**resultado)
    except Exception as e:
        logger.error(f"❌ Error en /resumir: {e}")
        raise HTTPException(status_code=500, detail=f"Error procesando resumen: {str(e)}")

@app.post("/resumir_batch", response_model=ResumenesBatchResponse)
async def resumir_batch(req: TextosBatch):
    start_time = time.time()
    resultados = summarizer_service.generate_batch_summaries(req.contenidos)
    return ResumenesBatchResponse(
        resumenes=[r["resumen"] for r in resultados],
        tiempo_total=time.time() - start_time,
        articulos_procesados=len(resultados)
    )

@app.post("/resumir_parrafos")
async def resumir_parrafos(texto: Texto):
    """Endpoint para resumir párrafo por párrafo (método original mejorado)"""
    try:
        start_time = time.time()

        # Dividir en párrafos y filtrar
        parrafos = [p.strip() for p in texto.contenido.split('\n') if len(p.strip()) > 20]

        if not parrafos:
            return {"resumen": "No se encontraron párrafos válidos para resumir."}

        logger.info(f"📄 Procesando {len(parrafos)} párrafos")

        # Procesar párrafos en paralelo
        resumenes_parrafos = summarizer_service.generate_batch_summaries(parrafos)

        # Unir resúmenes
        resumen_final = "\n\n".join([r["resumen"] for r in resumenes_parrafos])

        tiempo_total = time.time() - start_time

        return {
            "resumen": resumen_final,
            "parrafos_procesados": len(parrafos),
            "tiempo_total": tiempo_total
        }

    except Exception as e:
        logger.error(f"❌ Error en /resumir_parrafos: {e}")
        raise HTTPException(status_code=500, detail=f"Error procesando párrafos: {str(e)}")

@app.get("/stats")
async def get_stats():
    """Estadísticas del servicio"""
    return {
        "model_name": summarizer_service.model_name,
        "device": "CPU" if summarizer_service.device == -1 else f"GPU:{summarizer_service.device}",
        "max_input_length": summarizer_service.max_input_length,
        "executor_threads": executor._max_workers,
        "memory_info": {
            "cuda_available": torch.cuda.is_available(),
            "cuda_memory": f"{torch.cuda.memory_allocated() / 1024**2:.1f}MB" if torch.cuda.is_available() else "N/A"
        }
    }

# Manejo de memoria
@app.post("/clear_cache")
async def clear_cache():
    """Limpia la caché para liberar memoria"""
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        return {"status": "Cache cleared successfully"}
    except Exception as e:
        return {"status": "error", "message": str(e)}

# Funciones de utilidad para servidor
def find_free_port(start_port: int = 8000, max_attempts: int = 10) -> int:
    """Encuentra un puerto libre"""
    import socket
    for port in range(start_port, start_port + max_attempts):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(('0.0.0.0', port))
                logger.info(f"✅ Puerto {port} disponible")
                return port
        except OSError:
            logger.warning(f"⚠️ Puerto {port} ocupado")
    raise RuntimeError("No se encontró puerto libre")

def setup_ngrok_with_port(port: int):
    """Configura ngrok con manejo de errores mejorado"""
    try:
        # Tu token de ngrok
        ngrok.set_auth_token("2xEermx5e5Clyf3vVih326xdKKb_7oaPNcL5tDaT2TLy9vBVF")

        # Configurar túnel
        public_url = ngrok.connect(port)
        logger.info(f"🌐 URL pública de ngrok: {public_url}")

        # Guardar URL para referencia
        with open('/content/ngrok_url.txt', 'w') as f:
            f.write(str(public_url))

        # Mostrar información útil
        print(f"\n{'='*60}")
        print(f"🚀 SERVIDOR LISTO")
        print(f"{'='*60}")
        print(f"📡 URL Local: http://localhost:{port}")
        print(f"🌐 URL Pública: {public_url}")
        print(f"📋 Documentación: {public_url}/docs")
        print(f"❤️ Health Check: {public_url}/health")
        print(f"{'='*60}\n")

        return public_url

    except Exception as e:
        logger.error(f"❌ Error configurando ngrok: {e}")
        return None

def start_server():
    """Inicia el servidor con configuración optimizada"""
    try:
        logger.info("🔧 Configurando servidor...")

        # Encontrar puerto libre
        port = find_free_port(8000, 10)

        # Configurar ngrok
        logger.info(f"🌐 Configurando ngrok en puerto {port}...")
        ngrok_url = setup_ngrok_with_port(port)

        if not ngrok_url:
            logger.warning("⚠️ No se pudo configurar ngrok, solo acceso local disponible")

        # Configurar uvicorn
        config = uvicorn.Config(
            app,
            host="0.0.0.0",
            port=port,
            log_level="info",
            access_log=True,
            reload=False,
            workers=1  # Un solo worker para evitar problemas con el modelo
        )

        server = uvicorn.Server(config)

        logger.info(f"🚀 Iniciando servidor en puerto {port}...")
        server.run()

    except KeyboardInterrupt:
        logger.info("🛑 Servidor detenido por el usuario")
    except Exception as e:
        logger.error(f"❌ Error fatal del servidor: {e}")
        raise
    finally:
        # Limpiar recursos
        try:
            ngrok.disconnect(ngrok.get_tunnels()[0].public_url)
            ngrok.kill()
        except:
            pass

# Middleware para logging de requests
@app.middleware("http")
async def log_requests(request, call_next):
    start_time = time.time()

    response = await call_next(request)

    process_time = time.time() - start_time
    logger.info(f"📊 {request.method} {request.url.path} - {response.status_code} - {process_time:.3f}s")

    return response

# Punto de entrada
if __name__ == "__main__":
    # Configurar asyncio para Colab
    try:
        nest_asyncio.apply()
    except:
        pass

    start_server()

ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-172' coro=<Server.serve() done, defined at /usr/local/lib/python3.11/dist-packages/uvicorn/server.py:69> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/tmp/ipython-input-2987199528.py", line 336, in start_server
    server.run()
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/server.py", line 67, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run
    self._context.


🚀 SERVIDOR LISTO
📡 URL Local: http://localhost:8000
🌐 URL Pública: NgrokTunnel: "https://36760803ed93.ngrok-free.app" -> "http://localhost:8000"
📋 Documentación: NgrokTunnel: "https://36760803ed93.ngrok-free.app" -> "http://localhost:8000"/docs
❤️ Health Check: NgrokTunnel: "https://36760803ed93.ngrok-free.app" -> "http://localhost:8000"/health



INFO:     Started server process [722]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     2800:a4:1609:ec00:59a7:341c:eece:37e2:0 - "POST /resumir HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [722]
