In [1]:
# ---- Cell 1: Instalar dependencias ----
!pip install -U langchain langchain-community huggingface-hub duckduckgo-search ddgs pandas python-dotenv

Collecting ddgs
  Downloading ddgs-9.9.1-py3-none-any.whl.metadata (19 kB)
Collecting fake-useragent>=2.2.0 (from ddgs)
  Downloading fake_useragent-2.2.0-py3-none-any.whl.metadata (17 kB)
Collecting socksio==1.* (from httpx[brotli,http2,socks]>=0.28.1->ddgs)
  Downloading socksio-1.0.0-py3-none-any.whl.metadata (6.1 kB)
Downloading ddgs-9.9.1-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fake_useragent-2.2.0-py3-none-any.whl (161 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading socksio-1.0.0-py3-none-any.whl (12 kB)
Installing collected packages: socksio, fake-useragent, ddgs
Successfully installed ddgs-9.9.1 fake-useragent-2.2.0 socksio-1.0.0


In [2]:
# Sanity check: report whether the `ddgs` package is importable in this kernel.
try:
    import ddgs
except ModuleNotFoundError:
    print("ddgs NO está instalado ❌")
else:
    print("ddgs instalado correctamente ✅")

ddgs instalado correctamente ✅


In [3]:
# ---- Cell 2: Fetch agents.py directly from GitHub ----
import os
import requests
import importlib

# Raw URL of agents.py in the repository
GITHUB_RAW_URL = "https://raw.githubusercontent.com/almendram/multiagent_research-lab/refs/heads/main/src/agents.py"

# Local file name
local_path = "agents.py"

# Download only when there is no local copy yet
if not os.path.exists(local_path):
    # A timeout keeps the cell from hanging forever on a stalled connection.
    r = requests.get(GITHUB_RAW_URL, timeout=30)
    if r.status_code == 200:
        with open(local_path, "w", encoding="utf-8") as f:
            f.write(r.text)
        # The module file was created while the interpreter was already
        # running; drop the import system's cached directory listings so the
        # new file is guaranteed to be found by the import below.
        importlib.invalidate_caches()
        print(f"agents.py descargado desde GitHub y guardado en {local_path}")
    else:
        raise ValueError(f"No se pudo descargar agents.py desde {GITHUB_RAW_URL} (status {r.status_code})")
else:
    print(f"agents.py ya existe en el path actual: {local_path}")

# Import the module, then reload it so that a copy imported earlier in this
# kernel session is refreshed from the file on disk.
import agents
importlib.reload(agents)
print("agents.py importado correctamente.")


agents.py ya existe en el path actual: agents.py
agents.py importado correctamente.


In [4]:
# ---- Cell 3: Create .env with HF_TOKEN (optional in Colab) ----
import os
from getpass import getpass

# SECURITY: never write the token into the notebook source. It is taken from
# the HF_TOKEN environment variable when already set; otherwise the user is
# prompted without echo. Any token that was ever committed as a literal must
# be treated as leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN", "").strip() or getpass("HF_TOKEN: ").strip()
if not hf_token:
    raise ValueError("HF_TOKEN vacío: define la variable de entorno o introduce el token.")

# Expose the token to the rest of the session through the environment
os.environ["HF_TOKEN"] = hf_token

# Create the .env file holding the token
with open(".env", "w", encoding="utf-8") as f:
    f.write(f"HF_TOKEN={hf_token}\n")
# Report success only after the file has been flushed and closed.
print("Token guardado y archivo .env creado correctamente")

Token guardado y archivo .env creado correctamente


In [5]:
# ---- Cell 4: Import agents.py and confirm which file was loaded ----
import agents
import importlib

# reload() returns the refreshed module object; rebinding keeps `agents`
# pointing at it.
agents = importlib.reload(agents)
print("agents module imported:", agents)

agents module imported: <module 'agents' from '/content/agents.py'>


In [6]:
# ---- Cell 5: Parameters and search query ----
# Number of search results to request
TOP_K = 5

# Topic used for the final summary
TOPIC = "Impacto de los datos sintéticos en la atención médica"

# DuckDuckGo query; results can be restricted with `site:` filters
# (e.g. site:arxiv.org OR site:researchgate.net)
QUERY = (
    "Impact of synthetic data in healthcare "
    "site:arxiv.org OR site:researchgate.net OR site:research.microsoft.com"
)

# Models (already configured in agents.py)
WRITER_MODEL = "facebook/bart-large-cnn"

In [7]:
# ---- Cell 6: Run the pipeline ----
from agents import Coordinator, Investigador, Redactor, Revisor

# Build the three agents (same construction order as before) and hand them
# to the coordinator.
investigador, redactor, revisor = (
    Investigador(top_k=TOP_K),
    Redactor(modelo=WRITER_MODEL),
    Revisor(),
)
coord = Coordinator(investigador, redactor, revisor)

print("Ejecutando pipeline...")
# `res` is consumed by the following cell, which reads res["final"].
res = coord.run(topic=TOPIC, query=QUERY, top_k=TOP_K)
print("Pipeline completado.")

Ejecutando pipeline...
Pipeline completado.


In [8]:
# ---- Cell 7: Save research_summary.md and show an excerpt ----
out_md = "research_summary.md"
with open(out_md, mode="w", encoding="utf-8") as md_file:
    md_file.write(res["final"])

print(f"Archivo guardado: {out_md}")
# One print call with a newline separator emits exactly the same text as
# three consecutive print calls.
print(
    "\n--- Inicio del resumen (primeros 1200 caracteres) ---\n",
    res["final"][:1200],
    "\n--- Fin extracto ---",
    sep="\n",
)

Archivo guardado: research_summary.md

--- Inicio del resumen (primeros 1200 caracteres) ---

# Resumen final - Impacto de los datos sintéticos en la atención médica

## Fuentes (resumen):
1.  - 

> Empower Your Team w/ Powerful Data Analytics Tools to Fuel Smarter, Faster Business Moves. Unlock The Full Potential of Your Data with AvePoint’s Powerful Analytics and AI Solutions 3 days ago · Synthetic data offers a viable solution by enabling safe, broad data usage without compromising sensitive information. This paper presents the MOSTLY AI Synthetic Data Software Development Kit (SDK), an open-source toolkit designed specifically for synthesizing high-quality tabular data . Aug 8, 2023 · Artificial Intelligence (AI) has the potential to revolutionize healthcare by improving patient outcomes, reducing costs, and enhancing efficiency. This paper provides a comprehensive review of ... May 6, 2022 · PDF | This explainer document aims to provide an overview of the current state of the rapi

In [9]:
# ---- Cell 8: Download (Colab) ----
# Best-effort step: it must never abort the notebook, but it should say why
# nothing was downloaded instead of failing silently.
try:
    from google.colab import files
except ImportError:
    # google.colab is not importable here, so there is no download helper.
    print("google.colab no disponible; se omite la descarga.")
else:
    try:
        files.download("research_summary.md")
    except Exception as exc:
        print(f"No se pudo descargar research_summary.md: {exc}")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [10]:
# ---- Cell 9: Create requirements.txt ----
# One package per line. `ddgs` (installed and checked at the top of the
# notebook) and `requests` (imported to fetch agents.py) are listed too, so an
# environment built from this file can run the notebook.
reqs = "\n".join(
    [
        "langchain",
        "langchain-community",
        "huggingface-hub",
        "duckduckgo-search",
        "ddgs",
        "requests",
        "pandas",
        "python-dotenv",
    ]
) + "\n"
with open("requirements.txt", "w", encoding="utf-8") as f:
    f.write(reqs)
print("requirements.txt creado.")


requirements.txt creado.
