In [1]:
from dotenv import load_dotenv

load_dotenv()

from planning_agent import planner_agent, executor_agent_step

In [2]:
# Topic handed to planner_agent below as the subject of the research plan.
prompt = "build a research report on black holes"

In [3]:
# Model identifier string passed to planner_agent via its `model` argument.
model_ = "openai:gpt-4o"

In [4]:
# Ask the planner for a plan covering `prompt`; the next cell displays the
# returned value (a list of step-description strings in the recorded output).
steps_ = planner_agent(
    topic=prompt,
    model=model_
)

In [5]:
steps_

['Research agent: Use Tavily to perform a broad web search and collect top relevant items (title, authors, year, venue/source, URL, DOI if available).',
 'Research agent: For each collected item, search on arXiv to find matching preprints/versions and record arXiv URLs (if they exist).',
 'Research agent: Rank the collected and arXiv-matched items based on relevance, authoritativeness, and recency.',
 'Research agent: Extract key insights, summaries, and themes from the top-ranked items.',
 'Writer agent: Draft an initial report based on the extracted insights, summaries, and themes.',
 'Editor agent: Review and refine the draft, ensuring clarity, coherence, and logical flow.',
 'Writer agent: Generate a comprehensive Markdown report that includes inline citations, a References section with clickable links, and is detailed and self-contained.']

In [6]:
# agent_runner_notebook.py
import json
import os
import uuid
from datetime import datetime, timezone
from typing import List

from IPython.display import display, Markdown
from sqlalchemy import create_engine, Column, Text, DateTime, String
from sqlalchemy.orm import sessionmaker, declarative_base

# ⚙️ Importa tus agentes existentes
from planning_agent import planner_agent, executor_agent_step

# =========================
# DB: SQLite
# =========================
# Path of the SQLite file; taken from the SQLITE_PATH environment variable,
# falling back to "agent.db" when it is not set.
DB_PATH = os.getenv("SQLITE_PATH", "agent.db")
# Engine used by init_db() and bound to the SessionLocal factory below.
ENGINE = create_engine(
    f"sqlite:///{DB_PATH}",
    echo=False,
    future=True,
    # Forwarded to the SQLite driver's connect(); turns off its same-thread check.
    connect_args={"check_same_thread": False},
)
# Session factory bound to ENGINE; call SessionLocal() to open a session.
SessionLocal = sessionmaker(bind=ENGINE, future=True)
# Declarative base that the ORM models in this cell inherit from.
Base = declarative_base()

def _utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for the deprecated ``datetime.utcnow()`` and yields the same
    kind of value (UTC wall-clock time with ``tzinfo`` stripped), so the
    values written to the DateTime columns keep their previous form.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class Task(Base):
    """ORM model for one workflow run, stored in the ``tasks`` table."""

    __tablename__ = "tasks"

    # Primary key; create_task() fills it with a UUID4 string.
    id = Column(String, primary_key=True, index=True)
    # The user prompt that started the run.
    prompt = Column(Text)
    # Lifecycle marker; this file writes "running", "done" and "error".
    status = Column(String)
    # Naive UTC timestamps: set on insert, and updated_at refreshed on update.
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)
    # JSON-encoded result (or error payload) written by save_result(); NULL until then.
    result = Column(Text)

def init_db(drop: bool = False):
    """Create the tables registered on ``Base``, optionally dropping them first.

    Args:
        drop: When true, every table known to ``Base.metadata`` is dropped
            before being created again, which discards existing rows.
    """
    metadata = Base.metadata
    if drop:
        metadata.drop_all(bind=ENGINE)
    metadata.create_all(bind=ENGINE)

def create_task(session, prompt: str) -> str:
    """Insert a new ``Task`` row in the "running" state and commit it.

    Args:
        session: Open SQLAlchemy session used for the insert and commit.
        prompt: User prompt stored on the row.

    Returns:
        The freshly generated UUID4 string used as the task's primary key.
    """
    new_id = str(uuid.uuid4())
    session.add(Task(id=new_id, prompt=prompt, status="running", result=None))
    session.commit()
    return new_id

def save_result(session, task_id: str, status: str, result_obj: dict | None):
    """Store the final status and JSON-encoded result of a task, then commit.

    Args:
        session: Open SQLAlchemy session.
        task_id: Primary key of the ``Task`` row to update.
        status: New status string to write on the row.
        result_obj: Payload to serialise into the ``result`` column, or
            ``None`` to store NULL.

    Returns:
        None. If no row with ``task_id`` exists the call is a silent no-op
        (best-effort behaviour kept from the original implementation).
    """
    task = session.get(Task, task_id)
    if task is None:
        return

    task.status = status
    # Naive UTC timestamp, same form as the deprecated datetime.utcnow().
    task.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
    # default=str: agent outputs are produced outside this file and may not be
    # JSON-native; stringify such values instead of raising TypeError and
    # losing the whole result after the workflow has already run.
    task.result = None if result_obj is None else json.dumps(result_obj, default=str)
    session.commit()

def run_agent_workflow(task_id: str, prompt: str, model: str) -> dict:
    """Plan a task with ``planner_agent`` and execute each planned step in order.

    Args:
        task_id: Identifier of the task being run. Not used by this function;
            kept so existing callers keep working.
        prompt: The user request, passed to the planner and to every executor call.
        model: Model identifier forwarded to the planner and executor.

    Returns:
        A dict with two keys:
          * ``"html_report"``: the output of the last executed step, or
            ``"No report generated."`` when the plan is empty.
          * ``"history"``: one dict per planned step with ``title``, ``status``,
            ``description``, ``substeps`` and ``updated_at``.

    Raises:
        Whatever ``planner_agent`` or ``executor_agent_step`` raise; nothing
        is caught here.
    """
    execution_history: List[list] = []

    # 1) Planning: one "pending" record per planned step.
    initial_plan_steps = planner_agent(prompt, model)
    steps_data: List[dict] = [
        {
            "title": title,
            "status": "pending",
            "description": "Awaiting execution",
            "substeps": [],
            "updated_at": None,
        }
        for title in initial_plan_steps
    ]

    # 2) Execution
    def update_step(i: int, status: str, description: str = "", substep: dict | None = None):
        """Update step ``i`` in place and stamp it with the current UTC time."""
        step = steps_data[i]
        step["status"] = status
        if description:
            step["description"] = description
        if substep:
            step["substeps"].append(substep)
        # Naive UTC ISO string, same format the deprecated datetime.utcnow() produced.
        step["updated_at"] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    total = len(initial_plan_steps)
    for i, plan_step_title in enumerate(initial_plan_steps):
        print(f"[{i+1}/{total}] ▶️ {plan_step_title}")
        update_step(i, "running", f"Executing: {plan_step_title}")

        # The executor receives the history of all previous steps.
        actual_step_description, agent_name, output = executor_agent_step(
            plan_step_title, execution_history, model, prompt
        )

        execution_history.append([plan_step_title, actual_step_description, output])

        sub = {
            "title": f"Called {agent_name}",
            "content": (
                f"📘 Prompt:\n{prompt}\n\n"
                f"🧹 Next task:\n{actual_step_description}\n\n"
                f"✅ Output:\n{output}"
            )
        }
        update_step(i, "done", f"Completed: {plan_step_title}", sub)
        print(f"[{i+1}/{total}] ✅ done")

    # The output of the last executed step is used as the final report.
    final_report_markdown = execution_history[-1][-1] if execution_history else "No report generated."
    return {"html_report": final_report_markdown, "history": steps_data}

# === Función notebook-friendly ===
def run_workflow(prompt: str, model: str = "openai:gpt-4o", drop: bool = False, show_markdown: bool = True):
    """Run the agent workflow from a Jupyter notebook and persist the outcome.

    Creates the tables if needed, records a task row, runs the workflow,
    stores the result in SQLite and optionally renders the report as Markdown.

    Args:
        prompt: User request to plan and execute.
        model: Model identifier forwarded to the agents.
        drop: When true, existing tables are dropped and recreated first,
            which discards previously stored tasks.
        show_markdown: When true and the result has a non-empty
            ``"html_report"``, display it in the notebook.

    Returns:
        The dict returned by ``run_agent_workflow``.

    Raises:
        Any exception raised while running or saving the workflow. It is
        recorded on the task row with status "error" and then re-raised.
    """
    init_db(drop=drop)
    session = SessionLocal()
    try:
        # Inside the try so the session is closed even if the insert fails.
        task_id = create_task(session, prompt)
        print(f"🆔 task_id={task_id}")

        try:
            result = run_agent_workflow(task_id, prompt, model)
            save_result(session, task_id, "done", result)
        except Exception as e:
            # A failed flush/commit leaves the session unusable until it is
            # rolled back; reset it before writing the error record.
            session.rollback()
            save_result(session, task_id, "error", {"error": str(e)})
            raise

        print(f"💾 Saved to SQLite: {DB_PATH}")

        # Rendering happens after the result is stored, so a display problem
        # cannot overwrite a successfully saved result with an error record.
        if show_markdown and result.get("html_report"):
            display(Markdown(result["html_report"]))

        return result
    finally:
        session.close()


In [7]:
result = run_workflow(
    prompt="Summarize the main contributions of attention mechanisms in deep learning.",
    model="openai:gpt-4o",
    drop=True,          # drops and recreates the tables, discarding previously stored tasks
    show_markdown=True  # renders the final report in the notebook
)



🆔 task_id=62f44f61-2512-482b-9a2f-3bc6cd51435a
[1/7] ▶️ Research agent: Use Tavily to perform a broad web search and collect top relevant items (title, authors, year, venue/source, URL, DOI if available).
🔍 Research Agent


  steps_data[i]["updated_at"] = datetime.utcnow().isoformat()


✅ Output:
 Here are some resources that discuss the main contributions of attention mechanisms in deep learning:

1. **[What is an attention mechanism? | IBM](https://www.ibm.com/think/topics/attention-mechanism)**
   - Discusses how attention mechanisms work and the impact they had on generative AI, especially in improving RNN-based Seq2Seq models for machine translation.
   - Explains various attention mechanism variants, focusing on vector encoding, alignment scores, and attention weights.

2. **[Attention Mechanisms in Deep Learning: Enhancing Model Performance](https://medium.com/@zhonghong9998/attention-mechanisms-in-deep-learning-enhancing-model-performance-32a91006092a)**
   - Highlights how attention mechanisms enhance model performance and improve accuracy by selectively focusing on important input elements.
   - Includes technical examples and model architectures to showcase performance enhancements.

3. **[Attention Mechanism in Deep Learning - Analytics Vidhya](https://www

  t.updated_at = datetime.utcnow()


# Main Contributions of Attention Mechanisms in Deep Learning

## Introduction

Attention mechanisms have emerged as a pivotal innovation in the field of deep learning, significantly enhancing the performance and capabilities of various models. Initially introduced to improve neural machine translation systems, attention mechanisms have since been applied across a wide range of tasks, including natural language processing (NLP), computer vision, and speech processing. This report explores the main contributions of attention mechanisms in deep learning, highlighting their impact on model performance, efficiency, and applicability.

## Background or Context

The concept of attention in deep learning was inspired by the human cognitive process of selectively focusing on certain parts of information while ignoring others. This mechanism allows models to prioritize relevant data, thereby improving their ability to make accurate predictions. Attention mechanisms were first popularized by their application in sequence-to-sequence (Seq2Seq) models for machine translation, where they addressed the limitations of recurrent neural networks (RNNs) in handling long-range dependencies [1].

## Key Findings

### 1. Enhanced Model Performance

Attention mechanisms have been shown to significantly enhance model performance by allowing models to focus on important elements of input data. This selective focus leads to improved accuracy and efficiency, particularly in tasks involving sequence data, such as language translation and speech processing [2].

### 2. Improved Interpretability and Efficiency

By computing relevance between different data segments, attention mechanisms enhance the interpretability of models. They use vector encoding, alignment scores, and attention weights to determine which parts of the input data are most relevant for a given task, thereby improving the model's decision-making process [3].

### 3. Development of Novel Variations

The introduction of novel attention mechanisms, such as metric-attention, has further improved training efficiency and accuracy over traditional self-attention. These innovations provide a theoretical basis for understanding how attention mechanisms compute relevance and propagate information [4].

### 4. Wide Applicability

Attention mechanisms have been successfully applied across various domains, including NLP, computer vision, and speech processing. Their ability to handle long-range dependencies and focus on relevant data parts makes them suitable for a wide array of deep learning tasks [5].

## Discussion

The transformative role of attention mechanisms in deep learning cannot be overstated. By enabling models to focus on relevant data, attention mechanisms have addressed some of the key limitations of traditional neural networks, such as the inability to capture long-range dependencies. This has led to significant improvements in model performance and efficiency, particularly in tasks involving complex data sequences.

Moreover, the development of novel attention mechanisms, such as metric-attention, demonstrates the ongoing innovation in this field. These advancements not only enhance the theoretical understanding of attention mechanisms but also provide practical benefits in terms of training efficiency and accuracy.

## Conclusion

Attention mechanisms have revolutionized the field of deep learning by enhancing model performance, improving interpretability, and expanding the applicability of deep learning models across various domains. Their ability to focus on relevant data and handle long-range dependencies has made them an indispensable tool in the development of advanced AI systems. As research in this area continues to evolve, attention mechanisms are likely to play an even more significant role in the future of deep learning.

## References

1. [What is an attention mechanism? | IBM](https://www.ibm.com/think/topics/attention-mechanism)
2. [Attention Mechanisms in Deep Learning: Enhancing Model Performance](https://medium.com/@zhonghong9998/attention-mechanisms-in-deep-learning-enhancing-model-performance-32a91006092a)
3. [Attention Mechanism in Deep Learning - Analytics Vidhya](https://www.analyticsvidhya.com/blog/2019/11/comprehensive-guide-attention-mechanism-deep-learning/)
4. [Towards understanding how attention mechanism works in deep learning](http://arxiv.org/abs/2412.18288v1)
5. [Unpacking the Power of Attention Mechanisms in Deep Learning](https://viso.ai/deep-learning/attention-mechanisms/)

**Compliance checklist**
- [x] Used all provided research
- [x] Included inline citations
- [x] Included a References section
- [x] Preserved input references/sources/tools links
- [x] Structured sections present