In [3]:
from browser_use import Agent, ChatGoogle
from dotenv import load_dotenv
import os
import asyncio
from datetime import datetime
import json

# ===============================
# Setup
# ===============================

# Load environment variables before the LLM client is constructed
# (presumably the Gemini API key is read from the environment - confirm).
load_dotenv()
llm = ChatGoogle(model="gemini-2.5-flash")

# Candidate sites in processing order: each run takes its batch from the front
# of whatever has not been recorded as visited yet.
sources = [
    "investopedia.com", "marketwatch.com", "seekingalpha.com",
    "fool.com", "finance.yahoo.com", "zacks.com",
    "morningstar.com", "investorplace.com", "investing.com",
    "barrons.com", "cfainstitute.org", "ssrn.com",
    "nber.org", "aqr.com", "researchaffiliates.com",
]

# Files read and written by the helpers in this cell; the paths are relative,
# so they resolve against the current working directory.
visited_sources_file = "visited_sources.json"  # sources already processed
markdown_path = "momentum_investing_summary.md"  # accumulated markdown summaries
structured_path = "momentum_investing_data.json"  # accumulated structured records

# ===============================
# Helpers
# ===============================

def load_visited_sources():
    """Return the set of sources recorded in the visited-sources state file.

    Returns:
        A set built from the JSON content of ``visited_sources_file``, or an
        empty set when that file does not exist.
    """
    if not os.path.exists(visited_sources_file):
        return set()

    with open(visited_sources_file, "r", encoding="utf-8") as state_file:
        recorded = json.load(state_file)
    return set(recorded)

def save_visited_sources(visited):
    """Write the visited sources to ``visited_sources_file`` as a JSON array.

    Args:
        visited: Collection of source names to persist (normally the set
            returned by ``load_visited_sources()``, possibly updated).

    The entries are sorted before writing. Iteration order of a set of strings
    is not stable between interpreter runs, so dumping the set directly made
    the file's content reorder itself from one run to the next.
    """
    # key=str keeps the ordering well defined even if the state file ever
    # contained a non-string entry.
    ordered = sorted(visited, key=str)
    with open(visited_sources_file, "w", encoding="utf-8") as f:
        json.dump(ordered, f, indent=2)

def get_remaining_sources():
    """Return the entries of ``sources`` that are not yet recorded as visited.

    Returns:
        A new list that keeps the order of ``sources``.
    """
    already_done = load_visited_sources()
    pending = []
    for candidate in sources:
        if candidate not in already_done:
            pending.append(candidate)
    return pending

def append_markdown(new_text, path=markdown_path):
    """Append ``new_text`` to the markdown file at ``path``.

    A horizontal-rule separator is written before the text so successive
    entries stay visually separated. The file is opened in append mode, so it
    is created when missing.

    Args:
        new_text: Markdown text to append.
        path: Target file; defaults to the module-level ``markdown_path``.
    """
    separator = "\n\n---\n\n"
    with open(path, "a", encoding="utf-8") as out:
        out.writelines((separator, new_text))

def append_structured(new_data, path=structured_path):
    """Append structured records to the JSON array stored at ``path``.

    Args:
        new_data: Records to add. A string is parsed as JSON, falling back to
            a single ``{"raw": ...}`` record when it is not valid JSON. A list
            or tuple contributes its items; any other value (for example a
            single dict) is appended as one record.
        path: JSON file holding the accumulated records; created if missing.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON.
        TypeError: If the combined data cannot be serialised to JSON. The
            existing file is left untouched in that case.
    """
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            existing = json.load(f)
    else:
        existing = []

    # Tolerate a file whose top-level value is not an array.
    if not isinstance(existing, list):
        existing = [existing]

    if isinstance(new_data, str):
        try:
            new_data = json.loads(new_data)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            new_data = [{"raw": new_data}]

    # Parsed JSON (or a caller-supplied object) may be a dict or scalar;
    # list + dict would raise TypeError, so wrap anything that is not a sequence
    # of records.
    if isinstance(new_data, (list, tuple)):
        records = list(new_data)
    else:
        records = [new_data]

    # Serialise first: opening with "w" truncates the file, so a serialisation
    # error after that point would destroy the records already stored.
    payload = json.dumps(existing + records, indent=2)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)

# ===============================
# Agent Setup
# ===============================

def make_agent(remaining_sources):
    """Build the research agent for one batch of sources.

    Args:
        remaining_sources: Iterable of source names; they are joined with
            ``", "`` and embedded in the task prompt as the allowed sites.

    Returns:
        An ``Agent`` created with the task prompt, the module-level ``llm``
        and a headless Chromium ``browser_config``.
    """
    allowed = ", ".join(remaining_sources)
    research_brief = (
        "Search for the latest investor research and commentary on momentum investing, "
        "focusing on how strategies adapt to changing market dynamics. "
        "Summarize key takeaways, including insights on portfolio construction, "
        "trading costs, holding periods, scalability, and adaptation. "
        "Return a clean markdown summary with sections: Key Findings, Implications for Investors, "
        "and Practical Recommendations. "
    )
    task_prompt = research_brief + f"Restrict yourself to these sources: {allowed}."

    # NOTE(review): --disable-javascript may stop script-driven pages from
    # rendering - confirm it is intended.
    chromium_flags = [
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--disable-extensions",
        "--disable-plugins",
        "--disable-images",
        "--disable-javascript",
    ]

    # NOTE(review): verify that the installed browser_use version actually
    # honours a `browser_config` keyword with these keys.
    browser_settings = {
        "headless": True,
        "browser_type": "chromium",
        "browser_timeout": 60,
        "viewport_size": {"width": 1280, "height": 720},
        "extra_chromium_args": chromium_flags,
    }

    return Agent(task=task_prompt, llm=llm, browser_config=browser_settings)

# ===============================
# Main Run (manual trigger)
# ===============================

async def run_research(batch_size=3, *, max_steps=25, timeout=300):
    """Research the next batch of unvisited sources and persist the outcome.

    Takes the first ``batch_size`` entries returned by
    ``get_remaining_sources()``, runs one agent over them, appends the agent's
    final result to the markdown summary (and any structured output to the
    JSON file), then records the batch as visited.

    Args:
        batch_size: Number of sources handed to the agent. Must be at least 1;
            ``None`` means "all remaining sources".
        max_steps: Step budget passed to ``agent.run``.
        timeout: Seconds to wait for the agent before giving up.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A zero batch would start an agent with an empty source list, and a
    # negative one would slice from the end of the list instead of batching.
    if batch_size is not None and batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    visited = load_visited_sources()
    remaining = get_remaining_sources()

    if not remaining:
        print("✅ All sources already covered.")
        return

    next_batch = remaining[:batch_size]
    print(f"🔍 Running research on: {', '.join(next_batch)}")

    agent = make_agent(next_batch)

    try:
        history = await asyncio.wait_for(
            agent.run(max_steps=max_steps), timeout=timeout
        )
    except asyncio.TimeoutError:
        # Returning here leaves the batch unvisited, so the next call retries it.
        print("❌ Agent timed out")
        return

    final_result = history.final_result()
    if final_result:
        # Save markdown
        append_markdown(f"## Research Batch ({', '.join(next_batch)})\n\n{final_result}")

        # Save structured output when the history object provides one
        structured = getattr(history, "structured_output", None)
        if structured:
            append_structured(structured)

        print(f"✅ Results from {next_batch} saved.")
    else:
        print("⚠️ No final result produced.")

    # NOTE(review): the batch is recorded as visited even when no final result
    # was produced, so such sources are never retried (unlike the timeout path).
    # Kept as-is to avoid getting stuck on a failing batch - confirm intent.
    visited.update(next_batch)
    save_visited_sources(visited)

# ===============================
# Manual Execution
# ===============================
await run_research()
# Example: run one batch
if __name__ == "__main__":
    print("🔧 Manual research agent ready.")
    print("➡️ Run: await run_research() inside Jupyter or asyncio.run(run_research()) in script")


🔍 Running research on: morningstar.com, investorplace.com, investing.com
INFO     [Agent] [34m🚀 Task: Search for the latest investor research and commentary on momentum investing, focusing on how strategies adapt to changing market dynamics. Summarize key takeaways, including insights on portfolio construction, trading costs, holding periods, scalability, and adaptation. Return a clean markdown summary with sections: Key Findings, Implications for Investors, and Practical Recommendations. Restrict yourself to these sources: morningstar.com, investorplace.com, investing.com.[0m


python(99091) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


INFO     [Agent] 🧠 Starting a browser-use version 0.7.3 with model=gemini-2.5-flash
INFO     [Agent] 

INFO     [Agent] 📍 Step 1:
INFO     [Agent]   [32m👍 Eval: The previous goal was to start the task. Verdict: Success[0m
INFO     [Agent]   [34m🎯 Next goal: Create a `todo.md` file with a detailed plan for searching each specified website, extracting information, and generating the final markdown summary.[0m
INFO     [Agent]   🦾 [34m[ACTION 1/1][0m write_file: file_name: todo.md, content: # Momentum Investing Research and Commentary\n\n## Goal: Summarize key takeaways ...
INFO     [tools] 💾 Data written to file todo.md successfully.
INFO     [Agent] 

INFO     [Agent] 📍 Step 2:
INFO     [Agent]   [32m👍 Eval: Successfully created the todo.md file. Verdict: Success[0m
INFO     [Agent]   [34m🎯 Next goal: Search Google for "momentum investing strategies adapt changing market dynamics site:morningstar.com" to find relevant articles on morningstar.com.[0m
INFO     [Agent]   🦾 [34m[

In [6]:
from browser_use import Agent, ChatGoogle
from dotenv import load_dotenv
import os
import asyncio
from datetime import datetime
import json

# ===============================
# Setup
# ===============================

# Load environment variables before the LLM client is constructed
# (presumably the Gemini API key is read from the environment - confirm).
load_dotenv()
llm = ChatGoogle(model="gemini-2.5-flash")

# Candidate sites in processing order: each run takes its batch from the front
# of whatever has not been recorded as visited yet.
sources = [
    "investopedia.com", "marketwatch.com", "seekingalpha.com",
    "fool.com", "finance.yahoo.com", "zacks.com",
    "morningstar.com", "investorplace.com", "investing.com",
    "barrons.com", "cfainstitute.org", "ssrn.com",
    "nber.org", "aqr.com", "researchaffiliates.com",
]

# Files read and written by the helpers in this cell; the paths are relative,
# so they resolve against the current working directory.
visited_sources_file = "visited_sources.json"  # sources already processed
markdown_path = "momentum_investing_summary.md"  # accumulated markdown summaries
structured_path = "momentum_investing_data.json"  # accumulated structured records
error_log_path = "momentum_investing_errors.md"  # timestamped failure log

# ===============================
# Helpers
# ===============================

def load_visited_sources():
    """Return the set of sources recorded in the visited-sources state file.

    Returns:
        A set built from the JSON content of ``visited_sources_file``, or an
        empty set when that file does not exist.
    """
    if not os.path.exists(visited_sources_file):
        return set()

    with open(visited_sources_file, "r", encoding="utf-8") as state_file:
        recorded = json.load(state_file)
    return set(recorded)

def save_visited_sources(visited):
    """Write the visited sources to ``visited_sources_file`` as a JSON array.

    Args:
        visited: Collection of source names to persist (normally the set
            returned by ``load_visited_sources()``, possibly updated).

    The entries are sorted before writing. Iteration order of a set of strings
    is not stable between interpreter runs, so dumping the set directly made
    the file's content reorder itself from one run to the next.
    """
    # key=str keeps the ordering well defined even if the state file ever
    # contained a non-string entry.
    ordered = sorted(visited, key=str)
    with open(visited_sources_file, "w", encoding="utf-8") as f:
        json.dump(ordered, f, indent=2)

def get_remaining_sources():
    """Return the entries of ``sources`` that are not yet recorded as visited.

    Returns:
        A new list that keeps the order of ``sources``.
    """
    already_done = load_visited_sources()
    pending = []
    for candidate in sources:
        if candidate not in already_done:
            pending.append(candidate)
    return pending

def append_markdown(new_text, path=markdown_path):
    """Append ``new_text`` to the markdown file at ``path``.

    A horizontal-rule separator is written before the text so successive
    entries stay visually separated. The file is opened in append mode, so it
    is created when missing.

    Args:
        new_text: Markdown text to append.
        path: Target file; defaults to the module-level ``markdown_path``.
    """
    separator = "\n\n---\n\n"
    with open(path, "a", encoding="utf-8") as out:
        out.writelines((separator, new_text))

def append_structured(new_data, path=structured_path):
    """Append structured records to the JSON array stored at ``path``.

    Args:
        new_data: Records to add. A string is parsed as JSON, falling back to
            a single ``{"raw": ...}`` record when it is not valid JSON. A list
            or tuple contributes its items; any other value (for example a
            single dict) is appended as one record.
        path: JSON file holding the accumulated records; created if missing.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON.
        TypeError: If the combined data cannot be serialised to JSON. The
            existing file is left untouched in that case.
    """
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            existing = json.load(f)
    else:
        existing = []

    # Tolerate a file whose top-level value is not an array.
    if not isinstance(existing, list):
        existing = [existing]

    if isinstance(new_data, str):
        try:
            new_data = json.loads(new_data)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            new_data = [{"raw": new_data}]

    # Parsed JSON (or a caller-supplied object) may be a dict or scalar;
    # list + dict would raise TypeError, so wrap anything that is not a sequence
    # of records.
    if isinstance(new_data, (list, tuple)):
        records = list(new_data)
    else:
        records = [new_data]

    # Serialise first: opening with "w" truncates the file, so a serialisation
    # error after that point would destroy the records already stored.
    payload = json.dumps(existing + records, indent=2)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)

def log_error_md(error_text, path=error_log_path):
    """Append a timestamped error entry to the markdown error log.

    The entry consists of a heading carrying the current local time followed
    by ``error_text`` inside a fenced code block, and is preceded by a
    horizontal-rule separator.

    Args:
        error_text: Message to record.
        path: Log file; defaults to the module-level ``error_log_path``.
    """
    now = datetime.now()
    entry = f"### ⚠️ Error Logged: {now:%Y-%m-%d %H:%M:%S}\n\n```\n{error_text}\n```\n"
    with open(path, "a", encoding="utf-8") as log_file:
        log_file.writelines(("\n\n---\n\n", entry))

# ===============================
# Agent Setup
# ===============================

def make_agent(remaining_sources):
    """Build the research agent for one batch of sources.

    Args:
        remaining_sources: Iterable of source names; they are joined with
            ``", "`` and embedded in the task prompt as the allowed sites.

    Returns:
        An ``Agent`` created with the task prompt, the module-level ``llm``
        and a headless Chromium ``browser_config``.
    """
    allowed = ", ".join(remaining_sources)
    research_brief = (
        "Search for the latest investor research and commentary on momentum investing, "
        "focusing on how strategies adapt to changing market dynamics. "
        "Summarize key takeaways, including insights on portfolio construction, "
        "trading costs, holding periods, scalability, and adaptation. "
        "Return a clean markdown summary with sections: Key Findings, Implications for Investors, "
        "and Practical Recommendations. "
    )
    task_prompt = research_brief + f"Restrict yourself to these sources: {allowed}."

    # NOTE(review): --disable-javascript may stop script-driven pages from
    # rendering - confirm it is intended.
    chromium_flags = [
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--disable-extensions",
        "--disable-plugins",
        "--disable-images",
        "--disable-javascript",
    ]

    # NOTE(review): verify that the installed browser_use version actually
    # honours a `browser_config` keyword with these keys.
    browser_settings = {
        "headless": True,
        "browser_type": "chromium",
        "browser_timeout": 60,
        "viewport_size": {"width": 1280, "height": 720},
        "extra_chromium_args": chromium_flags,
    }

    return Agent(task=task_prompt, llm=llm, browser_config=browser_settings)

# ===============================
# Main Run (manual trigger)
# ===============================

async def run_research(batch_size=3, *, max_steps=25, timeout=300):
    """Research the next batch of unvisited sources and persist the outcome.

    Takes the first ``batch_size`` entries returned by
    ``get_remaining_sources()``, runs one agent over them, appends the agent's
    final result to the markdown summary (and any structured output to the
    JSON file), then records the batch as visited. Failures are written to the
    markdown error log via ``log_error_md``.

    Args:
        batch_size: Number of sources handed to the agent. Must be at least 1;
            ``None`` means "all remaining sources".
        max_steps: Step budget passed to ``agent.run``.
        timeout: Seconds to wait for the agent before giving up.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: Any non-timeout error raised by the agent run is logged and
            then re-raised unchanged.
    """
    # A zero batch would start an agent with an empty source list, and a
    # negative one would slice from the end of the list instead of batching.
    if batch_size is not None and batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    visited = load_visited_sources()
    remaining = get_remaining_sources()

    if not remaining:
        print("✅ All sources already covered.")
        return

    next_batch = remaining[:batch_size]
    batch_label = ", ".join(next_batch)
    print(f"🔍 Running research on: {batch_label}")

    agent = make_agent(next_batch)

    try:
        history = await asyncio.wait_for(
            agent.run(max_steps=max_steps), timeout=timeout
        )
    except asyncio.TimeoutError:
        # The timeout exception carries no message of its own, so record the
        # limit that was exceeded instead of str(exception).
        log_error_md(
            f"Agent timed out after {timeout}s while processing: {batch_label}"
        )
        print("❌ Agent timed out")
        # Returning here leaves the batch unvisited, so the next call retries it.
        return
    except Exception as exc:
        # Record unexpected agent failures in the error log, then let them
        # propagate exactly as before.
        log_error_md(
            f"Agent failed while processing: {batch_label}\n"
            f"{type(exc).__name__}: {exc}"
        )
        raise

    final_result = history.final_result()
    if final_result:
        append_markdown(f"## Research Batch ({batch_label})\n\n{final_result}")

        # Save structured output when the history object provides one
        structured = getattr(history, "structured_output", None)
        if structured:
            append_structured(structured)

        print(f"✅ Results from {next_batch} saved.")
    else:
        log_error_md(f"No final result produced for: {batch_label}")
        print("⚠️ No final result produced.")

    # NOTE(review): the batch is recorded as visited even when no final result
    # was produced, so such sources are never retried (unlike the timeout path).
    # Kept as-is to avoid getting stuck on a failing batch - confirm intent.
    visited.update(next_batch)
    save_visited_sources(visited)

# ===============================
# Manual Execution
# ===============================

# For Jupyter or async environments
await run_research()

# For script execution
if __name__ == "__main__":
    print("🔧 Manual research agent ready.")
    print("➡️ Run: await run_research() inside Jupyter or asyncio.run(run_research()) in script")


✅ All sources already covered.
🔧 Manual research agent ready.
➡️ Run: await run_research() inside Jupyter or asyncio.run(run_research()) in script
