# Agentic Finance — All-in-One Notebook
This notebook mirrors our layered app.


# Market Research Agent — Price, Indicators, and News Pipeline

This project builds a lightweight research workflow for equities: we fetch historical prices, compute common technical indicators (SMA, RSI), and pull headline summaries with basic sentiment. A small JSON cache keeps runs fast and reproducible while avoiding API rate limits. The notebook(s) walk through data loading, quick EDA, feature prep, and simple evaluation.

**Course:** MSAAI 520-02 — Group 5  
**Date:** October 18, 2025

## Team
- Ali Azizi  
- Sunitha Kosireddy  
- Victor Salcedo


In [None]:
# Setup: create package tree and write source files (no changes to your code)
import pathlib, sys
ROOT = pathlib.Path("/mnt/data/agentic-finance")
SRC = ROOT / "src"
UI = ROOT / "ui"
sys.path.insert(0, str(ROOT))
print("Project root:", ROOT)

Project root: \mnt\data\agentic-finance


### Add repo root to sys.path and ensure packages exist


In [None]:
from pathlib import Path
import sys

# Find repo root (folder containing "src")
ROOT = Path.cwd()
while not (ROOT / "src").exists() and ROOT.parent != ROOT:
    ROOT = ROOT.parent

sys.path.insert(0, str(ROOT))  # make "src" importable

# Ensure packages (empty __init__.py files)
for p in [
    ROOT / "src",
    ROOT / "src" / "config",
    ROOT / "src" / "data_io",
    ROOT / "src" / "system",
    ROOT / "src" / "analysis",


]:
    p.mkdir(parents=True, exist_ok=True)
    (p / "__init__.py").touch(exist_ok=True)


## src/analysis/features.py

In [None]:
# src/analysis/features.py
from __future__ import annotations
import pandas as pd
import numpy as np

def compute_sma(prices: pd.DataFrame, window: int) -> pd.Series:
    if prices is None or prices.empty:
        return pd.Series(dtype=float)
    return prices["close"].rolling(window=window).mean()

def compute_rsi(prices: pd.DataFrame, window: int = 14) -> pd.Series:
    if prices is None or prices.empty:
        return pd.Series(dtype=float)
    delta = prices["close"].diff()
    gain = np.where(delta > 0, delta, 0.0)
    loss = np.where(delta < 0, -delta, 0.0)
    gain_s = pd.Series(gain, index=prices.index)
    loss_s = pd.Series(loss, index=prices.index)
    avg_gain = gain_s.rolling(window=window).mean()
    avg_loss = loss_s.rolling(window=window).mean()
    rs = avg_gain / (avg_loss + 1e-10)
    rsi = 100 - (100 / (1 + rs))
    return rsi


## src/analysis/text.py

In [None]:
# Second approach

from __future__ import annotations
import re
import pandas as pd
from datetime import datetime, timedelta
# from config.settings import SETTINGS

# -----------------------------
# Existing tagging / preprocessing
# -----------------------------

TAG_RULES = {
    "earnings": ["earnings", "eps", "guidance", "outlook", "quarter", "revenue"],
    "product":  ["launch", "iphone", "chip", "feature", "service"],
    "legal":    ["lawsuit", "regulator", "antitrust", "fine", "settlement"],
    "macro":    ["inflation", "rates", "fed", "recession", "gdp"]
}

def preprocess_news(df: pd.DataFrame) -> pd.DataFrame:
    if df is None or df.empty:
        return pd.DataFrame(columns=[
            "published_at","source","title","summary","url",
            "overall_sentiment","tags","numbers"
        ])

    df = df.copy()

    # Alpha Vantage format is like "20251017T200143"
    # Parse with explicit format; keep timezone-aware for safety
    df["published_at"] = pd.to_datetime(
        df["published_at"], format="%Y%m%dT%H%M%S", errors="coerce", utc=True
    )

    # Drop rows with no title/url; keep others (don’t drop NaT here — the date filter happens later)
    df = df.dropna(subset=["title","url"]).drop_duplicates(subset=["url"])
    df["summary"] = df["summary"].fillna("")
    return df

def classify_tags(text: str) -> list[str]:
    text_l = text.lower()
    tags = [k for k, kws in TAG_RULES.items() if any(kw in text_l for kw in kws)]
    return tags or ["general"]

NUM_RE = re.compile(r'(\$?\b\d+(?:\.\d+)?%?)')

def extract_numbers(text: str) -> list[str]:
    return NUM_RE.findall(text or "")[:6]

def add_tags_and_numbers(df: pd.DataFrame) -> pd.DataFrame:
    if df.empty:
        return df
    df = df.copy()
    df["tags"] = (df["title"] + " " + df["summary"]).apply(classify_tags)
    df["numbers"] = (df["title"] + " " + df["summary"]).apply(extract_numbers)
    return df

def recent_topk(df: pd.DataFrame, topk: int, days: int, required_tags: list[str] | None = None) -> pd.DataFrame:
    if df.empty:
        return df

    # Make an aware UTC cutoff; df['published_at'] is already UTC-aware
    cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=days)
    f = df[df["published_at"] >= cutoff]

    if required_tags:
        want = [t.strip().lower() for t in required_tags]
        f_tags = f[f["tags"].apply(lambda ts: any(t in [x.lower() for x in ts] for t in want))]
        f = f_tags if not f_tags.empty else f

    return f.sort_values("published_at", ascending=False).head(topk)

# -----------------------------
# NEW: shared agent utilities
# -----------------------------

import json

def strip_code_fences(s: str) -> str:
    """Remove leading/trailing ``` blocks (optionally ```json)."""
    if not isinstance(s, str):
        return s
    return re.sub(r"^```(?:json)?\s*|\s*```$", "", s.strip(), flags=re.IGNORECASE)

def to_float(x, default: float = 0.0) -> float:
    """Best-effort conversion of model outputs or strings to float."""
    try:
        if isinstance(x, str):
            xs = x.strip().lower()
            # map common words to numeric anchors
            if xs in ("high", "strong", "bullish", "overbought"):
                return 0.8
            if xs in ("medium", "moderate", "neutral"):
                return 0.5
            if xs in ("low", "weak", "bearish", "oversold"):
                return 0.2
        return float(x)
    except Exception:
        return default

def clamp(x: float, lo: float, hi: float) -> float:
    return max(lo, min(hi, x))

def normalize_score(v: float) -> float:
    """
    Normalize arbitrary score ranges to [-1, 1].
    Heuristics:
      - If already in [-1,1], keep.
      - If in [0,1], map to [-1,1] via (v-0.5)*2.
      - If in (1,100], treat as percent.
      - If in (1,10], treat as 0-10 and map.
      - Else, clamp.
    """
    try:
        v = float(v)
    except Exception:
        return 0.0
    if -1.0 <= v <= 1.0:
        return v
    if 0.0 <= v <= 1.0:
        return (v - 0.5) * 2.0
    if 1.0 < v <= 100.0:
        v01 = v / 100.0
        return (v01 - 0.5) * 2.0
    if 1.0 < v <= 10.0:
        v01 = v / 10.0
        return (v01 - 0.5) * 2.0
    return clamp(v, -1.0, 1.0)

def normalize_conf(v) -> float:
    """Normalize any confidence-like value to [0,1]."""
    f = to_float(v, 0.7)
    if 1.0 < f <= 100.0:
        f = f / 100.0
    return clamp(f, 0.0, 1.0)

# Optional: helpers to render structured dicts into strings (for external tools)
def pretty_json_block(obj: dict, max_chars: int = 4000) -> str:
    """Return a fenced JSON markdown block, truncated for UI safety."""
    try:
        js = json.dumps(obj, ensure_ascii=False, indent=2)
    except Exception:
        js = str(obj)
    if len(js) > max_chars:
        js = js[: max_chars - 20] + "\n... (truncated)"
    return f"```json\n{js}\n```"


## src/config/settings.py

In [None]:
from __future__ import annotations
import os
from pathlib import Path
from dataclasses import dataclass
from dotenv import load_dotenv

def _find_project_root(start: Path) -> Path:
    """
    Walk upward to find the repo root heuristically.
    Treat a folder containing both 'src' and 'data' as the root.
    Fallback to the starting directory if not found.
    """
    for p in [start, *start.parents]:
        if (p / "src").exists() and (p / "data").exists():
            return p
    return start

# project root = repo root
if "__file__" in globals():
    ROOT = Path(__file__).resolve().parents[2]
else:
    # Notebook / REPL: start from CWD and auto-detect root
    ROOT = _find_project_root(Path.cwd())

load_dotenv(ROOT / ".env", override=False)

@dataclass(frozen=True)
class Settings:
    data_dir: Path = ROOT / "data"
    cache_dir: Path = ROOT / "data" / "cache"
    runs_dir: Path = ROOT / "data" / "runs"
    alpha_api_key: str = os.getenv("ALPHAVANTAGE_API_KEY", "")
    openai_api_key: str = os.getenv("OPENAI_API_KEY", "")
    news_window_days: int = 14
    topk_news: int = 5
    cache_ttl_minutes: int = 60

SETTINGS = Settings()
SETTINGS.cache_dir.mkdir(parents=True, exist_ok=True)
SETTINGS.runs_dir.mkdir(parents=True, exist_ok=True)


## src/data_io/cache.py

### Simple JSON file cache (TTL + atomic writes)
I use a tiny JSON-based cache so repeated runs don’t redo the same work. Each key maps to a file on disk, with a timestamp to support a time-to-live (TTL). Saving is done via a temp file + replace so partial writes don’t corrupt the cache.

**Inputs:** `key`, optional `ttl_minutes`, arbitrary `data`  
**Key choices:** per-key JSON files under `SETTINGS.cache_dir`, ISO-8601 for dates, atomic write on save  
**Output:** `load_cache` returns cached payload or `None`; `save_cache` writes `{"_ts": ..., "data": ...}` to disk


In [None]:
# Purpose: lightweight disk cache with TTL and atomic writes
# Context: used by data fetchers (e.g., price downloads) to avoid repeat network calls
# Notes: filenames derive from key under SETTINGS.cache_dir; payload stored as JSON

# cache.py
from __future__ import annotations
import json, time
from datetime import date, datetime
from pathlib import Path
from typing import Any
from src.data_io.cache import load_cache, save_cache
from src.config.settings import SETTINGS

def _cache_path(key: str) -> Path:
    return SETTINGS.cache_dir / f"{key}.json"

def _json_default(o: Any):
    # datetime & pandas.Timestamp (subclass of datetime) → ISO 8601
    if isinstance(o, (datetime, date)):
        return o.isoformat()
    # Fallback: make a best-effort string (covers Decimal, Path, Enum, etc.)
    try:
        return str(o)
    except Exception:
        return repr(o)

def load_cache(key: str, ttl_minutes: int | None = None) -> Any | None:
    p = _cache_path(key)
    if not p.exists():
        return None
    try:
        obj = json.loads(p.read_text(encoding="utf-8"))
        if ttl_minutes is None:
            return obj.get("data")  # consistent: always return payload
        if (time.time() - obj.get("_ts", 0)) <= ttl_minutes * 60:
            return obj.get("data")
    except Exception:
        return None
    return None

def save_cache(key: str, data: Any) -> None:
    p = _cache_path(key)
    p.parent.mkdir(parents=True, exist_ok=True)
    tmp = p.with_suffix(p.suffix + ".tmp")
    payload = {"_ts": time.time(), "data": data}
    tmp.write_text(json.dumps(payload, ensure_ascii=False, default=_json_default), encoding="utf-8")
    tmp.replace(p)  # atomic on most OS/filesystems


## src/data_io/prices.py

### Fetch historical prices (with simple caching)
I pull OHLCV data from Yahoo Finance and return a clean DataFrame. I use a short-lived cache so repeated runs don’t keep hitting the API. If data is already cached, I return it immediately. I also flatten any MultiIndex columns and standardize names so downstream code stays consistent.

**Inputs:** `symbol`, `start`, `end`  
**Key choices:** use `yfinance.download`, cache key includes date range, flatten MultiIndex, keep both `close` and `adj_close`  
**Output:** DataFrame with `date, open, high, low, close, adj_close, volume`


In [None]:
# Purpose: download OHLCV from Yahoo Finance and return a normalized DataFrame with caching
# Context: called by data prep steps before features/EDA; avoids repeated network calls
# Notes: flattens MultiIndex cols, standardizes names, stores json-serializable cache

from __future__ import annotations
import pandas as pd
import yfinance as yf
from src.data_io.cache import load_cache, save_cache
from src.config.settings import SETTINGS

def fetch_prices(symbol: str, start: str | None, end: str | None) -> pd.DataFrame:
    cache_key = f"prices_{symbol}_{start}_{end}"
    cached = load_cache(cache_key, ttl_minutes=SETTINGS.cache_ttl_minutes)
    if cached is not None:
        return pd.DataFrame(cached)
    df = yf.download(symbol, start=start, end=end, progress=False)
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [c[0].lower() for c in df.columns]
    df = df.reset_index().rename(columns={
        "Date": "date", "open":"open","high":"high","low":"low","close":"close","adj close":"adj_close","volume":"volume"
    })
    df["date"] = df["date"].astype(str)
    save_cache(cache_key, df.to_dict(orient="records"))
    return df


## src/data_io/indicators.py

### Fetch technical indicators (Alpha Vantage with local fallback)
I get daily SMA/RSI for a symbol. I try Alpha Vantage first and fall back to computing the indicator locally from our price history when the key is missing or the API is rate-limited. Results are cached so repeated calls are fast.

**Inputs:** `symbol`, `indicator` (`"SMA"` or `"RSI"`), `time_period`  
**Key choices:** Alpha Vantage params (daily interval, series_type=close), JSON parsing with key map, local fallback uses `compute_sma`/`compute_rsi` on `fetch_prices`  
**Output:** DataFrame with `date` and indicator column(s), sorted ascending


In [None]:
# Purpose: fetch SMA/RSI via Alpha Vantage with a cached local-compute fallback
# Context: used by feature pipelines that need daily indicators
# Notes: caches by (symbol, indicator, time_period); normalizes dates and numeric types

# src/data_io/indicators.py
from __future__ import annotations
import requests
import pandas as pd
from typing import Optional
from src.config.settings import SETTINGS
from src.data_io.prices import fetch_prices
from src.analysis.features import compute_sma, compute_rsi
from src.data_io.cache import load_cache, save_cache

BASE = "https://www.alphavantage.co/query"
KEYS = {"SMA": "Technical Analysis: SMA", "RSI": "Technical Analysis: RSI"}


# If AV isn’t available (no key/limit), our code falls back to computing indicators locally from prices using our compute_sma / compute_rsi.
def _fallback_from_prices(symbol: str, indicator: str, time_period: int) -> pd.DataFrame:
    prices = fetch_prices(symbol, None, None)
    if prices is None or prices.empty:
        return pd.DataFrame()

    if indicator == "SMA":
        df = pd.DataFrame({"date": prices["date"], "SMA": compute_sma(prices, window=time_period)})
    elif indicator == "RSI":
        df = pd.DataFrame({"date": prices["date"], "RSI": compute_rsi(prices, window=time_period)})
    else:
        return pd.DataFrame()

    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])
    for c in df.columns:
        if c != "date":
            df[c] = pd.to_numeric(df[c], errors="coerce")
    df = df.dropna().sort_values("date", ascending=True).reset_index(drop=True)
    return df

def fetch_indicator(symbol: str, indicator: str, time_period: int = 14) -> pd.DataFrame:
    key = KEYS.get(indicator)

    # Try cache first
    cache_key = f"indicator_{symbol}_{indicator}_{time_period}"
    cached = load_cache(cache_key, ttl_minutes=SETTINGS.cache_ttl_minutes)
    if cached is not None:
        return pd.DataFrame(cached)

    if not SETTINGS.alpha_api_key or key is None:
        df = _fallback_from_prices(symbol, indicator, time_period)
        save_cache(cache_key, df.to_dict(orient="records"))
        return df

    params = {
        "function": indicator,
        "symbol": symbol,
        "interval": "daily",
        "time_period": time_period,
        "series_type": "close",
        "apikey": SETTINGS.alpha_api_key,
    }
    try:
        resp = requests.get(BASE, params=params, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        # Alpha Vantage quota message handling:
        if (not data or key not in data or not data[key] or "Note" in data or "Information" in data or "Error Message" in data):
            df = _fallback_from_prices(symbol, indicator, time_period)
            save_cache(cache_key, df.to_dict(orient="records"))
            return df
    except Exception:
        df = _fallback_from_prices(symbol, indicator, time_period)
        save_cache(cache_key, df.to_dict(orient="records"))
        return df

    df = pd.DataFrame.from_dict(data[key], orient="index")
    df.index = pd.to_datetime(df.index, errors="coerce")
    df.reset_index(inplace=True)
    df = df.rename(columns={"index": "date"})
    for c in df.columns:
        if c != "date":
            df[c] = pd.to_numeric(df[c], errors="coerce")
    df = df.dropna(subset=["date"]).sort_values("date", ascending=True).reset_index(drop=True)
    save_cache(cache_key, df.to_dict(orient="records"))
    return df


## src/data_io/news.py

### Fetch news (Alpha Vantage, symbol-filtered, cached)
I pull recent news from Alpha Vantage’s `NEWS_SENTIMENT` endpoint and keep only items that explicitly mention my ticker with decent relevance. I cache the result so I don’t burn quota on repeat runs.

**Inputs:** `symbol`  
**Key choices:** `relevance_score >= 0.30`, require explicit ticker match, cache per symbol  
**Output:** DataFrame with `published_at, source, title, summary, url, overall_sentiment`


In [None]:
# Purpose: fetch and cache symbol-specific news via Alpha Vantage, filtered by relevance
# Context: called by downstream reporting/EDA to attach headlines and sentiment
# Notes: filters to items where ticker matches and relevance >= 0.30; caches by symbol

from __future__ import annotations
import os, requests, pandas as pd
from src.data_io.cache import load_cache, save_cache
from src.config.settings import SETTINGS

BASE = "https://www.alphavantage.co/query"


def fetch_news(symbol: str) -> pd.DataFrame:
    if not SETTINGS.alpha_api_key:
        return pd.DataFrame()  # safe fail
    cache_key = f"news_{symbol}"
    cached = load_cache(cache_key, ttl_minutes=SETTINGS.cache_ttl_minutes)
    if cached is not None:
        return pd.DataFrame(cached)

    params = {"function":"NEWS_SENTIMENT","tickers":symbol,"apikey":SETTINGS.alpha_api_key}
    r = requests.get(BASE, params=params, timeout=30)
    data = r.json()
    if "feed" not in data:
        return pd.DataFrame()

    rows = []
    for item in data.get("feed", []):
        tickers = item.get("ticker_sentiment", []) or []
        # keep only if our symbol is explicitly mentioned
        keep = any(t.get("ticker", "").upper() == symbol.upper() and float(t.get("relevance_score", 0) or 0) >= 0.30
                   for t in tickers)
        if not keep:
            continue

        rows.append({
            "published_at": item.get("time_published"),
            "source": item.get("source"),
            "title": item.get("title"),
            "summary": item.get("summary"),
            "url": item.get("url"),
            "overall_sentiment": item.get("overall_sentiment_label")
        })

    # ====== Forth APPROACH =====
    df = pd.DataFrame(rows)
    save_cache(cache_key, df.to_dict(orient="records"))
    return df


## src/system/router.py

In [None]:
from __future__ import annotations

def choose_agents(has_news: bool, has_prices: bool, has_technicals: bool) -> list[str]:
    agents = []
    if has_news: agents.append("news")
    # earnings optional if you add a financials fetch later
    if has_technicals and has_prices: agents.append("technical")
    agents.append("risk")
    return agents


## src/system/memory.py

In [None]:
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from src.config.settings import SETTINGS

MEM_PATH = SETTINGS.runs_dir / "run_notes.jsonl"

def append_memory(record: dict[str, Any]) -> None:
    MEM_PATH.parent.mkdir(parents=True, exist_ok=True)
    with MEM_PATH.open("a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")


## src/agents.py

In [None]:
# === Second approach

from __future__ import annotations
import os, json
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List

# Import shared helpers from analysis.text
from src.analysis.text import (
    strip_code_fences,
    to_float,
    clamp,
    normalize_score,
    normalize_conf,
)

In [1]:
"""
AGENT PROCESSING SYSTEM
Multi-Agent Financial Analysis
"""

from openai import OpenAI
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
import json
from datetime import datetime
import os
import time
import logging

# ============================================================================
# SETUP API KEYS (Open AI keys before running)
# ============================================================================

import os

# Alpha Vantage
os.environ["ALPHAVANTAGE_API_KEY"] = "BVGUKZR1MHVS0T6B"

# OpenAI - REPLACE WITH YOUR KEY
os.environ["OPENAI_API_KEY"] = "sk-proj-"  # REPLACE BEFORE RUNNING

# Initialize OpenAI client
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

print("API keys configured")

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


API keys configured


In [2]:
# ============================================================================
# DATA STRUCTURES
# ============================================================================

@dataclass
class AgentResponse:
    """
    Standardized response format returned by all financial analysis agents.
    Contains the agent's analysis text, numerical score, confidence level,
    key factors driving the assessment, and performance metrics like execution
    time and token usage. Ensures consistent output structure across all
    agents for easy aggregation and synthesis """

    agent_name: str
    analysis: str
    score: float
    confidence: float
    key_factors: List[str]
    timestamp: str
    execution_time: float = 0.0  # NEW: Track execution time
    token_usage: Optional[Dict[str, int]] = None  # NEW: Track token usage


@dataclass
class AgentMetrics:
    """
    Performance tracking metrics for monitoring agent efficiency and reliability.
    Records success/failure rates, execution times, and API token consumption
    to identify bottlenecks and optimize system performance. Used for debugging,
    cost analysis, and ensuring agents operate within acceptable performance bounds"""

    agent_name: str
    total_calls: int = 0
    successful_calls: int = 0
    failed_calls: int = 0
    total_execution_time: float = 0.0
    average_execution_time: float = 0.0
    total_tokens_used: int = 0


In [3]:

# ============================================================================
# SHARED CONTEXT MANAGER
# ============================================================================

class SharedContext:
    """
    Enables inter-agent communication by allowing agents to share insights,
    findings, and intermediate results with each other during analysis.
    Maintains both current context state and complete historical log of all
    shared information with timestamps and agent attribution. Used by agents
    to make more informed decisions based on collaborative intelligence from
    other specialists in the system """

    def __init__(self):
        self.context = {}
        self.history = []

    def add_insight(self, agent_name: str, key: str, value: Any):
        """Agent shares an insight"""
        self.context[key] = {
            'value': value,
            'from_agent': agent_name,
            'timestamp': datetime.now().isoformat()
        }
        self.history.append({
            'agent': agent_name,
            'key': key,
            'value': value,
            'timestamp': datetime.now().isoformat()
        })
        logger.info(f"{agent_name} shared insight: {key}")

    def get_insight(self, key: str) -> Optional[Any]:
        """Retrieve an insight"""
        return self.context.get(key, {}).get('value')

    def get_all_insights(self) -> Dict:
        """Get all shared insights"""
        return self.context

    def clear(self):
        """Clear all context"""
        self.context = {}
        self.history = []


# Initialize shared context for all agents
shared_context = SharedContext()


In [4]:
# ============================================================================
# BASE AGENT
# ============================================================================

class BaseAgent:
    """base class for all financial agents with memory, retry, and metrics"""

    def __init__(self, agent_name: str, model: str = "gpt-4o-mini"):
        self.agent_name = agent_name
        self.model = model
        self.memory = []  # Conversation history
        self.metrics = AgentMetrics(agent_name=agent_name)  # NEW: Performance metrics
        self.shared_context = shared_context  # NEW: Shared context with other agents
        logger.info(f"Initialized {agent_name}")

    def call_llm(self, system_prompt: str, user_message: str, max_retries: int = 3) -> tuple:
        """
        Call LLM with error handling, retry logic, and token tracking
        Returns: (response_text, token_usage)
        """
        for attempt in range(max_retries):
            try:
                start_time = time.time()

                response = client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_message}
                    ],
                    temperature=0.3,
                    max_tokens=800
                )

                elapsed = time.time() - start_time

                # Extract token usage
                token_usage = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                }

                # Update metrics
                self.metrics.total_tokens_used += token_usage['total_tokens']

                logger.info(f"{self.agent_name} LLM call completed in {elapsed:.2f}s, tokens: {token_usage['total_tokens']}")

                return response.choices[0].message.content, token_usage

            except Exception as e:
                if attempt == max_retries - 1:
                    logger.error(f"{self.agent_name} failed after {max_retries} attempts: {str(e)}")
                    return f"Error processing request: {str(e)}", None

                wait_time = 2 ** attempt  # Exponential backoff
                logger.warning(f"{self.agent_name} retry {attempt + 1}/{max_retries}, waiting {wait_time}s")
                time.sleep(wait_time)

    def add_to_memory(self, interaction: Dict):
        """Store conversation history"""
        self.memory.append({
            'timestamp': datetime.now().isoformat(),
            'input': interaction.get('input'),
            'output': interaction.get('output'),
            'metadata': interaction.get('metadata', {})
        })
        logger.debug(f"{self.agent_name} added to memory (total: {len(self.memory)})")

    def get_context(self, last_n: int = 5) -> List[Dict]:
        """Retrieve recent context from memory"""
        return self.memory[-last_n:] if self.memory else []

    def share_insight(self, key: str, value: Any):
        """Share an insight with other agents via shared context"""
        self.shared_context.add_insight(self.agent_name, key, value)

    def get_shared_insights(self) -> Dict:
        """Get insights shared by other agents"""
        return self.shared_context.get_all_insights()

    def update_metrics(self, success: bool, execution_time: float):
        """Update agent performance metrics"""
        self.metrics.total_calls += 1
        if success:
            self.metrics.successful_calls += 1
        else:
            self.metrics.failed_calls += 1

        self.metrics.total_execution_time += execution_time
        self.metrics.average_execution_time = (
            self.metrics.total_execution_time / self.metrics.total_calls
        )

    def get_metrics(self) -> AgentMetrics:
        """Get agent performance metrics"""
        return self.metrics

    def process(self, data: Dict[str, Any]) -> AgentResponse:
        """Override in each specialized agent"""
        raise NotImplementedError("Each agent must implement process method")


## NewsAnalysisAgent
This agent reads news headlines about a stock, asks GPT-4 "are these headlines good or bad news?", gets back a sentiment score, and packages it in a standardized format for the orchestrator to use.
For example: if someone whats get sentiment from new, this agent is  reading news and telling if it's bullish or bearish.

In [5]:
# ============================================================================
# SPECIALIZED AGENTS
# ============================================================================

class NewsAnalysisAgent(BaseAgent):
    """Analyzes financial news sentiment and impact"""

    def __init__(self):
        super().__init__("News Analysis Agent")
        self.system_prompt = """You are a financial news analyst specializing in sentiment analysis.

INSTRUCTIONS:
1. Analyze news articles objectively
2. Consider both positive and negative aspects
3. Provide a sentiment score from -1 (very negative) to +1 (very positive)
4. Identify key factors driving the sentiment
5. Assess potential stock price impact

EXAMPLE OUTPUT:
{
  "sentiment_score": 0.75,
  "analysis": "Strong positive sentiment driven by earnings beat and product launch",
  "key_factors": ["Earnings exceeded expectations", "New product well-received"],
  "confidence": 0.85
}

Return ONLY valid JSON with keys: sentiment_score, analysis, key_factors, confidence"""

    def process(self, data: Dict[str, Any]) -> AgentResponse:
        """Process news data for sentiment analysis"""
        start_time = time.time()

        try:
            ticker = data.get('ticker', 'AAPL')
            news_articles = data.get('news', [])

            # Data validation
            if not news_articles:
                logger.warning(f"{self.agent_name}: No news data available")
                return AgentResponse(
                    agent_name=self.agent_name,
                    analysis="No news data available for analysis",
                    score=0.0,
                    confidence=0.0,
                    key_factors=["No news data available"],
                    timestamp=datetime.now().isoformat(),
                    execution_time=time.time() - start_time
                )

            # Prepare news summary
            news_summary = "\n".join([
                f"- {article.get('title', '')}: {article.get('summary', '')}"
                for article in news_articles[:5]
            ])

            user_message = f"""Analyze the following recent news about {ticker}:

{news_summary}

Provide sentiment analysis and impact assessment."""

            # Call LLM
            llm_response, token_usage = self.call_llm(self.system_prompt, user_message)

            # Parse response
            try:
                result = json.loads(llm_response)
                score = result.get('sentiment_score', 0)
                analysis = result.get('analysis', llm_response)
                key_factors = result.get('key_factors', [])

                confidence_raw = result.get('confidence', 0.7)
                try:
                    confidence = max(0.0, min(1.0, float(confidence_raw)))
                except (ValueError, TypeError):
                    confidence = 0.7

            except json.JSONDecodeError:
                logger.warning(f"{self.agent_name}: Failed to parse JSON response")
                score = 0
                analysis = llm_response
                key_factors = ["Unable to parse structured response"]
                confidence = 0.5

            execution_time = time.time() - start_time

            # Add to memory
            self.add_to_memory({
                'input': {'ticker': ticker, 'news_count': len(news_articles)},
                'output': analysis,
                'metadata': {'score': score, 'confidence': confidence}
            })

            # Share insight with other agents
            self.share_insight('news_sentiment', score)
            self.share_insight('news_confidence', confidence)

            # Update metrics
            self.update_metrics(success=True, execution_time=execution_time)

            return AgentResponse(
                agent_name=self.agent_name,
                analysis=analysis,
                score=float(score),
                confidence=float(confidence),
                key_factors=key_factors,
                timestamp=datetime.now().isoformat(),
                execution_time=execution_time,
                token_usage=token_usage
            )

        except Exception as e:
            execution_time = time.time() - start_time
            self.update_metrics(success=False, execution_time=execution_time)
            logger.error(f"{self.agent_name} error: {e}")
            raise

## EarningsAnalysisAgent
The Earnings Analysis Agent examines a company's financial performance (revenue, profits, earnings per share) and determines if the fundamentals are strong by comparing actual results against analyst expectations. It sends this financial data to GPT-4, which returns a score (-1 to +1) indicating whether the company's financials suggest it's a good or weak investment.

In [6]:
class EarningsAnalysisAgent(BaseAgent):
    """Analyzes earnings reports and financial statements"""

    def __init__(self):
        super().__init__("Earnings Analysis Agent")
        self.system_prompt = """You are a financial analyst specializing in earnings and fundamental analysis.

INSTRUCTIONS:
1. Analyze company financial metrics objectively
2. Compare actuals vs expectations
3. Assess fundamental strength from -1 (very weak) to +1 (very strong)
4. Identify key financial drivers
5. Evaluate growth trends

EXAMPLE OUTPUT:
{
  "fundamental_score": 0.80,
  "analysis": "Strong fundamentals with revenue and EPS beating expectations",
  "key_factors": ["Revenue beat by 5%", "EPS exceeded estimates", "Margin expansion"],
  "confidence": 0.88
}

Return ONLY valid JSON with keys: fundamental_score, analysis, key_factors, confidence"""

    def process(self, data: Dict[str, Any]) -> AgentResponse:
        """Process earnings and financial data"""
        start_time = time.time()

        try:
            ticker = data.get('ticker', 'UNKNOWN')
            financials = data.get('financials', {})

            # Check for shared insights from other agents
            news_sentiment = self.shared_context.get_insight('news_sentiment')
            if news_sentiment:
                logger.info(f"{self.agent_name}: Considering news sentiment = {news_sentiment}")

            financial_summary = f"""
Company: {ticker}
Revenue: ${financials.get('revenue', 'N/A')}B
EPS: ${financials.get('eps', 'N/A')}
Revenue Growth: {financials.get('revenue_growth', 'N/A')}%
Profit Margin: {financials.get('profit_margin', 'N/A')}%
Expected Revenue: ${financials.get('expected_revenue', 'N/A')}B
Expected EPS: ${financials.get('expected_eps', 'N/A')}
"""

            user_message = f"""Analyze the following financial data for {ticker}:

{financial_summary}

Assess fundamental strength and growth prospects."""

            llm_response, token_usage = self.call_llm(self.system_prompt, user_message)

            try:
                result = json.loads(llm_response)
                score = result.get('fundamental_score', 0)
                analysis = result.get('analysis', llm_response)
                key_factors = result.get('key_factors', [])

                confidence_raw = result.get('confidence', 0.8)
                try:
                    confidence = max(0.0, min(1.0, float(confidence_raw)))
                except (ValueError, TypeError):
                    confidence = 0.8

            except json.JSONDecodeError:
                logger.warning(f"{self.agent_name}: Failed to parse JSON response")
                score = 0
                analysis = llm_response
                key_factors = ["Unable to parse structured response"]
                confidence = 0.6

            execution_time = time.time() - start_time

            self.add_to_memory({
                'input': {'ticker': ticker, 'financials': financials},
                'output': analysis,
                'metadata': {'score': score, 'confidence': confidence}
            })

            self.share_insight('fundamental_score', score)
            self.share_insight('fundamental_confidence', confidence)

            self.update_metrics(success=True, execution_time=execution_time)

            return AgentResponse(
                agent_name=self.agent_name,
                analysis=analysis,
                score=float(score),
                confidence=float(confidence),
                key_factors=key_factors,
                timestamp=datetime.now().isoformat(),
                execution_time=execution_time,
                token_usage=token_usage
            )

        except Exception as e:
            execution_time = time.time() - start_time
            self.update_metrics(success=False, execution_time=execution_time)
            logger.error(f"{self.agent_name} error: {e}")
            raise



## MarketSignalsAgent
The MarketSignalsAgent performs technical analysis by examining stock price patterns, trading volume, and technical indicators (like moving averages, RSI, MACD) to identify trends and momentum. It sends this technical data to GPT-4, which returns a score (-1 to +1) indicating whether the stock's price action suggests a bullish or bearish trend based on chart patterns and trading signals.

In [7]:
class MarketSignalsAgent(BaseAgent):
    """Performs technical analysis on market data"""

    def __init__(self):
        super().__init__("Market Signals Agent")
        self.system_prompt = """You are a technical analyst specializing in market signals and price patterns.

INSTRUCTIONS:
1. Analyze technical indicators objectively
2. Assess technical strength from -1 (very bearish) to +1 (very bullish)
3. Identify support/resistance levels
4. Evaluate trend direction and momentum
5. Consider volume patterns

EXAMPLE OUTPUT:
{
  "technical_score": 0.65,
  "analysis": "Bullish technical setup with price above key moving averages",
  "key_factors": ["Price above 50-day MA", "RSI indicates strength", "Volume confirming uptrend"],
  "confidence": 0.75
}

Return ONLY valid JSON with keys: technical_score, analysis, key_factors, confidence"""

    def process(self, data: Dict[str, Any]) -> AgentResponse:
        """Process technical market data"""
        start_time = time.time()

        try:
            ticker = data.get('ticker', 'UNKNOWN')
            technicals = data.get('technicals', {})

            # Validate data
            if not technicals.get('current_price') or technicals.get('current_price') == 'N/A':
                logger.warning(f"{self.agent_name}: Insufficient technical data")
                return AgentResponse(
                    agent_name=self.agent_name,
                    analysis="Insufficient technical data for analysis",
                    score=0.0,
                    confidence=0.0,
                    key_factors=["Missing price data"],
                    timestamp=datetime.now().isoformat(),
                    execution_time=time.time() - start_time
                )

            technical_summary = f"""
Ticker: {ticker}
Current Price: ${technicals.get('current_price', 'N/A')}
50-day MA: ${technicals.get('ma_50', 'N/A')}
200-day MA: ${technicals.get('ma_200', 'N/A')}
RSI: {technicals.get('rsi', 'N/A')}
MACD: {technicals.get('macd', 'N/A')}
Volume: {technicals.get('volume', 'N/A')} (Avg: {technicals.get('avg_volume', 'N/A')})
Support: ${technicals.get('support', 'N/A')}
Resistance: ${technicals.get('resistance', 'N/A')}
"""

            user_message = f"""Analyze the following technical data for {ticker}:

{technical_summary}

Assess technical strength and price momentum."""

            llm_response, token_usage = self.call_llm(self.system_prompt, user_message)

            try:
                result = json.loads(llm_response)
                score = result.get('technical_score', 0)
                analysis = result.get('analysis', llm_response)
                key_factors = result.get('key_factors', [])

                confidence_raw = result.get('confidence', 0.7)
                try:
                    confidence = max(0.0, min(1.0, float(confidence_raw)))
                except (ValueError, TypeError):
                    confidence = 0.7

            except json.JSONDecodeError:
                logger.warning(f"{self.agent_name}: Failed to parse JSON response")
                score = 0
                analysis = llm_response
                key_factors = ["Unable to parse structured response"]
                confidence = 0.5

            execution_time = time.time() - start_time

            self.add_to_memory({
                'input': {'ticker': ticker, 'technicals': technicals},
                'output': analysis,
                'metadata': {'score': score, 'confidence': confidence}
            })

            self.share_insight('technical_score', score)
            self.share_insight('technical_confidence', confidence)

            self.update_metrics(success=True, execution_time=execution_time)

            return AgentResponse(
                agent_name=self.agent_name,
                analysis=analysis,
                score=float(score),
                confidence=float(confidence),
                key_factors=key_factors,
                timestamp=datetime.now().isoformat(),
                execution_time=execution_time,
                token_usage=token_usage
            )

        except Exception as e:
            execution_time = time.time() - start_time
            self.update_metrics(success=False, execution_time=execution_time)
            logger.error(f"{self.agent_name} error: {e}")
            raise

## RiskAssessmentAgent
The RiskAssessmentAgent evaluates investment risk by analyzing metrics like beta (volatility), Value at Risk, Sharpe ratio, and sector correlation to determine how risky a stock is for a portfolio. It sends these risk metrics to GPT-4, which returns a risk score (0 to 1, where 0 is low risk and 1 is high risk) along with warnings about potential portfolio concentration or volatility issues.

In [8]:
class RiskAssessmentAgent(BaseAgent):
    """Assesses investment risk and portfolio fit"""

    def __init__(self):
        super().__init__("Risk Assessment Agent")
        self.system_prompt = """You are a risk management analyst specializing in portfolio risk assessment.

INSTRUCTIONS:
1. Analyze risk metrics objectively
2. Provide risk level score from 0 (very low risk) to 1 (very high risk)
3. Identify key risk factors
4. Assess portfolio diversification implications
5. Evaluate risk-adjusted returns

EXAMPLE OUTPUT:
{
  "risk_score": 0.45,
  "analysis": "Moderate risk profile with acceptable volatility and strong Sharpe ratio",
  "key_factors": ["Beta of 1.15 indicates moderate volatility", "Strong Sharpe ratio", "Manageable drawdown"],
  "confidence": 0.82
}

Return ONLY valid JSON with keys: risk_score, analysis, key_factors, confidence"""

    def process(self, data: Dict[str, Any]) -> AgentResponse:
        """Process risk metrics"""
        start_time = time.time()

        try:
            ticker = data.get('ticker', 'UNKNOWN')
            risk_data = data.get('risk_metrics', {})

            risk_summary = f"""
Ticker: {ticker}
Beta: {risk_data.get('beta', 'N/A')}
Volatility (30-day): {risk_data.get('volatility', 'N/A')}%
Value at Risk (5%): ${risk_data.get('var_5', 'N/A')}
Sharpe Ratio: {risk_data.get('sharpe_ratio', 'N/A')}
Max Drawdown: {risk_data.get('max_drawdown', 'N/A')}%
Sector Correlation: {risk_data.get('sector_correlation', 'N/A')}
P/E Ratio: {risk_data.get('pe_ratio', 'N/A')}
"""

            user_message = f"""Analyze the following risk metrics for {ticker}:

{risk_summary}

Assess overall investment risk and portfolio implications."""

            llm_response, token_usage = self.call_llm(self.system_prompt, user_message)

            try:
                result = json.loads(llm_response)
                score = result.get('risk_score', 0.5)
                analysis = result.get('analysis', llm_response)
                key_factors = result.get('key_factors', [])

                confidence_raw = result.get('confidence', 0.8)
                try:
                    confidence = max(0.0, min(1.0, float(confidence_raw)))
                except (ValueError, TypeError):
                    confidence = 0.8

            except json.JSONDecodeError:
                logger.warning(f"{self.agent_name}: Failed to parse JSON response")
                score = 0.5
                analysis = llm_response
                key_factors = ["Unable to parse structured response"]
                confidence = 0.6

            execution_time = time.time() - start_time

            self.add_to_memory({
                'input': {'ticker': ticker, 'risk_metrics': risk_data},
                'output': analysis,
                'metadata': {'score': score, 'confidence': confidence}
            })

            self.share_insight('risk_score', score)
            self.share_insight('risk_confidence', confidence)

            self.update_metrics(success=True, execution_time=execution_time)

            return AgentResponse(
                agent_name=self.agent_name,
                analysis=analysis,
                score=float(score),
                confidence=float(confidence),
                key_factors=key_factors,
                timestamp=datetime.now().isoformat(),
                execution_time=execution_time,
                token_usage=token_usage
            )

        except Exception as e:
            execution_time = time.time() - start_time
            self.update_metrics(success=False, execution_time=execution_time)
            logger.error(f"{self.agent_name} error: {e}")
            raise



## SynthesisAgent
The SynthesisAgent acts as the "decision maker" that takes all the individual agent analyses (news sentiment, earnings strength, technical signals, risk level) and combines them into a single investment recommendation (STRONG BUY, BUY, HOLD, SELL, STRONG SELL). It sends a summary of all agent scores and findings to GPT-4, which weighs the different perspectives and returns a final actionable recommendation with confidence level and supporting reasoning.

In [9]:
class SynthesisAgent(BaseAgent):
    """Combines insights from all agents into final recommendation"""

    def __init__(self):
        super().__init__("Research Synthesis Agent")
        self.system_prompt = """You are a senior investment analyst who synthesizes multiple analyses into actionable recommendations.

INSTRUCTIONS:
1. Review all agent analyses objectively
2. Weigh different factors appropriately
3. Provide clear investment recommendation (STRONG BUY, BUY, HOLD, SELL, STRONG SELL)
4. State confidence level (0 to 1)
5. Summarize key reasoning
6. Note important risks

EXAMPLE OUTPUT:
{
  "recommendation": "BUY",
  "confidence": 0.78,
  "analysis": "Strong fundamentals and positive technical signals support a buy recommendation despite moderate risk",
  "key_points": ["Earnings beat expectations", "Technical breakout", "Acceptable risk profile"],
  "risks": ["Market volatility", "Sector headwinds"]
}

Return ONLY valid JSON with keys: recommendation, confidence, analysis, key_points, risks"""

    def process(self, agent_responses: List[AgentResponse]) -> AgentResponse:
        """Synthesize all agent responses"""
        start_time = time.time()

        try:
            # Check shared insights
            all_insights = self.get_shared_insights()
            logger.info(f"{self.agent_name}: Reviewing {len(all_insights)} shared insights")

            analyses_summary = "\n\n".join([
                f"{resp.agent_name}:\n"
                f"Score: {resp.score}\n"
                f"Confidence: {resp.confidence}\n"
                f"Analysis: {resp.analysis}\n"
                f"Key Factors: {', '.join(resp.key_factors)}"
                for resp in agent_responses
            ])

            user_message = f"""Synthesize the following analyses into a final investment recommendation:

{analyses_summary}

Provide comprehensive investment recommendation with supporting reasoning."""

            llm_response, token_usage = self.call_llm(self.system_prompt, user_message)

            try:
                result = json.loads(llm_response)
                recommendation = result.get('recommendation', 'HOLD')
                analysis = result.get('analysis', llm_response)
                key_factors = result.get('key_points', [])

                confidence_raw = result.get('confidence', 0.7)
                try:
                    confidence = max(0.0, min(1.0, float(confidence_raw)))
                except (ValueError, TypeError):
                    confidence = 0.7

                rec_to_score = {
                    'STRONG BUY': 1.0,
                    'BUY': 0.6,
                    'HOLD': 0.0,
                    'SELL': -0.6,
                    'STRONG SELL': -1.0
                }
                score = rec_to_score.get(recommendation, 0.0)

            except json.JSONDecodeError:
                logger.warning(f"{self.agent_name}: Failed to parse JSON response")
                score = 0
                analysis = llm_response
                key_factors = ["Unable to parse structured response"]
                confidence = 0.6

            execution_time = time.time() - start_time

            self.add_to_memory({
                'input': {'num_agents': len(agent_responses)},
                'output': analysis,
                'metadata': {'recommendation': recommendation, 'confidence': confidence}
            })

            self.share_insight('final_recommendation', recommendation)
            self.share_insight('final_score', score)

            self.update_metrics(success=True, execution_time=execution_time)

            return AgentResponse(
                agent_name=self.agent_name,
                analysis=analysis,
                score=float(score),
                confidence=float(confidence),
                key_factors=key_factors,
                timestamp=datetime.now().isoformat(),
                execution_time=execution_time,
                token_usage=token_usage
            )

        except Exception as e:
            execution_time = time.time() - start_time
            self.update_metrics(success=False, execution_time=execution_time)
            logger.error(f"{self.agent_name} error: {e}")
            raise


## CritiqueAgent

The CritiqueAgent acts as a "quality control checker" that reviews the final investment recommendation to catch mistakes, biases, or missing information before presenting it to the user. It examines the SynthesisAgent's recommendation, asks GPT-4 to identify logical flaws or gaps in reasoning, and can adjust the confidence level downward if it finds issues (like  "didn't consider macroeconomic factors"), ensuring the final output is reliable and well-reasoned.

In [10]:
class CritiqueAgent(BaseAgent):
    """Reviews and validates analysis quality"""

    def __init__(self):
        super().__init__("Critique & Validation Agent")
        self.system_prompt = """You are a critique analyst who reviews investment recommendations for biases, logical errors, and completeness.

INSTRUCTIONS:
1. Review the synthesis objectively
2. Identify logical inconsistencies
3. Detect potential biases
4. Note missing considerations
5. Assess data quality
6. Recommend confidence adjustments

EXAMPLE OUTPUT:
{
  "quality_score": 0.82,
  "issues_found": ["Limited macroeconomic analysis"],
  "suggestions": ["Consider Federal Reserve policy impact", "Add sector comparison"],
  "adjusted_confidence": 0.75
}

Return ONLY valid JSON with keys: quality_score, issues_found, suggestions, adjusted_confidence"""

    def process(self, synthesis_response: AgentResponse) -> AgentResponse:
        """Critique the synthesis"""
        start_time = time.time()

        try:
            user_message = f"""Review this investment analysis for quality and completeness:

Recommendation: {synthesis_response.analysis}
Confidence: {synthesis_response.confidence}
Key Factors: {', '.join(synthesis_response.key_factors)}

Identify any issues, biases, or missing elements."""

            llm_response, token_usage = self.call_llm(self.system_prompt, user_message)

            try:
                result = json.loads(llm_response)
                quality_score = result.get('quality_score', 0.7)
                issues = result.get('issues_found', [])
                suggestions = result.get('suggestions', [])
                adjusted_confidence_raw = result.get('adjusted_confidence', synthesis_response.confidence)

                try:
                    adjusted_confidence = max(0.0, min(1.0, float(adjusted_confidence_raw)))
                except (ValueError, TypeError):
                    adjusted_confidence = synthesis_response.confidence

                analysis = f"Quality Score: {quality_score}\n"
                if issues:
                    analysis += f"Issues Found: {', '.join(issues)}\n"
                if suggestions:
                    analysis += f"Suggestions: {', '.join(suggestions)}"

                key_factors = issues if issues else ["No major issues found"]

            except json.JSONDecodeError:
                logger.warning(f"{self.agent_name}: Failed to parse JSON response")
                quality_score = 0.7
                analysis = llm_response
                adjusted_confidence = synthesis_response.confidence
                key_factors = ["Unable to parse structured response"]

            execution_time = time.time() - start_time

            self.add_to_memory({
                'input': {'synthesis_confidence': synthesis_response.confidence},
                'output': analysis,
                'metadata': {'quality_score': quality_score, 'adjusted_confidence': adjusted_confidence}
            })

            self.share_insight('quality_score', quality_score)
            self.share_insight('adjusted_confidence', adjusted_confidence)

            self.update_metrics(success=True, execution_time=execution_time)

            return AgentResponse(
                agent_name=self.agent_name,
                analysis=analysis,
                score=float(quality_score),
                confidence=float(adjusted_confidence),
                key_factors=key_factors,
                timestamp=datetime.now().isoformat(),
                execution_time=execution_time,
                token_usage=token_usage
            )

        except Exception as e:
            execution_time = time.time() - start_time
            self.update_metrics(success=False, execution_time=execution_time)
            logger.error(f"{self.agent_name} error: {e}")
            raise

In [11]:
# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def print_agent_metrics():
    """Print performance metrics for all agents"""
    print("\n" + "="*80)
    print("AGENT PERFORMANCE METRICS")
    print("="*80)

    agents = [
        NewsAnalysisAgent(),
        EarningsAnalysisAgent(),
        MarketSignalsAgent(),
        RiskAssessmentAgent(),
        SynthesisAgent(),
        CritiqueAgent()
    ]

    for agent in agents:
        metrics = agent.get_metrics()
        print(f"\n{metrics.agent_name}:")
        print(f"  Total Calls: {metrics.total_calls}")
        print(f"  Success Rate: {metrics.successful_calls}/{metrics.total_calls}")
        print(f"  Avg Execution Time: {metrics.average_execution_time:.2f}s")
        print(f"  Total Tokens Used: {metrics.total_tokens_used}")


def print_shared_insights():
    """Print all shared insights between agents"""
    print("\n" + "="*80)
    print("SHARED INSIGHTS")
    print("="*80)

    insights = shared_context.get_all_insights()
    for key, data in insights.items():
        print(f"\n{key}: {data['value']}")
        print(f"  From: {data['from_agent']}")
        print(f"  Time: {data['timestamp']}")

In [None]:
# -*- coding: utf-8 -*-
"""notebooks/01_data_ingestion.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1J_syHGmJHhA8XcAtxw7nQ0KPWMJbnSkd
"""

#import os
os.environ["ALPHAVANTAGE_API_KEY"] = "BVGUKZR1MHVS0T6B"

import os
import requests
import pandas as pd
import yfinance as yf
from datasets import Dataset

# ------------------------------
# Helper: Convert Pandas → Hugging Face Dataset
# ------------------------------
def to_hf(df, schema=None):
    """Convert a pandas DataFrame to a Hugging Face Dataset. Handles empty gracefully."""
    if df is None or getattr(df, "empty", True):
        if schema:
            return Dataset.from_dict({c: [] for c in schema})
        return Dataset.from_dict({})
    if schema:
        df = df[[c for c in schema if c in df.columns]].copy()
    return Dataset.from_pandas(df.reset_index(drop=True), preserve_index=False)

# ------------------------------
# Alpha Vantage Connector (for news + indicators only)
# ------------------------------
class AlphaConnector:
    def __init__(self, api_key=None):
        # Pick up API key from os.environ if not passed directly
        self.api_key = api_key or os.getenv("ALPHAVANTAGE_API_KEY")
        if not self.api_key:
            raise ValueError("Alpha Vantage API key not found. Set os.environ['ALPHAVANTAGE_API_KEY'].")

        self.base_url = "https://www.alphavantage.co/query"

    def fetch_news(self, symbol):
        """Fetch company news & sentiment (Alpha Vantage)."""
        params = {
            "function": "NEWS_SENTIMENT",
            "tickers": symbol,
            "apikey": self.api_key
        }
        r = requests.get(self.base_url, params=params)
        data = r.json()

        if "feed" not in data:
            print("No news data:", data)
            return pd.DataFrame()

        rows = []
        for item in data["feed"]:
            rows.append({
                "published_at": item.get("time_published"),
                "source": item.get("source"),
                "title": item.get("title"),
                "summary": item.get("summary"),
                "url": item.get("url"),
                "overall_sentiment": item.get("overall_sentiment_label"),
                # ** Added By Ali **
                "overall_sentiment_score": item.get("overall_sentiment_score") # both label and score so later agents (NewsAnalysisAgent, SynthesisAgent, etc.) can use either
            })
        return pd.DataFrame(rows)

    def fetch_indicator(self, symbol, indicator, interval="daily", time_period=14, series_type="close"):
        """Generic technical indicator fetch (SMA, RSI, MACD)."""
        params = {
            "function": indicator,
            "symbol": symbol,
            "interval": interval,
            "time_period": time_period,
            "series_type": series_type,
            "apikey": self.api_key
        }
        r = requests.get(self.base_url, params=params)
        data = r.json()

        key_map = {
            "SMA": "Technical Analysis: SMA",
            "RSI": "Technical Analysis: RSI",
            "MACD": "Technical Analysis: MACD"
        }
        key = key_map.get(indicator)
        if key not in data:
            print(f"{indicator} fetch failed:", data)
            return pd.DataFrame()

        df = pd.DataFrame.from_dict(data[key], orient="index")
        df.index = pd.to_datetime(df.index)
        df.reset_index(inplace=True)
        df = df.rename(columns={"index": "date"})

        # Cast numeric values
        for col in df.columns:
            if col != "date":
                df[col] = df[col].astype(float)

        return df

# ------------------------------
# Data Ingestion Manager
# ------------------------------
class DataIngestionManager:
    def __init__(self, api_key=None):
        self.alpha = AlphaConnector(api_key)

    def fetch_all(self, symbol, start=None, end=None):
        """Fetch prices (Yahoo), news (Alpha Vantage), SMA, RSI (Alpha Vantage)."""
        datasets = {}

        # Prices from Yahoo Finance (unlimited)
        try:
            df_prices = yf.download(symbol, start=start, end=end, progress=False)

            # Flatten MultiIndex columns if necessary
            if isinstance(df_prices.columns, pd.MultiIndex):
                df_prices.columns = [c[0].lower() for c in df_prices.columns]

            df_prices = df_prices.reset_index().rename(columns={
                "Date": "date",
                "open": "open",
                "high": "high",
                "low": "low",
                "close": "close",
                "adj close": "adj_close",
                "volume": "volume"
            })
            df_prices["date"] = df_prices["date"].astype(str)

            datasets["prices"] = to_hf(
                df_prices, schema=["date","open","high","low","close","adj_close","volume"]
            )
        except Exception as e:
            print("Yahoo Finance fetch failed:", e)
            datasets["prices"] = to_hf(pd.DataFrame(), schema=["date","open","high","low","close","adj_close","volume"])

        # News from Alpha Vantage
        datasets["news"] = to_hf(
            self.alpha.fetch_news(symbol),
            schema=["published_at","source","title","summary","url","overall_sentiment"]
        )

        # Technical Indicators from Alpha Vantage
        datasets["sma"] = to_hf(
            self.alpha.fetch_indicator(symbol, "SMA", time_period=20),
            schema=["date","SMA"]
        )
        datasets["rsi"] = to_hf(
            self.alpha.fetch_indicator(symbol, "RSI", time_period=14),
            schema=["date","RSI"]
        )

        # Removed MACD to avoid premium-only error
        return datasets

from datetime import datetime, timedelta

mgr = DataIngestionManager()  # will pick up the key from os.environ
symbol = "AAPL"
start = (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d")
end   = datetime.now().strftime("%Y-%m-%d")

datasets = mgr.fetch_all(symbol, start, end)

print("Prices sample:")
print(datasets["prices"].to_pandas().head())

print("News sample:")
print(datasets["news"].to_pandas().head())

  df_prices = yf.download(symbol, start=start, end=end, progress=False)


No news data: {'Information': 'We have detected your API key as BVGUKZR1MHVS0T6B and our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.'}
SMA fetch failed: {'Information': 'We have detected your API key as BVGUKZR1MHVS0T6B and our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.'}
RSI fetch failed: {'Information': 'We have detected your API key as BVGUKZR1MHVS0T6B and our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.'}
Prices sample:
         date        open        high         low       close     volume
0  2025-09-18  239.970001  241.199997  236.649994  237.880005   44249600
1  2025-09-19  241.229996  246

## ui/gradio_app.py

In [None]:
import os, sys
from pathlib import Path

# Works in both scripts and notebooks:
HERE = Path(__file__).parent if "__file__" in globals() else Path.cwd()
ROOT = (HERE / "..").resolve()
sys.path.append(str(ROOT))
import gradio as gr
import pandas as pd
from datetime import date, timedelta
from src.system.orchestrator import run_pipeline


def run(symbol, days_back, required_tags_csv):
    import json
    import pandas as pd

    # ---------- helpers ----------
    def as_text(x):
        if x is None:
            return ""
        if isinstance(x, str):
            return x
        if isinstance(x, (dict, list)):
            # pretty JSON for readability / stable comparisons
            return json.dumps(x, ensure_ascii=False, indent=2, sort_keys=True)
        return str(x)

    def clean(s: str) -> str:
        # normalize whitespace and strip code fences if any slipped through
        if not isinstance(s, str):
            s = as_text(s)
        s = s.strip()
        if s.startswith("```"):
            s = s.strip("`").strip()
        return s

    def synth_to_prose(obj):
        # If it's already text, just return normalized text
        if not isinstance(obj, dict):
            return clean(as_text(obj))

        parts = []

        ms = obj.get("market_signals") or {}
        if ms:
            ms_bits = []
            cp = ms.get("current_price")
            if isinstance(cp, (int, float)):
                ms_bits.append(f"price ${cp:,.2f}")
            ma = ms.get("moving_averages") or {}
            ma50 = ma.get("50_day")
            ma200 = ma.get("200_day")
            if ma50 is not None or ma200 is not None:
                ms_bits.append(f"vs 50D {ma50}, 200D {ma200}")
            rsi = ms.get("RSI")
            if rsi is not None:
                ms_bits.append(f"RSI {rsi}")
            trend = ms.get("trend")
            if trend:
                ms_bits.append(trend)
            vol = ms.get("volume") or {}
            vcur, vavg = vol.get("current"), vol.get("average")
            if vcur is not None and vavg is not None:
                ms_bits.append(f"volume {vcur:,} vs avg {vavg:,}")
            if ms_bits:
                parts.append("Technicals: " + ", ".join(str(x) for x in ms_bits if x))

        news = obj.get("news") or {}
        if news:
            # keep order stable-ish
            news_bits = []
            for k in ("sentiment", "growth potential", "competitive landscape"):
                if k in news:
                    news_bits.append(f"{k}: {news[k]}")
            # include any other keys if present
            for k, v in news.items():
                if k not in ("sentiment", "growth potential", "competitive landscape"):
                    news_bits.append(f"{k}: {v}")
            parts.append("News: " + "; ".join(news_bits))

        risk = obj.get("risk_assessment") or {}
        if risk:
            risk_bits = []
            for k in ("volatility", "data_gaps", "idiosyncratic_risks"):
                if k in risk:
                    risk_bits.append(f"{k}: {v}")
            for k, v in risk.items():
                if k not in ("volatility", "data_gaps", "idiosyncratic_risks"):
                    risk_bits.append(f"{k}: {v}")
            parts.append("Risk: " + "; ".join(risk_bits))

        return "\n".join(parts).strip()

    def to_df(x):
        if isinstance(x, pd.DataFrame):
            return x
        if x is None:
            return pd.DataFrame()
        try:
            return pd.DataFrame(x)
        except Exception:
            return pd.DataFrame()
    # ---------- /helpers ----------

    start = (date.today() - timedelta(days=int(days_back))).isoformat()
    end = date.today().isoformat()
    tags = [t.strip() for t in required_tags_csv.split(",")] if required_tags_csv else None

    res = run_pipeline(symbol.strip().upper(), start, end, required_tags=tags)

    # Detect if optimizer re-synthesis ran (compare Initial vs Final on normalized JSON text)
    optimizer_ran = False
    init = next((a for a in res.agent_outputs if a.agent_name in {"Initial Synthesis", "Research Synthesis Agent", "SynthesisAgent"}), None)
    if init is not None:
        init_txt = clean(as_text(init.analysis))
        final_txt_norm = clean(as_text(res.final.analysis))
        optimizer_ran = (init_txt != final_txt_norm)

    plan = "\n".join([f"• {step}" for step in res.plan])

    # Agents panel: Synthesis agents shown as prose; others as text
    agents_txt = "\n\n".join([
        (
            f"[{a.agent_name}] score={a.score:.2f} conf={a.confidence:.2f}\n"
            f"{synth_to_prose(a.analysis) if ('synthesis' in a.agent_name.lower()) else clean(as_text(a.analysis))}"
        )
        for a in res.agent_outputs
    ])

    # Evidence tables (force DataFrame)
    news_rows = to_df(res.evidence.get("top_news", []))
    prices_rows = to_df(res.evidence.get("prices_tail", []))

    # Helpful note when news evidence is empty
    if news_rows.empty:
        agents_txt += "\n\n[Note] No news items matched filters or API limits were hit today."

    # ---- Critique FIRST (so it appears above Final in the UI) ----
    crit_txt = (
        f"[Critique]\n"
        f"score={res.critique.score:.2f} adj_conf={res.critique.confidence:.2f}\n"
        f"{clean(as_text(res.critique.analysis))}"
    )

    # ---- Final AFTER Critique ----
    headline = "FINAL (After Critique)"
    opt_line = "[Optimizer ran: YES]" if optimizer_ran else "[Optimizer ran: NO]"
    final_txt = (
        f"{headline}\n{opt_line}\n"
        f"score={res.final.score:.2f} conf={res.final.confidence:.2f}\n"
        f"{synth_to_prose(res.final.analysis)}\n\nKey: {', '.join(res.final.key_factors)}"
    )

    # IMPORTANT: return order matches component outputs order (crit BEFORE final)
    return plan, agents_txt, crit_txt, final_txt, news_rows, prices_rows


with gr.Blocks(title="Agentic Finance") as demo:
    gr.Markdown("# Agentic Finance — Interactive Tester")

    with gr.Row():
        symbol = gr.Textbox(label="Ticker", value="AAPL")
        days_back = gr.Slider(7, 120, value=30, step=1, label="Days Back")
        tags = gr.Textbox(label="Required Tags (optional, comma-sep)", placeholder="earnings, product")
    run_btn = gr.Button("Run")

    plan = gr.Textbox(label="Plan", lines=6)
    agents = gr.Textbox(label="Agent Outputs", lines=14)

    #  Critique ABOVE Final
    crit = gr.Textbox(label="Critique", lines=8)
    final = gr.Textbox(label="Final Recommendation", lines=10)

    news_tbl = gr.Dataframe(
        headers=["published_at","source","title","summary","url","overall_sentiment","tags","numbers"],
        label="Top News (evidence)",
        wrap=True
    )
    prices_tbl = gr.Dataframe(label="Recent Prices (evidence)")

    # Outputs order: plan, agents, CRITIQUE, FINAL, news, prices
    run_btn.click(
        run,
        inputs=[symbol, days_back, tags],
        outputs=[plan, agents, crit, final, news_tbl, prices_tbl]
    )

if __name__ == "__main__":
    demo.launch()


  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


  df = yf.download(symbol, start=start, end=end, progress=False)
  df = yf.download(symbol, start=start, end=end, progress=False)
