In [1]:
import sys, os
sys.path.append("..")

import pandas as pd
import numpy as np
import yfinance as yf  # Yahoo Finance API

from agents import MemoryAgent


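`BenchmarkAgent` fetches historical price data for each benchmark ticker (e.g. SPY, QQQ).

It computes performance metrics from the daily returns: CAGR, annualized volatility, Sharpe ratio, and maximum drawdown (MaxDD).

It returns the results to `EvaluationAgent` or `PortfolioManagerAgent`.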

In [2]:
class BenchmarkAgent:
    """Download benchmark price history and summarise its performance.

    Parameters
    ----------
    tickers : str or iterable of str, optional
        Symbols to download. ``None`` (the default) means ``["SPY"]``.
        A single string is treated as one ticker. The values are copied
        into a new list, so instances never share or alias a caller's list.
    start : str
        Start date forwarded to ``yf.download``.
    end : str or None
        End date forwarded to ``yf.download``.

    Attributes
    ----------
    data : dict
        Filled by :meth:`fetch`; maps ticker -> downloaded frame with an
        extra ``"Return"`` column.
    """

    def __init__(self, tickers=None, start="2020-01-01", end=None):
        # A literal list default would be created once and shared by every
        # instance; use a None sentinel and build a fresh list instead.
        if tickers is None:
            tickers = ["SPY"]
        elif isinstance(tickers, str):
            # Iterating a bare string would yield single characters.
            tickers = [tickers]
        self.tickers = list(tickers)
        self.start = start
        self.end = end
        self.data = {}

    def fetch(self):
        """Download each ticker and add a ``"Return"`` column.

        ``"Return"`` is the percentage change of the ``"Close"`` column.
        Each frame is stored in ``self.data`` under its ticker.

        Returns
        -------
        dict
            ``self.data`` (the same object, not a copy).
        """
        for ticker in self.tickers:
            prices = yf.download(ticker, start=self.start, end=self.end, progress=False)
            prices["Return"] = prices["Close"].pct_change()
            self.data[ticker] = prices
        return self.data

    def compute_metrics(self, returns, periods_per_year=252):
        """Summarise a periodic return series.

        Parameters
        ----------
        returns : pandas.Series
            Periodic returns. The index must support subtraction yielding an
            object with ``.days`` (e.g. a ``DatetimeIndex``). NaNs count as
            0 when compounding and are skipped by ``mean``/``std``.
        periods_per_year : int, optional
            Annualisation factor for volatility and Sharpe (default 252).

        Returns
        -------
        dict
            Keys ``"CAGR"``, ``"Volatility"``, ``"Sharpe"``, ``"MaxDD"``.
            CAGR is 0 when the index spans no time; Sharpe (no risk-free
            rate is subtracted) is 0 unless the standard deviation is
            positive. For an empty series every value is NaN.
        """
        if len(returns) == 0:
            # Nothing to measure; index[-1] below would raise IndexError.
            return dict.fromkeys(("CAGR", "Volatility", "Sharpe", "MaxDD"), np.nan)

        curve = (1 + returns.fillna(0)).cumprod()
        years = (returns.index[-1] - returns.index[0]).days / 365.25
        cagr = curve.iloc[-1] ** (1 / years) - 1 if years > 0 else 0

        period_std = returns.std()
        vol = period_std * np.sqrt(periods_per_year)
        # NaN std (fewer than two observations) fails the comparison -> 0.
        sharpe = (returns.mean() * periods_per_year) / vol if period_std > 0 else 0

        # Largest fractional drop of the compounded curve below its running peak.
        max_dd = (curve / curve.cummax() - 1).min()
        return {"CAGR": cagr, "Volatility": vol, "Sharpe": sharpe, "MaxDD": max_dd}

    def evaluate(self):
        """Compute metrics for every ticker currently held in ``self.data``.

        Returns
        -------
        dict
            Maps ticker -> metrics dict from :meth:`compute_metrics`.
            Empty when :meth:`fetch` has not populated ``self.data``.
        """
        return {
            ticker: self.compute_metrics(frame["Return"])
            for ticker, frame in self.data.items()
        }


In [3]:
# Example usage: build an agent for two benchmark tickers, download their
# history, then summarise each one.
bench_agent = BenchmarkAgent(tickers=["SPY", "QQQ"], start="2020-01-01")
# fetch() calls yf.download once per ticker, so this line needs network access.
bench_data = bench_agent.fetch()
# evaluate() returns {ticker: {"CAGR", "Volatility", "Sharpe", "MaxDD"}}.
bench_metrics = bench_agent.evaluate()

print("Benchmark Metrics:", bench_metrics)


  df = yf.download(ticker, start=self.start, end=self.end, progress=False)
  df = yf.download(ticker, start=self.start, end=self.end, progress=False)


Benchmark Metrics: {'SPY': {'CAGR': np.float64(0.1480653172202524), 'Volatility': np.float64(0.2111202789584386), 'Sharpe': np.float64(0.7629873333362032), 'MaxDD': np.float64(-0.33717258779070414)}, 'QQQ': {'CAGR': np.float64(0.19858697691313343), 'Volatility': np.float64(0.2568180727789718), 'Sharpe': np.float64(0.8373617774899662), 'MaxDD': np.float64(-0.35118709967260764)}}


In [4]:
class EvaluationAgent:
    """Decide whether a portfolio beats the benchmarks and past strategies.

    Parameters
    ----------
    benchmark_agent : BenchmarkAgent
        Object whose ``evaluate()`` returns ``{name: {"Sharpe": ...}}``.
    memory : MemoryAgent
        Object whose ``history`` can be passed to ``pandas.DataFrame``;
        records may carry a ``"Metrics"`` dict containing ``"Sharpe"``.
    """

    # Annotations are quoted so defining the class does not require the
    # annotated names to be bound yet.
    def __init__(self, benchmark_agent: "BenchmarkAgent", memory: "MemoryAgent"):
        self.benchmark_agent = benchmark_agent
        self.memory = memory

    def compute_metrics(self, returns, periods_per_year=252):
        """Summarise a periodic return series.

        Mirrors ``BenchmarkAgent.compute_metrics`` so portfolio and
        benchmark numbers are produced by the same formulas.

        Parameters
        ----------
        returns : pandas.Series
            Periodic returns. The index must support subtraction yielding an
            object with ``.days`` (e.g. a ``DatetimeIndex``). NaNs count as
            0 when compounding and are skipped by ``mean``/``std``.
        periods_per_year : int, optional
            Annualisation factor for volatility and Sharpe (default 252).

        Returns
        -------
        dict
            Keys ``"CAGR"``, ``"Volatility"``, ``"Sharpe"``, ``"MaxDD"``.
            CAGR is 0 when the index spans no time; Sharpe (no risk-free
            rate is subtracted) is 0 unless the standard deviation is
            positive. For an empty series every value is NaN.
        """
        if len(returns) == 0:
            # Nothing to measure; index[-1] below would raise IndexError.
            return dict.fromkeys(("CAGR", "Volatility", "Sharpe", "MaxDD"), np.nan)

        curve = (1 + returns.fillna(0)).cumprod()
        years = (returns.index[-1] - returns.index[0]).days / 365.25
        cagr = curve.iloc[-1] ** (1 / years) - 1 if years > 0 else 0

        period_std = returns.std()
        vol = period_std * np.sqrt(periods_per_year)
        # NaN std (fewer than two observations) fails the comparison -> 0.
        sharpe = (returns.mean() * periods_per_year) / vol if period_std > 0 else 0

        # Largest fractional drop of the compounded curve below its running peak.
        max_dd = (curve / curve.cummax() - 1).min()
        return {"CAGR": cagr, "Volatility": vol, "Sharpe": sharpe, "MaxDD": max_dd}

    def evaluate(self, portfolio_df):
        """Score ``portfolio_df`` and return an ADOPT/REJECT decision.

        Parameters
        ----------
        portfolio_df : pandas.DataFrame
            Must contain a ``"Portfolio_Return"`` column.

        Returns
        -------
        dict
            ``"Portfolio"`` (metrics dict), ``"Benchmarks"`` (as returned by
            the benchmark agent), ``"Avg_Past_Sharpe"`` (mean Sharpe of
            remembered strategies, 0 when none is recorded) and
            ``"Decision"``: ``"ADOPT"`` only if the portfolio Sharpe exceeds
            both the best benchmark Sharpe and the past average.

        Raises
        ------
        ValueError
            If the benchmark agent supplies no usable (non-NaN) Sharpe.
        """
        portfolio_metrics = self.compute_metrics(portfolio_df["Portfolio_Return"])

        benchmarks = self.benchmark_agent.evaluate()
        # NaN compares False against everything, which would make max()
        # depend on iteration order; drop such entries up front.
        bench_sharpes = [m["Sharpe"] for m in benchmarks.values() if pd.notna(m["Sharpe"])]
        if not bench_sharpes:
            raise ValueError(
                "No benchmark Sharpe ratios available; "
                "fetch benchmark data before evaluating."
            )
        best_benchmark_sharpe = max(bench_sharpes)

        # Past performance from memory. Records without a "Metrics" entry
        # become float NaN once tabulated, so test for dict explicitly
        # rather than only `is not None`.
        history_df = pd.DataFrame(self.memory.history)
        if not history_df.empty and "Metrics" in history_df.columns:
            past_sharpes = [
                m["Sharpe"]
                for m in history_df["Metrics"]
                if isinstance(m, dict) and "Sharpe" in m
            ]
            avg_past_sharpe = np.mean(past_sharpes) if past_sharpes else 0
        else:
            avg_past_sharpe = 0

        beats_benchmark = portfolio_metrics["Sharpe"] > best_benchmark_sharpe
        beats_history = portfolio_metrics["Sharpe"] > avg_past_sharpe
        decision = "ADOPT" if beats_benchmark and beats_history else "REJECT"

        return {
            "Portfolio": portfolio_metrics,
            "Benchmarks": benchmarks,
            "Avg_Past_Sharpe": avg_past_sharpe,
            "Decision": decision
        }


In [5]:
# Step 1 - benchmarks: build the agent and download price history (network).
bench_agent = BenchmarkAgent(tickers=["SPY", "QQQ"], start="2020-01-01")
bench_agent.fetch()

# Step 2 - memory: replay the Day 18 log, if present, into a fresh MemoryAgent.
# Only Strategy / Decision / Notes are taken from the CSV; Metrics is stored
# as None for every replayed record.
memory = MemoryAgent()
history_path = "../logs/Day18_memory.csv"
if os.path.exists(history_path):
    df = pd.read_csv(history_path)
    for record in df.to_dict("records"):
        entry = {
            "Strategy": record["Strategy"],
            "Metrics": None,
            "Decision": record["Decision"],
            "Notes": record["Notes"],
        }
        memory.history.append(entry)

# Step 3 - evaluator wired to the benchmarks and the replayed memory.
evaluator = EvaluationAgent(benchmark_agent=bench_agent, memory=memory)

# Step 4 - score the Day 18 portfolio (first CSV column becomes a parsed date index).
portfolio = pd.read_csv("../logs/Day18_portfolio.csv", index_col=0, parse_dates=True)
eval_result = evaluator.evaluate(portfolio)

print("Evaluation Result:", eval_result)


  df = yf.download(ticker, start=self.start, end=self.end, progress=False)
  df = yf.download(ticker, start=self.start, end=self.end, progress=False)


Evaluation Result: {'Portfolio': {'CAGR': np.float64(0.1398491088229432), 'Volatility': np.float64(0.04382244081992615), 'Sharpe': np.float64(2.994467401985038), 'MaxDD': np.float64(-0.007035576834643953)}, 'Benchmarks': {'SPY': {'CAGR': np.float64(0.1480592235914533), 'Volatility': np.float64(0.21112017584410203), 'Sharpe': np.float64(0.7629623294112585), 'MaxDD': np.float64(-0.3371727175312037)}, 'QQQ': {'CAGR': np.float64(0.19861079208393662), 'Volatility': np.float64(0.25690704262724), 'Sharpe': np.float64(0.8360326115221827), 'MaxDD': np.float64(-0.35118713836139503)}}, 'Avg_Past_Sharpe': 0, 'Decision': 'ADOPT'}


In [6]:
# Persist the Day 20 evaluation: a one-row CSV plus a plain-text summary.
os.makedirs("../logs", exist_ok=True)

# Re-run the evaluation so this cell does not depend on a stale eval_result.
eval_result = evaluator.evaluate(portfolio)
# NOTE(review): the nested metric dicts presumably land in the CSV as
# stringified cells - confirm that downstream readers expect that.
pd.DataFrame([eval_result]).to_csv("../logs/Day20_evaluation.csv", index=False)

# Explicit encoding, matching the other file writes in this notebook, so the
# output does not depend on the platform's default locale encoding.
with open("../logs/Day20_summary.txt", "w", encoding="utf-8") as f:
    f.write("Day 20 evaluation with BenchmarkAgent\n")
    f.write(str(eval_result))


In [7]:
# Export EvaluationAgent as ../agents/evaluation_agent.py (overwrites the file).
# Compared with a verbatim copy, the emitted evaluate() differs in two ways:
#   * history records whose "Metrics" is not a dict (None, or the NaN pandas
#     inserts for a missing key) are skipped instead of raising TypeError;
#   * NaN benchmark Sharpes are ignored, and having no usable benchmark Sharpe
#     raises a descriptive ValueError rather than max()'s empty-sequence error.
with open("../agents/evaluation_agent.py", "w", encoding="utf-8") as f:
    f.write("""import pandas as pd
import numpy as np

class EvaluationAgent:
    def __init__(self, benchmark_agent, memory):
        self.benchmark_agent = benchmark_agent
        self.memory = memory
    
    def compute_metrics(self, returns):
        strat_curve = (1 + returns.fillna(0)).cumprod()
        years = (returns.index[-1] - returns.index[0]).days / 365.25
        cagr = strat_curve.iloc[-1]**(1/years) - 1 if years > 0 else 0
        vol = returns.std() * np.sqrt(252)
        sharpe = (returns.mean() * 252) / (returns.std() * np.sqrt(252)) if returns.std() > 0 else 0
        roll_max = strat_curve.cummax()
        dd = (strat_curve / roll_max - 1).min()
        return {"CAGR": cagr, "Volatility": vol, "Sharpe": sharpe, "MaxDD": dd}
    
    def evaluate(self, portfolio_df):
        # Current portfolio
        portfolio_metrics = self.compute_metrics(portfolio_df["Portfolio_Return"])
        
        # Benchmarks
        benchmarks = self.benchmark_agent.evaluate()
        benchmark_sharpes = [m["Sharpe"] for m in benchmarks.values() if pd.notna(m["Sharpe"])]
        if not benchmark_sharpes:
            raise ValueError("No benchmark Sharpe ratios available; fetch benchmark data before evaluating.")
        
        # Historical averages from memory
        history_df = pd.DataFrame(self.memory.history)
        if not history_df.empty and "Metrics" in history_df.columns:
            past_sharpes = [m["Sharpe"] for m in history_df["Metrics"] if isinstance(m, dict) and "Sharpe" in m]
            avg_past_sharpe = np.mean(past_sharpes) if past_sharpes else 0
        else:
            avg_past_sharpe = 0
        
        # Decision: adopt if Sharpe > both benchmark Sharpe and past average
        best_benchmark_sharpe = max(benchmark_sharpes)
        decision = "ADOPT" if portfolio_metrics["Sharpe"] > best_benchmark_sharpe and portfolio_metrics["Sharpe"] > avg_past_sharpe else "REJECT"
        
        return {
            "Portfolio": portfolio_metrics,
            "Benchmarks": benchmarks,
            "Avg_Past_Sharpe": avg_past_sharpe,
            "Decision": decision
        }
""")


In [8]:
# Export BenchmarkAgent as ../agents/benchmark_agent.py (overwrites the file).
# The emitted __init__ uses a None sentinel and copies the tickers, so
# instances created with the default no longer share one mutable list.
# Calling BenchmarkAgent() still yields tickers == ["SPY"].
with open("../agents/benchmark_agent.py", "w", encoding="utf-8") as f:
    f.write("""import pandas as pd
import numpy as np
import yfinance as yf

class BenchmarkAgent:
    def __init__(self, tickers=None, start="2020-01-01", end=None):
        self.tickers = ["SPY"] if tickers is None else list(tickers)
        self.start = start
        self.end = end
        self.data = {}
    
    def fetch(self):
        for ticker in self.tickers:
            df = yf.download(ticker, start=self.start, end=self.end, progress=False)
            df["Return"] = df["Close"].pct_change()
            self.data[ticker] = df
        return self.data
    
    def compute_metrics(self, returns):
        strat_curve = (1 + returns.fillna(0)).cumprod()
        years = (returns.index[-1] - returns.index[0]).days / 365.25
        cagr = strat_curve.iloc[-1]**(1/years) - 1 if years > 0 else 0
        vol = returns.std() * np.sqrt(252)
        sharpe = (returns.mean() * 252) / (returns.std() * np.sqrt(252)) if returns.std() > 0 else 0
        roll_max = strat_curve.cummax()
        dd = (strat_curve / roll_max - 1).min()
        return {"CAGR": cagr, "Volatility": vol, "Sharpe": sharpe, "MaxDD": dd}
    
    def evaluate(self):
        metrics = {}
        for ticker, df in self.data.items():
            metrics[ticker] = self.compute_metrics(df["Return"])
        return metrics
""")


In [9]:
# Smoke test: run the same evaluation using the classes imported from the
# `agents` package instead of the ones defined in this notebook. This import
# rebinds BenchmarkAgent and EvaluationAgent, shadowing the notebook classes.
# NOTE(review): assumes agents/__init__.py exports all three names, and that
# the package, already imported by the first cell, sees the freshly written
# modules - confirm with Restart Kernel -> Run All.
from agents import BenchmarkAgent, MemoryAgent, EvaluationAgent

bench_agent = BenchmarkAgent(tickers=["SPY", "QQQ"], start="2020-01-01")
# Downloads prices again via yfinance (network access required).
bench_agent.fetch()

# No history is replayed into this MemoryAgent, unlike the earlier evaluation
# cell; presumably it starts empty - verify against MemoryAgent.
memory = MemoryAgent()
portfolio = pd.read_csv("../logs/Day18_portfolio.csv", index_col=0, parse_dates=True)

evaluator = EvaluationAgent(benchmark_agent=bench_agent, memory=memory)
result = evaluator.evaluate(portfolio)

print(result)


  df = yf.download(ticker, start=self.start, end=self.end, progress=False)
  df = yf.download(ticker, start=self.start, end=self.end, progress=False)


{'Portfolio': {'CAGR': np.float64(0.1398491088229432), 'Volatility': np.float64(0.04382244081992615), 'Sharpe': np.float64(2.994467401985038), 'MaxDD': np.float64(-0.007035576834643953)}, 'Benchmarks': {'SPY': {'CAGR': np.float64(0.148103239121532), 'Volatility': np.float64(0.2111936260039627), 'Sharpe': np.float64(0.7618348905775892), 'MaxDD': np.float64(-0.33717283133592546)}, 'QQQ': {'CAGR': np.float64(0.19859249698414705), 'Volatility': np.float64(0.2568181435984046), 'Sharpe': np.float64(0.8373796535181902), 'MaxDD': np.float64(-0.3511872659425255)}}, 'Avg_Past_Sharpe': 0, 'Decision': 'ADOPT'}
