In [1]:
import sys, os
sys.path.append("..")

import pandas as pd
import numpy as np

from agents import StrategyAgent, RiskAgent, AllocatorAgent, ReflectionAgent, MemoryAgent


EvaluationAgent computes performance metrics (CAGR, volatility, Sharpe ratio, maximum drawdown) from the portfolio's return series.

Compares them to:

Past portfolios (via MemoryAgent).

Benchmark returns (e.g., SPY).

Uses rules:

Adopt if Sharpe ratio is higher than both past average and benchmark.

Reject otherwise, i.e. whenever the portfolio's Sharpe ratio fails to beat either the past average or the benchmark.

In [2]:
class EvaluationAgent:
    """Scores a portfolio against a benchmark and past runs, then decides ADOPT or REJECT.

    The benchmark is read once, at construction time, from ``benchmark_file``
    (a CSV whose first column becomes a parsed-date index). If the file does
    not exist, ``self.benchmark`` stays ``None`` and ``evaluate`` falls back to
    all-zero benchmark metrics.
    """

    def __init__(self, benchmark_file="../data/SPY_sample.csv"):
        """Remember the benchmark path and load the CSV if it exists."""
        self.benchmark_file = benchmark_file
        self.benchmark = None
        if os.path.exists(benchmark_file):
            self.benchmark = pd.read_csv(benchmark_file, index_col=0, parse_dates=True)

    def compute_metrics(self, returns):
        """Compute annualised performance metrics for a series of periodic returns.

        Args:
            returns: pandas Series of simple returns. The index must support
                subtraction yielding an object with a ``.days`` attribute
                (e.g. a DatetimeIndex); annualisation uses 252 periods per year.

        Returns:
            dict with keys ``"CAGR"``, ``"Volatility"``, ``"Sharpe"`` and
            ``"MaxDD"``. An empty series yields zeros for every metric.
        """
        # An empty series has no first/last timestamp to measure the period
        # with; report the same zero placeholders used for a missing benchmark.
        if len(returns) == 0:
            return {"CAGR": 0, "Volatility": 0, "Sharpe": 0, "MaxDD": 0}

        # Missing returns are treated as flat periods when compounding.
        strat_curve = (1 + returns.fillna(0)).cumprod()
        years = (returns.index[-1] - returns.index[0]).days / 365.25
        cagr = strat_curve.iloc[-1] ** (1 / years) - 1 if years > 0 else 0

        period_std = returns.std()
        vol = period_std * np.sqrt(252)
        # A NaN or zero standard deviation (single observation, constant
        # returns) would make the ratio meaningless, so report 0 instead.
        sharpe = (returns.mean() * 252) / vol if period_std > 0 else 0

        # Largest peak-to-trough decline of the compounded curve (<= 0).
        roll_max = strat_curve.cummax()
        dd = (strat_curve / roll_max - 1).min()
        return {"CAGR": cagr, "Volatility": vol, "Sharpe": sharpe, "MaxDD": dd}

    def evaluate(self, portfolio_df, memory: "MemoryAgent"):
        """Compare the portfolio's Sharpe ratio with the benchmark and past runs.

        Args:
            portfolio_df: DataFrame with a ``"Portfolio_Return"`` column.
            memory: object exposing ``history``, a collection of records that
                ``pd.DataFrame`` can consume. Only records whose ``"Metrics"``
                value is a dict containing ``"Sharpe"`` contribute to the
                historical average.

        Returns:
            dict with keys ``"Portfolio"`` and ``"Benchmark"`` (metric dicts),
            ``"Avg_Past_Sharpe"`` and ``"Decision"``. The decision is
            ``"ADOPT"`` only when the portfolio Sharpe is strictly greater than
            both the benchmark Sharpe and the past average, else ``"REJECT"``.
        """
        import warnings

        # Current portfolio metrics
        portfolio_metrics = self.compute_metrics(portfolio_df["Portfolio_Return"])

        # Benchmark metrics
        if self.benchmark is not None:
            # The benchmark CSV is expected to provide a "Return" column.
            # NOTE(review): the benchmark is scored over its whole file, not
            # over the portfolio's date range - confirm that is intended.
            bench_metrics = self.compute_metrics(self.benchmark["Return"])
        else:
            # Make the fallback visible: with no benchmark, any positive
            # Sharpe "beats" the all-zero placeholder.
            warnings.warn(
                f"Benchmark file {self.benchmark_file!r} was not found; "
                "comparing against all-zero benchmark metrics.",
                stacklevel=2,
            )
            bench_metrics = {"Sharpe": 0, "CAGR": 0, "Volatility": 0, "MaxDD": 0}

        # Historical averages from MemoryAgent
        history_df = pd.DataFrame(memory.history)
        past_sharpes = []
        if not history_df.empty and "Metrics" in history_df.columns:
            # Records lacking "Metrics" show up as NaN after the DataFrame
            # conversion and CSV-loaded metrics may be plain strings; only
            # real dicts can be indexed by "Sharpe".
            past_sharpes = [
                m["Sharpe"]
                for m in history_df["Metrics"]
                if isinstance(m, dict) and "Sharpe" in m
            ]
        avg_past_sharpe = np.mean(past_sharpes) if past_sharpes else 0

        # Decision logic
        beats_benchmark = portfolio_metrics["Sharpe"] > bench_metrics["Sharpe"]
        beats_history = portfolio_metrics["Sharpe"] > avg_past_sharpe
        decision = "ADOPT" if (beats_benchmark and beats_history) else "REJECT"

        return {
            "Portfolio": portfolio_metrics,
            "Benchmark": bench_metrics,
            "Avg_Past_Sharpe": avg_past_sharpe,
            "Decision": decision
        }


In [3]:
# Restore the portfolio saved on Day 18; the first CSV column becomes a parsed-date index
portfolio = pd.read_csv("../logs/Day18_portfolio.csv", index_col=0, parse_dates=True)

# Rebuild the agent's memory from the Day 18 log, when that log exists
memory = MemoryAgent()
history_path = "../logs/Day18_memory.csv"
if os.path.exists(history_path):
    df = pd.read_csv(history_path)
    for _, logged in df.iterrows():
        # NOTE(review): "Metrics" is always None here, so these records never
        # contribute a past Sharpe ratio to the evaluation - confirm intended.
        record = {
            "Strategy": logged["Strategy"],
            "Metrics": None,
            "Decision": logged["Decision"],
            "Notes": logged["Notes"],
        }
        memory.history.append(record)

# Score the portfolio against the benchmark and the restored history
evaluator = EvaluationAgent()
result = evaluator.evaluate(portfolio, memory)

print("Evaluation Result:", result)


Evaluation Result: {'Portfolio': {'CAGR': np.float64(0.1398491088229432), 'Volatility': np.float64(0.04382244081992615), 'Sharpe': np.float64(2.994467401985038), 'MaxDD': np.float64(-0.007035576834643953)}, 'Benchmark': {'Sharpe': 0, 'CAGR': 0, 'Volatility': 0, 'MaxDD': 0}, 'Avg_Past_Sharpe': 0, 'Decision': 'ADOPT'}


In [4]:
# Make sure the log directory exists before writing anything into it
os.makedirs("../logs", exist_ok=True)

# One-row CSV holding the evaluation dictionary
evaluation_table = pd.DataFrame([result])
evaluation_table.to_csv("../logs/Day19_evaluation.csv", index=False)

# Plain-text summary: a header line followed by the raw result dict
with open("../logs/Day19_summary.txt", "w") as summary_file:
    summary_file.writelines(["Day 19 evaluation run\n", str(result)])
