# Experiment 1: Expert Reviewer Survey Material

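This notebook generates the survey material for the expert reviewers: the charts and text summaries saved per ticker and strategy date under `./judge_reviews`.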

In [None]:
import os
import sys
import logging
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tabulate import tabulate
warnings.filterwarnings("ignore")

# Flip to True to install the notebook's dependencies through IPython's %pip magic.
INSTALL_DEPS = False
if INSTALL_DEPS:
    # The first three packages are pinned to exact versions; the last line is unpinned.
    %pip install python-dotenv==1.0.1
    %pip install tqdm==4.66.5
    %pip install openai==1.58.1
    %pip install matplotlib reportlab tabulate

# Registers the dotenv IPython extension.
# NOTE(review): this only loads the extension; presumably a `%dotenv` call is
# also needed for a .env file to populate os.environ — confirm.
%load_ext dotenv

# Data locations, each overridable through an environment variable of the same
# name. The defaults are sub-paths that the Kaggle branch below prefixes with
# DATA_PATH.
FUNDAMENTALS_PATH = os.getenv("FUNDAMENTALS_PATH", '/fundamentals')
LLM_PROMPTS_PATH = os.getenv("LLM_PROMPTS_PATH", '/prompts')
HISTORIC_PATH = os.getenv("HISTORIC_PATH", '/historic')
MACRO_PATH = os.getenv("MACRO_PATH", '/macro')
OPTIONS_PATH = os.getenv("OPTIONS_PATH", '/options')

# LLM outputs for this experiment are nested under the judge_reviews folder.
LLM_OUTPUT_PATH = os.getenv("LLM_OUTPUT_PATH", '/llm_data')
LLM_OUTPUT_PATH = f"judge_reviews/{LLM_OUTPUT_PATH}"

# NOTE(review): the default '/logs' is an absolute path at the filesystem root;
# creating it may need elevated permissions — confirm LOGS_PATH is set locally.
LOGS_PATH = os.getenv("LOGS_PATH", '/logs')

# Make sure the writable output folders exist; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
paths = [LLM_OUTPUT_PATH, LOGS_PATH]
for path in paths:
    if path:
        os.makedirs(path, exist_ok=True)

# Resolve data locations and the import path for the helper modules, depending
# on whether the notebook runs locally or inside a Kaggle kernel.
if "KAGGLE_KERNEL_RUN_TYPE" not in os.environ:
    # Local run: data sits next to the notebook and helpers live in ./utils.
    DATA_PATH = './data'
    module_path = os.path.abspath(os.path.join(os.getcwd(), 'utils'))
    if module_path not in sys.path:
        sys.path.append(module_path)
else:
    logging.info("Running in Kaggle...")

    # Kaggle run: every data sub-path is prefixed with the dataset mount point.
    DATA_PATH = "/kaggle/input/thesis/data"
    FUNDAMENTALS_PATH = f"{DATA_PATH}{FUNDAMENTALS_PATH}"
    HISTORIC_PATH = f"{DATA_PATH}{HISTORIC_PATH}"
    MACRO_PATH = f"{DATA_PATH}{MACRO_PATH}"
    OPTIONS_PATH = f"{DATA_PATH}{OPTIONS_PATH}"
    LLM_PROMPTS_PATH = f"{DATA_PATH}{LLM_PROMPTS_PATH}"

    # Both inserts target position 1, so data_utils ends up ahead of rl_agent_utils.
    sys.path.insert(1, "/kaggle/usr/lib/rl_agent_utils")
    sys.path.insert(1, "/kaggle/usr/lib/data_utils")

from openai import OpenAI
from data_utils import generate_strategy_for_ticker, evaluate_trading_metrics, sanitize_text, HIGH_RISK_PROFILE, HIGH_OBJECTIVES , PERSONA

## Environment and Constants

In [None]:
# NOTE(review): RISK_EXPERIMENT is not referenced in the visible cells;
# presumably a tag for the risk-profile experiment variant — confirm.
RISK_EXPERIMENT = 'r'
# Selects the strategy prompt file: {LLM_PROMPTS_PATH}/strat_prompt_{PROMPT_VERSION}.yml
PROMPT_VERSION = 'v3'

## LLM Setup

In [None]:
# Model name and API key come from the environment; os.getenv returns None when
# a variable is unset, so both values may be None here.
OPENAI_MODEL = os.getenv("OPENAI_MODEL")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Shared client, used as the default `client` argument of execute_strategy_evaluation.
# NOTE(review): behaviour with api_key=None depends on the openai client — confirm.
OPENAI_CLIENT = OpenAI(api_key=OPENAI_API_KEY)

In [None]:
def snap_to_first_business_day(date_str):
    """Return the first business day of a month for *date_str*, at UTC midnight.

    The input is parsed as a UTC timestamp and truncated to midnight. A date
    that already is the first business day of its month is returned as is;
    any other date is rolled forward to the next month's first business day.
    """
    parsed_utc = pd.to_datetime(date_str, utc=True)
    midnight_utc = parsed_utc.normalize()
    # n=0 means "roll forward only when not already on the anchor".
    month_open_offset = pd.offsets.BMonthBegin(0)
    return midnight_utc + month_open_offset

In [None]:
import matplotlib.dates as mdates

def _finish_axis(ax, ylabel, legend_loc="upper right"):
    """Apply the shared y-label, outside-anchored legend and grid to one axis."""
    ax.set_ylabel(ylabel)
    ax.legend(fontsize=12, loc=legend_loc, bbox_to_anchor=(1.15, 1))
    ax.grid(True)


def _plot_growth_metrics(ax, frame, metrics, suffix):
    """Plot every metric column present in *frame*, labelled without *suffix*."""
    for metric in metrics:
        if metric in frame.columns:
            formatted_label = metric.replace(suffix, "").replace("_", " ")
            ax.plot(frame.index, frame[metric], label=formatted_label, linewidth=2)


def plot_strategy_analysis(data, strategy_freq='M', llm_trade_action=None, output_dir='./judges_handbook'):
    """Draw the chart pages for one data window and return the figures.

    Two figures are always produced: price/volume/RSI/MACD, and option implied
    volatility/put skew/beta-VIX. When ``llm_trade_action`` is None, two more
    figures are added with macroeconomic and fundamental indicators.

    Args:
        data: DataFrame indexed by date. 'Close', '20MA', '200MA', 'ATR' and
            'Volume' are read unconditionally; every other column is plotted
            only when present.
        strategy_freq: Currently unused; kept for interface compatibility.
        llm_trade_action: None, or the decision label. When given, the first
            close of the window is drawn as the entry line, and "LONG"/"SHORT"
            shade the profit and loss zones around it.
        output_dir: Currently unused; kept for interface compatibility.

    Returns:
        ``[fig1, fig2, fig3, fig4]`` when ``llm_trade_action`` is None,
        otherwise ``[fig1, fig2]``.
    """
    plot_data = data
    columns = plot_data.columns
    judge_view = llm_trade_action is None

    # ---- Figure 1: price, volume, RSI, MACD ----
    fig1, axes1 = plt.subplots(4, 1, figsize=(16, 8), gridspec_kw={'height_ratios': [3, 1, 1, 1]}, sharex=True)

    # Price & Technical Indicators
    ax = axes1[0]
    ax.set_title("Price with Technical Indicators", fontsize=16)
    # NOTE(review): the label says "Weekly" but the data is plotted as passed in
    # (no resampling happens here) — confirm the intended wording.
    ax.plot(plot_data.index, plot_data['Close'], label='Weekly Close', color='blue', linewidth=2.5)
    ax.plot(plot_data.index, plot_data['20MA'], label='20 Day MA', color='gray', linestyle="--", linewidth=2)
    ax.plot(plot_data.index, plot_data['200MA'], label='200 Day MA', color='black', linestyle="--", linewidth=2)

    if 'BB_Upper' in columns and 'BB_Lower' in columns:
        ax.fill_between(plot_data.index, plot_data['BB_Upper'], plot_data['BB_Lower'], color='blue', alpha=0.1, label='Bollinger Bands')
    ax.fill_between(plot_data.index, plot_data['Close'] + plot_data['ATR'], plot_data['Close'] - plot_data['ATR'],
                    color='gray', alpha=0.3, label="ATR Range")

    # The entry line needs at least one row; an empty window simply has no overlay.
    if not judge_view and not plot_data.empty:
        price_levels = np.concatenate([plot_data['Close'], plot_data['20MA'], plot_data['200MA']])
        # NaN-aware bounds: a single NaN in a moving average would otherwise
        # turn the zone limits into NaN and nothing would be shaded.
        zone_top = np.nanmax(price_levels)
        zone_bottom = np.nanmin(price_levels)
        decision_price = data['Close'].iloc[0]
        ax.axhline(decision_price, color='green', linewidth=2, label=f'LLM {llm_trade_action} Entry')
        if llm_trade_action == "LONG":
            ax.fill_between(plot_data.index, decision_price, zone_top, color='lightgreen', alpha=0.15, label="Profit Zone")
            ax.fill_between(plot_data.index, zone_bottom, decision_price, color='lightcoral', alpha=0.15, label="Loss Zone")
        elif llm_trade_action == "SHORT":
            ax.fill_between(plot_data.index, decision_price, zone_top, color='lightcoral', alpha=0.15, label="Loss Zone")
            ax.fill_between(plot_data.index, zone_bottom, decision_price, color='lightgreen', alpha=0.15, label="Profit Zone")
    _finish_axis(ax, "Price (USD)")

    # Liquidity Metrics (Volume)
    ax = axes1[1]
    ax.set_title("Liquidity Metrics", fontsize=14)
    ax.bar(plot_data.index, plot_data['Volume'], label='Volume', color='gray', alpha=0.6)
    _finish_axis(ax, "Volume")

    # Relative Strength Index (RSI)
    ax = axes1[2]
    ax.set_title("Relative Strength Index (RSI)", fontsize=14)
    if 'RSI' in columns:
        ax.plot(plot_data.index, plot_data['RSI'], label='RSI', color='blue', linewidth=2.5)
        ax.axhline(70, color='red', linestyle='--', linewidth=1, label='Overbought (70)')
        ax.axhline(30, color='green', linestyle='--', linewidth=1, label='Oversold (30)')
    _finish_axis(ax, "RSI (0-100)")

    # MACD Indicator
    ax = axes1[3]
    ax.set_title("MACD", fontsize=14)
    if 'MACD' in columns and 'Signal_Line' in columns:
        ax.plot(plot_data.index, plot_data['MACD'], label='MACD', color='blue', linewidth=2.5)
        ax.plot(plot_data.index, plot_data['Signal_Line'], label='Signal Line', color='red', linewidth=2)
    _finish_axis(ax, "MACD Value")

    # ---- Figure 2: option volatility, skew, beta & VIX ----
    fig2, axes2 = plt.subplots(3, 1, figsize=(16, 8), gridspec_kw={'height_ratios': [2, 1, 2]})

    ax = axes2[0]
    ax.set_title("Option Implied Volatility", fontsize=16)
    if 'ATM_IV_Call' in columns and 'ATM_IV_Put' in columns:
        ax.plot(plot_data.index, plot_data['ATM_IV_Call'], label='ATM Call', linewidth=1.5, alpha=0.5)
        ax.plot(plot_data.index, plot_data['ATM_IV_Put'], label='ATM Put', linewidth=1.5, alpha=0.5)
    # The OTM pair gets its own guard so a frame with only ATM columns still plots.
    if 'OTM_IV_Call' in columns and 'OTM_IV_Put' in columns:
        ax.plot(plot_data.index, plot_data['OTM_IV_Call'], label='OTM Call', linewidth=1.5, linestyle='-.', alpha=0.5)
        ax.plot(plot_data.index, plot_data['OTM_IV_Put'], label='OTM Put', linewidth=1.5, linestyle='-.', alpha=0.5)
    _finish_axis(ax, "Implied Volatility (%)")

    ax = axes2[1]
    ax.set_title("Option Put Skew", fontsize=16)
    if 'ATM_Skew' in columns and 'OTM_Skew' in columns:
        ax.plot(plot_data.index, plot_data['ATM_Skew'], label='ATM Skew', linewidth=2.5, color='blue')
        ax.plot(plot_data.index, plot_data['OTM_Skew'], label='OTM Skew', linewidth=2.5, linestyle='--', color='red')
    ax.axhline(y=0, color='black', linestyle='--', linewidth=2, alpha=0.8, label="Skew Threshold")
    _finish_axis(ax, "IV Skew")

    ax = axes2[2]
    ax.set_title("Market Volatility & Beta", fontsize=16)
    ax2 = ax.twinx()
    if 'Market_Beta' in columns:
        ax.plot(plot_data.index, plot_data['Market_Beta'], label='Market Beta', color='orange', linewidth=2.5)
    if 'VIX_Close' in columns:
        ax2.plot(plot_data.index, plot_data['VIX_Close'], label='VIX', color='purple', linewidth=2.5, linestyle="--")
    ax.set_ylabel("Market Beta")
    ax2.set_ylabel("VIX Level")
    ax.legend(fontsize=12, loc="upper right", bbox_to_anchor=(1.15, 1))
    ax2.legend(fontsize=12, loc="lower right", bbox_to_anchor=(1.15, 1))
    ax.grid(True)

    # The judge view shows month/day ticks; the decision view shows full dates.
    date_format = '%b %d' if judge_view else '%Y-%m-%d'
    for ax_group in [axes1, axes2]:
        for ax in ax_group:
            ax.xaxis.set_major_formatter(mdates.DateFormatter(date_format))

    figures = [fig1, fig2]

    if judge_view:
        # ---- Figure 3: macroeconomic context ----
        fig3, axes3 = plt.subplots(3, 1, figsize=(16, 8), gridspec_kw={'height_ratios': [2, 1, 1]}, sharex=True)

        ax = axes3[0]
        ax.set_title("Macroeconomic Indicators", fontsize=16)
        econ_metrics = ['GDP_YoY', 'PPI_YoY', 'M2_Money_Supply_YoY', 'Housing_Starts_YoY',
                        'Consumer_Confidence_YoY', 'Treasury_Yields_YoY', 'Employment_YoY']
        _plot_growth_metrics(ax, plot_data, econ_metrics, "_YoY")
        _finish_axis(ax, "YoY Change (%)")

        ax = axes3[1]
        ax.set_title("ISM PMI", fontsize=16)
        ax2 = ax.twinx()
        if 'PMI' in columns:
            ax.plot(plot_data.index, plot_data['PMI'], color='blue', linewidth=2, label="PMI")
        if 'PMI_YoY' in columns:
            ax2.plot(plot_data.index, plot_data['PMI_YoY'], color='red', linewidth=2, linestyle="--", label="PMI YoY")
        ax.set_ylabel("PMI (0-100)")
        ax2.set_ylabel("YoY Change (%)")
        ax.legend(fontsize=12, loc="upper right", bbox_to_anchor=(1.15, 1))
        ax2.legend(fontsize=12, loc="lower right", bbox_to_anchor=(1.15, 1))
        ax.grid(True)

        ax = axes3[2]
        ax.set_title("Interest Rates & Yield Curve", fontsize=16)
        has_treasury = 'Treasury_Yields_YoY' in columns
        has_yield_curve = 'Yield_Curve_YoY' in columns
        if has_treasury:
            ax.plot(plot_data.index, plot_data['Treasury_Yields_YoY'], label='10yr Treasury Yields', color='blue', linewidth=2.5)
        if has_yield_curve:
            ax.plot(plot_data.index, plot_data['Yield_Curve_YoY'], label='Yield Curve', color='red', linewidth=2.5)
        if has_treasury or has_yield_curve:
            ax.axhline(y=0, color='black', linestyle='--', linewidth=2, alpha=0.8, label="Inversion Threshold")
        _finish_axis(ax, "YoY Change (%)")

        # ---- Figure 4: company fundamentals ----
        fig4, axes4 = plt.subplots(3, 1, figsize=(16, 8), gridspec_kw={'height_ratios': [2, 1, 1]}, sharex=True)

        ax = axes4[0]
        ax.set_title("Fundamental Indicators", fontsize=16)
        fundamentals_qoq = ['Quick_Ratio_QoQ_Growth', 'Current_Ratio_QoQ_Growth', 'Debt_to_Equity_Ratio_QoQ_Growth',
                            'Gross_Margin_QoQ_Growth', 'Operating_Margin_QoQ_Growth', 'EBIT_Margin_QoQ_Growth',
                            'Net_Profit_Margin_QoQ_Growth', 'Asset_Turnover_QoQ_Growth', 'Inventory_Turnover_Ratio_QoQ_Growth']
        _plot_growth_metrics(ax, plot_data, fundamentals_qoq, "_QoQ_Growth")
        _finish_axis(ax, "QoQ Growth (%)")

        ax = axes4[1]
        ax.set_title("Valuation & Profitability", fontsize=16)
        valuation_qoq = ['Price_to_Book_Ratio_QoQ_Growth', 'PE_Ratio_QoQ_Growth', 'EPS_QoQ_Growth',
                         'Net_Income_QoQ_Growth', 'Free_Cash_Flow_Per_Share_QoQ_Growth']
        _plot_growth_metrics(ax, plot_data, valuation_qoq, "_QoQ_Growth")
        _finish_axis(ax, "QoQ Growth (%)")

        ax = axes4[2]
        ax.set_title("Returns & Cash Flow", fontsize=16)
        returns_qoq = ['Operating_Cash_Flow_Per_Share_QoQ_Growth', 'Return_on_Equity_QoQ_Growth', 'Return_on_Assets_QoQ_Growth']
        _plot_growth_metrics(ax, plot_data, returns_qoq, "_QoQ_Growth")
        _finish_axis(ax, "QoQ Growth (%)")

        for ax_group in [axes3, axes4]:
            for ax in ax_group:
                ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

        figures = [fig1, fig2, fig3, fig4]

    for fig in figures:
        fig.tight_layout()
    plt.show()

    return figures

In [None]:
def summarize_data_before_decision(data, macro_columns, freq='M'):
    """Build a text report of grid tables summarising *data* before a decision.

    The frame is resampled weekly (keeping the last 12 periods) when ``freq``
    is 'M', otherwise daily (keeping the last 30 periods). Price, ATR, volume,
    RSI, slope and z-score columns are aggregated per period; the remaining
    sections use the last value of each period for whichever of their columns
    exist in *data*. Macro and benchmark columns are reported as
    period-over-period percentage changes.

    Args:
        data: Date-indexed DataFrame holding the columns aggregated below.
        macro_columns: Column names reported in the "Macro Trends" section.
        freq: 'M' for the weekly view; any other value selects the daily view.

    Returns:
        The report as a single string of headed ``tabulate`` grid tables.
    """
    resample_freq, periods_to_keep = ('W', -12) if freq == 'M' else ('D', -30)

    aggregation_plan = {
        'Close': ['first', 'last', 'max', 'min'],
        'High': 'max',
        'Low': 'min',
        'ATR': ['mean', 'std', 'max', 'min'],
        'Volume': ['sum', 'mean'],
        'RSI': 'mean',
        'Close_Slope': 'mean',
        'Close_Z': 'mean'
    }
    aggregated_data = data.resample(resample_freq).agg(aggregation_plan).fillna(0)
    # Flatten the (column, statistic) MultiIndex into names such as "ATR_mean".
    aggregated_data.columns = ['_'.join(col).strip() for col in aggregated_data.columns]
    aggregated_data = aggregated_data.iloc[periods_to_keep:]

    def _period_table(column_names, as_pct_change=False):
        """Collect the period-end series of the available columns into one frame."""
        collected = {}
        for name in column_names:
            if name not in data.columns:
                continue
            period_last = data[name].resample(resample_freq).last()
            if as_pct_change:
                collected[name] = period_last.pct_change().fillna(0).iloc[periods_to_keep:] * 100
            else:
                collected[name] = period_last.iloc[periods_to_keep:]
        return pd.DataFrame(collected, index=aggregated_data.index)

    # Volatility & Risk Insights
    volatility_metrics = aggregated_data[['ATR_mean', 'ATR_std', 'ATR_max', 'ATR_min']].rename(columns={
        'ATR_mean': 'Average ATR',
        'ATR_std': 'ATR Standard Deviation',
        'ATR_max': 'Maximum ATR',
        'ATR_min': 'Minimum ATR',
    })

    # Liquidity Insights
    volume_metrics = aggregated_data[['Volume_sum', 'Volume_mean']].rename(columns={
        'Volume_sum': 'Total Volume',
        'Volume_mean': 'Average Volume',
    })

    # Percentage-change sections
    macro_trends_df = _period_table(macro_columns, as_pct_change=True)
    market_trends_df = _period_table(['SPX_Close', 'NDX_Close', 'VIX_Close', 'TNX_Close'], as_pct_change=True)

    # Level sections
    fundamentals_df = _period_table([
        'EPS_YoY_Growth', 'EBITDA_YoY_Growth', 'PE_Ratio_YoY_Growth',
        'Net_Income_YoY_Growth', 'Free_Cash_Flow_Per_Share_YoY_Growth',
        'Operating_Margin_YoY_Growth', 'Debt_to_Equity_Ratio_YoY_Growth',
        'Return_on_Assets_YoY_Growth', 'Return_on_Equity_YoY_Growth'
    ])
    economic_df = _period_table([
        'PMI_YoY', 'GDP_YoY', 'Treasury_Yields_YoY', 'Housing_Starts_YoY',
        'Consumer_Confidence_YoY', 'Employment_YoY', 'Retail_Sales_YoY',
        'CPI_YoY', 'PPI_YoY', 'M2_Money_Supply_YoY'
    ])
    option_vol_df = _period_table(['ATM_IV_Call', 'ATM_IV_Put'])
    credit_risk_df = _period_table(['Debt_to_Equity_Ratio', 'Interest_Coverage_Ratio', 'Current_Ratio', 'Quick_Ratio'])

    # Each entry is (heading, table, trailing separator), in report order.
    sections = [
        ("\n#### Volatility & Risk Insights\n", volatility_metrics, "\n\n"),
        ("#### Volume & Liquidity Insights\n", volume_metrics, "\n\n"),
        ("#### Macro Trends\n", macro_trends_df, "\n\n"),
        ("#### Market Performance & Benchmarks\n", market_trends_df, "\n\n"),
        ("#### Fundamental Growth & Valuation Metrics\n", fundamentals_df, "\n\n"),
        ("#### Economic Indicators\n", economic_df, "\n\n"),
        ("#### Option Volatility (ATM Calls & Puts)\n", option_vol_df, "\n\n"),
        ("#### Credit & Debt Risk Metrics\n", credit_risk_df, "\n"),
    ]

    pieces = ["\n", "### Market and Financial Data Summary for ML Model\n"]
    for heading, table, trailer in sections:
        pieces.append(heading)
        pieces.append(tabulate(table, headers="keys", tablefmt="grid"))
        pieces.append(trailer)

    return "".join(pieces)

### Judge Handbook

In [None]:
from PIL import Image

def save_stacked_figures(figures, role, output_dir, scale_factor=3, dpi=600):
    """Render figures, trim their white margins, upscale and stack them into one PNG.

    Args:
        figures: Non-empty iterable of matplotlib figures whose canvas exposes
            ``buffer_rgba()``.
        role: Prefix of the output file name (``{role}_stacked_optimized.png``).
        output_dir: Destination folder; created when missing.
        scale_factor: Integer factor applied to the width and height of each
            trimmed figure before stacking.
        dpi: Resolution stored in the saved PNG's metadata.

    Returns:
        Path of the written PNG file.

    Raises:
        ValueError: If *figures* is empty.
    """
    # Local import keeps this block self-contained; PIL is already a file dependency.
    from PIL import ImageChops

    figures = list(figures)
    if not figures:
        raise ValueError("save_stacked_figures requires at least one figure")

    os.makedirs(output_dir, exist_ok=True)

    tiles = []
    for fig_source in figures:
        fig_source.canvas.draw()
        rendered = Image.fromarray(np.array(fig_source.canvas.buffer_rgba()))

        # Trim the white border. getbbox() alone only skips zero/transparent
        # pixels, so an opaque white margin would survive; compare against a
        # plain white canvas instead (flattening any transparency first).
        white_rgba = Image.new("RGBA", rendered.size, (255, 255, 255, 255))
        flattened = Image.alpha_composite(white_rgba, rendered).convert("RGB")
        white_rgb = Image.new("RGB", rendered.size, (255, 255, 255))
        content_box = ImageChops.difference(flattened, white_rgb).getbbox()
        trimmed = rendered.crop(content_box) if content_box else rendered

        # Scale up the resolution
        new_size = (trimmed.width * scale_factor, trimmed.height * scale_factor)
        tiles.append(trimmed.resize(new_size, Image.LANCZOS))

    # Transparent canvas, tall enough for all tiles and as wide as the widest one.
    total_height = sum(tile.height for tile in tiles)
    max_width = max(tile.width for tile in tiles)
    stacked_img = Image.new("RGBA", (max_width, total_height), (255, 255, 255, 0))

    y_offset = 0
    for tile in tiles:
        stacked_img.paste(tile, (0, y_offset))
        y_offset += tile.height

    # Save the optimized stacked image
    filename = f"{role}_stacked_optimized.png"
    filepath = os.path.join(output_dir, filename)
    stacked_img.save(filepath, dpi=(dpi, dpi))

    print(f"Saved optimized stacked figure: {filepath}")

    return filepath

def calculate_llm_cost(model, prompt_tokens, completion_tokens):
    """Calculate the cost of LLM execution based on token usage and model pricing.

    Args:
        model: Model name, matched case-insensitively against the pricing
            table. Unknown names and ``None`` use the GPT-4o-mini prices.
        prompt_tokens: Number of input tokens.
        completion_tokens: Number of output tokens.

    Returns:
        Tuple ``(input_cost, output_cost, total_cost)`` in USD.
    """
    # Prices are USD per token, written as "per million tokens / 1e6".
    # NOTE(review): these are hard-coded snapshots — confirm against the
    # providers' current price lists before relying on the figures.
    pricing = {
        "o1-mini": {"input_price": 3.00 / 1e6, "output_price": 12.00 / 1e6},
        "gpt-4o-mini": {"input_price": 0.15 / 1e6, "output_price": 0.60 / 1e6},
        "deepseek-chat": {"input_price": 0.014 / 1e6, "output_price": 0.14 / 1e6},
        "deepseek-reasoner": {"input_price": 0.055 / 1e6, "output_price": 0.219 / 1e6},
    }

    # Keys are lowercase so "GPT-4o-mini", "gpt-4o-mini" and "O1-Mini" all resolve.
    model_key = model.strip().lower() if isinstance(model, str) else ""
    model_prices = pricing.get(model_key, pricing["gpt-4o-mini"])

    input_cost = prompt_tokens * model_prices["input_price"]
    output_cost = completion_tokens * model_prices["output_price"]
    total_cost = input_cost + output_cost

    return input_cost, output_cost, total_cost


def _last_value(frame, column, default):
    """Return the last entry of ``frame[column]``, or *default* if missing or empty."""
    series = frame.get(column)
    if series is None or len(series) == 0:
        return default
    return series.iloc[-1]


def print_performance_metrics(trading_metrics, llm_trades, model_name="GPT-4o-mini"):
    """Return formatted trading performance metrics as a string while also printing them.

    Three grid tables are produced: the performance metrics, the LLM decision
    insights taken from the last row of *llm_trades*, and the token usage with
    its estimated cost.

    Args:
        trading_metrics: Mapping of metric name to value.
        llm_trades: DataFrame of LLM trades. Missing columns (or an empty
            frame) are reported as "N/A" / zero tokens instead of raising.
        model_name: Model name passed to ``calculate_llm_cost``.

    Returns:
        The full report text.
    """
    output = []

    # Tabulate performance metrics
    metrics_table = tabulate(
        [[k, v] for k, v in trading_metrics.items()],
        headers=["Metric", "Value"],
        tablefmt="grid"
    )
    output.append("\n=== Performance Metrics ===\n")
    output.append(metrics_table)

    # Extract LLM probabilities from the most recent row.
    # NOTE(review): the two "Token strategy" rows read the same columns as the
    # "Token Probability" rows; kept as is to preserve the report layout —
    # confirm which columns were intended.
    llm_probs = {
        "Long Probability": _last_value(llm_trades, "long_conf_score", "N/A"),
        "Short Probability": _last_value(llm_trades, "short_conf_score", "N/A"),
        "Long Token strategy": _last_value(llm_trades, "long_token_proba", "N/A"),
        "Short Token strategy": _last_value(llm_trades, "short_token_proba", "N/A"),
        "Long Token Probability": _last_value(llm_trades, "long_token_proba", "N/A"),
        "Short Token Probability": _last_value(llm_trades, "short_token_proba", "N/A"),
        "Perplexity": _last_value(llm_trades, "perplexity", "N/A"),
    }

    # Token metadata is only usable when the cell holds a dict.
    token_meta = {}
    for component in ("tokens_meta_strat", "tokens_meta_news", "tokens_meta_proba"):
        cell = _last_value(llm_trades, component, {})
        token_meta[component] = cell if isinstance(cell, dict) else {}
    tokens_meta_strat = token_meta["tokens_meta_strat"]
    tokens_meta_news = token_meta["tokens_meta_news"]
    tokens_meta_proba = token_meta["tokens_meta_proba"]

    # Extract total tokens from all ensemble components
    prompt_tokens_total = sum(meta.get("prompt_tokens", 0) for meta in token_meta.values())
    completion_tokens_total = sum(meta.get("completion_tokens", 0) for meta in token_meta.values())
    total_tokens = prompt_tokens_total + completion_tokens_total

    # Calculate LLM execution costs for the ensemble
    input_cost, output_cost, total_cost = calculate_llm_cost(model_name, prompt_tokens_total, completion_tokens_total)

    # Tabulate LLM decision insights
    llm_table = tabulate(
        [[k, v] for k, v in llm_probs.items()],
        headers=["LLM Decision Insights", "Value"],
        tablefmt="grid"
    )
    output.append("\n=== LLM Decision Insights ===\n")
    output.append(llm_table)

    # Tabulate token usage and cost analysis
    token_table = tabulate(
        [
            ["Prompt Tokens (Strategist)", tokens_meta_strat.get("prompt_tokens", 0)],
            ["Completion Tokens (Strategist)", tokens_meta_strat.get("completion_tokens", 0)],
            ["Prompt Tokens (Judge)", tokens_meta_proba.get("prompt_tokens", 0)],
            ["Completion Tokens (Judge)", tokens_meta_proba.get("completion_tokens", 0)],
            ["Prompt Tokens (News Analyst)", tokens_meta_news.get("prompt_tokens", 0)],
            ["Completion Tokens (News Analyst)", tokens_meta_news.get("completion_tokens", 0)],
            ["Total Prompt Tokens", prompt_tokens_total],
            ["Total Completion Tokens", completion_tokens_total],
            ["Total Tokens", total_tokens],
            ["Input Cost (USD)", f"${input_cost:.6f}"],
            ["Output Cost (USD)", f"${output_cost:.6f}"],
            ["Total Cost (USD)", f"${total_cost:.6f}"]
        ],
        headers=["Token Metrics", "Value"],
        tablefmt="grid"
    )
    output.append("\n=== Token Usage & Cost Analysis for Ensemble ===\n")
    output.append(token_table)

    # Join the output list into a single string
    final_output = "\n".join(output)

    # Print and return the output
    print(final_output)
    return final_output


In [None]:
def execute_strategy_evaluation(ticker=None,
                                execution_freq="M",
                                role="judge",
                                start_date=None,
                                end_date=None,
                                strategy_yaml_file=f'{LLM_PROMPTS_PATH}/strat_prompt_{PROMPT_VERSION}.yml',
                                news_yaml_file=None,
                                client=OPENAI_CLIENT,
                                model=OPENAI_MODEL,
                                llm_output_path=LLM_OUTPUT_PATH,
                                persona=PERSONA,
                                objectives=HIGH_OBJECTIVES,
                                risk=HIGH_RISK_PROFILE,
                                HISTORIC_PATH=HISTORIC_PATH):
    """Generate the survey material for one ticker and write it to disk.

    Loads ``{HISTORIC_PATH}/engineered_{ticker}_data.parquet``, obtains the LLM
    trades through ``generate_strategy_for_ticker`` and then walks the trade
    window in monthly steps. For every step a folder
    ``./judge_reviews/{ticker}_{execution_freq}_{model}/{strategy date}`` is
    filled with charts and a ``{role}_trade_summary.txt`` file.

    Args:
        ticker: Ticker symbol used to locate the parquet file and name outputs.
        execution_freq: Forwarded to the plotting and summary helpers.
        role: "judge" writes the pre-decision material (one year of history up
            to the strategy date); "llm" writes the decision, rationale and
            the following month's outcome. Any other value writes an empty
            summary.
        start_date, end_date: Forwarded to ``generate_strategy_for_ticker``.
        strategy_yaml_file, news_yaml_file: Prompt files forwarded to the
            strategy generator; news is requested only when a news file is given.
        client, model: LLM client and model name forwarded to the generator.
        llm_output_path: Output folder forwarded to the generator.
        persona, objectives, risk: Prompt ingredients forwarded to the generator.
        HISTORIC_PATH: Folder containing the engineered parquet files.

    Returns:
        None.
    """
    input_file = f"{HISTORIC_PATH}/engineered_{ticker}_data.parquet"
    stock_df = pd.read_parquet(input_file)
    stock_df.set_index('Date', inplace=True)

    output_dir = f"./judge_reviews/{ticker}_{execution_freq}_{model}"
    os.makedirs(output_dir, exist_ok=True)

    llm_trades_df = generate_strategy_for_ticker(ticker_df=stock_df,
                                                 ticker=ticker,
                                                 LLM_OUTPUT_PATH=llm_output_path,
                                                 persona=persona,
                                                 HIGH_OBJECTIVES=objectives,
                                                 HIGH_RISK_PROFILE=risk,
                                                 client=client,
                                                 model=model,
                                                 strategy_yaml_file=strategy_yaml_file,
                                                 news_yaml_file=news_yaml_file,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 max_news=5 if news_yaml_file is not None else 0)

    _, llm_trades_df = evaluate_trading_metrics(llm_trades_df)

    last_trade_date = llm_trades_df.index.max()
    strategy_date = llm_trades_df.index.min()

    # Step month by month until the next strategy date falls past the last trade.
    while strategy_date <= last_trade_date:
        strategy_folder = os.path.join(output_dir, f"{strategy_date.date()}")
        os.makedirs(strategy_folder, exist_ok=True)

        # History: the year before the decision. Effect: the month after it.
        historic_start_date = strategy_date - pd.DateOffset(years=1)
        effect_end_date = strategy_date + pd.DateOffset(months=1)

        historic_df = stock_df[(stock_df.index >= historic_start_date) & (stock_df.index < strategy_date)]
        effect_df = llm_trades_df[(llm_trades_df.index >= strategy_date) & (llm_trades_df.index <= effect_end_date)]

        # Read the decision from the first row of THIS period rather than from
        # the first row of the whole trade history.
        trade_action, llm_explanation = "No Trade", "No decision available."
        if not effect_df.empty:
            llm_strategy_details = effect_df.iloc[0]
            llm_decision = llm_strategy_details['trade_action']
            llm_explanation = llm_strategy_details['explanation']
            trade_action = "LONG" if llm_decision == 1 else "SHORT"

        text_output = []

        if role == "judge":
            news = historic_df['content'].tail(10).dropna().apply(lambda x: sanitize_text(' '.join([str(i).strip() for i in x if str(i).strip()])))
            news = news[news.str.strip() != ''].values

            text_output.append(f"\n=== Judge's Evaluation: {ticker.upper()} - Market Data Until {strategy_date.date()} ===\n")
            text_output.append("Based on the provided market data and news, predict the LLM’s decision:")
            text_output.append(f"News Factors Considered:\n{news}")

            macro_columns = ['SPX_Close', 'NDX_Close', 'VIX_Close', 'TNX_Close', 'IRX_Close']
            figures = plot_strategy_analysis(historic_df, strategy_freq=execution_freq, llm_trade_action=None)
            save_figures(figures, role, strategy_folder)
            save_stacked_figures(figures, role, strategy_folder)
            # Release the figures once saved so repeated calls do not pile them up.
            for fig in figures:
                plt.close(fig)

            summary_historic = summarize_data_before_decision(historic_df, macro_columns, freq=execution_freq)
            text_output.append(f"\n=== Market Summary: {ticker.upper()} (Last 3 Months) ===\n")
            text_output.append(summary_historic)

        elif role == "llm":
            text_output.append(f"\n=== {model} LLM Decision & Market Impact: {ticker.upper()} on {strategy_date.date()} ===\n")
            text_output.append(f"LLM Decision: {trade_action}")
            text_output.append(f"Rationale: {llm_explanation}")

            if 'news_factors' in effect_df.columns and not effect_df.empty:
                text_output.append(f"News Factors Considered: {effect_df['news_factors'].iloc[0]}")

            figures = plot_strategy_analysis(effect_df, strategy_freq=execution_freq, llm_trade_action=trade_action)
            save_figures(figures, role, strategy_folder)
            save_stacked_figures(figures, role, strategy_folder)
            for fig in figures:
                plt.close(fig)

            effect_metrics, effect_df = evaluate_trading_metrics(effect_df)
            text_output.append(print_performance_metrics(effect_metrics, effect_df, model))

        # Write the summary BEFORE advancing, so the final period is saved too.
        # UTF-8 is explicit because the text contains non-ASCII characters.
        with open(os.path.join(strategy_folder, f"{role}_trade_summary.txt"), "w", encoding="utf-8") as f:
            f.write('\n'.join(text_output))

        strategy_date = pd.offsets.BMonthBegin().rollforward(strategy_date + pd.DateOffset(months=1))


def save_figures(figures, role, output_dir):
    """Save each figure once per titled axis, naming the file after that title.

    The whole figure is written for every axis that has a non-empty title, as
    ``{role}_{title}.png`` with spaces and slashes in the title replaced by
    underscores. Axes without a title are skipped. *output_dir* is created
    when it does not exist.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    axes_with_owner = ((fig, ax) for fig in figures for ax in fig.axes)
    for fig, ax in axes_with_owner:
        title = ax.get_title()
        if not title:
            continue
        safe_title = title.replace(" ", "_").replace("/", "_")
        filepath = os.path.join(output_dir, "_".join([role, safe_title]) + ".png")
        fig.savefig(filepath, bbox_inches='tight')
        print(f"Saved: {filepath}")

# Generate Survey

In [None]:
# Survey cases as (ticker, start date, end date).
trade_data = [
    ("TSLA", "2019-05-05", "2019-06-02"),
    ("AAPL", "2019-04-07", "2019-05-05"),
    ("GOOGL", "2019-11-04", "2019-12-02"),
    ("MSFT", "2015-11-02", "2015-11-30"),
    ("GOOGL", "2017-01-02", "2017-01-30"),
    ("AAPL", "2015-01-05", "2015-02-02"),
]

# For every case, produce the judge material first and the LLM material second.
for index, (TARGET, STARTDATE, ENDDATE) in enumerate(trade_data):
    print(TARGET, STARTDATE, ENDDATE)
    for role in ("judge", "llm"):
        execute_strategy_evaluation(role=role,
                                    ticker=TARGET,
                                    start_date=STARTDATE,
                                    end_date=ENDDATE,
                                    strategy_yaml_file=f'{LLM_PROMPTS_PATH}/strat_prompt_{PROMPT_VERSION}.yml')
