# Experiment 1 - Tuning and Distilling LLM

## Prepare Notebook

In [1]:
import os
import sys
import logging
import warnings
import re

warnings.filterwarnings("ignore")

INSTALL_DEPS = True
if INSTALL_DEPS:
    %pip install yfinance==0.2.43
    %pip install openai==1.51.2

%load_ext dotenv

FUNDAMENTALS_PATH = os.getenv("FUNDAMENTALS_PATH")
LLM_PROMPTS_PATH = os.getenv("LLM_PROMPTS_PATH")
FUNDAMENTALS_PATH = os.getenv("FUNDAMENTALS_PATH")
HISTORIC_PATH = os.getenv("HISTORIC_PATH")
MACRO_PATH = os.getenv("MACRO_PATH")
OPTIONS_PATH = os.getenv("OPTIONS_PATH")
LLM_OUTPUT_PATH = os.getenv("LLM_OUTPUT_PATH")
LOGS_PATH = os.getenv("LOGS_PATH")
paths = [LLM_OUTPUT_PATH, LOGS_PATH]
for path in paths:
    if path and not os.path.exists(path):
        os.makedirs(path)

if "KAGGLE_KERNEL_RUN_TYPE" in os.environ:
    logging.info("Running in Kaggle...")
    for dirname, _, filenames in os.walk("/kaggle/input"):
        for filename in filenames:
            print(os.path.join(dirname, filename))
    DATA_PATH = "/kaggle/input/drl-dataset-quant"
    FUNDAMENTALS_PATH = DATA_PATH + FUNDAMENTALS_PATH
    HISTORIC_PATH = DATA_PATH + HISTORIC_PATH
    MACRO_PATH = DATA_PATH + MACRO_PATH
    OPTIONS_PATH = DATA_PATH + OPTIONS_PATH

    sys.path.insert(1, "/kaggle/usr/lib/drlutil")

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from pandas.tseries.offsets import BDay
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from openai import OpenAI

# Put the ``utils`` directory under the current working directory on the import
# path (once) so the helper modules imported next can be resolved.
module_path = os.path.abspath(os.path.join(os.getcwd(), 'utils'))
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from data_utils import *
from thesis_utils import *

2025-05-30 13:42:29.625347: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-30 13:42:29.698392: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-05-30 13:42:29.698448: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-05-30 13:42:29.705296: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-30 13:42:29.732081: I tensorflow/core/platform/cpu_feature_guar

## Environment and Constants

In [3]:
# Sample window used to slice the engineered data, written as YYYYMMDD strings.
# Original author's note: a lot of ranging in these dates across the market.
START_DATE = '20140701'
END_DATE = '20180901'
# Ticker whose engineered parquet file is loaded in the next cell.
TARGET = "META"
# Model name for the writer LLM, read from the OPENAI_MODEL environment
# variable (None when the variable is unset).
WRITER_OPENAI_MODEL = os.getenv("OPENAI_MODEL")
# OPENAI_MODEL ="gpt-4o"  # use models good with reasoning
# API key handed to the OpenAI client; read from the environment, never hardcoded.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Number of random dates sampled for the optimisation run (passed as num_samples).
DISTIL_SAMPLES = 1

In [4]:
# Location of the engineered feature file for the target ticker.
output_file = f"{HISTORIC_PATH}/engineered_{TARGET}_data.parquet"

# Bounds of the sample window as UTC timestamps.
sample_start_date = pd.Timestamp(START_DATE, tz='UTC')
sample_end_date = pd.Timestamp(END_DATE, tz='UTC')

# Read the file, index it by a UTC-aware Date column and keep only the rows
# that fall inside the sample window.
stock_aug_data = (
    pd.read_parquet(output_file)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], utc=True))
    .set_index('Date')
    .loc[sample_start_date:sample_end_date]
)
stock_aug_data.tail(3)


Unnamed: 0_level_0,Open,High,Low,Close,Volume,IV_Open,IV_High,IV_Low,IV_Close,IV_Volume,...,Volume_Weighted_Returns,BB_Upper,BB_Middle,BB_Lower,BB_Width,IV_Percentile,VIX_Impact,Momentum_Long,Momentum_Short,content
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-08-29 00:00:00+00:00,176.23,176.75,174.75,175.9,3549177,0.232958,0.234847,0.228545,0.228656,1.0,...,7551.845532,178.537536,175.434,172.330464,0.035288,0.0,-13.566409,False,False,"[In June 2018, Facebook launched a new transpa..."
2018-08-30 00:00:00+00:00,175.9,179.79,175.71,177.64,4614986,0.227688,0.234419,0.227228,0.233784,1.0,...,19667.184703,178.571371,176.382,174.192629,0.02465,0.834625,-1.945713,False,False,"[In June 2018, Facebook launched a new transpa..."
2018-08-31 00:00:00+00:00,177.2,177.6,174.99,175.73,3464919,0.232101,0.243975,0.232101,0.236451,1.0,...,6432.940765,178.193915,176.598,175.002085,0.018163,1.0,0.167012,False,False,"[In June 2018, Facebook launched a new transpa..."


# Writer Judge Loop

In [5]:
# Experiment tag; not referenced anywhere else in the visible part of the notebook.
RISK_EXPERIMENT = 'r'
# Version suffix of the writer prompt file; also attached to the judge request
# metadata when distillation storage is enabled.
PROMPT_VERSION = 'v2'
# YAML prompt templates for the writer and the judge. The judge prompt is
# pinned to v1 independently of PROMPT_VERSION.
WRITER_PROMPT_YML = f'{LLM_PROMPTS_PATH}/strat_prompt_{PROMPT_VERSION}.yml'
JUDGE_PROMPT_YML = f'{LLM_PROMPTS_PATH}/judge_prompt_v1.yml'

# NOTE(review): DISTIL is not read by the visible code; the outer_loop call
# passes a literal distil=False instead. Confirm which one is intended.
DISTIL = False
# Identifier used in log messages and as the 'role' metadata of judge requests.
META_ID = "self_improving_agent_v1"

In [6]:
import yaml
from pydantic import BaseModel


def enum_to_str_representer(dumper, data):
    """YAML representer that emits an enum member as its plain string value.

    Args:
        dumper: The YAML dumper; its ``represent_str`` method builds the node.
        data: The enum member being serialised; only ``data.value`` is read.

    Returns:
        Whatever ``dumper.represent_str`` returns for the member's value.
    """
    enum_text = data.value
    return dumper.represent_str(enum_text)

# Serialise Action members through enum_to_str_representer whenever they are
# dumped to YAML. Action is not defined in the visible notebook; presumably it
# comes from the wildcard utility imports (confirm).
yaml.add_representer(Action, enum_to_str_representer)

# Notebook-wide OpenAI client; also the default client of writer_loop.
OPENAI_CLIENT = OpenAI(api_key=OPENAI_API_KEY)
# NOTE(review): not referenced by the visible code; outer_loop builds its own
# per-period shared_memory list.
SHARED_MEMORY = []
# Structured response schema requested from the judge model (judge_loop passes
# this class as response_format).
# Comments are used instead of a class docstring on purpose: pydantic copies a
# docstring into the JSON schema, which would change the request sent to the model.
class JudgeResponse(BaseModel):
    # Judge's reasoning; stored by judge_loop under "strategy Rationale".
    explanation: str
    # Features the judge selected; stored by judge_loop under "Selected Features".
    features: str
    # Chosen trade action; judge_loop returns its .value.
    action: Action

In [7]:
def generate_random_sample_dates(dataframe, num_samples=15, seed=None):
    """Draw random dates, without replacement, from a frame's index.

    Args:
        dataframe: Frame whose index holds the candidate dates.
        num_samples: Number of dates requested. Capped at the number of index
            entries so a short frame yields fewer dates instead of raising.
        seed: Optional seed for a dedicated generator, making the draw
            reproducible. When None the global NumPy random state is used,
            so existing callers behave as before.

    Returns:
        A UTC ``DatetimeIndex`` with the sampled dates in draw order (empty
        when the frame has no rows or ``num_samples`` is not positive).
    """
    timestamps = pd.to_datetime(dataframe.index, utc=True)
    sample_size = max(0, min(num_samples, len(timestamps)))
    if sample_size == 0:
        return timestamps[:0]

    rng = np.random if seed is None else np.random.default_rng(seed)
    # Sample positions rather than values so tz-aware timestamps never have to
    # round-trip through a NumPy array.
    positions = rng.choice(len(timestamps), size=sample_size, replace=False)
    return pd.to_datetime(timestamps[positions])


## Optimization Loop

A cost-effective LLM writes the strategies, and an LLM with stronger reasoning judges them.
The judge's output will be used to distil the writer LLM.

In [8]:
def judge_loop(train_template, judge_template, model, shared_memory=None, client=None, distil=False):
    """Ask the judge model to refine a strategy and choose an action.

    Args:
        train_template: The strategy (or prompt) under review; placed in the
            judge context under ``'train_template'``.
        judge_template: Judge prompt template, filled via ``fill_yaml_template``.
        model: Name of the model that acts as judge.
        shared_memory: History shown to the judge; placed in the context under
            ``'shared_memory'`` as-is (may be None).
        client: OpenAI client. Falls back to the notebook-level
            ``OPENAI_CLIENT`` when None instead of failing on first use.
        distil: When True the completion is stored and tagged with
            ``META_ID`` / ``PROMPT_VERSION`` metadata.

    Returns:
        ``(opt_prompt, action)`` where ``opt_prompt`` is a dict with the keys
        ``"Selected Features"`` and ``"strategy Rationale"`` and ``action`` is
        the ``.value`` of the judge's chosen action.

    Raises:
        ValueError: If the model reply carries no parsed ``JudgeResponse``.
    """
    if client is None:
        client = OPENAI_CLIENT

    context = {
        'train_template': train_template,
        'shared_memory': shared_memory
    }
    judge_prompt = fill_yaml_template(context, judge_template)
    judge_prompt = yaml.dump(judge_prompt, default_flow_style=True, allow_unicode=True)
    judge_response = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "user", "content": judge_prompt}
        ],
        temperature=0.7,
        store=distil,
        metadata={
            'role': META_ID,
            'version': PROMPT_VERSION,
        } if distil else None,
        response_format=JudgeResponse,
        max_tokens=2300,
    )

    message = judge_response.choices[0].message
    verdict = message.parsed
    if verdict is None:
        # Without this check a reply with no parsed payload surfaces as an
        # opaque AttributeError on None a few lines further down.
        raise ValueError(
            f"Judge model returned no parsed response (refusal: {getattr(message, 'refusal', None)!r})"
        )

    opt_prompt = {
        "Selected Features": verdict.features,
        "strategy Rationale": verdict.explanation,
    }
    return opt_prompt, verdict.action.value

def backtest_strategy(action, historical_data, anchor_date, horizon=5*4):
    """Score an action over the business-day window that ends at the anchor date.

    The window spans ``anchor_date - BDay(horizon)`` through ``anchor_date``.
    Daily percentage changes of ``Close`` are kept as-is for ``'LONG'`` and
    sign-flipped for any other action value.

    Args:
        action: Action label; only ``'LONG'`` is treated as a long position.
        historical_data: Frame with a ``Close`` column, sliceable by date.
        anchor_date: Last date of the evaluation window.
        horizon: Window length in business days.

    Returns:
        Dict with ``"Cumulative Returns"`` (compounded daily returns),
        ``"Min Return"`` and ``"Max Return"`` (smallest / largest daily
        return). NaN results are reported as 0.0.
    """
    window_start = anchor_date - BDay(horizon)
    closes = historical_data.loc[window_start:anchor_date]['Close']

    direction = -1
    if action == 'LONG':
        direction = 1
    daily = (closes.pct_change() * direction).fillna(0)

    def _nan_to_zero(value):
        return 0.0 if pd.isna(value) else value

    summary = {
        "Cumulative Returns": (daily + 1).prod() - 1,
        "Min Return": daily.min(),
        "Max Return": daily.max(),
    }
    return {label: _nan_to_zero(value) for label, value in summary.items()}

def writer_loop(historical_df,
                anchor_date,
                train_template,
                persona=PERSONA,
                risk_profile=HIGH_RISK_PROFILE,
                objectives=HIGH_OBJECTIVES,
                horizon=5*4,
                model=WRITER_OPENAI_MODEL,
                client=OPENAI_CLIENT,
                distil=False,
                Last_LLM_Strat=None,
                Last_LLM_Strat_Action=None,
                Last_LLM_Strat_Returns=None,
                Last_LLM_Strat_Days=None,
                Peak_Returns=None,
                Trough_Returns=None):
    """Have the writer model produce a strategy for one anchor date and backtest it.

    Args:
        historical_df: Date-indexed market data with at least a ``Close`` column.
        anchor_date: Date the prompt context and the backtest are anchored on.
        train_template: Writer prompt template, filled via ``fill_yaml_template``.
        persona, risk_profile, objectives: Prompt ingredients forwarded to
            ``update_historical_data_context``.
        horizon: Length of the context slice (calendar days after
            ``anchor_date``) and of the backtest window (business days).
        model: Writer model name.
        client: OpenAI client used for the request.
        distil: Forwarded as ``store`` to the completion request.
        Last_LLM_Strat, Last_LLM_Strat_Action, Last_LLM_Strat_Returns,
        Last_LLM_Strat_Days, Peak_Returns, Trough_Returns: Feedback about the
            previous strategy, forwarded into the prompt context.

    Returns:
        ``(strategy, results, context_yaml)``: the parsed ``TradeStrategy``,
        the dict returned by ``backtest_strategy`` and the YAML prompt string
        that was sent to the model.

    Raises:
        ValueError: If the model reply carries no parsed ``TradeStrategy``.
    """
    # NOTE(review): the context slice looks forward from anchor_date in calendar
    # days while backtest_strategy looks back in business days -- confirm that
    # this asymmetry is intended.
    context = update_historical_data_context(
        historical_df.loc[anchor_date:anchor_date + pd.Timedelta(days=horizon)],
        persona=persona,
        HIGH_RISK_PROFILE=risk_profile,
        HIGH_OBJECTIVES=objectives,
        Last_LLM_Strat=Last_LLM_Strat,
        Last_LLM_Strat_Action=Last_LLM_Strat_Action,
        Last_LLM_Strat_Cum_Returns=Last_LLM_Strat_Returns,
        Last_LLM_Strat_Days=Last_LLM_Strat_Days,
        Peak_Returns=Peak_Returns,
        Trough_Returns=Trough_Returns,
    )
    writer_prompt = fill_yaml_template(context, train_template)
    context_yaml = yaml.dump(writer_prompt, default_flow_style=True, allow_unicode=True)
    writer_response = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "user", "content": context_yaml}
        ],
        temperature=0.7,
        store=distil,
        response_format=TradeStrategy,
    )

    message = writer_response.choices[0].message
    strategy = message.parsed
    if strategy is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            f"Writer model returned no parsed response (refusal: {getattr(message, 'refusal', None)!r})"
        )

    action = strategy.action.value
    # Forward the caller's horizon; previously the backtest silently fell back
    # to its own default whatever horizon was requested here.
    results = backtest_strategy(action, historical_df, anchor_date, horizon=horizon)
    return strategy, results, context_yaml



def _strategy_explanation(strategy):
    """Return the rationale text of a writer strategy object or a judge dict."""
    if isinstance(strategy, dict):
        return strategy.get("strategy Rationale")
    return getattr(strategy, "explanation", None)


def outer_loop(historical_df,
               writer_yaml_file,
               judge_yaml_file,
               client,
               model,
               max_iterations=2,
               target_cumulative_returns=0.025,
               horizon=5*4,
               distil=False,
               num_samples=15):
    """Run the writer/judge optimisation over randomly sampled months.

    For every sampled month and the two months after it, the writer proposes a
    strategy anchored on the first available date of that month, and the judge
    then refines it until the backtest reaches ``target_cumulative_returns``
    or ``max_iterations`` is exhausted.

    Args:
        historical_df: Date-indexed market data passed to writer and backtests.
        writer_yaml_file: Path of the writer prompt template.
        judge_yaml_file: Path of the judge prompt template.
        client: OpenAI client used for the judge requests. The writer call
            keeps ``writer_loop``'s own default client and model.
        model: Model name used for the judge requests.
        max_iterations: Maximum number of judge refinements per month.
        target_cumulative_returns: Cumulative return that ends refinement early.
        horizon: Horizon forwarded to the writer and to every backtest.
        distil: Forwarded to ``judge_loop``; the writer call always uses False.
        num_samples: Number of random dates used to pick the months.

    Returns:
        A list with one dict per processed month, holding ``"memory"``,
        ``"anchor_date"``, ``"strategy"`` and ``"backtest_results"``.
    """
    logging.info(f"[META_ID: {META_ID}] Starting outer loop")
    final_results = []
    train_template = load_yaml_template(writer_yaml_file)
    judge_template = load_yaml_template(judge_yaml_file)

    sampled_dates = generate_random_sample_dates(historical_df, num_samples=num_samples)
    unique_months = sorted({(dt.year, dt.month) for dt in sampled_dates})
    action = None
    cum_rets = None
    min_rets = None
    max_rets = None
    # Rationale of the last finished strategy, fed back to the next writer call.
    last_explanation = None

    total_iterations = len(unique_months) * 3
    with tqdm(total=total_iterations, desc="Optimizing Strategies") as monthly_pbar:
        for year, month in unique_months:
            # Run and curate strategy for 3 months.
            for offset in range(3):
                iter_year = year
                iter_month = month + offset
                if iter_month > 12:
                    iter_year += 1
                    iter_month -= 12
                sub_df = historical_df[(historical_df.index.year == iter_year) & (historical_df.index.month == iter_month)]
                if sub_df.empty:
                    monthly_pbar.update(1)
                    continue
                anchor_date = sub_df.index[0]

                iterations = 0
                shared_memory = []
                strategy, results, filled_train_template = writer_loop(historical_df,
                                                                        anchor_date,
                                                                        train_template,
                                                                        horizon=horizon,
                                                                        distil=False,
                                                                        Last_LLM_Strat=last_explanation,
                                                                        Last_LLM_Strat_Action=action,
                                                                        Last_LLM_Strat_Returns=cum_rets,
                                                                        Last_LLM_Strat_Days=horizon,
                                                                        Peak_Returns=max_rets,
                                                                        Trough_Returns=min_rets)
                # Keep the writer's untouched proposal: it is logged below and
                # its action stays valid even if no judge iteration runs.
                orig_strategy = strategy
                action = strategy.action.value

                # writer_loop already returns the prompt as a YAML string, so it
                # is recorded directly rather than being dumped a second time.
                shared_memory.append(f"Original Template:|\n\t'{filled_train_template}'\n")

                cum_rets = results["Cumulative Returns"]
                min_rets = results["Min Return"]
                max_rets = results["Max Return"]
                assert not any(pd.isna([cum_rets, min_rets, max_rets])), "Cumulative, Min, or Max Returns contain NaN values!"
                shared_memory.append(f"Original strategy:|\n\t'{orig_strategy}'")
                shared_memory.append(
                    f"Original Backtest:\n\tCumulative Returns: {cum_rets}\n\tMin Return: {min_rets}\n\tMax Return: {max_rets}"
                )

                # Inner loop for refining strategy
                while iterations < max_iterations:
                    strategy, action = judge_loop(
                        train_template=strategy,
                        judge_template=judge_template,
                        model=model,
                        client=client,
                        shared_memory=shared_memory,
                        distil=distil,
                    )
                    shared_memory.append(f"Tuned strategy {iterations}:|\n\t'{strategy}'")
                    iterations += 1

                    results = backtest_strategy(action, historical_df, anchor_date, horizon=horizon)
                    cum_rets = results["Cumulative Returns"]
                    min_rets = results["Min Return"]
                    max_rets = results["Max Return"]
                    # Validate the fresh values (the check used to run before the
                    # update and therefore only re-tested the previous ones).
                    assert not any(pd.isna([cum_rets, min_rets, max_rets])), "Cumulative, Min, or Max Returns contain NaN values!"
                    shared_memory.append(
                        f"Tuned strategy {iterations} Backtest:\n\tCumulative Returns: {cum_rets}\n\tMin Return: {min_rets}\n\tMax Return: {max_rets}"
                    )
                    if results["Cumulative Returns"] >= target_cumulative_returns:
                        logging.info("Target achieved. Refinement complete.")
                        break

                # The final strategy is a judge dict after refinement, or the
                # writer's object when no judge iteration ran.
                last_explanation = _strategy_explanation(strategy)

                final_results.append({
                    "memory": shared_memory,
                    "anchor_date": anchor_date,
                    "strategy": strategy,
                    "backtest_results": results
                })

                monthly_pbar.update(1)

    logging.info(f"[META_ID: {META_ID}] Completed outer loop")
    return final_results

# Run the writer/judge optimisation on the sampled window. The judge uses the
# same model as the writer here, distillation storage is off, and
# DISTIL_SAMPLES random dates decide which months are processed.
results = outer_loop(historical_df=stock_aug_data,
                     writer_yaml_file=WRITER_PROMPT_YML,
                     judge_yaml_file=JUDGE_PROMPT_YML,
                     client=OPENAI_CLIENT,
                     model=WRITER_OPENAI_MODEL,
                     distil=False,
                     num_samples=DISTIL_SAMPLES)


Optimizing Strategies:   0%|          | 0/3 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
from pprint import pprint
# Show the record of the last processed month. outer_loop returns an empty
# list when none of the sampled months has data, so guard the index access.
pprint(results[-1] if results else results)