In [41]:
from dotenv import load_dotenv

load_dotenv()

True

In [42]:
import pandas as pd

apple_data = pd.read_pickle("apple_data.pkl")
apple_data

Unnamed: 0_level_0,close,high,low,open,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-12-12,0.098726,0.099155,0.098726,0.098726,469033600
1980-12-15,0.093575,0.094005,0.093575,0.094005,175884800
1980-12-16,0.086707,0.087136,0.086707,0.087136,105728000
1980-12-17,0.088853,0.089282,0.088853,0.088853,86441600
1980-12-18,0.091429,0.091858,0.091429,0.091429,73449600
...,...,...,...,...,...
2025-04-29,211.210007,212.240005,208.369995,208.690002,36827600
2025-04-30,212.500000,213.580002,206.669998,209.300003,52286500
2025-05-01,213.320007,214.559998,208.899994,209.080002,57365700
2025-05-02,205.350006,206.990005,202.160004,206.089996,100912500


In [43]:
apple_data.index.max()

Timestamp('2025-05-05 00:00:00')

In [44]:
from llama_index.llms.openai_like import OpenAILike
import os

llm = OpenAILike(model="gpt-4o-mini", api_base=os.getenv("LITELLM_API_BASE"), api_key=os.getenv("LITELLM_API_KEY"),
                 temperature=0)
llm.is_function_calling_model = True
llm.is_chat_model = True

In [45]:
import io
import contextlib
from llama_index.core.tools import FunctionTool
from pydantic import BaseModel, Field
import ast
import builtins
import numpy as np

class CodeToolInputSchema(BaseModel):
    code: str = Field(
        description="The code to be executed, assume the data is present in the data variable, pandas is imported as pd, print the final answer you wanna see. You have access no other variable")


class CodeToolInfo:
    data: pd.DataFrame


code_tool_info = CodeToolInfo()

safe_builtin_names = [
    # functions
    "abs", "all", "any", "bool", "callable", "chr", "dict", "enumerate", "filter",
    "float", "int", "len", "list", "map", "max", "min", "next", "pow", "print",
    "range", "reversed", "round", "set", "slice", "sorted", "str", "sum", "tuple",
    "type", "zip",

    # exceptions
    "ArithmeticError", "AssertionError", "AttributeError", "EOFError", "Exception",
    "FloatingPointError", "ImportError", "IndexError", "KeyError", "KeyboardInterrupt",
    "LookupError", "MemoryError", "NameError", "NotImplementedError", "OSError",
    "OverflowError", "ReferenceError", "RuntimeError", "StopIteration", "SyntaxError",
    "SystemError", "TypeError", "UnboundLocalError", "ValueError", "ZeroDivisionError"
]

allowed_builtins = {name: getattr(builtins, name) for name in safe_builtin_names}


def exec_code(code):
    try:
        tree = ast.parse(code)

        if isinstance(tree.body[-1], ast.Expr):
            last_expr = tree.body[-1].value
            print_call = ast.Expr(
                value=ast.Call(
                    func=ast.Name(id='print', ctx=ast.Load()),
                    args=[last_expr],
                    keywords=[]
                )
            )
            tree.body[-1] = print_call

        tree = ast.fix_missing_locations(tree)
        compiled = compile(tree, filename="<ast>", mode="exec")
        stdout = io.StringIO()
        stderr = io.StringIO()

        local_vars = {'pd': pd, 'data': code_tool_info.data.copy(), "print": print, "np": np}

        with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
            try:
                exec(compiled, {"__builtins__": allowed_builtins}, local_vars)
            except Exception as e:
                print(f"Exception: {e}", file=stderr)
        output = f"""
    stdout:
    {stdout.getvalue()}
    --
    stderr:
    {stderr.getvalue()}
    """
        return output
    except Exception as e:
        return f"Exception: {e}"

code_tool = FunctionTool.from_defaults(
    exec_code,
    name="code_tool",
    description="Use this tool to execute pandas code, the dataframe is available in the variable data, pandas is imported as pd. The tool outputs the stdout and stderr streams. You cannot import anything and can only use pandas and numpy",
    fn_schema=CodeToolInputSchema
)


In [46]:
from llama_index.agent.openai import OpenAIAgent

SYSTEM_PROMPT = f"""
You are a financial trading assistant operating on a daily time scale. Your primary objective is to analyze daily stock data and make informed trading decisions.

# Available Tools
Code Interpreter: Utilize this tool to execute Python code for data analysis. It's especially useful for processing historical stock data using libraries like pandas.

You can find the historical data in a variable `data`, pandas is already imported as `pd`, print the final answer you wanna see
data.head(2) gives:

{apple_data.head(2).to_markdown()}


Date is the index

Print

# Daily Workflow
For each trading day:

- Data Analysis:

Review the provided date, open, and close prices for the day.

Employ the Code Interpreter to analyze historical data, identifying trends, patterns, or anomalies that could influence trading decisions.


- Decision Making:

Based on your analysis and news assessment, determine the number of shares to trade.

Your decision should be an integer within the range [-k, k], where:

Positive values indicate buying shares.

Negative values indicate selling shares.

Zero indicates no action.

- Output:

Clearly state your decision in the format: DECISION: [number]. For example, DECISION: 3 or DECISION: -2.

Provide a brief rationale for your decision, referencing specific data points or news items that influenced your choice.

Constraints & Guidelines
Trading Limits: Ensure that the number of shares traded does not exceed the absolute value of k in either direction.

Consistency: Maintain a consistent decision-making process, documenting your rationale for each action to facilitate future reviews and audits.

Tool Usage: Effectively utilize the available tools to support your analysis. If a tool fails or provides insufficient information, note this in your rationale.

Ethical Considerations: Avoid making decisions based on unverified or speculative information. Base your actions on concrete data analysis

Error Handling: If you encounter errors or inconsistencies in data or tool outputs, document these issues and proceed with caution, making decisions based on the most reliable information available.


"""

SYSTEM_PROMPT

"\nYou are a financial trading assistant operating on a daily time scale. Your primary objective is to analyze daily stock data and make informed trading decisions.\n\n# Available Tools\nCode Interpreter: Utilize this tool to execute Python code for data analysis. It's especially useful for processing historical stock data using libraries like pandas.\n\nYou can find the historical data in a variable `data`, pandas is already imported as `pd`, print the final answer you wanna see\ndata.head(2) gives:\n\n| Date                |     close |      high |       low |      open |      volume |\n|:--------------------|----------:|----------:|----------:|----------:|------------:|\n| 1980-12-12 00:00:00 | 0.0987259 | 0.0991551 | 0.0987259 | 0.0987259 | 4.69034e+08 |\n| 1980-12-15 00:00:00 | 0.0935754 | 0.0940046 | 0.0935754 | 0.0940046 | 1.75885e+08 |\n\n\nDate is the index\n\nPrint\n\n# Daily Workflow\nFor each trading day:\n\n- Data Analysis:\n\nReview the provided date, open, and close pric

In [47]:
test_data = apple_data.loc["2023-11-01":]
test_data

Unnamed: 0_level_0,close,high,low,open,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-11-01,172.704208,172.962311,168.882215,169.755817,56934900
2023-11-02,176.278015,176.486479,174.183367,174.242928,77334800
2023-11-03,175.364700,175.533477,172.088723,172.972247,79763700
2023-11-06,177.925934,178.124476,174.927918,175.096679,63841300
2023-11-07,180.497116,181.112600,177.667846,177.876310,70530000
...,...,...,...,...,...
2025-04-29,211.210007,212.240005,208.369995,208.690002,36827600
2025-04-30,212.500000,213.580002,206.669998,209.300003,52286500
2025-05-01,213.320007,214.559998,208.899994,209.080002,57365700
2025-05-02,205.350006,206.990005,202.160004,206.089996,100912500


In [48]:
import re
import json

balance = 10_000
shares_held = 0
cost_basis = 0

MAX_ITERATIONS = 500

with open("trading_run.jsonl", "w") as jsonl_file:
    for i, (index, row) in enumerate(test_data.iterrows()):
        date = index
        close = row['close']
        high = row['high']
        low = row['low']
        open_ = row['open']
        volume = row['volume']

        net_worth = balance + shares_held * close
        buy_range = int(balance / close)

        if shares_held == 0:
            shares_held_query = f"""
            You can output the decisions from {shares_held}, ..., {buy_range}

            0 meaning do nothing, and {buy_range} meaning buy all shares possible
            """
        else:
            shares_held_query = f"""
            You can output the decisions from -{shares_held}, ..., {buy_range}

            -{shares_held} meaning sell all shares, and {buy_range} meaning buy all shares possible
            """

        query = f"""
            Date: {date.strftime('%Y-%m-%d')}
            Close: {close}
            High: {high}
            Low: {low}
            Open: {open_}
            Volume: {volume}

            Balance: {balance}
            Shares Held: {shares_held}
            Cost Basis: {cost_basis}
            Net Worth: {net_worth}

            Generate the action to be taken using the analysis done using the tools provided in the format DECISION: [number]. For example, DECISION: 3 or DECISION: -2.

            {shares_held_query}

            Always use the code tool before answering, use pandas to analyze the data variable already present, do not create a new one. Try to do some new analyses too.
            """
        data = apple_data.loc[:date + pd.Timedelta(days=1)]
        code_tool_info.data = data
        first_out = True
        first_response = None
        agent = OpenAIAgent.from_tools(tools=[code_tool], llm=llm, verbose=True, system_prompt=SYSTEM_PROMPT)
        while True:
            retries = 5
            while retries > 0:
                try:
                    streaming_response = agent.chat(query)
                    break
                except Exception as e:
                    print(e)
                    retries -= 1
            if first_out:

                first_response = streaming_response
            first_out = False
            ans = streaming_response.response

            decision_match = re.search(r'DECISION:\s*(-?[0-9]+)', ans)
            if decision_match:
                value = int(decision_match.group(1))
                if buy_range >= value >= -shares_held:
                    new_shares_held = shares_held + value
                    balance -= value * close
                    if value > 0:
                        cost_basis = ((cost_basis * shares_held) + (value * close)) / new_shares_held
                    if value == shares_held:
                        cost_basis = 0
                    shares_held = new_shares_held

                    log_entry = {
                        "date": date.strftime('%Y-%m-%d'),
                        "close": close,
                        "high": high,
                        "low": low,
                        "open": open_,
                        "volume": volume,
                        "balance": balance,
                        "shares_held": shares_held,
                        "cost_basis": cost_basis,
                        "net_worth": balance + shares_held * close,
                        "action": value,
                        "llm_output": ans.strip(),
                        "llm_tool_calls": [{"code": source.raw_input["kwargs"]["code"], "output": source.raw_output} for
                                           source in first_response.sources]
                    }
                    jsonl_file.write(json.dumps(log_entry) + "\n")
                    jsonl_file.flush()
                    break
                else:
                    query = f"The DECISION value is outside range [-{shares_held}, {buy_range}]"
            else:
                query = f"DECISION: number not found output the decision as DECISION: number exactly, for example: DECISION: 0"

        if i >= MAX_ITERATIONS - 1:
            break
        print("==========")



Added user message to memory: 
            Date: 2023-11-01
            Close: 172.70420837402344
            High: 172.9623111800798
            Low: 168.88221462983017
            Open: 169.75581666227828
            Volume: 56934900.0

            Balance: 10000
            Shares Held: 0
            Cost Basis: 0
            Net Worth: 10000.0

            Generate the action to be taken using the analysis done using the tools provided in the format DECISION: [number]. For example, DECISION: 3 or DECISION: -2.

            
            You can output the decisions from 0, ..., 57

            0 meaning do nothing, and 57 meaning buy all shares possible
            

            Always use the code tool before answering, use pandas to analyze the data variable already present, do not create a new one. Try to do some new analyses too.
            
=== Calling Function ===
Calling function: code_tool with args: {"code": "data['daily_return'] = data['close'].pct_change()\n\n# Calculate

In [49]:
log_entry

{'date': '2025-05-05',
 'close': np.float64(199.34500122070312),
 'high': np.float64(204.10000610351562),
 'low': np.float64(198.2100067138672),
 'open': np.float64(203.125),
 'volume': np.float64(47305475.0),
 'balance': np.float64(11292.818908691406),
 'shares_held': 0,
 'cost_basis': np.float64(206.42519067834925),
 'net_worth': np.float64(11292.818908691406),
 'action': -24,
 'llm_output': 'Based on the analysis conducted for the date 2025-05-05, here are the key findings:\n\n- **Profit/Loss per Share**: -$7.08\n- **Total Profit/Loss**: -$169.92\n- **Net Worth**: $11,292.82\n- **Price Change**: -$3.78\n- **Price Change Percentage**: -1.86%\n- **Volume Analysis**: Below average\n\n### Decision Rationale:\n1. The current close price is significantly lower than the cost basis, resulting in a loss per share. This indicates that holding the shares is currently unprofitable.\n2. The price has decreased by 1.86% from the opening price, suggesting a bearish trend for the day.\n3. The tradi