In [234]:
import json
import tiktoken
from datetime import datetime
from openai import OpenAI
import yfinance as yf

tokenizer = tiktoken.get_encoding("cl100k_base")
client = OpenAI()

In [235]:
def get_info(ticker: str):
    stock = yf.Ticker(ticker)
    return stock.info

In [236]:
def get_history(
    ticker: str,
    period="1mo",
    interval="1d",
    start=None,
    end=None,
):
    """
    :Parameters:
        period : str
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Either Use period parameter or use start and end
        interval : str
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend last 60 days
        start: str
            Download start date string (YYYY-MM-DD) or _datetime, inclusive.
            Default is 99 years ago
            E.g. for start="2020-01-01", the first data point will be on "2020-01-01"
        end: str
            Download end date string (YYYY-MM-DD) or _datetime, exclusive.
            Default is now
            E.g. for end="2023-01-01", the last data point will be on "2022-12-31"
    """
    stock = yf.Ticker(ticker)
    return stock.history(
        period=period, interval=interval, start=start, end=end
    ).to_json()

In [237]:
today = datetime.now().strftime("%B %d, %Y")
output_types = """type Info = {
  address1: string;
  city: string;
  state: string;
  zip: string;
  country: string;
  phone: string;
  website: string;
  industry: string;
  industryKey: string;
  industryDisp: string;
  sector: string;
  sectorKey: string;
  sectorDisp: string;
  longBusinessSummary: string;
  fullTimeEmployees: number;
  companyOfficers: {
    maxAge: number;
    name: string;
    age?: number;
    title: string;
    yearBorn?: number;
    fiscalYear: number;
    totalPay?: number;
    exercisedValue: number;
    unexercisedValue: number;
  }[];
  auditRisk: number;
  boardRisk: number;
  compensationRisk: number;
  shareHolderRightsRisk: number;
  overallRisk: number;
  governanceEpochDate: number;
  compensationAsOfEpochDate: number;
  irWebsite: string;
  maxAge: number;
  priceHint: number;
  previousClose: number;
  open: number;
  dayLow: number;
  dayHigh: number;
  regularMarketPreviousClose: number;
  regularMarketOpen: number;
  regularMarketDayLow: number;
  regularMarketDayHigh: number;
  dividendRate: number;
  dividendYield: number;
  exDividendDate: number;
  payoutRatio: number;
  fiveYearAvgDividendYield: number;
  beta: number;
  trailingPE: number;
  forwardPE: number;
  volume: number;
  regularMarketVolume: number;
  averageVolume: number;
  averageVolume10days: number;
  averageDailyVolume10Day: number;
  bid: number;
  ask: number;
  bidSize: number;
  askSize: number;
  marketCap: number;
  fiftyTwoWeekLow: number;
  fiftyTwoWeekHigh: number;
  priceToSalesTrailing12Months: number;
  fiftyDayAverage: number;
  twoHundredDayAverage: number;
  trailingAnnualDividendRate: number;
  trailingAnnualDividendYield: number;
  currency: string;
  enterpriseValue: number;
  profitMargins: number;
  floatShares: number;
  sharesOutstanding: number;
  sharesShort: number;
  sharesShortPriorMonth: number;
  sharesShortPreviousMonthDate: number;
  dateShortInterest: number;
  sharesPercentSharesOut: number;
  heldPercentInsiders: number;
  heldPercentInstitutions: number;
  shortRatio: number;
  shortPercentOfFloat: number;
  impliedSharesOutstanding: number;
  bookValue: number;
  priceToBook: number;
  lastFiscalYearEnd: number;
  nextFiscalYearEnd: number;
  mostRecentQuarter: number;
  earningsQuarterlyGrowth: number;
  netIncomeToCommon: number;
  trailingEps: number;
  forwardEps: number;
  pegRatio: number;
  lastSplitFactor: string;
  lastSplitDate: number;
  enterpriseToRevenue: number;
  enterpriseToEbitda: number;
  '52WeekChange': number;
  'SandP52WeekChange': number;
  lastDividendValue: number;
  lastDividendDate: number;
  exchange: string;
  quoteType: string;
  symbol: string;
  underlyingSymbol: string;
  shortName: string;
  longName: string;
  firstTradeDateEpochUtc: number;
  timeZoneFullName: string;
  timeZoneShortName: string;
  uuid: string;
  messageBoardId: string;
  gmtOffSetMilliseconds: number;
  currentPrice: number;
  targetHighPrice: number;
  targetLowPrice: number;
  targetMeanPrice: number;
  targetMedianPrice: number;
  recommendationMean: number;
  recommendationKey: string;
  numberOfAnalystOpinions: number;
  totalCash: number;
  totalCashPerShare: number;
  ebitda: number;
  totalDebt: number;
  quickRatio: number;
  currentRatio: number;
  totalRevenue: number;
  debtToEquity: number;
  revenuePerShare: number;
  returnOnAssets: number;
  returnOnEquity: number;
  freeCashflow: number;
  operatingCashflow: number;
  earningsGrowth: number;
  revenueGrowth: number;
  grossMargins: number;
  ebitdaMargins: number;
  operatingMargins: number;
  financialCurrency: string;
  trailingPegRatio: number;
};

type PriceData = {
  Open: { [timestamp: string]: number };
  High: { [timestamp: string]: number };
  Low: { [timestamp: string]: number };
  Close: { [timestamp: string]: number };
  Volume: { [timestamp: string]: number };
  Dividends: { [timestamp: string]: number };
  "Stock Splits": { [timestamp: string]: number };
};"""

example0 = """User: Who is the CEO of MSFT?

Assistant: {
  "thought": "Yahoo Finance likely has this information. I'm looking it up...",
  "action": {"command": "get_info", "ticker": "MSFT"}
}

Assistant: {
  "observation": {...,"companyOfficers":[{"maxAge":1,"name":"Mr. Satya  Nadella","age":56,"title":"Chairman & CEO","yearBorn":1967,"fiscalYear":2023,"totalPay":9276400,"exercisedValue":0,"unexercisedValue":0},...],...}
}

Assistant: {
  "answer": "The CEO of MSFT is Satya Nadella."
}"""
example1 = """User: What was AAPL's highest stock price in the past month, and how does it compare to their 52-week high?

Assistant: {
  "thought": "To answer this question, I need to first get the current stock information for AAPL, including its 52-week high. Then, I'll need to retrieve the stock price history for the past month to find the highest price during that period.",
  "action": {"command": "get_info", "ticker": "AAPL"}
}

Assistant: {
  "observation": {"symbol": "AAPL", "shortName": "Apple Inc.", "fiftyTwoWeekHigh": 198.23, "currency": "USD", ...}
}

Assistant: {
  "thought": "I've got the current stock information for AAPL, including the 52-week high of $198.23. Now I need to get the stock price history for the past month to find the highest price during that period.",
  "action": {"command": "get_history", "ticker": "AAPL", "period": "1mo", "interval": "1d"}
}

Assistant: {
  "observation": {
    "High": {
      "1688407800": 192.67,
      "1688494200": 189.99,
      ...,
      "1690912200": 195.71
    },
    ...
  }
}

Assistant: {
  "thought": "I now have all the information needed to answer the question. Let me analyze the data and provide the answer.",
  "answer": "AAPL's highest stock price in the past month was $195.71. This is 1.27% lower than their 52-week high of $198.23. The highest price in the past month occurred on August 1, 2023 (assuming the last date in the data is the most recent), and it came close to, but did not exceed, the 52-week high."
}"""

examples = "\n\n".join(
    [
        f"""Example {i}:
```
{example}
```"""
        for i, example in enumerate([example0, example1])
    ]
)

In [238]:
system_message = f"""You are a helpful assistant. You run in a loop, seeking additional information to answer a user's question until you are able to answer the question.

Today is {today}.

The output types are:

```typescript
{output_types}
```

The commands to seek information are:

| Command | Arguments | Description | Output Format |
| --- | --- | --- | --- |
| get_info | ticker | Retrieves stock information from Yahoo Finance | Info |
| get_history | ticker, period, interval, start, end | Retrieves stock history from Yahoo Finance | PriceData |

The optional input types are:
- period: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
- interval: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
- start: YYYY-MM-DD
- end: YYYY-MM-DD
You may omit an optional input type if it is not needed. For period and interval the explicit values from the list must be used, for example you cannot use 1wk for the period.

Your response must be in pure JSON format without any additional markers or formatting. The JSON will include a "Thought" + "Action" to retrieve data that you need in order to answer the question, or it will include the "Answer". When data has been retrieved, it will be included as an "Observation".

You will continue generating thoughts and actions until you get to an answer, or conclude that you can't.

IMPORTANT: Ensure that your entire response is valid JSON. Do not include any text outside of the JSON structure, and do not use code block markers or language identifiers. The answer field in the JSON must be a descriptive string, it should not be a JSON.

{examples}
"""
print("Tokens:", len(tokenizer.encode(system_message)))
print()
print(system_message)

Tokens: 2015

You are a helpful assistant. You run in a loop, seeking additional information to answer a user's question until you are able to answer the question.

Today is July 28, 2024.

The output types are:

```typescript
type Info = {
  address1: string;
  city: string;
  state: string;
  zip: string;
  country: string;
  phone: string;
  website: string;
  industry: string;
  industryKey: string;
  industryDisp: string;
  sector: string;
  sectorKey: string;
  sectorDisp: string;
  longBusinessSummary: string;
  fullTimeEmployees: number;
  companyOfficers: {
    maxAge: number;
    name: string;
    age?: number;
    title: string;
    yearBorn?: number;
    fiscalYear: number;
    totalPay?: number;
    exercisedValue: number;
    unexercisedValue: number;
  }[];
  auditRisk: number;
  boardRisk: number;
  compensationRisk: number;
  shareHolderRightsRisk: number;
  overallRisk: number;
  governanceEpochDate: number;
  compensationAsOfEpochDate: number;
  irWebsite: string;
  

In [239]:
def act(message: str, debug=False) -> tuple[dict, str]:
    if message is None or message == "":
        return ({"error": "No message provided"}, "")

    data = {}
    try:
        data = json.loads(message)
    except Exception as e:
        if debug:
            print(message)
        return ({"error": str(e)}, "")

    if debug:
        print("Assistant:")
        print(data)

    if "answer" in data:
        return ({}, data["answer"])

    if "action" not in data:
        return ({"error": "No action provided"}, "")

    action = data["action"]

    if action["command"] == "get_info":
        if "ticker" not in action:
            return ({"error": "No ticker provided"}, "")

        ticker = action["ticker"]

        try:
            return (get_info(ticker), "")
        except Exception as e:
            return ({"error": str(e)}, "")
    elif action["command"] == "get_history":
        if "ticker" not in action:
            return ({"error": "No ticker provided"}, "")

        ticker = action["ticker"]  # This should be required, so keep it as is
        period = action.get("period", "1mo")  # Default to "1mo" if not provided
        interval = action.get("interval", "1d")  # Default to "1d" if not provided
        start = action.get("start")  # Will be None if not provided
        end = action.get("end")  # Will be None if not provided

        try:
            return (get_history(ticker, period, interval, start, end), "")
        except Exception as e:
            return ({"error": str(e)}, "")

    return ({"error": "Unknown command"}, "")

In [240]:
def loop(query: str, debug=False):
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": query},
    ]

    while True:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
        )

        assistant_message = response.choices[0].message.content

        observation, answer = act(assistant_message, debug)
        if answer:
            return answer

        if debug:
            print("Observation:")
            print_observation = json.dumps(observation)
            truncated_observation = (
                print_observation[:100] + "..."
                if len(print_observation) > 100
                else print_observation
            )
            print(truncated_observation)

        messages.append(
            {"role": "assistant", "content": json.dumps({"observation": observation})}
        )

In [241]:
answer = loop("Who is the CEO of NVDA?", True)
print()
print("Answer:", answer)

Assistant:
{'thought': 'I will need to look up the stock information for NVDA to determine who the CEO is.', 'action': {'command': 'get_info', 'ticker': 'NVDA'}}
Observation:
{"address1": "2788 San Tomas Expressway", "city": "Santa Clara", "state": "CA", "zip": "95051", "cou...
Assistant:
{'answer': 'The CEO of Nvidia Corporation (NVDA) is Jen-Hsun Huang.'}

Answer: The CEO of Nvidia Corporation (NVDA) is Jen-Hsun Huang.


In [242]:
answer = loop(
    "What was NVDA's highest stock price in the past month, and how does it compare to their 52-week high",
    True,
)
print()
print("Answer:", answer)

Assistant:
{'thought': "To answer this question, I need to first get the current stock information for NVDA, including its 52-week high. Then, I'll need to retrieve the stock price history for the past month to find the highest price during that period.", 'action': {'command': 'get_info', 'ticker': 'NVDA'}}
Observation:
{"address1": "2788 San Tomas Expressway", "city": "Santa Clara", "state": "CA", "zip": "95051", "cou...
Assistant:
{'thought': 'I have the current stock information for NVDA, including the 52-week high of $140.76. Now I need to get the stock price history for the past month to find the highest price during that period.', 'action': {'command': 'get_history', 'ticker': 'NVDA', 'period': '1mo', 'interval': '1d'}}
Observation:
"{\"Open\":{\"1719460800000\":124.0999984741,\"1719547200000\":124.5800018311,\"1719806400000\":123....
Assistant:
{'answer': "NVDA's highest stock price in the past month was $136.15, which is 3.83% lower than the 52-week high of $140.76."}

Answer: 

In [243]:
answer = loop(
    "What was the price of NVDA 2 years back and how does it compare to now?",
    True,
)
print()
print("Answer:", answer)

Assistant:
{'thought': "To answer this question, I need to get the historical stock price for NVDA from two years ago and the current stock price. I'll start by retrieving the current stock price information.", 'action': {'command': 'get_info', 'ticker': 'NVDA'}}
Observation:
{"address1": "2788 San Tomas Expressway", "city": "Santa Clara", "state": "CA", "zip": "95051", "cou...
Assistant:
{'thought': 'I have access to the current price data and other stock details for NVDA. To answer the query, I need to retrieve the historical price data for NVDA for exactly two years back from today.', 'action': {'command': 'get_history', 'ticker': 'NVDA', 'start': '2022-07-28', 'end': '2022-07-29', 'interval': '1d'}}
Observation:
"{\"Open\":{\"1658980800000\":17.9547773515},\"High\":{\"1658980800000\":18.1195907252},\"Low\":{\"1...
Assistant:
{'answer': 'Two years ago, on July 28, 2022, the price of NVDA was approximately $17.96. The current price of NVDA is $113.06. This shows a substantial increas