In [58]:
import tiktoken
from openai import OpenAI
import yfinance as yf

# Tokenizer for the "cl100k_base" encoding; used further down to count the
# number of tokens in the system prompt.
tokenizer = tiktoken.get_encoding("cl100k_base")
# OpenAI API client, constructed with default arguments.
client = OpenAI()

In [59]:
def get_info(ticker: str):
    """Return the ``info`` attribute of the yfinance ``Ticker`` for ``ticker``.

    :Parameters:
        ticker : str
            Ticker symbol handed to ``yf.Ticker``.
    """
    return yf.Ticker(ticker).info

In [60]:
def get_history(
    ticker: str,
    period="1mo",
    interval="1d",
    start=None,
    end=None,
) -> str:
    """
    Fetch the price history of ``ticker`` via yfinance and return it as JSON.

    :Parameters:
        ticker : str
            Ticker symbol handed to ``yf.Ticker``.
        period : str
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Either use the period parameter or use start and end.
            Ignored when both start and end are supplied.
        interval : str
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend past the last 60 days
        start: str
            Download start date string (YYYY-MM-DD) or _datetime, inclusive.
            Default is 99 years ago
            E.g. for start="2020-01-01", the first data point will be on "2020-01-01"
        end: str
            Download end date string (YYYY-MM-DD) or _datetime, exclusive.
            Default is now
            E.g. for end="2023-01-01", the last data point will be on "2022-12-31"

    :Returns:
        str
            The result of ``history(...)`` serialised with ``to_json()``.
    """
    # An explicit start/end pair already fixes the date range, so the default
    # period must not be forwarded with it. Period and start/end are documented
    # as alternatives; NOTE(review): recent yfinance releases appear to reject
    # all three being set at once, while older ones ignore period when start is
    # given - confirm against the installed version.
    if start and end:
        period = None
    stock = yf.Ticker(ticker)
    return stock.history(
        period=period, interval=interval, start=start, end=end
    ).to_json()

In [61]:
# Human-readable date that is interpolated into the system prompt
# ("Today is ...").
# NOTE(review): this is a hard-coded literal, so it presumably has to be
# edited by hand before the notebook is re-run on another day - confirm.
today = "July 28th, 2024"
# TypeScript-style type declarations that are pasted verbatim into the system
# prompt. The prompt's command table names `Info` as the output format of
# get_info and `PriceData` as the output format of get_history.
# NOTE(review): the field list is maintained by hand; whether it matches what
# yfinance actually returns for every ticker is not checked here.
output_types = """type Info = {
  address1: string;
  city: string;
  state: string;
  zip: string;
  country: string;
  phone: string;
  website: string;
  industry: string;
  industryKey: string;
  industryDisp: string;
  sector: string;
  sectorKey: string;
  sectorDisp: string;
  longBusinessSummary: string;
  fullTimeEmployees: number;
  companyOfficers: {
    maxAge: number;
    name: string;
    age?: number;
    title: string;
    yearBorn?: number;
    fiscalYear: number;
    totalPay?: number;
    exercisedValue: number;
    unexercisedValue: number;
  }[];
  auditRisk: number;
  boardRisk: number;
  compensationRisk: number;
  shareHolderRightsRisk: number;
  overallRisk: number;
  governanceEpochDate: number;
  compensationAsOfEpochDate: number;
  irWebsite: string;
  maxAge: number;
  priceHint: number;
  previousClose: number;
  open: number;
  dayLow: number;
  dayHigh: number;
  regularMarketPreviousClose: number;
  regularMarketOpen: number;
  regularMarketDayLow: number;
  regularMarketDayHigh: number;
  dividendRate: number;
  dividendYield: number;
  exDividendDate: number;
  payoutRatio: number;
  fiveYearAvgDividendYield: number;
  beta: number;
  trailingPE: number;
  forwardPE: number;
  volume: number;
  regularMarketVolume: number;
  averageVolume: number;
  averageVolume10days: number;
  averageDailyVolume10Day: number;
  bid: number;
  ask: number;
  bidSize: number;
  askSize: number;
  marketCap: number;
  fiftyTwoWeekLow: number;
  fiftyTwoWeekHigh: number;
  priceToSalesTrailing12Months: number;
  fiftyDayAverage: number;
  twoHundredDayAverage: number;
  trailingAnnualDividendRate: number;
  trailingAnnualDividendYield: number;
  currency: string;
  enterpriseValue: number;
  profitMargins: number;
  floatShares: number;
  sharesOutstanding: number;
  sharesShort: number;
  sharesShortPriorMonth: number;
  sharesShortPreviousMonthDate: number;
  dateShortInterest: number;
  sharesPercentSharesOut: number;
  heldPercentInsiders: number;
  heldPercentInstitutions: number;
  shortRatio: number;
  shortPercentOfFloat: number;
  impliedSharesOutstanding: number;
  bookValue: number;
  priceToBook: number;
  lastFiscalYearEnd: number;
  nextFiscalYearEnd: number;
  mostRecentQuarter: number;
  earningsQuarterlyGrowth: number;
  netIncomeToCommon: number;
  trailingEps: number;
  forwardEps: number;
  pegRatio: number;
  lastSplitFactor: string;
  lastSplitDate: number;
  enterpriseToRevenue: number;
  enterpriseToEbitda: number;
  '52WeekChange': number;
  'SandP52WeekChange': number;
  lastDividendValue: number;
  lastDividendDate: number;
  exchange: string;
  quoteType: string;
  symbol: string;
  underlyingSymbol: string;
  shortName: string;
  longName: string;
  firstTradeDateEpochUtc: number;
  timeZoneFullName: string;
  timeZoneShortName: string;
  uuid: string;
  messageBoardId: string;
  gmtOffSetMilliseconds: number;
  currentPrice: number;
  targetHighPrice: number;
  targetLowPrice: number;
  targetMeanPrice: number;
  targetMedianPrice: number;
  recommendationMean: number;
  recommendationKey: string;
  numberOfAnalystOpinions: number;
  totalCash: number;
  totalCashPerShare: number;
  ebitda: number;
  totalDebt: number;
  quickRatio: number;
  currentRatio: number;
  totalRevenue: number;
  debtToEquity: number;
  revenuePerShare: number;
  returnOnAssets: number;
  returnOnEquity: number;
  freeCashflow: number;
  operatingCashflow: number;
  earningsGrowth: number;
  revenueGrowth: number;
  grossMargins: number;
  ebitdaMargins: number;
  operatingMargins: number;
  financialCurrency: string;
  trailingPegRatio: number;
};

type PriceData = {
  Open: { [timestamp: string]: number };
  High: { [timestamp: string]: number };
  Low: { [timestamp: string]: number };
  Close: { [timestamp: string]: number };
  Volume: { [timestamp: string]: number };
  Dividends: { [timestamp: string]: number };
  "Stock Splits": { [timestamp: string]: number };
};"""

example0 = """User: What is the population of Philadelphia?

Assistant: {
  "thought": "Wikipedia likely has this information. I'm looking it up...",
  "action": {"command": "wikipedia", "article": "Philadelphia"}
}

Assistant: {
  "observation": "Philadelphia, often called Philly, is the largest city in the Commonwealth of Pennsylvania and the second-largest city in both the Northeast megalopolis and Mid-Atlantic regions after New York City. It is one of the most historically significant cities in the United States and served as the nation's capital city until 1800. Philadelphia is the nation's sixth-largest city with a population of 1,603,797 as of the 2020 census. Since 1854, the city has been coextensive with Philadelphia County, the most populous county in Pennsylvania and the urban core of the Delaware Valley, the nation's seventh-largest and one of the world's largest metropolitan regions with 6.245 million residents in 2020. Philadelphia is known for its extensive contributions to American history and for its role in the life sciences, business and industry, art, literature, and music."
}

Assistant: {
  "answer": "The population of Philadelphia, as of the 2020 census, is 1,603,797 people."
}"""
example1 = """User: What is my manager's city?

Assistant: {
  "thought": "Let me look up who your manager is. First, I need to look up your profile.",
  "action": {"command": "get_employee", "id": 92352}
}

Assistant: {
  "observation": {
    "id": 78334,
    "name": "Ms. Manager",
    "location_id": 8832
  }
}

Assistant: {
  "thought": "Your manager is Ms. Manager. I'm looking up their location.",
  "action": {"command": "get_location", "id": 8832}
}

Assistant: {
  "observation": {
    "id": 8832,
    "name": "Philadelphia"
  }
}

Assistant: {
  "answer": "Your manager lives in Philadelphia."
}"""

examples = "\n\n".join([
    f"""Example {i}:
```
{example}
```"""
    for i, example in enumerate([example0, example1])
])

In [62]:
# System prompt for the tool-using loop. It interpolates the date string
# (`today`), the TypeScript type declarations (`output_types`) and the few-shot
# transcripts (`examples`).
# The JSON keys named in the instructions are spelled in lowercase ("thought",
# "action", "answer", "observation") so that they match the keys used in the
# few-shot examples; JSON keys are case-sensitive, and the two previously
# disagreed.
system_message = f"""You are a helpful assistant. You run in a loop, seeking additional information to answer a user's question until you are able to answer the question.

Today is {today}.

The output types are:

```typescript
{output_types}
```

The commands to seek information are:

| Command | Arguments | Description | Output Format |
| --- | --- | --- | --- |
| get_info | ticker | Retrieves stock information | Info |
| get_history | ticker, period, interval, start, end | Retrieves stock history | PriceData |

The optional input types are:
- period: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
- interval: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
- start: YYYY-MM-DD
- end: YYYY-MM-DD
You may omit an optional input type if it is not needed.

Your response will be in JSON and will include a "thought" + "action" to retrieve data that you need in order to answer the question, or it will include the "answer". When data has been retrieved, it will be included as an "observation".

You will continue generating thoughts and actions until you get to an answer, or conclude that you can't.

{examples}
"""
# Report the prompt size in tokens, then echo the full prompt for inspection.
print("Tokens:", len(tokenizer.encode(system_message)))
print()
print(system_message)

Tokens: 1804

You are a helpful assistant. You run in a loop, seeking additional information to answer a user's question until you are able to answer the question.

Today is July 28th, 2024.

The output types are:

```typescript
type Info = {
  address1: string;
  city: string;
  state: string;
  zip: string;
  country: string;
  phone: string;
  website: string;
  industry: string;
  industryKey: string;
  industryDisp: string;
  sector: string;
  sectorKey: string;
  sectorDisp: string;
  longBusinessSummary: string;
  fullTimeEmployees: number;
  companyOfficers: {
    maxAge: number;
    name: string;
    age?: number;
    title: string;
    yearBorn?: number;
    fiscalYear: number;
    totalPay?: number;
    exercisedValue: number;
    unexercisedValue: number;
  }[];
  auditRisk: number;
  boardRisk: number;
  compensationRisk: number;
  shareHolderRightsRisk: number;
  overallRisk: number;
  governanceEpochDate: number;
  compensationAsOfEpochDate: number;
  irWebsite: string;
