In [1]:
from dotenv import load_dotenv, find_dotenv
# Load variables from the nearest .env file into the process environment.
# Binding the result to `_` keeps the return value out of the cell output.
# NOTE(review): presumably this supplies the OpenAI API key for ChatOpenAI - confirm.
_ = load_dotenv(find_dotenv())

In [2]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI
from typing import List


# Chat model used by the chains in this notebook; temperature=0 keeps
# sampling randomness to a minimum so extraction results stay stable.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
    temperature=0,
)

In [3]:
# Minimal example: two people, one shared item, explicit amounts.
inp_simple = "Joe spent $5 and Hannah spent $8 on a toilet plunger"
# Harder example: three people, several items, and expenses that are shared
# or paid on behalf of someone else.
inp_complex = """Sarah bought groceries for $60 and a bottle of wine for $30.
Jake paid for gas, which costs $50, and bought snacks for $25.
Emily booked $150 hotel room for her and Sarah and paid for dinner, which was $90.
Jake booked his own hotel room for $80, and also bought a hotdog for Emily for $15.
Jake and Sarah also split a $20 popcorn which they shared with Emily"""

In [5]:
from langchain_core.tools import tool

# Addition tool. The docstring is kept verbatim because @tool exposes it as
# the tool description.
@tool
def add(a: float, b: float) -> float:
    """Returns the result of a + b"""
    total = a + b
    return total

# Subtraction tool. The docstring is kept verbatim because @tool exposes it
# as the tool description.
@tool
def subtract(a: float, b: float) -> float:
    """Returns the result of a - b"""
    difference = a - b
    return difference

# Multiplication tool. The docstring is kept verbatim because @tool exposes
# it as the tool description.
@tool
def multiply(a: float, b: float) -> float:
    """Returns the result of a * b"""
    product = a * b
    return product

# Division tool. The docstring is kept verbatim because @tool exposes it as
# the tool description.
@tool
def divide(a: float, b: float) -> float | None:
    """Returns the result of a / b"""
    # A zero divisor yields None rather than raising ZeroDivisionError; the
    # return annotation now states that instead of claiming a plain float.
    if b == 0:
        return None
    return a / b

# Arithmetic-mean tool. The docstring is kept verbatim because @tool exposes
# it as the tool description.
@tool
def mean(numbers: List[float]) -> float | None:
    """Returns the mean of numbers"""
    # The original body called `np.mean`, but no cell imports numpy, so the
    # tool raised NameError on a fresh kernel. The standard library is enough
    # here; the import is local so this cell does not depend on another one.
    from statistics import fmean

    # An empty list has no mean. Return None, as divide/split_amount do for a
    # zero divisor, instead of letting fmean raise StatisticsError.
    if not numbers:
        return None
    return fmean(numbers)

# Counting tool. The docstring is kept verbatim because @tool exposes it as
# the tool description.
@tool
def count_names(names: List[str]) -> int:
    """Returns the number of names in the list"""
    name_count = len(names)
    return name_count

# Even-split tool. The docstring is kept verbatim because @tool exposes it as
# the tool description.
@tool
def split_amount(total: float, num_people: int) -> float | None:
    """Returns a total split between a number of people"""
    # Splitting between zero people yields None rather than raising
    # ZeroDivisionError; the return annotation now reflects that.
    if num_people == 0:
        return None
    return total / num_people

# Summation tool. The docstring is kept verbatim because @tool exposes it as
# the tool description.
@tool
def sum_list(numbers: List[float]) -> float:
    """Returns the sum of numbers"""
    total = sum(numbers)
    return total

# Every arithmetic helper defined in this cell, collected in one list.
tools = [
    add,
    subtract,
    multiply,
    divide,
    mean,
    count_names,
    split_amount,
    sum_list,
]

In [37]:
# Step 1 prompt: turn a free-text expense description ({input}) into one line
# per person listing each item and the amount they contributed.
# NOTE(review): the prompt asks the model not to do arithmetic, yet the
# recorded outputs in this notebook show pre-divided amounts (e.g. $12.50 for
# a split $25 dinner), so the instruction is not reliably followed.
list_contributions_prompt = ChatPromptTemplate.from_template("""Extract the contributions made by each person verbatim from the following prompt.
A contribution is an amount of money contributed towards an expense or purchase.

Contributions should be either be a numerical value quoted from the passage,
or an arithmetic expression that represent the calculations needed to find the value.
If you find yourself doing any arithmetic calculations, stop.

Passage:

{input}

Output format:

<Name 1>: <Item 1> ($<Item 1 contribution value verbatim OR arithmetic expression>), ...


Replace the brackets with the names of people, the items they contributed to, and their cost
""")

In [38]:
# Step 1 pipeline: fill the prompt, send it to the chat model, then parse the
# reply into a plain string.
list_contributions_chain = (
    list_contributions_prompt
    | llm
    | StrOutputParser()
)

In [39]:
# Try the extraction chain on the two-person example.
list_contributions_chain.invoke({"input": inp_simple})

'Joe: Toilet plunger ($5), \nHannah: Toilet plunger ($8)'

In [40]:
# Try the extraction chain on the multi-person, multi-item example.
list_contributions_chain.invoke({"input": inp_complex})

'Sarah: Groceries ($60), Wine ($30), Popcorn ($10)\nJake: Gas ($50), Snacks ($25), Hotel room ($80), Hotdog for Emily ($15), Popcorn ($10)\nEmily: Hotel room ($75), Dinner ($90), Popcorn ($10)'

In [41]:
# Probe a shared expense where no per-person amount is stated in the text.
list_contributions_chain.invoke({"input": "Alex and Talia split a $25 dinner"})

'Alex: Dinner ($12.50), \nTalia: Dinner ($12.50)'

In [19]:
# Step 2 prompt: reshape the per-person contribution list ({input}) into a
# Markdown table with one row per person and one column per item.
tablify_contributions_prompt = ChatPromptTemplate.from_template("""Extract the contributions made by each person
into a Markdown table. The table header should be a list of "People" followed by item names.
Each row of data starts with a person's name, and indicates how much they contributed to an item.
If a person did not contribute to a purchase, use `$0` as the value for the cell.

Contributions:

{input}
""")

In [20]:
# Step 2 pipeline: fill the table prompt, send it to the chat model, then
# parse the reply into a plain string.
tablify_contributions_chain = (
    tablify_contributions_prompt
    | llm
    | StrOutputParser()
)

In [21]:
# Run step 2 in isolation on a hard-coded contribution list for the
# two-person example.
tablify_contributions_chain.invoke({"input": 'Joe: Toilet plunger ($5), \nHannah: Toilet plunger ($8)'})

'| People | Toilet plunger |\n| ------ | -------------- |\n| Joe    | $5             |\n| Hannah | $8             |'

In [90]:
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
from langchain_core.pydantic_v1 import conlist


class MarkdownTable(BaseModel):
    """A Markdown Table described as a header and a list of data"""
    # Field's first positional parameter is the *default value*, not the
    # description. Passing the text positionally made both fields optional
    # with a string default and left the schema without descriptions. `...`
    # marks each field as required and `description=` carries the text.
    header: List[str] = Field(..., description="The table header row")
    data: List[conlist(float | str)] = Field(
        ...,
        description="A list of rows, with the first value a string and following values floats",
    )

In [91]:
# Step 3 prompt: ask the model to read a Markdown table ({input}) and return
# it through the 'MarkdownTable' function, with money values as floats.
extract_to_table_format_prompt = ChatPromptTemplate.from_template("""Extract information from this Markdown table.

Only extract the properties mentioned in the 'MarkdownTable' function.
Extract money values as floats and text as string.
This means if you see "$5", you should pass something like `5.00`.

Markdown table:
```
{input}
```
""")

In [92]:
# Step 3 pipeline: fill the prompt and have the model answer as a
# MarkdownTable instance rather than free text.
extract_to_table_format_chain = (
    extract_to_table_format_prompt
    | llm.with_structured_output(MarkdownTable)
)

In [93]:
# Run step 3 in isolation on a hard-coded Markdown table for the two-person
# example.
extract_to_table_format_chain.invoke({"input": '| People | Toilet plunger |\n| ------ | -------------- |\n| Joe    | $5             |\n| Hannah | $8             |'})

MarkdownTable(header=['People', 'Toilet plunger'], data=[['Joe', 5.0], ['Hannah', 8.0]])

In [94]:
# End-to-end pipeline: free text -> contribution list -> Markdown table ->
# MarkdownTable object.
# NOTE(review): stages 2 and 3 receive the previous stage's bare string, not a
# dict; this relies on single-variable prompts accepting that - confirm on the
# LangChain version in use.
extract_contributions_chain = (
    list_contributions_chain
    | tablify_contributions_chain
    | extract_to_table_format_chain
)

In [95]:
# Run the full three-step pipeline on the two-person example.
extract_contributions_chain.invoke({"input": inp_simple})

MarkdownTable(header=['People', 'Toilet plunger'], data=[['Joe', 5.0], ['Hannah', 8.0]])

In [96]:
# Run the full three-step pipeline on the multi-person example.
extract_contributions_chain.invoke({"input": inp_complex})

MarkdownTable(header=['People', 'Groceries', 'Wine', 'Popcorn', 'Gas', 'Snacks', 'Hotel room', 'Hotdog for Emily', 'Dinner'], data=[['Sarah', 60.0, 30.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0], ['Jake', 0.0, 0.0, 10.0, 50.0, 25.0, 80.0, 15.0, 0.0], ['Emily', 0.0, 0.0, 0.0, 0.0, 0.0, 150.0, 0.0, 90.0]])