In [None]:
import getpass
import os

import rich
from aryn_sdk.client.client import Client
from aryn_sdk.types.search import SearchRequest
from rich.console import Console
from rich.table import Table
from sycamore.llms.openai import OpenAI, OpenAIModels
from sycamore.llms.prompts.prompts import RenderedPrompt, RenderedMessage

In [None]:
# Read the Aryn API key from the ARYN_API_KEY environment variable rather than
# storing it in the notebook; if the variable is unset or empty, prompt for the
# key interactively so it never appears in the saved file.
aryn_test_key = os.environ.get("ARYN_API_KEY") or getpass.getpass("Aryn API key: ")

In [None]:
# GPT-4o LLM handle used to generate the answer in each question cell below.
oai = OpenAI(
    OpenAIModels.GPT_4O,
)

In [None]:
# Aryn client pointed at the test API endpoint, authenticated with aryn_test_key.
aryn_client = Client(
    aryn_url="https://test-api.aryn.ai",
    aryn_api_key=aryn_test_key,
)

In [None]:
# Show each docset returned by list_docsets() as a table (id, name, creation
# time, size) so a docset_id can be picked for the searches below.
# Table and Console are used via explicit imports: a bare `import rich` does not
# guarantee that the rich.table / rich.console submodules have been loaded.
dtable = Table(title="Docsets")
for column_name in ("docset_id", "name", "created_at", "size"):
    dtable.add_column(column_name)

for ds in aryn_client.list_docsets():
    # add_row is given strings, so the timestamp and size are converted first.
    dtable.add_row(ds.docset_id, ds.name, ds.created_at.isoformat(), str(ds.size))

Console().print(dtable)

In [None]:
# ID of the Aryn docset that every search call in this notebook queries.
docset_id = "aryn:ds-4tu7utxr3u21r0q15iz91zn"

In [None]:
# Single-fact lookup: lexical search over the docset (no property filter),
# then ask the LLM to answer from the returned elements.
question = "What was the change in stock price on the day of the Q2 2024 AirBnB earnings call?"

search_result = aryn_client.search(
    docset_id=docset_id,
    query=SearchRequest(
        query=question,
        query_type="lexical",
        return_type="element",
    ),
)

# Remove the "embedding" field (when present) from each hit before
# stringifying it, so it is not included in the text sent to the LLM.
texts = []
for r in search_result.value.results:
    r.pop("embedding", None)
    texts.append(str(r))

# The instruction comes first, followed by one user message per retrieved element.
messages = [RenderedMessage(role="user", content=t) for t in texts]
prompt = RenderedPrompt(
    messages=[
        RenderedMessage(role="user", content=f"Using the provided documents, answer the question: {question}"),
        *messages,
    ]
)

# Print the answer, as the other question cells in this notebook do, instead of
# leaving it as the cell's bare last expression (which would show its repr).
print(oai.generate(prompt=prompt))

In [None]:
# MongoDB Q4 speakers: the lexical search is restricted by a properties filter
# on company ticker "MDB" and quarter "Q4" before the LLM is asked.
question = "List all the speakers in the MongoDB Q4 2024 earnings call."

mdb_q4_filter = (
    '(properties.entity.earnings_call.company_ticker="MDB")'
    ' AND (properties.entity.earnings_call.quarter="Q4")'
)
request = SearchRequest(
    query=question,
    query_type="lexical",
    properties_filter=mdb_q4_filter,
    return_type="element",
)
search_result = aryn_client.search(docset_id=docset_id, query=request)

# Strip the "embedding" field from every hit, then keep its string form.
texts = []
for hit in search_result.value.results:
    hit.pop("embedding", None)
    texts.append(str(hit))

# Instruction message first, then one user message per retrieved element.
instruction = RenderedMessage(
    role="user",
    content=f"Using the provided documents, answer the question: {question}",
)
messages = [RenderedMessage(role="user", content=text) for text in texts]
prompt = RenderedPrompt(messages=[instruction, *messages])

answer = oai.generate(prompt=prompt)
print(answer)

In [None]:
# Broadcom Q4 speakers: same flow as the other question cells, with the lexical
# search filtered on company name "Broadcom" and quarter "Q4".
question = "List all the speakers in the Broadcom Q4 2024 earnings call."

broadcom_q4_filter = " AND ".join(
    [
        '(properties.entity.earnings_call.company_name="Broadcom")',
        '(properties.entity.earnings_call.quarter="Q4")',
    ]
)
search_result = aryn_client.search(
    docset_id=docset_id,
    query=SearchRequest(
        query=question,
        query_type="lexical",
        return_type="element",
        properties_filter=broadcom_q4_filter,
    ),
)

# Each hit loses its "embedding" entry (if any) and is then rendered with str().
texts = []
for element in search_result.value.results:
    element.pop("embedding", None)
    texts.append(str(element))

messages = [RenderedMessage(role="user", content=doc_text) for doc_text in texts]
# The question prompt leads; the retrieved elements follow as separate user messages.
lead_message = RenderedMessage(
    role="user",
    content=f"Using the provided documents, answer the question: {question}",
)
prompt = RenderedPrompt(messages=[lead_message] + messages)
print(oai.generate(prompt=prompt))

In [None]:
# MongoDB customer count: unfiltered lexical search, answered by the LLM from
# the returned elements.
question = "How many customers did MongoDB have at the end of the Q1 2024 quarter?"

request = SearchRequest(query=question, query_type="lexical", return_type="element")
search_result = aryn_client.search(docset_id=docset_id, query=request)

# Drop the "embedding" key from each hit before converting it to text.
texts = []
for hit in search_result.value.results:
    hit.pop("embedding", None)
    texts.append(str(hit))

# Build the prompt: the instruction first, then every retrieved element.
messages = [RenderedMessage(role="user", content=text) for text in texts]
instruction = RenderedMessage(
    role="user",
    content=f"Using the provided documents, answer the question: {question}",
)
prompt = RenderedPrompt(messages=[instruction, *messages])

answer = oai.generate(prompt=prompt)
print(answer)

In [None]:
# First Broadcom call mentioning the VMWare deal: lexical search restricted to
# Broadcom elements via a properties filter.
# "acquisition" is spelled correctly here because the query text is matched
# lexically; a misspelled term would likely fail to match the documents.
question = "What was the first earnings call when Broadcom mentioned the VMWare acquisition?"

search_result = aryn_client.search(
    docset_id=docset_id,
    query=SearchRequest(
        query=question,
        query_type="lexical",
        return_type="element",
        properties_filter="(properties.entity.earnings_call.company_name=\"Broadcom\")",
    ),
)

# Remove the "embedding" field (when present) from each hit before
# stringifying it, so it is not included in the text sent to the LLM.
texts = []
for r in search_result.value.results:
    r.pop("embedding", None)
    texts.append(str(r))

# The instruction comes first, followed by one user message per retrieved element.
messages = [RenderedMessage(role="user", content=t) for t in texts]
prompt = RenderedPrompt(
    messages=[
        RenderedMessage(role="user", content=f"Using the provided documents, answer the question: {question}"),
        *messages,
    ]
)
print(oai.generate(prompt=prompt))

In [None]:
# Cross-company summary of 2024 mergers and acquisitions, using an unfiltered
# lexical search.
# "acquisitions" is spelled correctly here because the query text is matched
# lexically; a misspelled term would likely fail to match the documents.
question = "Summarize all the mergers and acquisitions that happened in 2024 and give a breakdown of how each acquisition impacted earnings."

search_result = aryn_client.search(
    docset_id=docset_id,
    query=SearchRequest(
        query=question,
        query_type="lexical",
        return_type="element",
        # You would normally also filter by year (2024), but all the data in
        # this workshop is from 2024, so the filter would not change anything.
    ),
)

# Remove the "embedding" field (when present) from each hit before
# stringifying it, so it is not included in the text sent to the LLM.
texts = []
for r in search_result.value.results:
    r.pop("embedding", None)
    texts.append(str(r))

# The instruction comes first, followed by one user message per retrieved element.
messages = [RenderedMessage(role="user", content=t) for t in texts]
prompt = RenderedPrompt(
    messages=[
        RenderedMessage(role="user", content=f"Using the provided documents, answer the question: {question}"),
        *messages,
    ]
)
print(oai.generate(prompt=prompt))

In [None]:
# AI-integration progress per company and quarter: unfiltered lexical search
# whose hits are handed to the LLM together with the question.
question = "Summarize how AI integration is progressing across each company's products. Give me a quarter by quarter break down of the progress per company and overall."

ai_request = SearchRequest(
    query=question,
    query_type="lexical",
    return_type="element",
)
search_result = aryn_client.search(docset_id=docset_id, query=ai_request)

# Discard the "embedding" entry of each hit (if present), then stringify the hit.
texts = []
for element in search_result.value.results:
    element.pop("embedding", None)
    texts.append(str(element))

messages = [RenderedMessage(role="user", content=doc_text) for doc_text in texts]
# Prompt layout: instruction message, then the retrieved elements in order.
lead_message = RenderedMessage(
    role="user",
    content=f"Using the provided documents, answer the question: {question}",
)
prompt = RenderedPrompt(messages=[lead_message] + messages)
print(oai.generate(prompt=prompt))

In [None]:
# Inflation mentions per company: unlike the other question cells, this one
# issues the search with query_type="vector" instead of "lexical".
question = "List all the companies that mentioned inflation and give me a count of the number of times each of the companies mentioned inflation."

request = SearchRequest(query=question, query_type="vector", return_type="element")
search_result = aryn_client.search(docset_id=docset_id, query=request)

# Strip the "embedding" field from every hit, then keep its string form.
texts = []
for hit in search_result.value.results:
    hit.pop("embedding", None)
    texts.append(str(hit))

# Instruction message first, then one user message per retrieved element.
instruction = RenderedMessage(
    role="user",
    content=f"Using the provided documents, answer the question: {question}",
)
messages = [RenderedMessage(role="user", content=text) for text in texts]
prompt = RenderedPrompt(messages=[instruction, *messages])

answer = oai.generate(prompt=prompt)
print(answer)