In [1]:
import instructor

from openai import OpenAI
from typing import List
from pydantic import BaseModel, Field

# Synchronous OpenAI client patched by instructor. The cells below pass
# `response_model=` to `client.chat.completions.create` and work with the
# returned Pydantic model instances.
client = instructor.patch(OpenAI())

### Example 1: Improving extraction

In [2]:
# Structured record requested from the model for each topic found in a text chunk.
# NOTE(review): documented with `#` comments on purpose -- a class docstring is
# presumably picked up as the schema description sent to the model; confirm
# before converting these comments into a docstring.
class Extraction(BaseModel):
    # Required free-text fields (no default value, no description).
    topic: str
    summary: str
    # Optional; each instance gets its own empty list when the field is omitted.
    hypothetical_questions: List[str] = Field(
        default_factory=list,
        description="Hypothetical questions that this document could answer",
    )
    # Optional; each instance gets its own empty list when the field is omitted.
    keywords: List[str] = Field(
        default_factory=list, description="Keywords that this document is about"
    )

In [3]:
from pprint import pprint
from typing import Iterable


# Sample markdown document; it is sent verbatim as the user message of the
# extraction request below, so its wording is part of the experiment input.
text_chunk = """
## Simple RAG

**What is it?**

The simplest implementation of RAG embeds a user query and do a single embedding search in a vector database, like a vector store of Wikipedia articles. However, this approach often falls short when dealing with complex queries and diverse data sources.

**What are the limitations?**

- **Query-Document Mismatch:** It assumes that the query and document embeddings will align in the vector space, which is often not the case.
    - Query: "Tell me about climate change effects on marine life."
    - Issue: The model might retrieve documents related to general climate change or marine life, missing the specific intersection of both topics.
- **Monolithic Search Backend:** It relies on a single search method and backend, reducing flexibility and the ability to handle multiple data sources.
    - Query: "Latest research in quantum computing."
    - Issue: The model might only search in a general science database, missing out on specialized quantum computing resources.
- **Text Search Limitations:** The model is restricted to simple text queries without the nuances of advanced search features.
    - Query: "what problems did we fix last week"
    - Issue: cannot be answered by a simple text search since documents that contain problem, last week are going to be present at every week.
- **Limited Planning Ability:** It fails to consider additional contextual information that could refine the search results.
    - Query: "Tips for first-time Europe travelers."
    - Issue: The model might provide general travel advice, ignoring the specific context of first-time travelers or European destinations.
"""

# Conversation for the extraction request: a fixed instruction plus the raw chunk.
extraction_messages = [
    {
        "role": "system",
        "content": "Your role is to extract chunks from the following and create a set of topics.",
    },
    {"role": "user", "content": text_chunk},
]

# Request an iterable of Extraction objects in streaming mode.
extractions = client.chat.completions.create(
    model="gpt-4-1106-preview",
    stream=True,
    response_model=Iterable[Extraction],
    messages=extraction_messages,
)

# Pretty-print every extraction as a plain dict.
for extraction in extractions:
    pprint(extraction.model_dump())

{'hypothetical_questions': ['What are the key features of a simple RAG system?',
                            'How does a simple RAG system perform embedding '
                            'searches?',
                            'Why might a simple RAG system fail to provide '
                            'accurate results for complex queries?'],
 'keywords': ['simple RAG',
              'Retrieval-Augmented Generation',
              'vector database',
              'embedding search',
              'complex queries',
              'limitations'],
 'summary': 'The simplest implementation of Retrieval-Augmented Generation '
            '(RAG) involves embedding a user query and performing a single '
            'embedding search in a vector database, which often struggles with '
            'complex queries and diverse data sources due to inherent '
            'limitations.',
 'topic': 'Simple RAG'}
{'hypothetical_questions': ['What causes query-document mismatch in RAG '
              

### Example 2: Understanding 'recent queries' to add temporal context

In [4]:
from datetime import date


# Pair of required `date` fields; used as the type of `Query.published_daterange`.
class DateRange(BaseModel):
    start: date
    end: date


# Structured form of a search query: rewritten text plus a publication date range.
# Both fields are required and carry no description in this first version.
class Query(BaseModel):
    rewritten_query: str
    published_daterange: DateRange

In [5]:
def expand_query(q) -> Query:
    """Turn a raw search query into a structured `Query` using gpt-3.5-turbo.

    The system prompt embeds today's date (`date.today()`); the user message
    is `q` prefixed with "query: ". The request goes through the patched
    synchronous `client` with `response_model=Query`.
    """
    system_prompt = f"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ..."
    user_prompt = f"query: {q}"
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=Query,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )


# Baseline: expand a vague "recent" query, keep the result in `query`,
# and end the cell with the bare name so the notebook displays it.
query = expand_query("What are some recent developments in AI?")
query

Query(rewritten_query='Recent developments in AI', published_daterange=DateRange(start=datetime.date(2024, 1, 1), end=datetime.date(2024, 3, 27)))

In [6]:
# Redefinition of DateRange that adds a free-text reasoning field ahead of the
# two dates. All three fields are required.
class DateRange(BaseModel):
    # Declared first, so it precedes `start`/`end` in the model's field order.
    chain_of_thought: str = Field(
        description="Think step by step to plan what is the best time range to search in"
    )
    start: date
    end: date


# Redefinition of Query whose fields now carry descriptions. Both fields are
# still required.
class Query(BaseModel):
    rewritten_query: str = Field(
        description="Rewrite the query to make it more specific"
    )
    # Nested model; resolves to whichever DateRange is defined when this class is created.
    published_daterange: DateRange = Field(
        description="Effective date range to search in"
    )


def expand_query(q) -> Query:
    """Turn a raw search query into a structured `Query` using gpt-4-1106-preview.

    Builds a two-message conversation (system prompt containing today's date,
    then the user query prefixed with "query: ") and sends it through the
    patched synchronous `client` with `response_model=Query`.
    """
    conversation = [
        {
            "role": "system",
            "content": f"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...",
        },
        {"role": "user", "content": f"query: {q}"},
    ]
    expanded = client.chat.completions.create(
        model="gpt-4-1106-preview",
        response_model=Query,
        messages=conversation,
    )
    return expanded


# Same question as before, now against the redefined schema and function;
# the bare call is the cell's last expression, so its result is displayed.
expand_query("What are some recent developments in AI?")

Query(rewritten_query='latest advancements in artificial intelligence technology', published_daterange=DateRange(chain_of_thought='To find the most recent developments in AI, a search starting from one year ago to the present would be a good range to capture the latest advancements.', start=datetime.date(2023, 3, 27), end=datetime.date(2024, 3, 27)))

#### Using W&B

In [7]:
import json
import instructor

from openai import AsyncOpenAI
from helpers import dicts_to_df
from datetime import date
from pydantic import BaseModel, Field


# Date range with a leading free-text reasoning field; all three fields are required.
class DateRange(BaseModel):
    # Declared first, so it precedes `start`/`end` in the model's field order.
    chain_of_thought: str = Field(
        description="Think step by step to plan what is the best time range to search in"
    )
    start: date
    end: date


class Query(BaseModel):
    # Structured search query: rewritten text plus the date range to search in.
    # Comments are used here instead of a class docstring so the model's
    # generated schema is left exactly as it was.
    rewritten_query: str = Field(description="Rewrite the query to make it more specific")
    published_daterange: DateRange = Field(description="Effective date range to search in")

    def report(self):
        """Return the dumped model fields plus a "usage" entry.

        "usage" is the dump of `self._raw_response.usage`. `_raw_response` is
        not declared on this class -- presumably it is attached by the
        instructor-patched client, so this only works on instances returned
        by such a client (verify against the instructor version in use).
        """
        return {
            **self.model_dump(),
            "usage": self._raw_response.usage.model_dump(),
        }



# A second, asynchronous client (AsyncOpenAI patched by instructor) is created
# for the awaited calls below. The synchronous `client` is left untouched, so
# both styles can be used side by side in this notebook.
aclient = instructor.patch(AsyncOpenAI())


async def expand_query(
    q, *, model: str = "gpt-4-1106-preview", temp: float = 0
) -> Query:
    """Asynchronously turn a raw search query into a structured `Query`.

    Args:
        q: The user's query text; sent as "query: <q>".
        model: Chat model name forwarded to the API (keyword-only).
        temp: Sampling temperature forwarded as `temperature` (keyword-only).

    Returns:
        The awaited result of the patched async client's
        `chat.completions.create` call made with `response_model=Query`.
    """
    conversation = [
        {
            "role": "system",
            "content": f"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...",
        },
        {"role": "user", "content": f"query: {q}"},
    ]
    expanded = await aclient.chat.completions.create(
        model=model,
        temperature=temp,
        response_model=Query,
        messages=conversation,
    )
    return expanded

In [8]:
import asyncio
import time
import pandas as pd
import wandb

model = "gpt-4-1106-preview"
temp = 0

run = wandb.init(
    project="structured_outptus",
    config={"model": model, "temp": temp},
)

test_queries = [
    "latest developments in artificial intelligence last 3 weeks",
    "renewable energy trends past month",
    "quantum computing advancements last 2 months",
    "biotechnology updates last 10 days",
]
start = time.perf_counter()
queries = await asyncio.gather(
    *[expand_query(q, model=model, temp=temp) for q in test_queries]
)
duration = time.perf_counter() - start

with open("schema.json", "w+") as f:
    schema = Query.model_json_schema()
    json.dump(schema, f, indent=2)

with open("results.jsonlines", "w+") as f:
    for query in queries:
        f.write(query.model_dump_json() + "\n")

df = dicts_to_df([q.report() for q in queries])
df["input"] = test_queries
df.to_csv("results.csv")


run.log({"schema": wandb.Table(dataframe=pd.DataFrame([{"schema": schema}]))})

run.log(
    {
        "usage_total_tokens": df["usage_total_tokens"].sum(),
        "usage_completion_tokens": df["usage_completion_tokens"].sum(),
        "usage_prompt_tokens": df["usage_prompt_tokens"].sum(),
        "duration (s)": duration,
        "average duration (s)": duration / len(queries),
        "n_queries": len(queries),
    }
)


run.log(
    {
        "results": wandb.Table(dataframe=df),
    }
)

files = wandb.Artifact("data", type="dataset")

files.add_file("schema.json")
files.add_file("results.jsonlines")
files.add_file("results.csv")


run.log_artifact(files)
run.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfabiogomez11c[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
average duration (s),▁
duration (s),▁
n_queries,▁
usage_completion_tokens,▁
usage_prompt_tokens,▁
usage_total_tokens,▁

0,1
average duration (s),1.86971
duration (s),7.47882
n_queries,4.0
usage_completion_tokens,410.0
usage_prompt_tokens,780.0
usage_total_tokens,1190.0


### Example 3: Personal Assistants, parallel processing

In [9]:
from typing import Literal


# One search request the assistant wants to run against a single source.
# All fields are required.
class SearchClient(BaseModel):
    query: str = Field(description="The search query that will go into the search bar")
    keywords: List[str]
    email: str
    # Only these two source names are accepted by validation.
    source: Literal["gmail", "calendar"]
    # Uses whichever DateRange is defined when this class is created.
    date_range: DateRange


# Container for the list of searches produced for a single user request.
# The name is spelled `Retrival` (sic); it is referenced under this exact
# spelling by the requests below, so it must not be renamed in isolation.
class Retrival(BaseModel):
    queries: List[SearchClient]

In [10]:
# System prompt written as explicit pieces; the embedded newlines, the 16-space
# indents and the trailing space after the second address are part of the text.
assistant_prompt = (
    "You are Jason's personal assistant.\n"
    "                He has two emails jason@work.com jason@personal.com \n"
    f"                Today is {date.today()}"
)

# Ask gpt-3.5-turbo to plan the searches needed to answer the request.
retrival = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=Retrival,
    messages=[
        {"role": "system", "content": assistant_prompt},
        {"role": "user", "content": "What do I have today for work? any new emails?"},
    ],
)
print(retrival.model_dump_json(indent=4))

{
    "queries": [
        {
            "query": "work",
            "keywords": [
                "work",
                "today"
            ],
            "email": "jason@work.com",
            "source": "gmail",
            "date_range": {
                "chain_of_thought": "Check for work-related emails for today",
                "start": "2024-03-27",
                "end": "2024-03-27"
            }
        },
        {
            "query": "new emails",
            "keywords": [
                "new",
                "emails"
            ],
            "email": "jason@work.com",
            "source": "gmail",
            "date_range": {
                "chain_of_thought": "Check for any new emails received today",
                "start": "2024-03-27",
                "end": "2024-03-27"
            }
        }
    ]
}


In [11]:
# System prompt written as explicit pieces; the embedded newlines, the 16-space
# indents and the trailing space after the second address are part of the text.
assistant_prompt = (
    "You are Jason's personal assistant.\n"
    "                He has two emails jason@work.com jason@personal.com \n"
    f"                Today is {date.today()}"
)
user_request = "What meetings do I have today and are there any important emails I should be aware of"

# Ask gpt-4-1106-preview to plan the searches needed to answer the request.
retrival = client.chat.completions.create(
    model="gpt-4-1106-preview",
    response_model=Retrival,
    messages=[
        {"role": "system", "content": assistant_prompt},
        {"role": "user", "content": user_request},
    ],
)
print(retrival.model_dump_json(indent=4))

{
    "queries": [
        {
            "query": "Today's meetings",
            "keywords": [
                "meeting",
                "appointment",
                "calendar event"
            ],
            "email": "jason@work.com",
            "source": "calendar",
            "date_range": {
                "chain_of_thought": "As today is 2024-03-27, I should search for calendar events on this specific date.",
                "start": "2024-03-27",
                "end": "2024-03-27"
            }
        },
        {
            "query": "Important emails",
            "keywords": [
                "important",
                "urgent",
                "priority"
            ],
            "email": "jason@personal.com",
            "source": "gmail",
            "date_range": {
                "chain_of_thought": "Since I'm asked for important emails, they are often marked as important or urgent recently so I will search for the last week.",
                "start": "2024-0

### Example 4: Decomposing questions

In [12]:
# One node of a query plan. `subquestions` holds integers; going by the field
# descriptions these are presumably the `id` values of other Question entries
# in the same plan, but nothing in this class enforces that.
class Question(BaseModel):
    # `...` marks the field as required.
    id: int = Field(..., description="A unique identifier for the question")
    query: str = Field(..., description="The question decomposited as much as possible")
    # Optional; each instance gets its own empty list when the field is omitted.
    subquestions: List[int] = Field(
        default_factory=list,
        description="The subquestions that this question is composed of",
    )


# Top-level response model: the original question plus the list of Question
# entries that make up the plan. Both fields are required (`...`).
class QueryPlan(BaseModel):
    root_question: str = Field(..., description="The root question that the user asked")
    plan: List[Question] = Field(
        ..., description="The plan to answer the root question and its subquestions"
    )


# Conversation asking the model to break one question into a plan of sub-questions.
decomposition_messages = [
    {
        "role": "system",
        "content": "You are a query understanding system capable of decomposing a question into subquestions.",
    },
    {
        "role": "user",
        "content": "What is the difference between the population of jason's home country and canada?",
    },
]

# The result is a QueryPlan, stored under the existing `retrival` name.
retrival = client.chat.completions.create(
    model="gpt-4-1106-preview",
    response_model=QueryPlan,
    messages=decomposition_messages,
)

print(retrival.model_dump_json(indent=4))

{
    "root_question": "What is the difference between the population of Jason's home country and Canada?",
    "plan": [
        {
            "id": 1,
            "query": "What is Jason's home country?",
            "subquestions": []
        },
        {
            "id": 2,
            "query": "What is the population of Jason's home country?",
            "subquestions": [
                1
            ]
        },
        {
            "id": 3,
            "query": "What is the population of Canada?",
            "subquestions": []
        },
        {
            "id": 4,
            "query": "What is the difference between the population of two countries?",
            "subquestions": [
                2,
                3
            ]
        }
    ]
}
