In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# Initial setup

## Set API key for Groq
Click [here](https://console.groq.com/keys) to create a Groq API key if you do not already have one.

In [2]:
import os, json, re, getpass
from dotenv import load_dotenv

load_dotenv( override=True)

True

In [3]:
if "GROQ_API_KEY" not in os.environ:
    os.environ["GROQ_API_KEY"] = getpass.getpass("GROQ API Key: ")

In [4]:
# if "TEST_API_KEY" not in os.environ:
#     os.environ["TEST_API_KEY"] = getpass.getpass("TEST API Key: ")

In [5]:
from langchain.chat_models import init_chat_model
model_name = "llama-3.1-8b-instant" ##set Llama 3.1 8B as the LLM for this lab

# Structured Output Generation Methods

## 0. Without a Method

In [6]:
#Initialize LLM
llm = init_chat_model(model_name, 
                      model_provider="groq")

In [7]:
prompt = """Who won the Champions league in 2022?
            Output should be in JSON and have following fields:
            win_team, lose_team, venue, date, score
         """

In [8]:
llm_response = llm.invoke(prompt)
print(llm_response.content)

The 2022 UEFA Champions League Final was played between Real Madrid and Liverpool. 

Here's the result in JSON format:

```json
{
  "win_team": "Real Madrid",
  "lose_team": "Liverpool",
  "venue": "Stade de France",
  "date": "May 28, 2022",
  "score": "1-0"
}
```

This is the result of the game, where Real Madrid won with a 1-0 score against Liverpool at the Stade de France on May 28, 2022.


## 1. Native LLM Output Response Support

In [9]:
#Initialize LLM
llm = init_chat_model(model_name, 
                      model_provider="groq",
                      model_kwargs={"response_format": {"type": "json_object"}})

In [10]:
llm_response = llm.invoke(prompt)
print(llm_response.content)

{
   "win_team": "Real Madrid",
   "lose_team": "Liverpool",
   "venue": "Stade de France",
   "date": "October 28, 2022",
   "score": "1-0"
}


In [11]:
# What would this be?
type(llm_response.content)

str

## 2. Output Parsers

In [12]:
#Initialize LLM without native support
llm = init_chat_model(model_name, 
                      model_provider="groq")

In [13]:
from langchain_core.output_parsers import JsonOutputParser

parser = JsonOutputParser()

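*JsonOutputParser* is a runnable object: it can be invoked directly on a JSON string, or composed with an LLM into a chain using the `|` operator.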

In [14]:
#Example on a sample string
sample_json_str = '{"clarity": "unclear"}'
JsonOutputParser().invoke(sample_json_str)

{'clarity': 'unclear'}

In [15]:
# print(sample_json_str)

In [16]:
#Create a chain
chain = llm | parser

In [17]:
#Get response
llm_response = chain.invoke(prompt)
print(llm_response)

{'win_team': 'Real Madrid', 'lose_team': 'Liverpool', 'venue': 'Stade de France, Saint-Denis, France', 'date': 'May 28, 2022', 'score': '1-0'}


In [18]:
type(llm_response)

dict

In [19]:
llm_response

{'win_team': 'Real Madrid',
 'lose_team': 'Liverpool',
 'venue': 'Stade de France, Saint-Denis, France',
 'date': 'May 28, 2022',
 'score': '1-0'}

## 3. Output Parsers With Pydantic

In [20]:
#Initialize LLM without native support
llm = init_chat_model(model_name, 
                      model_provider="groq")

In [21]:
from pydantic import BaseModel, Field

# Pydantic description of the expected answer. The per-field descriptions are
# carried into the JSON schema that parser.get_format_instructions() prints,
# so they double as formatting hints for the LLM.
class GameDetails(BaseModel):
    # Winning side of the match.
    win_team: str = Field(
        description="The winning team in the football game, and most popular player",
    )
    # Losing side of the match.
    lose_team: str = Field(
        description="The losing team in the football game, and most popular player",
    )
    # Where the match was played.
    venue: str = Field(
        description="The venue of the football game, and format should be stadium/venue, city, country",
    )
    # When the match was played.
    date: str = Field(
        description="The date of the football game, and format should be MMM-YY strictly",
    )
    # Final score, kept as a single string in this variant.
    score: str = Field(
        description="The score of the football game, and format should losing team score - winning team score; eg. 1-2",
    )


# JSON parser bound to the GameDetails model defined above.
parser = JsonOutputParser(pydantic_object=GameDetails)

In [22]:
# Best practice: include these format instructions directly in the prompt
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"win_team": {"description": "The winning team in the football game, and most popular player", "title": "Win Team", "type": "string"}, "lose_team": {"description": "The losing team in the football game, and most popular player", "title": "Lose Team", "type": "string"}, "venue": {"description": "The venue of the football game, and format should be stadium/venue, city, country", "title": "Venue", "type": "string"}, "date": {"description": "The date of the football game, and format should be MMM-YY strictly", "title": "Date", "type

In [23]:
print(prompt)

Who won the Champions league in 2022?
            Output should be in JSON and have following fields:
            win_team, lose_team, venue, date, score
         


In [24]:
#Create a chain
chain = llm | parser

In [25]:
#Get response
llm_response = chain.invoke(prompt)
print(llm_response)

{'win_team': 'Real Madrid', 'lose_team': 'Liverpool', 'venue': 'Stade de France', 'date': 'May 28, 2022', 'score': '1-0'}


In [None]:
type(llm_response)

In [26]:
llm_response

{'win_team': 'Real Madrid',
 'lose_team': 'Liverpool',
 'venue': 'Stade de France',
 'date': 'May 28, 2022',
 'score': '1-0'}

In [27]:
# Append the parser's format instructions to the original question so the
# model sees the expected JSON schema in the prompt itself.
new_prompt = f"{prompt}\n\nPlease return the output in the following JSON format: {parser.get_format_instructions()}"

In [28]:
#Get response
llm_response = chain.invoke(new_prompt)
llm_response

{'win_team': 'Real Madrid',
 'lose_team': 'Liverpool',
 'venue': 'Stade de France, Saint-Denis, France',
 'date': 'May-2022',
 'score': '1-0'}

## 4. Structured Output (without parsers)

### With Pydantic

In [29]:
#Initialize LLM without native support
llm = init_chat_model(model_name, 
                      model_provider="groq",
                      temperature=0.0)

In [30]:
prompt = "Who won the Champions league in 2022?"

In [31]:
from pydantic import BaseModel, Field

# Schema handed to llm.with_structured_output(); unlike the parser variant,
# `score` is a dict here rather than a string.
class GameDetails(BaseModel):
    """Given a user question about a sports event, list the winning team, losing team, venue, date and final score of the game."""

    # Winning side of the match.
    win_team: str = Field(
        description="The winning team in the football game, and most popular player",
    )
    # Losing side of the match.
    lose_team: str = Field(
        description="The losing team in the football game, and most popular player",
    )
    # Where the match was played.
    venue: str = Field(
        description="The venue of the football game, and format should be stadium/venue, city, country",
    )
    # When the match was played.
    date: str = Field(
        description="The date of the football game, and format should be MMM-YY strictly",
    )
    # Final score as a mapping.
    score: dict = Field(
        description="The score of the football game, and format should {losing team: score, winning team: score}",
    )

In [32]:
structured_llm = llm.with_structured_output(GameDetails)

In [33]:
structured_llm.invoke(prompt)

GameDetails(win_team='Real Madrid', lose_team='Liverpool', venue='Stade de France, Saint-Denis, France', date='May-22', score={'Liverpool': 0, 'Real Madrid': 1})

In [34]:
#.model_dump() method converts a model to a dictionary
llm_response = structured_llm.invoke(prompt).model_dump()
llm_response

{'win_team': 'Real Madrid',
 'lose_team': 'Liverpool',
 'venue': 'Stade de France, Saint-Denis, France',
 'date': 'May-22',
 'score': {'Liverpool': 0, 'Real Madrid': 1}}

In [35]:
type(llm_response)

dict

### With TypedDict

In [29]:
#Initialize LLM without native support
llm = init_chat_model(model_name, 
                      model_provider="groq",
                      temperature=0.0)

In [30]:
prompt = "Who won the Champions league in 2022?"

In [36]:
from typing_extensions import Annotated, TypedDict
from typing import Optional

class GameDetails(TypedDict):
    """Given a user question about a sports event, list the winning team, losing team, venue, date and final score of the game."""

    # A TypedDict only declares key types: it does not support default values,
    # so assigning pydantic `Field(...)` objects to its keys is not valid and
    # keeps the descriptions out of the type information.
    # LangChain's documented form for TypedDict schemas is
    # Annotated[type, default, description], where `...` as the default marks
    # the key as required.
    win_team: Annotated[str, ..., "The winning team in the football game, and most popular player"]
    lose_team: Annotated[str, ..., "The losing team in the football game, and most popular player"]
    venue: Annotated[str, ..., "The venue of the football game, and format should be stadium/venue, city, country"]
    date: Annotated[str, ..., "The date of the football game, and format should be MMM-YY strictly"]
    score: Annotated[dict, ..., "The score of the football game, and format should {losing team: score, winning team: score}"]

In [37]:
structured_llm = llm.with_structured_output(GameDetails)

In [42]:
llm_response = structured_llm.invoke(prompt)
llm_response

{'competition': 'Champions League',
 'date': '2022-05-28',
 'lose_team': 'Liverpool',
 'score': {'fullTime': {'lose_team': 0, 'win_team': 1},
  'halftime': {'lose_team': 0, 'win_team': 0}},
 'venue': 'Stade de France',
 'win_team': 'Real Madrid'}

In [43]:
type(llm_response)

dict

### With JSON Schema

In [44]:
#Initialize LLM without native support
llm = init_chat_model(model_name, 
                      model_provider="groq",
                      temperature=0.0)

In [45]:
prompt = "Who won the Champions league in 2022?"

In [55]:
# Raw JSON Schema for the game details: four string properties plus an
# object-valued `score`, all of them required.
json_schema = {
    "title": "GameDetails",
    "description": (
        "Given a user question about a sports event, list the winning team, "
        "losing team, venue, date and final score of the game."
    ),
    "type": "object",
    "properties": {
        "win_team": {"type": "string", "description": "The winning team in the football game, and most popular player"},
        "lose_team": {"type": "string", "description": "The losing team in the football game, and most popular player"},
        "venue": {"type": "string", "description": "The venue of the football game, and format should be stadium/venue, city, country"},
        "date": {"type": "string", "description": "The date of the football game, and format should be MMM-YY strictly"},
        "score": {"type": "object", "description": "The score of the football game, and format should {losing team: score, winning team: score}"},
    },
    "required": ["win_team", "lose_team", "venue", "date", "score"],
}

In [56]:
structured_llm = llm.with_structured_output(json_schema)

In [57]:
llm_response = structured_llm.invoke(prompt)
llm_response

{'date': 'May-22',
 'lose_team': 'Liverpool',
 'score': {'Liverpool': '0', 'Real Madrid': '1'},
 'venue': 'Stade de France, Saint-Denis, France',
 'win_team': 'Real Madrid'}

In [58]:
type(llm_response)

dict