In [1]:
import os

import google.generativeai as genai
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.chat import (ChatPromptTemplate, HumanMessagePromptTemplate)
from pydantic import BaseModel, Field

In [2]:
# Load variables from a local .env file (if one exists) into the process
# environment, so the OPENAI_API_KEY lookup below can see a key stored there.
# Variables that are already set in the environment are left untouched.
load_dotenv()

# Chat model client; the API key is read from the environment, never hardcoded.
model = ChatOpenAI(openai_api_key=os.getenv('OPENAI_API_KEY'))

In [3]:
class Players(BaseModel):
    # Schema describing the structured answer requested from the LLM; it is
    # handed to PydanticOutputParser to produce the format instructions.
    # NOTE(review): documented with comments instead of a class docstring on
    # purpose — pydantic may copy a model docstring into the generated JSON
    # schema, which would change the prompt text; confirm before adding one.

    # Player records: the description asks for a list of dictionaries holding
    # each player's name and nationality.
    values: list = Field(
        description="Python list of dictionaries containing player name and nationality",
    )

    # NOTE(review): the field is named `city` and typed as a list, while its
    # description asks for the most popular country — confirm the intent.
    city: list = Field(
        description="Give me the most popular country across the results",
    )

# Output parser bound to the Players model.
parser = PydanticOutputParser(pydantic_object=Players)

# Print the format instructions derived from the Players schema; the same text
# is later inserted into the prompt.
format_instructions_text = parser.get_format_instructions()
print(format_instructions_text)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"values": {"description": "Python list of dictionaries containing player name and nationality", "items": {}, "title": "Values", "type": "array"}, "city": {"description": "Give me the most popular country across the results", "items": {}, "title": "City", "type": "array"}}, "required": ["values", "city"]}
```


In [4]:
# Template for the single human message. It has two placeholders: the
# free-text request and the parser's format instructions.
# NOTE: the second placeholder is spelled `format_instrustion`; the keyword
# passed to format_prompt() must use exactly the same spelling.
HUMAN_TEMPLATE = "{request}\n{format_instrustion}"
human_prompt = HumanMessagePromptTemplate.from_template(HUMAN_TEMPLATE)

# Chat prompt consisting of just that one human message.
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [5]:
# Fill both template placeholders: the question itself and the parser's
# format instructions.
prompt_value = chat_prompt.format_prompt(
    request='Give me facts about 100 NBA players around the world',
    format_instrustion=parser.get_format_instructions(),
)

# The chat model is called with messages, so convert the formatted prompt.
request = prompt_value.to_messages()

In [9]:
# Send the prepared messages to the chat model, passing temperature 0 for this call.
results = model(request, temperature=0)

In [10]:
# Parse the text of the model's reply into a Players object.
reply_text = results.content
result_values = parser.parse(reply_text)

In [None]:
# Build the table from the parsed player records.
# `result_values.values` is expected to be a list of per-player dicts (per the
# Players field description), so the plain DataFrame constructor is used:
# DataFrame.from_dict is documented for dict input, not a list of records.
# pandas is imported as `pd` in the notebook's import cell.
df = pd.DataFrame(result_values.values)

In [None]:
# Display the DataFrame built from the parsed player records.
df

Unnamed: 0,player_name,nationality
0,LeBron James,USA
1,Giannis Antetokounmpo,Greece
2,Luka Doncic,Slovenia
3,Kevin Durant,USA
4,Nikola Jokic,Serbia
5,Stephen Curry,USA
6,Joel Embiid,Cameroon
7,Kawhi Leonard,USA
8,Rudy Gobert,France
9,Damian Lillard,USA
