In [4]:
import os
import langchain
from dotenv import load_dotenv
from datetime import datetime
from pydantic import BaseModel, BaseConfig, Field
from langchain.chat_models import ChatOpenAI
from langchain.cache import InMemoryCache
from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate

#Config
load_dotenv(".env")
api_key = os.environ.get("key")

In [3]:
langchain.llm_cache = InMemoryCache()
chat = ChatOpenAI(api_key=api_key, model='gpt-3.5-turbo', max_tokens=500)

In [9]:
class Scientist(BaseModel):
    name: str = Field(description='Name of a Scientist')
    discoveries: list = Field(description="Diffusion model list of discoveries in deep learning")

In [10]:
parser = PydanticOutputParser(pydantic_object=Scientist)

In [11]:
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"title": "Name", "description": "Name of a Scientist", "type": "string"}, "discoveries": {"title": "Discoveries", "description": "Diffusion model list of discoveries in deep learning", "type": "array", "items": {}}}, "required": ["name", "discoveries"]}
```


In [20]:
human_prompt = HumanMessagePromptTemplate.from_template("{request_prompt}\n{format_instructions}")
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])
prompt = chat_prompt.format_prompt(request_prompt = "Tell me about the scientists", format_instructions = parser.get_format_instructions()).to_messages()
result = chat(prompt)

In [22]:
result.content

'{\n  "name": "Albert Einstein",\n  "discoveries": [\n    "Theory of Relativity",\n    "Photoelectric Effect",\n    "Brownian Motion"\n  ]\n}'

In [24]:
print(parser.parse(result.content))

name='Albert Einstein' discoveries=['Theory of Relativity', 'Photoelectric Effect', 'Brownian Motion']
