In [4]:
import os
import openai
from dotenv import find_dotenv, load_dotenv
from langchain_community.llms import OpenAI  
# from langchain_community.chat_models import ChatOpenAI  # depricated
from langchain_openai import ChatOpenAI
# from langchain.prompts import ChatPromptTemplate  # depricated
from langchain_core.prompts import ChatPromptTemplate

#### Connecting to client

In [5]:
load_dotenv(find_dotenv())
client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])

llm_model = "gpt-4o-mini"
chat = ChatOpenAI(temperature=0.0, model=llm_model)

In [6]:
email_response = """
Here's our itinerary for our upcoming trip to Europe.
We leave from Denver, Colorado airport at 8:45 pm, and arrive in Amsterdam 10 hours later
at Schipol Airport.
We'll grab a ride to our airbnb and maybe stop somewhere for breakfast before 
taking a nap.

Some sightseeing will follow for a couple of hours. 
We will then go shop for gifts 
to bring back to our children and friends.  

The next morning, at 7:45am we'll drive to to Belgium, Brussels - it should only take aroud 3 hours.
While in Brussels we want to explore the city to its fullest - no rock left unturned!

"""


email_template = """
From the following email, extract the following information:

leave_time: when are they leaving for vacation to Europe. If there's an actual
time written, use it, if not write unknown.

leave_from: where are they leaving from, the airport or city name and state if
available.

cities_to_visit: extract the cities they are going to visit. 
If there are more than one, put them in square brackets like '["cityone", "citytwo"].

Format the output as JSON with the following keys:
leave_time
leave_from
cities_to_visit

email: {email}
"""

In [7]:
prompt_template = ChatPromptTemplate.from_template(email_template)
messages = prompt_template.format_messages(email=email_response)

## LangChain Parsers

In [8]:
# ----------------- LangChain Parsers ----------------- #
from langchain.output_parsers import ResponseSchema  # allows us to create the instructions of what fields we want to extract along with their formats
from langchain.output_parsers import StructuredOutputParser  # actual output parsers

leave_time_schema = ResponseSchema(name="leave_time",
                                   description="When they are leaving. \
                                        It's usually numerical time of the day. \
                                        If not available write N/A")  # It's important to use a good description because this is what the langchain agent will use when creating the response schema.

leave_from_schema = ResponseSchema(name="leave_from",
                                   description="Where are they leaving from.\
                                        it's a city, airport or state, or province")

cities_to_visit_schema = ResponseSchema(name="cities_to_visit",
                                        description="The cities, towns they will be visiting on \
                                            their trip. This needs to be in a list")

# Storing the response schema for each field in a list which will later be fed to the langchain instructions output parser
response_schema = [
    leave_time_schema,
    leave_from_schema,
    cities_to_visit_schema
]

# Setup the output parser
output_parser = StructuredOutputParser.from_response_schemas(response_schema)
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"leave_time": string  // When they are leaving.                                         It's usually numerical time of the day.                                         If not available write N/A
	"leave_from": string  // Where are they leaving from.                                        it's a city, airport or state, or province
	"cities_to_visit": string  // The cities, towns they will be visiting on                                             their trip. This needs to be in a list
}
```


In [9]:
# reviewed email template - we updated to add the {format_instructions}
email_template_revised = """
From the following email, extract the following information:

leave_time: when are they leaving for vacation to Europe. If there's an actual
time written, use it, if not write unknown.

leave_from: where are they leaving from, the airport or city name and state if
available.

cities_to_visit: extract the cities they are going to visit. If there are more than 
one, put them in square brackets like '["cityone", "citytwo"].

Format the output as JSON with the following keys:
leave_time
leave_from
cities_to_visit

email: {email}
{format_instructions}
"""

In [11]:
updated_prompt = ChatPromptTemplate.from_template(template=email_template_revised)
messages = prompt_template.format_messages(email=email_response,
                                           format_instructions=format_instructions)

response = chat.invoke(messages)
print(response)
print("-----------------------------------------")
print(response.content)
print("-----------------------------------------")
print(type(response.content))  # Still a string but we are not done yet. We can now use the langchain parser to parse the response

content='```json\n{\n  "leave_time": "8:45 pm",\n  "leave_from": "Denver, Colorado airport",\n  "cities_to_visit": ["Amsterdam", "Brussels"]\n}\n```' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 42, 'prompt_tokens': 260, 'total_tokens': 302, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_06737a9306', 'finish_reason': 'stop', 'logprobs': None} id='run-9785f2e2-0836-4d38-aef9-d0092cb17385-0' usage_metadata={'input_tokens': 260, 'output_tokens': 42, 'total_tokens': 302, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
-----------------------------------------
```json
{
  "leave_time": "8:45 pm",
  "leave_from": "Denver, Colorado airport",
  "cities_to_visit": ["Am

#### Using the StructureOutputParser to parse output into dictionnary (JSON)

In [12]:
output_dict = output_parser.parse(response.content)  # parse into dict (JSON)
print(output_dict)
print(type(output_dict))
print(f"Cities: {output_dict['cities_to_visit'][0]}")  # Extracting the first city in cities_to_visit key

{'leave_time': '8:45 pm', 'leave_from': 'Denver, Colorado airport', 'cities_to_visit': ['Amsterdam', 'Brussels']}
<class 'dict'>
Cities: Amsterdam
