## Pattern I: Using an LLM to detect intent and recognize/extract entities, followed by Text-to-SQL generation

#### Imports 

In [1]:
from langchain.prompts.chat import SystemMessagePromptTemplate
from langchain.prompts.chat import HumanMessagePromptTemplate
from langchain.prompts.chat import AIMessagePromptTemplate
from langchain.prompts.chat import ChatPromptTemplate
from langchain.chat_models import ChatVertexAI
from google.cloud import bigquery
import pandas as pd
import logging
import os

##### Setup logging

In [2]:
# Notebook-level logger that writes INFO (and above) messages to the cell output.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Attach the stream handler only once: re-running this cell would otherwise
# stack an extra handler each time and every message would be printed repeatedly.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())

#### Essentials 

In [3]:
# Relative path of the service-account key file, exported through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable for the clients created below.
SERVICE_ACCOUNT_CREDENTIALS = './../credentials/vai-key.json'
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=SERVICE_ACCOUNT_CREDENTIALS)

In [4]:
# Settings passed to the ChatVertexAI constructor.
PROJECT = "arun-genai-bb"               # project id
LOCATION = "us-central1"                # region
MODEL_NAME = "codechat-bison@latest"    # chat model name

In [5]:
# BigQuery client used in Step 5 to run the generated SQL.
# No arguments are passed, so project and credentials come from the environment.
bq = bigquery.Client()

In [6]:
# Single chat model instance shared by every step of the pipeline.
llm_settings = dict(
    project=PROJECT,
    location=LOCATION,
    model_name=MODEL_NAME,
    temperature=0.0,
    max_output_tokens=256,
)
llm = ChatVertexAI(**llm_settings)

### Step 1: Identify the `intent` of the user's query

##### Load example prompt and completion pairs needed for intent detection

In [7]:
# Ordered list of message templates that will make up the intent-detection prompt.
messages = []

In [8]:
# Few-shot (prompt, intent) pairs used to prime the intent-detection prompt.
intent_examples_path = './../data/few-shot/prompts_intent.csv'
examples = pd.read_csv(intent_examples_path)
examples.head()

Unnamed: 0,prompt,intent
0,Need all the bookings from 10th to 15th Octobe...,RETRIEVE_RESERVATIONS
1,Could you retrieve reservations for mid-Octobe...,RETRIEVE_RESERVATIONS
2,Let’s see all the October reservations from 10...,RETRIEVE_RESERVATIONS
3,Any reservations from 10/10/2023 to 15/10/2023?,RETRIEVE_RESERVATIONS
4,I’m looking for bookings between the second an...,RETRIEVE_RESERVATIONS


In [9]:
# The system message defines the assistant's role and goes first in the prompt.
template = (
    "You are a helpful assistant capable of detecting the intent behind a user's query."
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages += [system_message_prompt]

In [10]:
# Replay every few-shot example as a human message followed by the AI answer.
# Columns are read by name rather than by position, so an extra column in the
# frame (for example a saved index) cannot break the unpacking.
for prompt, completion in zip(examples['prompt'], examples['intent']):
    human_message = HumanMessagePromptTemplate.from_template(prompt)
    messages.append(human_message)
    ai_message = AIMessagePromptTemplate.from_template(completion)
    messages.append(ai_message)

In [11]:
# Final human turn: a placeholder that is filled with the actual user query.
human_template = "{user_query}"
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages += [human_message]

In [12]:
# Combine the system message, the few-shot pairs and the final human template
# into a single chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(messages)

In [13]:
# The query that is carried through the whole pipeline.
user_query = "Provide a list of all reservations from October 10th to October 15th, 2023"

# Fill the prompt placeholder, then convert the result to a list of chat messages.
prompt_value = chat_prompt.format_prompt(user_query=user_query)
request = prompt_value.to_messages()

In [14]:
%%time 

# Send the few-shot conversation to the model; the stripped reply text is
# kept as the detected intent and logged.
response = llm(request)
intent = response.content.strip()
logger.info(intent)

RETRIEVE_RESERVATIONS


CPU times: user 42.1 ms, sys: 4.76 ms, total: 46.9 ms
Wall time: 921 ms


### Step 2: Extract the entities from the user query

Load example prompt and completion pairs needed for entity recognition

In [15]:
# Start a fresh message list for the entity-extraction prompt.
messages = []

In [16]:
# Few-shot (prompt, entities) pairs used to prime the entity-extraction prompt.
ner_examples_path = './../data/few-shot/prompts_ner.csv'
examples = pd.read_csv(ner_examples_path)
examples.head()

Unnamed: 0,prompt,entities
0,Can you show me all the reservations from Octo...,"Start Date:October 10th, 2023|End Date:October..."
1,What bookings do we have from 10/10/2023 to 10...,Start Date:10/10/2023|End Date:10/15/2023
2,Show the reservations occurring between the se...,"Start Date:October 8th, 2023|End Date:October ..."
3,List all bookings that are happening from Octo...,"Start Date:October 10, 2023|End Date:October 1..."
4,Fetch the reservations from the second week of...,"Start Date:October 8th, 2023|End Date:October ..."


In [17]:
# The system message defines the assistant's role and goes first in the prompt.
template = (
    "You are a helpful assistant capable of performing named entity recognition."
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages += [system_message_prompt]

In [18]:
# Replay every few-shot example as a human message followed by the AI answer.
# Columns are read by name rather than by position, so an extra column in the
# frame (for example a saved index) cannot break the unpacking.
for prompt, completion in zip(examples['prompt'], examples['entities']):
    human_message = HumanMessagePromptTemplate.from_template(prompt)
    messages.append(human_message)
    ai_message = AIMessagePromptTemplate.from_template(completion)
    messages.append(ai_message)

In [19]:
# Final human turn: the user query plus an instruction to normalise the dates.
human_template = "{user_query} Standardize the date format to YYYY-MM-DD."
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages += [human_message]

In [20]:
# Combine the system message, the few-shot pairs and the final human template
# into the entity-extraction chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(messages)

In [21]:
# Fill the placeholder with the same user query used for intent detection.
request = chat_prompt.format_prompt(user_query=user_query).to_messages()

In [22]:
%%time 

# Send the few-shot conversation to the model; the stripped reply text is
# kept as the extracted entities and logged.
response = llm(request)
entities = response.content.strip()
logger.info(entities)

Start Date:2023-10-10|End Date:2023-10-15


CPU times: user 5.96 ms, sys: 2.25 ms, total: 8.2 ms
Wall time: 998 ms


### Step 3: Map Intent to Database Action

In [23]:
# Start a fresh message list for the intent-to-table mapping prompt.
messages = []

In [24]:
# Few-shot (intent, mapped_tables) pairs used to prime the table-mapping prompt.
mapping_examples_path = './../data/few-shot/intent_to_table_mapping.csv'
examples = pd.read_csv(mapping_examples_path)
examples.head()

Unnamed: 0,intent,mapped_tables
0,RETRIEVE_RESERVATIONS,reservations
1,IDENTIFY_RECENT_CUSTOMERS,customers|reservations
2,CALCULATE_REVENUE,reservations|transactions
3,FIND_PEAK_DEPARTURE_MONTHS,flights
4,GROUP_AND_COUNT_CUSTOMERS_BY_AGE,customers


In [25]:
# The system message defines the assistant's role and goes first in the prompt.
template = (
    "You are a helpful assistant capable of mapping detected intent to the correct list of BigQuery tables."
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages += [system_message_prompt]

In [26]:
# Replay every few-shot example as a human message (the intent) followed by
# the AI answer (the mapped tables).
# Columns are read by name rather than by position, so an extra column in the
# frame (for example a saved index) cannot break the unpacking.
for prompt, completion in zip(examples['intent'], examples['mapped_tables']):
    human_message = HumanMessagePromptTemplate.from_template(prompt)
    messages.append(human_message)
    ai_message = AIMessagePromptTemplate.from_template(completion)
    messages.append(ai_message)

In [27]:
# Final human turn: a placeholder that is filled with the detected intent.
human_template = "{user_intent}"
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages += [human_message]

In [28]:
# Combine the system message, the few-shot pairs and the final human template
# into the table-mapping chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(messages)

In [29]:
# Feed the intent detected in Step 1 into the mapping prompt.
request = chat_prompt.format_prompt(user_intent=intent).to_messages()

In [30]:
%%time 

# Send the few-shot conversation to the model; the stripped reply text is
# kept as the mapped table list and logged.
response = llm(request)
tables = response.content.strip()
logger.info(tables)

reservations


CPU times: user 4.47 ms, sys: 2.51 ms, total: 6.98 ms
Wall time: 2.34 s


### Step 4: SQL Creation

Load table descriptions as additional context to help SQL creation

In [31]:
# Read the schema description as text. In binary mode the value is `bytes`,
# which is rendered as b'...' with escaped newlines when interpolated into the
# prompt template.
with open("./../data/text-schema/customers.txt", "r", encoding="utf-8") as f:
    customers_table_desc = f.read()

In [32]:
# Read the schema description as text. In binary mode the value is `bytes`,
# which is rendered as b'...' with escaped newlines when interpolated into the
# prompt template.
with open("./../data/text-schema/flights.txt", "r", encoding="utf-8") as f:
    flights_table_desc = f.read()

In [33]:
# Read the schema description as text. In binary mode the value is `bytes`,
# which is rendered as b'...' with escaped newlines when interpolated into the
# prompt template.
with open("./../data/text-schema/reservations.txt", "r", encoding="utf-8") as f:
    reservations_table_desc = f.read()

In [34]:
# Read the schema description as text. In binary mode the value is `bytes`,
# which is rendered as b'...' with escaped newlines when interpolated into the
# prompt template.
with open("./../data/text-schema/transactions.txt", "r", encoding="utf-8") as f:
    transactions_table_desc = f.read()

**Note:** In practice, load only the descriptions of the tables mapped in the previous step; do not load all of the descriptions. This notebook loads all four above.

In [35]:
# Start a fresh message list for the SQL-generation prompt.
messages = []

In [36]:
# The system message defines the assistant's role and goes first in the prompt.
template = (
    "You are a SQL master expert capable of writing complex SQL query in BigQuery."
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages += [system_message_prompt]

In [37]:
# Prompt for SQL generation: combines the outputs of Steps 1-3 with the schema
# description of each table. Each table section uses its own placeholder
# (the Transactions section previously repeated the reservations placeholder),
# and the closing backtick around the dataset prefix is restored.
human_template = """Given the following inputs:
- INTENT: {intent}
- EXTRACTED_ENTITIES: {entities}
- MAPPED_TABLES: {tables}

Please construct a SQL query using the MAPPED_TABLES and EXTRACTED_ENTITIES. 

Below are the schema descriptions for the relevant tables:

**Customers Table:**
{customers_table_desc}

**Flights Table:**
{flights_table_desc}

**Reservations Table:**
{reservations_table_desc}

**Transactions Table:**
{transactions_table_desc}

Note: Prefix the table names with `flight_reservations.`"""

In [None]:
# Wrap the SQL-generation template as the human turn and queue it after the system message.
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages += [human_message]

In [None]:
# Combine the system message and the human template into the SQL-generation chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(messages)

In [None]:
# Values for every placeholder in the SQL-generation template: the outputs of
# Steps 1-3 plus the four schema descriptions.
prompt_inputs = {
    'intent': intent,
    'entities': entities,
    'tables': tables,
    'customers_table_desc': customers_table_desc,
    'flights_table_desc': flights_table_desc,
    'reservations_table_desc': reservations_table_desc,
    'transactions_table_desc': transactions_table_desc,
}
request = chat_prompt.format_prompt(**prompt_inputs).to_messages()

In [None]:
%%time 

response = llm(request)
sql = '\n'.join(response.content.strip().split('\n')[1:-1])
logger.info(sql)

### Step 5: Execute the SQL query in BigQuery

In [None]:
# Submit the generated SQL to BigQuery.
# NOTE(review): `sql` is model-generated and is run as-is — consider validating
# it (e.g. restricting it to read-only statements) before executing.
query_job = bq.query(sql)

In [None]:
# Render each result row as "column = value" lines, with a blank line between rows.
# The columns are taken from the row itself: the SQL is generated by the model,
# so hard-coded attribute names (previously hotel_name, room_type, ...) raise
# AttributeError whenever the query returns different columns.
result_lines = []

for row in query_job:
    result_lines.extend(f"{column} = {value}" for column, value in row.items())
    result_lines.append("")

sql_result = "\n".join(result_lines).strip()
logger.info(sql_result)

### Step 6: Transform SQL results into a human friendly response

In [None]:
# Start a fresh message list for the response-formatting prompt.
messages = []

In [None]:
# The system message defines the assistant's role and goes first in the prompt.
template = (
    "You are a travel assistant chatbot that can help people make travel reservations."
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages += [system_message_prompt]

In [None]:
# Prompt that pairs the original user query with the textual SQL result and
# asks the model to phrase it for a human reader.
# NOTE(review): the wording says "room availabilities", while the example query
# in this notebook retrieves reservations — confirm the wording is intended.
human_template = """{user_query}

Given the following list of room availabilities as below:
{sql_result}

Please transform them into a human-readable format."""

In [None]:
# Wrap the formatting template as the human turn and queue it after the system message.
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages += [human_message]

In [None]:
# Combine the system message and the human template into the response-formatting chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(messages)

In [None]:
# Fill the template with the original question and the textual SQL result,
# then convert the result to a list of chat messages.
prompt_value = chat_prompt.format_prompt(user_query=user_query, sql_result=sql_result)
request = prompt_value.to_messages()

In [None]:
%%time 

# Ask the model to turn the SQL result into a human-friendly answer.
response = llm(request)

In [None]:
# Final answer: the model's reply with surrounding whitespace removed, then logged.
output = response.content.strip()
logger.info(output)