In [4]:
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    AIMessagePromptTemplate,
    SystemMessagePromptTemplate,
    PromptTemplate
)
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_community.document_loaders.csv_loader import CSVLoader
import pandas as pd
import os

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables into the process environment;
# the API-key lookup in the next cell reads from there.
load_dotenv(find_dotenv())

True

In [5]:


# Make sure the Groq API key is available before the client is built.
# Environment variable names are case-sensitive on Linux/macOS, so look up the
# upper-case name that the error message advertises first, and keep the
# original lower-case spelling as a fallback for existing .env files.
api_key = os.getenv("GROQ_API_KEY") or os.getenv("groq_api_key")
# An empty value is as unusable as a missing one, so reject both.
if not api_key:
    raise ValueError("Did not find groq_api_key, please add an environment variable `GROQ_API_KEY` which contains it, or pass `groq_api_key` as a named parameter.")

In [6]:
# Chat model shared by every chain in this notebook.
# temperature=0 keeps generations as repeatable as the API allows; the
# remaining sampling options are passed through model_kwargs.
sampling_overrides = {
    "top_p": 0.95,
    "frequency_penalty": 0.1,
    "presence_penalty": 0.2,
}
llm = ChatGroq(
    model="llama3-8b-8192",
    api_key=api_key,
    temperature=0,
    model_kwargs=sampling_overrides,
    verbose=True,
)

In [7]:
# Example question that drives the step-by-step walkthrough below.
user_query = "What luxury hotels are available in Basel?"

## Step 1: Identify the intent of the user's query

In [8]:
# Few-shot examples used as context for intent detection.
intent_examples_csv = "../data/few-shot/new_prompt_intents.csv"
loader = CSVLoader(file_path=intent_examples_csv)
doc_int = loader.load()

In [9]:
# Short form of the system instruction. The chain below is built from the full
# prompt, not from this variable.
template = "You are a helpful assistant capable of detecting the intent behind a user's query."

# Intent-detection prompt. Every later step reads intent["intent"], so the
# prompt names the exact JSON shape instead of asking for "a JSON object".
# Literal braces are doubled because PromptTemplate uses str.format-style
# placeholders.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a helpful assistant capable of detecting the intent behind a user's query. Your job is to extract relevant intent from user queries based on patterns in documents.
    Respond with a JSON object of the form {{"intent": "<INTENT_NAME>"}}. Provide no preamble or explanation.

    example:
    I'm booked on flight 30625 and my ticket number is 0060005435212386. Can you tell me my assigned seat?
    intent=CHECK_SEAT_ASSIGNMENT

    Documents:
    {document}

    Now, extract intents from the following user query:
    {user_query}

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["user_query", "document"],
)

detect_intent = prompt | llm | JsonOutputParser()
intent = detect_intent.invoke({"user_query": user_query, "document": doc_int})

# Fail here, next to the cause, rather than with a KeyError several cells later.
if not isinstance(intent, dict) or "intent" not in intent:
    raise ValueError(f"Intent detection returned an unexpected payload: {intent!r}")
print(intent)


{'intent': 'FIND_HOTEL_OPTIONS'}


## Step 2: Extract the entities from the user query
Load the example prompt and completion pairs needed for entity recognition.

In [10]:
# Few-shot examples used as context for entity extraction.
ner_examples_csv = "../data/few-shot/new_prompt_ner.csv"
loader = CSVLoader(file_path=ner_examples_csv)
doc = loader.load()

In [11]:
# Entity-extraction prompt. Later cells read ner["entities"], so the prompt
# states that wrapper key explicitly. Rule 4 now shows the empty result in
# full. Literal braces are doubled because PromptTemplate uses
# str.format-style placeholders.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an AI assistant specializing in Named Entity Recognition (NER). Your task is to extract relevant entities from user queries based on patterns in documents. Follow these strict rules:

    1. Always respond with a single JSON object of the form {{"entities": {{"<entity name>": "<value>"}}}}. Do not provide any explanation or additional text.
    2. Use lowercase keys in the JSON object.
    3. For dates, convert them to YYYY-MM-DD format.
    4. If no entities are found, return {{"entities": {{}}}}.

    Examples:

    example 1:
    user_query: Fetch the reservations from the second week of October 2023
    answer=Start Date: 2023-10-08, End Date: 2023-10-15
    
    example 2:
    user_query: I want to know the busiest departure months in 2023.
    answer=Year:'2023'

    Documents:
    {document}

    Now, extract entities from the following user query:
    {user_query}

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["user_query", "document"],
)

ner_detect = prompt | llm | JsonOutputParser()
ner = ner_detect.invoke({"user_query": user_query, "document": doc})

# The model does not always wrap its answer: recorded runs in this notebook
# show both {"entities": {...}} and a flat {"Airport": ...}. Normalize to the
# wrapped shape so the ner["entities"] lookups here and in later cells work.
if not isinstance(ner, dict):
    raise ValueError(f"Entity extraction returned an unexpected payload: {ner!r}")
if "entities" not in ner:
    ner = {"entities": ner}
print(ner["entities"])


{'Location': 'Basel', 'Price Tier': 'Luxury', 'Attribute': 'Hotel Name'}


## Step 3: Table mapping

In [12]:
# Few-shot examples used as context for mapping an intent to tables.
mapping_examples_csv = "../data/few-shot/new_mapping.csv"
loader = CSVLoader(file_path=mapping_examples_csv)
doc_map = loader.load()

In [13]:
# One-line summary of the table-mapping instruction.
# NOTE(review): the prompt in the next cell embeds this sentence directly and
# does not read this variable; it looks unused -- confirm before removing.
template = "You are a helpful assistant capable of mapping detected intent to the correct list of SQLite tables."

In [14]:
# Prompt asking the model which tables are relevant to a detected intent.
# The answer is plain text (the in-prompt example separates table names with
# "|"), so the chain ends in StrOutputParser rather than a JSON parser.
prompt = PromptTemplate(
    input_variables=["intent", "document"],
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a helpful assistant capable of mapping detected intent to the correct list of SQLite tables.
    Provide NO preamble or explanation

    example:
    intent=RETRIEVE_RESERVATIONS
    answer=hotels|trip_recommendations|flights

    Documents:
    {document}

    User Intent:
    {intent}

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
)

detect_map = prompt | llm | StrOutputParser()

# Feed the intent label from step 1 together with the mapping examples.
mapping_inputs = {"intent": intent["intent"], "document": doc_map}
result_map = detect_map.invoke(mapping_inputs)
print(result_map)


hotels


## Step 4: Load and filter table schemas

In [15]:
from langchain_community.utilities.sql_database import SQLDatabase

# Open the SQLite travel database that the generated SQL is run against.
sqlite_uri = "sqlite:///../data/travel.sqlite"
db = SQLDatabase.from_uri(sqlite_uri)

In [16]:
from langchain_community.agent_toolkits import SQLDatabaseToolkit

# Build the toolkit around the database and the shared chat model, then pull
# its context; the "table_info" entry is what the SQL-generation step reads.
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
context = toolkit.get_context()

# Show the schema text (CREATE TABLE statements plus sample rows).
schema_dump = context["table_info"]
print(schema_dump)


CREATE TABLE aircrafts_data (
	aircraft_code TEXT, 
	model TEXT, 
	range INTEGER
)

/*
3 rows from aircrafts_data table:
aircraft_code	model	range
773	Boeing 777-300	11100
763	Boeing 767-300	7900
SU9	Sukhoi Superjet-100	3000
*/


CREATE TABLE airports_data (
	airport_code TEXT, 
	airport_name TEXT, 
	city TEXT, 
	coordinates TEXT, 
	timezone TEXT
)

/*
3 rows from airports_data table:
airport_code	airport_name	city	coordinates	timezone
ATL	Hartsfield-Jackson Atlanta International Airport	Atlanta	[33.6407, -84.4277]	America/New_York
PEK	Beijing Capital International Airport	Beijing	[40.0799, 116.6031]	Asia/Shanghai
DXB	Dubai International Airport	Dubai	[25.2532, 55.3657]	Asia/Dubai
*/


CREATE TABLE boarding_passes (
	ticket_no TEXT, 
	flight_id INTEGER, 
	boarding_no INTEGER, 
	seat_no TEXT
)

/*
3 rows from boarding_passes table:
ticket_no	flight_id	boarding_no	seat_no
0060005435212351	30625	1	2D
0060005435212386	30625	2	3G
0060005435212381	30625	3	4H
*/


CREATE TABLE bookings (
	bo

## Step 6: Text-to-SQL

In [22]:
# Text-to-SQL prompt: combines the detected intent, the extracted entities, the
# mapped tables, the original question and the schema text into one request
# for a single SQLite query.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a SQL master expert specializing in writing complex SQL queries for SQLite. Your task is to construct a SQL query based on the provided information. Follow these strict rules:

    1. Generate ONLY the SQL query. Do not provide any explanations, comments, or additional text.
    2. Ensure the query is complete and executable in SQLite.
    3. Use the provided input parameters, user query, and table schemas to construct the most appropriate query.
    4. If multiple tables need to be joined, use the appropriate JOIN clauses.
    5. Include WHERE clauses to filter results based on the extracted entities.
    6. If sorting is implied by the user query, include an ORDER BY clause.
    7. If the query implies a limit on the number of results, include a LIMIT clause.

    Input Parameters:
    -----------------
    INTENT: {intent}
    EXTRACTED_ENTITIES: {entities}
    MAPPED_TABLES: {tables}

    User Query:
    -----------
    {user_query}

    Table Schemas:
    --------------
    {table_schemas}

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["intent", "entities", "tables", "user_query", "table_schemas"],
)

sql_generate = prompt | llm | StrOutputParser()

# The NER step does not always wrap its answer under "entities": recorded runs
# in this notebook show both shapes. Fall back to the whole payload instead of
# raising KeyError on the flat shape.
entities = ner.get("entities", ner) if isinstance(ner, dict) else ner

sql_gen = sql_generate.invoke(
    {
        "intent": intent["intent"],
        "entities": entities,
        "tables": result_map,
        "user_query": user_query,
        "table_schemas": context["table_info"],
    }
)
print(sql_gen)


SELECT h.name 
FROM hotels h 
WHERE h.location = 'Basel' 
AND h.price_tier = 'Luxury';


In [26]:
# Hand-written reference query for the same question. It is not executed in
# this cell. Unlike the generated SQL shown in the previous cell's output, it
# selects every column and also filters on `booked = 0`.
QUERY = """
SELECT * 
FROM hotels 
WHERE location = 'Basel' AND price_tier = 'Luxury' AND booked = 0;
"""

# print(user_query)
# Run the LLM-generated statement (sql_gen), not QUERY, against the database.
result = db.run(sql_gen)
print(result)


[('Hilton Basel',)]


## Step 7: Transform SQL results into a human friendly response

In [47]:
# Response prompt: turns the raw SQL result into a conversational reply.
# It includes an explicit grounding guideline because the recorded answer for
# the Basel query described a star rating, river views and amenities although
# the SQL result contained only the hotel name.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a friendly and helpful travel assistant named Alex, working for a prestigious airline. Your role is to assist customers with their travel inquiries in a warm, professional manner. Always maintain a positive and supportive tone.

User's Question:
----------------
{user_query}

SQL Result:
----------------
{sql_response}

Task:
-----
Transform the SQL query result into a natural, conversational response. Follow these guidelines:

- Start with a warm greeting and acknowledge the user's query.
- Present the information in a clear, easy-to-understand manner.
- Use only the facts contained in the SQL result; do not add details (ratings, amenities, locations, prices) that are not in it.


IMPORTANT Notes:
----------------
- Be courteous, empathetic, and maintain a conversational tone throughout.
- Personalize the response when possible, using the customer's name if provided.
- Avoid technical jargon or overly formal language.
- If the answer doesn't require a tabular structure, present it in a natural, flowing text.
- If no results are found, apologize politely and offer alternative suggestions or ask for more information.


<|eot_id|><|start_header_id|>assistant<|end_header_id|>
""",
    input_variables=["user_query", "sql_response"],
)

response_llm = prompt | llm | StrOutputParser()
response = response_llm.invoke({"user_query": user_query, "sql_response": result})
print(response)

Hello there! Thank you for reaching out to us about your travel plans to Basel. I'd be delighted to help you find a luxurious place to stay.

I've checked our database, and I'm happy to report that we have a fantastic option for you: the Hilton Basel! This 5-star hotel is located in the heart of Basel, offering stunning views of the Rhine River and the city's picturesque old town. The hotel features modern rooms, a fitness center, and an on-site restaurant serving delicious local cuisine.

If you're looking for a luxurious retreat during your stay in Basel, the Hilton Basel is an excellent choice. Would you like me to provide more information or make a booking for you?


## Final result

In [48]:
import json

def get_response(user_query: str) -> str:
    """
    Run the full text-to-SQL pipeline for one question and return the answer.

    The steps are: detect the intent, extract entities, map the intent to
    tables, generate SQL, execute it, and turn the result into a
    conversational reply. Every intermediate result is printed so a run can be
    inspected. The chains, few-shot documents, schema context and database
    handle are read from notebook-level variables defined in earlier cells.

    Args:
        user_query (str): The user's natural-language question.
    Returns:
        str: The conversational response produced by the final LLM call.
    Raises:
        KeyError: If the intent payload has no "intent" key.
        Exception: Whatever ``db.run`` raises when the generated SQL is
            invalid; the error is not caught here.
    """
    separator = "=" * 50

    # Get intent
    intent = detect_intent.invoke({"user_query": user_query, "document": doc_int})
    print(f"Intent detected: {json.dumps(intent, indent=2)}")

    # Detect entities
    ner = ner_detect.invoke({"user_query": user_query, "document": doc})
    print(separator)
    print(f"Entities detected: {json.dumps(ner, indent=2)}")

    # Map intent to tables
    result_map = detect_map.invoke({"intent": intent["intent"], "document": doc_map})
    print(separator)
    print(f"Tables mapped: {json.dumps(result_map, indent=2)}")

    # Generate SQL query
    sql_gen = sql_generate.invoke(
        {
            "intent": intent["intent"],
            "entities": ner,
            "tables": result_map,
            "user_query": user_query,
            "table_schemas": context["table_info"],
        }
    )
    print(separator)
    print(f"SQL query generated: {sql_gen}")

    # Execute SQL query (the statement is LLM-generated and may be invalid)
    result = db.run(sql_gen)
    print(separator)
    print(f"SQL query result: {json.dumps(result, indent=2)}")

    # Generate response
    response = response_llm.invoke({"user_query": user_query, "sql_response": result})
    print(separator)
    print(f"\n{response}")

    # The signature and docstring promise the response; without this return
    # the function yields None.
    return response

In [40]:
# In the recorded run the SQL generated for this question failed with an
# OperationalError. The failure is the point of this cell, so catch and
# display it instead of letting it abort a Restart & Run All.
try:
    get_response("Can you tell me the nearest airport to Dubai Mall?")
except Exception as err:  # demonstration cell: show the error, keep running
    print(f"{type(err).__name__}: {err}")

Intent detected: {
  "intent": "LOCATE_NEAREST_AIRPORT"
}
Entities detected: {
  "Airport": "Dubai International Airport"
}
Tables mapped: "airports_data"
SQL query generated: SELECT a.airport_name, a.city, a.coordinates
FROM airports_data a
WHERE a.airport_name LIKE '%Dubai International Airport'
ORDER BY (SELECT AVG(r.range) FROM aircrafts_data r WHERE r.aircraft_code IN (SELECT sf.aircraft_code FROM seats s JOIN ticket_flights sf ON s.aircraft_code = sf.aircraft_code WHERE sf.fare_conditions = 'Business')) ASC
LIMIT 1;


OperationalError: (sqlite3.OperationalError) no such column: sf.aircraft_code
[SQL: SELECT a.airport_name, a.city, a.coordinates
FROM airports_data a
WHERE a.airport_name LIKE '%Dubai International Airport'
ORDER BY (SELECT AVG(r.range) FROM aircrafts_data r WHERE r.aircraft_code IN (SELECT sf.aircraft_code FROM seats s JOIN ticket_flights sf ON s.aircraft_code = sf.aircraft_code WHERE sf.fare_conditions = 'Business')) ASC
LIMIT 1;]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [49]:
# Simple counting question; in the recorded run the generated SQL touches a
# single table and executes successfully.
get_response("How many business class seats are there on the SU9 aircraft?")

Intent detected: {
  "intent": "INQUIRE_SEAT_AVAILABILITY"
}
Entities detected: {
  "entities": {
    "Aircraft": "SU9",
    "Attribute": "Business Class Seats",
    "Comparison": "Count"
  }
}
Tables mapped: "hotels|trip_recommendations|flights|seats"
SQL query generated: SELECT COUNT(*)
FROM seats
WHERE aircraft_code = 'SU9' AND fare_conditions = 'Business';
SQL query result: "[(12,)]"

Hello there! Thank you for reaching out to us with your question about our SU9 aircraft. I'm more than happy to help you with that.

According to our records, the SU9 aircraft has a total of 12 business class seats. I hope that information is helpful for your travel planning. If you have any more questions or need further assistance, please don't hesitate to ask. I'm here to help make your travel experience with us as smooth and enjoyable as possible.


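As usual, the LLM still cannot generate valid SQL for complex queries: the simple seat-count question succeeds, but the nearest-airport question produces a query that references a non-existent column (`sf.aircraft_code`).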