## Prompt Intent

In [8]:
import json
import pandas as pd

# Directory holding the few-shot CSV files this notebook reads and writes.
PATH = "../data/few-shot/"

# Extra prompts to merge in; consumed below as a mapping of intent -> prompts.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on
# the platform's locale encoding (which is not UTF-8 on every system).
with open("../data/json/extra_prompt.json", "r", encoding="utf-8") as f:
    extra_prompt = json.load(f)

# Existing few-shot prompt/intent examples.
prompt_intent = pd.read_csv(f"{PATH}prompts_intent.csv")


In [9]:
# Convert the JSON mapping into one {"prompt", "intent"} record per prompt.
# A comprehension keeps its loop variables out of the notebook namespace; a
# plain `for intent, ...` loop would leave `intent` bound to a string, and this
# notebook later uses `intent` as the name of a DataFrame.
data_for_df = [
    {"prompt": prompt_text, "intent": intent_name}
    for intent_name, prompt_list in extra_prompt.items()
    for prompt_text in prompt_list
]

# Explicit columns so the frame has the expected schema even when the JSON
# contributes no prompts at all.
new_df = pd.DataFrame(data_for_df, columns=["prompt", "intent"])

In [10]:
# Spot-check five randomly chosen rows of the extra prompts.
new_df.sample(n=5)

Unnamed: 0,prompt,intent
42,Can you tell me if seat 2A is a window seat on...,INQUIRE_SEAT_AVAILABILITY
67,Are there any upper midscale hotels available ...,FIND_HOTEL_OPTIONS
15,"For a family trip, should I choose a flight wi...",COMPARE_AIRCRAFT_MODELS
51,Are there any Economy class tickets for flight...,CHECK_TICKET_DETAILS
62,Which car rental company has the most bookings...,ANALYZE_BOOKING_PATTERNS


In [11]:
# Stack the existing examples and the extra prompts into one frame with a
# fresh 0..n-1 index, write it out, then preview a few random rows.
intent_frames = [prompt_intent, new_df]
combined_df = pd.concat(intent_frames, ignore_index=True)
combined_df.to_csv(f"{PATH}new_prompt_intents.csv", index=False)
combined_df.sample(n=7)

Unnamed: 0,prompt,intent
52,Can you give me details about the most expensi...,RETRIEVE_BOOKING_DETAILS
30,Which aircraft can I take for a non-stop fligh...,FIND_AIRCRAFT_BY_RANGE
97,Are there any upper midscale hotels available ...,FIND_HOTEL_OPTIONS
90,What's the most common fare condition for tick...,ANALYZE_BOOKING_PATTERNS
58,Which month had the highest total booking amount?,ANALYZE_BOOKING_TRENDS
6,Name the people who have booked a flight in th...,IDENTIFY_RECENT_CUSTOMERS
95,Can I book a room at the Marriott Zurich from ...,FIND_HOTEL_OPTIONS


## Prompt NER

In [12]:
# Existing few-shot NER examples. Read through PATH, like every other few-shot
# file in this notebook, so the data directory is configured in one place.
prompt_ner = pd.read_csv(f"{PATH}prompts_ner.csv")
prompt_ner.sample(10)

Unnamed: 0,prompt,entities
27,"Find customers, excluding minors, with a valid...",Age:Excluding Minors|Status:Validated|Start Da...
22,Can you segregate customers into 5 age categor...,Age Brackets:Five
2,Show the reservations occurring between the se...,"Start Date:October 8th, 2023|End Date:October ..."
21,Group the customers into 5 different age categ...,Age Brackets:5
9,Retrieve the list of customers with flight res...,Time Frame:Previous week
26,Identify adults who have secured their reserva...,Age:Adults|Status:Secured|Start Date:01/10/202...
0,Can you show me all the reservations from Octo...,"Start Date:October 10th, 2023|End Date:October..."
6,Give me a list of customers who made reservati...,Time Frame:Preceding week
7,Show me all flight bookings from the last week.,Time Frame:Last week
17,Can you find out the peak departure months in ...,Year:2023


In [13]:
# Extra NER examples; consumed below as a mapping of prompt -> entities.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on
# the platform's locale encoding.
with open("../data/json/extra_ner.json", "r", encoding="utf-8") as f:
    extra_ner = json.load(f)

In [16]:
# Convert the JSON mapping into one {"prompt", "entities"} record per entry.
# A comprehension is used so no loop variables are left behind in the
# notebook namespace.
data_for_df = [
    {"prompt": prompt_text, "entities": entity_spec}
    for prompt_text, entity_spec in extra_ner.items()
]

# Explicit columns so the frame has the expected schema even when the JSON
# is empty.
df_ner = pd.DataFrame(data_for_df, columns=["prompt", "entities"])

In [17]:
# Spot-check four randomly chosen rows of the extra NER examples.
df_ner.sample(n=4)

Unnamed: 0,prompt,entities
12,What was the total amount for the booking made...,"Date:March 20, 2024|Attribute:Total Amount"
55,I'm interested in architecture. What's worth s...,Location:Zurich|Keyword:architecture|Attribute...
33,What is the passenger ID for the ticket number...,Ticket Number:9880005432000991|Attribute:Passe...
35,How many unique passengers are there in the ti...,Attribute:Unique Passenger Count


In [18]:
# Stack the existing and extra NER examples into one frame with a fresh
# 0..n-1 index, write it out, then preview a few random rows.
# Note: this rebinds `combined_df`, which held the merged intent frame above.
ner_frames = [prompt_ner, df_ner]
combined_df = pd.concat(ner_frames, ignore_index=True)
combined_df.to_csv(f"{PATH}new_prompt_ner.csv", index=False)
combined_df.sample(n=7)

Unnamed: 0,prompt,entities
85,I'm interested in architecture. What's worth s...,Location:Zurich|Keyword:architecture|Attribute...
62,How many tickets were booked under the referen...,Booking Reference:06B046|Attribute:Ticket Count
84,What wildlife-related activities are available...,Location:Bern|Keyword:wildlife|Attribute:Recom...
36,Could you give me the exact latitude and longi...,Airport:Los Angeles International Airport|Attr...
69,Which locations offer economy car rentals?,Price Tier:Economy|Attribute:Locations
81,What's the longest possible stay duration amon...,Attribute:Stay Duration|Comparison:Longest
86,Are there any museum recommendations for Basel?,Location:Basel|Keyword:museum|Attribute:Recomm...


## Intent table mapping

In [19]:
# Load the current intent -> tables mapping and display the whole frame.
mapping_csv = f"{PATH}intent_to_table_mapping.csv"
table_mapping = pd.read_csv(mapping_csv)
table_mapping

Unnamed: 0,intent,mapped_tables
0,RETRIEVE_RESERVATIONS,reservations|flights
1,IDENTIFY_RECENT_CUSTOMERS,reservations|customers
2,CALCULATE_REVENUE,reservations|transactions
3,FIND_PEAK_DEPARTURE_MONTHS,flights
4,GROUP_AND_COUNT_CUSTOMERS_BY_AGE,customers
5,IDENTIFY_AND_RANK_CUSTOMERS,customers|reservations


In [23]:
# Re-read the merged prompt/intent file written earlier and list the distinct
# intent labels it contains.
intent = pd.read_csv(f"{PATH}new_prompt_intents.csv")
intent.loc[:, "intent"].unique()

array(['RETRIEVE_RESERVATIONS', 'IDENTIFY_RECENT_CUSTOMERS',
       'CALCULATE_REVENUE', 'FIND_PEAK_DEPARTURE_MONTHS',
       'GROUP_AND_COUNT_CUSTOMERS_BY_AGE', 'IDENTIFY_AND_RANK_CUSTOMERS',
       'FIND_AIRCRAFT_BY_RANGE', 'LOCATE_NEAREST_AIRPORT',
       'CHECK_TIMEZONE_DIFFERENCE', 'COMPARE_AIRCRAFT_MODELS',
       'CHECK_SEAT_ASSIGNMENT', 'RETRIEVE_BOOKING_DETAILS',
       'FIND_FLIGHT_INFORMATION', 'ANALYZE_BOOKING_TRENDS',
       'CHECK_FLIGHT_STATUS', 'FIND_FLIGHT_ROUTES',
       'INQUIRE_SEAT_AVAILABILITY', 'ANALYZE_FLIGHT_PATTERNS',
       'CHECK_TICKET_DETAILS', 'RETRIEVE_PASSENGER_INFO',
       'FIND_CAR_RENTAL_OPTIONS', 'ANALYZE_BOOKING_PATTERNS',
       'FIND_HOTEL_OPTIONS', 'CHECK_HOTEL_AVAILABILITY',
       'GET_TRIP_RECOMMENDATIONS', 'PLAN_ITINERARY'], dtype=object)

In [25]:
# Intent -> mapped-tables entries loaded from JSON.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on
# the platform's locale encoding.
with open("../data/json/extra_mapping.json", "r", encoding="utf-8") as f:
    extra_mapp = json.load(f)

# Convert the JSON mapping into one {"intent", "mapped_tables"} record per entry.
# Built with a comprehension (and a distinct variable name) so iterating does
# not overwrite the `intent` DataFrame loaded from new_prompt_intents.csv: a
# plain `for intent, ...` loop would rebind that name to the last key string.
data_for_df = [
    {"intent": intent_name, "mapped_tables": tables_spec}
    for intent_name, tables_spec in extra_mapp.items()
]

# Explicit columns so the frame has the expected schema even when the JSON
# is empty.
df_map = pd.DataFrame(data_for_df, columns=["intent", "mapped_tables"])

In [26]:
# Save the mapping built from extra_mapping.json and preview a few random rows.
# NOTE(review): unlike the prompt sections, `table_mapping` is not concatenated
# into this output — confirm that is intended.
mapping_out = f"{PATH}new_mapping.csv"
df_map.to_csv(mapping_out, index=False)
df_map.sample(n=7)

Unnamed: 0,intent,mapped_tables
18,FIND_HOTEL_OPTIONS,hotels
7,RETRIEVE_BOOKING_DETAILS,bookings
9,ANALYZE_BOOKING_TRENDS,bookings
12,INQUIRE_SEAT_AVAILABILITY,flights|seats
17,ANALYZE_BOOKING_PATTERNS,ticket_flights|car_rentals
5,COMPARE_AIRCRAFT_MODELS,airports_data|aircrafts_data
21,PLAN_ITINERARY,hotels|car_rentals|trip_recommendations


## Download DB

In [1]:
import sqlite3
import requests
import os

def download_sqlite_db(url, local_filename='travel.sqlite', timeout=30):
    """Stream the file at ``url`` to ``local_filename`` and return that path.

    Args:
        url: Address of the file to download.
        local_filename: Destination path for the downloaded file.
        timeout: Seconds to wait for the server to connect / send data before
            giving up. Without a timeout the request can block indefinitely.

    Returns:
        ``local_filename``, once the complete file is in place.

    Raises:
        Whatever ``requests`` raises for connection problems, timeouts, or a
        non-success HTTP status (via ``raise_for_status``), plus ``OSError``
        for local file-system failures.
    """
    # Download into a sibling temp file first so that an interrupted transfer
    # never leaves a truncated database at the final path.
    partial_path = f"{local_filename}.part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
        # Atomically move the finished download into place.
        os.replace(partial_path, local_filename)
    finally:
        # Still present only if something failed before the rename.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return local_filename

def connect_to_sqlite_db(db_file):
    """Open the SQLite database at ``db_file`` and return the connection.

    The caller owns the returned connection and is responsible for closing it.
    """
    return sqlite3.connect(db_file)

def display_table_data(conn, table_name, limit=5):
    """Print the column names and the first ``limit`` rows of a table.

    Args:
        conn: Open ``sqlite3`` connection to query.
        table_name: Name of the table to read.
        limit: Maximum number of rows to print.

    Raises:
        sqlite3.Error: If the query fails (for example, unknown table).
    """
    # Identifiers cannot be bound as query parameters, so quote the name as an
    # SQL identifier (doubling embedded quotes). This keeps names containing
    # spaces, keywords, or quote characters from breaking or altering the query.
    quoted_name = '"' + table_name.replace('"', '""') + '"'

    cursor = conn.cursor()
    try:
        # The row limit is a value, so it is bound rather than interpolated.
        cursor.execute(f"SELECT * FROM {quoted_name} LIMIT ?", (int(limit),))
        columns = [description[0] for description in cursor.description]
        rows = cursor.fetchall()
    finally:
        cursor.close()

    print(f"\nData from table '{table_name}':")
    print("Columns:", columns)
    for row in rows:
        print(row)

# URL of the SQLite database
url = "https://storage.googleapis.com/benchmarks-artifacts/travel-db/travel2.sqlite"

# Fetch the database to the default local filename.
db_file = download_sqlite_db(url)

conn = connect_to_sqlite_db(db_file)
try:
    # sqlite_master lists every schema object; keep only the tables.
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()

    # Each fetched row is a 1-tuple holding the table name.
    for table in tables:
        table_name = table[0]
        display_table_data(conn, table_name)
finally:
    # Release the connection even if a query or a preview raises.
    conn.close()



Data from table 'aircrafts_data':
Columns: ['aircraft_code', 'model', 'range']
('773', 'Boeing 777-300', 11100)
('763', 'Boeing 767-300', 7900)
('SU9', 'Sukhoi Superjet-100', 3000)
('320', 'Airbus A320-200', 5700)
('321', 'Airbus A321-200', 5600)

Data from table 'airports_data':
Columns: ['airport_code', 'airport_name', 'city', 'coordinates', 'timezone']
('ATL', 'Hartsfield-Jackson Atlanta International Airport', 'Atlanta', '[33.6407, -84.4277]', 'America/New_York')
('PEK', 'Beijing Capital International Airport', 'Beijing', '[40.0799, 116.6031]', 'Asia/Shanghai')
('DXB', 'Dubai International Airport', 'Dubai', '[25.2532, 55.3657]', 'Asia/Dubai')
('LAX', 'Los Angeles International Airport', 'Los Angeles', '[33.9416, -118.4085]', 'America/Los_Angeles')
('HND', 'Tokyo Haneda Airport', 'Tokyo', '[35.5494, 139.7798]', 'Asia/Tokyo')

Data from table 'boarding_passes':
Columns: ['ticket_no', 'flight_id', 'boarding_no', 'seat_no']
('0060005435212351', 30625, 1, '2D')
('0060005435212386', 30