In [1]:
import pandas as pd
import requests

In [2]:
# The traveller's request, exactly as they would phrase it.
query_user = (
    "Find me the cheapest flights from London to Tokyo "
    "for two adults in 28 august 2023"
)
# Extend the request so the answer carries everything needed for booking.
query = (
    f"{query_user}, return all relevant details to book the flight "
    "including the airline, arrival and departure airports."
)

In [3]:
# read keys
import yaml
import os

def read_config(script_dir="../src/"):
    """Load the Amadeus API credentials from ``apikeys.yml``.

    Args:
        script_dir: Directory that contains ``apikeys.yml``. Defaults to
            ``"../src/"``, the path this notebook has always used.

    Returns:
        tuple: ``(api_key, api_secret)`` taken from the ``amadeues_flights``
        section of the YAML file.

    Raises:
        FileNotFoundError: If ``apikeys.yml`` is not present in ``script_dir``.
        ValueError: If the file is not valid YAML, or if it does not contain
            ``amadeues_flights.api_key`` and ``amadeues_flights.api_secret``.
    """
    # Construct the full path to the configuration file
    file_path = os.path.join(script_dir, "apikeys.yml")

    with open(file_path, 'r') as stream:
        try:
            configs = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Fail here with a clear message; previously the error was printed
            # and the function then crashed on unbound return values.
            raise ValueError(f"Could not parse {file_path}: {exc}") from exc

    try:
        # The section name is spelled "amadeues_flights" on purpose: it must
        # match the key used inside apikeys.yml.
        section = configs['amadeues_flights']
        return section['api_key'], section['api_secret']
    except (KeyError, TypeError) as exc:
        raise ValueError(
            f"{file_path} must define amadeues_flights.api_key and "
            "amadeues_flights.api_secret"
        ) from exc


class SingletonToken:
    """Class-level holder for a single shared token value.

    The token lives on the class itself, so it is read and written through
    the classmethods below without creating an instance.
    """

    # Shared storage; stays None until set_token() is called.
    __token = None

    @classmethod
    def get_token(cls):
        """Return the currently stored token, or None if none was stored."""
        return cls.__token

    @classmethod
    def set_token(cls, token):
        """Store ``token`` as the shared value, replacing any previous one."""
        cls.__token = token

# Load the Amadeus credentials once when this cell runs; search_for_flights
# reads these two module-level names when it builds its API client.
api_key, api_secret = read_config()

In [4]:
def journey_data(response_flights_data, response_airline_lookup_data):
    """Flatten flight offers into a DataFrame with one row per flight segment.

    Args:
        response_flights_data: List of flight-offer dicts. Each offer must
            provide ``id``, ``itineraries`` (each with a ``segments`` list),
            ``validatingAirlineCodes`` and ``price`` (with ``total`` and
            ``currency``).
        response_airline_lookup_data: List of airline dicts providing at least
            ``iataCode`` and ``commonName``.

    Returns:
        pandas.DataFrame: One row per segment. ``journey_id`` is the offer id,
        ``leg_id`` numbers the segments of an offer starting at 1,
        ``total_legs`` is the highest ``leg_id`` of the offer, segment fields
        are prefixed with ``flight_``, and ``airline`` is the ``commonName``
        of the operating carrier (missing when the carrier code has no match).

    NOTE(review): ``total`` is passed through exactly as delivered; the
    notebook output shows it stored as text, so numeric price ordering may
    need a cast downstream — confirm.
    """
    offers = pd.DataFrame(response_flights_data)
    airlines = pd.json_normalize(response_airline_lookup_data)

    # One row per itinerary, then expand each itinerary dict into columns.
    itineraries = offers[['id', 'itineraries']].explode('itineraries').reset_index(drop=True)
    itineraries = itineraries.join(pd.json_normalize(itineraries['itineraries'])).drop(columns='itineraries')

    # 'segments' is a list of dicts, one per leg; give every leg its own row.
    itineraries = itineraries.explode('segments').reset_index(drop=True)

    # Number the legs within each offer (1-based, across all its itineraries).
    itineraries['leg_id'] = itineraries.groupby('id').cumcount() + 1

    # Expand the segment dicts; the prefix avoids clashes with columns such as
    # 'id' and 'duration' that already exist at the itinerary level.
    segments = pd.json_normalize(itineraries['segments']).add_prefix('flight_')
    itineraries = itineraries.join(segments).drop(columns='segments')

    validating_codes = offers[['id', 'validatingAirlineCodes']]

    # Only the total and its currency are kept from the price dict. Building
    # the frame directly avoids assigning a column onto a sliced frame.
    price = pd.DataFrame(offers['price'].tolist(), index=offers.index)[['total', 'currency']]
    price = price.assign(id=offers['id'])

    df_flights = pd.merge(itineraries, validating_codes, on='id')
    df_flights = pd.merge(df_flights, price, on='id')

    # Total number of legs per journey.
    df_flights['total_legs'] = df_flights.groupby('id')['leg_id'].transform('max')

    # Left join so segments whose operating carrier is unknown are still kept.
    df_flights = df_flights.merge(
        right=airlines,
        how='left',
        left_on="flight_operating.carrierCode",
        right_on="iataCode",
    )
    df_flights = df_flights.rename(columns={"id": "journey_id", "commonName": "airline"})

    # Some of these columns are optional in the input (e.g. 'flight_stops'
    # only appears when at least one segment lists stops), so a missing
    # column must not abort the whole transformation.
    df_flights = df_flights.drop(
        columns=[
            "flight_id",
            "validatingAirlineCodes",
            "businessName",
            "flight_operating.carrierCode",
            "flight_number",
            "flight_aircraft.code",
            "flight_stops",
        ],
        errors="ignore",
    )

    return df_flights

# df_flights = journey_data(response_flights.data, df_airline_codes)
# df_flights

In [5]:
from sqlalchemy import create_engine
import pandas as pd
from langchain import SQLDatabase

def load_data(df_flights):
    """Write the flights DataFrame to an in-memory SQLite database.

    Args:
        df_flights: DataFrame to store; it is written to a table named
            ``flights``, replacing the table if it already exists.

    Returns:
        SQLDatabase: LangChain wrapper around the engine that holds the table.
    """
    engine = create_engine('sqlite:///:memory:')

    # Write the data to the SQLite database (the DataFrame index is not stored).
    df_flights.to_sql('flights', engine, if_exists='replace', index=False)

    return SQLDatabase(engine)

In [6]:
# from langchain.sql_database import SQLDatabase

# def query_database(query:str, db:SQLDatabase) -> SQLDatabaseChain:
#     """Uses the SQLDatabase Chain to query the flights database with the llm based on client query
#     query: initial client query to agent
#     llm: open AI llm
#     db: SQLDatabase generated by search_for_flights function
#     """
#     llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo", openai_api_key=os.environ["OPENAI_API_KEY"])
#     db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, use_query_checker=True)
#     db_chain.run(query)
#     return


In [7]:
import json
import openai

def get_args(query: str) -> tuple:
    """Extract flight-search arguments from a free-text query via OpenAI function calling.

    Args:
        query: The client's request in natural language.

    Returns:
        tuple: ``(num_adults, departureDate, destinationLocationCode,
        originLocationCode)`` as produced by the model.

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set,
            or if the model's arguments omit the date or an airport code.
    """
    # Schema of the single function the model is asked to "call".
    function_specs = [
        {
            "name": "search_for_flights",
            "description": "Requests flight data from Amadeus API and writes to SQLite database",
            "parameters": {
                "type": "object",
                "properties": {
                    "num_adults": {
                        "type": "integer",
                        "description": '''Based on the query, respond with the number of adults'''
                    },
                    "departureDate": {
                        "type": "string",
                        # ISO order (year-month-day); the previous "yyyy-dd-mm"
                        # wording invited swapped day and month.
                        "description": '''Based on the query, respond with the departure date in yyyy-mm-dd'''
                    },
                    "destinationLocationCode": {
                        "type": "string",
                        "description": '''Based on the query, respond with the iataCode for the destination airport'''
                    },
                    "originLocationCode": {
                        "type": "string",
                        "description": '''Based on the query, respond with the iataCode for the origin airport'''
                    },
                },
                # Every value below is read after the call, so all are required.
                "required": [
                    "num_adults",
                    "departureDate",
                    "destinationLocationCode",
                    "originLocationCode",
                ],
            },
        }
    ]

    # Never embed the key in the notebook; take it from the environment.
    openai.api_key = os.environ["OPENAI_API_KEY"]

    message = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": query}],
        functions=function_specs,
        # Force the function call: with 'auto' the model may answer in plain
        # text, and the "function_call" lookup below would then fail.
        function_call={"name": "search_for_flights"},
        temperature=0
    )

    arguments = message["choices"][0]["message"]["function_call"]["arguments"]
    arguments_dict = json.loads(arguments)

    # Fall back to a single traveller if the model still omits the count.
    num_adults = arguments_dict.get("num_adults", 1)
    departureDate = arguments_dict["departureDate"]
    destinationLocationCode = arguments_dict["destinationLocationCode"]
    originLocationCode = arguments_dict["originLocationCode"]

    return num_adults, departureDate, destinationLocationCode, originLocationCode

# Parse the request into the four search parameters and echo them.
(
    num_adults,
    departureDate,
    destinationLocationCode,
    originLocationCode,
) = get_args(query)
print(num_adults, departureDate, destinationLocationCode, originLocationCode, sep=" ")

2 2023-08-28 TYO LON


In [8]:
from amadeus import Client, ResponseError
from datetime import datetime
from langchain.chat_models import ChatOpenAI
from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.tools.python.tool import PythonREPLTool
from langchain.llms import OpenAI
from langchain import SerpAPIWrapper
from langchain.agents.tools import Tool
from langchain.tools import tool
from langchain import LLMMathChain
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain


def search_for_flights(originLocationCode: str, destinationLocationCode: str, departureDate: str, num_adults: int) -> tuple:
    """Request flight offers from the Amadeus API and load them into an SQLite database.

    Args:
        originLocationCode: IATA code of the origin airport.
        destinationLocationCode: IATA code of the destination airport.
        departureDate: Departure date, passed to the API unchanged.
        num_adults: Number of adult travellers.

    Returns:
        tuple: ``(db, df_flights)`` where ``df_flights`` is the flattened
        offers table from ``journey_data`` and ``db`` is the ``SQLDatabase``
        built from it by ``load_data``. If either Amadeus request raises
        ``ResponseError``, the error is printed and ``(None, <empty
        DataFrame>)`` is returned so callers can always unpack two values.
    """
    # api_key and api_secret are module-level names loaded by read_config().
    amadeus = Client(client_id=api_key, client_secret=api_secret)

    # Defining the parameters for the flight
    params = {
        'originLocationCode': originLocationCode,
        'destinationLocationCode': destinationLocationCode,
        'departureDate': departureDate,  # API might require specific date format
        'adults': num_adults
        # add or modify other parameters according to your needs
    }

    try:
        response_flights = amadeus.shopping.flight_offers_search.get(**params)
        # The airline lookup supplies the names joined onto each segment.
        response_airline_lookup = amadeus.reference_data.airlines.get()
    except ResponseError as error:
        print(f"ResponseError occurred: {error}")
        print(f"Error code: {error.code}")
        print(f"Error message: {error.description}")
        # Same shape as the success path; previously a bare list was returned
        # (or nothing at all when the airline lookup failed).
        return None, pd.DataFrame()

    df_flights = journey_data(response_flights.data, response_airline_lookup.data)

    db = load_data(df_flights)

    return db, df_flights

# Fetch the offers for the parsed request, then summarise the resulting table.
db, df_flights = search_for_flights(
    originLocationCode=originLocationCode,
    destinationLocationCode=destinationLocationCode,
    departureDate=departureDate,
    num_adults=num_adults,
)
df_flights.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 346 entries, 0 to 345
Data columns (total 20 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   journey_id                 346 non-null    object
 1   duration                   346 non-null    object
 2   leg_id                     346 non-null    int64 
 3   flight_carrierCode         346 non-null    object
 4   flight_duration            346 non-null    object
 5   flight_numberOfStops       346 non-null    int64 
 6   flight_blacklistedInEU     346 non-null    bool  
 7   flight_departure.iataCode  346 non-null    object
 8   flight_departure.terminal  259 non-null    object
 9   flight_departure.at        346 non-null    object
 10  flight_arrival.iataCode    346 non-null    object
 11  flight_arrival.terminal    267 non-null    object
 12  flight_arrival.at          346 non-null    object
 13  total                      346 non-null    object
 14  currency  

In [9]:
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.agents import AgentExecutor
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI


# Read the OpenAI key from the environment; credentials must never be
# written into the notebook itself.
openai_api_key = os.environ["OPENAI_API_KEY"]

# The toolkit gives the agent its SQL tools over the flights database.
toolkit = SQLDatabaseToolkit(db=db, llm=ChatOpenAI(temperature=0, model="gpt-4", openai_api_key=openai_api_key))

agent_executor = create_sql_agent(
    llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo", openai_api_key=openai_api_key),
    toolkit=toolkit,
    verbose=True,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)

# Last expression of the cell, so the agent's answer is displayed.
agent_executor.run(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mflights[0m
Thought:[32;1m[1;3mI see that there is a "flights" table in the database. I should query the schema of this table to see what columns I can use to find the cheapest flights from London to Tokyo.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	journey_id TEXT, 
	duration TEXT, 
	leg_id BIGINT, 
	"flight_carrierCode" TEXT, 
	flight_duration TEXT, 
	"flight_numberOfStops" BIGINT, 
	"flight_blacklistedInEU" BOOLEAN, 
	"flight_departure.iataCode" TEXT, 
	"flight_departure.terminal" TEXT, 
	"flight_departure.at" TEXT, 
	"flight_arrival.iataCode" TEXT, 
	"flight_arrival.terminal" TEXT, 
	"flight_arrival.at" TEXT, 
	total TEXT, 
	currency TEXT, 
	total_legs BIGINT, 
	type TEXT, 
	"iataCode" TEXT, 
	"icaoCode" TEXT, 
	airline TEXT
)

/*
3 rows from flights table:
journey_id	duration	leg_id	fl

'There are no flights from London to Tokyo for two adults on 28 August 2023 in the database.'

In [10]:
# Mask that is True where the arrival timestamp is a plain scalar
# (None, number, text or bytes); the rows failing that check are printed.
problematic_values = df_flights['flight_arrival.at'].map(
    lambda value: isinstance(value, (type(None), int, float, str, bytes, bytearray))
)
print(df_flights.loc[~problematic_values])


Empty DataFrame
Columns: [journey_id, duration, leg_id, flight_carrierCode, flight_duration, flight_numberOfStops, flight_blacklistedInEU, flight_departure.iataCode, flight_departure.terminal, flight_departure.at, flight_arrival.iataCode, flight_arrival.terminal, flight_arrival.at, total, currency, total_legs, type, iataCode, icaoCode, airline]
Index: []
