In [1]:
import pandas as pd
import requests

In [2]:
# Natural-language request from the user. It is passed on its own to get_args()
# and embedded in the longer agent prompt built below.
query_user = "Find me the cheapest journey from London to Tokyo for one adult departing on the 30 August 2023 and returning on the 15 September 2023"

# Full prompt handed to find_flights(): the user request followed by instructions
# about what to return and how to treat multi-leg journeys and prices.
query = f'''Based on the user query: {query_user} do the following: 
Return all of the relevant information required to book the flights.
Some Journeys are indirect meaning they have multiple legs, remember to factor this into your calculation. The prices quoted are the total price for the journey. Use search to return links to the flight booking pages.
'''


In [3]:
# read keys
import yaml
import os

def read_config():
    """Load the API credentials from ``../src/apikeys.yml``.

    The path is relative to the current working directory (the directory is
    hard-coded rather than derived from ``__file__``).

    Returns:
        tuple: ``(api_key, api_secret, openai_key, hugging_api_key)``. The first two
        come from the ``amadeues_flights`` section, the third from ``openai`` and
        the fourth from ``huggingfacehub``.

    Raises:
        OSError: if the key file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML (the error is printed, then
            re-raised so the caller never receives a partial result).
        KeyError: if an expected section or key is missing from the file.
    """
    # Directory holding the key file, relative to where the notebook is run.
    script_dir = "../src/"

    # Construct the full path to the configuration file
    file_path = os.path.join(script_dir, "apikeys.yml")

    with open(file_path, 'r') as stream:
        try:
            configs = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Keep the original diagnostic output, but do not fall through:
            # there is nothing meaningful to return once parsing has failed.
            print(exc)
            raise

    # NOTE: 'amadeues_flights' is the section name as spelled in the key file.
    api_key = configs['amadeues_flights']['api_key']
    api_secret = configs['amadeues_flights']['api_secret']
    openai_key = configs['openai']['openai_key']
    hugging_api_key = configs['huggingfacehub']['hugging_api_key']

    # Single exit point: always four values, matching what the caller unpacks.
    return api_key, api_secret, openai_key, hugging_api_key

class SingletonToken:
    """Class-level holder for one shared token value.

    The value lives on the class itself, so no instance is needed to read or
    write it.
    """

    # Private (name-mangled) storage slot; None until set_token() is called.
    __token = None

    @classmethod
    def get_token(cls):
        """Return the currently stored token, or ``None`` if none was stored."""
        return cls.__token

    @classmethod
    def set_token(cls, token):
        """Store ``token`` on the class this method is called on, replacing any previous value."""
        cls.__token = token

# Load the credentials once at module level; later cells read api_key, api_secret
# and openai_key as globals.
api_key, api_secret, openai_key, hugging_api_key = read_config()

In [4]:
def journey_data(response_flights_data, response_airline_lookup_data):
    """Flatten flight-offer records into one row per flight segment (leg).

    Args:
        response_flights_data: list of offer dicts. Each offer must provide ``id``,
            ``itineraries`` (a list of dicts with a ``segments`` list),
            ``validatingAirlineCodes`` and ``price`` (a dict with ``total`` and
            ``currency``). Each segment must provide ``operating.carrierCode``.
        response_airline_lookup_data: list of airline dicts providing at least
            ``iataCode`` and ``commonName``.

    Returns:
        pandas.DataFrame: one row per segment. ``journey_id`` is the offer id,
        ``leg_id`` numbers the segments of an offer from 1 across all of its
        itineraries, ``total_legs`` is the highest ``leg_id`` of the offer,
        ``total`` is the offer price as a float, and ``airline`` is the common name
        of the operating carrier (missing when the carrier is not in the lookup).
        Dots in column names are replaced by underscores.

    Raises:
        KeyError: if one of the required keys listed above is absent.
    """
    # Load the data into DataFrames
    df = pd.DataFrame(response_flights_data)
    df_airline_codes = pd.json_normalize(response_airline_lookup_data)

    # One row per itinerary; reset the index so the positional join below lines up.
    df_itineraries = df[['id', 'itineraries']].explode('itineraries').reset_index(drop=True)

    # Each 'itineraries' cell is a dict: spread its keys into columns.
    df_itineraries = df_itineraries.join(pd.json_normalize(df_itineraries['itineraries'])).drop(columns='itineraries')

    # 'segments' is a list of dicts, one per leg: make each leg its own row.
    df_itineraries = df_itineraries.explode('segments').reset_index(drop=True)

    # Number the legs within each offer (outbound and return legs share one sequence).
    df_itineraries['leg_id'] = df_itineraries.groupby('id').cumcount() + 1

    # Spread the segment dicts into columns, prefixed to avoid clashing with
    # itinerary-level columns such as 'id' and 'duration'.
    df_segments = pd.json_normalize(df_itineraries['segments'])
    df_segments.columns = ['flight_' + str(col) for col in df_segments.columns]
    df_itineraries = df_itineraries.join(df_segments).drop(columns='segments')

    df_validatingAirlineCodes = df[['id', 'validatingAirlineCodes']]

    # Only total and currency are needed from the price dict. Copy the slice so
    # that adding 'id' does not write into a view of a temporary frame.
    df_price = df['price'].apply(pd.Series)[['total', 'currency']].copy()
    df_price['id'] = df['id']

    # Attach the offer-level columns to every segment row.
    df_flights = pd.merge(df_itineraries, df_validatingAirlineCodes, on='id')
    df_flights = pd.merge(df_flights, df_price, on='id')

    # Total number of legs per journey
    df_flights['total_legs'] = df_flights.groupby('id')['leg_id'].transform('max')

    # Left join keeps segments whose operating carrier is missing from the lookup.
    df_flights = df_flights.merge(right=df_airline_codes, how='left', left_on="flight_operating.carrierCode", right_on="iataCode")
    df_flights = df_flights.rename(columns={"id": "journey_id", "commonName": "airline"})

    # errors="ignore": some of these columns (e.g. 'flight_stops') only exist when
    # at least one segment in the response carries that field, so their absence
    # must not abort the whole transformation.
    df_flights = df_flights.drop(
        columns=["flight_id", "validatingAirlineCodes", "businessName", "flight_operating.carrierCode", "flight_aircraft.code", "flight_stops"],
        errors="ignore",
    )

    # regex=False: '.' must be treated as a literal dot. The default for `regex`
    # differs between pandas versions, so it is stated explicitly.
    df_flights.columns = df_flights.columns.str.replace('.', '_', regex=False)
    df_flights['total'] = pd.to_numeric(df_flights['total'], errors='coerce')

    return df_flights

In [5]:
from sqlalchemy import create_engine
import pandas as pd
from langchain import SQLDatabase

def load_data(df_flights):
    """Write the flights frame to a fresh in-memory SQLite database.

    Args:
        df_flights: DataFrame to store; it becomes the ``flights`` table
            (the index is not written).

    Returns:
        SQLDatabase: LangChain wrapper around the engine holding the table.
    """
    engine = create_engine('sqlite:///:memory:')

    # Write the data to the SQLite database. The engine is brand new, so
    # if_exists='replace' only matters if this statement is ever re-run on it.
    df_flights.to_sql('flights', engine, if_exists='replace', index=False)

    # The previous version read the whole table back into an unused variable;
    # that extra full copy has been removed.
    return SQLDatabase(engine)

In [6]:
import json
import openai
from langchain.tools import tool

def get_args(query_user: str) -> tuple:
    """Extract flight-search arguments from a natural-language request.

    Uses OpenAI function calling: the model is given the ``search_for_flights``
    schema and is forced to answer with arguments for it. The extracted values are
    printed and returned; they are required before pulling data from the flight API.

    Args:
        query_user: the traveller's request in plain language.

    Returns:
        tuple: ``(num_adults, departureDate, returnDate, destinationLocationCode,
        originLocationCode)`` as produced by the model.

    Raises:
        json.JSONDecodeError: if the model's arguments are not valid JSON.
        KeyError: if a required argument is missing from the model's answer.
    """
    # Schema of the single function the model must "call".
    functions_spec = [
    {
      "name": "search_for_flights",
      "description": "Requests flight data from Amadeus API and writes to SQLite database",
      "parameters": {
        "type": "object",
        "properties": {
            "num_adults":{
                "type":"integer",
                "description": '''Based on the query, respond with the number of adults'''
            },
            "departureDate": {
                "type":"string",
                "description": '''Based on the query, respond with the Departure Date. Dates are specified in the ISO 8601 YYYY-MM-DD format. '''
            },
            "returnDate": {
                "type":"string",
                "description": '''Based on the query, respond with the Return Date. Dates are specified in the ISO 8601 YYYY-MM-DD format. '''
            },
            "destinationLocationCode":{
                "type":"string",
                "description": '''Based on the query, respond with an airport IATA code from the city which the traveler is going. E.g CDG for Charles de Gaulle Airport'''
            },
          "originLocationCode": {
            "type": "string",
            "description": '''Based on the query, respond with an airport IATA code from the city which the traveler will depart from. E.g CDG for Charles de Gaulle Airport'''
          },

        },
        "required": ["destinationLocationCode", "originLocationCode", "departureDate", "returnDate", "num_adults"]
      }
    }
    ]

    # Module-level key loaded by read_config().
    openai.api_key = openai_key

    message = openai.ChatCompletion.create(
        model="gpt-4-0613",
        messages=[{"role": "user", "content": query_user}],
        functions=functions_spec,
        # Force the function call. With 'auto' the model may reply in plain text,
        # in which case the response has no "function_call" entry and the lookup
        # below fails.
        function_call={"name": "search_for_flights"},
        temperature=0
    )
    arguments_json = message["choices"][0]["message"]["function_call"]["arguments"]

    parsed_data = json.loads(arguments_json)

    # Accessing variables
    num_adults = parsed_data['num_adults']
    departureDate = parsed_data['departureDate']
    returnDate = parsed_data['returnDate']
    destinationLocationCode = parsed_data['destinationLocationCode']
    originLocationCode = parsed_data['originLocationCode']

    print("Number of Adults: ", num_adults)
    print("Departure Date: ", departureDate)
    print("Return Date: ", returnDate)
    print("Destination Location Code: ", destinationLocationCode)
    print("Origin Location Code: ", originLocationCode)

    return num_adults, departureDate, returnDate, destinationLocationCode, originLocationCode

# Extract the structured search parameters from the user's request; the printed
# values below are what the model returned.
num_adults, departureDate, returnDate, destinationLocationCode, originLocationCode = get_args(query_user)

Number of Adults:  1
Departure Date:  2023-08-30
Return Date:  2023-09-15
Destination Location Code:  HND
Origin Location Code:  LHR


In [7]:
from amadeus import Client, ResponseError
from datetime import datetime
from langchain.chat_models import ChatOpenAI
from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.tools.python.tool import PythonREPLTool
from langchain.llms import OpenAI
from langchain import SerpAPIWrapper
from langchain.agents.tools import Tool
from langchain.tools import tool
from langchain import LLMMathChain
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

def search_for_flights(originLocationCode, destinationLocationCode, departureDate, returnDate, num_adults) -> tuple:
    """Request flight offers from the Amadeus API and load them into SQLite.

    Args:
        originLocationCode: IATA code of the origin airport.
        destinationLocationCode: IATA code of the destination airport.
        departureDate: departure date.
        returnDate: return date.
        num_adults: number of adult travellers.

    Returns:
        tuple: ``(db, df_flights)`` where ``db`` is the value returned by
        ``load_data`` and ``df_flights`` the flattened frame from ``journey_data``.
        If either API request fails, the error is printed and ``(None, None)`` is
        returned, so callers can always unpack two values and test for ``None``.
    """
    # api_key and api_secret are module-level values loaded by read_config().
    amadeus = Client(client_id=api_key, client_secret=api_secret)

    # Defining the parameters for the flight
    params = {
        'originLocationCode': originLocationCode,
        'destinationLocationCode': destinationLocationCode,
        'departureDate': departureDate,
        'returnDate': returnDate,
        'adults': num_adults
    }

    try:
        response_flights = amadeus.shopping.flight_offers_search.get(**params)

    except ResponseError as error:
        print(f"ResponseError occurred flights: {error}")
        print(f"Error code flights: {error.code}")
        print(f"Error message flights: {error.description}")
        # Same shape as the success path (previously a bare [] that broke unpacking).
        return None, None

    try:
        response_airline_lookup = amadeus.reference_data.airlines.get()

    except ResponseError as error:
        print(f"ResponseError occurred airline lookup: {error}")
        print(f"Error code airline lookup: {error.code}")
        print(f"Error message airline lookup: {error.description}")
        # Without the lookup there is no response object to pass on; stop here
        # instead of falling through to an unbound variable.
        return None, None

    df_flights = journey_data(response_flights.data, response_airline_lookup.data)
    print(df_flights.dtypes)
    db = load_data(df_flights)

    return db, df_flights

# Fetch the offers and keep both results as globals: `db` is read by find_flights()
# and `df_flights` is inspected directly in later cells.
db, df_flights = search_for_flights(originLocationCode, destinationLocationCode, departureDate, returnDate, num_adults)
# Column names, non-null counts and dtypes of the flattened frame.
df_flights.info()

journey_id                    object
duration                      object
leg_id                         int64
flight_carrierCode            object
flight_number                 object
flight_duration               object
flight_numberOfStops           int64
flight_blacklistedInEU          bool
flight_departure_iataCode     object
flight_departure_terminal     object
flight_departure_at           object
flight_arrival_iataCode       object
flight_arrival_terminal       object
flight_arrival_at             object
total                        float64
currency                      object
total_legs                     int64
type                          object
iataCode                      object
icaoCode                      object
airline                       object
dtype: object
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 844 entries, 0 to 843
Data columns (total 21 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  

In [8]:
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.agents import AgentExecutor
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI

# Chat model (gpt-4-0613, temperature 0) that is passed to find_flights() below.
llm=ChatOpenAI(temperature=0, model="gpt-4-0613", openai_api_key=openai_key)

def find_flights(query, llm, database=None):
    """Answer a flight question by running a SQL agent over the flights database.

    Args:
        query: prompt handed to the agent.
        llm: language model used both by the SQL toolkit and by the agent.
        database: database to query. When omitted, the module-level ``db`` is
            used, which is what existing two-argument calls rely on.

    Returns:
        The value returned by the agent executor's ``run`` for ``query``.
    """
    # Fall back to the notebook-global `db` only when no database is supplied.
    target_db = db if database is None else database

    toolkit = SQLDatabaseToolkit(db=target_db, llm=llm)
    agent_executor = create_sql_agent(
        llm=llm,
        toolkit=toolkit,
        verbose=True,
        agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    )

    return agent_executor.run(query)


# Run the agent on the full prompt. The bare `response` on the last line makes the
# notebook display the returned answer after the verbose agent trace.
response = find_flights(query, llm)
response



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mflights[0m
Thought:[32;1m[1;3mThe 'flights' table seems to be the most relevant one for this query. I should check its schema to understand the structure and the fields it contains.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	journey_id TEXT, 
	duration TEXT, 
	leg_id BIGINT, 
	"flight_carrierCode" TEXT, 
	flight_number TEXT, 
	flight_duration TEXT, 
	"flight_numberOfStops" BIGINT, 
	"flight_blacklistedInEU" BOOLEAN, 
	"flight_departure_iataCode" TEXT, 
	flight_departure_terminal TEXT, 
	flight_departure_at TEXT, 
	"flight_arrival_iataCode" TEXT, 
	flight_arrival_terminal TEXT, 
	flight_arrival_at TEXT, 
	total FLOAT, 
	currency TEXT, 
	total_legs BIGINT, 
	type TEXT, 
	"iataCode" TEXT, 
	"icaoCode" TEXT, 
	airline TEXT
)

/*
3 rows from flights table:
journey_id	duration	leg_id	flight_carr

'Here are the 10 cheapest flights from London to Tokyo departing on the 30 August 2023 and returning on the 15 September 2023:\n\n1. Journey ID: 167, Departure: 2023-08-30T09:40:00, Arrival: 2023-08-31T05:35:00, Total: 1422.79 EUR\n2. Journey ID: 197, Departure: 2023-08-30T09:40:00, Arrival: 2023-08-31T05:35:00, Total: 1487.79 EUR\n3. Journey ID: 198, Departure: 2023-08-30T19:20:00, Arrival: 2023-08-31T15:15:00, Total: 1487.79 EUR\n4. Journey ID: 199, Departure: 2023-08-30T19:20:00, Arrival: 2023-08-31T15:15:00, Total: 1551.79 EUR\n5. Journey ID: 202, Departure: 2023-08-30T19:00:00, Arrival: 2023-08-31T14:50:00, Total: 1647.53 EUR\n6. Journey ID: 203, Departure: 2023-08-30T14:20:00, Arrival: 2023-08-31T11:55:00, Total: 1654.79 EUR\n7. Journey ID: 204, Departure: 2023-08-30T09:40:00, Arrival: 2023-08-31T05:35:00, Total: 1654.79 EUR\n8. Journey ID: 205, Departure: 2023-08-30T14:20:00, Arrival: 2023-08-31T11:55:00, Total: 1718.79 EUR\n9. Journey ID: 206, Departure: 2023-08-30T19:20:00, Ar

In [18]:
# Show every segment row of journey 206. journey_id has dtype object and is
# compared against the string '206', not an integer.
df_flights.loc[df_flights['journey_id'] == '206']

Unnamed: 0,journey_id,duration,leg_id,flight_carrierCode,flight_number,flight_duration,flight_numberOfStops,flight_blacklistedInEU,flight_departure_iataCode,flight_departure_terminal,...,flight_arrival_iataCode,flight_arrival_terminal,flight_arrival_at,total,currency,total_legs,type,iataCode,icaoCode,airline
806,206,PT11H55M,1,JL,44,PT11H55M,0,False,LHR,3,...,HND,3,2023-08-31T15:15:00,1718.79,EUR,2,airline,JL,JAL,JAPAN AIRLINES
807,206,PT14H40M,2,JL,7083,PT14H40M,0,False,HND,3,...,LHR,5,2023-09-15T15:30:00,1718.79,EUR,2,airline,BA,BAW,BRITISH A/W


In [10]:
# List all segment rows ordered by total journey price, cheapest first.
# NOTE(review): in the saved output the cheapest total here (1049.34 EUR) is lower
# than the cheapest journey in the agent's answer above (1422.79 EUR) - worth checking.
df_flights.sort_values(by='total', ascending=True)

Unnamed: 0,journey_id,duration,leg_id,flight_carrierCode,flight_number,flight_duration,flight_numberOfStops,flight_blacklistedInEU,flight_departure_iataCode,flight_departure_terminal,...,flight_arrival_iataCode,flight_arrival_terminal,flight_arrival_at,total,currency,total_legs,type,iataCode,icaoCode,airline
0,1,PT15H30M,1,CX,250,PT12H5M,0,False,LHR,3,...,HKG,1,2023-08-31T13:25:00,1049.34,EUR,4,airline,CX,CPA,CATHAYPACIFIC
22,6,PT37H10M,3,CX,543,PT4H50M,0,False,HND,3,...,HKG,1,2023-09-15T13:55:00,1049.34,EUR,4,airline,CX,CPA,CATHAYPACIFIC
23,6,PT37H10M,4,CX,257,PT13H5M,0,False,HKG,1,...,LHR,3,2023-09-16T15:15:00,1049.34,EUR,4,airline,CX,CPA,CATHAYPACIFIC
24,7,PT18H5M,1,CX,250,PT12H5M,0,False,LHR,3,...,HKG,1,2023-08-31T13:25:00,1049.34,EUR,4,airline,CX,CPA,CATHAYPACIFIC
25,7,PT18H5M,2,CX,6320,PT4H20M,0,False,HKG,1,...,HND,3,2023-08-31T20:25:00,1049.34,EUR,4,airline,JL,JAL,JAPAN AIRLINES
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
839,216,PT38H45M,3,BR,191,PT3H25M,0,False,HND,3,...,TSA,1,2023-09-15T15:05:00,3747.40,EUR,4,airline,BR,EVA,EVA AIRWAYS
840,216,PT38H45M,4,BR,67,PT18H5M,1,False,TPE,2,...,LHR,2,2023-09-16T19:25:00,3747.40,EUR,4,airline,BR,EVA,EVA AIRWAYS
842,217,PT13H40M,2,CX,235,PT30M,0,False,HKG,1,...,HND,3,2023-08-31T17:50:00,8658.42,EUR,3,airline,CX,CPA,CATHAYPACIFIC
841,217,PT13H40M,1,CX,256,PT12H5M,0,False,LHR,3,...,HKG,1,2023-08-31T15:15:00,8658.42,EUR,3,airline,CX,CPA,CATHAYPACIFIC


In [11]:
# Column dtypes of the flattened frame; in the saved output the timestamp and
# duration columns are plain object columns, and only `total` is float.
df_flights.dtypes

journey_id                    object
duration                      object
leg_id                         int64
flight_carrierCode            object
flight_number                 object
flight_duration               object
flight_numberOfStops           int64
flight_blacklistedInEU          bool
flight_departure_iataCode     object
flight_departure_terminal     object
flight_departure_at           object
flight_arrival_iataCode       object
flight_arrival_terminal       object
flight_arrival_at             object
total                        float64
currency                      object
total_legs                     int64
type                          object
iataCode                      object
icaoCode                      object
airline                       object
dtype: object