In [1]:
import pandas as pd
import requests

In [2]:
# read keys
import yaml
import os

def read_config():
    # Get the directory of the current script
    # script_dir = os.path.dirname(os.path.realpath(__file__))
    script_dir = "../src/"

    # Construct the full path to the configuration file
    file_path = os.path.join(script_dir, "apikeys.yml")

    with open(file_path, 'r') as stream:
        try:
            configs = yaml.safe_load(stream)
            api_key = configs['amadeues_flights']['api_key']
            api_secret = configs['amadeues_flights']['api_secret']
            return api_key, api_secret
        except yaml.YAMLError as exc:
            print(exc)
            
    return api_key, api_secret


class SingletonToken:
    __token = None

    @classmethod
    def set_token(cls, token):
        cls.__token = token

    @classmethod
    def get_token(cls):
        return cls.__token

api_key, api_secret = read_config()

In [3]:
# from amadeus import Client, ResponseError

# amadeus = Client(client_id=api_key, client_secret=api_secret)

# # Defining the parameters for the flight
# params = {
#     'originLocationCode': 'SYD',
#     'destinationLocationCode': 'LHR',
#     'departureDate': '2023-10-10',
#     'adults': 1
#     # add or modify other parameters according to your needs
# }

# try:
#     response_flights = amadeus.shopping.flight_offers_search.get(**params)
    
# except ResponseError as error:
#     print(f"ResponseError occurred: {error}")
#     print(f"Error code: {error.code}")
#     print(f"Error message: {error.description}")


# try:
#     response_airline_lookup = amadeus.reference_data.airlines.get()

    
# except ResponseError as error:
#     print(f"ResponseError occurred: {error}")
#     print(f"Error code: {error.code}")
#     print(f"Error message: {error.description}")




In [4]:
# flights_data = response_flights.data
# df_flights = pd.json_normalize(response_flights.data)
# df_airline_codes = pd.json_normalize(response_airline_lookup.data)

In [5]:
def journey_data(response_flights_data, response_airline_lookup_data):
    # Load the data into a DataFrame
    df = pd.DataFrame(response_flights.data)
    df_airline_codes = pd.json_normalize(response_airline_lookup.data)
    
    # Extract itineraries, validatingAirlineCodes, price (total and currency) and id into separate dataframes
    df_itineraries = df[['id', 'itineraries']].explode('itineraries').reset_index(drop=True)
    
    # In the itineraries column, each cell is a dictionary. So, we need to convert those dictionaries into separate columns.
    df_itineraries = df_itineraries.join(pd.json_normalize(df_itineraries['itineraries'])).drop(columns='itineraries')
    
    # At this point, 'segments' column is a list of dictionaries where each dictionary represents a leg of the journey.
    # We want each leg to be a separate row in the dataframe. So, explode the 'segments' column.
    df_itineraries = df_itineraries.explode('segments').reset_index(drop=True)
    
    # Add a 'leg_id' column to identify each leg of the journey
    df_itineraries['leg_id'] = df_itineraries.groupby('id').cumcount() + 1
    
    # Now, convert the dictionaries in the 'segments' column into separate columns
    df_segments = pd.json_normalize(df_itineraries['segments'])
    
    # To avoid overlapping columns, add a prefix to the column names of the new dataframe
    df_segments.columns = ['flight_' + str(col) for col in df_segments.columns]
    
    # Now join the original dataframe with the new one
    df_itineraries = df_itineraries.join(df_segments).drop(columns='segments')
    
    df_validatingAirlineCodes = df[['id', 'validatingAirlineCodes']]
    
    # For the price column, we only need total and currency. So, extract only those into a new dataframe
    df_price = df['price'].apply(pd.Series)[['total', 'currency']]
    df_price['id'] = df['id']
    
    # Now join these dataframes on the 'id' column
    df_flights = pd.merge(df_itineraries, df_validatingAirlineCodes, on='id')
    df_flights = pd.merge(df_flights, df_price, on='id')
    
    # Create a new column for the total number of legs per journey
    df_flights['total_legs'] = df_flights.groupby('id')['leg_id'].transform('max')
    
    df_flights = df_flights.merge(right=df_airline_codes, how='left', left_on="flight_operating.carrierCode", right_on="iataCode")
    df_flights.rename(columns={"id":"journey_id"}, inplace=True)

    # split journey from flights
    # journey_cols = ['journey_id', 'duration', 'total', 'currency']
    # df_journeys = df_flights[journey_cols].drop_duplicates()
    # df_flights.drop(columns =["duration", "total", "currency"], inplace=True)
    
    df_flights.drop(columns=["flight_id", "validatingAirlineCodes", "businessName", "flight_operating.carrierCode", "flight_stops", "flight_number"], inplace=True)

    return df_flights

# df_flights = journey_data(response_flights.data, df_airline_codes)
# df_flights.info()

In [6]:
from sqlalchemy import create_engine
import pandas as pd
from langchain import SQLDatabase

def load_data(flights_df):
    engine = create_engine('sqlite:///:memory:')

    # Write the data to the SQLite database
    df_flights.to_sql('flights', engine, if_exists='replace', index=False)
    # Check if the data was loaded correctly
    df_loaded = pd.read_sql('SELECT * FROM flights', engine)
    db = SQLDatabase(engine)
    return db

# db = load_data(flights_df)

In [7]:
from amadeus import Client, ResponseError
from datetime import datetime
from langchain.chat_models import ChatOpenAI
from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.tools.python.tool import PythonREPLTool
from langchain.llms import OpenAI
from langchain import SerpAPIWrapper
from langchain.agents.tools import Tool
from langchain.tools import tool
from langchain import LLMMathChain

# @tool
def search_for_flights(originLocationCode: str, destinationLocationCode: str, departureDate: str, num_adults: int) -> SQLDatabase:
    """Requets flight data from Amadeus API and writes to sqllite database
    originLocationCode: Based on the query, respond with the iataCode for the origin airport,
    destinationLocationCode: Based on the query, respond with the iataCode for the destination airport,
    departureDate: Based on the query, respond with the departure date,
    num_adults: Based on the query, respond with the number of adults
    """

    # Assuming you've defined api_key and api_secret somewhere else
    amadeus = Client(client_id=api_key, client_secret=api_secret)

    # Defining the parameters for the flight
    params = {
        'originLocationCode': originLocationCode,
        'destinationLocationCode': destinationLocationCode,
        'departureDate': departureDate,  # API might require specific date format
        'adults': num_adults
        # add or modify other parameters according to your needs
    }
    
    try:
        response_flights = amadeus.shopping.flight_offers_search.get(**params)
        
    except ResponseError as error:
        print(f"ResponseError occurred: {error}")
        print(f"Error code: {error.code}")
        print(f"Error message: {error.description}")
        return []  # return an empty list in case of an error

    try:
        response_airline_lookup = amadeus.reference_data.airlines.get()

    except ResponseError as error:
        print(f"ResponseError occurred: {error}")
        print(f"Error code: {error.code}")
        print(f"Error message: {error.description}")

    

    df_flights = journey_data(response_flights.data, response_airline_lookup.data)

    db = load_data(flights_df)

    return db


In [8]:
# @tool
def query_database(query, llm, db):
    """Uses the SQLDatabase Chain to query the flights database with the llm based on client query"""
    db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, use_query_checker=True)
    return db_chain.run(query)


In [9]:
import json
import openai

@tool
def get_args(query: str) -> str:
    # OpenAI function calling

    """Get's arguments based on client query, returns num_adults, departureDate, destinationLocationCode, originLocationCode"""
    
    function_call = [
    {
      "name": "search_for_flights",
      "description": "Requests flight data from Amadeus API and writes to SQLite database",
      "parameters": {
        "type": "object",
        "properties": {
            "num_adults":{
                "type":"integer",
                "description": '''Based on the query, respond with the number of adults'''
            },
            "departureDate": {
                "type":"datetime",
                "description": '''Based on the query, respond with the departure date in yyyy-dd-mm'''
            },
            "destinationLocationCode":{
                "type":"string",
                "description": '''Based on the query, respond with the iataCode for the destination airport'''
            },
          "originLocationCode": {
            "type": "string",
            "description": '''Based on the query, respond with the iataCode for the origin airport'''
          },

        },
        "required": ["destinationLocationCode", "originLocationCode"]
      }
    }
    ]
    
    openai.api_key = "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"

    message = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": query}],
        functions = function_call,
        function_call = 'auto',
        temperature=0
    )
    
    arguments = message["choices"][0]["message"]["function_call"]["arguments"]
    arguments_dict = json.loads(arguments)
    
    num_adults = arguments_dict["num_adults"]
    departureDate = arguments_dict["departureDate"]
    destinationLocationCode = arguments_dict["destinationLocationCode"]
    originLocationCode = arguments_dict["originLocationCode"]


    return num_adults, departureDate, destinationLocationCode, originLocationCode



In [10]:
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo", openai_api_key="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")
# db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, use_query_checker=True) 

In [11]:
from langchain.tools import StructuredTool

tools = [
    Tool.from_function(
        name="get_arguments",
        func=get_args,
        description="Use to get the arguments for the function search_for_flights. Use return from get_arguments as input for search_for_flights"
    ),
    StructuredTool.from_function(search_for_flights),
    StructuredTool.from_function(query_database)
    
]

In [12]:
from langchain import SerpAPIWrapper
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
mrkl = initialize_agent(
    tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=True
)

In [13]:
mrkl.run("Get me the cheapest flights from London to Dubai, for one adult, on the 10/10/2023")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_for_flights` with `{'originLocationCode': 'LON', 'destinationLocationCode': 'DXB', 'departureDate': '2023-10-10', 'num_adults': 1}`


[0m

NameError: name 'response_flights' is not defined

In [None]:
# from langchain.agents import create_sql_agent
# from langchain.agents.agent_toolkits import SQLDatabaseToolkit
# from langchain.sql_database import SQLDatabase
# from langchain.llms.openai import OpenAI
# from langchain.agents import AgentExecutor
# from langchain.agents.agent_types import AgentType
# from langchain.chat_models import ChatOpenAI


# #toolkit = SQLDatabaseToolkit(db=db, llm=OpenAI(temperature=0, openai_api_key="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"))

# agent_executor = create_sql_agent(
#     llm=ChatOpenAI(temperature=0, model="gpt-4", openai_api_key="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"),
#     toolkit=tools,
#     verbose=True,
#     agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION
# )

In [None]:
# agent_executor.run("What's the shortest journey to London?")

In [None]:
# import pandas as pd
# import openai
# from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

# def run_query(query, db):
#     # openai_token = SingletonToken.get_token()
#     llm = OpenAI(temperature=0, verbose=True, openai_api_key="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")
#     db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, use_query_checker=True)

#     response = db_chain.run(query)
#     return response

# query = "What's the least expensive journey"
# run_query(query, db)

In [None]:
# test = journeys_df.sort_values(by='journey_id')
# test.to_csv("../src/flights.csv")

In [None]:
# journeys_df

In [None]:
# df_airline_codes