In [None]:
def json_print(data):
    """Pretty-print ``data`` as JSON with a two-space indent.

    Parameters:
    - data: any value ``json.dumps`` can serialize (dict, list, str, number,
      bool or None).

    Raises:
    - TypeError: propagated from ``json.dumps`` when ``data`` contains a value
      it cannot serialize.
    """
    # Imported locally: this cell sits above the notebook's `import json` cell,
    # so the helper must not depend on a later cell having been run first.
    import json

    print(json.dumps(data, indent=2))

In [None]:
# Starter/stub code to invoke API calls generated by LLMs and to validate the results using an LLM as a judge

# Import necessary modules
from abc import ABC, abstractmethod

# BaseAPI: Abstract base class for APIs
class BaseAPI(ABC):
    """Abstract contract for an API wrapper.

    Concrete subclasses must override both methods below; the class itself
    cannot be instantiated.
    """

    @abstractmethod
    def call_api(self, **kwargs):
        """Make the API call and return the response."""

    @abstractmethod
    def validate_response(self, response, ground_truth):
        """Validate the response against ground truth."""

# ExampleAPI: Implementation of a sample API
class ExampleAPI(BaseAPI):
    """Toy API that needs no network access; used to exercise the evaluation flow."""

    def call_api(self, param1, param2=None):
        """Return a simulated response: ``param1 * 2`` under 'result' and ``param2`` under 'optional'."""
        return dict(result=param1 * 2, optional=param2)

    def validate_response(self, response, ground_truth):
        """Return the outcome of comparing ``response`` with ``ground_truth`` using ``==``."""
        is_match = response == ground_truth
        return is_match

# LLMInterface: Simulates the interaction with an LLM
class LLMInterface:
    """Local stand-in for an LLM client: no model is contacted, every answer is canned or computed here."""

    def __init__(self, model_name="ExampleModel"):
        # Only stored; nothing in this class reads it back.
        self.model_name = model_name

    def generate_api_call(self, task_description):
        """Return the fixed API call this stub proposes, whatever ``task_description`` says."""
        target_api = "ExampleAPI"
        arguments = {"param1": 10, "param2": 5}
        return {"api": target_api, "params": arguments}

    def judge_response(self, task_description, response, ground_truth):
        """Return "Yes" when ``response == ground_truth`` and "No" otherwise."""
        # The prompt a judge model would be given. It is assembled for
        # illustration only; the stub never sends it anywhere.
        prompt = "".join([
            f"Task Description: {task_description}\n",
            f"API Response: {response}\n",
            f"Ground Truth: {ground_truth}\n",
            "Does the API response match the ground truth? Answer Yes or No.",
        ])
        # The simulated verdict is a plain equality check.
        if response == ground_truth:
            return "Yes"
        return "No"

# EvaluationFramework: Updated to include LLM as a judge
class EvaluationFramework:
    """Run one task through the loop: the LLM proposes an API call, the API is invoked, the LLM judges the result.

    Attributes (set in ``__init__``):
    - llm: object providing ``generate_api_call(task_description)`` and
      ``judge_response(task_description, response, ground_truth)``.
    - apis: mapping from API name to an object providing ``call_api(**params)``.
    """

    def __init__(self, llm, apis):
        self.llm = llm
        self.apis = apis

    def run_evaluation(self, task_description, ground_truth):
        """Evaluate a single task.

        Parameters:
        - task_description: text handed to the LLM, both to generate the call
          and to judge the response.
        - ground_truth: expected API response, handed to the judge.

        Returns:
        - dict with 'response' (what the API returned), 'llm_decision' (the
          judge's raw answer) and 'evaluation' (``{"accuracy": bool}``, True
          only when the judge answered exactly "Yes").

        Raises:
        - ValueError: the LLM named an API that is not registered in ``apis``.
        - KeyError: the generated call lacks an 'api' or 'params' entry.
        """
        # Step 1: Generate API call
        api_call = self.llm.generate_api_call(task_description)
        api_name = api_call['api']
        params = api_call['params']

        # Step 2: Invoke API
        api = self.apis.get(api_name)
        # Compare with None explicitly: a registered API object that happens to
        # be falsy (e.g. defines __len__ or __bool__) must not be reported as
        # missing.
        if api is None:
            raise ValueError(f"API {api_name} not found.")

        response = api.call_api(**params)

        # Step 3: LLM evaluates response
        llm_decision = self.llm.judge_response(task_description, response, ground_truth)

        # Step 4: Prepare results
        evaluation_results = {"accuracy": llm_decision == "Yes"}
        return {"response": response, "llm_decision": llm_decision, "evaluation": evaluation_results}

# Main function to run the evaluation with LLM as a judge
def main():
    """Run the stubbed end-to-end demo once and print the evaluation result."""
    # Registry of callable APIs, keyed by the name the LLM refers to them by
    registry = dict(ExampleAPI=ExampleAPI())

    # The framework couples the (simulated) LLM with that registry
    harness = EvaluationFramework(LLMInterface(), registry)

    # One task together with the response the API is expected to produce
    outcome = harness.run_evaluation(
        "Double the number and return an optional parameter.",
        {"result": 20, "optional": 5},
    )
    print("Evaluation Results:", outcome)

# Entry-point guard: main() is called only when __name__ is "__main__"
if __name__ == "__main__":
    main()

Evaluation Results: {'response': {'result': 20, 'optional': 5}, 'llm_decision': 'Yes', 'evaluation': {'accuracy': True}}


In [None]:
# prompt: mount my google drive
# Mounts Google Drive at /content/drive; a later cell reads the NESTFUL JSON
# files from a path under that mount point.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# load the Nestful executable jsons into dictionaries

import json

# Decode explicitly as UTF-8 (the JSON interchange encoding) rather than relying
# on the platform's default text encoding.
with open('/content/drive/MyDrive/E6998-Project/nestful_data_json/executable-data.json', 'r', encoding='utf-8') as f:
    exec_data = json.load(f)

with open('/content/drive/MyDrive/E6998-Project/nestful_data_json/executable-spec.json', 'r', encoding='utf-8') as f:
    exec_spec = json.load(f)

# get data from LLM and set them to the API parameters

In [None]:
# Inspect the first sample: a natural-language 'input' plus the expected sequence of API calls under 'output'
exec_data[0]

{'input': 'Find flights from New York to London that depart on August 15, 2024, and return on August 18, 2024 and find hotels in London.',
 'output': [{'name': 'SkyScrapperSearchAirport',
   'arguments': {'query': 'New York'},
   'label': 'var1'},
  {'name': 'SkyScrapperSearchAirport',
   'arguments': {'query': 'London'},
   'label': 'var2'},
  {'name': 'SkyScrapperFlightSearch',
   'arguments': {'originSkyId': '$var1.skyId$',
    'destinationSkyId': '$var2.skyId$',
    'originEntityId': '$var1.entityId$',
    'destinationEntityId': '$var2.entityId$',
    'date': '2024-08-15',
    'returnDate': '2024-08-18'},
   'label': 'var3'},
  {'name': 'TripadvisorSearchLocation',
   'arguments': {'query': 'London'},
   'label': 'var4'},
  {'name': 'TripadvisorSearchHotels',
   'arguments': {'geoId': '$var4.geoId$',
    'checkIn': '2024-08-15',
    'checkOut': '2024-08-18'},
   'label': 'var5'},
  {'name': 'var_result',
   'arguments': {'flights': '$var3$', 'hotels': '$var5$'}}]}

In [None]:
# Inspect the first API spec entry (name, method, endpoint/host, url and the parameter schema)
exec_spec[0]

{'name': 'SkyScrapperFlightSearch',
 'description': 'Retrieves available flights for a given route using Skyscrapper API.',
 'method': 'GET',
 'endpoint': '/api/v2/flights/searchFlightsComplete',
 'host': 'sky-scrapper.p.rapidapi.com',
 'url': 'https://rapidapi.com/apiheya/api/sky-scrapper/playground/apiendpoint_adcbe9c6-73a7-4db4-b95c-d9b552de558c',
 'query_parameters': {'originSkyId': {'type': 'string',
   'description': 'The originSkyId code can be extracted from the Search Airport API in the Flights collection.',
   'required': True},
  'destinationSkyId': {'type': 'string',
   'description': 'The destinationSkyId code can be extracted from the Search Airport API in the Flights collection.',
   'required': True},
  'originEntityId': {'type': 'string',
   'description': 'The originEntityId code can be extracted from the Search Airport API in the Flights collection.',
   'required': True},
  'destinationEntityId': {'type': 'string',
   'description': 'The destinationEntityId code can

In [None]:
import requests

def invoke_api(spec, data, api_key, headers=None, timeout=30):
    """
    Invoke one HTTP API described by ``spec`` and return its decoded JSON body.

    Parameters:
    - spec: dict with 'method' ('GET' or 'POST') and 'url'; may also carry
      'query_parameters' and 'body_parameters' dicts whose keys name the
      parameters the API accepts.
    - data: dict of candidate values. Only keys declared in the spec are sent;
      declared parameters that are absent from ``data`` are simply omitted.
    - api_key: value sent in the 'X-RapidAPI-Key' header.
    - headers: optional extra request headers. The caller's dict is copied,
      never modified.
    - timeout: seconds handed to ``requests`` so a stalled server cannot hang
      the notebook.

    Returns:
    - The decoded JSON body when the status code is 200.
    - None when a 200 response body is not valid JSON, or when the status is
      not 200 but ``raise_for_status()`` does not treat it as an error.

    Raises:
    - ValueError: ``spec['method']`` is neither 'GET' nor 'POST'.
    - Whatever ``response.raise_for_status()`` raises for an error status.
    """
    method = spec['method']
    # NOTE(review): the NESTFUL spec entries displayed in this notebook carry a
    # rapidapi.com "playground" page under 'url' and the callable address in
    # separate 'host'/'endpoint' fields. This function always requests
    # spec['url'] -- confirm callers pass specs whose 'url' is the real endpoint.
    url = spec['url']
    query_params = spec.get('query_parameters') or {}
    body_params = spec.get('body_parameters') or {}

    # Forward only the values the spec declares. Required and optional
    # parameters are treated alike: each is sent when present in `data`.
    params = {name: data[name] for name in query_params if name in data}
    body = {name: data[name] for name in body_params if name in data}

    # Work on a copy so the caller's headers dict is left untouched
    request_headers = dict(headers or {})
    request_headers['X-RapidAPI-Key'] = api_key

    # Make the API call based on the method (GET or POST)
    if method == 'GET':
        response = requests.get(url, headers=request_headers, params=params, timeout=timeout)
    elif method == 'POST':
        response = requests.post(url, headers=request_headers, json=body, timeout=timeout)
    else:
        raise ValueError(f"Unsupported HTTP method: {method}")

    print("Response Status Code:", response.status_code)

    # Keep the parsed body in its own variable. Reusing `data` here would make
    # a failed decode fall through and hand the caller's input back as if it
    # were the API response.
    payload = None
    try:
        payload = response.json()
        print("JSON Response:")
        json_print(payload)
    except ValueError as e:
        print(f"Error decoding JSON: {e}")

    print(response.headers)

    # Check for successful response and return data
    if response.status_code == 200:
        return payload

    print("Found exception!")
    response.raise_for_status()
    return None

# Example of using this generic method
def execute_travel_plan(specs, data, api_key):
    """Call ``invoke_api`` once per entry of ``specs``, in order, printing a line before and after each call.

    Parameters:
    - specs: iterable of API spec dicts; each must have a 'name' key.
    - data: values passed unchanged to every ``invoke_api`` call.
    - api_key: key passed unchanged to every ``invoke_api`` call.

    Returns nothing; the results are only printed.
    """
    for current_spec in specs:
        label = current_spec['name']
        print(f"Executing API: {label}")
        print(f"Result for {label}: {invoke_api(current_spec, data, api_key)}")

# Example data corresponding to the spec
data = {
    'originSkyId': 'NYC123',
    'destinationSkyId': 'LON123',
    'originEntityId': 'NYCEntity',
    'destinationEntityId': 'LONEntity',
    'date': '2024-08-15',
    'returnDate': '2024-08-18',
    'adults': 1,
    'currency': 'USD',
    'market': 'en-US'
}

# Example API spec for SkyScrapperFlightSearch
specs = [
    {
        'name': 'SkyScrapperFlightSearch',
        'description': 'Retrieves available flights for a given route using Skyscrapper API.',
        'method': 'GET',
        'url': 'https://sky-scrapper.p.rapidapi.com/api/v2/flights/searchFlightsComplete',
        'query_parameters': {
            'originSkyId': {'type': 'string', 'required': True},
            'destinationSkyId': {'type': 'string', 'required': True},
            'originEntityId': {'type': 'string', 'required': True},
            'destinationEntityId': {'type': 'string', 'required': True},
            'date': {'type': 'string', 'required': True},
            'returnDate': {'type': 'string', 'required': False},
            'adults': {'type': 'number', 'required': False},
            'currency': {'type': 'string', 'required': False},
            'market': {'type': 'string', 'required': False}
        },
        'body_parameters': {}
    }
]

# The RapidAPI key is a credential: read it from the environment instead of
# committing it to the notebook. Any key that was previously saved in this
# notebook should be treated as leaked and rotated.
import os

api_key = os.environ.get("RAPIDAPI_KEY")
if not api_key:
    raise RuntimeError("Set the RAPIDAPI_KEY environment variable before running this cell.")

# Execute the travel plan (invoke the APIs)
execute_travel_plan(specs, data, api_key)


Executing API: SkyScrapperFlightSearch
Response Status Code: 200
JSON Response:
{
  "status": true,
  "message": "success",
  "data": {
    "context": {
      "status": "complete",
      "sessionId": "Cg8IARILCgIQAxDJ1rHxuzISKHVzc180NDcyYzU2Ny1kMGE2LTRiZTUtOWRiZS1jNjc2N2NiYmQ4MGUiAlVT",
      "totalResults": 0
    },
    "itineraries": [],
    "messages": [],
    "filterStats": {
      "duration": {
        "min": 0,
        "max": 0,
        "multiCityMin": 0,
        "multiCityMax": 0
      },
      "airports": [],
      "carriers": [],
      "stopPrices": {
        "direct": {
          "isPresent": false
        },
        "one": {
          "isPresent": false
        },
        "twoOrMore": {
          "isPresent": false
        }
      }
    },
    "flightsSessionId": "",
    "destinationImageUrl": "https://content.skyscnr.com/m/3719e8f4a5daf43d/original/Flights-Placeholder.jpg"
  }
}
{'Date': 'Fri, 13 Dec 2024 04:00:10 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Co

In [None]:
def search_and_invoke_apis(spec, data, name_filter='SkyScrapper', max_apis=3):
    """
    Invoke the first few APIs in ``spec`` whose name contains ``name_filter``.

    Parameters:
    - spec: List of API specs (dicts). Entries without a 'name' key are skipped.
    - data: The data for invoking the API; passed unchanged to ``invoke_api``.
    - name_filter: Substring an API name must contain to be selected
      (default 'SkyScrapper').
    - max_apis: Maximum number of matching APIs to invoke (default 3);
      None invokes every match.

    Returns:
    - List of responses from the APIs, in the order they were invoked.

    Note: the key sent with each call is the module-level ``api_key`` variable,
    so that variable must be defined before this function is called.
    """
    # Select the matching APIs, preserving their order in the spec list
    matching_apis = [api for api in spec if name_filter in api.get('name', '')]

    responses = []

    for api_spec in matching_apis[:max_apis]:
        print(f"Invoking {api_spec['name']}...")
        print(api_spec)
        response = invoke_api(api_spec, data, api_key)
        responses.append(response)

    return responses


# Candidate values for the API parameters. invoke_api forwards only the keys
# that the selected spec declares, so extra keys here are harmless.
# NOTE(review): the two entity IDs look like placeholders -- confirm they are
# values the flight-search API actually accepts.
api_data = {
  "originSkyId": "JFK",
  "destinationSkyId": "LAX",
  "originEntityId": "123456",
  "destinationEntityId": "654321",
  "date": "2024-12-20",
  "returnDate": "2024-12-27",
  "cabinClass": "economy",
  "adults": 2,
  "children": 0,
  "infants": 0,
  "sortBy": "best",
  "limit": 10,
  "currency": "USD",
  "market": "en-US",
  "countryCode": "US"
}

# Invoke the matching APIs from the loaded spec list (uses the module-level api_key)
responses = search_and_invoke_apis(exec_spec, api_data)

# Pretty-print each collected response as JSON
for response in responses:
    json_print(response)



Invoking SkyScrapperFlightSearch...
{'name': 'SkyScrapperFlightSearch', 'description': 'Retrieves available flights for a given route using Skyscrapper API.', 'method': 'GET', 'endpoint': '/api/v2/flights/searchFlightsComplete', 'host': 'sky-scrapper.p.rapidapi.com', 'url': 'https://rapidapi.com/apiheya/api/sky-scrapper/playground/apiendpoint_adcbe9c6-73a7-4db4-b95c-d9b552de558c', 'query_parameters': {'originSkyId': {'type': 'string', 'description': 'The originSkyId code can be extracted from the Search Airport API in the Flights collection.', 'required': True}, 'destinationSkyId': {'type': 'string', 'description': 'The destinationSkyId code can be extracted from the Search Airport API in the Flights collection.', 'required': True}, 'originEntityId': {'type': 'string', 'description': 'The originEntityId code can be extracted from the Search Airport API in the Flights collection.', 'required': True}, 'destinationEntityId': {'type': 'string', 'description': 'The destinationEntityId code 

In [None]:
# !pip uninstall httpx
!pip install --upgrade openai
!pip install httpx==0.27.2


Collecting openai
  Downloading openai-1.57.3-py3-none-any.whl.metadata (24 kB)
Downloading openai-1.57.3-py3-none-any.whl (390 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m390.2/390.2 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.54.5
    Uninstalling openai-1.54.5:
      Successfully uninstalled openai-1.54.5
Successfully installed openai-1.57.3
Collecting httpx==0.27.2
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Downloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: httpx
  Attempting uninstall: httpx
    Found existing installation: httpx 0.28.1
    Uninstalling httpx-0.28.1:
      Successfully uninstalled httpx-0.28.1
Successfully installed httpx-0.27.2


In [None]:
from google.colab import userdata

import openai
from openai import OpenAI

# Read the keys from Colab's secret store (google.colab.userdata); they are held
# in Python variables only and are not exported as environment variables.
openai_api_key = userdata.get('OPENAI_API_KEY')
mistral_api_key = userdata.get('MISTRAL_API_KEY')

# Fail here, with a clear message, instead of at the first LLM call further down.
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY is missing or empty in the Colab secrets.")
if not mistral_api_key:
    raise ValueError("MISTRAL_API_KEY is missing or empty in the Colab secrets.")

print("API key loaded successfully!")

client = openai.OpenAI(api_key=openai_api_key)

API key loaded successfully!


In [None]:
from transformers import pipeline


In [None]:
# def invoke_bad_llm_api_call(api_spec, query):

#   messages = [
#       {"role": "system", "content": "You are a helpful assistant designed to build an API call that the user should send to get the info they want. Using the query input and the API spec needed to get the info needed from the query, return the API call that the user should make. Format your response as a dictionary with the query parameters and their values"},
#         {"role": "user", "content": "API spec to follow" + ": " + str(api_spec) + ", User query " + str(query)}
#   ]
#   pipe = pipeline("text-generation", model="MaziyarPanahi/calme-3.2-instruct-78b")
#   pipe(messages)

# query = 'I need flights from JFK to LAX for my husband and I'

# invoke_bad_llm_api_call(exec_spec[0], query)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/724 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/80.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/67 [00:00<?, ?it/s]

model-00001-of-00067.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00067.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00003-of-00067.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

model-00004-of-00067.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

model-00005-of-00067.safetensors:   0%|          | 0.00/4.78G [00:00<?, ?B/s]

model-00006-of-00067.safetensors:   0%|          | 0.00/4.78G [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
# def invoke_llm_api_call(api_spec, query):
#   input_messages = [
#         {"role": "system", "content": "You are a helpful assistant designed to build an API call that the user should send to get the info they want. Using the query input and the API spec needed to get the info needed from the query, return the API call that the user should make. Format your response as a dictionary with the query parameters and their values"},
#         {"role": "user", "content": "API spec to follow" + ": " + str(api_spec) + ", User query " + str(query)}
#     ]
#   # Make an API call using the chat completion method/endpoint
#   # use appropriate model with "model" field
#   # use "max_tokens" field to adjust token limit based on your needs
#   # use "temperature" field to adjust temperature for randomness. Lower means less stochastic
#   response = client.chat.completions.create(
#       model="gpt-3.5-turbo",
#       messages=input_messages,
#       temperature=0,
#       max_tokens=500,
#   )

#   # Extract the generated solution from the response
#   x = response.choices[0].message.content
#   # Return the solution
#   return x


# query = 'I need flights from JFK to LAX for my husband and I'
# llm_resp = invoke_llm_api_call(exec_spec[0], query)
# print(llm_resp)

In [None]:
# def evaluate_api_response(actual, llm_resp):
#   input_messages = [
#         {"role": "system", "content": "You are a helpful assistant designed to figure out the differences between two API call responses. One is from an LLM, the other is an actual API call response that is the ground truth. You might need to parse some strings in the dict returned by the LLM."},
#         {"role": "user", "content": "LLM response" + ": " + str(llm_resp) + ", Actual API response: " + str(actual)}
#     ]
#   # Make an API call using the chat completion method/endpoint
#   # use appropriate model with "model" field
#   # use "max_tokens" field to adjust token limit based on your needs
#   # use "temperature" field to adjust temperature for randomness. Lower means less stochastic
#   response = client.chat.completions.create(
#       model="gpt-3.5-turbo",
#       messages=input_messages,
#       temperature=0,
#       max_tokens=200,
#   )

#   # Extract the generated solution from the response
#   x = response.choices[0].message.content
#   # Return the solution
#   return x

In [None]:
# evaluate_api_response(responses[0], llm_resp)

# Langchain


In [None]:
!pip install langchain openai mistralai requests
# You pacakages installations
!pip install langchain_openai google-search-results langchain
!pip install -U langchain-community
!pip install langchain-core

Collecting langchain_openai
  Downloading langchain_openai-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting google-search-results
  Downloading google_search_results-2.4.2.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tiktoken<1,>=0.7 (from langchain_openai)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading langchain_openai-0.2.12-py3-none-any.whl (50 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: google-search-results
  Building wheel for google-search-results (setup.py) ... [?25l[?25hdone
  Created wheel for google-search-results: filename=google

In [None]:
!pip install langchain_mistralai


Collecting langchain_mistralai
  Downloading langchain_mistralai-0.2.3-py3-none-any.whl.metadata (2.4 kB)
Downloading langchain_mistralai-0.2.3-py3-none-any.whl (15 kB)
Installing collected packages: langchain_mistralai
Successfully installed langchain_mistralai-0.2.3


In [None]:
import json
import requests
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain_mistralai import ChatMistralAI

# Load JSON data from URLs
import urllib.request

In [None]:
def load_json_data(spec_url=None, data_url=None, timeout=30):
    """Load the NESTFUL executable spec and data JSON files from remote URLs.

    Parameters:
    - spec_url: URL of the spec JSON; defaults to the file in the IBM/NESTFUL
      GitHub repository.
    - data_url: URL of the data JSON; defaults to the file in the same repository.
    - timeout: seconds allowed for each download, so a stalled connection
      cannot hang the notebook.

    Returns:
    - (exec_spec, exec_data, exec_spec_0, exec_data_0): both parsed documents
      followed by the first element of each, used as the test case.

    Raises:
    - IndexError: either document is an empty list.
    - Errors from ``urllib.request.urlopen`` / ``json.load`` propagate unchanged.
    """
    spec_url = spec_url or "https://github.com/IBM/NESTFUL/raw/main/data/executable/executable-spec.json"
    data_url = data_url or "https://github.com/IBM/NESTFUL/raw/main/data/executable/executable-data.json"

    with urllib.request.urlopen(spec_url, timeout=timeout) as spec_resp:
        exec_spec = json.load(spec_resp)

    with urllib.request.urlopen(data_url, timeout=timeout) as data_resp:
        exec_data = json.load(data_resp)

    # Extract the first index as the test case
    exec_spec_0 = exec_spec[0]
    exec_data_0 = exec_data[0]

    print("Loaded JSON data and extracted the first index.")
    return exec_spec, exec_data, exec_spec_0, exec_data_0

In [None]:
# Re-load the NESTFUL spec/data from GitHub. This rebinds exec_spec and
# exec_data, which an earlier cell loaded from Google Drive.
exec_spec, exec_data, exec_spec_0, exec_data_0 = load_json_data()

Loaded JSON data and extracted the first index.


In [None]:
# Inspect the first spec entry returned by load_json_data()
exec_spec_0

{'name': 'SkyScrapperFlightSearch',
 'description': 'Retrieves available flights for a given route using Skyscrapper API.',
 'method': 'GET',
 'endpoint': '/api/v2/flights/searchFlightsComplete',
 'host': 'sky-scrapper.p.rapidapi.com',
 'url': 'https://rapidapi.com/apiheya/api/sky-scrapper/playground/apiendpoint_adcbe9c6-73a7-4db4-b95c-d9b552de558c',
 'query_parameters': {'originSkyId': {'type': 'string',
   'description': 'The originSkyId code can be extracted from the Search Airport API in the Flights collection.',
   'required': True},
  'destinationSkyId': {'type': 'string',
   'description': 'The destinationSkyId code can be extracted from the Search Airport API in the Flights collection.',
   'required': True},
  'originEntityId': {'type': 'string',
   'description': 'The originEntityId code can be extracted from the Search Airport API in the Flights collection.',
   'required': True},
  'destinationEntityId': {'type': 'string',
   'description': 'The destinationEntityId code can

In [None]:
def make_api_call(api_key=None, timeout=30):
  """Fetch a one-day London forecast (air quality included) from WeatherAPI via RapidAPI.

  Parameters:
  - api_key: RapidAPI key; when omitted, the RAPIDAPI_KEY environment variable
    is used, so the credential never has to be written into the notebook.
  - timeout: seconds handed to ``requests`` so a stalled server cannot hang
    the notebook.

  Returns:
  - (response_json, querystring): the decoded JSON body and the query
    parameters that produced it.

  Raises:
  - ValueError: no key was passed and RAPIDAPI_KEY is not set.
  - Whatever ``response.raise_for_status()`` raises for an error status.
  """
  import os

  # https://www.weatherapi.com/docs/#apis-s
  url = "https://weatherapi-com.p.rapidapi.com/forecast.json"

  querystring = {"q":"London","days":"1","aqi":"yes"}

  key = api_key if api_key is not None else os.environ.get("RAPIDAPI_KEY")
  if not key:
    raise ValueError("RapidAPI key missing: pass api_key or set the RAPIDAPI_KEY environment variable.")

  headers = {
    "x-rapidapi-key": key,
    "x-rapidapi-host": "weatherapi-com.p.rapidapi.com"
  }

  response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
  # An error payload must not be returned as if it were the forecast.
  response.raise_for_status()
  return response.json(), querystring

In [None]:
# Call the real weather API once; keep its JSON body and the query parameters
# that produced it as the ground truth for the evaluation cells below.
ground_truth_res, ground_truth_params = make_api_call()


In [None]:
# Pretty-print the ground-truth API response
json_print(ground_truth_res)

{
  "location": {
    "name": "London",
    "region": "City of London, Greater London",
    "country": "United Kingdom",
    "lat": 51.5171,
    "lon": -0.1062,
    "tz_id": "Europe/London",
    "localtime_epoch": 1734068725,
    "localtime": "2024-12-13 05:45"
  },
  "current": {
    "last_updated_epoch": 1734067800,
    "last_updated": "2024-12-13 05:30",
    "temp_c": 6.1,
    "temp_f": 43.0,
    "is_day": 0,
    "condition": {
      "text": "Mist",
      "icon": "//cdn.weatherapi.com/weather/64x64/night/143.png",
      "code": 1030
    },
    "wind_mph": 3.1,
    "wind_kph": 5.0,
    "wind_degree": 97,
    "wind_dir": "E",
    "pressure_mb": 1026.0,
    "pressure_in": 30.3,
    "precip_mm": 0.0,
    "precip_in": 0.0,
    "humidity": 100,
    "cloud": 100,
    "feelslike_c": 5.3,
    "feelslike_f": 41.6,
    "windchill_c": 6.0,
    "windchill_f": 42.8,
    "heatindex_c": 6.7,
    "heatindex_f": 44.0,
    "dewpoint_c": 3.5,
    "dewpoint_f": 38.2,
    "vis_km": 3.8,
    "vis_miles": 

In [None]:
# Use Mistral to generate API call
def make_llm_api_call(api, query, api_key):
    """Use Mistral to generate API call.

    NOTE: a later cell defines another ``make_llm_api_call`` whose third
    parameter is ``llm_type``; when the notebook runs top to bottom that
    definition replaces this one.

    Parameters:
    - api: description (e.g. the endpoint URL) of the API to call.
    - query: the user's natural-language request.
    - api_key: accepted for backward compatibility only. It is deliberately NOT
      placed in the prompt: a credential must not be sent to the LLM provider.

    Returns:
    - (api_request, api_request) on success: a dict with 'origin',
      'destination' and 'adults', taken from the model's JSON reply with the
      defaults "JFK", "LAX" and 2 for missing keys.
    - ({"error": "Failed to generate API call"}, {}) when the reply cannot be
      parsed as a JSON object.
    """
    mistral_llm = ChatMistralAI(
      model="mistral-large-latest",
      mistral_api_key=mistral_api_key,
      temperature=0,
      max_retries=2,
    )

    # The schema prompt (no credential is interpolated into it)
    schema_prompt = f"""
    Given the following api {api} and the query {query} to use to determine
    what parameters need to be set in the API call:

    Show the API call you would make to fetch the info from the given API and return the API response as a JSON
    and the request parameters you used as a JSON dictionary
    """

    messages = [
    (
        "system",
        "You are a helpful assistant that creates an API call user request.",
    ),
    ("human", schema_prompt),
    ]
    response = mistral_llm.invoke(messages)
    print("Mistral response", response)

    # Parse and match the API call format
    try:
        # The chat model returns a message object; the text to parse is its
        # `.content`, not the object itself.
        reply_text = getattr(response, "content", response)
        generated_params = json.loads(reply_text)
        api_request = {
            "origin": generated_params.get("origin", "JFK"),
            "destination": generated_params.get("destination", "LAX"),
            "adults": generated_params.get("adults", 2)
        }
        print("Mistral-generated API call:")
        print(api_request)
        return api_request, api_request
    except (TypeError, ValueError, AttributeError) as e:
        # TypeError: reply is not text; ValueError: invalid JSON;
        # AttributeError: valid JSON that is not an object (no .get).
        print("Error parsing Mistral response:", e)
        return {"error": "Failed to generate API call"}, {}

In [None]:
from langchain_core.prompts import ChatPromptTemplate
def _strip_code_fence(text):
  """Return ``text`` without a surrounding Markdown code fence (``` or ```json)."""
  import re

  fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, flags=re.DOTALL)
  if fenced:
    return fenced.group(1)
  # No complete fence: tolerate stray backticks / a leading "json" tag.
  cleaned = text.strip().strip('`').strip()
  if cleaned.startswith('json'):
    cleaned = cleaned[len('json'):]
  return cleaned.strip()


def make_llm_api_call(api, query, llm_type):
  """Ask an LLM which query parameters to send to ``api`` in order to answer ``query``.

  Parameters:
  - api: description (e.g. the endpoint URL) of the API to call.
  - query: the user's natural-language request.
  - llm_type: "mistral" (mistral-large-latest) or "openai" (gpt-3.5-turbo).

  Returns:
  - The JSON value parsed from the model's reply (expected to be a dict of
    query parameters), or {"error": "Failed to generate API call"} when the
    reply is empty.
  - NOTE(review): the recorded runs show one model returning a flat dict and
    the other wrapping it under 'queryString'; callers may need to normalize.

  Raises:
  - ValueError: ``llm_type`` is not supported, or the reply is not valid JSON
    (``json.JSONDecodeError`` is a ``ValueError``).
  """
  if llm_type == "mistral":
    llm = ChatMistralAI(
        model="mistral-large-latest",
        mistral_api_key=mistral_api_key,
        temperature=0,
        max_retries=2,
      )
  elif llm_type == "openai":
    llm = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=openai_api_key)
  else:
    # Without this check `llm` stays None and the failure only surfaces later,
    # as an obscure error while building the chain.
    raise ValueError(f"Unsupported llm_type: {llm_type!r} (expected 'mistral' or 'openai')")

  # A plain template, not an f-string: {api} and {query} are filled in by the
  # prompt template at invoke time, so braces inside the values are never
  # re-interpreted as template variables.
  schema_prompt = """
      Given the following api {api} and the query {query} to use to determine
      what parameters need to be set in the API call:

      Return the parameters you would need to make this API call in a response called queryString,
      which is a dictionary of API query parameters. Only return the queryString as your result,
      nothing else as I want to easily extract this object from your response.
      """
  prompt = ChatPromptTemplate.from_messages(
      [
      (
          "system",
          "You are a helpful assistant that creates an API call user request.",
      ),
      ("human", schema_prompt),
      ]
  )

  chain = prompt | llm
  # Only the two template variables are passed; no API key is involved.
  result = chain.invoke({"api": api, "query": query})

  if not result.content:
    print("Error getting response!")
    return {"error": "Failed to generate API call"}

  return json.loads(_strip_code_fence(result.content))

In [None]:
import os

api_endpoint = "https://weatherapi-com.p.rapidapi.com/forecast.json"
api_query = "What's the weather in London today? I don't know if I can go out this afternoon with my allergies, they get bad with the air quality"
# The RapidAPI key is a credential: read it from the environment rather than
# hard-coding it in the notebook.
api_key = os.environ.get("RAPIDAPI_KEY", "")
mistral_generated_response = make_llm_api_call(api_endpoint, api_query, "mistral")

In [None]:
# Show the query parameters proposed by the Mistral model
mistral_generated_response

{'q': 'London', 'days': '1', 'aqi': 'yes'}

In [None]:
# Same endpoint and query as the Mistral cell, this time answered by the OpenAI model
chatgpt_generated_response = make_llm_api_call(api_endpoint, api_query, "openai")

In [None]:
# Show the query parameters proposed by the OpenAI model
chatgpt_generated_response

{'queryString': {'q': 'London', 'days': '1'}}

In [None]:
import os

api_endpoint = "https://weatherapi-com.p.rapidapi.com/forecast.json"
api_query = "What's the weather in London today? I don't know if I can go out this afternoon with my allergies"
# The RapidAPI key is a credential: read it from the environment rather than
# hard-coding it in the notebook.
api_key = os.environ.get("RAPIDAPI_KEY", "")
# The make_llm_api_call in effect when the notebook runs top to bottom takes an
# llm_type ("mistral" or "openai") as its third argument, not an API key.
generated_response = make_llm_api_call(api_endpoint, api_query, "mistral")

In [None]:
# Evaluate responses
def evaluate_responses(ground_truth_params, generated_response_params):
    """Ask OpenAI GPT-3.5 to score generated query parameters against the ground truth.

    Parameters:
    - ground_truth_params: query parameters used for the real API call.
    - generated_response_params: query parameters proposed by an LLM.

    Returns:
    - The judge model's free-text reply (a summary score plus justification).
    """
    # temperature=0 so that re-running the evaluation on the same inputs gives
    # as stable a verdict as the model allows.
    evaluator = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=openai_api_key, temperature=0)

    # Construct evaluation prompt. A missing required parameter must cost more
    # than a missing optional one.
    eval_prompt = PromptTemplate(
        input_variables=["ground_truth_params", "generated_response_params"],
        template="""
        Compare the following two API query parameters:

        Ground Truth Response:
        {ground_truth_params}

        Generated Response:
        {generated_response_params}

        Instructions:
        - Check if the two responses are identical.
        - If identical, return a score of 100%.
        - If not, calculate the accuracy of the generated response. Penalize missing required parameters and give a smaller penalty for missing optional parameters.
        - Return a summary score and justification.
        """
    )

    chain = eval_prompt | evaluator

    result = chain.invoke({
        "ground_truth_params": ground_truth_params,
        "generated_response_params": generated_response_params,
    })

    return result.content

In [None]:
# Score the Mistral-generated query parameters against the parameters used for
# the real API call.
# The inputs come from earlier cells: ground_truth_params from make_api_call()
# and mistral_generated_response from make_llm_api_call(..., "mistral"). An
# older draft of this cell re-loaded the JSON files and repeated both calls
# here; those steps were commented out and are not needed.
evaluation_result = evaluate_responses(ground_truth_params, mistral_generated_response)

# Print the judge's free-text verdict
print("Evaluation Result:", evaluation_result)

Evaluation Result: The two API query parameters are identical, so the accuracy score is 100%. There are no missing required or optional parameters in the generated response compared to the ground truth response. Therefore, the generated response is a perfect match to the ground truth response.


In [None]:
# Same evaluation for the parameters proposed by the OpenAI model
# (this rebinds evaluation_result from the previous cell)
evaluation_result = evaluate_responses(ground_truth_params, chatgpt_generated_response)
print("Evaluation Result:", evaluation_result)

Evaluation Result: The two responses are not identical. The generated response is missing the 'aqi' parameter which is marked as required in the ground truth response. As a result, the accuracy of the generated response is penalized for missing a required parameter. 

Calculation:
- Missing required parameter: 'aqi' (penalty of 50%)
- Missing optional parameter: None

Accuracy = 100% - 50% = 50%

Summary:
The accuracy score of the generated response is 50%. This is because it is missing a required parameter 'aqi' compared to the ground truth response. The missing parameter resulted in a penalty that affected the overall accuracy score.
