In [41]:
import pandas as pd
import sys
import importlib.util
from pathlib import Path
import instructor
from openai import OpenAI

In [30]:
# Benchmark category to inspect; it selects which raw test file is loaded.
category = "simple"
raw_test_file = f'../data/raw/gorilla_openfunctions_v1_test_{category}.json'
# The file is read as JSON Lines (one record per line).
df = pd.read_json(raw_test_file, lines=True)

In [31]:
# Preview the first rows of the loaded test file.
df.head()

Unnamed: 0,question,function
0,Find the area of a triangle with a base of 10 ...,"{'name': 'calculate_triangle_area', 'descripti..."
1,Calculate the factorial of 5 using math functi...,"{'name': 'math.factorial', 'description': 'Cal..."
2,Calculate the hypotenuse of a right triangle g...,"{'name': 'math.hypot', 'description': 'Calcula..."
3,Find the roots of a quadratic equation with co...,"{'name': 'algebra.quadratic_roots', 'descripti..."
4,"Solve a quadratic equation where a=2, b=6, and...","{'name': 'solve_quadratic_equation', 'descript..."


In [32]:
# Use the question stored under index label 0 as the working example.
question = df.loc[0, 'question']
question

'Find the area of a triangle with a base of 10 units and height of 5 units.'

In [33]:
# Function specification (a dict with name/description/parameters) paired with that question.
functions = df.loc[0, 'function']
functions

{'name': 'calculate_triangle_area',
 'description': 'Calculate the area of a triangle given its base and height.',
 'parameters': {'type': 'dict',
  'properties': {'base': {'type': 'integer',
    'description': 'The base of the triangle.'},
   'height': {'type': 'integer', 'description': 'The height of the triangle.'},
   'unit': {'type': 'string',
    'description': "The unit of measure (defaults to 'units' if not specified)"}},
  'required': ['base', 'height']}}

In [35]:
# Table that records, for each question, the file holding its pydantic model.
models_index_file = '../data/processed/pydantic_models.csv'
df_models = pd.read_csv(models_index_file)
df_models.head()

Unnamed: 0,iterator,category,question,pydantic_model_file
0,0,simple,Find the area of a triangle with a base of 10 ...,simple\model_000.py
1,1,simple,Calculate the factorial of 5 using math functi...,simple\model_001.py
2,2,simple,Calculate the hypotenuse of a right triangle g...,simple\model_002.py
3,3,simple,Find the roots of a quadratic equation with co...,simple\model_003.py
4,4,simple,"Solve a quadratic equation where a=2, b=6, and...",simple\model_004.py


In [36]:
# Look up the pydantic model file recorded for the current question.
matching_model_files = df_models.loc[df_models['question'] == question, 'pydantic_model_file']
if matching_model_files.empty:
    # Same exception type the bare `.values[0]` lookup raised, but with an actionable message.
    raise IndexError(f"No pydantic model file recorded for question: {question!r}")
model_file = matching_model_files.iloc[0]

# The CSV stores Windows-style separators (e.g. simple\model_000.py). Normalise
# them to "/" so the path resolves on POSIX as well as on Windows.
pydantic_model_file = (
    Path("..").resolve() / "data/processed" / "pydantic_models" / model_file.replace("\\", "/")
)

In [37]:
# Display the resolved path of the selected model file.
pydantic_model_file

WindowsPath('C:/Users/Alonso/Dropbox/personal/repos/guided-generation-benchmark/evals/bfcl/data/processed/pydantic_models/simple/model_000.py')

In [38]:
def import_model_from_path(file_path: Path):
    """Execute the Python file at ``file_path`` and return the model it defines.

    The file is loaded as a module named after its stem. Among the module's
    public names (dunder names and ``Field``/``BaseModel``/``annotations`` are
    ignored), the first class that is *defined in the module itself* is
    returned, in ``dir()`` (alphabetical) order. Names that were merely
    imported by the model file (for example ``Optional`` or ``List``) are
    therefore never mistaken for the model. If the module defines no class of
    its own, the first remaining public name is returned.

    Args:
        file_path: Path to the ``.py`` file containing the model.

    Returns:
        The selected object from the loaded module.

    Raises:
        ImportError: If no import spec/loader can be created for ``file_path``.
        AttributeError: If the module exposes no candidate object at all.

    Side effects:
        Appends the file's directory to ``sys.path`` (once) so the loaded
        module can import sibling files.
    """
    # Ensure the directory of the file is in sys.path
    module_dir = str(file_path.parent)
    if module_dir not in sys.path:
        sys.path.append(module_dir)

    # Create a module spec from the file location
    spec = importlib.util.spec_from_file_location(file_path.stem, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Could not load spec for module at {file_path}")

    # Load the module from spec
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    excluded_names = {"Field", "BaseModel", "annotations"}
    candidates = [
        name
        for name in dir(module)
        if not name.startswith("__") and name not in excluded_names
    ]
    if not candidates:
        raise AttributeError(
            f"Module at {file_path} does not define any model object"
        )

    # Prefer classes whose __module__ is this module: imported helpers keep the
    # name of the module they came from and are filtered out here.
    own_classes = [
        name
        for name in candidates
        if isinstance(getattr(module, name), type)
        and getattr(module, name).__module__ == module.__name__
    ]
    selected_name = own_classes[0] if own_classes else candidates[0]
    return getattr(module, selected_name)


# Load the model object from the file selected for the current question.
Model = import_model_from_path(pydantic_model_file)

In [39]:
# Display the object returned by import_model_from_path.
Model

model_000.CalculateTriangleArea

In [40]:
def augment_prompt_by_languge(prompt, test_category):
    """Return ``prompt`` with a trailing note naming the function's language.

    ``"java"`` and ``"javascript"`` categories get their own note; every other
    category gets the Python note.
    """
    if test_category == "java":
        return prompt + "\n Note that the provided function is in Java 8 SDK syntax."
    if test_category == "javascript":
        return prompt + "\n Note that the provided function is in JavaScript."
    return prompt + "\n Note that the provided function is in Python."

def language_specific_pre_processing(function, test_category, string_param):
    """Annotate parameter descriptions with language-specific type hints.

    Every function spec in ``function`` is updated **in place**:

    * ``"java"``: parameters typed ``"Any"``/``"any"`` get two notes appended
      to their description; other parameters are left untouched.
    * ``"javascript"``: parameters typed ``"Any"``/``"any"`` get an
      "any type" note; all other parameters get a note naming their type.
    * any other category: nothing is changed.

    A missing ``"description"`` key is treated as an empty description.

    Args:
        function: List of function-spec dicts, each with
            ``item["parameters"]["properties"]`` mapping parameter names to
            dicts that contain at least a ``"type"`` key.
        test_category: Category name selecting the language rules.
        string_param: Unused; kept so existing callers keep working.

    Returns:
        The same ``function`` list that was passed in (also when it is empty).
    """
    for item in function:
        properties = item["parameters"]["properties"]
        if test_category == "java":
            for value in properties.values():
                if value["type"] in ("Any", "any"):
                    value.setdefault("description", "")
                    value["description"] += "This parameter can be of any type of Java object."
                    # NOTE: there is no space between "Java" and the type
                    # name; the emitted text is deliberately left unchanged.
                    value["description"] += (
                        "This is Java" + value["type"] + " in string representation."
                    )
        elif test_category == "javascript":
            for value in properties.values():
                value.setdefault("description", "")
                if value["type"] in ("Any", "any"):
                    value["description"] += "This parameter can be of any type of Javascript object."
                else:
                    value["description"] += (
                        "This is Javascript "
                        + value["type"]
                        + " in string representation."
                    )
    # Return only after every item has been processed (and also for an empty list).
    return function
    
# System message sent to the chat model. The literal opens with exactly three
# quotes; a fourth quote would become the first character of the prompt text.
SYSTEM_PROMPT_FOR_CHAT_MODEL = """
    You are an expert in composing functions. You are given a question and a set of possible functions. 
    Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 
    If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
    also point it out. You should only return the function call in tools call sections.
    """

# User-message template. `{user_prompt}` and `{functions}` are str.format()
# placeholders; the template text itself already starts with the "Questions:" label.
USER_PROMPT_FOR_CHAT_MODEL = """
    Questions:{user_prompt}\nHere is a list of functions in JSON format that you can invoke:\n{functions}. 
    Should you decide to return the function call(s),Put it in the format of [func1(params_name=params_value, params_name2=params_value2...), func2(params)]\n
    NO other text MUST be included. 
"""

In [42]:
# Build the chat messages for the selected question.
prompt = augment_prompt_by_languge(question, category)
# The helper receives the function spec wrapped in a list; its return value is
# stringified into the user message below.
functions_str = language_specific_pre_processing([functions], category, False)
messages = [
    {
        "role": "system",
        "content": SYSTEM_PROMPT_FOR_CHAT_MODEL,
    },
    {
        "role": "user",
        # USER_PROMPT_FOR_CHAT_MODEL already begins with "Questions:", so no
        # extra "Questions:" prefix is prepended (it would appear twice).
        "content": USER_PROMPT_FOR_CHAT_MODEL.format(
            user_prompt=prompt, functions=str(functions_str)
        ),
    },
]

In [43]:
# Wrap a default-configured OpenAI client with instructor.
client = instructor.from_openai(OpenAI())

# Request a completion for the prepared messages, passing the imported model
# as `response_model`.
completion_kwargs = {
    "model": "gpt-4-0125-preview",
    "response_model": Model,
    "messages": messages,
}
model_response = client.chat.completions.create(**completion_kwargs)