In [1]:
import pandas as pd
import sys
import importlib.util
from pathlib import Path
import instructor
from openai import OpenAI

In [2]:
# Benchmark split to inspect; it selects which raw JSON-lines file is loaded.
category = "simple"
df = pd.read_json(
    "../data/raw/gorilla_openfunctions_v1_test_" + category + ".json",
    lines=True,
)

In [17]:
# Row label of the benchmark example under investigation.
pos = 337
# Single .loc[row, column] lookup instead of chained indexing.
question = df.loc[pos, "question"]
question

"In a texas holdem game, Who won in the poker game with players Alex, Sam, Robert and Steve given the cards Alex':['A of spades', 'K of spades'], 'Sam': ['2 of diamonds', '3 of clubs'], 'Robert': ['Q of hearts', '10 of hearts'], 'Steve': ['4 of spades', '5 of spades']?"

In [18]:
# Function schema attached to the same benchmark row as `question`.
functions = df.loc[pos, "function"]
functions

{'name': 'poker_game_winner',
 'description': 'Identify the winner in a poker game based on the cards.',
 'parameters': {'type': 'dict',
  'properties': {'players': {'type': 'array',
    'items': {'type': 'string'},
    'description': 'Names of the players in a list.'},
   'cards': {'type': 'dict',
    'description': 'An object containing the player name as key and the cards as values in a list.'},
   'type': {'type': 'string',
    'description': "Type of poker game. Defaults to 'Texas Holdem'"}},
  'required': ['players', 'cards']}}

In [19]:
# Index of generated pydantic model files: one row per benchmark question.
df_models = pd.read_csv("../data/processed/pydantic_models.csv")
df_models.head()

Unnamed: 0,iterator,category,question,pydantic_model_file
0,0,simple,Find the area of a triangle with a base of 10 ...,simple\model_000.py
1,1,simple,Calculate the factorial of 5 using math functi...,simple\model_001.py
2,2,simple,Calculate the hypotenuse of a right triangle g...,simple\model_002.py
3,3,simple,Find the roots of a quadratic equation with co...,simple\model_003.py
4,4,simple,"Solve a quadratic equation where a=2, b=6, and...",simple\model_004.py


In [20]:
# Find the generated pydantic model file registered for this exact question text.
matching_files = df_models.loc[df_models["question"] == question, "pydantic_model_file"]
if matching_files.empty:
    # Fail with a descriptive message instead of an opaque IndexError on an empty selection.
    raise LookupError(f"No pydantic model file is registered for question: {question!r}")
model_file = matching_files.iloc[0]

# The CSV stores relative paths with Windows separators (e.g. simple\model_000.py).
# Normalise them to "/" so the path also resolves on POSIX, where "\" is not a separator.
pydantic_model_file = (
    Path("..").resolve() / "data/processed" / "pydantic_models" / model_file.replace("\\", "/")
)

In [32]:
def import_model_from_path(file_path: Path):
    """Execute the Python file at ``file_path`` and return its last public attribute.

    The module is loaded under the name ``file_path.stem``. Among the names found
    in the module namespace, dunder names and a fixed set of names that the model
    files import (``Field``, ``BaseModel``, ...) are skipped; the object bound to
    the last remaining name, in definition order, is returned.

    Side effect: the file's parent directory is appended to ``sys.path`` (once)
    and stays there for the rest of the session.

    Args:
        file_path: Path of the ``.py`` file to load.

    Returns:
        The object bound to the last non-excluded name defined in the module.

    Raises:
        ImportError: If no import spec/loader can be built for ``file_path``.
        AttributeError: If the module defines no candidate name at all.
    """
    # Names brought in by imports inside the model files; never the model itself.
    excluded_names = {
        "Field", "BaseModel", "annotations", "Enum", "Optional", "Any", "Dict", "List",
    }

    # Ensure the directory of the file is in sys.path
    module_dir = str(file_path.parent)
    if module_dir not in sys.path:
        sys.path.append(module_dir)

    # Create a module spec from the file location
    spec = importlib.util.spec_from_file_location(file_path.stem, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Could not load spec for module at {file_path}")

    # Load the module from spec
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    candidates = [
        name
        for name in vars(module)
        if not name.startswith("__") and name not in excluded_names
    ]
    if not candidates:
        # Previously this case surfaced as an IndexError from indexing an empty list.
        raise AttributeError(
            f"Module at {file_path} does not define any model object"
        )

    # The namespace keeps definition order, so the last candidate is the last definition.
    return getattr(module, candidates[-1])


# Load the object exported by the model file selected for the current question.
Model = import_model_from_path(pydantic_model_file)

In [33]:
# Display the loaded object to confirm which class was picked up.
Model

model_337.PokerGameWinner

In [34]:
def augment_prompt_by_languge(prompt, test_category):
    """Return ``prompt`` with a trailing note naming the function's language.

    Args:
        prompt: Question text to extend.
        test_category: ``"java"`` and ``"javascript"`` select their own note;
            every other value gets the Python note.

    Returns:
        ``prompt`` followed by a newline, a space and the language note.
    """
    if test_category == "java":
        return prompt + "\n Note that the provided function is in Java 8 SDK syntax."
    if test_category == "javascript":
        return prompt + "\n Note that the provided function is in JavaScript."
    # Any other category is treated as Python.
    return prompt + "\n Note that the provided function is in Python."

def language_specific_pre_processing(function, test_category, string_param):
    """Append language hints to parameter descriptions of every function schema.

    The schemas are modified in place and the same list is returned.

    * ``"java"``: parameters typed ``"Any"``/``"any"`` get two sentences appended
      to their description; other parameters are left untouched.
    * ``"javascript"``: parameters typed ``"Any"``/``"any"`` get an "any type"
      sentence; every other parameter gets a sentence naming its type.
    * any other category: nothing is changed.

    A parameter without a ``"description"`` key is treated as having an empty
    description before text is appended.

    Args:
        function: List of function schemas, each providing
            ``item["parameters"]["properties"]`` as a dict of parameter specs.
        test_category: Benchmark category name.
        string_param: Not used by this function; kept so existing calls still work.

    Returns:
        The ``function`` list that was passed in, with all items processed.
    """
    any_type_names = ("Any", "any")

    for item in function:
        properties = item["parameters"]["properties"]
        if test_category == "java":
            for value in properties.values():
                if value["type"] in any_type_names:
                    # Same guard the JavaScript branch uses for a missing description.
                    value.setdefault("description", "")
                    value["description"] += "This parameter can be of any type of Java object."
                    value["description"] += (
                        "This is Java" + value["type"] + " in string representation."
                    )
        elif test_category == "javascript":
            for value in properties.values():
                value.setdefault("description", "")
                if value["type"] in any_type_names:
                    value["description"] += "This parameter can be of any type of Javascript object."
                else:
                    value["description"] += (
                        "This is Javascript "
                        + value["type"]
                        + " in string representation."
                    )

    # Return only after the loop: returning inside it stopped after the first
    # schema and produced None for an empty list.
    return function
    
# System message for the chat model. The literal is opened with exactly three
# quotes; a fourth quote would become the first character of the prompt itself.
SYSTEM_PROMPT_FOR_CHAT_MODEL = """
    You are an expert in composing functions. You are given a question and a set of possible functions. 
    Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 
    If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
    also point it out. You should only return the function call in tools call sections.
    """

# User message template, filled with str.format(): `{user_prompt}` is the question
# text and `{functions}` is the function list rendered as a string. The template
# already starts with its own "Questions:" label. Whitespace inside the literal
# (indentation, trailing spaces, blank line) is part of the prompt that is sent.
USER_PROMPT_FOR_CHAT_MODEL = """
    Questions:{user_prompt}\nHere is a list of functions in JSON format that you can invoke:\n{functions}. 
    Should you decide to return the function call(s),Put it in the format of [func1(params_name=params_value, params_name2=params_value2...), func2(params)]\n
    NO other text MUST be included. 
"""

In [35]:
# Add the language note to the question and the language hints to the schema.
prompt = augment_prompt_by_languge(question, category)
# NOTE: despite its name this is the processed list of function schemas; it is
# converted to text with str() when the template is filled in below.
functions_str = language_specific_pre_processing([functions], category, False)

messages = [
    {
        "role": "system",
        "content": SYSTEM_PROMPT_FOR_CHAT_MODEL,
    },
    {
        "role": "user",
        # USER_PROMPT_FOR_CHAT_MODEL already opens with "Questions:", so the label
        # is not prepended a second time here.
        "content": USER_PROMPT_FOR_CHAT_MODEL.format(
            user_prompt=prompt, functions=str(functions_str)
        ),
    },
]

In [36]:
# Wrap the OpenAI client with instructor so `response_model` can be passed to the call.
client = instructor.from_openai(OpenAI())

# Request one completion, asking for the result as `Model` together with the
# raw completion object.
# NOTE(review): the recorded run of this cell raised InstructorRetryException
# (required field `cards` missing from the response), so `model_response` was
# not assigned by that run.
model_response, completion = client.chat.completions.create_with_completion(
    messages=messages,
    response_model=Model,
    model="gpt-4-0125-preview",
)

InstructorRetryException: 1 validation error for PokerGameWinner
cards
  Field required [type=missing, input_value={'players': ['Alex', 'Sam', 'Robert', 'Steve']}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.7/v/missing

In [13]:
from enum import Enum

def _format_argument_value(value):
    """Render a single argument value for the call string."""
    # Unwrap enums first so that enum members (including str-mixin enums) are
    # rendered by their underlying value, not by the member's own string form.
    if isinstance(value, Enum):
        value = value.value
    if isinstance(value, str):
        # repr() gives 'text' for plain strings and switches quoting/escaping
        # when the text itself contains a quote, keeping the output well-formed.
        return repr(value)
    return f"{value}"


def format_result(function_name, result):
    """Format a parsed response as ``[function_name(arg=value, ...)]``.

    Args:
        function_name: Name to put in front of the argument list.
        result: Object exposing its fields through ``model_dump()`` (preferred)
            or the older ``dict()`` method.

    Returns:
        The call string. String values (and enum members wrapping strings) are
        quoted; all other values are rendered with ``str()``.
    """
    # Prefer model_dump(); fall back to dict() for objects that only offer that.
    dump = getattr(result, "model_dump", None)
    arguments = dump() if callable(dump) else result.dict()

    args_string = ", ".join(
        f"{key}={_format_argument_value(value)}" for key, value in arguments.items()
    )
    # Creating the output string with the function name and arguments
    return f"[{function_name}({args_string})]"


In [14]:
# Dump the response model's fields into a dict for inspection.
# NOTE(review): this cell's execution count (14) is lower than that of the cells
# above, and the recorded fields further down (temp/volume/gas) do not match the
# PokerGameWinner model; the outputs appear to come from an earlier run. Confirm
# with Restart & Run All.
model_dict = model_response.model_dump()

In [15]:
from enum import Enum

# Report the Python type of every dumped field; for Enum members also print
# the value they wrap.
for field_name, field_value in model_dict.items():
    print(f"{field_name}: {type(field_value)}")
    if isinstance(field_value, Enum):
        print(field_value.value)

temp: <class 'int'>
volume: <class 'int'>
gas: <class 'NoneType'>


In [16]:
# Render the parsed response as a bracketed call string.
# NOTE(review): 'holi' looks like a placeholder function name; confirm whether
# the schema's own name (functions['name']) was intended.
format_result('holi', model_response)

'[holi(temp=298, volume=10, gas=None)]'