In [1]:
from enum import Enum
from typing import List, Dict, Any, Optional, NamedTuple
# from core.agent import AgentToolCall
from agent import AgentToolCall

class ErrorType(Enum):
    MISSING_FUNCTION = "missing_function"  # Expected function call is missing
    MISSING_ARGUMENT = "missing_argument"  # Expected argument is missing
    WRONG_ARGUMENT_TYPE = "wrong_argument_type"  # Wrong type of argument
    WRONG_ARGUMENT_VALUE = "wrong_argument_value"  # Wrong value of argument

# Define a validation error structure
class ValidationError(NamedTuple):
    error_type: ErrorType
    message: str
    call_index: Optional[int] = None
    arg_name: Optional[str] = None

# Update the validation function to return structured errors
def validate_function_calls(actual_calls: List[AgentToolCall],
                          expected_calls: List[Dict[str, Any]]) -> List[ValidationError]:
    """
    Validate that actual function calls match expected calls.
    
    Args:
        actual_calls: List of actual function calls from the tracker
        expected_calls: List of expected function calls from ground_truths.json
        
    Returns:
        List of validation errors with structured type information
    """
    errors = []
    
    # Create a copy of actual_calls to mark matches
    remaining_actual_calls = list(actual_calls)
    
    # For each expected call, try to find a matching actual call
    for i, expected in enumerate(expected_calls):
        expected_name = expected["name"]
        is_required = expected.get("required", True)  # Default to True if not specified
        
        # Find a matching function call by name
        match_found = False
        match_index = None
        
        for j, actual in enumerate(remaining_actual_calls):
            if actual.function == expected_name:
                match_found = True
                match_index = j
                break
        
        if not match_found:
            # Only report missing functions that are required
            if is_required:
                errors.append(ValidationError(
                    error_type=ErrorType.MISSING_FUNCTION,
                    message=f"Missing required function call: {expected_name}",
                    call_index=i
                ))
            continue
        
        # Validate arguments of the matching call
        actual_call = remaining_actual_calls[match_index]
        argument_errors = validate_arguments(actual_call, expected, i)
        errors.extend(argument_errors)
        
        # Remove the matched call so we don't match it again
        remaining_actual_calls.pop(match_index)
    
    return errors

def validate_arguments(actual: AgentToolCall,
                     expected: Dict[str, Any],
                     call_index: int) -> List[ValidationError]:
    """
    Validate arguments of a function call with smarter handling of renamed parameters.
    
    Args:
        actual: Actual function call record
        expected: Expected function call
        call_index: Index of this call
    """
    actual_args = actual.arguments
    expected_args = expected.get("arguments", [])
    errors = []
    
    # Keep track of matched arguments on both sides
    matched_actual_args = set()
    matched_expected_args = set()
    
    # First check all expected arguments
    for arg_index, expected_arg in enumerate(expected_args):
        if "name" not in expected_arg:
            errors.append(ValidationError(
                error_type=ErrorType.MISSING_ARGUMENT,
                message=f"Call {call_index+1}, Arg {arg_index+1}: Expected argument is missing required 'name' field",
                call_index=call_index
            ))
            continue
        
        expected_name = expected_arg["name"]

        is_required = expected_arg.get("required", True)  # Default to True if not specified
        if is_required and expected_name not in actual_args:
            errors.append(ValidationError(
                error_type=ErrorType.MISSING_ARGUMENT,
                message=f"Call {call_index+1}: Expected parameter '{expected_name}' not found in actual arguments",
                call_index=call_index,
                arg_name=expected_name
            ))
            continue
        
        if expected_name in actual_args:
            # Found a matching named parameter
            actual_value = actual_args[expected_name]
            matched_actual_args.add(expected_name)
            matched_expected_args.add(expected_name)
            
            # Validate value if specified
            if "value" in expected_arg:
                expected_value = expected_arg["value"]
                # if actual_value in expected_value:
                if is_valid(actual_value, expected_value):
                    pass
                else:
                    print(str(actual_value), str(expected_value))
                    errors.append(ValidationError(
                        error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                        message=f"Call {call_index+1}, Arg '{expected_name}': Expected value '{expected_value}', got '{actual_value}'",
                        call_index=call_index,
                        arg_name=expected_name
                    ))
            
            # Validate type if specified
            if "type" in expected_arg:
                expected_type = expected_arg["type"]
                actual_type = type(actual_value).__name__
                if actual_type == "NoneType":
                    continue
                if actual_type == expected_type:
                    pass
                elif  actual_type == "str" and expected_type == "string":
                    pass
                elif actual_type == "int" and expected_type == "integer":
                    pass  
                elif actual_type == "bool" and expected_type == "boolean":
                    pass  
                else:
                    errors.append(ValidationError(
                        error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                        message=f"Call {call_index+1}, Arg '{expected_name}': Expected type '{expected_type}', got '{actual_type}'",
                        call_index=call_index,
                        arg_name=expected_name
                    ))
    
    return errors

def strict_equality_matcher(spec_value, allowed_values: list) -> bool:
    if spec_value is None:
        return '' in allowed_values

    if not isinstance(allowed_values, list):
        return (spec_value)==str(allowed_values)
    return spec_value in allowed_values

def is_valid(spec, options, key_map: dict = None, matcher=strict_equality_matcher) -> bool:
    # 分支 1: spec 是一个字典
    if isinstance(spec, dict):
        return _validate_dict(spec, options, key_map, matcher)

    # 分支 2: spec 是一个列表
    if isinstance(spec, list):
        return _validate_list(spec, options)
    
    # 分支 3: spec 是一个单一值 (这是递归的最终出口)
    return matcher(spec, options)

def _validate_dict(spec_dict: dict, options_obj, key_map: dict, matcher) -> bool:
    """
    (辅助函数) 专门处理字典类型的 spec 验证。
    """
    if not isinstance(options_obj, dict):
        # print(f"验证失败: 需求是一个字典, 但对应的选项不是 (而是 {type(options_obj).__name__})。")
        return False

    for spec_key, spec_value in spec_dict.items():
        mapped_keys = key_map.get(spec_key, spec_key)
        if not isinstance(mapped_keys, list):
            mapped_keys = [mapped_keys]

        key_is_validated = False
        for options_key in mapped_keys:
            if options_key not in options_obj:
                continue
            
            options_for_key = options_obj.get(options_key)
            
            # 关键的递归调用：返回到主函数 is_valid，以处理 spec_value 可能的任何类型
            if is_valid(spec_value, options_for_key, key_map, matcher):
                key_is_validated = True
                break
        
        if not key_is_validated:
            # print(f"验证失败: 字典中的键 '{spec_key}' (值: '{spec_value}') 未通过验证。")
            return False
            
    return True


def _validate_list(spec_list: list, options_obj) -> bool:
    """
    (辅助函数) 专门处理列表类型的 spec 验证 (执行子集检查)。
    """
    if not isinstance(options_obj, list):
        # print(f"验证失败: 需求是一个列表, 但对应的选项不是 (而是 {type(options_obj).__name__})。")
        return False

    # 检查 spec_list 中的每个元素是否存在于 options_obj 中
    # 注意：此简单实现主要用于处理原始类型（字符串、数字）的列表。
    for item in spec_list:
        if item not in options_obj:
            # print(f"验证失败: 列表中的元素 '{item}' 不在允许的选项列表中。")
            return False
            
    return True

if __name__ == "__main__":
    # actual_calls = [
    #     AgentToolCall(function="check_seat_availability", arguments={"flight_id": "LH797", "seat_class": "premium_economy"},call_id=0),
    #     AgentToolCall(function="check_seat_availability", arguments={"flight_id": "LH797"},call_id=1),
    # ]
    # expected_calls = [{"name": "check_seat_availability", "arguments": [{"name": "flight_id", "value": "LH797", "type": "str"}, {"name": "seat_class", "value": "premium_economy", "type": "str"}]}]
    # print(validate_function_calls(actual_calls, expected_calls))

    actual = AgentToolCall(function="calculate_fitness", arguments={"trait_values": [0.8,0.7], "trait_contributions": [0.4, 0.6]}, call_id=0)
    expected = {'name': 'calculate_fitness', 'arguments': [{'name': 'trait_values', 'value': [[0.8, 0.7]], 'type': "List[float]"}, {'name': 'trait_contributions', 'value': [[0.4, 0.6]], 'type': "List[float]"}]}
    argument_errors = validate_arguments(actual, expected, 0)
    print(argument_errors)

[0.8, 0.7] [[0.8, 0.7]]
[0.4, 0.6] [[0.4, 0.6]]
[ValidationError(error_type=<ErrorType.WRONG_ARGUMENT_VALUE: 'wrong_argument_value'>, message="Call 1, Arg 'trait_values': Expected value '[[0.8, 0.7]]', got '[0.8, 0.7]'", call_index=0, arg_name='trait_values'), ValidationError(error_type=<ErrorType.WRONG_ARGUMENT_TYPE: 'wrong_argument_type'>, message="Call 1, Arg 'trait_values': Expected type 'List[float]', got 'list'", call_index=0, arg_name='trait_values'), ValidationError(error_type=<ErrorType.WRONG_ARGUMENT_VALUE: 'wrong_argument_value'>, message="Call 1, Arg 'trait_contributions': Expected value '[[0.4, 0.6]]', got '[0.4, 0.6]'", call_index=0, arg_name='trait_contributions'), ValidationError(error_type=<ErrorType.WRONG_ARGUMENT_TYPE: 'wrong_argument_type'>, message="Call 1, Arg 'trait_contributions': Expected type 'List[float]', got 'list'", call_index=0, arg_name='trait_contributions')]


In [2]:
call_index = 0
actual_args = actual.arguments
expected_args = expected.get("arguments", [])
errors = []

# Keep track of matched arguments on both sides
matched_actual_args = set()
matched_expected_args = set()

# First check all expected arguments
for arg_index, expected_arg in enumerate(expected_args):
    # print(arg_index, expected_arg)
    if "name" not in expected_arg:
        errors.append(ValidationError(
            error_type=ErrorType.MISSING_ARGUMENT,
            message=f"Call {call_index+1}, Arg {arg_index+1}: Expected argument is missing required 'name' field",
            call_index=call_index
        ))
        continue
    
    expected_name = expected_arg["name"]
    # print(expected_name)

    is_required = expected_arg.get("required", True)  # Default to True if not specified
    if is_required and expected_name not in actual_args:
        errors.append(ValidationError(
            error_type=ErrorType.MISSING_ARGUMENT,
            message=f"Call {call_index+1}: Expected parameter '{expected_name}' not found in actual arguments",
            call_index=call_index,
            arg_name=expected_name
        ))

    if expected_name in actual_args:
        # Found a matching named parameter
        actual_value = actual_args[expected_name]
        matched_actual_args.add(expected_name)
        matched_expected_args.add(expected_name)


In [3]:
expected_name

'trait_contributions'

In [4]:
actual_args

{'trait_values': [0.8, 0.7], 'trait_contributions': [0.4, 0.6]}

In [5]:
if "value" in expected_arg:
    expected_value = expected_arg["value"]
if "type" in expected_arg:
    expected_type = expected_arg["type"]

In [6]:
expected_value

[[0.4, 0.6]]

In [7]:
actual_value

[0.4, 0.6]

In [8]:
expected_arg

{'name': 'trait_contributions', 'value': [[0.4, 0.6]], 'type': 'List[float]'}

In [9]:
from complex_value_validation import validate_argument

In [9]:
from typing import List, Dict, Tuple, Set, Union, Any, get_origin, get_args
from enum import Enum

class ErrorType(Enum):
    MISSING_FUNCTION = "missing_function"  # Expected function call is missing
    MISSING_ARGUMENT = "missing_argument"  # Expected argument is missing
    WRONG_ARGUMENT_TYPE = "wrong_argument_type"  # Wrong type of argument
    WRONG_ARGUMENT_VALUE = "wrong_argument_value"  # Wrong value of argument

# Define a validation error structure
class ValidationError(NamedTuple):
    error_type: ErrorType
    message: str
    call_index: Optional[int] = None
    arg_name: Optional[str] = None

def _get_type_name(type_obj: type) -> str:
    if hasattr(type_obj, '__origin__') and type_obj.__origin__ is not None:
        return str(type_obj).replace('typing.', '')
    elif hasattr(type_obj, '__name__'):
        return type_obj.__name__
    elif type_obj is type(None):
        return 'None'
    else:
        return str(type_obj).replace('typing.', '')

def restore_type(type_str: str) -> Any:
    safe_globals = {
        'List': list,
        'Dict': dict,
        'Tuple': tuple,
        'Set': set,
        'Union': Union,
        'Any': Any,
        'int': int, 'float': float, 'str': str, 'bool': bool, 'None': type(None)
    }
    try:
        return eval(type_str, safe_globals)
    except (NameError, TypeError, SyntaxError) as e:
        raise ValueError(f"Invalid type string: {type_str}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error while restoring type: {type_str}") from e

def _compare_single_value_deep(actual: Any, expected_pattern: Any, current_expected_type_str: str, param_path: str, call_index: int) -> Union[bool, ValidationError]:
    try:
        full_expected_type = restore_type(current_expected_type_str)
        origin_type = get_origin(full_expected_type)
        args_types = get_args(full_expected_type)

        # Handle simple types, Union, or Any where expected_pattern might be a list of acceptable values
        if origin_type is None or origin_type is Union or origin_type is Any:
            if isinstance(expected_pattern, list) and not isinstance(actual, list):
                if actual not in expected_pattern:
                    return ValidationError(
                        error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                        message=f"Call {call_index+1}, Arg '{param_path}': Actual value '{actual}' not in expected value list '{expected_pattern}'.",
                        call_index=call_index,
                        arg_name=param_path
                    )
                return True
            else:
                if actual != expected_pattern:
                    return ValidationError(
                        error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                        message=f"Call {call_index+1}, Arg '{param_path}': Expected value '{expected_pattern}', got '{actual}'.",
                        call_index=call_index,
                        arg_name=param_path
                    )
                return True

        if origin_type is dict:
            if not isinstance(actual, dict):
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                    message=f"Call {call_index+1}, Arg '{param_path}': Expected dictionary, got non-dictionary type '{type(actual).__name__}'.",
                    call_index=call_index,
                    arg_name=param_path
                )
            if not (actual.keys() <= expected_pattern.keys() and expected_pattern.keys() <= actual.keys()):
                 return ValidationError(
                     error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                     message=f"Call {call_index+1}, Arg '{param_path}': Dictionary key sets do not match. Expected keys: {list(expected_pattern.keys())}, Actual keys: {list(actual.keys())}.",
                     call_index=call_index,
                     arg_name=param_path
                 )
            
            if len(args_types) != 2:
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                    message=f"Call {call_index+1}, Arg '{param_path}': Dict type hint incomplete, requires key and value types.",
                    call_index=call_index,
                    arg_name=param_path
                )
            expected_value_type_obj = args_types[1]

            for key in expected_pattern:
                if key not in actual:
                    return ValidationError(
                        error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                        message=f"Call {call_index+1}, Arg '{param_path}.{key}': Dictionary field is missing.",
                        call_index=call_index,
                        arg_name=f"{param_path}.{key}"
                    )
                
                # Check for specific case: expected_pattern[key] is a list, but actual[key] is a single item.
                # This signifies that expected_pattern[key] is a list of acceptable values for actual[key].
                if isinstance(expected_pattern[key], list) and not isinstance(actual[key], list):
                    if actual[key] not in expected_pattern[key]:
                        return ValidationError(
                            error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                            message=f"Call {call_index+1}, Arg '{param_path}.{key}': Actual value '{actual[key]}' not in expected value list '{expected_pattern[key]}'.",
                            call_index=call_index,
                            arg_name=f"{param_path}.{key}"
                        )
                    continue # Value matched, move to next key

                # Otherwise, proceed with deep comparison using the derived type string
                next_expected_type_str = _get_type_name(expected_value_type_obj)
                comparison_result = _compare_single_value_deep(actual[key], expected_pattern[key], next_expected_type_str, f"{param_path}.{key}", call_index)
                if not comparison_result:
                    return comparison_result
            return True

        elif origin_type is list:
            if not isinstance(actual, list):
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                    message=f"Call {call_index+1}, Arg '{param_path}': Expected list, got non-list type '{type(actual).__name__}'.",
                    call_index=call_index,
                    arg_name=param_path
                )
            if len(actual) != len(expected_pattern):
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                    message=f"Call {call_index+1}, Arg '{param_path}': List lengths do not match. Expected length: {len(expected_pattern)}, Actual length: {len(actual)}.",
                    call_index=call_index,
                    arg_name=param_path
                )

            if len(args_types) != 1:
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                    message=f"Call {call_index+1}, Arg '{param_path}': List type hint incomplete, requires element type.",
                    call_index=call_index,
                    arg_name=param_path
                )
            expected_item_type_obj = args_types[0]

            for i in range(len(actual)):
                next_expected_type_str = _get_type_name(expected_item_type_obj)
                comparison_result = _compare_single_value_deep(actual[i], expected_pattern[i], next_expected_type_str, f"{param_path}[{i}]", call_index)
                if not comparison_result:
                    return comparison_result
            return True

        elif origin_type is tuple:
            if not isinstance(actual, tuple):
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                    message=f"Call {call_index+1}, Arg '{param_path}': Expected tuple, got non-tuple type '{type(actual).__name__}'.",
                    call_index=call_index,
                    arg_name=param_path
                )
            if len(actual) != len(expected_pattern):
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                    message=f"Call {call_index+1}, Arg '{param_path}': Tuple lengths do not match. Expected length: {len(expected_pattern)}, Actual length: {len(actual)}.",
                    call_index=call_index,
                    arg_name=param_path
                )

            expected_types_for_elements = [_get_type_name(t) for t in args_types] if args_types else []
            while len(expected_types_for_elements) < len(actual):
                expected_types_for_elements.append('Any')

            for i in range(len(actual)):
                next_expected_type_str = expected_types_for_elements[i]
                comparison_result = _compare_single_value_deep(actual[i], expected_pattern[i], next_expected_type_str, f"{param_path}[{i}]", call_index)
                if not comparison_result:
                    return comparison_result
            return True

        elif origin_type is set:
            if not isinstance(actual, set):
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                    message=f"Call {call_index+1}, Arg '{param_path}': Expected set, got non-set type '{type(actual).__name__}'.",
                    call_index=call_index,
                    arg_name=param_path
                )
            if actual != expected_pattern:
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                    message=f"Call {call_index+1}, Arg '{param_path}': Sets are not equal. Expected: '{expected_pattern}', Actual: '{actual}'.",
                    call_index=call_index,
                    arg_name=param_path
                )
            return True
        
        else:
            if actual != expected_pattern:
                return ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_VALUE,
                    message=f"Call {call_index+1}, Arg '{param_path}': Expected value '{expected_pattern}', got '{actual}'.",
                    call_index=call_index,
                    arg_name=param_path
                )
            return True

    except (ValueError, RuntimeError) as e:
        return ValidationError(
            error_type=ErrorType.WRONG_ARGUMENT_VALUE,
            message=f"Call {call_index+1}, Arg '{param_path}': Value validation failed due to type parsing exception. Details: {e}",
            call_index=call_index,
            arg_name=param_path
        )
    except Exception as e:
        import traceback
        return ValidationError(
            error_type=ErrorType.WRONG_ARGUMENT_VALUE,
            message=f"Call {call_index+1}, Arg '{param_path}': An unexpected error occurred during recursive value validation. Details: {e}\n{traceback.format_exc()}",
            call_index=call_index,
            arg_name=param_path
        )


def validate_argument(actual_value: Any, expected_arg: Dict[str, Any], call_index: int = 0) -> Union[Tuple[bool, str], Tuple[bool, ValidationError]]:
    expected_name = expected_arg.get("name", "unknown_param")
    expected_type_str = expected_arg["type"]
    expected_value = expected_arg["value"]

    # --- Type Validation ---
    try:
        full_expected_type = restore_type(expected_type_str)
        
        origin_type = get_origin(full_expected_type)
        args_types = get_args(full_expected_type)

        if origin_type is Union:
            if not isinstance(actual_value, full_expected_type):
                return False, ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                    message=f"Call {call_index+1}, Arg '{expected_name}': Expected type '{expected_type_str}', got '{type(actual_value).__name__}'.",
                    call_index=call_index,
                    arg_name=expected_name
                )
        elif origin_type:
            if not isinstance(actual_value, origin_type):
                return False, ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                    message=f"Call {call_index+1}, Arg '{expected_name}': Expected container type '{expected_type_str}' (base type: '{origin_type.__name__}'), got '{type(actual_value).__name__}'.",
                    call_index=call_index,
                    arg_name=expected_name
                )

            if origin_type is list:
                if not all(isinstance(item, args_types[0]) for item in actual_value):
                    return False, ValidationError(
                        error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                        message=f"Call {call_index+1}, Arg '{expected_name}': List inner element type mismatch. Expected inner element type: '{_get_type_name(args_types[0])}'.",
                        call_index=call_index,
                        arg_name=expected_name
                    )
            elif origin_type is dict:
                if len(args_types) != 2:
                    return False, ValidationError(
                        error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                        message=f"Call {call_index+1}, Arg '{expected_name}': Dict type hint incomplete, requires key and value types.",
                        call_index=call_index,
                        arg_name=expected_name
                    )
                expected_key_type = args_types[0]
                expected_value_type = args_types[1]
                for k, v in actual_value.items():
                    if not isinstance(k, expected_key_type) or not isinstance(v, expected_value_type):
                        return False, ValidationError(
                            error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                            message=f"Call {call_index+1}, Arg '{expected_name}': Dict key-value type mismatch. Expected key type: '{_get_type_name(expected_key_type)}', Expected value type: '{_get_type_name(expected_value_type)}'.",
                            call_index=call_index,
                            arg_name=expected_name
                        )
            elif origin_type is tuple:
                if actual_value is not None and len(args_types) > 0 and args_types[-1] is Ellipsis:
                    expected_item_type = args_types[0]
                    if not all(isinstance(item, expected_item_type) for item in actual_value):
                        return False, ValidationError(
                            error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                            message=f"Call {call_index+1}, Arg '{expected_name}': Tuple inner element type mismatch. Expected inner element type: '{_get_type_name(expected_item_type)}'.",
                            call_index=call_index,
                            arg_name=expected_name
                        )
                else:
                    if len(actual_value) != len(args_types):
                        return False, ValidationError(
                            error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                            message=f"Call {call_index+1}, Arg '{expected_name}': Tuple length mismatch. Expected length: {len(args_types)}, Actual length: {len(actual_value)}.",
                            call_index=call_index,
                            arg_name=expected_name
                        )
                    for i, item in enumerate(actual_value):
                        if not isinstance(item, args_types[i]):
                            return False, ValidationError(
                                error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                                message=f"Call {call_index+1}, Arg '{expected_name}': Tuple element at index {i} type mismatch. Expected type: '{_get_type_name(args_types[i])}'.",
                                call_index=call_index,
                                arg_name=expected_name
                            )
            elif origin_type is set:
                if not all(isinstance(item, args_types[0]) for item in actual_value):
                    return False, ValidationError(
                        error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                        message=f"Call {call_index+1}, Arg '{expected_name}': Set inner element type mismatch. Expected inner element type: '{_get_type_name(args_types[0])}'.",
                        call_index=call_index,
                        arg_name=expected_name
                    )
        else:
            if not isinstance(actual_value, full_expected_type):
                return False, ValidationError(
                    error_type=ErrorType.WRONG_ARGUMENT_TYPE,
                    message=f"Call {call_index+1}, Arg '{expected_name}': Expected type '{expected_type_str}', got '{type(actual_value).__name__}'.",
                    call_index=call_index,
                    arg_name=expected_name
                )

    except ValueError as e:
        return False, ValidationError(
            error_type=ErrorType.WRONG_ARGUMENT_TYPE,
            message=f"Call {call_index+1}, Arg '{expected_name}': Expected type definition is invalid. Details: {e}",
            call_index=call_index,
            arg_name=expected_name
        )
    except Exception as e:
        return False, ValidationError(
            error_type=ErrorType.WRONG_ARGUMENT_TYPE,
            message=f"Call {call_index+1}, Arg '{expected_name}': An unexpected error occurred during type validation. Details: {e}",
            call_index=call_index,
            arg_name=expected_name
        )

    # --- Value Validation ---
    if isinstance(expected_value, list):
        for pattern_item in expected_value:
            comparison_result = _compare_single_value_deep(actual_value, pattern_item, expected_type_str, expected_name, call_index)
            if isinstance(comparison_result, bool) and comparison_result:
                return True, "Success"
            elif isinstance(comparison_result, ValidationError):
                continue
        return False, ValidationError(
            error_type=ErrorType.WRONG_ARGUMENT_VALUE,
            message=f"Call {call_index+1}, Arg '{expected_name}': Actual value: '{actual_value}' not found in any matching item in expected value list '{expected_value}'.",
            call_index=call_index,
            arg_name=expected_name
        )
    else:
        comparison_result = _compare_single_value_deep(actual_value, expected_value, expected_type_str, expected_name, call_index)
        if isinstance(comparison_result, bool) and comparison_result:
            return True, "Success"
        elif isinstance(comparison_result, ValidationError):
            return False, comparison_result

    return True, "Success"

In [10]:
validate_argument(actual_value, expected_arg)

(True, 'Success')

In [11]:
actual_value = {
                                    "name": "John Doe",
                                    "email": "johndoe@email.com"
                                }

In [12]:
expected_arg = {
                                    "name": "update_info",
                                    "value": [
                                        {
                                            "name": [
                                                "John Doe"
                                            ],
                                            "email": [
                                                "johndoe@email.com"
                                            ]
                                        }
                                    ],
                                    "type": "Dict[str, str]"
                                }

In [13]:
validate_argument(actual_value, expected_arg)

(True, 'Success')

In [22]:
actual_value = {
                                    "name": ["K"],
                                    "email": "johndoe@email.com"
                                }
expected_arg = {
                                    "name": "update_info",
                                    "value": [
                                        {
                                            "name": [
                                                "J","K"
                                            ],
                                            "email": [
                                                "johndoe@email.com"
                                            ]
                                        }
                                    ],
                                    "type": "Dict[str, str]"
                                }

In [23]:
validate_argument(actual_value, expected_arg)

(False,
 ValidationError(error_type=<ErrorType.WRONG_ARGUMENT_TYPE: 'wrong_argument_type'>, message="Call 1, Arg 'update_info': Dict key-value type mismatch. Expected key type: 'str', Expected value type: 'str'.", call_index=0, arg_name='update_info'))

In [2]:
from typing import List, Dict, Tuple, Set, Union, Any, get_origin, get_args, Optional, NamedTuple

In [3]:
def _get_type_name(type_obj: type) -> str:
    # This function is used to convert a type object (e.g., int, List[float]) into a string that can be parsed by eval().
    # It prioritizes standard typing module string representations.
    if hasattr(type_obj, '__origin__') and type_obj.__origin__ is not None:
        # For typing generics like List[float], Dict[str, int]
        return str(type_obj).replace('typing.', '')
    elif hasattr(type_obj, '__name__'):
        # For built-in types like int, str, float, bool
        return type_obj.__name__
    elif type_obj is type(None): # Specific handling for NoneType
        return 'None'
    else:
        # Fallback for other types, attempting to clean if it's already a <class 'X'> string repr
        s_type_obj = str(type_obj)
        if s_type_obj.startswith("<class '") and s_type_obj.endswith("'>"):
            return s_type_obj[8:-2] # Remove "<class '" and "'>"
        return s_type_obj.replace('typing.', '')

In [4]:
def restore_type(type_str: str) -> Any:
    # --- Enhanced Pre-processing for incoming type_str ---
    # This handles cases where type_str might come in as "<class 'int'>" or "NoneType" etc.
    cleaned_type_str = type_str
    if cleaned_type_str.startswith("<class '") and cleaned_type_str.endswith("'>"):
        cleaned_type_str = cleaned_type_str[8:-2] # Remove "<class '" and "'>"
    
    # Specific mapping for common built-in types that might be represented differently
    type_name_map = {
        'NoneType': 'None',
        'int': 'int',
        'float': 'float',
        'str': 'str',
        'bool': 'bool',
        'list': 'List', # Map built-in list to typing.List if used as generic base
        'dict': 'Dict', # Map built-in dict to typing.Dict if used as generic base
        'tuple': 'Tuple',
        'set': 'Set'
    }
    cleaned_type_str_lower = cleaned_type_str.lower() # For case-insensitive matching if needed

    # Try mapping if it's a direct built-in type name after cleaning
    if cleaned_type_str in type_name_map:
        cleaned_type_str = type_name_map[cleaned_type_str]
    elif cleaned_type_str_lower in type_name_map: # Handle lower case e.g. "int" -> "Int"
        cleaned_type_str = type_name_map[cleaned_type_str_lower]
        
    # If it's a complex typing hint like 'List[float]', it remains as is for eval
    # If it's a simple type like 'int', it also remains as is
    final_type_str_to_eval = cleaned_type_str

    safe_globals = {
        'List': list,
        'Dict': dict,
        'Tuple': tuple,
        'Set': set,
        'Union': Union,
        'Any': Any,
        'int': int, 'float': float, 'str': str, 'bool': bool, 'None': type(None) # Make sure NoneType is covered by 'None' in safe_globals
    }
    try:
        # Use final_type_str_to_eval for evaluation
        return eval(final_type_str_to_eval, safe_globals)
    except (NameError, TypeError, SyntaxError) as e:
        # If it still fails, the type string is genuinely invalid or unresolvable
        raise ValueError(f"Invalid type string: {type_str} (Cleaned to: '{final_type_str_to_eval}'). Details: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error while restoring type: {type_str} (Cleaned to: '{final_type_str_to_eval}'). Details: {e}") from e

In [14]:
current_expected_type_str = "List[int]"
full_expected_type = restore_type(current_expected_type_str)
origin_type = get_origin(full_expected_type)
args_types = get_args(full_expected_type)

In [19]:
for item in actual_value:
    print(isinstance(item, args_types[0]))

True
True
True
True
True


In [28]:
actual_value = [
                                    "a",
                                    10,
                                    15,
                                    20,
                                    25
                                ]

for item in actual_value:
    if not isinstance(item, args_types[0]):
        print(item)

a


In [None]:
a = {"scenario": "simple_83", "conversations": [{"id": "simple_83_conversation", "turns": [{"query": "Calculate the distance between two GPS coordinates (33.4484 N, 112.0740 W) and (34.0522 N, 118.2437 W) in miles.", "reference_response": "", "actual_response": "None", "expected_calls": [{"name": "calculate_distance", "required": true, "arguments": [{"name": "coord1", "value": [[33.4484, -112.074]], "type": "Tuple[float]", "required": true}, {"name": "coord2", "value": [[34.0522, -118.2437]], "type": "Tuple[float]", "required": true}, {"name": "unit", "value": ["miles"], "type": "str", "required": true}]}], "actual_calls": [{"function": "calculate_distance", "arguments": {"coord1": [33.4484, -112.074], "coord2": [34.0522, -118.2437], "unit": "miles"}, "call_id": "call_Cdy0caXiRoSLOve8RBb2QQ"}], "validation_errors": ["Call 1, Arg 'coord1': Actual value: '[33.4484, -112.074]' not found in any matching item in expected value list '[[33.4484, -112.074]]'.", "Call 1, Arg 'coord2': Actual value: '[34.0522, -118.2437]' not found in any matching item in expected value list '[[34.0522, -118.2437]]'."], "metrics": {"missing_calls": 0, "wrong_argument_types": 0, "wrong_argument_values": 0, "missing_arguments": 0, "steps": 1}, "success": false}]}], "metrics": {"total_turns": 1, "successful_turns": 0, "failed_turns": 1, "total_expected_calls": 1, "total_actual_calls": 1, "missing_calls": 0, "wrong_argument_values": 0, "wrong_argument_types": 0, "missing_arguments": 0, "wrong_argument_names": 0, "total_steps": 1, "success_rate": 0.0}, "scenario_name": "simple_83"}
b = {"scenario": "simple_83", "conversations": [{"id": "simple_83_conversation", "turns": [{"query": "Calculate the distance between two GPS coordinates (33.4484 N, 112.0740 W) and (34.0522 N, 118.2437 W) in miles.", "reference_response": "", "actual_response": "It appears there may be an issue with the `calculate_distance` function implementation, as it's not returning a numerical distance value as expected. Since we're unable to get a proper distance calculation through the function, I can't provide the exact distance between these coordinates.\n\nThe approximate distance between Phoenix, AZ (33.4484° N, 112.0740° W) and Los Angeles, CA (34.0522° N, 118.2437° W) is typically around 370 miles when measured in a straight line (as the crow flies). This is based on general geographical knowledge since we couldn't get the precise calculation to work.", "expected_calls": [{"name": "calculate_distance", "required": true, "arguments": [{"name": "coord1", "value": [[33.4484, -112.074]], "type": "Tuple[float]", "required": true}, {"name": "coord2", "value": [[34.0522, -118.2437]], "type": "Tuple[float]", "required": true}, {"name": "unit", "value": ["miles"], "type": "str", "required": true}]}], "actual_calls": [{"function": "calculate_distance", "arguments": {"coord1": [33.4484, -112.074], "coord2": [34.0522, -118.2437], "unit": "miles"}, "call_id": 1}, {"function": "calculate_distance", "arguments": {"coord1": [33.4484, -112.074], "coord2": [34.0522, -118.2437], "unit": "miles"}, "call_id": 2}], "validation_errors": ["Call 1, Arg 'coord1': Tuple length mismatch. Expected length: 1, Actual length: 2.", "Call 1, Arg 'coord2': Tuple length mismatch. Expected length: 1, Actual length: 2."], "metrics": {"missing_calls": 0, "wrong_argument_types": 0, "wrong_argument_values": 0, "missing_arguments": 0, "steps": 3}, "success": false}]}], "metrics": {"total_turns": 1, "successful_turns": 0, "failed_turns": 1, "total_expected_calls": 1, "total_actual_calls": 2, "missing_calls": 0, "wrong_argument_values": 0, "wrong_argument_types": 0, "missing_arguments": 0, "wrong_argument_names": 0, "total_steps": 3, "success_rate": 0.0}, "scenario_name": "simple_83"}