In [1]:
import json
import pandas as pd
from prompts import get_functions, get_shared_args, get_system_prompt

In [2]:
from dotenv import load_dotenv
from datetime import date, datetime
import os
import requests
import pickle
import time

load_dotenv()

# Configuration
API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
headers = {
    "Content-Type": "application/json",
    "api-key": API_KEY,
}

In [3]:
source_dir = 'source'
result_dir = 'result'

In [4]:
# with open('function.json', 'r') as f:
#   FUNCTIONS = json.load(f)

# func_desc = get_functions(FUNCTIONS)

from source import func_description

func_desc = func_description.function_desc

In [5]:
shared_args_file_name = 'shared_arguments.json'
shared_args_file_path = os.path.join(source_dir, shared_args_file_name)
with open(shared_args_file_path, 'r') as f:
    shared_arguments = json.load(f)

args_desc = get_shared_args(shared_arguments)

In [6]:
file_name = "요금 Agent Intent 정의 및 발화 예시.xlsx"
file_path = os.path.join(source_dir, file_name)
plan_list_data = pd.read_excel(file_path, sheet_name='요금제 목록')
plan_list = list(plan_list_data['상품명'].unique())
lineup_list = list(plan_list_data['요금제 라인업, 혜택 라인업'].unique())

In [7]:
from source.examples import examples

system_prompt = get_system_prompt(
    # plan_list=plan_list, 
    # lineup_list=lineup_list, 
    args_desc=args_desc,
    functions_desc=func_desc,
    examples=examples
)
print(system_prompt)

You're a function classifier that needs to identify the appropriate function related to SKTelecom rate plans in order to accurately respond to the user's utterances.  You're actively involved in a three-way conversation with 'user', 'function' and yourself ('assistant').  You must classify the appropriate "function name" and "arguments" according to the user's utterance, and keep the following rules:
    1. "Function name" must be classified only from the lists provided below. You SHOULD NEVER GUESS and CREATE something that is not in the defined list.
    2. Arguments may or may not be required depending on the selected function.
    3. If the selected function has a 'required' field, you must fill in the arguments and send it.
    4. Arguments can be inferred from the user's utterance or the previous conversation.

### Shared Arguments
These argumentsd are shared by multiple functions.
    1. **keywords** (object):
    	- Description: search keywords
    	- Keys:
    		- productName 

In [8]:
# Payload for the request
def get_payload(system_prompt:str, user_prompt:str) -> dict:
    return {
        "messages": [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": system_prompt
                    }
                ]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": user_prompt
                    }
                ]
            },
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "text",
                        "text": ""
                    }
                ]
            }
        ],
        "temperature": 0.0
    }

def send_request(system_prompt:str, user_prompt:str, **kwargs)->dict:
    ENDPOINT = f"{os.getenv('AZURE_OPENAI_ENDPOINT')}openai/deployments/{os.getenv('AZURE_OPENAI_MODEL_NAME')}/chat/completions?api-version={os.getenv('AZURE_OPENAI_API_VERSION')}"
    payload = get_payload(system_prompt, user_prompt)
    headers = kwargs.get('headers')

    # Send request
    try:
        start = time.time()
        response = requests.post(ENDPOINT, headers=headers, json=payload)
        time_lapse = time.time() - start
        response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
    except requests.HTTPError as e:
        if e.response.status_code == 400:
            # Handle 400 Bad Request error specifically
            error_detail = e.response.json() if e.response.content else "No additional error details"
            print(f"400 Client Error: {user_prompt = }")  # Print the error details
            raise SystemExit(error_detail) 
        else:
            # Handle other HTTP errors
            raise SystemExit(f"Failed to make the request. Error: {e}")

    try:
        parsed_response = response.json()

        # Ensure 'choices' is present and non-empty
        if 'choices' in parsed_response and parsed_response['choices']:
            # Extract the content that is a string representation of JSON
            content = parsed_response['choices'][0]['message']['content'].strip()

            # Remove the code block formatting (```json ... ```)
            if content.startswith("```json") and content.endswith("```"):
                content = content[7:-3].strip()  # Strip off the ```json and ```

            # Now load the cleaned content as JSON
            result = json.loads(content)  # Load it as JSON

            # Inject time_lapse
            result['time_lapse'] = time_lapse
            return result
        else:
            raise ValueError("Unexpected response structure: 'choices' key not found or empty.")
            
    except ValueError as e:
        print(f"ValueError: {e}. Response content: {parsed_response}")
        raise SystemExit("Failed to parse the response as JSON.")
    except json.JSONDecodeError as e:
        print(f"JSONDecodeError: {e}. Raw response: {response.text}")
        raise SystemExit("Failed to decode JSON response.")

In [9]:
def compare_dicts(dict1, dict2, path="")->dict:
    """Recursively compare two nested dictionaries, return True if they are identical, otherwise False.
    Also print the location of mismatches."""
    
    # Check if both are dictionaries
    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
        mismatch_info = f"Type mismatch at path '{path}': {type(dict1)} != {type(dict2)}"
        print(mismatch_info)
        return {'result': False, 'message':{'mismatch_info':mismatch_info, 'true_output': dict1, 'assistant_output': dict2}}

    # Compare the keys
    if dict1.keys() != dict2.keys():
        mismatch_info = f"Key mismatch at path '{path}': {dict1.keys()} != {dict2.keys()}"
        print(mismatch_info)
        return {'result': False, 'message':{'mismatch_info':mismatch_info, 'true_output': dict1, 'assistant_output': dict2}}

    # Compare the values for each key
    for key in dict1:
        new_path = f"{path}->{key}" if path else key  # Build the path to the current key
        val1, val2 = dict1[key], dict2[key]

        # Check if both values are dictionaries (including empty ones)
        if isinstance(val1, dict) and isinstance(val2, dict):
            # Recursively compare nested dictionaries
            res = compare_dicts(val1, val2, new_path)
            if not res['result']:
                return {'result': False, 'message': res['message']}
        # Check for value mismatch and handle empty dictionaries
        elif val1 != val2:
            # Check if both are empty dictionaries or other empty types
            if isinstance(val1, dict) and isinstance(val2, dict) and not val1 and not val2:
                continue  # Both are empty dictionaries, consider them equal
            mismatch_info = f"Value mismatch at path '{new_path}': {val1} != {val2}. type({type(val1)} != {type(val2)}), len({len(val1)} != {len(val2)})"
            print(mismatch_info)
            return {'result': False, 'message':{'mismatch_info':mismatch_info, 'true_output': dict1, 'assistant_output': dict2}}

    return {'result':True, 'message':{}}

In [10]:
def save_pickle(content, file_name: str) -> None:
    """Save content to a pickle file, handling different types and missing files."""
    
    # Check if file exists and load its content, otherwise initialize saved_content
    if os.path.exists(file_name):
        try:
            with open(file_name, 'rb') as f:
                saved_content = pickle.load(f)
        except (EOFError, pickle.UnpicklingError):
            # Handle empty file or unpickling errors (e.g., corrupted file)
            saved_content = None
    else:
        saved_content = None

    # Merge or process the content based on its type and existing saved_content
    if isinstance(saved_content, list):
        new_content = saved_content + content
    elif isinstance(saved_content, set):
        new_content = saved_content.intersection(content)
    elif isinstance(saved_content, dict):
        new_content = saved_content | content
    else:
        # Handle the case when there's no saved_content (e.g., first-time save)
        new_content = content

    # Save the updated content back to the pickle file
    file_path = os.path.join(result_dir, file_name)
    with open(file_path, 'wb') as f:
        pickle.dump(new_content, f)


In [11]:
from tqdm import tqdm

def send_request_with_retry(**kwargs):
    """Send request and retry if a SystemExit is encountered."""
    system_prompt = kwargs.get('system_prompt')
    headers = kwargs.get('headers')
    max_retries = kwargs.get('max_retries')
    wait_seconds = kwargs.get('wait_seconds')
    user_query = kwargs.get('user_query')
    
    # Define the error message
    error_message = f'Reactivating the requests after waiting for {wait_seconds} seconds...'
    retries = 0
    while retries < max_retries:
        try:
            return send_request(system_prompt=system_prompt, user_prompt=user_query, headers=headers)
        except SystemExit as e:
            retries += 1
            print(f"Attempt {retries} failed: {e}")
            
            # Since e is a SystemExit with a dictionary structure, we can access it directly
            error_details = e.args[0] if e.args else {}

            # Safely extract relevant keys from the error message
            error_message = error_details.get('error', {}).get('message', '')
            content_filter_code = error_details.get('error', {}).get('code', '')
            inner_error = error_details.get('error', {}).get('innererror', {})

            if content_filter_code == 'content_filter':
                return {
                    'content_filter': True, 
                    'innererror': inner_error
                }

            if retries < max_retries:
                print(error_message)
                time.sleep(wait_seconds)  # Wait before retrying
            else:
                print(f"Failed after {max_retries} attempts.")
                raise  # Re-raise the exception after all retries are exhaustedi

def run_and_evaluate(data:pd.DataFrame, **kwargs)->dict:
    # retrieve arguments
    skip_rows = kwargs.get('skip_rows', None)
    target_rows = kwargs.get('target_rows', None)
    today = kwargs.get('today', date.today())

    assert not (skip_rows is not None and target_rows is not None), "skip_rows and target_rows are mutually exclusive"

    content_filter_file_name = kwargs.get('content_filter_file_name', f'content_filtered_{today}.pkl')
    false_index_file_name = kwargs.get('false_index_file_name', f'false_index_{today}.pkl')
    time_lapse_file_name = kwargs.get('time_lapse_file_name', f'time_lapse_{today}.pkl')

    # Initialize loop results
    false_index_list = {}
    time_lapse = {}
    content_filtered = {}

    # Iterate over the rows with progress bar
    pbar = tqdm(data.itertuples(), total=len(data))
    for row in pbar:
        row_id:str = str(row.Index)
        pbar.set_description(f'{row_id = }')

        if target_rows:
            if row_id not in target_rows:
                continue
        
        if skip_rows and int(row_id) < skip_rows:
            continue
        
        # update kwargs
        kwargs['user_query'] = row.Utterance_Sentence.strip()        
        
        true_result = json.loads(row.LLM_output)

        # Measure the time while requesting
        try:
            res = send_request_with_retry(**kwargs)
        except SystemExit:
            continue  # Skip this row if retries failed

        # Record time lapse for each user query
        time_lapse[row_id] = res['time_lapse']
        save_pickle(time_lapse, time_lapse_file_name)

        if res.get('content_filter'):
            content_filtered[row_id] = {}
            content_filtered[row_id]['filtered'] =  [policy for policy, filter_result in res['innererror']['content_filter_result'].items() if filter_result['filtered']]
            content_filtered[row_id]['user_query'] = kwargs['user_query']
            save_pickle(content_filtered, content_filter_file_name)
            continue

        # Compare response with true result
        is_identical = compare_dicts(true_result, res)
        if not is_identical['result']:
            try:
                false_index_list[row_id] = {}
                false_index_list[row_id]['message'] = is_identical['message']
                false_index_list[row_id]['user_query'] = kwargs['user_query']
                save_pickle(false_index_list, false_index_file_name)
            except KeyError as e:
                print(f'error evaluating llm response: {is_identical}')
                raise KeyError(e)
                
    return {'false_index_list':false_index_list, 'time_lapse': time_lapse, 'content_filter': content_filtered}

In [12]:
# Partially evaluate with false_index(inaccurate functions or arguments)
false_index_decoded_file_name = 'false_index_decoded.pkl'
false_index_decoded_file_path = os.path.join(result_dir, false_index_decoded_file_name)
with open(false_index_decoded_file_path, 'rb') as f:
    false_index_df = pickle.load(f)
    target_rows = set(false_index_df.index)

# Partially evaluate with content_filtered(sexuality)
# with open('content_filtered_decoded.pkl', 'rb') as f:
#     content_filtered_df = pickle.load(f)
#     target_rows = set(content_filtered_df.index)

# print(target_rows)


In [14]:
from datetime import date

eval_file_name = "(SKT) 평가 결과_new.xlsx"
eval_file_path = os.path.join(source_dir, eval_file_name)
user_query_data = pd.read_excel(eval_file_path)

# Clean up column names by replacing spaces with underscores
user_query_data.columns = [c.replace(' ', '_') for c in user_query_data.columns]

# Define the maximum number of retries
max_retries = 3  # You can change this value as needed

kwargs = {
    'data' : user_query_data,
    'system_prompt' : system_prompt,
    'max_retries' : 5,
    'wait_seconds' : 60,
    'headers' : headers,
    'content_filter_file_name': f'content_filtered_gpt4o_v202408.pkl',
    'false_index_file_name': f'false_index_gpt4o_v202408.pkl',
    'today' : date.today(),
    # 'skip_rows': 507,
    # 'target_rows' : target_rows
}

results = run_and_evaluate(**kwargs)
results

row_id = '1':   0%|          | 1/600 [00:03<31:46,  3.18s/it]

Key mismatch at path '': dict_keys(['name', 'arguments']) != dict_keys(['name', 'arguments', 'time_lapse'])


row_id = '2':   0%|          | 2/600 [00:06<33:50,  3.40s/it]

Key mismatch at path '': dict_keys(['name', 'arguments']) != dict_keys(['name', 'arguments', 'time_lapse'])





KeyboardInterrupt: 

In [18]:
results_full_file_path = os.path.join(result_dir, f"results_full_{datetime.now().strftime('%Y%m%d_%H%M')}.pkl")
with open(results_full_file_path, 'wb') as f:
    pickle.dump(results, f)