In [1]:
from datetime import date
import os
import json
import pandas as pd
from dotenv import load_dotenv


In [2]:
from source.examples import examples
from source import func_description
from tworld.prompts import get_shared_args, get_system_prompt
from utils import get_aoai_endpoint_in4u, logprob_main

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Configuration
API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
if not API_KEY:
    # Fail fast with a clear message instead of sending a request whose
    # "api-key" header is None.
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; define it in the environment or in the .env file."
    )
headers = {
    "Content-Type": "application/json",
    "api-key": API_KEY,
}

# Define the source directory
source_dir = 'source'

# Define shared_arguments for the system prompt
shared_args_file_name = 'shared_arguments.json'
shared_args_file_path = os.path.join(source_dir, shared_args_file_name)
# Read explicitly as UTF-8 so decoding does not depend on the platform's
# default locale encoding.
with open(shared_args_file_path, 'r', encoding='utf-8') as f:
    shared_arguments = json.load(f)

args_desc = get_shared_args(shared_arguments)

# Define functions descriptions for the system prompt
func_desc = func_description.function_desc

# plan_list / lineup_list are intentionally not passed to the prompt builder.
system_prompt = get_system_prompt(
    args_desc=args_desc,
    functions_desc=func_desc,
    examples=examples
)
print(system_prompt)

You're a function classifier that needs to identify the appropriate function related to SKTelecom rate plans in order to accurately respond to the user's utterances.  You're actively involved in a three-way conversation with 'user', 'function' and yourself ('assistant').  You must classify the appropriate "function name" and "arguments" according to the user's utterance, and keep the following rules:
    1. "Function name" must be classified only from the lists provided below. You SHOULD NEVER GUESS and CREATE something that is not in the defined list.
    2. Arguments may or may not be required depending on the selected function.
    3. If the selected function has a 'required' field, you must fill in the arguments and send it.
    4. Arguments can be inferred from the user's utterance or the previous conversation.

### Shared Arguments
These argumentsd are shared by multiple functions.
    1. **keywords** (object):
    	- Description: search keywords
    	- Keys:
    		- productName 

In [3]:
# Restrict the evaluation to previously misclassified rows
# (inaccurate function names or arguments).

import pickle

result_dir = 'result'

# An earlier run loaded 'false_index_decoded_abridged_20241017.pkl' from the same
# directory and used every sorted, de-duplicated index in it as the target rows.

# Snapshot taken after AVAILABLE_MOBILE_PLAN, mobile plans and lineups were
# removed from the system prompt.
file_name = 'false_index_decoded_abridged_2024-10-23.pkl'
file_path = os.path.join(result_dir, file_name)

# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
with open(file_path, 'rb') as f:
    df: pd.DataFrame = pickle.load(f)

# Keep the rows whose 'mismatch_args' is null -- presumably cases where the
# function name itself was wrong; TODO confirm against the file's producer.
has_no_arg_mismatch = df['mismatch_args'].isna()
name_mismatch = df.loc[has_no_arg_mismatch].copy()
target_rows = name_mismatch.index.tolist()

In [4]:
# Load the evaluation workbook from the source directory.
eval_file_name = "(SKT) 평가 결과_new.xlsx"
eval_file_path = os.path.join(source_dir, eval_file_name)
user_query_data = pd.read_excel(eval_file_path)

# Keyword arguments forwarded to logprob_main.
kwargs = dict(
    data=user_query_data,
    system_prompt=system_prompt,
    endpoint=get_aoai_endpoint_in4u(),
    temperature=0.0,
    headers=headers,
    target_rows=target_rows,
)


In [5]:
# Run the log-probability evaluation.
# NOTE(review): the same DataFrame is passed twice -- explicitly as `df` and
# again as `data` inside kwargs; confirm logprob_main needs both.
results = logprob_main(
    df=user_query_data,
    **kwargs,
)

row.Index = 599: 100%|██████████| 600/600 [01:11<00:00,  8.38it/s]  


In [6]:
# Persist the results as human-readable JSON.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file must be
# opened as UTF-8 explicitly; the platform default encoding may be unable to
# encode them or may produce a file that is not valid UTF-8 JSON.
with open('result/logprobs_gpt4o_v202408.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)