In [1]:
import json
import pandas as pd
import pickle
from tworld.prompts import get_functions, get_shared_args, get_system_prompt
from tworld.tworld_funcs import run_and_evaluate

In [2]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (when one is found) into the process environment,
# so the os.getenv lookup below can see AZURE_OPENAI_API_KEY.
load_dotenv()

# Configuration
# The Azure OpenAI key is taken from the environment; it is None when the
# AZURE_OPENAI_API_KEY variable is not set.
API_KEY = os.environ.get('AZURE_OPENAI_API_KEY')

# Request headers that are later handed to run_and_evaluate via its 'headers' argument.
headers = {"Content-Type": "application/json"}
headers["api-key"] = API_KEY

In [3]:
# Directory names used to build file paths in the cells below: input files are
# read from `source_dir`; result pickles are read from and written to `result_dir`.
source_dir, result_dir = 'source', 'result'

In [4]:
# Alternative kept for reference (disabled): build the function descriptions
# from function.json through get_functions.
# with open('function.json', 'r') as f:
#   FUNCTIONS = json.load(f)

# func_desc = get_functions(FUNCTIONS)

# Active path: take the ready-made description object from the project-local
# `source` package.
from source import func_description

# Passed to get_system_prompt as `functions_desc` further down.
func_desc = func_description.function_desc

In [5]:
# Load the argument definitions shared by several functions and turn them into
# the description passed to get_system_prompt as `args_desc`.
shared_args_file_name = 'shared_arguments.json'
shared_args_file_path = os.path.join(source_dir, shared_args_file_name)

# JSON text is UTF-8 by specification. Pin the encoding so decoding does not
# depend on the platform's default locale codec (e.g. cp949 on Korean Windows).
with open(shared_args_file_path, 'r', encoding='utf-8') as f:
    shared_arguments = json.load(f)

args_desc = get_shared_args(shared_arguments)

In [6]:
# Read the plan catalogue sheet from the intent-definition workbook.
file_name = "요금 Agent Intent 정의 및 발화 예시.xlsx"
file_path = os.path.join(source_dir, file_name)
plan_list_data = pd.read_excel(file_path, sheet_name='요금제 목록')

# Distinct values of the product-name column and of the lineup column,
# each in order of first appearance.
plan_list, lineup_list = (
    list(plan_list_data[column_name].unique())
    for column_name in ('상품명', '요금제 라인업, 혜택 라인업')
)

In [7]:
# Few-shot examples come from the project-local `source.examples` module.
from source.examples import examples

# Assemble the system prompt from the shared-argument description, the function
# descriptions and the examples. The plan and lineup lists are currently not
# passed (left commented out below).
system_prompt = get_system_prompt(
    # plan_list=plan_list, 
    # lineup_list=lineup_list, 
    args_desc=args_desc,
    functions_desc=func_desc,
    examples=examples
)
# Print the assembled prompt so it can be inspected in the cell output.
print(system_prompt)

You're a function classifier that needs to identify the appropriate function related to SKTelecom rate plans in order to accurately respond to the user's utterances.  You're actively involved in a three-way conversation with 'user', 'function' and yourself ('assistant').  You must classify the appropriate "function name" and "arguments" according to the user's utterance, and keep the following rules:
    1. "Function name" must be classified only from the lists provided below. You SHOULD NEVER GUESS and CREATE something that is not in the defined list.
    2. Arguments may or may not be required depending on the selected function.
    3. If the selected function has a 'required' field, you must fill in the arguments and send it.
    4. Arguments can be inferred from the user's utterance or the previous conversation.

### Shared Arguments
These argumentsd are shared by multiple functions.
    1. **keywords** (object):
    	- Description: search keywords
    	- Keys:
    		- productName 

In [8]:
# Partial evaluation: restrict the run to the rows in false_index (rows with an inaccurate function or arguments)
# false_index_decoded_file_name = 'false_index_decoded.pkl'
# false_index_decoded_file_path = os.path.join(result_dir, false_index_decoded_file_name)
# with open(false_index_decoded_file_path, 'rb') as f:
#     false_index_df = pickle.load(f)
#     target_rows = set(false_index_df.index)

# Partial evaluation: restrict the run to the rows in content_filtered (content-filter category: sexual)
# with open('content_filtered_decoded.pkl', 'rb') as f:
#     content_filtered_df = pickle.load(f)
#     target_rows = set(content_filtered_df.index)

# print(target_rows)


In [9]:
# Pickled frame from the 2024-10-23 run, for which AVAILABLE_MOBILE_PLAN, the
# mobile plans and the lineups had been removed from the system prompt.
file_name = 'false_index_decoded_abridged_2024-10-23.pkl'
file_path = os.path.join(result_dir, file_name)
# NOTE(review): pickle.load executes arbitrary code; only load files produced locally.
with open(file_path, mode='rb') as pkl_file:
    df: pd.DataFrame = pickle.load(pkl_file)

# Keep an independent copy of the rows whose 'mismatch_args' is null, then
# collect their index labels.
name_mismatch = df.loc[df['mismatch_args'].isna()].copy()
target_rows = name_mismatch.index.tolist()

In [10]:
# Load the evaluation sheet (first sheet of the workbook).

# eval_file_name = "(SKT) 평가 결과_new.xlsx"
eval_file_name = "추가_평가_목록.xlsx"
eval_file_path = os.path.join(source_dir, eval_file_name)

# Normalise the column names while loading: every space becomes an underscore.
user_query_data = pd.read_excel(eval_file_path).rename(
    columns=lambda column_name: column_name.replace(' ', '_')
)
user_query_data.head(10)

Unnamed: 0,Feature_ID,Utterance_Sentence_ID,Utterance_Sentence,종합_테스트_결과,True_Intent
0,F01-I01,F01-I01_Temp_001,"이번달 좀 많이 쓴 것 같은데, 내 이번 달 청구금액이 얼만큼 더 추가될지 예상 가능해?",FAIL,billing_charge_analyze_summary
1,F06-I01,F06-I01_Temp_001,5GX플래티넘 요금제로 요금제 바꾸면 요금 얼마나 더 내야 돼?,FAIL,estimated_billing_charge_compare_to_current
2,F06-I01,F06-I01_Temp_002,음성통화 제공하는 요금제로 변경할때 예상요금 알려줘,FAIL,estimated_billing_charge_compare_to_current
3,F06-I01,F06-I01_Temp_030,테더링 10기가 이상 가능하고 가격이 꽤 나가는 요금제 가격 알려줘,FAIL,estimated_billing_charge_compare_to_current
4,F06-I01,F06-I01_Temp_031,FLO 할인이 100% 되는 혜택이랑 테더링 가능한 데이터가 10기가 이하인 요금제...,FAIL,estimated_billing_charge_compare_to_current
5,F06-I01,F06-I01_Temp_037,"엄마랑 1시간 이상 통화하는 요금제랑, 2시간 이상 통화하는 요금제로 변경하게 될 ...",FAIL,estimated_billing_charge_compare_to_current
6,F06-I01,F06-I01_Temp_038,"데이터가 기본적으로 50GB 이상 쓸 수 있는 요금제랑, 100GB 이상 쓸 수 있...",FAIL,estimated_billing_charge_compare_to_current
7,F06-I01,F06-I01_Temp_055,"할아버지와 60분의 무료 통화가 가능한 온라인 전용 요금제, 영상통화가 부족하지 않...",FAIL,estimated_billing_charge_compare_to_current
8,F06-I01,F06-I01_Temp_056,"문자메시지는 무제한 발신 가능, 한 사람과 과금 없이 5시간까지 통화 가능한 뉴 T...",FAIL,estimated_billing_charge_compare_to_current
9,F17-I08,F17-I08_Temp_004,다른 요금제 옵션으로 지금 변경 가능해?,FAIL,changable_date_for_plan


In [11]:
from datetime import date
from utils import get_aoai_endpoint_in4u

# Capture the date once so the two output file names and the 'today' argument
# always agree, even when the cell happens to run across midnight.
run_date = date.today()

# Maximum number of retries passed to run_and_evaluate. This is the single
# source of truth for the value; change it here rather than inside `kwargs`.
max_retries = 5

# Arguments for run_and_evaluate. Commented-out entries are optional switches
# that are currently disabled.
kwargs = {
    'run_evaluate': False,
    'data': user_query_data,
    'system_prompt': system_prompt,
    'endpoint': get_aoai_endpoint_in4u(),
    # NOTE(review): 'temparature' (sic) has to match the parameter name that
    # run_and_evaluate accepts; confirm against its signature before renaming.
    'temparature': 0.0,
    'max_retries': max_retries,
    'wait_seconds': 60,
    'headers': headers,
    'content_filter_file_name': f'content_filtered_gpt4o_v202408_{run_date}.pkl',
    'false_index_file_name': f'false_index_gpt4o_v202408_{run_date}.pkl',
    'today': run_date,
    # 'skip_rows': 507,
    # 'target_rows' : target_rows # target_rows
}

results = run_and_evaluate(**kwargs)
results

row_id = '0':   0%|          | 0/10 [00:00<?, ?it/s]

row_id = '9': 100%|██████████| 10/10 [00:23<00:00,  2.31s/it]


{'false_index_list': {},
 'time_lapse': {'0': 1.6275665760040283,
  '1': 2.085909366607666,
  '2': 2.16300892829895,
  '3': 2.047333240509033,
  '4': 2.1461644172668457,
  '5': 2.245622158050537,
  '6': 2.761965274810791,
  '7': 3.2733328342437744,
  '8': 2.3441545963287354,
  '9': 2.3505945205688477},
 'content_filter': {},
 'original_response': {'0': {'choices': [{'content_filter_results': {'hate': {'filtered': False,
       'severity': 'safe'},
      'protected_material_code': {'filtered': False, 'detected': False},
      'protected_material_text': {'filtered': False, 'detected': False},
      'self_harm': {'filtered': False, 'severity': 'safe'},
      'sexual': {'filtered': False, 'severity': 'safe'},
      'violence': {'filtered': False, 'severity': 'safe'}},
     'finish_reason': 'stop',
     'index': 0,
     'logprobs': {'content': [{'bytes': [123, 34],
        'logprob': -0.04163574,
        'token': '{"',
        'top_logprobs': []},
       {'bytes': [110, 97, 109, 101],
     

In [13]:
# Persist the complete results object as a date-stamped pickle in the result directory.
# results_full_file_path = os.path.join(result_dir, f"results_full_{date.today()}.pkl")
results_full_file_path = os.path.join(
    result_dir,
    f"results_full_add_{date.today()}.pkl",
)
with open(results_full_file_path, mode='wb') as results_file:
    pickle.dump(results, results_file)