In [1]:
from my_funcs import (
    default_transformation, read_json_from_data_dir,
    normalize, embed_query_retrieve_examples,
    make_two_type_msg, get_python_chat_prompt, 
    parse_python_completion, update_dialogue_state, 
    compute_acc, calculate_token_f1, evaluate,
    DataOntologyNormalizer, Ontology,
    copy, defaultdict, random,
    openai, tiktoken, SentenceTransformer
)

import os
import json
import numpy as np
from rank_bm25 import BM25Okapi

# Obtain the OpenAI key through the local `my_openai_key` helper (instead of a
# literal in the notebook) and set it on the module-level openai client.
from my_openai_key import get_openai_key
openai.api_key = get_openai_key()



In [None]:
# BM25 check: rank train examples against a single dev query and display the
# top-k (index, score) pairs.
# Load dev data to get a query and train data to retrieve examples
with open("data/mw24_100p_dev_100_sampled.json", 'r') as f:
    dev_dataset = json.load(f)
with open('data/mw21_1p_train_v1.json', 'r') as f:
    train_dataset = json.load(f)

corpus = list(map(default_transformation, train_dataset))

# Whitespace-tokenize every document; the query below is tokenized the same way
# so that query tokens and corpus tokens are comparable.
word_tokenized_corpus = [sent.split() for sent in corpus]

# NOTE(review): only the first 5 train examples are indexed -- this looks like a
# debugging limit; confirm before relying on the ranking.
bm25 = BM25Okapi(word_tokenized_corpus[:5])

# NOTE(review): index 100 needs at least 101 dev examples; the file name suggests
# a 100-example sample -- confirm this does not raise IndexError.
query = default_transformation(dev_dataset[100])
tokenized_query = query.split()

# Rank by score with argsort instead of a {score: index} dict: a dict keyed by
# score silently drops every document that ties with another one.
# The stable sort keeps the lower index first among equal scores.
scores = np.asarray(bm25.get_scores(tokenized_query))
ranked_idx = np.argsort(-scores, kind="stable")
sorted_scores = scores[ranked_idx]

k = 2
query_result = ranked_idx[:k].tolist()

np.array([query_result]), np.array([sorted_scores[:k]])

In [4]:
# Load dev data to get a query and train data to retrieve examples
dev_dataset = read_json_from_data_dir("data/mw24_100p_dev_100_sampled.json")
train_dataset = read_json_from_data_dir('/home/haesungpyun/my_refpydst/data/mw21_1p_train_v1.json')

# Register all dialogues from the train dataset to example pool and Get all the unique dialogue ids in example pool
example_pool = []
selected_dialog_id_from_split = set()
for dataset in [train_dataset]:
    example_pool.extend(dataset)
    selected_dialog_id_from_split.update(dial['ID'] for dial in dataset)

# Directory of the retriever run; it holds the saved index and is also the path
# the SentenceTransformer below is loaded from.
retriever_full_path = '/home/haesungpyun/my_refpydst/outputs/runs/retriever/mw21_1p_train/referred_states/split_v1'

# Load the all train data index
search_index_filename = os.path.join(retriever_full_path, "train_index.npy")
# NOTE(security): allow_pickle=True unpickles the file, which can execute arbitrary
# code -- only load index files produced by a trusted run.
search_embeddings = np.load(search_index_filename, allow_pickle=True).item()    # {'MUL0720.json_turn_10': np.array([0.1, 0.2, ...]), ...}

# Keep only embeddings for the selected dialogues in split version
# (the dialogue id is the part of the key before the first underscore)
emb_dict = {
    emb_key: emb
    for emb_key, emb in search_embeddings.items()
    if emb_key.split('_')[0] in selected_dialog_id_from_split
}
if not emb_dict:
    # Fail with a clear message instead of an IndexError on emb_keys[0] below
    raise ValueError(
        f"No embeddings in {search_index_filename} match the dialogue ids of the example pool"
    )
emb_keys = list(emb_dict.keys())
emb_dim = emb_dict[emb_keys[0]].shape[-1]

# Convert embeddings to array and Normalize them
# Loop names are deliberately not `i`/`k`: they stay in the notebook namespace and
# `k` would overwrite the top-k constant set by the BM25 cell.
emb_values = np.zeros((len(emb_keys), emb_dim))
for row, emb_key in enumerate(emb_keys):
    emb_values[row] = emb_dict[emb_key]
emb_values = normalize(emb_values)

# Create a label to index mapping  {'MUL0720.json_turn_10': 1, ...}
label_to_idx = {emb_key: row for row, emb_key in enumerate(emb_keys)}

# Load the model for embed query (same directory as the index above)
embedder = SentenceTransformer(retriever_full_path)

# Tokenizer
encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')


In [3]:
# For every dev query: retrieve examples, generate completions for three prompt
# layouts, score them against the gold program, and dump the log as JSON.

short_sys_msg = "You are an expert in Dialogue State Tracking(DST) and python coding.\n"

sys_msg_dict = {"short_sys_msg": short_sys_msg}
# Select the system message used for every prompt; its key also names the output file.
sys_msg_key = "short_sys_msg"
system_msg = sys_msg_dict[sys_msg_key]

total_log = []
retrieving_samples = True
num_retrieved_examples = 100
n_smapled_examples = 10

random.seed(42)
for query_data in dev_dataset:
    if retrieving_samples:
        retrieved_examples = embed_query_retrieve_examples(
            embedder, example_pool, query_data,
            emb_keys, emb_values, label_to_idx, num_retrieved_examples=num_retrieved_examples)
    else:
        # Re-seeding here means every query draws the same random examples.
        random.seed(42)
        # randrange excludes the upper bound; randint(0, len(pool)) could index one past the end.
        retrieved_examples = [
            example_pool[random.randrange(len(example_pool))]
            for _ in range(num_retrieved_examples)
        ]

    msg_chat, gold_python = get_python_chat_prompt(query_data, retrieved_examples, system_msg)
    msg_chat_usr_last, msg_one_prompt = make_two_type_msg(msg_chat)

    # Record which examples went into the prompt and the query's gold annotations.
    log = {
        "retrieve_example": [
            {
                'ID_turn-id': f"{example['ID']}-{example['turn_id']}",
                'last_slot_values': example['last_slot_values'],
                'turn_slot_values': example['turn_slot_values'],
                'slot_values': example['slot_values'],
            }
            for example in retrieved_examples
        ],
        "query": {
            'ID-turn-id': f"{query_data['ID']}-{query_data['turn_id']}",
            'last_slot_values': query_data['last_slot_values'],
            'turn_slot_values': query_data['turn_slot_values'],
            'slot_values': query_data['slot_values'],
            'gold-python': gold_python,
        },
    }

    prompt_variants = {
        'chat_ass_last': msg_chat,
        'chat_user_last': msg_chat_usr_last,
        'one_prompt': msg_one_prompt,
    }
    for name, msg in prompt_variants.items():
        args = {
            "model": 'gpt-3.5-turbo',
            "messages": msg,
            "max_tokens": 120,
            "top_p": 0.9,
            "stop": ['--', '\n', ';', '#'],
            "n": 2,
            "logprobs": True,  # return per-token log-probabilities in choice.logprobs.content
        }
        # The request must run for each prompt variant; otherwise `results` is
        # undefined (or stale from an earlier run) and the scores below are meaningless.
        results = openai.chat.completions.create(**args)

        result_list = []
        for choice in results.choices:
            pred = choice.message.content or ""
            token_items = (choice.logprobs.content if choice.logprobs else None) or []
            token_logprobs = [token.logprob for token in token_items]
            result_list.append({
                'gold': gold_python,
                'pred': pred,
                # Mean token log-probability; None when no tokens came back (avoids division by zero)
                'len-normalize-logprob': (
                    sum(token_logprobs) / len(token_logprobs) if token_logprobs else None
                ),
                'token_f1': calculate_token_f1(encoding, gold_python, pred),
            })
        # Best completion (highest token F1 against the gold program) first
        log[name] = sorted(result_list, key=lambda item: item['token_f1'], reverse=True)
    total_log.append(log)

# Overwrite rather than append: appending a second JSON document on a rerun
# would leave a file that json.load cannot parse.
output_file = os.path.join(os.getcwd(), f'tmp_{sys_msg_key}.json')
with open(output_file, 'w') as f:
    json.dump(total_log, f, indent=4)

In [None]:
# Sampling experiment: for one query, repeatedly sample a subset of the retrieved
# examples, prompt with that subset, and record JGA / F1 / accuracy per iteration.

# NOTE(review): `compute_f1` is used below but is not among the names imported in
# the first cell. Importing it here assumes `my_funcs` exports it -- confirm.
# Doing it up front makes a missing name fail before any API request is sent.
from my_funcs import compute_f1

sys_msg = "You are an expert in Dialogue State Tracking(DST) and python coding.\n"


total_log = []
retrieving_samples = True
num_retrieved_examples = 100
n_smapled_examples = 10
sampling_iteration = 10
# Log key for the completions; set explicitly instead of relying on a `name`
# variable left behind by another cell's loop.
prompt_name = 'chat_ass_last'

# Randomly select a query data and Retrieve examples from example pool (train data)
random.seed(42)
# randrange excludes the upper bound; randint(0, len(dev_dataset)) could index one past the end.
query_data = dev_dataset[random.randrange(len(dev_dataset))]

retrieved_examples = embed_query_retrieve_examples(
    embedder, example_pool, query_data, emb_keys,
    emb_values, label_to_idx, num_retrieved_examples=num_retrieved_examples)

# Per-dialogue counter of how often an example was in a sample that reached JGA == 1.
# Keyed by dialogue ID only, so different turns of one dialogue share a counter.
example_scores_cnt = {e['ID'] : 0 for e in retrieved_examples}

for iteration in range(sampling_iteration):

    # Shuffle in place and take the head as this iteration's sample
    random.shuffle(retrieved_examples)
    sampled_examples = retrieved_examples[:n_smapled_examples]

    # Build the prompt from the sampled subset, so the logged examples and the
    # JGA credit below refer to what the model actually saw.
    msg_chat, gold_python = get_python_chat_prompt(query_data, sampled_examples, sys_msg)

    # Built explicitly: indexing log[iteration]["query"] on a defaultdict(dict)
    # raises KeyError because the inner dict is a plain dict.
    iteration_log = {
        "sampled_retrieve_example": [
            {
                'ID_turn-id': f"{example['ID']}-{example['turn_id']}",
                'last_slot_values': example['last_slot_values'],
                'turn_slot_values': example['turn_slot_values'],
                'slot_values': example['slot_values'],
            }
            for example in sampled_examples
        ],
        "query": {
            'ID-turn-id': f"{query_data['ID']}-{query_data['turn_id']}",
            'last_slot_values': query_data['last_slot_values'],
            'turn_slot_values': query_data['turn_slot_values'],
            'slot_values': query_data['slot_values'],
            'gold-python': gold_python,
        },
    }

    args = {
        "model": 'gpt-3.5-turbo',
        "messages": msg_chat,
        "max_tokens": 120,
        "top_p": 0.9,
        "stop": ['--', '\n', ';', '#'],
        "n": 2,
        "logprobs": True,  # return per-token log-probabilities in choice.logprobs.content
    }
    # The request must run every iteration; otherwise `results` is undefined or stale.
    results = openai.chat.completions.create(**args)

    result_list = []
    for choice in results.choices:
        pred = choice.message.content or ""
        token_items = (choice.logprobs.content if choice.logprobs else None) or []
        token_logprobs = [token.logprob for token in token_items]
        result_list.append({
            'gold': gold_python,
            'pred': pred,
            # Mean token log-probability; None when no tokens came back (avoids division by zero)
            'len-normalize-logprob': (
                sum(token_logprobs) / len(token_logprobs) if token_logprobs else None
            ),
            'token_f1': calculate_token_f1(encoding, gold_python, pred),
        })
    sorted_result_list = sorted(result_list, key=lambda item: item['token_f1'], reverse=True)
    iteration_log[prompt_name] = sorted_result_list

    # Evaluate the completion with the highest token F1 against the gold program
    best_completion = sorted_result_list[0]
    predicted_slot_values = parse_python_completion(best_completion['pred'], query_data['last_slot_values'])
    aggregate_slot_values = update_dialogue_state(query_data['last_slot_values'], predicted_slot_values)

    # Gold values may list alternatives separated by '|'; when the prediction
    # matches one of them, that alternative becomes the gold value.
    gold_slot_value = copy.deepcopy(query_data['slot_values'])
    for key, gold_value in list(gold_slot_value.items()):
        if '|' in gold_value:
            gold_values = gold_value.split('|')
            if key in aggregate_slot_values and aggregate_slot_values[key] in gold_values:
                gold_slot_value[key] = aggregate_slot_values[key]
    jga: int = 1 if aggregate_slot_values == gold_slot_value else 0
    f1 = compute_f1(aggregate_slot_values, gold_slot_value)
    acc = compute_acc(aggregate_slot_values, gold_slot_value)

    iteration_log['jga'] = jga
    iteration_log['f1'] = f1
    iteration_log['acc'] = acc

    # Credit every sampled example when the full dialogue state was predicted exactly
    if jga == 1:
        for example in sampled_examples:
            example_scores_cnt[example['ID']] += 1

    total_log.append({iteration: iteration_log})


# Overwrite rather than append: appending a second JSON document on a rerun
# would leave a file that json.load cannot parse.
output_file = os.path.join(os.getcwd(), 'sampling_exp.json')
with open(output_file, 'w') as f:
    json.dump(total_log, f, indent=4)