In [196]:
import os
import json
import pandas as pd

# directory of json files
results_dir = "../results/"

# Dataframes
single_df = pd.DataFrame()
pairwise_df = pd.DataFrame()
freetalk_df = pd.DataFrame()
last_df = pd.DataFrame()
sus_df = pd.DataFrame()

def process_data(data: dict):
    # Rough Values
    mturk_worker_id = data['mturk_worker_id']
    situation1 = data['result']['situation1']
    situation2 = data['result']['situation2']
    situation3 = data['result']['situation3']
    freetalk = data['result']['freetalk']
    last_answer = int(data['result']['last_answer'])
    usability_answer = data['result']['usability_answer']
    
    # Common Values
    situation_number = [1] * len(situation1) \
                        + [2] * len(situation2) \
                        + [3] * len(situation3)
    scenario_count = []
    user_speech, base_message, augmented_message = [], [], []
    
    # Single Model Evaluation Values
    single_answer_list = [[] for _ in range(6)] # base_answer1~3, augmented_answer1~3
    #last_answer..how? split
    
    # Pairwise Model Evaluation Values
    pairwise_answer_list = [[], [], [], []]
    
    # SUS values
    sus_answer_list = [[] for _ in range(10)]
    
    for idx, data in enumerate(situation1 + situation2 + situation3):
        scenario_num = idx  if idx <= len(situation1) - 1 \
                            else idx - len(situation1) \
                                if idx <= len(situation1 + situation2) - 1\
                                else idx - len(situation1 + situation2)
        scenario_count.append(scenario_num)
        
        user_speech.append(data['user_speech'])
        base_message.append(data['assistant_message']['base_model'])
        augmented_message.append(data['assistant_message']['augmented_model'])
        
        # save single
        add_single(data['answer'], single_answer_list)
        
        # save pairwise
        add_pairwise(data['answer'], pairwise_answer_list)    

    single_eval = {
        'mturk_worker_id': [mturk_worker_id] * len(situation1 + situation2 + situation3),
        'situation_number': situation_number,
        'scenario_count' : scenario_count,
        'user_speech': user_speech,
        'base_message': base_message,
        'augmented_message': augmented_message,
        'single_base_answer1': single_answer_list[0],
        'single_base_answer2': single_answer_list[1],
        'single_base_answer3': single_answer_list[2],
        'single_augmented_answer1': single_answer_list[3],
        'single_augmented_answer2': single_answer_list[4],
        'single_augmented_answer3': single_answer_list[5],
        'pairwise_answer1': [None] * len(situation1 + situation2 + situation3),
        'pairwise_answer2': [None] * len(situation1 + situation2 + situation3),
        'pairwise_answer3': [None] * len(situation1 + situation2 + situation3),
    }
    
    pairwise_eval = {
        'mturk_worker_id': [mturk_worker_id] * len(situation1 + situation2 + situation3),
        'situation_number': situation_number,
        'scenario_count' : scenario_count,
        'user_speech': user_speech,
        'base_message': base_message,
        'augmented_message': augmented_message,
        'single_base_answer1': [None] * len(situation1 + situation2 + situation3),
        'single_base_answer2': [None] * len(situation1 + situation2 + situation3),
        'single_base_answer3': [None] * len(situation1 + situation2 + situation3),
        'single_augmented_answer1': [None] * len(situation1 + situation2 + situation3),
        'single_augmented_answer2': [None] * len(situation1 + situation2 + situation3),
        'single_augmented_answer3': [None] * len(situation1 + situation2 + situation3),
        'pairwise_answer1': pairwise_answer_list[0],
        'pairwise_answer2': pairwise_answer_list[1],
        'pairwise_answer3': pairwise_answer_list[2]
    }
    
    freetalk_user_speech, freetalk_augmented_message = [], []
    freetalk_answer = [[], [], []]
    for idx, data in enumerate(freetalk):
        freetalk_user_speech.append(data['user_speech'])
        freetalk_augmented_message.append(data['assistant_message']['augmented_model'])
        
        for idx, answer in enumerate(data['answer']):
            value = answer.split("=")[-1]
            freetalk_answer[idx].append(int(value))

    freetalk_eval = {
        'mturk_worker_id': [mturk_worker_id] * len(freetalk),
        'situation_number': [4] * len(freetalk),
        'scenario_count' : [i for i in range(len(freetalk))],
        'user_speech': freetalk_user_speech,
        'augmented_message': freetalk_augmented_message,
        'single_augmented_answer1': freetalk_answer[0],
        'single_augmented_answer2': freetalk_answer[1],
        'single_augmented_answer3': freetalk_answer[2],
    }
    
    last_eval = {
        'mturk_worker_id': [mturk_worker_id],
        'single_last_answer': [last_answer]
    }
    
    sus_eval = {
        'mturk_worker_id': [mturk_worker_id],
    }
    for idx, answer in enumerate(usability_answer):
        sus_eval[f'usability_answer{idx + 1}'] = answer

    return pd.DataFrame(single_eval), pd.DataFrame(pairwise_eval), pd.DataFrame(freetalk_eval), pd.DataFrame(last_eval), pd.DataFrame(sus_eval)

def add_single(answers: list, single_answer_list: list[list]):
    base_values, augmented_values = [], []
    for answer in answers[:6]:
        model_type, answer_value = answer.split("=")
        if model_type == "base":
            base_values.append(int(answer_value))
        else:
            augmented_values.append(int(answer_value))
            
    for idx, value in enumerate(base_values):
        single_answer_list[idx].append(value)

    for idx, value in enumerate(augmented_values):
        single_answer_list[idx + 3].append(value)

def add_pairwise(answers: list, pairwise_answer_list: list[list]):
    for idx, value in enumerate(answers[6:]):
        pairwise_answer_list[idx].append(value)

if __name__ == "__main__":
    json_files = os.listdir(results_dir)
    for jsonfile in json_files:
        with open(os.path.join(results_dir, jsonfile), "r") as file:
            data = json.loads(file.read())
        single, pair, free, last, sus = process_data(data)
        single_df = pd.concat([single_df, single], ignore_index=True)
        pairwise_df = pd.concat([pairwise_df, pair], ignore_index=True)
        freetalk_df = pd.concat([freetalk_df, free], ignore_index=True)
        last_df = pd.concat([last_df, last], ignore_index=True)
        sus_df = pd.concat([sus_df, sus], ignore_index=True)



### DataFrames

In [None]:
single_df
pairwise_df
freetalk_df
last_df
sus_df

### Merge code

In [197]:
pd.merge(single_df, pairwise_df, on='mturk_worker_id', how='outer')

Unnamed: 0,mturk_worker_id,situation_number_x,scenario_count_x,user_speech_x,base_message_x,augmented_message_x,single_base_answer1_x,single_base_answer2_x,single_base_answer3_x,single_augmented_answer1_x,...,augmented_message_y,single_base_answer1_y,single_base_answer2_y,single_base_answer3_y,single_augmented_answer1_y,single_augmented_answer2_y,single_augmented_answer3_y,pairwise_answer1_y,pairwise_answer2_y,pairwise_answer3_y
0,seongwonchoi,1,0,The item I chose is a white portable powerbank...,The white portable powerbank for iPhone is cur...,The user's statement is accurate.,4,4,3,2,...,The user's statement is accurate.,,,,,,,base,base,base
1,seongwonchoi,1,0,The item I chose is a white portable powerbank...,The white portable powerbank for iPhone is cur...,The user's statement is accurate.,4,4,3,2,...,I can provide you with information and tips on...,,,,,,,base,base,base
2,seongwonchoi,1,0,The item I chose is a white portable powerbank...,The white portable powerbank for iPhone is cur...,The user's statement is accurate.,4,4,3,2,...,"Yes, I can recommend healthy dishes.",,,,,,,base,base,base
3,seongwonchoi,1,0,The item I chose is a white portable powerbank...,The white portable powerbank for iPhone is cur...,The user's statement is accurate.,4,4,3,2,...,I agree that it is important to have time to e...,,,,,,,base,base,base
4,seongwonchoi,1,0,The item I chose is a white portable powerbank...,The white portable powerbank for iPhone is cur...,The user's statement is accurate.,4,4,3,2,...,"I'm sorry, but I don't have the ability to rem...",,,,,,,augmented,base,base
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
926,chanyoungjung,3,5,sorry,No problem at all! If you have any other quest...,No need to apologize. How can I assist you?,4,4,4,4,...,"Yes, I know about COVID-19.",,,,,,,base,base,base
927,chanyoungjung,3,5,sorry,No problem at all! If you have any other quest...,No need to apologize. How can I assist you?,4,4,4,4,...,I don't have an opinion on the user's assessme...,,,,,,,base,base,base
928,chanyoungjung,3,5,sorry,No problem at all! If you have any other quest...,No need to apologize. How can I assist you?,4,4,4,4,...,"Before the COVID-19 pandemic, there were sever...",,,,,,,base,base,base
929,chanyoungjung,3,5,sorry,No problem at all! If you have any other quest...,No need to apologize. How can I assist you?,4,4,4,4,...,"To relieve stress, you can try deep breathing,...",,,,,,,augmented,augmented,augmented
