In [10]:
import os, json
import pandas as pd

dataset_dict = {
    'a': 'movies',
    'b': 'birdstrikes'
}

oracle_dict = {
    'c': 'compassql',
    'd': 'dziban'
}

search_algorithm_dict = {
    'e': 'bfs',
    'f': 'dfs'
}

task_dict = {
    'p1': '1. Find Extremum',
    'p2': '2. Retrieve Value',
    'p3': '3. Prediction',
    'p4': '4. Exploration'
}

response_to_score_dict = {
    'sdisagree': -2,
    'disagree': -1,
    'neutral': 0,
    'agree': 1,
    'sagree': 2
}

In [18]:
path_to_json = '../logs/'
json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('_ptask.json')]
# print(json_files) 
jsons_data = pd.DataFrame(columns=['participant_id',
                                   'dataset',
                                   'oracle',
                                   'search',
                                   'condition',
                                   'task',
                                   'task_type',
                                   'confidence-udata', 
                                   'confidence-ans', 
                                   'efficiency',
                                   'ease-of-use', 
                                   'utility', 
                                   'overall'])

for index, js in enumerate(json_files):
    with open(os.path.join(path_to_json, js)) as json_file:
        json_text = json.load(json_file)
        
        split_filename = js.split('_')
        
        participant_id = split_filename[0].replace('partcipant','')
        experimental_setup = split_filename[1]
        dataset = dataset_dict[experimental_setup[0]]
        
        oracle = oracle_dict[experimental_setup[1]]
        search = search_algorithm_dict[experimental_setup[2]]
        
        condition = oracle+"_"+search
        
        task = task_dict[split_filename[2]]
        
        task_type = "Focused"
        if task == "3. Prediction" or task == '4. Exploration':
            task_type = "Open-Ended"
            
        confidence_udata = response_to_score_dict[json_text['confidence-udata']]
        confidence_ans = response_to_score_dict[json_text['confidence-ans']]
        efficiency = response_to_score_dict[json_text['efficiency']]
        ease_of_use = response_to_score_dict[json_text['ease-of-use']]
        utility = response_to_score_dict[json_text['utility']]
        overall = response_to_score_dict[json_text['overall']]
        
        row = [participant_id, dataset, oracle, search, condition, task, task_type, confidence_udata, confidence_ans, efficiency, ease_of_use, utility, overall]
        jsons_data.loc[index] = row
# now that we have the pertinent json data in our DataFrame let's look at it
print(jsons_data)
jsons_data.to_csv('processed_ptask_responses.csv', index=False)

    participant_id      dataset     oracle search      condition  \
0               29       movies  compassql    bfs  compassql_bfs   
1               55       movies  compassql    dfs  compassql_dfs   
2               42  birdstrikes     dziban    bfs     dziban_bfs   
3               58  birdstrikes     dziban    bfs     dziban_bfs   
4               22       movies     dziban    bfs     dziban_bfs   
..             ...          ...        ...    ...            ...   
231             31  birdstrikes     dziban    dfs     dziban_dfs   
232             32       movies  compassql    bfs  compassql_bfs   
233             41       movies     dziban    bfs     dziban_bfs   
234             15       movies     dziban    dfs     dziban_dfs   
235             57  birdstrikes     dziban    dfs     dziban_dfs   

                  task   task_type confidence-udata confidence-ans efficiency  \
0        3. Prediction  Open-Ended               -1              0         -1   
1     1. Find Extremu

In [16]:

path_to_json = '../logs/'
json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('_logs.json')]
# print(json_files) 
jsons_data = pd.DataFrame(columns=['participant_id',
                                   'dataset',
                                   'oracle',
                                   'search',
                                   'condition',
                                   'task',
                                   'task_type',
                                   'time'])

for index, js in enumerate(json_files):
    with open(os.path.join(path_to_json, js)) as json_file:
        json_text = json.load(json_file)
        split_filename = js.split('_')
        
        participant_id = split_filename[0]
        experimental_setup = split_filename[1]
        dataset = dataset_dict[experimental_setup[0]]
        oracle = oracle_dict[experimental_setup[1]]
        search = search_algorithm_dict[experimental_setup[2]]
        condition = oracle+"_"+search

        task = task_dict[split_filename[2]]
        task_type = "Focused"
        if task == "3. Prediction" or task == '4. Exploration':
            task_type = "Open-Ended"
        time = json_text[len(json_text)-1]['Time'] - json_text[0]['Time']
        row = [participant_id, dataset, oracle, search, condition, task, task_type, time]

        jsons_data.loc[index] = row
            
print(jsons_data)
jsons_data.to_csv('processed_completion_time.csv', index=False)

    participant_id      dataset     oracle search      condition  \
0     partcipant41       movies     dziban    bfs     dziban_bfs   
1      partcipant5       movies     dziban    bfs     dziban_bfs   
2     partcipant36  birdstrikes  compassql    dfs  compassql_dfs   
3     partcipant17       movies  compassql    dfs  compassql_dfs   
4     partcipant56       movies     dziban    dfs     dziban_dfs   
..             ...          ...        ...    ...            ...   
231    partcipant9       movies     dziban    bfs     dziban_bfs   
232   partcipant44       movies     dziban    bfs     dziban_bfs   
233   partcipant32       movies  compassql    bfs  compassql_bfs   
234   partcipant48  birdstrikes  compassql    bfs  compassql_bfs   
235   partcipant14       movies  compassql    dfs  compassql_dfs   

                  task   task_type     time  
0        3. Prediction  Open-Ended   785329  
1       4. Exploration  Open-Ended   595646  
2        3. Prediction  Open-Ended  1445670  

In [17]:
import os, json
import pandas as pd

pilot_task_dict = {
    '1': '1. Find Extremum',
    '2': '2. Retrieve Value',
    '3': '3. Prediction',
    '4': '4. Exploration'
}

path_to_json = '../pilots/'
json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('_logs.json')]
# print(json_files) 
jsons_data = pd.DataFrame(columns=['participant_id',
                                   'dataset',
                                   'oracle',
                                   'search',
                                   'task',
                                   'task_type',
                                   'time'])

for index, js in enumerate(json_files):
    with open(os.path.join(path_to_json, js)) as json_file:
        json_text = json.load(json_file)
        split_filename = js.split('_')
        
        participant_id = split_filename[0]
        if(participant_id in ['pilot3', 'pilot4', 'pilot5']):
            experimental_setup = split_filename[1]
            dataset = dataset_dict[experimental_setup[0]]
            oracle = oracle_dict[experimental_setup[1]]
            search = search_algorithm_dict[experimental_setup[2]]

            task = pilot_task_dict[split_filename[2][1]]
            task_type = "Focused"
            if task == "3. Prediction" or task == '4. Exploration':
                task_type = "Open-Ended"

            time = json_text[len(json_text)-1]['Time'] - json_text[0]['Time']
            row = [participant_id, dataset, oracle, search, task, task_type, time]

            jsons_data.loc[index] = row
jsons_data.sort_values(by=['participant_id', 'task'], inplace=True)
print(jsons_data)
print(jsons_data['time'].mean())
print(jsons_data['time'].std())
for task in ['1. Find Extremum', '2. Retrieve Value', '3. Prediction', '4. Exploration']:
    print("===== " + task)
    df = jsons_data.loc[jsons_data['task'] == task]
    print(df['time'].mean())
    print(df['time'].std())
    
for task_type in ['Focused', 'Open-Ended']:
    print("===== " + task_type)
    df = jsons_data.loc[jsons_data['task_type'] == task_type]
    print(df['time'].mean())
    print(df['time'].std())
jsons_data.to_csv('pilot_times.csv', index=False)

   participant_id      dataset     oracle search               task  \
16         pilot3       movies  compassql    dfs   1. Find Extremum   
10         pilot3       movies  compassql    dfs  2. Retrieve Value   
3          pilot3       movies  compassql    dfs      3. Prediction   
5          pilot3       movies  compassql    dfs     4. Exploration   
8          pilot4       movies     dziban    dfs   1. Find Extremum   
13         pilot4       movies     dziban    dfs  2. Retrieve Value   
17         pilot4       movies     dziban    dfs      3. Prediction   
11         pilot4       movies     dziban    dfs     4. Exploration   
12         pilot5  birdstrikes     dziban    bfs   1. Find Extremum   
7          pilot5  birdstrikes     dziban    bfs  2. Retrieve Value   
1          pilot5  birdstrikes     dziban    bfs      3. Prediction   
4          pilot5  birdstrikes     dziban    bfs     4. Exploration   

     task_type    time  
16     Focused  163058  
10     Focused  407044  
3

In [24]:
df = pd.read_csv("./processed_completion_time_split.csv")
df['condition'] = df['oracle']+"_"+df['search']
df['task_type'] = 'Focused'
df.loc[df['task'] == "3. Prediction", 'task_type'] = 'Open-Ended'
df.loc[df['task'] == '4. Exploration', 'task_type'] = 'Open-Ended'
df.to_csv('processed_completion_time.csv', index=False)

In [28]:
df = pd.read_csv("./processed_accuracy_split.csv")
df_oracle_1_hot = pd.get_dummies(df['oracle'], prefix='oracle')
df_search_1_hot = pd.get_dummies(df['search'], prefix='search')
df = df.join(df_oracle_1_hot)
df = df.join(df_search_1_hot)
print(df)
df.to_csv('processed_accuracy_split_1_hot.csv', index=False)

     participant_id      dataset     oracle search               task  \
0                10  birdstrikes  compassql    bfs   1. Find Extremum   
1                11  birdstrikes  compassql    dfs   1. Find Extremum   
2                12       movies  compassql    dfs   1. Find Extremum   
3                13  birdstrikes     dziban    bfs   1. Find Extremum   
4                14       movies  compassql    dfs   1. Find Extremum   
..              ...          ...        ...    ...                ...   
113               5       movies     dziban    bfs  2. Retrieve Value   
114               6  birdstrikes  compassql    dfs  2. Retrieve Value   
115               7       movies  compassql    dfs  2. Retrieve Value   
116               8       movies     dziban    dfs  2. Retrieve Value   
117               9       movies     dziban    bfs  2. Retrieve Value   

     accuracy  oracle_compassql  oracle_dziban  search_bfs  search_dfs  
0           1                 1              0    