In [2]:
import os, json
import pandas as pd

dataset_dict = {
    'a': 'movies',
    'b': 'birdstrikes'
}

oracle_dict = {
    'c': 'compassql',
    'd': 'dziban'
}

search_algorithm_dict = {
    'e': 'bfs',
    'f': 'dfs'
}

task_dict = {
    'p1': '1. Find Extremum',
    'p2': '2. Retrieve Value',
    'p3': '3. Prediction',
    'p4': '4. Exploration'
}

response_to_score_dict = {
    'sdisagree': -2,
    'disagree': -1,
    'neutral': 0,
    'agree': 1,
    'sagree': 2
}

In [31]:
path_to_json = './logs/'
json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('_ptask.json')]
# print(json_files) 
jsons_data = pd.DataFrame(columns=['participant_id',
                                   'dataset',
                                   'oracle',
                                   'search',
                                   'condition',
                                   'task',
                                   'confidence-udata', 
                                   'confidence-ans', 
                                   'efficiency',
                                   'ease-of-use', 
                                   'utility', 
                                   'overall'])

for index, js in enumerate(json_files):
    with open(os.path.join(path_to_json, js)) as json_file:
        json_text = json.load(json_file)
        
        split_filename = js.split('_')
        
        participant_id = split_filename[0].replace('partcipant','')
        experimental_setup = split_filename[1]
        dataset = dataset_dict[experimental_setup[0]]
        
        oracle = oracle_dict[experimental_setup[1]]
        search = search_algorithm_dict[experimental_setup[2]]
        
        condition = oracle+"_"+search
        
        task = task_dict[split_filename[2]]
        
        confidence_udata = response_to_score_dict[json_text['confidence-udata']]
        confidence_ans = response_to_score_dict[json_text['confidence-ans']]
        efficiency = response_to_score_dict[json_text['efficiency']]
        ease_of_use = response_to_score_dict[json_text['ease-of-use']]
        utility = response_to_score_dict[json_text['utility']]
        overall = response_to_score_dict[json_text['overall']]
        
        row = [participant_id, dataset, oracle, search, condition, task, confidence_udata, confidence_ans, efficiency, ease_of_use, utility, overall]
        jsons_data.loc[index] = row
# now that we have the pertinent json data in our DataFrame let's look at it
print(jsons_data)
jsons_data.to_csv('processed_ptask_responses.csv', index=False)

    participant_id      dataset     oracle search      condition  \
0               29       movies  compassql    bfs  compassql_bfs   
1               55       movies  compassql    dfs  compassql_dfs   
2               42  birdstrikes     dziban    bfs     dziban_bfs   
3               58  birdstrikes     dziban    bfs     dziban_bfs   
4               22       movies     dziban    bfs     dziban_bfs   
..             ...          ...        ...    ...            ...   
231             31  birdstrikes     dziban    dfs     dziban_dfs   
232             32       movies  compassql    bfs  compassql_bfs   
233             41       movies     dziban    bfs     dziban_bfs   
234             15       movies     dziban    dfs     dziban_dfs   
235             57  birdstrikes     dziban    dfs     dziban_dfs   

                  task confidence-udata confidence-ans efficiency ease-of-use  \
0        3. Prediction               -1              0         -1           1   
1     1. Find Extremu

In [4]:
import os, json
import pandas as pd
import math
pilot_task_dict = {
    '1': '1. Find Extremum',
    '2': '2. Retrieve Value',
    '3': '3. Prediction',
    '4': '4. Exploration'
}

path_to_json = './pilots/'
json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('_ptask.json')]
# print(json_files) 
jsons_data = pd.DataFrame(columns=['participant_id',
                                   'dataset',
                                   'oracle',
                                   'search',
                                   'condition',
                                   'task',
                                   'confidence-udata', 
                                   'confidence-ans', 
                                   'efficiency',
                                   'ease-of-use', 
                                   'utility', 
                                   'overall'])

for index, js in enumerate(json_files):
    with open(os.path.join(path_to_json, js)) as json_file:
        json_text = json.load(json_file)
        
        split_filename = js.split('_')
        
        participant_id = split_filename[0].replace('partcipant','')
        experimental_setup = split_filename[1]
        dataset = dataset_dict[experimental_setup[0]]
        
        oracle = oracle_dict[experimental_setup[1]]
        search = search_algorithm_dict[experimental_setup[2]]
        
        condition = oracle+"_"+search
        
        task = task_dict[split_filename[2]]
        
        confidence_udata = response_to_score_dict[json_text['confidence-udata']]
        confidence_ans = response_to_score_dict[json_text['confidence-ans']]
        efficiency = response_to_score_dict[json_text['efficiency']]
        ease_of_use = response_to_score_dict[json_text['ease-of-use']]
        utility = response_to_score_dict[json_text['utility']]
        overall = response_to_score_dict[json_text['overall']]
        
        row = [participant_id, dataset, oracle, search, condition, task, confidence_udata, confidence_ans, efficiency, ease_of_use, utility, overall]
        jsons_data.loc[index] = row
# now that we have the pertinent json data in our DataFrame let's look at it
print(jsons_data)
print("confidence-udata")
print(jsons_data['confidence-udata'].mean())
print(jsons_data['confidence-udata'].std())
print("---")

print("confidence-ans")
print(jsons_data['confidence-ans'].mean())
print(jsons_data['confidence-ans'].std())
print("---")

print("efficiency")
print(jsons_data['efficiency'].mean())
print(jsons_data['efficiency'].std())
print("---")

print("ease-of-use")
print(jsons_data['ease-of-use'].mean())
print(jsons_data['ease-of-use'].std())
print("---")

print("utility")
print(jsons_data['utility'].mean())
print(jsons_data['utility'].std())
print("---")

print("overall")
print(jsons_data['overall'].mean())
print(jsons_data['overall'].std())
print("---")


s = pd.concat([jsons_data['confidence-udata'], 
               jsons_data['confidence-ans'], 
               jsons_data['efficiency'], 
               jsons_data['ease-of-use'],
               jsons_data['utility'],
               jsons_data['overall'],
              ])
print(s.mean())
print(s.std())

   participant_id      dataset     oracle search      condition  \
0          pilot3       movies  compassql    dfs  compassql_dfs   
1          pilot4       movies     dziban    dfs     dziban_dfs   
2          pilot5  birdstrikes     dziban    bfs     dziban_bfs   
3          pilot3       movies  compassql    dfs  compassql_dfs   
4          pilot4       movies     dziban    dfs     dziban_dfs   
5          pilot3       movies  compassql    dfs  compassql_dfs   
6          pilot4       movies     dziban    dfs     dziban_dfs   
7          pilot4       movies     dziban    dfs     dziban_dfs   
8          pilot5  birdstrikes     dziban    bfs     dziban_bfs   
9          pilot5  birdstrikes     dziban    bfs     dziban_bfs   
10         pilot3       movies  compassql    dfs  compassql_dfs   
11         pilot5  birdstrikes     dziban    bfs     dziban_bfs   

                 task confidence-udata confidence-ans efficiency ease-of-use  \
0    1. Find Extremum                2          

In [45]:

path_to_json = './logs/'
json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('_logs.json')]
# print(json_files) 
jsons_data = pd.DataFrame(columns=['participant_id',
                                   'dataset',
                                   'oracle',
                                   'search',
                                   'condition',
                                   'task',
                                   'time'])

for index, js in enumerate(json_files):
    with open(os.path.join(path_to_json, js)) as json_file:
        json_text = json.load(json_file)
        split_filename = js.split('_')
        
        participant_id = split_filename[0]
        experimental_setup = split_filename[1]
        dataset = dataset_dict[experimental_setup[0]]
        oracle = oracle_dict[experimental_setup[1]]
        search = search_algorithm_dict[experimental_setup[2]]
        condition = oracle+"_"+search

        task = task_dict[split_filename[2]]
        time = json_text[len(json_text)-1]['Time'] - json_text[0]['Time']
        row = [participant_id, dataset, oracle, search, condition, task, time]

        jsons_data.loc[index] = row
            
print(jsons_data)
jsons_data.to_csv('task_times.csv', index=False)

    participant_id      dataset     oracle search      condition  \
0     partcipant41       movies     dziban    bfs     dziban_bfs   
1      partcipant5       movies     dziban    bfs     dziban_bfs   
2     partcipant36  birdstrikes  compassql    dfs  compassql_dfs   
3     partcipant17       movies  compassql    dfs  compassql_dfs   
4     partcipant56       movies     dziban    dfs     dziban_dfs   
..             ...          ...        ...    ...            ...   
231    partcipant9       movies     dziban    bfs     dziban_bfs   
232   partcipant44       movies     dziban    bfs     dziban_bfs   
233   partcipant32       movies  compassql    bfs  compassql_bfs   
234   partcipant48  birdstrikes  compassql    bfs  compassql_bfs   
235   partcipant14       movies  compassql    dfs  compassql_dfs   

                  task     time  
0        3. Prediction   785329  
1       4. Exploration   595646  
2        3. Prediction  1445670  
3    2. Retrieve Value   103418  
4        3. P

In [4]:
import os, json
import pandas as pd
import math
pilot_task_dict = {
    '1': '1. Find Extremum',
    '2': '2. Retrieve Value',
    '3': '3. Prediction',
    '4': '4. Exploration'
}

path_to_json = './pilots/'
json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('_logs.json')]
# print(json_files) 
jsons_data = pd.DataFrame(columns=['participant_id',
                                   'dataset',
                                   'oracle',
                                   'search',
                                   'task',
                                   'time'])

for index, js in enumerate(json_files):
    with open(os.path.join(path_to_json, js)) as json_file:
        json_text = json.load(json_file)
        split_filename = js.split('_')
        
        participant_id = split_filename[0]
        if(participant_id in ['pilot3', 'pilot4', 'pilot5']):
            experimental_setup = split_filename[1]
            dataset = dataset_dict[experimental_setup[0]]
            oracle = oracle_dict[experimental_setup[1]]
            search = search_algorithm_dict[experimental_setup[2]]

            task = pilot_task_dict[split_filename[2][1]]

            # time = math.log((json_text[len(json_text)-1]['Time'] - json_text[0]['Time'])/1000)
            time = (json_text[len(json_text)-1]['Time'] - json_text[0]['Time'])/1000
            row = [participant_id, dataset, oracle, search, task, time]

            jsons_data.loc[index] = row
jsons_data.sort_values(by=['participant_id', 'task'], inplace=True)
print(jsons_data)
print(jsons_data['time'].mean())
print(jsons_data['time'].std())
for task in ['1. Find Extremum', '2. Retrieve Value', '3. Prediction', '4. Exploration']:
    print("===== " + task)
    df = jsons_data.loc[jsons_data['task'] == task]
    print(df['time'].mean())
    print(df['time'].std())
jsons_data.to_csv('pilot_times.csv', index=False)

   participant_id      dataset     oracle search               task     time
16         pilot3       movies  compassql    dfs   1. Find Extremum  163.058
10         pilot3       movies  compassql    dfs  2. Retrieve Value  407.044
3          pilot3       movies  compassql    dfs      3. Prediction  636.623
5          pilot3       movies  compassql    dfs     4. Exploration  387.846
8          pilot4       movies     dziban    dfs   1. Find Extremum  110.347
13         pilot4       movies     dziban    dfs  2. Retrieve Value  301.887
17         pilot4       movies     dziban    dfs      3. Prediction  847.152
11         pilot4       movies     dziban    dfs     4. Exploration  543.307
12         pilot5  birdstrikes     dziban    bfs   1. Find Extremum  297.622
7          pilot5  birdstrikes     dziban    bfs  2. Retrieve Value  231.168
1          pilot5  birdstrikes     dziban    bfs      3. Prediction   83.174
4          pilot5  birdstrikes     dziban    bfs     4. Exploration  316.487

In [3]:
df = pd.read_csv("./processed_completion_time_split.csv")
print(df)

FileNotFoundError: [Errno 2] No such file or directory: './processed_completion_time_split.csv'