In [1]:
'''
Construct soft labels from elicited individual annotator information 
'''


import pandas as pd
import numpy as np
import os 
import itertools
import json 
import importlib 
import label_construction_utils as utils

# Directory that notebook outputs are meant to be written to.
save_dir = "./"

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate isdir() test.
os.makedirs(save_dir, exist_ok=True)

# Class names in label-index order; a name's position in this list is its index.
class_names = ['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck']
num_classes = len(class_names)

# Lookup tables in both directions: class name -> index and index -> class name.
class2idx = {class_name: idx for idx, class_name in enumerate(class_names)}
idx2class = dict(enumerate(class_names))

In [2]:
'''
Read in processed elicitation data
'''

elicitation_file = "human_soft_labels_data.json"

# JSON text is UTF-8; state the encoding explicitly so the read does not depend
# on the platform's locale default.
# The loaded object is consumed below as a mapping from example index to an
# iterable of per-annotator elicitation records.
with open(elicitation_file, "r", encoding="utf-8") as f:
    all_elicitation_per_example = json.load(f)


In [3]:
'''
Create labels for various label varieties

The below shows Top 2, Clamp settings
Using redist_level = 0.1 like in paper

E.g., if an annotator assigns 80% prob to deer and 20% to horse, but says dog and cat are also possible
Then 5% mass will be spread to dog and cat, and the others will be dropped s.t. tot sum is 100%

This is done b/c, in the above example, if an annotator says a category is possible, it ought to have some non-zero probability
'''

include_top_2 = True  # if False, just use Top-1 prob
redist = "clamp"  # could also use "uniform"
redist_level = 0.1

example_idxs = all_elicitation_per_example.keys()

# example_idx -> list of per-annotator soft labels, stored as plain Python lists
# so the dictionary can be written out with json.dump.
soft_labels_per = {example_idx: [] for example_idx in example_idxs}
# example_idx -> aggregate soft label (mean over that example's annotators).
agg_soft_labels_per = {example_idx: [] for example_idx in example_idxs}

for example_idx, elic_data in all_elicitation_per_example.items():
    soft_labels = []
    for single_annotator_info in elic_data:
        soft_label = utils.construct_elicited_soft_label(
            single_annotator_info,
            class2idx,
            idx2class,
            include_top_2=include_top_2,
            redist=redist,
            redist_factor=redist_level,
        )

        soft_labels.append(soft_label)

        # .tolist() converts every element to a built-in Python scalar;
        # list(array) would keep NumPy scalars, which json.dump rejects for
        # non-float64 dtypes (e.g. float32).
        soft_labels_per[example_idx].append(np.asarray(soft_label).tolist())

    # demonstrating an example aggregation over individuals' labels
    agg_label = np.mean(soft_labels, axis=0)
    agg_soft_labels_per[example_idx] = agg_label



In [4]:
'''
Save out
'''
# Write into save_dir (configured and created in the first cell) rather than
# implicitly into the current working directory.
# NOTE(review): the file name appears to encode the Top-2 / clamp / redist 0.1
# settings used above -- rename it if those settings are changed.
with open(os.path.join(save_dir, "cifar10s_t2clamp_redist10.json"), "w") as f:
    json.dump(soft_labels_per, f)