In [28]:
import json
import numpy as np
import pickle

In [29]:
# Codebook metadata; later cells read codebook[t]['values'] for each target variable.
# JSON text is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default text encoding.
with open("codebook.json", "r", encoding="utf-8") as codebook_file:
    codebook = json.load(codebook_file)

# Participant records; later cells index them as participants[p]['projections'][t],
# participants[p]['predictions'][t] and participants[p]['gold'][t].
# NOTE: unpickling can execute arbitrary code -- only load a participants.pkl
# that comes from a trusted source.
with open('participants.pkl', 'rb') as f:
    participants = pickle.load(f)

In [None]:
# Variables to process: each name is used as a key into `codebook` and into
# every participant's 'projections', 'predictions' and 'gold' mappings.
target_variables = [
    'trstprl',
    'badge',
    'lrscale',
]

In [31]:
def get_intervals(array, num_of_choices, cutoff=40):
    """Split the central part of a sample into equal-width bins.

    ``cutoff`` percent of the distribution is ignored in total, half in each
    tail: the bins span the ``cutoff / 2``-th to the ``100 - cutoff / 2``-th
    percentile of ``array``.

    Args:
        array: Non-empty array-like of numbers, passed to ``np.percentile``.
        num_of_choices: Number of bins to create; must be at least 1.
        cutoff: Total percentage trimmed from the tails, in ``[0, 100]``.
            ``0`` makes the bins span the full min..max range.

    Returns:
        A list of ``num_of_choices + 1`` bin edges, starting at the lower
        percentile and increasing by a constant step.

    Raises:
        ValueError: If ``num_of_choices`` is smaller than 1, ``cutoff`` is
            outside ``[0, 100]``, or ``array`` is empty.
    """
    # Without these guards a zero bin count divides by zero (nan/inf edges) and
    # a cutoff above 100 silently produces descending edges.
    if num_of_choices < 1:
        raise ValueError(f"num_of_choices must be at least 1, got {num_of_choices!r}")
    if not 0 <= cutoff <= 100:
        raise ValueError(f"cutoff must be between 0 and 100, got {cutoff!r}")
    if np.size(array) == 0:
        raise ValueError("array must contain at least one value")

    tail = cutoff / 2
    # One call computes both bounds over the same data.
    lower_bound, upper_bound = np.percentile(array, [tail, 100 - tail])
    step_size = (upper_bound - lower_bound) / num_of_choices
    return [lower_bound + i * step_size for i in range(num_of_choices + 1)]


In [32]:
def find_interval(value, intervals, choices):
    """Map ``value`` to the choice of the bin it falls into.

    Args:
        value: Number to classify.
        intervals: Sequence of bin edges; ``intervals[i]`` and
            ``intervals[i + 1]`` delimit bin ``i``.
        choices: Sequence of labels, indexed by bin number.

    Returns:
        ``choices[0]`` when ``value`` is at or below the first edge,
        ``choices[-1]`` when it is at or above the last edge, otherwise
        ``choices[i]`` for the first half-open bin ``[intervals[i],
        intervals[i + 1])`` containing ``value``. ``None`` when no bin matches
        (e.g. ``value`` is NaN, for which every comparison is false).
    """
    # Everything outside the edge range is clamped to the outermost choices.
    if value <= intervals[0]:
        return choices[0]
    if value >= intervals[-1]:
        return choices[-1]

    # Walk consecutive edge pairs and stop at the first bin holding the value.
    for bin_index, (left_edge, right_edge) in enumerate(zip(intervals, intervals[1:])):
        if left_edge <= value < right_edge:
            return choices[bin_index]

    return None


In [33]:
# For each target variable, gather the projections of all participants into one
# array (ordered like participants.keys()).
distributions = {}
for variable in target_variables:
    projected_values = [
        participants[pid]["projections"][variable]
        for pid in participants.keys()
    ]
    distributions[variable] = np.array(projected_values)

In [34]:
# Turn every participant's projection into one of codebook[variable]['values'].
# NOTE: each participant record must already hold a 'predictions' mapping; this
# cell only fills it in and does not create it.
for variable in target_variables:
    choices = codebook[variable]['values']
    # cutoff=0 uses the 0th and 100th percentiles, i.e. the edges span the whole
    # range of projections for this variable.
    intervals = get_intervals(distributions[variable], len(choices), cutoff=0)

    for pid in participants.keys():
        record = participants[pid]
        record['predictions'][variable] = find_interval(
            record['projections'][variable], intervals, choices
        )

In [35]:
# Gold answers with these codes are treated as missing and left out.
# NOTE(review): the same codes are applied to every target variable -- confirm
# that none of them encodes missing answers differently (e.g. single-digit codes).
MISSING_GOLD_CODES = {77, 88, 99}

# Per target variable: a (golds, predictions) pair of equally long, aligned lists.
all_predictions = {}
for t in target_variables:
    golds = []
    predictions = []
    for p in participants:
        # Normalise before filtering: the stored label is int(...) of the raw
        # value, so the filter must look at the same normalised value. Otherwise
        # a code stored as '77' slips through and is kept as gold label 77.
        gold = int(participants[p]['gold'][t])
        if gold in MISSING_GOLD_CODES:
            continue
        golds.append(gold)
        predictions.append(participants[p]['predictions'][t])
    all_predictions[t] = (golds, predictions)
    

In [36]:
# Persist the results. Note that 'participants.pkl' is the same file this
# notebook loaded at the top, so it is overwritten with the updated records.
for output_path, payload in (
    ('participants.pkl', participants),
    ('all_predictions.pkl', all_predictions),
):
    with open(output_path, 'wb') as output_file:
        pickle.dump(payload, output_file, protocol=pickle.HIGHEST_PROTOCOL)