In [1]:
import pickle
import numpy as np

In [2]:
def calculate_normalized_weighted_trimmed_mean(predictions):
    """Average predictions while down-weighting those farthest from the median.

    Every prediction starts with weight 1. Each prediction whose absolute
    distance from the median equals the maximum distance has its weight
    halved, and every other prediction gains ``0.5 / (n - 1)``. The weights
    are then normalized by ``np.average`` (it divides by their sum).

    Note on ties: if several predictions share the maximum distance, all of
    them are halved, while the bonus given to the others is still computed as
    if only one had been halved. If all predictions are identical, all weights
    are halved equally and the result is that common value.

    Args:
        predictions: Non-empty 1-D sequence or array of numeric predictions.

    Returns:
        The weighted mean of the predictions. For a single prediction, that
        prediction's value.

    Raises:
        ValueError: If ``predictions`` is empty.
    """
    predictions = np.asarray(predictions)
    num_predictions = len(predictions)

    if num_predictions == 0:
        raise ValueError("predictions must contain at least one value")
    if num_predictions == 1:
        # Nothing to down-weight; this also avoids dividing by (n - 1) == 0
        # when computing the redistributed weight below.
        return np.average(predictions)

    # Step 1: Find the median
    median_prediction = np.median(predictions)

    # Step 2: Determine the prediction(s) farthest from the median
    distances = np.abs(predictions - median_prediction)
    # max_distance is taken from `distances` itself, so the exact float
    # comparison below always matches at least one element.
    is_farthest = distances == np.max(distances)

    # Step 3: Down-weight the furthest prediction(s) by half
    weights = np.ones(num_predictions)
    weights[is_farthest] *= 0.5

    # Step 4: Distribute the saved weight among the other predictions
    saved_weight = (1.0 - 0.5) / (num_predictions - 1)
    weights[~is_farthest] += saved_weight

    # Step 5: Calculate the weighted mean (np.average normalizes the weights)
    return np.average(predictions, weights=weights)

In [3]:
def _load_pickle(path):
    """Open ``path`` in binary mode and return the unpickled object."""
    # SECURITY: pickle.load can execute arbitrary code while unpickling, so
    # these files must come from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Resolved outcomes and the forecasts from each source, read in the same order
# as before. Note the crowd file is bound to `community_predictions`.
answers = _load_pickle("data/answers.pickle")
base_predictions = _load_pickle("data/base_predictions.pickle")
finetuned_predictions = _load_pickle("data/finetuned_predictions.pickle")
finetuned_other_predictions = _load_pickle("data/finetuned_other_predictions.pickle")
community_predictions = _load_pickle("data/crowd_predictions.pickle")

In [4]:
NUM_RETRIEVAL_DATES = 5  # num retrieval dates


def _squared_error(forecast, outcome):
    """Return the squared difference between a forecast and its outcome."""
    return (forecast - outcome) ** 2


# Running sums of squared errors, one per forecasting method; each is divided
# by the question count `n` when printed.
base_brier_score = finetuned_brier_score = 0
finetuned_and_base_brier_score = crowd_brier_score = 0
n = 0

for date_idx in range(NUM_RETRIEVAL_DATES):
    # The finetuned predictions define how many questions exist for this date.
    for question_idx in range(len(finetuned_predictions[date_idx])):
        outcome = answers[date_idx][question_idx]

        base_samples = base_predictions[date_idx][question_idx]
        finetuned_samples = finetuned_predictions[date_idx][question_idx]
        finetuned_other_samples = finetuned_other_predictions[date_idx][question_idx]
        crowd_forecast = community_predictions[date_idx][question_idx]

        # NOTE(review): `+` below is presumably list concatenation (pooling the
        # samples) -- confirm the pickled containers are lists, not arrays.
        # The base and combined forecasts use the median-trimmed average; the
        # finetuned-only forecast uses a plain mean.
        base_forecast = calculate_normalized_weighted_trimmed_mean(base_samples)
        finetuned_forecast = np.mean(finetuned_samples + finetuned_other_samples)
        combined_forecast = calculate_normalized_weighted_trimmed_mean(
            base_samples + finetuned_samples + finetuned_other_samples
        )

        base_brier_score += _squared_error(base_forecast, outcome)
        finetuned_brier_score += _squared_error(finetuned_forecast, outcome)
        finetuned_and_base_brier_score += _squared_error(combined_forecast, outcome)
        crowd_brier_score += _squared_error(crowd_forecast, outcome)
        n += 1

# Report the mean squared error of each method over all scored questions.
for label, total in (
    ("Base Brier Score:", base_brier_score),
    ("Finetuned Brier Score:", finetuned_brier_score),
    ("Finetuned and Base Brier Score:", finetuned_and_base_brier_score),
    ("Crowd Brier Score:", crowd_brier_score),
):
    print(label, total / n)

Base Brier Score: 0.1863574497732625
Finetuned Brier Score: 0.18005945446102836
Finetuned and Base Brier Score: 0.17988713531620448
Crowd Brier Score: 0.1486199294280867
