In [46]:
#%pip install scikit-learn
#%pip install torch

from collections import namedtuple
from math import sqrt
import os
from time import time
import random
import numpy as np
from scipy import stats
import pandas as pd
import sklearn
from sklearn.base import clone
import torch
import torch.nn as nn
import torch.optim as optim
from IPython.display import display, HTML

## project structure
DATA_DIR = "/data/projects/capturingBias/research/framing/data/"
DATA_NPZ = DATA_DIR + "data2021.npz"

## load files
data = np.load(DATA_NPZ)

# pull the five named arrays out of the loaded archive in one pass
X_2D, X_3D, y_crowd, y_experts, y_combined = (
    data[key] for key in ('X_2D', 'X_3D', 'y_crowd', 'y_experts', 'y_combined')
)

# retrieve indices of labeled samples (entries with a label > -1)
experts_pilot_idx = np.where(y_experts > -1)[0]  # equal pilot subset

# Samples of the expert pilot subset that also carry a crowd label.
# Boolean-mask indexing keeps the integer dtype even when nothing matches;
# building the array from an empty Python list would give a float64 array,
# which cannot be used as an index afterwards.
crowd_pilot_idx = experts_pilot_idx[y_crowd[experts_pilot_idx] > -1]
crowd_all_idx = np.where(y_crowd > -1)[0]

combined_all_idx = np.where(y_combined > -1)[0]

In [47]:
def set_seed(seed=-1):
    """Seed Python's, NumPy's and PyTorch's random number generators.

    Parameters
    ----------
    seed : int, optional
        Seed to apply. A negative value (the default) draws a fresh seed
        at random from NumPy's global generator.

    Returns
    -------
    int
        The seed that was applied, so it can be logged and reused.

    Raises
    ------
    ValueError
        If ``seed`` is larger than ``2**32 - 1``, the largest value
        ``np.random.seed`` accepts.
    """
    max_seed = 2**32 - 1  # upper limit accepted by np.random.seed

    if seed < 0:
        # Ask for int64 explicitly: with the platform default integer the
        # bound 2**32 - 1 is out of range where that integer is 32 bits wide.
        # int() turns the NumPy scalar into a builtin int for random.seed().
        seed = int(np.random.randint(0, max_seed, dtype=np.int64))
    elif seed > max_seed:
        # Fail before any generator or the environment has been touched.
        raise ValueError(
            "seed must be at most 2**32 - 1, got {}".format(seed))

    # NOTE: hash randomisation is fixed when the interpreter starts, so this
    # only influences processes launched after this point, not the running one.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    return seed
    
print(set_seed())  # no seed given: one is drawn at random and printed so the run can be reproduced by passing it back to set_seed

40992887


# Majority Class

In [52]:
from collections import Counter

def majority_class(y):
    """Return the share of samples that carry the most frequent label.

    This equals the accuracy of a classifier that always predicts the
    most common label in ``y``.

    Parameters
    ----------
    y : iterable of hashable
        Labels, one per sample. Any iterable works, including generators,
        because the total is taken from the counts rather than ``len(y)``.

    Returns
    -------
    float
        Count of the most common label divided by the number of samples.

    Raises
    ------
    ValueError
        If ``y`` contains no samples.
    """
    counts = Counter(y)
    if not counts:
        # most_common(1) would be empty and the ratio is undefined
        raise ValueError("majority_class() requires at least one label")

    n_samples = sum(counts.values())
    return counts.most_common(1)[0][1] / n_samples

In [53]:
# Share of the most frequent label within each labeled subset.
majority_class_acc_pilot_experts = majority_class(y_experts[experts_pilot_idx])
majority_class_acc_pilot_crowd = majority_class(y_crowd[crowd_pilot_idx])
majority_class_acc_all_crowd = majority_class(y_crowd[crowd_all_idx])
majority_class_acc_all_combined = majority_class(y_combined[combined_all_idx])

# Report: header first, then one line per subset in a fixed order.
print("\nMajority class accuracy on dominant labels (baseline)")
print("\n".join(
    template.format(accuracy)
    for template, accuracy in (
        (" expert pilot: {:.4f}", majority_class_acc_pilot_experts),
        (" crowd pilot:  {:.4f}", majority_class_acc_pilot_crowd),
        (" crowd all:  {:.4f}", majority_class_acc_all_crowd),
        (" combined all: {:.4f}", majority_class_acc_all_combined),
    )
))


Majority class accuracy on dominant labels (baseline)
 expert pilot: 0.5345
 crowd pilot:  0.5690
 crowd all:  0.5487
 combined pilot: 0.5345
 combined all: 0.5310
