In [1]:
#%pip install sklearn
#%pip install torch

from math import sqrt
import os
from time import time
import random
import numpy as np
from scipy import stats
import pandas as pd
import sklearn
import torch
import torch.nn as nn
import torch.optim as optim
from IPython.display import display, HTML

## project structure
DATA_DIR = "/data/projects/capturingBias/research/framing/data/"  # change to "./" for current directory
DATA_NPZ = DATA_DIR + "data.npz"

## load files
data = np.load(DATA_NPZ)

X_2D = data['X_2D']
X_3D = data['X_3D']
y_crowd = data['y_crowd']
y_experts = data['y_experts']
y_combined = data['y_combined']

# retrieve indices of labeled samples
experts_idx = np.where(y_experts > -1)[0]
crowd_idx = np.where(y_crowd > -1)[0]

_crowd_unique_idx = np.setdiff1d(crowd_idx, experts_idx,
                                 assume_unique=True)
combined_idx = np.concatenate([_crowd_unique_idx, experts_idx])

In [2]:
def set_seed(seed=-1):
    if seed < 0:
        seed = np.random.randint(0, 2**32-1)

    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    
    return seed
    
print(set_seed())  # make reproducable

483020717


In [3]:
def create_splits(y, test_ratio=.5):
    train_idx = list()
    test_idx = list()
    
    strats = [np.where(y == lab)[0] for lab in np.unique(y) if lab > -1]
    for strat in strats:
        n = strat.shape[0]
        train_idx.append(strat[:int(n*(1-test_ratio))])
        test_idx.append(strat[int(n*(1-test_ratio)):])
        
    train_idx = np.concatenate(train_idx)
    test_idx = np.concatenate(test_idx)
    
    np.random.shuffle(train_idx)
    np.random.shuffle(test_idx)
    
    return (train_idx, test_idx)

def create_splits_one_hot(y):
    vec = -np.ones(y.shape[0])
    nonzero = y.nonzero()
    vec[nonzero[:,0]] = nonzero[:,1].float()
    
    return create_splits(vec)

def alpaydin_F_test(c1_acc_lst, c2_acc_lst):
    # acc_list := [np.array([acc_ij, acc_i(j+1)]) for i in 5, j in 2]
    assert len(c1_acc_lst) == len(c2_acc_lst)
    diff_acc_lst = [c1_acc_lst[i] - c2_acc_lst[i] for i in range(len(c1_acc_lst))]
    
    mean_lst = [np.mean(a) for a in diff_acc_lst] 
    var_lst = [ (diff_acc_lst[i][0] - mean_lst[i])**2
               +(diff_acc_lst[i][1] - mean_lst[i])**2 for i in range(len(diff_acc_lst))]
    
    numerator = sum([sum(a**2) for a in diff_acc_lst])
    denumerator = 2 * sum(var_lst)
    f = numerator / denumerator
    p_value = stats.f.sf(f, 10, 5)
    
    return (f, p_value, np.mean(mean_lst), np.mean(var_lst))    

In [4]:
# ensure same datasets per model
crowd_splits = [create_splits(y_crowd[crowd_idx]) for i in range(5)]
experts_splits = [create_splits(y_experts[experts_idx]) for i in range(5)]
combined_splits = [create_splits(y_combined[combined_idx]) for i in range(5)]

labels = ['experts', 'crowd', 'combined']

# Majority Class

In [5]:
from collections import Counter

def majority_class(y):
    ct = Counter(y)
    return ct.most_common(1)[0][1] / len(y)

In [6]:
majority_class_acc_crowd = majority_class(y_crowd[crowd_idx])
majority_class_acc_experts = majority_class(y_experts[experts_idx])
majority_class_acc_combined = majority_class(y_combined[combined_idx])

print("\nMajority class accuracy on dominant labels (baseline)")
print(" crowd labels:  {:.4f}".format(majority_class_acc_crowd))
print(" expert labels: {:.4f}".format(majority_class_acc_experts))
print(" combined labels: {:.4f}".format(majority_class_acc_combined))


Majority class accuracy on dominant labels (baseline)
 crowd labels:  0.6355
 expert labels: 0.5345
 combined labels: 0.5741


# Random Forest (supervised)

We start with a traditional, or 'shallow', machine learning model: random forest. Because random forest does not support iterative learning, we test both the crowd and expert sets separately.

We use stratified cross validation to reduce the effects caused by the small size of the data set.

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


N_ESTIMATORS = [100, 250, 500, 750, 1000, 2000]

def random_forest(X, y, index, splits, n_estimators=N_ESTIMATORS):
    n_samples = X[index].shape[0]
    acc_est_lst = list()
    for n_estimators in N_ESTIMATORS:
        print("Training with {} estimators".format(n_estimators))
        acc_lst = list()
        for fold_i in range(5):
            print(" Starting outer fold {} / {}".format(fold_i+1, 5))
            acc_inner = list()
            split_a_idx, split_b_idx  = splits[fold_i]
            for fold_j in range(2):
                print("  Starting inner fold {} / {}".format(fold_j+1, 2), end='')
                if fold_j % 2 == 0:
                    train_fold_idx, test_fold_idx  = split_a_idx, split_b_idx
                else:
                    train_fold_idx, test_fold_idx  = split_b_idx, split_a_idx

                train_idx = index[train_fold_idx]
                test_idx = index[test_fold_idx]

                model = RandomForestClassifier(n_estimators=n_estimators)
                model.fit(X[train_idx], y[train_idx])

                y_pred = model.predict(X[test_idx])
                fold_acc = accuracy_score(y[test_idx], y_pred)

                acc_inner.append(fold_acc)
                print(" (acc: {:.4f})".format(fold_acc))

            acc_lst.append(np.array(acc_inner))
        print(" => mean acc: {:.4f}\n".format(np.mean(np.array([np.mean(inner) for inner in acc_lst]))))
        acc_est_lst.append(acc_lst)
    
    return acc_est_lst

In [8]:
print("=== Results of supervised learning on expert dominant labels ===")
random_forest_acc_experts_dominant = random_forest(X_2D,
                                                   y_experts, 
                                                   experts_idx,
                                                   experts_splits)
table = {'p-values': N_ESTIMATORS}
table.update({est: list() for est in N_ESTIMATORS})
nhypotheses = len(random_forest_acc_experts_dominant)
for i in range(nhypotheses):
    for e in range(i+1):
        table[N_ESTIMATORS[i]].append(np.nan)
    for j in range(i+1, nhypotheses):
        f, p, mean, variance = alpaydin_F_test(random_forest_acc_experts_dominant[i],
                                               random_forest_acc_experts_dominant[j])
        table[N_ESTIMATORS[i]].append(p)
        #print("RF {} vs {} estimators".format(N_ESTIMATORS[i], N_ESTIMATORS[j]))
        #print(" f: {:.4f}, p: {:.4f}, mean: {:.4f}, var: {:.4f}".format(f, p, mean, variance))

significance = pd.DataFrame(table)
display(significance)

=== Results of supervised learning on expert dominant labels ===
Training with 100 estimators
 Starting outer fold 1 / 5
  Starting inner fold 1 / 2 (acc: 0.6000)
  Starting inner fold 2 / 2 (acc: 0.5357)
 Starting outer fold 2 / 5
  Starting inner fold 1 / 2 (acc: 0.6333)
  Starting inner fold 2 / 2 (acc: 0.5000)
 Starting outer fold 3 / 5
  Starting inner fold 1 / 2 (acc: 0.5000)
  Starting inner fold 2 / 2 (acc: 0.6429)
 Starting outer fold 4 / 5
  Starting inner fold 1 / 2 (acc: 0.5333)
  Starting inner fold 2 / 2 (acc: 0.5357)
 Starting outer fold 5 / 5
  Starting inner fold 1 / 2 (acc: 0.6333)
  Starting inner fold 2 / 2 (acc: 0.5714)
 => mean acc: 0.5686

Training with 250 estimators
 Starting outer fold 1 / 5
  Starting inner fold 1 / 2 (acc: 0.5667)
  Starting inner fold 2 / 2 (acc: 0.5714)
 Starting outer fold 2 / 5
  Starting inner fold 1 / 2 (acc: 0.5667)
  Starting inner fold 2 / 2 (acc: 0.5714)
 Starting outer fold 3 / 5
  Starting inner fold 1 / 2 (acc: 0.6000)
  Startin

Unnamed: 0,p-values,100,250,500,750,1000,2000
0,100,,,,,,
1,250,0.680373,,,,,
2,500,0.765636,0.763176,,,,
3,750,0.511119,0.116262,0.177892,,,
4,1000,0.619725,0.173572,0.492158,0.273369,,
5,2000,0.603424,0.185773,0.397377,0.70154,0.416758,


In [9]:
print("=== Results of supervised learning on crowd dominant labels ===")
random_forest_acc_crowd_dominant = random_forest(X_2D,
                                                 y_crowd,
                                                 crowd_idx,
                                                 crowd_splits)
table = {'p-values': N_ESTIMATORS}
table.update({est: list() for est in N_ESTIMATORS})
nhypotheses = len(random_forest_acc_crowd_dominant)
for i in range(nhypotheses):
    for e in range(i+1):
        table[N_ESTIMATORS[i]].append(np.nan)
    for j in range(i+1, nhypotheses):
        f, p, mean, variance = alpaydin_F_test(random_forest_acc_crowd_dominant[i],
                                               random_forest_acc_crowd_dominant[j])
        table[N_ESTIMATORS[i]].append(p)
        #print("RF {} vs {} estimators".format(N_ESTIMATORS[i], N_ESTIMATORS[j]))
        #print(" f: {:.4f}, p: {:.4f}, mean: {:.4f}, var: {:.4f}".format(f, p, mean, variance))

significance = pd.DataFrame(table)
display(significance)

=== Results of supervised learning on crowd dominant labels ===
Training with 100 estimators
 Starting outer fold 1 / 5
  Starting inner fold 1 / 2 (acc: 0.6111)
  Starting inner fold 2 / 2 (acc: 0.5472)
 Starting outer fold 2 / 5
  Starting inner fold 1 / 2 (acc: 0.6667)
  Starting inner fold 2 / 2 (acc: 0.5283)
 Starting outer fold 3 / 5
  Starting inner fold 1 / 2 (acc: 0.6111)
  Starting inner fold 2 / 2 (acc: 0.5094)
 Starting outer fold 4 / 5
  Starting inner fold 1 / 2 (acc: 0.6296)
  Starting inner fold 2 / 2 (acc: 0.5472)
 Starting outer fold 5 / 5
  Starting inner fold 1 / 2 (acc: 0.5926)
  Starting inner fold 2 / 2 (acc: 0.5849)
 => mean acc: 0.5828

Training with 250 estimators
 Starting outer fold 1 / 5
  Starting inner fold 1 / 2 (acc: 0.6111)
  Starting inner fold 2 / 2 (acc: 0.5094)
 Starting outer fold 2 / 5
  Starting inner fold 1 / 2 (acc: 0.5926)
  Starting inner fold 2 / 2 (acc: 0.5472)
 Starting outer fold 3 / 5
  Starting inner fold 1 / 2 (acc: 0.5741)
  Starting

Unnamed: 0,p-values,100,250,500,750,1000,2000
0,100,,,,,,
1,250,0.622192,,,,,
2,500,0.596831,0.565088,,,,
3,750,0.418151,0.499507,0.579882,,,
4,1000,0.765908,0.44426,0.487105,0.721664,,
5,2000,0.734651,0.688049,0.703507,0.694303,0.534881,


In [10]:
print("=== Results of supervised learning on combined dominant labels ===")
random_forest_acc_combined_dominant = random_forest(X_2D,
                                                    y_combined,
                                                    combined_idx,
                                                    combined_splits)
table = {'p-values': N_ESTIMATORS}
table.update({est: list() for est in N_ESTIMATORS})
nhypotheses = len(random_forest_acc_combined_dominant)
for i in range(nhypotheses):
    for e in range(i+1):
        table[N_ESTIMATORS[i]].append(np.nan)
    for j in range(i+1, nhypotheses):
        f, p, mean, variance = alpaydin_F_test(random_forest_acc_combined_dominant[i],
                                               random_forest_acc_combined_dominant[j])
        table[N_ESTIMATORS[i]].append(p)
        #print("RF {} vs {} estimators".format(N_ESTIMATORS[i], N_ESTIMATORS[j]))
        #print(" f: {:.4f}, p: {:.4f}, mean: {:.4f}, var: {:.4f}".format(f, p, mean, variance))

significance = pd.DataFrame(table)
display(significance)

=== Results of supervised learning on combined dominant labels ===
Training with 100 estimators
 Starting outer fold 1 / 5
  Starting inner fold 1 / 2 (acc: 0.6111)
  Starting inner fold 2 / 2 (acc: 0.4815)
 Starting outer fold 2 / 5
  Starting inner fold 1 / 2 (acc: 0.6481)
  Starting inner fold 2 / 2 (acc: 0.6296)
 Starting outer fold 3 / 5
  Starting inner fold 1 / 2 (acc: 0.5926)
  Starting inner fold 2 / 2 (acc: 0.5741)
 Starting outer fold 4 / 5
  Starting inner fold 1 / 2 (acc: 0.6296)
  Starting inner fold 2 / 2 (acc: 0.5370)
 Starting outer fold 5 / 5
  Starting inner fold 1 / 2 (acc: 0.6296)
  Starting inner fold 2 / 2 (acc: 0.5185)
 => mean acc: 0.5852

Training with 250 estimators
 Starting outer fold 1 / 5
  Starting inner fold 1 / 2 (acc: 0.6296)
  Starting inner fold 2 / 2 (acc: 0.5370)
 Starting outer fold 2 / 5
  Starting inner fold 1 / 2 (acc: 0.6111)
  Starting inner fold 2 / 2 (acc: 0.5185)
 Starting outer fold 3 / 5
  Starting inner fold 1 / 2 (acc: 0.6296)
  Start

Unnamed: 0,p-values,100,250,500,750,1000,2000
0,100,,,,,,
1,250,0.41078,,,,,
2,500,0.541487,0.651522,,,,
3,750,0.6549,0.676752,0.469902,,,
4,1000,0.306475,0.80197,0.729304,0.572469,,
5,2000,0.54589,0.784673,0.685465,0.664206,0.534881,


# SGD

In [11]:
from sklearn.linear_model import PassiveAggressiveClassifier

def pac(X, y, index, splits):
    n_samples = X[index].shape[0]
    acc_lst = list()
    for fold_i in range(5):
        print(" Starting outer fold {} / {}".format(fold_i+1, 5), end='')
        acc_inner = list()
        split_a_idx, split_b_idx  = splits[fold_i]
        for fold_j in range(2):
            print("  Starting inner fold {} / {}".format(fold_j+1, 2), end='')
            if fold_j % 2 == 0:
                train_fold_idx, test_fold_idx  = split_a_idx, split_b_idx
            else:
                train_fold_idx, test_fold_idx  = split_b_idx, split_a_idx

            train_idx = index[train_fold_idx]
            test_idx = index[test_fold_idx]

            model = PassiveAggressiveClassifier(max_iter=2000, warm_start=False)
            model.fit(X[train_idx], y[train_idx])
 
            y_pred = model.predict(X[test_idx])
            fold_acc = accuracy_score(y[test_idx], y_pred)

            acc_inner.append(fold_acc)
            print(" (acc: {:.4f})".format(fold_acc))
            
        acc_lst.append(np.array(acc_inner))
    print(" => mean acc: {:.4f}\n".format(np.mean(np.array([np.mean(inner) for inner in acc_lst]))))
    
    return acc_lst

In [12]:
print("=== Results of supervised learning on expert dominant labels ===")
pac_acc_experts_dominant = pac(X_2D,
                                  y_experts, 
                                  experts_idx,
                                  experts_splits)

print("=== Results of supervised learning on crowd dominant labels ===")
pac_acc_crowd_dominant = pac(X_2D,
                                y_crowd,
                                crowd_idx,
                                crowd_splits)

print("=== Results of supervised learning on combined dominant labels ===")
pac_acc_combined_dominant = pac(X_2D,
                                   y_combined,
                                   combined_idx,
                                   combined_splits)

pac_acc = [pac_acc_experts_dominant, pac_acc_crowd_dominant, pac_acc_combined_dominant]

print("= p-values =")
table = {'p-values': labels}
table.update({lab: list() for lab in labels})
nlabels = len(labels)
for i in range(nlabels):
    for e in range(i+1):
        table[labels[i]].append(np.nan)
    for j in range(i+1, nlabels):
        f, p, mean, variance = alpaydin_F_test(pac_acc[i],
                                               pac_acc[j])
        table[labels[i]].append(p)
        #print("RF {} vs {} estimators".format(pac_acc[i], pac_acc[j]))
        #print(" f: {:.4f}, p: {:.4f}, mean: {:.4f}, var: {:.4f}".format(f, p, mean, variance))

significance = pd.DataFrame(table)
display(significance)

=== Results of supervised learning on expert dominant labels ===
 Starting outer fold 1 / 5  Starting inner fold 1 / 2 (acc: 0.5333)
  Starting inner fold 2 / 2 (acc: 0.5714)
 Starting outer fold 2 / 5  Starting inner fold 1 / 2 (acc: 0.5333)
  Starting inner fold 2 / 2 (acc: 0.6071)
 Starting outer fold 3 / 5  Starting inner fold 1 / 2 (acc: 0.5333)
  Starting inner fold 2 / 2 (acc: 0.5000)
 Starting outer fold 4 / 5  Starting inner fold 1 / 2 (acc: 0.5333)
  Starting inner fold 2 / 2 (acc: 0.5714)
 Starting outer fold 5 / 5  Starting inner fold 1 / 2 (acc: 0.5333)
  Starting inner fold 2 / 2 (acc: 0.5357)
 => mean acc: 0.5452

=== Results of supervised learning on crowd dominant labels ===
 Starting outer fold 1 / 5  Starting inner fold 1 / 2 (acc: 0.5185)
  Starting inner fold 2 / 2 (acc: 0.4717)
 Starting outer fold 2 / 5  Starting inner fold 1 / 2 (acc: 0.5370)
  Starting inner fold 2 / 2 (acc: 0.4528)
 Starting outer fold 3 / 5  Starting inner fold 1 / 2 (acc: 0.5370)
  Starting 

Unnamed: 0,p-values,experts,crowd,combined
0,experts,,,
1,crowd,0.507225,,
2,combined,0.801778,0.557695,


# PyTorch Preparations

In [13]:
## convert numpy arrays to PyTorch tensors
X_2D = torch.from_numpy(X_2D)
X_3D = torch.from_numpy(X_3D)

y_crowd = torch.from_numpy(y_crowd)
y_experts = torch.from_numpy(y_experts)
y_combined = torch.from_numpy(y_combined)

In [14]:
def categorical_accuracy(y_hat, y):
    # y := 1D array of class labels
    # y_hat := 2D array of one-hot class labels
    _, labels = y_hat.max(dim=1)
    return torch.mean(torch.eq(labels, y).float())

def fit(model, X, y, index, splits, lr=0.01, l2norm=0.01, n_epoch=250, patience=-1):
    n_samples = X[index].shape[0]
    acc_lst = list()
    for fold_i in range(5):
        print("Starting outer fold {} / {}".format(fold_i+1, 5))
        acc_inner = list()
        split_a_idx, split_b_idx  = splits[fold_i]

        for fold_j in range(2):
            print(" Starting inner fold {} / {}".format(fold_j+1, 2), end='')
            if fold_j % 2 == 0:
                train_fold_idx, test_fold_idx  = split_a_idx, split_b_idx
            else:
                train_fold_idx, test_fold_idx  = split_b_idx, split_a_idx
                
            train_idx = index[train_fold_idx]
            test_idx = index[test_fold_idx]
            
            model.init()
            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=l2norm)
            criterion = nn.CrossEntropyLoss()

            # early stopping
            patience_left = patience
            best_fold_score = -1
            delta = 1e-4
            best_fold_state = None
            best_fold_state_opt = None
        
            for epoch in range(n_epoch):
                model.train()

                y_hat = model(X[train_idx].float())
                train_acc = categorical_accuracy(y_hat, y[train_idx])
                train_loss = criterion(y_hat, y[train_idx].long())
                optimizer.zero_grad()
                train_loss.backward()
                optimizer.step()

                model.eval()
                test_loss = None
                with torch.no_grad():
                    y_hat = model(X[test_idx].float())
                    test_acc = categorical_accuracy(y_hat, y[test_idx])
                    test_loss = criterion(y_hat, y[test_idx].long())

                train_loss = float(train_loss.item())
                test_loss = float(test_loss.item())
                
                if best_fold_score < 0:
                    best_fold_score = test_loss
                    best_fold_state = model.state_dict()
                    best_fold_state_opt = optimizer.state_dict()

                if patience <= 0:
                    continue
                if test_loss >= best_fold_score - delta:
                    patience_left -= 1
                else:
                    best_fold_score = test_loss
                    best_fold_state = model.state_dict()
                    best_fold_state_opt = optimizer.state_dict()
                    patience_left = patience
                if patience_left <= 0:
                    model.load_state_dict(best_fold_state)
                    optimizer.load_state_dict(best_fold_state_opt)
                    break
            
            # do a final run over the test set after loading a previous state
            with torch.no_grad():
                y_hat = model(X[test_idx].float())
                test_acc = categorical_accuracy(y_hat, y[test_idx])
                test_loss = criterion(y_hat, y[test_idx].long())
            
            test_loss = float(test_loss.item())
            print(" (acc: {:.4f})".format(test_acc))
            acc_inner.append(test_acc)
    
        acc_lst.append(np.array(acc_inner))
        
    print(" => mean acc: {:.4f}\n".format(np.mean(np.array([np.mean(inner) for inner in acc_lst]))))

    return acc_lst

# Neural Network

In [15]:
class ClassifierNN(nn.Module):
    """Simple Neural Network Classifier"""

    def __init__(self, input_dim, output_dim, p_dropout=0.05):
        super().__init__()
        hidden_dim = (input_dim-output_dim)//2
        
        self.layers = nn.ModuleList()
        self.layers.append(nn.Sequential(
                            nn.Linear(input_dim, hidden_dim),
                            nn.ReLU(inplace=True),
                            nn.Dropout(p=p_dropout)))
            
        self.layers.append(nn.Sequential(
                            nn.Linear(hidden_dim, output_dim),
                            nn.ReLU(inplace=True)))
        
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, X):
        for layer in self.layers:
            X = layer(X)                          
                           
        return self.softmax(X)
        
    def init(self):
        for param in self.parameters():
            nn.init.normal_(param)

In [16]:
## hyperparameters
lr = 0.01
n_epoch = 250
p_dropout = 0.1

## define model
indim = X_2D.shape[1]
outdim = np.unique(y_experts[experts_idx]).shape[0]
assert outdim == np.unique(y_crowd[crowd_idx]).shape[0]

model = ClassifierNN(input_dim=indim,
                     output_dim=outdim,
                     p_dropout=p_dropout)

print("=== Results on expert dominant labels ===")
neural_net_acc_dominant_experts = fit(model, X_2D, y_experts, experts_idx, experts_splits, lr=lr, n_epoch=n_epoch)

print("\n=== Results on crowd dominant labels ===")
neural_net_acc_dominant_crowd = fit(model, X_2D, y_crowd, crowd_idx, crowd_splits, lr=lr, n_epoch=n_epoch)

print("\n=== Results on combined dominant labels ===")
neural_net_acc_dominant_combined = fit(model, X_2D, y_combined, combined_idx, combined_splits, lr=lr, n_epoch=n_epoch)

=== Results on expert dominant labels ===
Starting outer fold 1 / 5
 Starting inner fold 1 / 2 (acc: 0.6333)
 Starting inner fold 2 / 2 (acc: 0.4643)
Starting outer fold 2 / 5
 Starting inner fold 1 / 2 (acc: 0.4667)
 Starting inner fold 2 / 2 (acc: 0.6071)
Starting outer fold 3 / 5
 Starting inner fold 1 / 2 (acc: 0.5667)
 Starting inner fold 2 / 2 (acc: 0.5357)
Starting outer fold 4 / 5
 Starting inner fold 1 / 2 (acc: 0.5333)
 Starting inner fold 2 / 2 (acc: 0.5714)
Starting outer fold 5 / 5
 Starting inner fold 1 / 2 (acc: 0.5333)
 Starting inner fold 2 / 2 (acc: 0.5000)
 => mean acc: 0.5412


=== Results on crowd dominant labels ===
Starting outer fold 1 / 5
 Starting inner fold 1 / 2 (acc: 0.5556)
 Starting inner fold 2 / 2 (acc: 0.6415)
Starting outer fold 2 / 5
 Starting inner fold 1 / 2 (acc: 0.5000)
 Starting inner fold 2 / 2 (acc: 0.5283)
Starting outer fold 3 / 5
 Starting inner fold 1 / 2 (acc: 0.5926)
 Starting inner fold 2 / 2 (acc: 0.5283)
Starting outer fold 4 / 5
 Sta

In [17]:
nn_acc = [neural_net_acc_dominant_experts, neural_net_acc_dominant_crowd, neural_net_acc_dominant_combined]

print("= p-values =")
table = {'p-values': labels}
table.update({lab: list() for lab in labels})
nlabels = len(labels)
for i in range(nlabels):
    for e in range(i+1):
        table[labels[i]].append(np.nan)
    for j in range(i+1, nlabels):
        f, p, mean, variance = alpaydin_F_test(nn_acc[i],
                                               nn_acc[j])
        table[labels[i]].append(p)
        #print("RF {} vs {} estimators".format(nn_acc[i], nn_acc[j]))
        #print(" f: {:.4f}, p: {:.4f}, mean: {:.4f}, var: {:.4f}".format(f, p, mean, variance))

significance = pd.DataFrame(table)
display(significance)

= p-values =


Unnamed: 0,p-values,experts,crowd,combined
0,experts,,,
1,crowd,0.708992,,
2,combined,0.687674,0.236686,


In [18]:
rf_acc = [random_forest_acc_experts_dominant, random_forest_acc_crowd_dominant, random_forest_acc_combined_dominant]

nhypotheses = len(N_ESTIMATORS)
for k in range(nhypotheses):
    table = {'p-values - {} estimators'.format(N_ESTIMATORS[k]): labels}
    table.update({lab: list() for lab in labels})

    nlabels = len(labels)
    for i in range(nlabels):
        for e in range(nlabels):
            if e != i:
                table[labels[i]].append(np.nan)
            else:
                f, p, mean, variance = alpaydin_F_test(nn_acc[i],
                                                       rf_acc[j][k])
                table[labels[i]].append(p)
                #print("RF {} vs {} estimators".format(nn_acc[i], nn_acc[j]))
                #print(" f: {:.4f}, p: {:.4f}, mean: {:.4f}, var: {:.4f}".format(f, p, mean, variance))

    significance = pd.DataFrame(table)
    display(significance)

Unnamed: 0,p-values - 100 estimators,experts,crowd,combined
0,experts,0.476178,,
1,crowd,,0.442656,
2,combined,,,0.691388


Unnamed: 0,p-values - 250 estimators,experts,crowd,combined
0,experts,0.761688,,
1,crowd,,0.689395,
2,combined,,,0.816627


Unnamed: 0,p-values - 500 estimators,experts,crowd,combined
0,experts,0.808665,,
1,crowd,,0.741673,
2,combined,,,0.765724


Unnamed: 0,p-values - 750 estimators,experts,crowd,combined
0,experts,0.751336,,
1,crowd,,0.755569,
2,combined,,,0.803466


Unnamed: 0,p-values - 1000 estimators,experts,crowd,combined
0,experts,0.782701,,
1,crowd,,0.791536,
2,combined,,,0.811922


Unnamed: 0,p-values - 2000 estimators,experts,crowd,combined
0,experts,0.814085,,
1,crowd,,0.740406,
2,combined,,,0.809711
