In [1]:
#%pip install sklearn
#%pip install torch

from math import sqrt
import os
from time import time
import random
import numpy as np
import sklearn
import torch
import torch.nn as nn
import torch.optim as optim

## project structure
DATA_DIR = "/data/projects/capturingBias/research/framing/data/"  # change to "./" for current directory
DATA_NPZ = DATA_DIR + "data.npz"

## load files
data = np.load(DATA_NPZ)

X_2D = data['X_2D']
X_3D = data['X_3D']
y_likert_crowd = data['y_likert_crowd']
y_likert_experts = data['y_likert_experts']
y_dominant_crowd = data['y_dominant_crowd']
y_dominant_experts = data['y_dominant_experts']
y_likert_combined = data['y_likert_combined']
y_dominant_combined = data['y_dominant_combined']


# likert
likert_expert_idx = np.where(y_likert_experts > -1)[0]
likert_crowd_idx = np.setdiff1d(np.where(y_likert_crowd > -1)[0],
                                likert_expert_idx,
                                assume_unique=True)
likert_combined_idx = np.concatenate([likert_crowd_idx,
                                      likert_expert_idx])

# dominant
dominant_expert_idx = np.where(y_dominant_experts > -1)[0]
dominant_crowd_idx = np.setdiff1d(np.where(y_dominant_crowd > -1)[0],
                                  dominant_expert_idx,
                                  assume_unique=True)
dominant_combined_idx = np.concatenate([dominant_crowd_idx,
                                        dominant_expert_idx])

In [2]:
def set_seed(seed=-1):
    if seed < 0:
        seed = np.random.randint(0, 2**32-1)

    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    
#set_seed(47)  # make reproducable

In [3]:
def create_splits(y, test_ratio=.2):
    train_idx = list()
    test_idx = list()
    
    strats = [np.where(y == lab)[0] for lab in np.unique(y) if lab > -1]
    for strat in strats:
        n = strat.shape[0]
        train_idx.append(strat[:int(n*(1-test_ratio))])
        test_idx.append(strat[int(n*(1-test_ratio)):])
        
    train_idx = np.concatenate(train_idx)
    test_idx = np.concatenate(test_idx)
    
    np.random.shuffle(train_idx)
    np.random.shuffle(test_idx)
    
    return (train_idx, test_idx)

def create_splits_one_hot(y):
    vec = -np.ones(y.shape[0])
    nonzero = y.nonzero()
    vec[nonzero[:,0]] = nonzero[:,1].float()
    
    return create_splits(vec)

# Majority Class

In [4]:
from collections import Counter


def majority_class(y):
    ct = Counter(y)
    return ct.most_common(1)[0][1] / len(y)

In [5]:
majority_class_acc_crowd_likert = majority_class(y_likert_crowd[likert_crowd_idx])
majority_class_acc_experts_likert = majority_class(y_likert_experts[likert_expert_idx])
majority_class_acc_combined_likert = majority_class(y_likert_combined[likert_combined_idx])

print("Majority class accuracy on Likert labels (baseline)")
print(" crowd labels:  {:.4f}".format(majority_class_acc_crowd_likert))
print(" expert labels: {:.4f}".format(majority_class_acc_experts_likert))
print(" combined labels: {:.4f}".format(majority_class_acc_combined_likert))

majority_class_acc_crowd_dominant = majority_class(y_dominant_crowd[dominant_crowd_idx])
majority_class_acc_experts_dominant = majority_class(y_dominant_experts[dominant_expert_idx])
majority_class_acc_combined_dominant = majority_class(y_dominant_combined[dominant_combined_idx])

print("\nMajority class accuracy on Dominant labels (baseline)")
print(" crowd labels:  {:.4f}".format(majority_class_acc_crowd_dominant))
print(" expert labels: {:.4f}".format(majority_class_acc_experts_dominant))
print(" combined labels: {:.4f}".format(majority_class_acc_combined_dominant))

Majority class accuracy on Likert labels (baseline)
 crowd labels:  0.2787
 expert labels: 0.2414
 combined labels: 0.2521

Majority class accuracy on Dominant labels (baseline)
 crowd labels:  0.6250
 expert labels: 0.5345
 combined labels: 0.5789


# Random Forest (supervised)

We start with a traditional, or 'shallow', machine learning model: random forest. Because random forest does not support iterative learning, we test both the crowd and expert sets separately.

We use stratified cross validation to reduce the effects caused by the small size of the data set.

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


N_ESTIMATORS = [100, 250, 500, 750, 1000, 2000]
N_FOLDS = 10

def random_forest(X, y, index, n_folds=N_FOLDS, n_estimators=N_ESTIMATORS):
    n_samples = X[index].shape[0]
    for n_estimators in N_ESTIMATORS:
        print("Training with {} estimators".format(n_estimators))
        acc = 0
        for fold_i in range(N_FOLDS):
            print(" Starting fold {} / {}".format(fold_i+1, N_FOLDS), end='')
            
            train_fold_idx, test_fold_idx  = create_splits(y[index])
            train_idx = index[train_fold_idx]
            test_idx = index[test_fold_idx]
        
            model = RandomForestClassifier(n_estimators=n_estimators)
            model.fit(X[train_idx], y[train_idx])
            
            y_pred = model.predict(X[test_idx])
            fold_acc = accuracy_score(y[test_idx], y_pred)
        
            acc += fold_acc
            print(" (acc: {:.4f})".format(fold_acc))
            
        acc /= N_FOLDS
        print("Mean accuracy on test set: {:.4f}\n".format(acc))
    
    return acc

In [7]:
print("=== Results of supervised learning on expert likert labels ===")
random_forest_acc_experts_likert = random_forest(X_2D,
                                                 y_likert_experts, 
                                                 likert_expert_idx)

=== Results of supervised learning on expert likert labels ===
Training with 100 estimators
 Starting fold 1 / 10 (acc: 0.1333)
 Starting fold 2 / 10 (acc: 0.2000)
 Starting fold 3 / 10 (acc: 0.1333)
 Starting fold 4 / 10 (acc: 0.2000)
 Starting fold 5 / 10 (acc: 0.2000)
 Starting fold 6 / 10 (acc: 0.1333)
 Starting fold 7 / 10 (acc: 0.0667)
 Starting fold 8 / 10 (acc: 0.2000)
 Starting fold 9 / 10 (acc: 0.1333)
 Starting fold 10 / 10 (acc: 0.1333)
Mean accuracy on test set: 0.1533

Training with 250 estimators
 Starting fold 1 / 10 (acc: 0.1333)
 Starting fold 2 / 10 (acc: 0.2000)
 Starting fold 3 / 10 (acc: 0.2000)
 Starting fold 4 / 10 (acc: 0.1333)
 Starting fold 5 / 10 (acc: 0.1333)
 Starting fold 6 / 10 (acc: 0.2000)
 Starting fold 7 / 10 (acc: 0.2000)
 Starting fold 8 / 10 (acc: 0.2000)
 Starting fold 9 / 10 (acc: 0.2000)
 Starting fold 10 / 10 (acc: 0.2000)
Mean accuracy on test set: 0.1800

Training with 500 estimators
 Starting fold 1 / 10 (acc: 0.2000)
 Starting fold 2 / 10 

In [8]:
print("=== Results of supervised learning on crowd likert labels ===")
random_forest_acc_crowd_likert = random_forest(X_2D,
                                               y_likert_crowd,
                                               likert_crowd_idx)

=== Results of supervised learning on crowd likert labels ===
Training with 100 estimators
 Starting fold 1 / 10 (acc: 0.2000)
 Starting fold 2 / 10 (acc: 0.2000)
 Starting fold 3 / 10 (acc: 0.2667)
 Starting fold 4 / 10 (acc: 0.0667)
 Starting fold 5 / 10 (acc: 0.2667)
 Starting fold 6 / 10 (acc: 0.2667)
 Starting fold 7 / 10 (acc: 0.3333)
 Starting fold 8 / 10 (acc: 0.0667)
 Starting fold 9 / 10 (acc: 0.2000)
 Starting fold 10 / 10 (acc: 0.2667)
Mean accuracy on test set: 0.2133

Training with 250 estimators
 Starting fold 1 / 10 (acc: 0.2000)
 Starting fold 2 / 10 (acc: 0.2000)
 Starting fold 3 / 10 (acc: 0.1333)
 Starting fold 4 / 10 (acc: 0.1333)
 Starting fold 5 / 10 (acc: 0.2000)
 Starting fold 6 / 10 (acc: 0.2667)
 Starting fold 7 / 10 (acc: 0.2000)
 Starting fold 8 / 10 (acc: 0.2667)
 Starting fold 9 / 10 (acc: 0.1333)
 Starting fold 10 / 10 (acc: 0.2667)
Mean accuracy on test set: 0.2000

Training with 500 estimators
 Starting fold 1 / 10 (acc: 0.2667)
 Starting fold 2 / 10 (

In [9]:
print("=== Results of supervised learning on combined likert labels ===")
random_forest_acc_combined_likert = random_forest(X_2D,
                                                  y_likert_combined,
                                                  likert_combined_idx)

=== Results of supervised learning on combined likert labels ===
Training with 100 estimators
 Starting fold 1 / 10 (acc: 0.2000)
 Starting fold 2 / 10 (acc: 0.1600)
 Starting fold 3 / 10 (acc: 0.2400)
 Starting fold 4 / 10 (acc: 0.1200)
 Starting fold 5 / 10 (acc: 0.2400)
 Starting fold 6 / 10 (acc: 0.2000)
 Starting fold 7 / 10 (acc: 0.2000)
 Starting fold 8 / 10 (acc: 0.0800)
 Starting fold 9 / 10 (acc: 0.2000)
 Starting fold 10 / 10 (acc: 0.2000)
Mean accuracy on test set: 0.1840

Training with 250 estimators
 Starting fold 1 / 10 (acc: 0.1600)
 Starting fold 2 / 10 (acc: 0.1200)
 Starting fold 3 / 10 (acc: 0.2000)
 Starting fold 4 / 10 (acc: 0.2400)
 Starting fold 5 / 10 (acc: 0.2400)
 Starting fold 6 / 10 (acc: 0.2000)
 Starting fold 7 / 10 (acc: 0.1600)
 Starting fold 8 / 10 (acc: 0.1200)
 Starting fold 9 / 10 (acc: 0.2000)
 Starting fold 10 / 10 (acc: 0.2000)
Mean accuracy on test set: 0.1840

Training with 500 estimators
 Starting fold 1 / 10 (acc: 0.2000)
 Starting fold 2 / 1

In [10]:
print("=== Results of supervised learning on expert dominant labels ===")
random_forest_acc_experts_dominant = random_forest(X_2D,
                                                   y_dominant_experts, 
                                                   dominant_expert_idx)

=== Results of supervised learning on expert dominant labels ===
Training with 100 estimators
 Starting fold 1 / 10 (acc: 0.6154)
 Starting fold 2 / 10 (acc: 0.5385)
 Starting fold 3 / 10 (acc: 0.7692)
 Starting fold 4 / 10 (acc: 0.5385)
 Starting fold 5 / 10 (acc: 0.7692)
 Starting fold 6 / 10 (acc: 0.6154)
 Starting fold 7 / 10 (acc: 0.5385)
 Starting fold 8 / 10 (acc: 0.5385)
 Starting fold 9 / 10 (acc: 0.5385)
 Starting fold 10 / 10 (acc: 0.6154)
Mean accuracy on test set: 0.6077

Training with 250 estimators
 Starting fold 1 / 10 (acc: 0.5385)
 Starting fold 2 / 10 (acc: 0.6923)
 Starting fold 3 / 10 (acc: 0.6154)
 Starting fold 4 / 10 (acc: 0.5385)
 Starting fold 5 / 10 (acc: 0.6923)
 Starting fold 6 / 10 (acc: 0.6923)
 Starting fold 7 / 10 (acc: 0.4615)
 Starting fold 8 / 10 (acc: 0.6154)
 Starting fold 9 / 10 (acc: 0.5385)
 Starting fold 10 / 10 (acc: 0.6154)
Mean accuracy on test set: 0.6000

Training with 500 estimators
 Starting fold 1 / 10 (acc: 0.6154)
 Starting fold 2 / 1

In [11]:
print("=== Results of supervised learning on crowd dominant labels ===")
random_forest_acc_crowd_dominant = random_forest(X_2D,
                                                 y_dominant_crowd,
                                                 dominant_crowd_idx)

=== Results of supervised learning on crowd dominant labels ===
Training with 100 estimators
 Starting fold 1 / 10 (acc: 0.4167)
 Starting fold 2 / 10 (acc: 0.5000)
 Starting fold 3 / 10 (acc: 0.5000)
 Starting fold 4 / 10 (acc: 0.5000)
 Starting fold 5 / 10 (acc: 0.5833)
 Starting fold 6 / 10 (acc: 0.5000)
 Starting fold 7 / 10 (acc: 0.5000)
 Starting fold 8 / 10 (acc: 0.5000)
 Starting fold 9 / 10 (acc: 0.5000)
 Starting fold 10 / 10 (acc: 0.4167)
Mean accuracy on test set: 0.4917

Training with 250 estimators
 Starting fold 1 / 10 (acc: 0.5000)
 Starting fold 2 / 10 (acc: 0.4167)
 Starting fold 3 / 10 (acc: 0.5000)
 Starting fold 4 / 10 (acc: 0.5000)
 Starting fold 5 / 10 (acc: 0.5000)
 Starting fold 6 / 10 (acc: 0.4167)
 Starting fold 7 / 10 (acc: 0.5000)
 Starting fold 8 / 10 (acc: 0.4167)
 Starting fold 9 / 10 (acc: 0.4167)
 Starting fold 10 / 10 (acc: 0.4167)
Mean accuracy on test set: 0.4583

Training with 500 estimators
 Starting fold 1 / 10 (acc: 0.5000)
 Starting fold 2 / 10

In [12]:
print("=== Results of supervised learning on combined dominant labels ===")
random_forest_acc_combined_dominant = random_forest(X_2D,
                                                    y_dominant_combined,
                                                    dominant_combined_idx)

=== Results of supervised learning on combined dominant labels ===
Training with 100 estimators
 Starting fold 1 / 10 (acc: 0.5417)
 Starting fold 2 / 10 (acc: 0.5417)
 Starting fold 3 / 10 (acc: 0.5833)
 Starting fold 4 / 10 (acc: 0.5417)
 Starting fold 5 / 10 (acc: 0.5417)
 Starting fold 6 / 10 (acc: 0.5417)
 Starting fold 7 / 10 (acc: 0.5417)
 Starting fold 8 / 10 (acc: 0.5000)
 Starting fold 9 / 10 (acc: 0.5417)
 Starting fold 10 / 10 (acc: 0.6250)
Mean accuracy on test set: 0.5500

Training with 250 estimators
 Starting fold 1 / 10 (acc: 0.5417)
 Starting fold 2 / 10 (acc: 0.5000)
 Starting fold 3 / 10 (acc: 0.5417)
 Starting fold 4 / 10 (acc: 0.5833)
 Starting fold 5 / 10 (acc: 0.5417)
 Starting fold 6 / 10 (acc: 0.5417)
 Starting fold 7 / 10 (acc: 0.5417)
 Starting fold 8 / 10 (acc: 0.5000)
 Starting fold 9 / 10 (acc: 0.5417)
 Starting fold 10 / 10 (acc: 0.5417)
Mean accuracy on test set: 0.5375

Training with 500 estimators
 Starting fold 1 / 10 (acc: 0.5417)
 Starting fold 2 /

# SGD

In [13]:
from sklearn.linear_model import PassiveAggressiveClassifier

def pac(X, y, index, model=None, partial=False):
    n_samples = X[index].shape[0]
    acc = 0.0
    best_score = -1
    best_model = None
    for fold_i in range(N_FOLDS):
        print(" Starting fold {} / {}".format(fold_i+1, N_FOLDS), end='')

        train_fold_idx, test_fold_idx  = create_splits(y[index])
        train_idx = index[train_fold_idx]
        test_idx = index[test_fold_idx]

        if model is None:
            if partial:
                classes = np.unique(y)
                model = PassiveAggressiveClassifier(max_iter=2000, warm_start=True)
                model.partial_fit(X[train_idx], y[train_idx], classes)
            else:
                model = PassiveAggressiveClassifier(max_iter=2000, warm_start=False)
                model.fit(X[train_idx], y[train_idx])
        else:
            model.partial_fit(X[train_idx], y[train_idx])

        y_pred = model.predict(X[test_idx])
        fold_acc = accuracy_score(y[test_idx], y_pred)
        
        if best_score < 0 or best_score < (fold_acc - 0.02):
            best_score = fold_acc
            best_model = model
        
        if not partial:
            model = None

        acc += fold_acc
        print(" (acc: {:.4f})".format(fold_acc))

    acc /= N_FOLDS
    print("Mean accuracy on test set: {:.4f}\n".format(acc))
    
    return (acc, best_model)

In [14]:
print("=== Results of supervised learning on expert likert labels ===")
pac_acc_experts_likert, _ = pac(X_2D,
                                y_likert_experts, 
                                likert_expert_idx)

print("=== Results of supervised learning on crowd likert labels ===")
pac_acc_crowd_likert, _ = pac(X_2D,
                              y_likert_crowd,
                              likert_crowd_idx)

print("=== Results of supervised learning on combined likert labels ===")
pac_acc_combined_likert, _ = pac(X_2D,
                                 y_likert_combined,
                                 likert_combined_idx)

print("=== Results of supervised learning on expert dominant labels ===")
pac_acc_experts_dominant, _ = pac(X_2D,
                                  y_dominant_experts, 
                                  dominant_expert_idx)

print("=== Results of supervised learning on crowd dominant labels ===")
pac_acc_crowd_dominant, _ = pac(X_2D,
                                y_dominant_crowd,
                                dominant_crowd_idx)

print("=== Results of supervised learning on combined dominant labels ===")
pac_acc_combined_dominant, _ = pac(X_2D,
                                   y_dominant_combined,
                                   dominant_combined_idx)

=== Results of supervised learning on expert likert labels ===
 Starting fold 1 / 10 (acc: 0.2000)
 Starting fold 2 / 10 (acc: 0.2667)
 Starting fold 3 / 10 (acc: 0.2667)
 Starting fold 4 / 10 (acc: 0.2667)
 Starting fold 5 / 10 (acc: 0.2000)
 Starting fold 6 / 10 (acc: 0.2000)
 Starting fold 7 / 10 (acc: 0.2667)
 Starting fold 8 / 10 (acc: 0.2000)
 Starting fold 9 / 10 (acc: 0.2667)
 Starting fold 10 / 10 (acc: 0.2667)
Mean accuracy on test set: 0.2400

=== Results of supervised learning on crowd likert labels ===
 Starting fold 1 / 10 (acc: 0.1333)
 Starting fold 2 / 10 (acc: 0.2000)
 Starting fold 3 / 10 (acc: 0.2000)
 Starting fold 4 / 10 (acc: 0.1333)
 Starting fold 5 / 10 (acc: 0.2000)
 Starting fold 6 / 10 (acc: 0.2000)
 Starting fold 7 / 10 (acc: 0.2000)
 Starting fold 8 / 10 (acc: 0.2667)
 Starting fold 9 / 10 (acc: 0.2000)
 Starting fold 10 / 10 (acc: 0.1333)
Mean accuracy on test set: 0.1867

=== Results of supervised learning on combined likert labels ===
 Starting fold 1 /

## incremental learning

In [15]:
print("=== Results of supervised learning on expert likert labels ===")
pac_acc_experts_likert, model = pac(X_2D,
                                    y_likert_experts, 
                                    likert_expert_idx,
                                    partial=True)

print("=== Results of supervised learning on crowd likert labels ===")
pac_acc_crowd_likert, _ = pac(X_2D,
                              y_likert_crowd,
                              likert_crowd_idx,
                              model=model,
                              partial=True)

print("=== Results of supervised learning on expert dominant labels ===")
pac_acc_experts_dominant, model = pac(X_2D,
                                      y_dominant_experts, 
                                      dominant_expert_idx,
                                      partial=True)

print("=== Results of supervised learning on crowd dominant labels ===")
pac_acc_crowd_dominant, _ = pac(X_2D,
                                y_dominant_crowd,
                                dominant_crowd_idx,
                                model=model,
                                partial=True)

=== Results of supervised learning on expert likert labels ===
 Starting fold 1 / 10 (acc: 0.0667)
 Starting fold 2 / 10 (acc: 0.2000)
 Starting fold 3 / 10 (acc: 0.2000)
 Starting fold 4 / 10 (acc: 0.2000)
 Starting fold 5 / 10 (acc: 0.1333)
 Starting fold 6 / 10 (acc: 0.1333)
 Starting fold 7 / 10 (acc: 0.2000)
 Starting fold 8 / 10 (acc: 0.2000)
 Starting fold 9 / 10 (acc: 0.2000)
 Starting fold 10 / 10 (acc: 0.2000)
Mean accuracy on test set: 0.1733

=== Results of supervised learning on crowd likert labels ===
 Starting fold 1 / 10 (acc: 0.2667)
 Starting fold 2 / 10 (acc: 0.2667)
 Starting fold 3 / 10 (acc: 0.2000)
 Starting fold 4 / 10 (acc: 0.1333)
 Starting fold 5 / 10 (acc: 0.2000)
 Starting fold 6 / 10 (acc: 0.0667)
 Starting fold 7 / 10 (acc: 0.1333)
 Starting fold 8 / 10 (acc: 0.2667)
 Starting fold 9 / 10 (acc: 0.2000)
 Starting fold 10 / 10 (acc: 0.2000)
Mean accuracy on test set: 0.1933

=== Results of supervised learning on expert dominant labels ===
 Starting fold 1 /

# PyTorch Preparations

In [16]:
## convert numpy arrays to PyTorch tensors
X_2D = torch.from_numpy(X_2D)
X_3D = torch.from_numpy(X_3D)
y_likert_crowd = torch.from_numpy(y_likert_crowd)
y_likert_experts = torch.from_numpy(y_likert_experts)
y_likert_combined = torch.from_numpy(y_likert_combined)
y_dominant_crowd = torch.from_numpy(y_dominant_crowd)
y_dominant_experts = torch.from_numpy(y_dominant_experts)
y_dominant_combined = torch.from_numpy(y_dominant_combined)

In [17]:
def categorical_accuracy(y_hat, y):
    # y := 1D array of class labels
    # y_hat := 2D array of one-hot class labels
    _, labels = y_hat.max(dim=1)
    return torch.mean(torch.eq(labels, y).float())

def fit(model, X, y, index, lr=0.01, l2norm=0.01, n_folds=10, n_epoch=250, patience=-1, state=None, finetune=False):
    n_samples = X[index].shape[0]

    loss = 0
    acc = 0
    best_state = None
    best_state_opt = None
    best_score = -1
    for fold_i in range(n_folds):
        print("Starting fold {} / {}".format(fold_i+1, n_folds), end='')
        if state is None:
            model.init()
            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=l2norm)
        else:
            model.load_state_dict(state[0])
            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=l2norm)
            optimizer.load_state_dict(state[1])
            if finetune:
                for layer in model.layers[:-1]:
                    layer.requires_grad = False
            
        criterion = nn.CrossEntropyLoss()
        
        # early stopping
        patience_left = patience
        best_fold_score = -1
        delta = 1e-4
        best_fold_state = None
        best_fold_state_opt = None
        
        train_fold_idx, test_fold_idx  = create_splits(y[index])
        train_idx = index[train_fold_idx]
        test_idx = index[test_fold_idx]
        for epoch in range(n_epoch):
            model.train()
            
            y_hat = model(X[train_idx].float())
            train_acc = categorical_accuracy(y_hat, y[train_idx])
            train_loss = criterion(y_hat, y[train_idx].long())
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            
            model.eval()
            test_loss = None
            with torch.no_grad():
                y_hat = model(X[test_idx].float())
                test_acc = categorical_accuracy(y_hat, y[test_idx])
                test_loss = criterion(y_hat, y[test_idx].long())
                
            train_loss = float(train_loss.item())
            test_loss = float(test_loss.item())

            if best_fold_score < 0:
                best_fold_score = test_loss
                best_fold_state = model.state_dict()
                best_fold_state_opt = optimizer.state_dict()
                            
            if patience <= 0:
                continue
            if test_loss >= best_fold_score - delta:
                patience_left -= 1
            else:
                best_fold_score = test_loss
                best_fold_state = model.state_dict()
                best_fold_state_opt = optimizer.state_dict()
                patience_left = patience
            if patience_left <= 0:
                model.load_state_dict(best_fold_state)
                optimizer.load_state_dict(best_fold_state_opt)
                break
                
        test_idx = index[create_splits(y[index])[1]]  # get new random test set to validate on
        with torch.no_grad():
            y_hat = model(X[test_idx].float())
            test_acc = categorical_accuracy(y_hat, y[test_idx])
            test_loss = float(criterion(y_hat, y[test_idx].long()).item())
        
        loss += test_loss
        acc += test_acc
        if best_score < 0 or best_score > test_loss:
            best_state = best_fold_state
            best_state_opt = best_fold_state_opt
            best_score = test_loss
        print(" - training accuracy: {:.4f} / loss: {:.4f} - test accuracy: {:.4f} / loss: {:.4f}".format(train_acc,
                                                                                          train_loss,
                                                                                          test_acc,
                                                                                          test_loss))
        
    loss /= n_folds
    acc /= n_folds
    print("average loss on test set: {:.4f}".format(loss))
    print("average accuracy on test set: {:.4f}".format(acc))
    
    return (acc, (best_state, best_state_opt))

# Neural Network

In [18]:
class ClassifierNN(nn.Module):
    """Simple Neural Network Classifier"""

    def __init__(self, input_dim, output_dim, p_dropout=0.05):
        super().__init__()
        hidden_dim = (input_dim-output_dim)//2
        
        self.layers = nn.ModuleList()
        self.layers.append(nn.Sequential(
                            nn.Linear(input_dim, hidden_dim),
                            nn.ReLU(inplace=True),
                            nn.Dropout(p=p_dropout)))
            
        self.layers.append(nn.Sequential(
                            nn.Linear(hidden_dim, output_dim),
                            nn.ReLU(inplace=True)))
        
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, X):
        for layer in self.layers:
            X = layer(X)                          
                           
        return self.softmax(X)
        
    def init(self):
        for param in self.parameters():
            nn.init.normal_(param)

In [19]:
## hyperparameters
lr = 0.01
n_epoch = 250
p_dropout = 0.05

## define model
indim = X_2D.shape[1]
outdim = np.unique(y_likert_experts[likert_expert_idx]).shape[0]
assert outdim == np.unique(y_likert_crowd[likert_crowd_idx]).shape[0]

model = ClassifierNN(input_dim=indim,
                     output_dim=outdim,
                     p_dropout=p_dropout)

print("=== Results on expert likert labels ===")
neural_net_acc_likert_experts, _ = fit(model, X_2D, y_likert_experts, likert_expert_idx, lr=lr, n_epoch=n_epoch)

print("\n=== Results on crowd likert labels ===")
neural_net_acc_likert_crowd, _ = fit(model, X_2D, y_likert_crowd, likert_crowd_idx, lr=lr, n_epoch=n_epoch)

print("\n=== Results on combined likert labels ===")
neural_net_acc_likert_combined, _ = fit(model, X_2D, y_likert_combined, likert_combined_idx, lr=lr, n_epoch=n_epoch)

=== Results on expert likert labels ===
Starting fold 1 / 10 - training accuracy: 0.7674 / loss: 1.4071 - test accuracy: 0.0667 / loss: 2.0207
Starting fold 2 / 10 - training accuracy: 0.7674 / loss: 1.4001 - test accuracy: 0.2000 / loss: 1.9427
Starting fold 3 / 10 - training accuracy: 0.6744 / loss: 1.4972 - test accuracy: 0.2000 / loss: 1.9812
Starting fold 4 / 10 - training accuracy: 0.8140 / loss: 1.3898 - test accuracy: 0.1333 / loss: 1.9958
Starting fold 5 / 10 - training accuracy: 0.7907 / loss: 1.3893 - test accuracy: 0.1333 / loss: 2.0249
Starting fold 6 / 10 - training accuracy: 0.6279 / loss: 1.5661 - test accuracy: 0.0667 / loss: 1.9870
Starting fold 7 / 10 - training accuracy: 0.5814 / loss: 1.5876 - test accuracy: 0.1333 / loss: 2.0118
Starting fold 8 / 10 - training accuracy: 0.5814 / loss: 1.5684 - test accuracy: 0.1333 / loss: 1.9880
Starting fold 9 / 10 - training accuracy: 0.7442 / loss: 1.4364 - test accuracy: 0.0667 / loss: 2.0205
Starting fold 10 / 10 - training 

In [20]:
## hyperparameters
lr = 0.01
n_epoch = 250
p_dropout = 0.1

## define model
indim = X_2D.shape[1]
outdim = np.unique(y_dominant_experts[dominant_expert_idx]).shape[0]
assert outdim == np.unique(y_dominant_crowd[dominant_crowd_idx]).shape[0]

model = ClassifierNN(input_dim=indim,
                     output_dim=outdim,
                     p_dropout=p_dropout)

print("=== Results on expert dominant labels ===")
neural_net_acc_dominant_experts, _ = fit(model, X_2D, y_dominant_experts, dominant_expert_idx, lr=lr, n_epoch=n_epoch)

print("\n=== Results on crowd dominant labels ===")
neural_net_acc_dominant_crowd, _ = fit(model, X_2D, y_dominant_crowd, dominant_crowd_idx, lr=lr, n_epoch=n_epoch)

print("\n=== Results on combined dominant labels ===")
neural_net_acc_dominant_combined, _ = fit(model, X_2D, y_dominant_combined, dominant_combined_idx, lr=lr, n_epoch=n_epoch)

=== Results on expert dominant labels ===
Starting fold 1 / 10 - training accuracy: 0.9556 / loss: 0.3973 - test accuracy: 0.8462 / loss: 0.4506
Starting fold 2 / 10 - training accuracy: 0.9333 / loss: 0.3892 - test accuracy: 0.7692 / loss: 0.5870
Starting fold 3 / 10 - training accuracy: 0.9556 / loss: 0.3857 - test accuracy: 0.8462 / loss: 0.5572
Starting fold 4 / 10 - training accuracy: 0.9778 / loss: 0.3470 - test accuracy: 0.6154 / loss: 0.6621
Starting fold 5 / 10 - training accuracy: 0.9111 / loss: 0.3962 - test accuracy: 0.5385 / loss: 0.7243
Starting fold 6 / 10 - training accuracy: 0.8444 / loss: 0.4604 - test accuracy: 0.6923 / loss: 0.5984
Starting fold 7 / 10 - training accuracy: 0.9333 / loss: 0.3578 - test accuracy: 0.6923 / loss: 0.6164
Starting fold 8 / 10 - training accuracy: 0.9556 / loss: 0.3762 - test accuracy: 0.6923 / loss: 0.6478
Starting fold 9 / 10 - training accuracy: 0.9333 / loss: 0.3945 - test accuracy: 0.6923 / loss: 0.6261
Starting fold 10 / 10 - trainin

## Incremental learning

In [25]:
## hyperparameters
lr = 0.01
n_epoch = 250
p_dropout = 0.1

## define model
indim = X_2D.shape[1]
outdim = np.unique(y_dominant_experts[dominant_expert_idx]).shape[0]
assert outdim == np.unique(y_dominant_crowd[dominant_crowd_idx]).shape[0]

model = ClassifierNN(input_dim=indim,
                     output_dim=outdim,
                     p_dropout=p_dropout)

print("=== Results on expert dominant labels ===")
neural_net_acc_dominant_experts, state = fit(model, X_2D, y_dominant_experts, dominant_expert_idx, lr=lr, n_epoch=n_epoch)

print("\n=== Results on crowd dominant labels ===")
neural_net_acc_dominant_crowd, _ = fit(model, X_2D, y_dominant_crowd, dominant_crowd_idx, lr=lr, n_epoch=n_epoch, state=state, finetune=False)

=== Results on expert dominant labels ===
Starting fold 1 / 10 - training accuracy: 0.9556 / loss: 0.3715 - test accuracy: 0.6154 / loss: 0.5987
Starting fold 2 / 10 - training accuracy: 0.7556 / loss: 0.4891 - test accuracy: 0.5385 / loss: 0.6435
Starting fold 3 / 10 - training accuracy: 0.8889 / loss: 0.3993 - test accuracy: 0.6154 / loss: 0.6419
Starting fold 4 / 10 - training accuracy: 0.9111 / loss: 0.4253 - test accuracy: 0.7692 / loss: 0.6000
Starting fold 5 / 10 - training accuracy: 0.9556 / loss: 0.3809 - test accuracy: 0.8462 / loss: 0.5359
Starting fold 6 / 10 - training accuracy: 0.8222 / loss: 0.4919 - test accuracy: 0.6154 / loss: 0.6430
Starting fold 7 / 10 - training accuracy: 0.8889 / loss: 0.4282 - test accuracy: 0.6154 / loss: 0.5870
Starting fold 8 / 10 - training accuracy: 0.8889 / loss: 0.3906 - test accuracy: 0.7692 / loss: 0.5811
Starting fold 9 / 10 - training accuracy: 0.8889 / loss: 0.4242 - test accuracy: 0.6923 / loss: 0.5837
Starting fold 10 / 10 - trainin

# CNN

In [22]:
class ClassifierCNN(nn.Module):
    """CNN Classifier"""

    def __init__(self, features_in, features_out, p_dropout=0.05):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(features_in, int(features_in*1.5), kernel_size=7),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3), 

            nn.Conv1d(int(features_in*1.5), int(features_in*2), kernel_size=7),
            nn.ReLU(inplace=True),
            
            nn.Conv1d(int(features_in*2), int(features_in*2), kernel_size=5),
            nn.ReLU(inplace=True),
            nn.AdaptiveMaxPool1d(2)
        )

        self.fc = nn.Sequential(
            nn.Linear(int(features_in*2)*2, 32),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_dropout),

            nn.Linear(32, features_out)
        )
        
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, X):
        X = self.conv(X)
        X = X.view(X.size(0), -1)

        return self.softmax(self.fc(X))
        
    def init(self):
        for param in self.parameters():
            nn.init.normal_(param)

In [23]:
## hyperparameters
lr = 0.01
p_dropout = 0.05

## define model
indim = X_3D.shape[2]
outdim = np.unique(y_likert_experts[likert_expert_idx]).shape[0]
assert outdim == np.unique(y_likert_crowd[likert_crowd_idx]).shape[0]

model = ClassifierCNN(features_in=indim,
                      features_out=outdim,
                      p_dropout=p_dropout)

print("=== Results on expert likert labels ===")
cnn_acc_likert_experts = fit(model, X_3D.transpose(1, 2), y_likert_experts, likert_expert_idx, lr=lr)

print("\n=== Results on crowd likert labels ===")
cnn_acc_likert_crowd = fit(model, X_3D.transpose(1, 2), y_likert_crowd, likert_crowd_idx, lr=lr)

=== Results on expert likert labels ===
Starting fold 1 / 10 - training accuracy: 0.1837 / loss: 1.9817 - test accuracy: 0.1111 / loss: 2.0543
Starting fold 2 / 10

KeyboardInterrupt: 

In [24]:
## hyperparameters
lr = 0.01
p_dropout = 0.05

## define model
indim = X_3D.shape[2]
outdim = np.unique(y_likert_experts[likert_expert_idx]).shape[0]
assert outdim == np.unique(y_likert_crowd[likert_crowd_idx]).shape[0]

model = ClassifierCNN(features_in=indim,
                      features_out=outdim,
                      p_dropout=p_dropout)

print("=== Results on expert dominant labels ===")
cnn_acc_dominant_experts = fit(model, X_3D.transpose(1, 2), y_dominant_experts, dominant_expert_idx, lr=lr)

print("\n=== Results on crowd dominant labels ===")
cnn_acc_dominant_crowd = fit(model, X_3D.transpose(1, 2), y_dominant_crowd, dominant_crowd_idx, lr=lr)

=== Results on expert dominant labels ===
Starting fold 1 / 10 - training accuracy: 0.0000 / loss: 2.1654 - test accuracy: 0.0000 / loss: 2.1654
Starting fold 2 / 10 - training accuracy: 0.7368 / loss: 1.4286 - test accuracy: 0.6667 / loss: 1.4988
Starting fold 3 / 10

KeyboardInterrupt: 

# LSTM

In [26]:
class ClassifierLSTM(nn.Module):
    def __init__(self,
                 input_dim,
                 output_dim,
                 hidden_dim,
                 num_layers=1,
                 p_dropout=0.0):
        """
        LSTM

        """
        super().__init__()
        self.hidden_dim = hidden_dim

        self.lstm = nn.LSTM(input_size=input_dim,
                            hidden_size=hidden_dim,
                            num_layers=num_layers,
                            bias=True,
                            batch_first=True)  # (batch, seq, feature)
                            
        fc_hidden_dim = (hidden_dim-output_dim)//2
        self.fc = nn.Sequential(nn.Linear(hidden_dim, fc_hidden_dim),
                                nn.ReLU(inplace=True),
                                nn.Dropout(p=p_dropout),
                                
                                nn.Linear(fc_hidden_dim, output_dim))
        
        self.softmax = nn.Softmax(dim=1)

    def forward(self, X):
        # default H0 is zero vector
        # output Hn is representation of entire sequence
        X, _ = self.lstm(X)
        X = X[:,-1,:]  # only consider final output

        return self.softmax(self.fc(X))

    def init(self):
        sqrt_k = sqrt(1.0/self.hidden_dim)
        for param in self.parameters():
            nn.init.uniform_(param, -sqrt_k, sqrt_k)


In [25]:
## hyperparameters
lr = 0.01
p_dropout = 0.05

## define model
indim = X_3D.shape[2]
outdim = np.unique(y_likert_experts[likert_expert_idx]).shape[0]
assert outdim == np.unique(y_likert_crowd[likert_crowd_idx]).shape[0]

model = ClassifierLSTM(input_dim=indim,
                       output_dim=outdim,
                       hidden_dim=indim,
                       p_dropout=p_dropout)

print("=== Results on expert dominant labels ===")
lstm_acc_dominant_experts = fit(model, X_3D, y_dominant_experts, dominant_expert_idx, lr=lr)

print("\n=== Results on crowd dominant labels ===")
lstm_acc_dominant_crowd = fit(model, X_3D, y_dominant_crowd, dominant_crowd_idx, lr=lr)

print("=== Results on combined dominant labels ===")
lstm_acc_dominant_combined = fit(model, X_3D, y_dominant_combined, dominant_combined_idx, lr=lr)

NameError: name 'ClassifierLSTM' is not defined

In [27]:
## hyperparameters
lr = 0.01
p_dropout = 0.05

## define model
indim = X_3D.shape[2]
outdim = np.unique(y_likert_experts[likert_expert_idx]).shape[0]
assert outdim == np.unique(y_likert_crowd[likert_crowd_idx]).shape[0]
hidden_dim = (indim-outdim)//2

model = ClassifierLSTM(input_dim=indim,
                       output_dim=outdim,
                       hidden_dim=hidden_dim,
                       p_dropout=p_dropout)

print("=== Results on expert likert labels ===")
lstm_acc_likert_experts = fit(model, X_3D, y_likert_experts, likert_expert_idx, lr=lr)

print("\n=== Results on crowd likert labels ===")
lstm_acc_likert_crowd = fit(model, X_3D, y_likert_crowd, likert_crowd_idx, lr=lr)

=== Results on expert likert labels ===
Starting fold 1 / 10 - training accuracy: 0.2449 / loss: 1.9027 - test accuracy: 0.2222 / loss: 1.9174
Starting fold 2 / 10 - training accuracy: 0.2449 / loss: 1.8814 - test accuracy: 0.2222 / loss: 1.9046
Starting fold 3 / 10 - training accuracy: 0.2449 / loss: 1.8906 - test accuracy: 0.2222 / loss: 1.9030
Starting fold 4 / 10 - training accuracy: 0.2449 / loss: 1.8934 - test accuracy: 0.2222 / loss: 1.9050
Starting fold 5 / 10

KeyboardInterrupt: 