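Visit https://github.com/TrentoCrowdAI/crowdsourced-datasets and follow the instructions to download the datasets. This notebook reads them from `../crowdsourced-datasets/`, relative to the directory the notebook is run from.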

### TODO

* Handle partially labeled data

In [55]:
import pandas as pd

# CSV of the "Blue Birds" binary-classification dataset, addressed relative
# to the directory this notebook is run from.
dataset_path = "../crowdsourced-datasets/binary-classification/Blue Birds/transformed_dataset.csv"

# Load the file, then discard rows in which every column is empty.
df = pd.read_csv(dataset_path)
df = df.dropna(how='all')

In [56]:
# Normalize labels: encode every distinct raw label as a small integer so
# that worker responses and gold labels share a single coding.

# Pool the values of both columns. Missing entries are dropped per value
# (not per row), so a response given on a task without a gold label -- or a
# gold label on a row without a response -- still receives a code.
pooled_labels = pd.concat([df['response'], df['goldLabel']], ignore_index=True).dropna()
unique_values = pd.Series(pooled_labels.unique()).sort_values()

# Map each unique value to a unique integer (0..k-1, in sorted label order)
value_to_int = pd.Series(range(len(unique_values)), index=unique_values)

# Map columns to integers; rows whose raw label is missing stay NaN
df['normal_response'] = df['response'].map(value_to_int)
df['normal_goldLabel'] = df['goldLabel'].map(value_to_int)

In [None]:
worker_list = df['workerID'].unique()
task_list = df['taskID'].unique()
# Sorted, NaN-free class codes: a stable label order for anything that
# indexes by class (e.g. confusion_matrix(labels=...)).
label_list = df['normal_goldLabel'].dropna().sort_values().unique()

# Nested mapping {taskID: {workerID: [label, ...]}} holding every response.
# Zipping the worker IDs against a one-element list (as was done before)
# keeps only the first worker of each task and maps it to the whole response
# Series, so all other workers were silently lost.
# NOTE(review): presumably this is the layout run_em expects -- confirm
# against dawid_skene.
data = {}
answered = df.dropna(subset=['normal_response'])  # skip rows with no usable response
for (task_id, worker_id), responses in answered.groupby(['taskID', 'workerID'])['normal_response']:
    data.setdefault(task_id, {})[worker_id] = responses.tolist()

# One gold label per task, taken from the first row in which the task appears.
labels_dictionary = df.drop_duplicates(subset='taskID').set_index('taskID')['normal_goldLabel'].to_dict()

In [None]:
# Report the size of the problem: tasks, observers (workers) and classes.
for caption, items in (
    ("num Tasks:", task_list),
    ("num Observers:", worker_list),
    ("num Classes:", label_list),
):
    print(caption, len(items))

# Run Test

In [None]:
from dawid_skene import run as run_em

# Run the EM procedure on the nested response mapping. Only the last three of
# the seven returned values are used by this notebook.
em_output = run_em(data, verbose=False)
_, _, _, _, class_marginals, error_rates, patient_classes = em_output

# Notebook display of the per-task class estimates.
patient_classes

# Evaluation

In [61]:
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

In [None]:
# Look the gold labels up in sorted task-ID order rather than relying on the
# insertion order of labels_dictionary (first appearance in the CSV): `data`
# is built from a groupby on taskID, so its tasks are in sorted order and the
# two orders need not agree.
# NOTE(review): assumes run_em returns one row of patient_classes per task in
# sorted task-ID order -- confirm against dawid_skene.
true_labels = [labels_dictionary[task_id] for task_id in sorted(data)]

# Most probable class per task.
# NOTE(review): argmax yields a class *index*; it equals the normalized label
# only if every class code occurs among the responses -- confirm.
estimated_labels = np.argmax(patient_classes, axis=1)

# Whole-number percentage (truncated, not rounded).
accuracy = int(100 * accuracy_score(true_labels, estimated_labels))

print("Accuracy: %{}".format(accuracy))

In [63]:
# Get the empirical confusion matrix of every worker.
#
# * normalize='true' turns each row (true class) into response frequencies,
#   so the matrices are on the same scale as estimated error rates
#   P(response | true class) instead of raw counts that grow with the number
#   of answers a worker gave.
# * groupby visits workers in sorted workerID order and touches each row once,
#   instead of filtering the whole frame once per worker.
# NOTE(review): assumes error_rates is ordered by sorted worker ID with rows
# indexed by true class -- confirm against dawid_skene.
confusion_matrices = [
    confusion_matrix(
        worker_df['normal_goldLabel'],
        worker_df['normal_response'],
        labels=label_list,
        normalize='true',
    )
    for _, worker_df in df.groupby('workerID')
]

In [64]:
def frobenius_norm(matrix1, matrix2):
    """Return the Frobenius norm of ``matrix1 - matrix2``.

    Both arguments may be anything ``np.array`` accepts (nested lists or
    arrays); they are converted before the element-wise subtraction.
    """
    delta = np.array(matrix1) - np.array(matrix2)
    return np.linalg.norm(delta, ord='fro')

In [None]:
# Frobenius distance between each worker's empirical confusion matrix and the
# corresponding estimated matrix, averaged over all pairs.
per_worker_errors = [
    frobenius_norm(cm_true, cm_estimate)
    for cm_true, cm_estimate in zip(confusion_matrices, error_rates)
]
parameter_error_rate = np.mean(per_worker_errors)

print("Average Parameter Estimation Error: {:.2f}".format(parameter_error_rate))