Visit https://github.com/TrentoCrowdAI/crowdsourced-datasets and follow the instructions to download the datasets.

### TODO

* Handle partially labeled data

In [1]:
import pandas as pd

# dataset_path = "../crowdsourced-datasets/binary-classification/Blue Birds/transformed_dataset.csv"
# dataset_path = "../crowdsourced-datasets/multi-class-classification/Weather Sentiment - AMT/transformed_dataset.csv"
dataset_path = "../crowdsourced-datasets/my-dataset/transformed_dataset.csv"

df = pd.read_csv(dataset_path).dropna(how='all')

In [2]:
# Normalize labels

# Combine unique values
unique_values = pd.Series(pd.unique(df[['response', 'goldLabel']].dropna().values.ravel())).sort_values()

# Map each unique value to a unique integer
value_to_int = pd.Series(range(len(unique_values)), index=unique_values)

# Map column to integers
df['normal_response'] = df['response'].map(value_to_int)
df['normal_goldLabel'] = df['goldLabel'].map(value_to_int)

In [3]:
worker_list = df['workerID'].unique()
task_list = df['taskID'].unique()
label_list = df['normal_goldLabel'].unique()

data = df.groupby('taskID').apply(lambda x: dict(zip(x['workerID'], [x['normal_response']]))).to_dict()
labels_dictionary = df.drop_duplicates(subset='taskID').set_index('taskID')['normal_goldLabel'].to_dict()

In [4]:
print ("num Tasks:", len(task_list))
print ("num Observers:", len(worker_list))
print ("num Classes:", len(label_list))

num Tasks: 7661
num Observers: 3
num Classes: 5


# Run Test

In [5]:
from dawid_skene import run as run_em

_, _, _, _, class_marginals, error_rates, patient_classes = run_em(data, verbose=False)
patient_classes

array([[9.99318655e-001, 6.62954257e-004, 1.36475661e-005,
        1.37268847e-152, 4.74349299e-006],
       [9.80763951e-001, 1.60999690e-003, 1.73199769e-002,
        6.95148358e-103, 3.06075466e-004],
       [9.99318655e-001, 6.62954257e-004, 1.36475661e-005,
        1.37268847e-152, 4.74349299e-006],
       ...,
       [8.47018892e-001, 1.52877314e-001, 1.03794489e-004,
        2.47919379e-153, 0.00000000e+000],
       [4.83077133e-003, 5.33861228e-003, 9.89830616e-001,
        3.76464377e-056, 0.00000000e+000],
       [4.83077133e-003, 5.33861228e-003, 9.89830616e-001,
        3.76464377e-056, 0.00000000e+000]])

# Evaluation

In [6]:
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

In [7]:
true_labels = list(labels_dictionary.values())
estimated_labels = np.argmax(patient_classes, axis=1)

accuracy = int(100 * accuracy_score(true_labels, estimated_labels))

print("Accuracy: %{}".format(accuracy))

Accuracy: %66


In [8]:
# Get the confusion matrices

confusion_matrices = []
for value in df['workerID'].unique():
    worker_df = df[df['workerID'] == value]

    confusion_matrices.append(confusion_matrix(worker_df['normal_goldLabel'], worker_df['normal_response'], labels=label_list))

In [9]:
def frobenius_norm(matrix1, matrix2):
    # Convert the input matrices to numpy arrays if they are not already
    matrix1 = np.array(matrix1)
    matrix2 = np.array(matrix2)
    
    # Calculate the Frobenius norm of the difference
    difference = matrix1 - matrix2
    norm = np.linalg.norm(difference, 'fro')
    
    return norm

In [10]:
print(label_list)
print('confusion matrix 0:\n',confusion_matrices[0],'\n')
print('confusion matrix 1:\n',confusion_matrices[1],'\n')
print('confusion matrix 2:\n',confusion_matrices[2],'\n')


[1 2 0 4 3]
confusion matrix 0:
 [[1495   15    7    0    8]
 [  20  864   44    2    6]
 [ 245  145 4842   20   15]
 [   0    0    0  172    0]
 [   0    0    0    0   27]] 

confusion matrix 1:
 [[1463    5   57    0    0]
 [   0  909   22    0    5]
 [  18   29 5220    0    0]
 [   0   26   21  125    0]
 [   0    1    0    0   26]] 

confusion matrix 2:
 [[1126    6  393    0    0]
 [   6  885   43    2    0]
 [ 170   92 5000    5    0]
 [   0    1    2  169    0]
 [   0    0    0    0   27]] 



In [11]:

print('error_rates:\n',error_rates[0],'\n')

error_rates:
 [[9.55461788e-01 2.47707305e-02 1.72995735e-02 9.52545679e-04
  1.51536220e-03]
 [1.23274082e-01 8.69496115e-01 5.52299727e-03 1.70680618e-03
  0.00000000e+00]
 [4.03835594e-02 9.39420584e-03 9.45492899e-01 3.95393043e-03
  7.75405564e-04]
 [1.30552810e-50 7.21208939e-53 1.21970081e-02 9.87802992e-01
  0.00000000e+00]
 [4.94774796e-02 0.00000000e+00 5.88978441e-02 0.00000000e+00
  8.91624676e-01]] 



In [12]:
parameter_error_rate = np.mean([frobenius_norm(cm_true, cm_estimate) for cm_true, cm_estimate in zip(confusion_matrices, error_rates)])

print("Average Parameter Estimation Error: {:.2f}".format(parameter_error_rate))

Average Parameter Estimation Error: 5150.46
