# Parameters

In [2]:
nWorkers = 5    # Number of simulated workers (labelling channels) that estimate every sample
nSamples = 45   # Number of data items to generate
nClasses = 4    # Number of distinct classes used by the workers and the data generator

# Generate Fake Data

In [3]:
from fake_data_generator import RandomConfusionMatrixChannel, DataGenerator

def generate_sample_data(nSamples, nWorkers, nClass, truePredictionRate=.8):
    """Simulate a crowd-labelling experiment with synthetic workers.

    Trains ``nWorkers`` ``RandomConfusionMatrixChannel`` instances, then draws
    ``nSamples`` items from a ``DataGenerator`` and asks every worker for an
    estimate of each item.

    Args:
        nSamples: Number of data items to generate.
        nWorkers: Number of simulated workers.
        nClass: Number of classes; passed to each worker's ``train`` call and
            to the ``DataGenerator`` constructor.
        truePredictionRate: Forwarded unchanged to each worker's ``train``
            call (presumably the chance of a correct label -- confirm against
            ``fake_data_generator``).

    Returns:
        A tuple ``(samples, labels, confusion_matrices)``:
        ``samples`` maps sample index -> {worker index -> [estimate]},
        ``labels`` lists each generated item's ``label`` in generation order,
        and ``confusion_matrices`` lists each worker's ``confusionMatrix`` in
        worker order.
    """
    channels = []
    true_matrices = []
    for _worker_idx in range(nWorkers):
        channel = RandomConfusionMatrixChannel()
        channel.train(nClass, truePredictionRate)
        channels.append(channel)
        true_matrices.append(channel.confusionMatrix)

    generator = DataGenerator(nClass)
    responses = {}
    true_labels = []
    for sample_idx in range(nSamples):
        item = generator.generate()
        # Every worker's answer is stored as a one-element list.
        responses[sample_idx] = {
            worker_idx: [channel.estimate(item)]
            for worker_idx, channel in enumerate(channels)
        }
        true_labels.append(item.label)

    return (responses, true_labels, true_matrices)

# Test

In [4]:
# Build the synthetic crowd-labelling dataset from the notebook parameters.
samples, labels, confusion_matrices = generate_sample_data(
    nSamples=nSamples,
    nWorkers=nWorkers,
    nClass=nClasses,
    truePredictionRate=.8,
)

In [5]:
from dawid_skene import run as run_em

# The result of run_em is unpacked into seven names; the first four are
# discarded and only the last three are kept for the evaluation cells.
# NOTE(review): patient_classes is later reduced with argmax over axis 1, so
# it is presumably a (samples x classes) score matrix -- confirm against
# dawid_skene.run.
_, _, _, _, class_marginals, error_rates, patient_classes = run_em(samples, verbose=False)

# Evaluation

In [7]:
from sklearn.metrics import accuracy_score
import numpy as np

In [8]:
def frobenius_norm(matrix1, matrix2):
    """Return the Frobenius norm of the element-wise difference of two matrices.

    Both inputs are passed through ``np.array`` first, so nested lists are
    accepted as well as arrays.

    Args:
        matrix1: First matrix (array-like).
        matrix2: Second matrix (array-like), subtracted from ``matrix1``.

    Returns:
        The value of ``np.linalg.norm(matrix1 - matrix2, 'fro')``.
    """
    delta = np.array(matrix1) - np.array(matrix2)
    return np.linalg.norm(delta, ord='fro')

def one_hot_encode(x, n_classes):
    """
    One hot encode sample labels. Return a one-hot encoded vector for each label.
    : x: Integer label, or list/tuple/array of integer labels, used as row
         indices into an ``n_classes`` x ``n_classes`` identity matrix
    : n_classes: Number of classes (length of every one-hot vector)
    : return: Numpy array of one-hot encoded labels; an empty sequence gives
              an array of shape (0, n_classes)
     """
    # Normalise to an array first: indexing with a raw tuple would be read by
    # NumPy as a multi-axis index (e.g. (0, 1) -> element [0, 1]) rather than
    # as a sequence of labels.
    indices = np.asarray(x)
    if indices.size == 0:
        # An empty sequence converts to a float array, which NumPy rejects as
        # an index, so give it an integer dtype explicitly.
        indices = indices.astype(np.intp)
    return np.eye(n_classes)[indices]

### Accuracy
Run this cell only if the true labels are available.

In [None]:

# Ground-truth labels returned together with the generated samples.
true_labels = labels
# For every row of patient_classes, take the index of its largest entry as
# the estimated class.
estimated_labels = np.argmax(patient_classes, axis=1)

# Round to the nearest whole percent. int() on its own truncates, so a float
# product such as 28.999999999999996 would be reported as 28 instead of 29.
accuracy = int(round(100 * accuracy_score(true_labels, estimated_labels)))

print("Accuracy: %{}".format(accuracy))

### Average Parameter Estimation Error
Run this cell only if the true confusion matrices are available.

In [None]:
# Frobenius distance between each true confusion matrix and the matching
# entry of error_rates, paired in order.
per_matrix_errors = [
    frobenius_norm(true_cm, estimated_cm)
    for true_cm, estimated_cm in zip(confusion_matrices, error_rates)
]
parameter_error_rate = np.mean(per_matrix_errors)

print("Average Parameter Estimation Error: {:.2f}".format(parameter_error_rate))