In [1]:
from cleanlab import *
import pandas as pd 
import numpy as np 
import random 

In [331]:
## create toy dataset
rows = 2000
cols = 30
num_class = 10
num_annotators = 50

data = np.random.rand(rows, cols)
column_name = ['col'+ str(i) for i in range(cols)]
data_df = pd.DataFrame(data = data, columns= column_name)

def gen_labels(num_class, num_annotators):
    label_list = []
    for annotator in range(num_annotators):
        label_list.append([random.randrange(1,10) for _ in range(rows)])
    col_name_annot = ['annot'+ str(i) for i in range(num_annotators)]
    return col_name_annot, np.transpose(np.array(label_list))

col_labels, labels = gen_labels(num_class, num_annotators)
label_df = pd.DataFrame(data=labels, columns=col_labels)
final_df = pd.concat([data_df, label_df], axis = 1)

In [332]:
final_df.head(5)

Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,...,annot40,annot41,annot42,annot43,annot44,annot45,annot46,annot47,annot48,annot49
0,0.473255,0.772771,0.716062,0.345439,0.200372,0.259145,0.132092,0.802222,0.561289,0.627092,...,3,3,2,8,5,7,2,7,2,9
1,0.15869,0.409009,0.061098,0.828001,0.761124,0.335639,0.66129,0.039424,0.282486,0.848282,...,8,8,8,6,3,4,9,2,6,8
2,0.912738,0.69604,0.621875,0.964729,0.209513,0.849239,0.184992,0.485495,0.276308,0.371791,...,5,4,6,2,2,6,7,2,2,6
3,0.260712,0.199669,0.71197,0.607946,0.502872,0.516352,0.644867,0.64577,0.325319,0.106159,...,1,5,7,5,6,1,3,1,3,7
4,0.164569,0.841068,0.762105,0.460805,0.236593,0.357434,0.114363,0.400563,0.451104,0.907592,...,7,1,4,3,4,2,8,5,9,6


In [333]:
def gen_pred_matrix(num_class, num_annotators, rows):
    final_predMatrix = []
    for _ in range(num_annotators):
        annotator_list = []
        for _ in range(rows): 
            temp = np.random.random(num_class)
            annotator_list.append(temp)
            # annotator_list.append(temp/sum(temp))
        final_predMatrix.append(annotator_list)
    return np.array(final_predMatrix)

pred_probs = gen_pred_matrix(num_class, num_annotators, rows) ## final dummy predication array

In [334]:
def most_frequent(label_list, score_list):
    """
    Parameters:
    ----------------
    label_list: takes list of labels annotated by all the annotaters. 
        Dimension,(K,), where K is number of annotators.
    
    score_list: list of scores returned by get_label_quality_scores()
        Dimension,(K,), where K is number of annotators.
    
    How it works:
    ----------------
    This function considers majority voting. In case of ties, it factors the average probability for the scores to decide upon final label.

    Output
    -----------------
    final_key: selected key with logic explained above. 
    number of annotator that labeled the datapoint with the final_key.

    """

    count_dict = {}
    # print("INSIDE MOST FREQUENT: ", label_list.shape, score_list.shape)
    for idx, num in enumerate(label_list):
        if num in count_dict.keys():
            count_dict[num][0] += 1
            count_dict[num][1] += score_list[idx]            
        else: 
            count_dict[num] = [1, score_list[idx]]

    ## take average of the probability for particular label
    for key in count_dict.keys():
        count_dict[key][1] = round(count_dict[key][1]/count_dict[key][0], 3)
    
    count_dict = dict(sorted(count_dict.items(), key=lambda item: item[1][0], reverse= True))
    
    list_same_keys = [k for k,v in count_dict.items() if v[0] == list(count_dict.values())[0][0]]
    final_key = list(count_dict.keys())[0]
    prob_key = float("-inf")
    for k in list_same_keys:
        if (count_dict[k][1] > prob_key):
            prob_key = count_dict[k][1]
            final_key = k  

    return final_key, count_dict[final_key][0]

def annotator_agreement(labels_list, final_score_list):
    """
    returns integer between 0 to 1
    """
    _, label_count = most_frequent(labels_list, final_score_list)
    return label_count/labels_list.shape[0]

In [341]:
def get_multiannotator_label_quality_scores(labels, pred_probs):
        """
        Parameters
        ----------
        labels : 2D numpy array of (multiple) given labels for each example.
                labels[i][j] = given label for i-th example by j-th annotator,
                        an integer in {0,...,K-1}.
        pred_probs : Predicted class probabilities of same format as for get_label_quality_scores().
        ... : Same additional arguments as for get_label_quality_scores(). 
        Do NOT worry about what these mean, just pass them into to the scoring for each annotator's label!

        Returns
        -------
        df : pandas DataFrame in which each row corresponds to one example, with columns:
                score_annotator1, score_annotator2, ..., score_annotatorM, annotator_agreement, overall_score
        """
        num_annotators = labels.shape[1]
        final_scores = []
        for annotator in range(num_annotators):
                final_scores.append(rank.get_label_quality_scores(np.transpose(labels)[annotator], pred_probs[annotator]))
        
        final_scores = np.transpose(np.array(final_scores))

        agreed_annotation = []
        for row in range(labels.shape[0]):        
                agreed_annotation.append(annotator_agreement(labels[row], final_scores[row]))

        output_col_names = ['score_annotator'+str(i+1) for i in range(labels.shape[1])]
        final_df = pd.DataFrame(data = final_scores, columns=output_col_names)     
        final_df['average_score'] = final_df.mean(axis=1)
        final_df['annotator_agreement'] = agreed_annotation
        final_df['overall_score'] = final_df['average_score'] * final_df['annotator_agreement']
         
        return final_df

In [342]:
score_df = get_multiannotator_label_quality_scores(labels, pred_probs)

In [343]:
score_df.head()

Unnamed: 0,score_annotator1,score_annotator2,score_annotator3,score_annotator4,score_annotator5,score_annotator6,score_annotator7,score_annotator8,score_annotator9,score_annotator10,...,score_annotator44,score_annotator45,score_annotator46,score_annotator47,score_annotator48,score_annotator49,score_annotator50,average_score,annotator_agreement,overall_score
0,0.440659,0.739078,0.512333,0.863542,0.041187,0.661572,0.229064,0.934597,0.571663,0.148111,...,0.357925,0.23018,0.557804,0.709861,0.985028,0.044531,0.390576,0.468161,0.16,0.074906
1,0.52896,0.362441,0.750903,0.714757,0.906967,0.268832,0.995989,0.280616,0.668763,0.20135,...,0.18602,0.543081,0.873942,0.541245,0.295893,0.501325,0.988095,0.457727,0.26,0.119009
2,0.962353,0.728996,0.09582,0.76626,0.263946,0.406049,0.175456,0.292058,0.856864,0.206019,...,0.105475,0.611915,0.206274,0.779187,0.872494,0.774178,0.356713,0.517404,0.14,0.072437
3,0.754925,0.760841,0.425651,0.058373,0.310159,0.33633,0.443588,0.848677,0.314038,0.938724,...,0.870841,0.485165,0.299108,0.386815,0.073221,0.062852,0.464297,0.514743,0.16,0.082359
4,0.768847,0.511564,0.032726,0.532721,0.868189,0.979571,0.418095,0.394185,0.64448,0.564332,...,0.322821,0.598175,0.090046,0.16814,0.083826,0.299062,0.320324,0.492807,0.2,0.098561


In [349]:
print("Overall Score: MAX - {}, MIN - {}".format(round(max(score_df['overall_score']),2), round(min(score_df['overall_score']),2)))
print("Average Score: MAX - {}, MIN - {}".format(round(max(score_df['average_score']),2), round(min(score_df['average_score']),2)))
print("Annotator Agreement: MAX - {}, MIN - {}".format(max(score_df['annotator_agreement']), min(score_df['annotator_agreement'])))
print("Overall score is simple matric which combine average score and annotator score. Values are very low beacuse of less annotator agreement as these are randonly choosen \
        levels. In real dataset it is probable that it may give better result.")

Overall Score: MAX - 0.18, MIN - 0.06
Average Score: MAX - 0.63, MIN - 0.37
Annotator Agreement: MAX - 0.32, MIN - 0.14
Overall score is simple matric which combine average score and annotator score. Values are very low beacuse of less annotator agreement as these are randonly choosen         levels. In real dataset it is probable that it may give better result.
