In [1]:
import numpy as np
import pandas as pd

Read in your label data:

In [2]:
# Load the label table (radiologist reads rad1-rad3 plus the biopsy result)
# and preview its first ten rows.
labels_csv = 'labels.csv'
labels = pd.read_csv(labels_csv)
labels.head(n=10)

Unnamed: 0,rad1,rad2,rad3,biopsy
0,benign,benign,benign,benign
1,benign,benign,benign,benign
2,benign,benign,benign,benign
3,benign,benign,benign,benign
4,benign,benign,cancer,benign
5,cancer,cancer,cancer,cancer
6,benign,benign,benign,benign
7,benign,benign,benign,benign
8,cancer,cancer,benign,cancer
9,benign,benign,cancer,benign


## Create your first ground truth as derived from biopsy labels: 

In [3]:
# Summarise the frame: row count, column names, non-null counts and dtypes.
labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   rad1    60 non-null     object
 1   rad2    60 non-null     object
 2   rad3    60 non-null     object
 3   biopsy  60 non-null     object
dtypes: object(4)
memory usage: 2.0+ KB


In [4]:
# Biopsy-derived ground truth: 1 where the biopsy result is 'benign', 0 otherwise.
# A vectorised comparison replaces the row-wise apply; it is simpler and also
# behaves sensibly on an empty frame.
labels['ground_truth_0'] = (labels['biopsy'] == 'benign').astype(int)
# Show a preview rather than dumping the whole frame.
labels.head(10)

Unnamed: 0,rad1,rad2,rad3,biopsy,ground_truth_0
0,benign,benign,benign,benign,1
1,benign,benign,benign,benign,1
2,benign,benign,benign,benign,1
3,benign,benign,benign,benign,1
4,benign,benign,cancer,benign,1
5,cancer,cancer,cancer,cancer,0
6,benign,benign,benign,benign,1
7,benign,benign,benign,benign,1
8,cancer,cancer,benign,cancer,0
9,benign,benign,cancer,benign,1


## Create your second ground truth by majority vote of the three radiologists:

In [14]:
def ground_truth_1(row, readers=('rad1', 'rad2', 'rad3'), benign_label='benign'):
    """Count how many readers labelled a case as benign.

    Parameters
    ----------
    row : mapping or pandas.Series
        One case, indexable by each name in ``readers``.
    readers : sequence of str, optional
        Keys/columns holding the individual reads. Defaults to the three
        radiologist columns ``rad1``, ``rad2`` and ``rad3``.
    benign_label : str, optional
        Value that marks a benign read. Defaults to ``'benign'``.

    Returns
    -------
    int
        Number of reads equal to ``benign_label``, between 0 and
        ``len(readers)``.
    """
    return sum(int(row[reader] == benign_label) for reader in readers)

# Number of benign votes per case, kept so the raw tally can be inspected.
labels['ground_truth_1_val'] = labels.apply(ground_truth_1, axis=1)
# Majority rule: benign (1) when more than one radiologist said benign.
# Derived from the stored tally so the row-wise vote is computed only once.
labels['ground_truth_1'] = (labels['ground_truth_1_val'] > 1).astype(int)
# Show a preview rather than dumping the whole frame.
labels.head(10)

Unnamed: 0,rad1,rad2,rad3,biopsy,ground_truth_0,ground_truth_1,ground_truth_2,ground_truth_1_val
0,benign,benign,benign,benign,1,1,1,3
1,benign,benign,benign,benign,1,1,1,3
2,benign,benign,benign,benign,1,1,1,3
3,benign,benign,benign,benign,1,1,1,3
4,benign,benign,cancer,benign,1,1,1,2
5,cancer,cancer,cancer,cancer,0,0,0,0
6,benign,benign,benign,benign,1,1,1,3
7,benign,benign,benign,benign,1,1,1,3
8,cancer,cancer,benign,cancer,0,0,0,1
9,benign,benign,cancer,benign,1,1,1,2


## Create your third ground truth by weighting the three radiologists:

In [27]:
def ground_truth_2(row, weights=None, benign_label='benign'):
    """Compute a weighted benign score for one case.

    Each reader whose read equals ``benign_label`` contributes that reader's
    weight to the score; other reads contribute nothing.

    Parameters
    ----------
    row : mapping or pandas.Series
        One case, indexable by each key in ``weights``.
    weights : mapping of str to number, optional
        Reader column -> weight. Defaults to
        ``{'rad1': 5, 'rad2': 10, 'rad3': 15}``.
    benign_label : str, optional
        Value that marks a benign read. Defaults to ``'benign'``.

    Returns
    -------
    int or float
        Sum of the weights of the readers that called the case benign
        (0 to 30 with the default weights).
    """
    if weights is None:
        # Built per call so the default is never a shared mutable object.
        weights = {'rad1': 5, 'rad2': 10, 'rad3': 15}
    return sum(weight * int(row[reader] == benign_label)
               for reader, weight in weights.items())

# Weighted benign score per case, kept so the raw score can be inspected.
labels['ground_truth_2_val'] = labels.apply(ground_truth_2, axis=1)
# Benign (1) when the weighted score exceeds 10. Derived from the stored score
# so the row-wise scoring is computed only once.
# NOTE(review): the weights sum to 30, so a 15/15 split (rad3 against
# rad1 + rad2) clears the > 10 cut-off and is labelled benign either way --
# confirm this tie-break is intended.
labels['ground_truth_2'] = (labels['ground_truth_2_val'] > 10).astype(int)
# Show a preview rather than dumping the whole frame.
labels.head(10)

Unnamed: 0,rad1,rad2,rad3,biopsy,ground_truth_0,ground_truth_1,ground_truth_2,ground_truth_1_val,ground_truth_2_val
0,benign,benign,benign,benign,1,1,1,3,30
1,benign,benign,benign,benign,1,1,1,3,30
2,benign,benign,benign,benign,1,1,1,3,30
3,benign,benign,benign,benign,1,1,1,3,30
4,benign,benign,cancer,benign,1,1,1,2,15
5,cancer,cancer,cancer,cancer,0,0,0,0,0
6,benign,benign,benign,benign,1,1,1,3,30
7,benign,benign,benign,benign,1,1,1,3,30
8,cancer,cancer,benign,cancer,0,0,1,1,15
9,benign,benign,cancer,benign,1,1,1,2,15


## Compare the three ground truths:

Here, just explore the three sets of labels you created and see how often each pair of them agrees.

In [20]:
# Fraction of cases where the biopsy label and the majority-vote label match.
same_01 = labels['ground_truth_0'].eq(labels['ground_truth_1'])
agree_01 = int(same_01.sum()) / len(labels)
agree_01

0.85

In [21]:
# Cases where the biopsy label and the majority-vote label differ.
labels.loc[labels['ground_truth_0'].ne(labels['ground_truth_1'])]

Unnamed: 0,rad1,rad2,rad3,biopsy,ground_truth_0,ground_truth_1,ground_truth_2,ground_truth_1_val,ground_truth_2_val
12,benign,cancer,cancer,benign,1,0,0,1,5
14,cancer,cancer,cancer,benign,1,0,0,0,0
22,benign,cancer,cancer,benign,1,0,0,1,5
29,benign,benign,benign,cancer,0,1,1,3,30
30,cancer,benign,benign,cancer,0,1,1,2,25
34,benign,cancer,cancer,benign,1,0,0,1,5
37,cancer,benign,benign,cancer,0,1,1,2,25
52,cancer,cancer,benign,benign,1,0,0,1,15
57,benign,benign,benign,cancer,0,1,1,3,30


In [28]:
# Fraction of cases where the biopsy label and the weighted-vote label match.
same_02 = labels['ground_truth_0'].eq(labels['ground_truth_2'])
agree_02 = int(same_02.sum()) / len(labels)
agree_02


0.8

In [29]:
# Cases where the biopsy label and the weighted-vote label differ.
labels.loc[labels['ground_truth_0'].ne(labels['ground_truth_2'])]

Unnamed: 0,rad1,rad2,rad3,biopsy,ground_truth_0,ground_truth_1,ground_truth_2,ground_truth_1_val,ground_truth_2_val
8,cancer,cancer,benign,cancer,0,0,1,1,15
12,benign,cancer,cancer,benign,1,0,0,1,5
14,cancer,cancer,cancer,benign,1,0,0,0,0
18,cancer,cancer,benign,cancer,0,0,1,1,15
22,benign,cancer,cancer,benign,1,0,0,1,5
27,cancer,cancer,benign,cancer,0,0,1,1,15
29,benign,benign,benign,cancer,0,1,1,3,30
30,cancer,benign,benign,cancer,0,1,1,2,25
34,benign,cancer,cancer,benign,1,0,0,1,5
37,cancer,benign,benign,cancer,0,1,1,2,25


In [30]:
# Cases where the majority-vote label and the weighted-vote label differ.
labels.loc[labels['ground_truth_1'].ne(labels['ground_truth_2'])]

Unnamed: 0,rad1,rad2,rad3,biopsy,ground_truth_0,ground_truth_1,ground_truth_2,ground_truth_1_val,ground_truth_2_val
8,cancer,cancer,benign,cancer,0,0,1,1,15
18,cancer,cancer,benign,cancer,0,0,1,1,15
27,cancer,cancer,benign,cancer,0,0,1,1,15
52,cancer,cancer,benign,benign,1,0,1,1,15
55,cancer,cancer,benign,cancer,0,0,1,1,15
