## Read Label Files

In [41]:
# Load the gold-standard and the predicted annotations.
# Each non-empty line is one tab-separated record; the alignment code further
# down reads the fields as: file id, label, start, end, content. Some records
# carry an extra sixth field (the sample output shows one on DATE/TIME rows).
true_labels = []
predicted_labels = []

true_labels_file = 'answer_gold.txt'
predicted_labels_file = 'prompt1.txt'

# Gold records whose file id exceeds this value are not loaded.
# NOTE(review): presumably the prediction file only covers ids up to 1003 — confirm.
MAX_FILE_ID = 1003

with open(true_labels_file, 'r') as f:
    for line in f:
        stripped = line.strip()
        # Blank lines (e.g. a trailing newline) carry no record; int('') would raise.
        if not stripped:
            continue
        label = stripped.split('\t')
        # Reading stops at the first id above the limit, so later lines are
        # never inspected — this assumes the gold file is sorted by file id.
        if int(label[0]) > MAX_FILE_ID:
            break
        true_labels.append(label)

with open(predicted_labels_file, 'r') as f:
    for line in f:
        stripped = line.strip()
        # Skip blank lines so no [''] pseudo-record reaches the scoring code.
        if not stripped:
            continue
        label = stripped.split('\t')
        predicted_labels.append(label)

print(true_labels)
print(predicted_labels)

[['1001', 'IDNUM', '13', '23', '88Y206206L'], ['1001', 'MEDICALRECORD', '24', '35', '8892062.BPL'], ['1001', 'PATIENT', '37', '63', 'Vatterott, Jerrie CLARENCE'], ['1001', 'IDNUM', '74', '82', '88Y20620'], ['1001', 'IDNUM', '83', '91', '88Y20620'], ['1001', 'STREET', '92', '98', 'Exeter'], ['1001', 'CITY', '99', '112', 'DECEPTION BAY'], ['1001', 'STATE', '114', '132', 'Northern Territory'], ['1001', 'ZIP', '134', '138', '6845'], ['1001', 'DATE', '170', '180', '15/11/2004', '2004-11-15'], ['1001', 'DATE', '200', '209', '20/5/2064', '2064-05-20'], ['1001', 'DEPARTMENT', '226', '234', 'PARKES 8'], ['1001', 'HOSPITAL', '237', '263', 'GUNNEDAH DISTRICT HOSPITAL'], ['1001', 'DOCTOR', '267', '288', 'Edison Clay GOLDHIRSH'], ['1001', 'DATE', '400', '404', '2059', '2059'], ['1001', 'DATE', '1193', '1197', '2059', '2059'], ['1001', 'DOCTOR', '1212', '1215', 'Sek'], ['1001', 'DOCTOR', '1243', '1253', 'X Standrew'], ['1001', 'TIME', '1257', '1274', '9:30am on 18/3/14', '2014-03-18T09:30'], ['1001'

## Method 1: Calculate via equation

### Precision, Recall, F1

In [42]:
def calculate_f1(true, predicted):
    """Return the exact-match F1 score of `predicted` against `true`.

    A predicted entry is a true positive when an equal entry is still
    available in `true`; each true entry can be matched at most once, so
    duplicates are counted one-for-one. Counts are pooled over all entries
    regardless of label, i.e. this is a micro-averaged score.

    Args:
        true: list of gold entries (compared with ``==``). Not modified.
        predicted: list of predicted entries.

    Returns:
        float: 2 * P * R / (P + R), or 0.0 when there is no true positive
        (which covers empty `true` and/or empty `predicted`).
    """
    # Work on a private copy so the caller's list is left untouched.
    unmatched_true = list(true)
    true_positives = 0
    false_positives = 0

    # Exact match -> TP (and consume the gold entry); otherwise -> FP.
    for predicted_label in predicted:
        if predicted_label in unmatched_true:
            true_positives += 1
            unmatched_true.remove(predicted_label)
        else:
            false_positives += 1

    # Gold entries nobody predicted are the false negatives.
    false_negatives = len(unmatched_true)

    # Without any true positive, precision/recall/F1 are 0 or undefined (0/0);
    # report 0.0 instead of raising ZeroDivisionError.
    if true_positives == 0:
        return 0.0

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    return 2 * (precision * recall) / (precision + recall)

### Calculate Scores

In [43]:
# Hand shallow copies to the scorer so the loaded label lists stay intact for
# the cells below.
# Note: calculate_f1 pools TP/FP/FN over all entries regardless of label, so
# the value printed here is a micro-averaged exact-match F1.
print("Macro F1:", calculate_f1(list(true_labels), list(predicted_labels)))

Macro F1: 0.25236593059936907


## Method 2: Sklearn Macro F1 Score

### Alignment

In [44]:
def aligning_labels(true, predict):
    """Greedily pair each gold entry with one predicted entry.

    Entries are sequences whose fields are read as
    ``[file id, label, start, end, content, ...]``. For every gold entry (in
    order) the first not-yet-used prediction with the same file id that agrees
    on at least two of label / start / end / content is taken as its partner.
    Gold entries without a partner are paired with the string ``"None"``.
    Predictions that are never chosen do not appear in the aligned lists; they
    are only visible through ``predict_used``.

    Entries with fewer than five fields are tolerated: a field missing on
    either side simply counts as a non-match, and an empty entry never matches.

    Args:
        true: list of gold entries.
        predict: list of predicted entries.

    Returns:
        tuple ``(aligned_true, aligned_predict, predict_used, true_used)``:
        ``aligned_true`` holds every gold entry in input order,
        ``aligned_predict`` the partner (or ``"None"``) at the same position,
        ``predict_used[j]`` tells whether ``predict[j]`` was paired, and
        ``true_used`` is all ``True`` on return (every gold entry is emitted).
    """
    aligned_true = []
    aligned_predict = []

    true_used = [False] * len(true)
    predict_used = [False] * len(predict)

    # Field positions compared once the file ids agree: label, start, end, content.
    compared_fields = (1, 2, 3, 4)

    for i, true_entry in enumerate(true):
        is_matched = False

        for j, predict_entry in enumerate(predict):
            # A prediction can be the partner of at most one gold entry.
            if predict_used[j]:
                continue
            # An entry without a file id cannot be compared at all.
            if not true_entry or not predict_entry:
                continue
            # Only entries from the same file are candidates.
            if true_entry[0] != predict_entry[0]:
                continue

            # Compare only the fields present on both sides, so a truncated
            # record lowers the match count instead of raising IndexError.
            shared_len = min(len(true_entry), len(predict_entry))
            match_count = sum(
                true_entry[k] == predict_entry[k]
                for k in compared_fields
                if k < shared_len
            )

            # At least two agreeing fields -> accept this pair.
            if match_count >= 2:
                aligned_true.append(true_entry)
                aligned_predict.append(predict_entry)
                predict_used[j] = True
                is_matched = True
                break

        # No partner found: pad the prediction side with "None".
        if not is_matched:
            aligned_true.append(true_entry)
            aligned_predict.append("None")

        # Every gold entry is emitted exactly once, matched or padded.
        true_used[i] = True

    return aligned_true, aligned_predict,  predict_used, true_used

### Calculate Macro F1

In [45]:
from sklearn.metrics import f1_score

# Pair every gold entry with a prediction (or the "None" placeholder).
aligned_true, aligned_predict, predict_used, true_used = aligning_labels(
    list(true_labels), list(predicted_labels)
)

# Show the alignment, numbered from 1: gold entry, tab, paired prediction.
for row_number, (gold_entry, paired_entry) in enumerate(
    zip(aligned_true, aligned_predict), start=1
):
    print(row_number, gold_entry, '\t', paired_entry)

# Entries that never took part in a pairing. Unpaired predictions are not
# part of the aligned lists, so they do not enter the F1 computed below.
print("Useless Prediction Count: ", predict_used.count(False))
print("Useless True Count: ", true_used.count(False))

# Reduce both sides to their label column; the "None" placeholder stays "None"
# and is therefore treated as a class of its own by f1_score.
aligned_true_labels = [gold_entry[1] for gold_entry in aligned_true]
aligned_predicted_labels = [
    "None" if paired_entry == "None" else paired_entry[1]
    for paired_entry in aligned_predict
]
f1 = f1_score(aligned_true_labels, aligned_predicted_labels, average='macro')

print("marcro f1: ", f1)

1 ['1001', 'IDNUM', '13', '23', '88Y206206L'] 	 ['1001', 'IDNUM', '13', '23', '88Y206206L']
2 ['1001', 'MEDICALRECORD', '24', '35', '8892062.BPL'] 	 ['1001', 'MEDICALRECORD', '24', '35', '8892062.BPL']
3 ['1001', 'PATIENT', '37', '63', 'Vatterott, Jerrie CLARENCE'] 	 ['1001', 'PATIENT', '37', '63', 'Vatterott, Jerrie CLARENCE']
4 ['1001', 'IDNUM', '74', '82', '88Y20620'] 	 ['1001', 'IDNUM', '74', '82', '88Y20620']
5 ['1001', 'IDNUM', '83', '91', '88Y20620'] 	 ['1001', 'IDNUM', '91', '99', '88Y20620']
6 ['1001', 'STREET', '92', '98', 'Exeter'] 	 ['1001', 'STREET', '92', '98', 'Exeter']
7 ['1001', 'CITY', '99', '112', 'DECEPTION BAY'] 	 ['1001', 'CITY', '99', '112', 'DECEPTION BAY']
8 ['1001', 'STATE', '114', '132', 'Northern Territory'] 	 ['1001', 'STATE', '127', '145', 'Northern Territory']
9 ['1001', 'ZIP', '134', '138', '6845'] 	 ['1001', 'ZIP', '180', '184', '6845']
10 ['1001', 'DATE', '170', '180', '15/11/2004', '2004-11-15'] 	 ['1001', 'DATE', '170', '180', '15/11/2004', '2004-11-