In [17]:
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np

In [2]:
# O: I have lost my card 5199807081286740 and blocked by c4c4:9bac:38a3:886:f173:826c:d16d:e730 can't submit a replacement request. My name is Maisie Griffiths.
# G: I have lost my card xxxxxxxxxxxxxxxx and blocked by xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx can't submit a replacement request. My name is xxxxxxxxxxxxxxxx.
# P: I have lost my card 51998070xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx826c:d16d:e730 can't submit a replacement request. My name is Maisie xxxxxxxxx.


# settings
# Example sentence containing three PII entities. Every span below uses
# character offsets into this string, with "end" exclusive.
origin_text = (
    "I have lost my card 5199807081286740 and blocked by "
    "c4c4:9bac:38a3:886:f173:826c:d16d:e730 can't submit a "
    "replacement request. My name is Maisie Griffiths."
)

# Ground-truth entity spans.
true_spans = [
    dict(text="5199807081286740", start=20, end=36, type="credit_card"),
    dict(text="c4c4:9bac:38a3:886:f173:826c:d16d:e730", start=52, end=90, type="ip_address"),
    dict(text="Maisie Griffiths", start=138, end=154, type="person"),
]

# Predicted entity spans: the first one straddles the tail of the card number
# and the head of the IP address, the second covers only the surname.
pred_spans = [
    dict(text="81286740 and blocked by c4c4:9bac:38a3:886:f173:", start=28, end=76, type="ip_address"),
    dict(text="Griffiths", start=145, end=154, type="person"),
]


# precision
overlap_with_correct_type(a_span, spans) / ( overlap_with_correct_type(a_span, spans) + overlap_with_incorrect_type(a_span, spans) )
= (36 - 28) / ( (36 - 28) + (76 - 52) )
= 8 / ( 8 + 24)
= 0.25

# recall
overlap_with_correct_type(a_span, spans) / ( overlap_with_correct_type(a_span, spans) + non_overlap(a_span) )
= (36 - 28) / ( (36 - 28) + (28 - 20) )
= 8 / ( 8 + 8 )
= 0.5

# Global-based

In [3]:
def build_binary(text, spans):
    """Binary representation of text, one value per character.

    Characters covered by any span are labeled 1; all others are labeled 0.

    Args:
        text: The original string; only its length is used.
        spans: Iterable of dicts with integer "start" and "end" character
            offsets ("end" is exclusive).

    Returns:
        A list of 0/1 ints whose length is always ``len(text)``.
    """
    size = len(text)
    array = [0] * size

    for span in spans:
        # Clamp offsets to the text. Without this, a slice assignment that
        # runs past the end would lengthen the list and the mask would no
        # longer line up character-for-character with other masks.
        s = max(0, min(span["start"], size))
        e = max(0, min(span["end"], size))
        if e > s:
            array[s:e] = [1] * (e - s)

    return array

In [4]:
# Character-level 0/1 masks over origin_text: one built from all ground-truth
# spans, one from all predicted spans.
true_binary, pred_binary = (
    build_binary(origin_text, spans) for spans in (true_spans, pred_spans)
)

In [5]:
# Global character-level scores: every character is one binary sample,
# entity types are ignored.
precision, recall, f1 = (
    metric(true_binary, pred_binary)
    for metric in (precision_score, recall_score, f1_score)
)

f"precision: {precision:.4f}, recall: {recall:.4f}, f1: {f1:.4f}"

'precision: 0.7193, recall: 0.5857, f1: 0.6457'

# Entity-based

## Boundary detection

In [6]:
# Boundary precision per predicted span: each predicted span is turned into
# its own binary mask and scored against the full ground-truth mask.
# A comprehension is used so the per-span value does not rebind the global
# `precision` computed for the whole text.
boundary_precisions = [
    precision_score(true_binary, build_binary(origin_text, [pred_span]))
    for pred_span in pred_spans
]

# Unweighted mean over predicted spans.
ave_boundary_precision = sum(boundary_precisions) / len(boundary_precisions)

In [7]:
# Boundary recall per ground-truth span: each true span is turned into its own
# binary mask and scored against the full prediction mask.
# A comprehension is used so the per-span value does not rebind the global
# `recall` computed for the whole text.
boundary_recalls = [
    recall_score(build_binary(origin_text, [true_span]), pred_binary)
    for true_span in true_spans
]

# Unweighted mean over ground-truth spans.
ave_boundary_recall = sum(boundary_recalls) / len(boundary_recalls)

In [8]:
# Harmonic mean of the averaged boundary precision and recall.
boundary_f1 = (
    2 * ave_boundary_precision * ave_boundary_recall
    / (ave_boundary_precision + ave_boundary_recall)
)

f"For boundary detection: ave-precision: {ave_boundary_precision:.4f}, ave-recall: {ave_boundary_recall:.4f}, f1: {boundary_f1:.4f}"

'For boundary detection: ave-precision: 0.8333, ave-recall: 0.5647, f1: 0.6732'

## Type identification

In [9]:
# The formula we had was ill-defined for false positives; these are the steps
# to fix it

# span_1 (how we did it before)
# O: I have lost my card 5199807081286740 and blocked by c4c4:9bac:38a3:886:f173:826c:d16d:e730 can't submit a replacement request. My name is Maisie Griffiths.
# G: I have lost my card xxxxxxxxxxxxxxxx and blocked by xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx can't submit a replacement request. My name is xxxxxxxxxxxxxxxx.
# P: I have lost my card 51998070xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx826c:d16d:e730 can't submit a replacement request. My name is Maisie Griffiths.

# what we'd like to do
## when calculating true positive
# G: I have lost my card xxxxxxxxxxxxxxxx and blocked by xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx can't submit a replacement request. My name is xxxxxxxxxxxxxxxx.
# P: I have lost my card 5199807081286740 and blocked by xxxxxxxxxxxxxxxxxxxxxxxx826c:d16d:e730 can't submit a replacement request. My name is Maisie Griffiths.

## when calculating false positive
# G: I have lost my card 5199807081286740 and blocked by xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx can't submit a replacement request. My name is xxxxxxxxxxxxxxxx.
# P: I have lost my card 51998070xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx826c:d16d:e730 can't submit a replacement request. My name is Maisie Griffiths.


# That is equivalent to converting the ground truth by removing all types except the type of span_1
# G: I have lost my card 5199807081286740 and blocked by xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx can't submit a replacement request. My name is Maisie Griffiths.
# P: I have lost my card 51998070xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx826c:d16d:e730 can't submit a replacement request. My name is Maisie Griffiths.

In [10]:
# Instead of implementing the fix in binary, it would be easier to code it up with multi-class

# O: I have lost my card 5199807081286740 and blocked by c4c4:9bac:38a3:886:f173:826c:d16d:e730 can't submit a replacement request. My name is Maisie Griffiths.
# G: I have lost my card 1111111111111111 and blocked by 22222222222222222222222222222222222222 can't submit a replacement request. My name is 3333333333333333.
# P: I have lost my card 51998070222222222222222222222222222222222222222222222222826c:d16d:e730 can't submit a replacement request. My name is Maisie 333333333.
# where 1 is type credit_card, 2 is type ip_address and 3 is type person; the rest of the chars should be converted to 0, but for illustration we won't bother here
# (stray scratch fragments, kept as comments so the cell still parses)
#  and blocked by 22222222222222222222222222222222222222
# 2222222222222222222222222222222222222222

In [11]:
# Span type name -> integer class id used for the multi-class representation.
# Id 0 is the background value given to characters outside every span.
# NOTE(review): "LOCATION" and "OTHER" share id 6 (ids 4 and 5 are unused) and
# are cased differently from the other keys - confirm this is intentional.
mapping = {
    "non_entity": 0,
    "credit_card": 1,
    "ip_address": 2,
    "person": 3,
    "LOCATION": 6,
    "OTHER": 6
}

# Two-class variant: background vs. any entity.
mapping_pii = {
    "non_entity": 0,
    "all_entities": 1
}

# TODO: pii_score and entity_score are not defined in any cell above, so
# referencing them here raised NameError and stopped the cell before the
# code that follows could run. Once both scores exist, the intended ratio is:
#     ratio = entity_score / pii_score

def build_multinomial(text, spans, mapping):
    """Multi-class representation of text, one class id per character.

    Characters inside a span get the class id of that span's type; all other
    characters get 0. When spans overlap, the span listed later wins.

    Args:
        text: The original string; only its length is used.
        spans: Iterable of dicts with integer "start" and "end" character
            offsets ("end" is exclusive) and a "type" key.
        mapping: Dict from span type name to integer class id.

    Returns:
        A list of ints whose length is always ``len(text)``.

    Raises:
        KeyError: If a span's type is not a key of ``mapping``.
    """
    size = len(text)
    array = [0] * size

    for span in spans:
        class_id = mapping[span["type"]]
        # Clamp offsets to the text. Without this, a slice assignment that
        # runs past the end would lengthen the list and the result would no
        # longer line up character-for-character with other label arrays.
        s = max(0, min(span["start"], size))
        e = max(0, min(span["end"], size))
        if e > s:
            array[s:e] = [class_id] * (e - s)

    return array

In [12]:
# Character-level class-id arrays over origin_text: one built from all
# ground-truth spans, one from all predicted spans.
true_multi, pred_multi = (
    build_multinomial(origin_text, spans, mapping)
    for spans in (true_spans, pred_spans)
)

In [13]:
# Type precision per predicted span: the span alone is encoded with its class
# id and scored against the full ground-truth class array, restricted to that
# one class. With average=None and a single label the result holds exactly one
# score, hence the [0].
# A comprehension is used so the per-span value does not rebind the global
# `precision` computed for the whole text.
type_precisions = [
    precision_score(
        true_multi,
        build_multinomial(origin_text, [pred_span], mapping),
        average=None,
        labels=[mapping[pred_span["type"]]],
    )[0]
    for pred_span in pred_spans
]

# Unweighted mean over predicted spans.
ave_type_precision = sum(type_precisions) / len(type_precisions)

In [14]:
# Type recall per ground-truth span: the span alone is encoded with its class
# id and the full prediction class array is scored against it, restricted to
# that one class. With average=None and a single label the result holds
# exactly one score, hence the [0].
# A comprehension is used so the per-span value does not rebind the global
# `recall` computed for the whole text.
type_recalls = [
    recall_score(
        build_multinomial(origin_text, [true_span], mapping),
        pred_multi,
        average=None,
        labels=[mapping[true_span["type"]]],
    )[0]
    for true_span in true_spans
]

# Unweighted mean over ground-truth spans.
ave_type_recall = sum(type_recalls) / len(type_recalls)

In [16]:
# Harmonic mean of the averaged type precision and recall.
type_f1 = (
    2 * ave_type_precision * ave_type_recall
    / (ave_type_precision + ave_type_recall)
)

f"For type identification: ave-precision: {ave_type_precision:.4f}, ave-recall: {ave_type_recall:.4f}, f1: {type_f1:.4f}"

'For type identification: ave-precision: 0.7500, ave-recall: 0.3980, f1: 0.5201'

## Performance of boundary detection and type identification for every entity

In [25]:
# precisions
# Convert both per-span score lists to arrays, then combine them element-wise:
# entity precision = boundary precision * type precision for each predicted span.
boundary_precisions, type_precisions = map(
    np.array, (boundary_precisions, type_precisions)
)

entity_precisions = boundary_precisions * type_precisions

array([0.33333333, 1.        ])

In [33]:
# add precision scores
# Store each predicted span's entity precision on its dict under "precision"
# (this mutates the dicts inside pred_spans in place).
for span, score in zip(pred_spans, entity_precisions):
    span["precision"] = score

pred_spans

[{'text': '81286740 and blocked by c4c4:9bac:38a3:886:f173:',
  'start': 28,
  'end': 76,
  'type': 'ip_address',
  'precison': 0,
  'precision': 0.3333333333333333},
 {'text': 'Griffiths',
  'start': 145,
  'end': 154,
  'type': 'person',
  'precision': 1.0}]

In [34]:
# recalls
# Convert both per-span score lists to arrays, then combine them element-wise:
# entity recall = boundary recall * type recall for each ground-truth span.
boundary_recalls, type_recalls = map(np.array, (boundary_recalls, type_recalls))

entity_recalls = boundary_recalls * type_recalls

In [35]:
# add recall scores
# Store each ground-truth span's entity recall on its dict under "recall"
# (this mutates the dicts inside true_spans in place).
for span, score in zip(true_spans, entity_recalls):
    span["recall"] = score

true_spans

[{'text': '5199807081286740',
  'start': 20,
  'end': 36,
  'type': 'credit_card',
  'recall': 0.0},
 {'text': 'c4c4:9bac:38a3:886:f173:826c:d16d:e730',
  'start': 52,
  'end': 90,
  'type': 'ip_address',
  'recall': 0.39889196675900274},
 {'text': 'Maisie Griffiths',
  'start': 138,
  'end': 154,
  'type': 'person',
  'recall': 0.31640625}]

In [37]:
# f1 on ip_address
# Harmonic mean of two hand-entered, 4-decimal scores: 0.3333 as the precision
# and 0.3988 as the recall.
ip_precision, ip_recall = 0.3333, 0.3988
f1_ip_address = (2 * ip_precision * ip_recall) / (ip_precision + ip_recall)
f1_ip_address

0.3631199016527797