### Installs and downloads
- python -m pip install spacy
- python -m spacy download en_core_web_sm

In [1]:
import os

import spacy
# Load spaCy's small English pipeline once; the NER cell below reuses it.
# The model package must be installed first
# (python -m spacy download en_core_web_sm).
nlp = spacy.load("en_core_web_sm")

### Paths

In [2]:
# Directories, relative to the notebook's working directory, that hold the
# ground-truth annotation files (<id>.json) and the story texts (<id>.txt).
# The trailing slash is kept so a file name can be appended directly.
ANNOTATIONS_PATH = "../data/annotations/"
STORIES_PATH = "../data/stories/"

### NER

In [3]:
import json

# Parallel lists: ground_truth[i] and results[i] describe the same story, so
# the evaluation cell can compare them pairwise.
ground_truth = []
results = []

# Apostrophe variants (ASCII and typographic) that introduce a trailing
# possessive, e.g. "anna's".
POSSESSIVE_SUFFIXES = ("'s", "\u2019s")

# TODO: evaluate every story found in STORIES_PATH (with its annotation from
# ANNOTATIONS_PATH) instead of this single hard-coded file.
file_name = "002"

gt_file = os.path.join(ANNOTATIONS_PATH, f"{file_name}.json")
filename = os.path.join(STORIES_PATH, f"{file_name}.txt")

# Process the story only when BOTH files exist: appending a ground truth
# without a result (or the reverse) would leave the two lists misaligned.
if os.path.isfile(gt_file) and os.path.isfile(filename):
    with open(gt_file, encoding='utf8') as f:
        annotation_json = json.load(f)
    with open(filename, encoding='utf8') as f:
        story = f.read()

    doc = nlp(story)

    # Keep only PERSON entities, lower-cased, with a trailing possessive
    # removed. Only the suffix is stripped so that an "'s" occurring inside a
    # name is left intact.
    persons = set()
    for ent in doc.ents:
        if ent.label_ != 'PERSON':
            continue
        name = ent.text.lower()
        for suffix in POSSESSIVE_SUFFIXES:
            if name.endswith(suffix):
                name = name[:-len(suffix)]
                break
        if name:  # ignore an entity that consisted solely of the possessive
            persons.add(name)

    # Lower-case the annotated names as well so that the comparison with the
    # (lower-cased) predictions is case-insensitive.
    ground_truth.append({character.lower() for character in annotation_json['characters']})
    results.append(persons)
else:
    print(f'Skipping story {file_name}: annotation or story file is missing')

print(f'Ground truths: {ground_truth}')
print(f'Results: {results}')

Ground truths: [{'kleopatra petrovna', 'ilya sergeitch peplov', 'shchupkin', 'natashenka'}]
Results: [{'nekrassov', 'shchupkin', 'kleopatra petrovna', 'lazhetchnikov', 'kleopatra'}]


### Results

In [4]:
ground_truth_count = len(ground_truth)
results_count = len(results)

# The lists are compared pairwise; a length mismatch means some story lacks
# either its annotation or its prediction, so flag it instead of failing with
# an IndexError part-way through the loop.
if ground_truth_count != results_count:
    print(f'Warning: {ground_truth_count} ground truths vs {results_count} results; '
          'only the overlapping pairs are compared')

for expected, predicted in zip(ground_truth, results):

    res_intersection = expected.intersection(predicted)
    res_union = expected.union(predicted)

    # Confusion counts for this story:
    #   TP - annotated characters that were extracted
    #   FP - extracted names that are not annotated
    #   FN - annotated characters that were missed
    # TN stays 0: there is no closed set of candidate names from which to
    # count correctly rejected ones.
    TP = len(res_intersection)
    FP = len(predicted.difference(expected))
    FN = len(expected.difference(predicted))
    TN = 0

    print(f'Intersection: {res_intersection}')
    print(f'Union: {res_union}')
    print(f'TP: {TP}, FP: {FP}, FN: {FN}')

Intersection: {'kleopatra petrovna', 'shchupkin'}
Union: {'ilya sergeitch peplov', 'natashenka', 'nekrassov', 'shchupkin', 'kleopatra petrovna', 'lazhetchnikov', 'kleopatra'}
