In [None]:
# Training of Entity Recognizer

import warnings
import random
import spacy
from spacy.pipeline import EntityRecognizer
from spacy.training import Example
import xml.etree.ElementTree as ElementTree

# NOTE: this silences every Python warning from here on in this kernel session.
warnings.filterwarnings('ignore')

nlp = spacy.load('en_core_web_sm')

# Replace the NER shipped with the model by a newly created one.
nlp.remove_pipe('ner')
ner = nlp.add_pipe("ner")

# Disable all pipeline components except NER before training the NER.
# `select_pipes` is the spaCy v3 replacement for the deprecated `disable_pipes`.
pipe_exceptions = ['ner']
not_required_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
nlp.select_pipes(disable=not_required_pipes)

# A bare expression in the middle of a cell is not displayed, so print it explicitly.
print(nlp.pipe_names)       # Should now output only ['ner']

# Load XML-file with training data

entity_linker_export = ElementTree.parse("D:/Git_Fachpraktikum/MedExtractor/resources/all_entity_linker_export.xml")
root = entity_linker_export.getroot()

# Prepare set of training samples.
# Each entry is (sample text, [(start, end, 'SYMPTOM'), ...]) built from the
# <position> elements found below the sample's <links> child.

from ast import literal_eval  # parses Python literals only; unlike eval() it never executes code

TRAIN_DATA = []

for sample in root.iter("sample"):

    if sample.text is None:
        continue  # a sample without text cannot be annotated

    links_node = sample.find("links")

    symptoms = []
    if links_node is not None:  # a sample without <links> simply has no entities
        for pos in links_node.iter("position"):

            raw_position = (pos.text or "").strip()
            if not raw_position:
                continue  # empty <position/> element, nothing to convert

            position = literal_eval(raw_position)   # converts string like "(34,41)" into tuple format

            symptoms.append((position[0], position[1], 'SYMPTOM'))

    TRAIN_DATA.append((sample.text, symptoms))

# Convert training samples into spacy.example objects

N_EPOCHS = 25  # number of shuffled passes over TRAIN_DATA that are stored in `examples`

examples = []
for _ in range(N_EPOCHS):
    random.shuffle(TRAIN_DATA)   # shuffles in place, so each pass has a different order
    for text, annotations in TRAIN_DATA:
        # make_doc only tokenizes the text. No pipeline component is run, so the
        # not-yet-initialized NER does not have to be disabled and re-enabled here
        # (which would leave it disabled if an exception interrupted the loop).
        reference_doc = nlp.make_doc(text)
        examples.append(Example.from_dict(reference_doc, {"entities": annotations}))


def give_examples():
    """Return the list of training examples built above.

    Used as the `get_examples` callback for `ner.initialize` and as the
    source of examples for the training loop.
    """
    return examples

# Initialize and train the Entity Recognizer

ner.initialize(give_examples)
optimizer = nlp.create_optimizer()

# One optimizer step per example, in the order the examples were generated.
for training_example in give_examples():
    single_example_batch = [training_example]
    nlp.update(single_example_batch, sgd=optimizer)

# Switch the remaining pipeline components back on now that training is done.
for pipe_name in ('tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer'):
    nlp.enable_pipe(pipe_name)

# Persist only the trained NER component.
ner.to_disk("D:/Git_Fachpraktikum/MedExtractor/resources/ner")

In [None]:
# Training of Entity Ruler

import spacy
from spacy.pipeline import EntityRuler
from spacy.training import Example
import random
import xml.etree.ElementTree as ElementTree

nlp = spacy.load('en_core_web_sm')

# Keep only the listed components enabled; everything else is disabled so that
# the entity ruler added below is the only enabled component not on this list.
# `select_pipes` is the spaCy v3 replacement for the deprecated `disable_pipes`.
pipe_exceptions = ['tok2vec','tagger','parser','attribute_ruler','lemmatizer']
not_required_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
nlp.select_pipes(disable=not_required_pipes)
ruler = nlp.add_pipe("entity_ruler")

# Load XML-file with training data

entity_linker_export = ElementTree.parse("D:/Git_Fachpraktikum/MedExtractor/resources/all_entity_linker_export.xml")
root = entity_linker_export.getroot()

# Prepare set of training samples: one phrase pattern per <alias> element.
# Aliases without text are skipped, because a pattern of None is rejected by
# `add_patterns` and would abort the whole cell.

TRAIN_DATA = [
    {"label": "SYMPTOM", "pattern": alias.text}
    for alias in root.iter("alias")
    if alias.text and alias.text.strip()
]

ruler.add_patterns(TRAIN_DATA)

In [None]:
# Entity Recognizer Test - No need to run 'Training of Entity Recognizer' script. Model is loaded from disk.
import re
import spacy
from spacy import displacy
from spacy.pipeline import EntityRecognizer

# This pipeline gets its own name on purpose: rebinding `nlp` here would replace
# the pipeline holding the entity ruler, which the 'Entity Ruler Test' cell
# expects to find under `nlp`.
ner_nlp = spacy.load('en_core_web_sm')

print(ner_nlp.pipe_names)
if ner_nlp.has_pipe('entity_ruler'):
    # ensure that not the entity ruler is used for finding named entities
    ner_nlp.select_pipes(disable=['entity_ruler'])
ner_nlp.remove_pipe('ner')                   # remove standard NER ...
ner = ner_nlp.add_pipe('ner')                # ... and create new NER
ner_nlp.enable_pipe('ner')
ner.from_disk("D:/Git_Fachpraktikum/MedExtractor/resources/ner")  # load statistical model

# Read the text inside a `with` block so the file is closed even if reading fails.
with open('D:/Git_Fachpraktikum/MedExtractor/resources/to_analyze_wikipedia/wikipedia_agoraphobia.txt','r',encoding="utf8") as text_file:
    text = text_file.read()

# Remove literature references such as '[23]'.
# Raw string: '\[' and '\d' are invalid escape sequences in a normal string literal.
p = re.compile(r'\[\d*?\]')
doc = ner_nlp(p.sub('', text))
displacy.render(doc, style="ent", jupyter=True)

In [None]:
# Entity Ruler Test - Please run 'Training of Entity Ruler' script first
import re
import spacy
from spacy import displacy
from spacy.pipeline import EntityRuler

# Ensure that not the entity recognizer is used for finding named entities.
# `select_pipes` is the spaCy v3 replacement for the deprecated `disable_pipes`.
nlp.select_pipes(disable=['ner'])
nlp.enable_pipe('entity_ruler')
print(nlp.pipe_names)

# Read the text inside a `with` block so the file handle is always closed.
with open('D:/Git_Fachpraktikum/MedExtractor/resources/to_analyze_wikipedia/wikipedia_agoraphobia.txt','r',encoding="utf8") as text_file:
    text = text_file.read()

# Remove literature references such as '[23]'.
# Raw string: '\[' and '\d' are invalid escape sequences in a normal string literal.
p = re.compile(r'\[\d*?\]')
doc = nlp(p.sub('', text))
displacy.render(doc, style="ent", jupyter=True)