In [1]:
import spacy
from spacy.matcher import Matcher
#!python3 -m spacy download en_core_web_sm

import numpy as np
import re

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# NOTE(review): this loads en_core_web_md, while the commented-out download
# command above and every later spacy.load() call in the visible notebook use
# en_core_web_sm. `nlp` is reassigned before its first use below, so this load
# looks redundant here -- confirm before removing or changing the model name.
nlp = spacy.load("en_core_web_md")

## Model performance on your data

### In this exercise, you will practice evaluating an existing model on your data. In this case, the aim is to examine model performance on a specific entity label, PRODUCT. If a model can accurately classify a large percentage of PRODUCT entities (e.g. more than 75%), you do not need to train the model on examples of PRODUCT entities; otherwise, you should consider training the model to improve its performance on PRODUCT entity prediction.

### You'll use two reviews from the Amazon Fine Food Reviews dataset for this exercise. You can access these reviews by using the texts list.

### The en_core_web_sm model is already loaded for you. You can access it by calling nlp(). The model has already been run on the texts list, and documents, a list of Doc containers, is available for your use.

### Instructions
-    Compile a target_entities list, of all the entities for each of the documents, and append a tuple of (entities text, entities label) only if Jumbo is in the entity text.
-    For any tuple in the target_entities, append True to a correct_labels list if the entity label (second attribute in the tuple) is PRODUCT, otherwise append False.

In [3]:
# Load the small English pipeline used for this exercise.
nlp = spacy.load("en_core_web_sm")

# The two review snippets to evaluate.
texts = [
    "Product arrived labeled as Jumbo Salted Peanuts.",
    "Not sure if the product was labeled as Jumbo.",
]

# Run the pipeline on each text, producing one Doc container per review.
documents = list(map(nlp, texts))

In [4]:
# Collect (entity text, entity label) for every predicted entity whose text
# contains "Jumbo", across all documents.
target_entities = [
    (entity.text, entity.label_)
    for document in documents
    for entity in document.ents
    if "Jumbo" in entity.text
]
print(target_entities)

# One boolean per collected entity: True when the predicted label is `PRODUCT`.
correct_labels = [label == "PRODUCT" for _, label in target_entities]
print(correct_labels)

[('Jumbo Salted Peanuts', 'PERSON'), ('Jumbo', 'PERSON')]
[False, False]


## Annotation and preparing training data

### After collecting data, you can annotate data in the required format for a spaCy model. In this exercise, you will practice forming the correct annotated data record for an NER task in the medical domain.

### A sentence and two entities are available for you to use: entity_1, with the text chest pain and type SYMPTOM, and entity_2, with the text hyperthyroidism and type DISEASE.

### Instructions
-    Complete the annotated_data record in the correct format.
-    Extract start and end characters of each entity and store as the corresponding variables.
-    Store the same input sentence and its entities in the proper training format as training_data.

In [5]:
# Raw sentence and the two entity mentions to annotate.
text = "A patient with chest pain had hyperthyroidism."
entity_1 = "chest pain"
entity_2 = "hyperthyroidism"

# Annotation record: the sentence plus one {label, value} entry per entity.
annotated_data = {
    "sentence": text,
    "entities": [
        {"label": "SYMPTOM", "value": entity_1},
        {"label": "DISEASE", "value": entity_2},
    ],
}

# Character offsets of each mention's first occurrence in the sentence;
# the end offset is start + length, i.e. exclusive.
entity_1_start_char, entity_2_start_char = (
    text.index(mention) for mention in (entity_1, entity_2)
)
entity_1_end_char = entity_1_start_char + len(entity_1)
entity_2_end_char = entity_2_start_char + len(entity_2)

# Same information as (text, {"entities": [(start, end, label), ...]}) tuples.
training_data = [
    (
        text,
        {
            "entities": [
                (entity_1_start_char, entity_1_end_char, "SYMPTOM"),
                (entity_2_start_char, entity_2_end_char, "DISEASE"),
            ]
        },
    )
]
print(training_data)

[('A patient with chest pain had hyperthyroidism.', {'entities': [(15, 25, 'SYMPTOM'), (30, 45, 'DISEASE')]})]


## Compatible training data

### Recall that you cannot feed the raw text directly to spaCy. Instead, you need to create an Example object for each training example. In this exercise, you will practice converting a training_data with a single annotated sentence into a list of Example objects.

### en_core_web_sm model is already imported and ready for use as nlp. The Example class is also imported for your use.

### Instructions
-    Iterate through the text and annotations in the training_data, convert the text to a Doc container and store it at doc.
-    Create an Example object using the doc object and the annotations of each training data point, and store it at example_sentence.
-    Append example_sentence to a list of all_examples.

In [6]:
from spacy.training import Example

In [7]:
example_text = "A patient with chest pain had hyperthyroidism."
training_data = [
    (example_text, {"entities": [(15, 25, "SYMPTOM"), (30, 45, "DISEASE")]}),
]

# Build one Example per (text, annotations) pair.
all_examples = []
for text, annotations in training_data:
    # Convert the raw text to a Doc container.
    doc = nlp(text)

    # Pair the Doc container with its annotations.
    example_sentence = Example.from_dict(doc, annotations)
    print(example_sentence.to_dict(), "\n")

    # Keep the Example for later use.
    all_examples.append(example_sentence)

print("Number of formatted training data: ", len(all_examples))

{'doc_annotation': {'cats': {}, 'entities': ['O', 'O', 'O', 'B-SYMPTOM', 'L-SYMPTOM', 'O', 'U-DISEASE', 'O'], 'spans': {}, 'links': {}}, 'token_annotation': {'ORTH': ['A', 'patient', 'with', 'chest', 'pain', 'had', 'hyperthyroidism', '.'], 'SPACY': [True, True, True, True, True, True, False, False], 'TAG': ['', '', '', '', '', '', '', ''], 'LEMMA': ['', '', '', '', '', '', '', ''], 'POS': ['', '', '', '', '', '', '', ''], 'MORPH': ['', '', '', '', '', '', '', ''], 'HEAD': [0, 1, 2, 3, 4, 5, 6, 7], 'DEP': ['', '', '', '', '', '', '', ''], 'SENT_START': [1, 0, 0, 0, 0, 0, 0, 0]}} 

Number of formatted training data:  1


## Training preparation steps

### Before and during training of a spaCy model, you'll need to (1) disable other pipeline components in order to only train the intended component and (2) convert a Doc container of a training data point and its corresponding annotations into an Example class.

### In this exercise, you will practice these two steps by using a pre-loaded en_core_web_sm model, which is accessible as nlp. Example class is already imported and a text string and related annotations are also available for your use.

### Instructions
-    Disable all pipeline components of the nlp model except ner.
-    Convert a text string and its annotations to the correct format usable for training.

In [8]:
# Single training sentence; the GPE span covers characters 20-26 ("Austin").
text = "I will visit you in Austin."
annotations = {"entities": [(20, 26, "GPE")]}

In [9]:
nlp = spacy.load("en_core_web_sm")

# Disable all pipeline components except `ner`.
# `select_pipes` is the spaCy v3 API; `disable_pipes` is deprecated and emits
# a DeprecationWarning before delegating to `select_pipes(disable=...)`.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
nlp.select_pipes(disable=other_pipes)

# Convert a text and its annotations to the correct format usable for training:
# tokenize only (make_doc), then pair the Doc with its annotations.
doc = nlp.make_doc(text)
example = Example.from_dict(doc, annotations)
print("Example object for training: \n", example.to_dict())

Example object for training: 
 {'doc_annotation': {'cats': {}, 'entities': ['O', 'O', 'O', 'O', 'O', 'U-GPE', 'O'], 'spans': {}, 'links': {}}, 'token_annotation': {'ORTH': ['I', 'will', 'visit', 'you', 'in', 'Austin', '.'], 'SPACY': [True, True, True, True, True, False, False], 'TAG': ['', '', '', '', '', '', ''], 'LEMMA': ['', '', '', '', '', '', ''], 'POS': ['', '', '', '', '', '', ''], 'MORPH': ['', '', '', '', '', '', ''], 'HEAD': [0, 1, 2, 3, 4, 5, 6], 'DEP': ['', '', '', '', '', '', ''], 'SENT_START': [1, 0, 0, 0, 0, 0, 0]}}


## Train an existing NER model

### A spaCy model may not work well on given data. One solution is to train the model on our data. In this exercise, you will practice training a NER model in order to improve its prediction performance.

### A spaCy en_core_web_sm model is accessible as nlp; it is not able to correctly predict house as an entity in a test string.

### Given a training_data, write the steps to update this model while iterating through the data two times. The other pipelines are already disabled and optimizer is also ready to be used. Number of epochs is already set to 2.

### Instructions
-    Use the optimizer object and for each epoch, shuffle the dataset using random package and create an Example object.
-    Update the nlp model using the .update() method and set the sgd argument to use the optimizer.

In [15]:
import random

# Sentence used to probe the model before and after training.
test = "I'm going to Sam's house."

# (text, annotations) pairs; each entity is (start_char, end_char, label).
training_data = [
    ("I will visit you in Austin.", {"entities": [(20, 26, "GPE")]}),
    (test, {"entities": [(13, 16, "PERSON"), (19, 24, "GPE")]}),
    ("I will go.", {"entities": []}),
]

# Number of passes over the training data.
epochs = 2

In [16]:
nlp = spacy.load("en_core_web_sm")
print("Before training: ", [(ent.text, ent.label_) for ent in nlp(test).ents])

# Disable every component except `ner` so only the entity recognizer is updated.
# `select_pipes` is the spaCy v3 API; `disable_pipes` is deprecated.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
nlp.select_pipes(disable=other_pipes)
optimizer = nlp.create_optimizer()

for i in range(epochs):
    # Shuffle a per-epoch copy with the random package: `training_data` is
    # defined in another cell, and shuffling it in place would make this cell
    # non-idempotent and change what earlier cells appear to have defined.
    epoch_data = list(training_data)
    random.shuffle(epoch_data)
    for text, annotations in epoch_data:
        doc = nlp.make_doc(text)
        example = Example.from_dict(doc, annotations)
        # Update the nlp model, using the optimizer as the sgd argument.
        nlp.update([example], sgd=optimizer)
print("After training: ", [(ent.text, ent.label_) for ent in nlp(test).ents])

Before training:  [('Sam', 'PERSON')]
After training:  [('Sam', 'PERSON'), ('house', 'GPE')]


## Training a spaCy model from scratch

### spaCy provides a very clean and efficient approach to train your own models. In this exercise, you will train a NER model from scratch on a real-world corpus (CORD-19 data).

### Training data is available in the right format as training_data. In this exercise, you will use a given list of labels ("Pathogen", "MedicalCondition", "Medicine") stored in labels, together with a blank English model (nlp) with an NER component. The intended medical labels will be added to the NER pipeline and then you can train the model for one epoch. You can use the pre-imported Example class to convert the training data to the required format. To track model training, you can pass a losses dictionary to the .update() method and review the training loss.

### Instructions
-    Create a blank spaCy model and add an NER component to the model.
-    Disable other pipeline components, use the created optimizer object and update the model weights using converted data to the Example format.

In [20]:
training_data = [('Diarrhea, also spelled diarrhoea, is the condition of having at least three loose, liquid, or watery bowel movements each day.[2] It often lasts for a few days and can result in dehydration due to fluid loss.[2] Signs of dehydration often begin with loss of the normal stretchiness of the skin and irritable behaviour.[2] This can progress to decreased urination, loss of skin color, a fast heart rate, and a decrease in responsiveness as it becomes more severe.[2] Loose but non-watery stools in babies who are exclusively breastfed, however, are normal.[2]',
  {'entities': [(364, 382, 'MedicalCondition'), (0, 8, 'MedicalCondition'), (94, 116, 'MedicalCondition'), (178, 189, 'MedicalCondition'),
    (221, 232, 'MedicalCondition'), (23, 32, 'MedicalCondition'), (409, 435, 'MedicalCondition'), (386, 401, 'MedicalCondition')]}),
 ('Antiretroviral therapy (ART) is recommended for all HIV-infected individuals to reduce the risk of disease progression.\nART also is recommended for HIV-infected individuals for the prevention of transmission of HIV.\nPatients starting ART should be willing and able to commit to treatment and understand the benefits and risks of therapy and the importance of adherence. Patients may choose to postpone therapy, and providers, on a case-by-case basis, may elect to defer therapy on the basis of clinical and/or psychosocial factors.',
  {'entities': [(0, 22, 'Medicine'), (24, 27, 'Medicine'), (120, 123, 'Medicine'), (211, 214, 'Pathogen'), (52, 55, 'Pathogen'),
    (234, 237, 'Medicine'), (148, 151, 'Pathogen')]}),
 ("The goals of treatment are to reduce pain, decrease inflammation, and improve a person's overall functioning.[5] This may be helped by balancing rest and exercise, the use of splints and braces, or the use of assistive devices.[1][6][7] Pain medications, steroids, and NSAIDs are frequently used to help with symptoms.[1] Disease-modifying antirheumatic drugs (DMARDs), such as hydroxychloroquine and methotrexate, may be used to try to slow the progression of disease.[1] Biological DMARDs may be used when disease does not respond to other treatments.[8] However, they may have a greater rate of adverse effects.[9] Surgery to repair, replace, or fuse joints may help in certain situations.[1] Most alternative medicine treatments are not supported by evidence.[10][11]",
  {'entities': [(401, 413, 'Medicine'), (378, 396, 'Medicine'), (473, 490, 'Medicine'), (255, 263, 'Medicine')]}),
 ("Hantaviruses, usually found in rodents and shrews, were discovered in two species of bats. The Mouyassué virus (MOUV) was isolated from banana pipistrelle bats captured near Mouyassué village in Cote d'Ivoire, West Africa. The Magboi virus was isolated from hairy slit-faced bats found near the Magboi River in Sierra Leone in 2011. They are single-stranded, negative sense, RNA viruses in the Bunyaviridae family.[29][30][31][32]",
  {'entities': [(0, 12, 'Pathogen'), (394, 406, 'Pathogen'), (227, 239, 'Pathogen'), (95, 110, 'Pathogen')]}),
 ('Bats are the most common source of rabies in humans in North and South America, Western Europe, and Australia. In the United States, there were 19 cases of human rabies from 1997–2006, 17 of which were attributed to bats.[27] In North America, about half of human rabies instances are cryptic, meaning that the patient has no known bite history.[24] While it has been speculated that rabies virus could be transmitted through aerosols, studies of the rabies virus have concluded that this is only feasible in limited conditions. These conditions include a very large colony of bats in a hot and humid cave with poor ventilation. While two human deaths in 1956 and 1959 had been tentatively attributed to aerosolization of the rabies virus after entering a cave with bats, "investigations of the 2 reported human cases revealed that both infections could be explained by means other than aerosol transmission".[28] It is instead generally thought that most instances of cryptic rabies are the result of an unknown bat bite.[24] Bites from a bat can be so small that they are not visible without magnification equipment, for example. Outside of bites, rabies virus exposure can also occur if infected fluids come in contact with a mucous membrane or a break in the skin. Rabies virus has also been transmitted when an infected human unknowingly dies of rabies, and their organs are transplanted to others.[28]',
  {'entities': [(35, 41, 'MedicalCondition'),
    (162, 168, 'MedicalCondition'), (384, 396, 'Pathogen'), (1269, 1281, 'Pathogen'), (1343, 1347, 'MedicalCondition'),
    (977, 983, 'MedicalCondition'), (1027, 1032, 'MedicalCondition')]}),
 ('Other groups of intracellular bacterial pathogens include Salmonella, Neisseria, Brucella, Mycobacterium, Nocardia, Listeria, Francisella, Legionella, and Yersinia pestis. These can exist intracellularly, but can exist outside of host cells.',
  {'entities': [(116, 124, 'Pathogen'), (155, 170, 'Pathogen'), (126, 137, 'Pathogen'), (70, 79, 'Pathogen'),
    (139, 149, 'Pathogen'), (106, 114, 'Pathogen'), (91, 104, 'Pathogen'), (81, 89, 'Pathogen'), (58, 68, 'Pathogen')]}),
 ('One of the bacterial diseases with the highest disease burden is tuberculosis, caused by Mycobacterium tuberculosis bacteria, which kills about 2 million people a year, mostly in sub-Saharan Africa. Pathogenic bacteria contribute to other globally important diseases, such as pneumonia, which can be caused by bacteria such as Streptococcus and Pseudomonas, and foodborne illnesses, which can be caused by bacteria such as Shigella, Campylobacter, and Salmonella. Pathogenic bacteria also cause infections such as tetanus, typhoid fever, diphtheria, syphilis, and leprosy. Pathogenic bacteria are also the cause of high infant mortality rates in developing countries.[3]',
  {'entities': [(327, 340, 'Pathogen'), (514, 521, 'MedicalCondition'), (452, 462, 'Pathogen'), (276, 285, 'MedicalCondition'),
    (523, 536, 'MedicalCondition'), (564, 571, 'MedicalCondition'), (433, 446, 'Pathogen'), (538, 548, 'MedicalCondition'),
    (345, 356, 'Pathogen'), (65, 77, 'MedicalCondition'), (550, 558, 'MedicalCondition'), (89, 115, 'Pathogen'), (423, 431, 'Pathogen')]}),
 ("Although the vast majority of bacteria are harmless or beneficial to one's body, a few pathogenic bacteria can cause infectious diseases. The most common bacterial disease is tuberculosis, caused by the bacterium Mycobacterium tuberculosis, which affects about 2 million people mostly in sub-Saharan Africa. Pathogenic bacteria contribute to other globally important diseases, such as pneumonia, which can be caused by bacteria such as Streptococcus and Pseudomonas, and foodborne illnesses, which can be caused by bacteria such as Shigella, Campylobacter, and Salmonella. Pathogenic bacteria also cause infections such as tetanus, typhoid fever, diphtheria, syphilis, and Hansen's disease. They typically range between 1 and 5 micrometers in length.",
  {'entities': [(659, 667, 'MedicalCondition'), (436, 449, 'Pathogen'), (673, 689, 'MedicalCondition'), (30, 38, 'Pathogen'),
    (454, 465, 'Pathogen'), (647, 657, 'MedicalCondition'), (87, 106, 'Pathogen'), (532, 540, 'Pathogen'), (561, 571, 'Pathogen'),
    (623, 630, 'MedicalCondition'), (471, 490, 'MedicalCondition'), (632, 645, 'MedicalCondition'), (542, 555, 'Pathogen')]}),
 ('Much like viral pathogens, infection by certain bacterial pathogens can be prevented via vaccines.[30] Vaccines against bacterial pathogens include the anthrax vaccine and the pneumococcal vaccine. Many other bacterial pathogens lack vaccines as a preventive measure, but infection by these bacteria can often be treated or prevented with antibiotics. Common antibiotics include amoxicillin, ciprofloxacin, and doxycycline. Each antibiotic has different bacteria that it is effective against and has different mechanisms to kill that bacteria. For example, doxycycline inhibits the synthesis of new proteins in both gram-negative and gram-positive bacteria which leads to the death of the affected bacteria.[35]',
  {'entities': [(379, 390, 'Medicine'), (152, 167, 'Medicine'), (411, 422, 'Medicine'), (392, 405, 'Medicine'), (176, 196, 'Medicine')]}),
 ('The term pathogen came into use in the 1880s.[1][2] Typically, the term is used to describe an infectious microorganism or agent, such as a virus, bacterium, protozoan, prion, viroid, or fungus.[',
  {'entities': [(158, 167, 'Pathogen'), (95, 119, 'Pathogen'), (187, 193, 'Pathogen'), (147, 156, 'Pathogen'), (140, 145, 'Pathogen')]}),
 ("Some antidepressants are used as a treatment for social anxiety disorder, but their efficacy is not entirely convincing, as only a small proportion of antidepressants showed some efficacy for this condition. Paroxetine was the first drug to be FDA-approved for this disorder. Its efficacy is considered beneficial, although not everyone responds favorably to the drug. Sertraline and fluvoxamine extended release were later approved for it as well, while escitalopram is used off-label with acceptable efficacy. However, there isn't enough evidence to support citalopram for treating social phobia, and fluoxetine was no better than placebo in clinical trials. SSRIs are used as a first-line treatment for social anxiety, but they don't work for everyone. One alternative would be venlafaxine, which is a SNRI. It showed benefits for social phobia in five clinical trials against placebo, while the other SNRIs are not considered particularly useful for this disorder as many of them didn't undergo testing for it. As of now, it is unclear if duloxetine and desvenlafaxine can provide benefits for social anxiety sufferers. However, another class of antidepressants called MAOIs are considered effective for social anxiety, but they come with many unwanted side effects and are rarely used. Phenelzine was shown to be a good treatment option, but its use is limited by dietary restrictions. Moclobemide is a RIMA and showed mixed results but still got approval in some European countries for social anxiety disorder. TCA antidepressants, such as clomipramine and imipramine, are not considered effective for this anxiety disorder in particular. This leaves out SSRIs such as paroxetine, sertraline and fluvoxamine CR as acceptable and tolerated treatment options for this disorder.[19][20]",
  {'entities': [(384, 395, 'Medicine'), (1098, 1112, 'MedicalCondition'), (1687, 1697, 'Medicine'), (49, 72, 'MedicalCondition'),
    (1173, 1178, 'Medicine'), (1702, 1713, 'Medicine'), (781, 792, 'Medicine'), (1563, 1573, 'Medicine'), (603, 613, 'Medicine'),
    (1675, 1685, 'MedicalCondition'), (1613, 1629, 'MedicalCondition'), (369, 379, 'Medicine'), (1291, 1301, 'Medicine'), (1546, 1558, 'Medicine'),
    (455, 467, 'Medicine'), (1391, 1402, 'Medicine'), (584, 597, 'MedicalCondition')]}),
 ("However, existing data suggest that patients taking bedaquiline in addition to standard TB therapy are five times more likely to die than those without the new drug,[184] which has resulted in medical journal articles raising health policy questions about why the FDA approved the drug and whether financial ties to the company making bedaquiline influenced physicians' support for its use.[183][185]",
  {'entities': [(88, 98, 'Medicine'), (335, 346, 'Medicine'), (52, 63, 'Medicine')]}),
 ('Tuberculosis may infect any part of the body, but most commonly occurs in the lungs (known as pulmonary tuberculosis).[9] Extrapulmonary TB occurs when tuberculosis develops outside of the lungs, although extrapulmonary TB may coexist with pulmonary TB.[9]\n\nGeneral signs and symptoms include fever, chills, night sweats, loss of appetite, weight loss, and fatigue.[9] Significant nail clubbing may also occur.[16]',
  {'entities': []}),
 ('A number of factors make people more susceptible to TB infections. The most important risk factor globally is HIV; 13% of all people with TB are infected by the virus.[39] This is a particular problem in sub-Saharan Africa, where rates of HIV are high.[40][41] Of people without HIV who are infected with tuberculosis, about 5–10% develop active disease during their lifetimes;[16] in contrast, 30% of those coinfected with HIV develop the active disease.[16]',
  {'entities': [(279, 282, 'Pathogen')]}),
 ('Examples of common human diseases caused by viruses include the common cold, influenza, chickenpox, and cold sores. Many serious diseases such as rabies, Ebola virus disease, AIDS (HIV), avian influenza, and SARS are caused by viruses. The relative ability of viruses to cause disease is described in terms of virulence. Other diseases are under investigation to discover if they have a virus as the causative agent, such as the possible connection between human herpesvirus 6 (HHV6) and neurological diseases such as multiple sclerosis and chronic fatigue syndrome.[151] There is controversy over whether the bornavirus, previously thought to cause neurological diseases in horses, could be responsible for psychiatric illnesses in humans.[152]',
  {'entities': [(518, 536, 'MedicalCondition'), (154, 165, 'Pathogen'), (708, 729, 'MedicalCondition'), (463, 476, 'Pathogen'),
    (77, 86, 'MedicalCondition'), (88, 98, 'MedicalCondition'), (187, 202, 'MedicalCondition'), (610, 620, 'Pathogen')]}),
 ('Buprenorphine has been shown experimentally (1982–1995) to be effective against severe, refractory depression',
  {'entities': [(0, 13, 'Medicine'), (88, 109, 'MedicalCondition')]}),
 ('Bupropion (Wellbutrin), an anti-depressant, is also used as a smoking cessation aid; this indication was later approved, and the name of the smoking cessation product is Zyban. In Ontario, Canada, smoking cessation drugs are not covered by provincial drug plans; elsewhere, Zyban is priced higher than Wellbutrin, despite being the same drug. Therefore, some physicians prescribe Wellbutrin for both indications.[',
  {'entities': [(274, 279, 'Medicine'), (11, 21, 'Medicine'), (302, 312, 'Medicine'), (380, 390, 'Medicine'), (170, 175, 'Medicine'), (0, 9, 'Medicine')]}),
 ('Carbamazepine is an approved treatment for bipolar disorder and epileptic seizures, but it has side effects useful in treating attention-deficit hyperactivity disorder (ADHD), schizophrenia, phantom limb syndrome, paroxysmal extreme pain disorder, neuromyotonia, and post-traumatic stress disorder.[8]',
  {'entities': [(267, 288, 'MedicalCondition'), (248, 261, 'MedicalCondition'), (0, 13, 'Medicine'), (43, 59, 'MedicalCondition'),
    (145, 167, 'MedicalCondition'), (176, 189, 'MedicalCondition'), (64, 82, 'MedicalCondition'), (191, 212, 'MedicalCondition')]}),
 ('The antiviral drugs amantadine and rimantadine inhibit a viral ion channel (M2 protein), thus inhibiting replication of the influenza A virus.[86] These drugs are sometimes effective against influenza A if given early in the infection but are ineffective against influenza B viruses, which lack the M2 drug target.[160] Measured resistance to amantadine and rimantadine in American isolates of H3N2 has increased to 91% in 2005.[161] This high level of resistance may be due to the easy availability of amantadines as part of over-the-counter cold remedies in countries such as China and Russia,[162] and their use to prevent outbreaks of influenza in farmed poultry.[163][164] The CDC recommended against using M2 inhibitors during the 2005–06 influenza season due to high levels of drug resistance.[165]',
  {'entities': [(639, 648, 'MedicalCondition'), (35, 46, 'Medicine'), (712, 725, 'Medicine'), (20, 30, 'Medicine')]}),
 ('The two classes of antiviral drugs used against influenza are neuraminidase inhibitors (oseltamivir, zanamivir, laninamivir and peramivir) and M2 protein inhibitors (adamantane derivatives)',
  {'entities': [(128, 137, 'Medicine'), (101, 110, 'Medicine'), (112, 123, 'Medicine'), (48, 57, 'MedicalCondition'), (88, 99, 'Medicine')]})]

# Sample sentence (same text as the last training example above).
text = (
    "The two classes of antiviral drugs used against influenza are "
    "neuraminidase inhibitors (oseltamivir, zanamivir, laninamivir and peramivir) "
    "and M2 protein inhibitors (adamantane derivatives)"
)
# Entity labels the NER component will be trained to predict.
labels = ["Pathogen", "MedicalCondition", "Medicine"]

In [21]:
# Load a blank English model, add an NER component, and register the given labels.
nlp = spacy.blank("en")
ner = nlp.add_pipe("ner")
for label in labels:
    ner.add_label(label)

# Disable other pipeline components (a blank model with only `ner` has none,
# so the list is empty; kept so the cell still works if components are added).
# `select_pipes` is the spaCy v3 API; `disable_pipes` is deprecated.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
nlp.select_pipes(disable=other_pipes)

# `losses` is updated in place by nlp.update(), so the printed value is the
# running total over the examples seen so far, not a per-example loss.
losses = {}
# `initialize` is the spaCy v3 name for the deprecated `begin_training`;
# it initializes the model weights and returns the optimizer.
optimizer = nlp.initialize()

# One epoch: a single pass over the training data, one example per update.
# The loop variable is not named `text`, so the sample `text` defined with the
# training data is not overwritten.
for train_text, annotation in training_data:
    doc = nlp.make_doc(train_text)
    example = Example.from_dict(doc, annotation)
    nlp.update([example], sgd=optimizer, losses=losses)
    print(losses)

{'ner': 99.35713374614716}
{'ner': 190.30853682756424}
{'ner': 311.3611886501312}
{'ner': 377.91553181409836}
{'ner': 580.8560008406639}
{'ner': 610.654515504837}
{'ner': 673.2626938521862}
{'ner': 725.7986409887671}
{'ner': 745.9462028536946}
{'ner': 755.2395603848854}
{'ner': 788.5934267874109}
{'ner': 794.5253374492286}
{'ner': 794.7103092671293}
{'ner': 796.7045415722134}
{'ner': 812.6534608074492}
{'ner': 816.6401432161048}
{'ner': 828.4768828170328}
{'ner': 844.368554690433}
{'ner': 852.3417709166351}
{'ner': 862.2381579514815}
