## Taggers

In [None]:
import plac
import random
from pathlib import Path
import spacy
from spacy.training import Example

### POS tagger

In [None]:
# Maps every tag label used in TRAIN_DATA to a coarse part-of-speech name.
TAG_MAP = dict(
    N="NOUN",
    V="VERB",
    J="ADJ",
    NNS="NOUN",
)

# Training examples as (text, annotations) pairs; each 'tags' list holds one
# label per word of the text, in order.
TRAIN_DATA = [
    ("I like green eggs", {"tags": ["N", "V", "J", "NNS"]}),
    ("Eat blue ham", {"tags": ["V", "J", "N"]}),
]

In [None]:
def main(lang='en', output_dir=None, n_iter=3):
    """Train a toy part-of-speech tagger on TRAIN_DATA and print a sample prediction.

    Args:
        lang: Language code handed to ``spacy.blank`` to create the empty pipeline.
        output_dir: Unused; kept so existing callers passing it keep working.
        n_iter: Number of passes over the training examples.

    Prints the loss dictionary after every pass and, at the end, the
    ``(text, tag_, pos_)`` triple of every token of a test sentence.
    """
    nlp = spacy.blank(lang)  # empty pipeline for the requested language
    tagger = nlp.add_pipe("tagger")  # component that assigns a tag to each token

    # Register every tag label the tagger is allowed to predict.
    for tag in TAG_MAP:
        tagger.add_label(tag)

    # Build the examples once; shuffling this local list leaves the
    # module-level TRAIN_DATA untouched between calls.
    examples = [
        Example.from_dict(nlp.make_doc(text), annotations)
        for text, annotations in TRAIN_DATA
    ]

    # `initialize` is the spaCy v3 replacement for the deprecated
    # `begin_training`; it returns the optimizer used by `nlp.update`.
    optimizer = nlp.initialize(lambda: examples)

    for _ in range(n_iter):
        random.shuffle(examples)
        losses = {}
        for example in examples:
            nlp.update([example], sgd=optimizer, losses=losses)
        print(losses)

    # The tagger only sets `tag_`. Map the predicted tags to coarse POS values
    # so that `pos_` below is populated. The ruler is added after
    # initialization because initializing an attribute ruler clears its patterns.
    ruler = nlp.add_pipe("attribute_ruler")
    ruler.load_from_tag_map({tag: {"POS": pos} for tag, pos in TAG_MAP.items()})

    test_text = "I love cats"
    doc = nlp(test_text)
    print('Tags', [(t.text, t.tag_, t.pos_) for t in doc])


In [None]:
# Run the POS-tagger demo with its defaults: blank 'en' pipeline, 3 training passes.
main()

### NER tagging

In [None]:
import plac
import random
from pathlib import Path
import spacy

In [None]:
# Training examples as (text, annotations) pairs. Every entity is a
# (start_char, end_char, label) triple whose offsets index into the text.
TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
    (
        "I like London and Berlin.",
        {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]},
    ),
    ("I am learning Python", {"entities": [(14, 20, "TECH")]}),
]

In [None]:
def main(model=None, output_dir=None, n_iter=100):
    """Train or update a named-entity recognizer on TRAIN_DATA and print a sample prediction.

    Args:
        model: Name or path of an existing spaCy pipeline to load. When None,
            a blank English pipeline is created instead.
        output_dir: Unused; kept so existing callers passing it keep working.
        n_iter: Number of passes over the training examples.

    Prints the loss dictionary after every pass, then the entities and the
    per-token entity annotations found in a test sentence.
    """
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")

    # Resolve the NER component for BOTH branches. Previously this lookup was
    # nested under the `else` above, so `ner` was undefined whenever an
    # existing model was loaded.
    ner_is_new = 'ner' not in nlp.pipe_names
    if ner_is_new:
        ner = nlp.add_pipe('ner', last=True)
    else:
        ner = nlp.get_pipe('ner')

    # Register every entity label that occurs in the training data.
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get('entities', ()):
            ner.add_label(ent[2])

    # Build the examples once; shuffling this local list leaves the
    # module-level TRAIN_DATA untouched between calls.
    examples = [
        Example.from_dict(nlp.make_doc(text), annotations)
        for text, annotations in TRAIN_DATA
    ]

    # Only the NER component is active while training.
    with nlp.select_pipes(enable=['ner']):
        if ner_is_new:
            # A freshly added component needs its weights initialized.
            optimizer = nlp.initialize(lambda: examples)
        else:
            # Keep the weights of an already trained NER component instead of
            # resetting them.
            optimizer = nlp.resume_training()

        for _ in range(n_iter):
            random.shuffle(examples)
            losses = {}
            for example in examples:
                nlp.update(
                    [example],
                    drop=0.5,  # dropout: make it harder to memorise the data
                    sgd=optimizer,  # callable to update weights
                    losses=losses,
                )
            print(losses)

    # test the trained model
    doc = nlp('I love Python')
    print()
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
    print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])

In [None]:
# Run the NER demo with its defaults: blank 'en' pipeline, 100 training passes.
main()