# Lesson 4

## Training a neural network model

https://course.spacy.io/chapter4

In [None]:
import json
import random

import spacy
from spacy.lang.en import English
from spacy.matcher import Matcher
from spacy.matcher import PhraseMatcher

from spacy.tokens import Doc, Span, Token

# 
from print_util import print_doc_analysis, print_matcher_results

In [None]:
# Ask spaCy to use the GPU if possible; the return value is not checked here.
spacy.prefer_gpu()
# Load the small pretrained English pipeline. No blank English() pipeline is
# built first, because it would be overwritten by this assignment anyway.
nlp = spacy.load("en_core_web_sm")

### 1 Training and Updating the Models

### 3 Creating Training Data 

#### Part 1

In [None]:
# Rebind nlp to a blank English pipeline; it is used below only to process
# the texts for rule-based matching.
nlp = English()
# The matcher is constructed from the pipeline's vocab.
matcher = Matcher(nlp.vocab)

In [None]:
# Load the raw example texts. JSON files are UTF-8, so the encoding is given
# explicitly instead of relying on the platform's default locale encoding.
with open("iphone.json", encoding="utf-8") as f:
    TEXTS = json.load(f)

In [None]:
# Two tokens whose lowercase forms are "iphone" and "x".
pattern1 = [
    {"LOWER": "iphone"},
    {"LOWER": "x"},
]

# A token whose lowercase form is "iphone", followed by a digit token that is
# marked optional through the "?" operator.
pattern2 = [
    {"LOWER": "iphone"},
    {"IS_DIGIT": True, "OP": "?"},
]

In [None]:
# Register both token patterns under the single match key "GADGET".
# NOTE(review): the second positional argument (None) looks like the spaCy v2
# on_match callback slot -- confirm against the installed spaCy version.
matcher.add("GADGET", None, pattern1, pattern2)

In [None]:
# Accumulator for the (text, {"entities": [...]}) examples appended below.
TRAINING_DATA = []

In [None]:
# Turn the matcher hits of every text into one training example of the form
# (text, {"entities": [(start_char, end_char, "GADGET"), ...]}).
for doc in nlp.pipe(TEXTS):
    # Match on the doc and create a list of matched spans.
    matched = [doc[start:end] for _match_id, start, end in matcher(doc)]
    # The patterns can hit overlapping token ranges (the digit in pattern2 is
    # optional, so "iPhone 8" yields both "iPhone" and "iPhone 8", and
    # "iPhone X" also matches as plain "iPhone"). Entity annotations must not
    # overlap, so keep only the longest non-overlapping spans.
    spans = spacy.util.filter_spans(matched)
    # Get (start char, end char, label) tuples for each remaining span.
    entities = [(span.start_char, span.end_char, "GADGET") for span in spans]
    # Format the matches as a (doc.text, entities) tuple.
    training_example = (doc.text, {"entities": entities})
    # Append the example to the training data.
    TRAINING_DATA.append(training_example)

In [None]:
# Show the collected examples, one per line.
print("\n".join(map(str, TRAINING_DATA)))

### Make a Training Data File

In [None]:
# Inspect each record: its container type, the text, and the annotations.
for rec in TRAINING_DATA:
    text, annotations = rec[0], rec[1]
    print(type(rec), text, annotations)

In [None]:
# Serialise the examples to a JSON string (tuples are emitted as JSON arrays)
# and echo it for inspection.
json_data = json.dumps(TRAINING_DATA)
print(json_data)

In [None]:
# Persist the serialised training data, replacing any existing gadgets.json.
with open("gadgets.json", mode="w") as out_file:
    out_file.write(json_data)

### 6 Setting Up Pipeline

In [None]:
# blank English model
# Rebind nlp to a fresh blank English pipeline created with spacy.blank.
nlp = spacy.blank("en")

In [None]:
# Create a new entity recognizer and add it to the pipeline
# Create an entity recognizer component and add it to the pipeline.
ner = nlp.create_pipe("ner")  # new, untrained entity recognizer
nlp.add_pipe(ner)

In [None]:
# Teach the recognizer the one label used by the training data, then list
# the pipeline's component names to confirm the setup.
ner.add_label("GADGET")
print(nlp.pipe_names)

### 7 Build the Training Loop

In [None]:
# Read the saved examples back from disk and show them.
with open("gadgets.json") as f:
    TRAINING_DATA = json.load(f)

print(TRAINING_DATA)

In [None]:
# Blank English pipeline to train from scratch.
nlp = spacy.blank("en")

# New entity recognizer, added to the pipeline.
ner = nlp.create_pipe("ner")
nlp.add_pipe(ner)

# Single label: every recognised entity is a GADGET.
ner.add_label("GADGET")

print(nlp.pipe_names)

In [None]:
# Initialise the pipeline for training; the return value is discarded here.
# NOTE(review): presumably spaCy keeps a default optimizer internally for the
# nlp.update() calls that follow -- confirm for the installed version.
nlp.begin_training()

In [None]:
# Loop for 10 iterations
# Run 10 passes over the training data.
for itn in range(10):
    # Reorder the examples in place before each pass.
    random.shuffle(TRAINING_DATA)
    # Losses are collected in a fresh dict for every pass.
    losses = {}

    # Walk the data in batches of two examples.
    for batch in spacy.util.minibatch(TRAINING_DATA, size=2):
        # Split the (text, annotations) pairs into two parallel lists.
        texts, annotations = [], []
        for text, annotation in batch:
            texts.append(text)
            annotations.append(annotation)

        # Update the model on this batch and show the running losses.
        nlp.update(texts, annotations, losses=losses)
        print(losses)

# Quick check of the trained pipeline on one sentence.
doc = nlp("Apple is slowing down the iPhone 8 and iPhone X - how to stop it")
print(doc.ents)

### Testing the updated model
- `nlp` is the pipeline that was just trained; the cells below run it on individual sentences.


In [None]:
# Sentence mentioning two gadgets; print the entities the pipeline finds.
doc = nlp("Apple is slowing down the iPhone 8 and iPhone X - how to stop it")
print(doc.ents)


In [None]:
# Sentence with typographic quotes next to the gadget name.
doc = nlp("I finally understand what the iPhone X ‘notch’ is for")
print(doc.ents)

In [None]:
# Sentence about a product that neither matcher pattern mentions.
doc = nlp("Everything you need to know about the Samsung Galaxy S9")
print(doc.ents)

### 10 Data

In [None]:
# Hand-labelled examples. Each entry pairs a text with its entity annotations,
# given as (start_char, end_char, label) with the end offset exclusive.
TRAINING_DATA = [
    # "amsterdem"
    (
        "i went to amsterdem last year and the canals were beautiful",
        {"entities": [(10, 19, "GPE")]},
    ),
    # "Paris"
    (
        "You should visit Paris once in your life, but the Eiffel Tower is kinda boring",
        {"entities": [(17, 22, "GPE")]},
    ),
    # "Paris" and "Arkansas"
    (
        "There's also a Paris in Arkansas, lol",
        {"entities": [(15, 20, "GPE"), (24, 32, "GPE")]},
    ),
    # "Berlin"
    (
        "Berlin is perfect for summer holiday: lots of parks, great nightlife, cheap beer!",
        {"entities": [(0, 6, "GPE")]},
    ),
]