# The purpose of this notebook is to demonstrate the value of visualization methods that exist in spacy for understanding data and debugging.

In [1]:
import spacy
from spacy import displacy

In [2]:
nlp = spacy.load("en_core_web_sm")

In [3]:
for pipe in nlp.pipeline:
    print(pipe)

('tagger', <spacy.pipeline.pipes.Tagger object at 0x7f186423bf28>)
('parser', <spacy.pipeline.pipes.DependencyParser object at 0x7f1841093648>)
('ner', <spacy.pipeline.pipes.EntityRecognizer object at 0x7f18410936a8>)


In [4]:
entity_doc = nlp('The days of sub-thousand dollar MacBooks are over. The cost of entry to be a MacBook owner is now $1,099.')

# There are some simple ways that we can inspect the results of spacy processing.  This includes looking at Named Entity Recognition results

In [5]:
print('Text Start End Label')
for ent in entity_doc.ents:
    print('{0} {1} {2} {3}'.format(ent.text, ent.start_char, ent.end_char, ent.label_))

Text Start End Label
The days 0 8 DATE
MacBooks 32 40 ORG
MacBook 77 84 ORG
1,099 99 104 MONEY


# While this is useful, it's not the easiest way to visualize data.  It would be better if we could see these entities and values "in line" in the text as an overlay over it.  
## spacy developers have created a package called 'displaCy' to display some spacy data.  
## Let's look at named entities again and see the visualization

In [6]:
displacy.render(entity_doc, style='ent', jupyter=True)

# Now try this on your own.  Enter some sentences down below and see which entities the default spacy model identifies.  You will also need to update the render() call to your document to test this...

In [7]:
my_text = 'CHANGE ME!!!!'

my_entity_doc = nlp(my_text)

# now visualize your text...
displacy.render(__CHANGE_ME__, style='ent', jupyter=True)

NameError: name '__CHANGE_ME__' is not defined

# Spacy also provides models allow generating a tree which represents the syntax of the sentence so that you can see phrases, which word is the "root" of a phrase, etc.  Let's first look at this in a simple way by printing out the syntax information

In [None]:
syntax_text = 'The tired patient is currently resting quietly in their bed.'

syntax_doc = nlp(syntax_text)

In [None]:
# first let's get all of our noun "chunks" in the text

print('Text Root Relation Head')
for noun_chunk in syntax_doc.noun_chunks:
    print('{0} {1} {2} {3}'.format(noun_chunk.text, noun_chunk.root.text, noun_chunk.root.dep_, noun_chunk.root.head.text))

# This is difficult to see, so let's try again using the displacy method of inspecting syntax trees

In [None]:
displacy.render(syntax_doc, style='dep', jupyter=True)

# There is also a smaller, more compact version of visualizing parse trees in spacy.  This is achieved by setting the argument 'compact' to True in options we pass in

In [None]:
options = {"compact": CHANGE_ME, 
           "bg": "#09a3d5",
           "color": "white", 
           "font": "Source Sans Pro"}

displacy.render(syntax_doc, style='dep', jupyter=True, options = options)

# Try this out on your own now.  Create your own sentences and see what kinds of trees are generated from the text you process

In [None]:
#my_syntax_text = 'CHANGE ME!'
my_syntax_text = 'The quick brown fox jumped over the lazy dog.'

my_options = {"compact": False, 
           "bg": "#09a3d5",
           "color": "white", 
           "font": "Source Sans Pro"}

my_entity_doc = nlp(my_syntax_text)

# now visualize your text...
displacy.render(my_entity_doc, style='dep', jupyter=True, options = my_options)