### Natural Language Processing With spaCy in Python

Source: Real Python [tutorial on sentiment analysis with Python](https://realpython.com/sentiment-analysis-python/)

Required installations: spaCy and the small English model `en_core_web_sm` (commands in the next cell).

In [None]:
# One-time setup: uncomment and run these in an environment that does not yet
# have spaCy and the `en_core_web_sm` model that the cells below load.
# (Inside a notebook, `%pip install spacy` targets the running kernel's environment.)
# !pip install spacy
# !python -m spacy download en_core_web_sm

### Quick example

In [6]:
import spacy

# Load the small English pipeline (`en_core_web_sm`). The original note here
# listed tokenizer, tagger, parser, NER and word vectors as its contents.
# NOTE(review): the `_sm` packages are documented as shipping without static
# word vectors — confirm against the model card before relying on them.
nlp = spacy.load('en_core_web_sm')

# Process whole documents
# The sample passage is built from adjacent string literals, so `text` is a
# single str.
text = ("When Sebastian Thrun started working on self-driving cars at "
        "Google in 2007, few people outside of the company took him "
        "seriously. “I can tell you very senior CEOs of major American "
        "car companies would shake my hand and turn away because I wasn’t "
        "worth talking to,” said Thrun, in an interview with Recode earlier "
        "this week.")
# Calling the pipeline on the string yields the `doc` object that the
# following cells inspect (type, noun chunks, verbs, entities).
doc = nlp(text)

In [25]:
# Print the raw string and the processed object side by side, each prefixed
# with its type, with a 50-dash rule (padded by blank lines) between them.
print(
    f'{type(text)} ---> {text}',
    '',
    '-' * 50,
    '',
    f'{type(doc)} ---> {doc}',
    sep='\n',
)

<class 'str'> ---> When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.

--------------------------------------------------

<class 'spacy.tokens.doc.Doc'> ---> When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.


In [47]:
# Syntax overview: the text of every noun chunk, then the lemma of every
# token whose part-of-speech tag is VERB, separated by a dashed rule.
print(
    'Noun phrases:',
    [span.text for span in doc.noun_chunks],
)
print('', '-' * 50, '', sep='\n')
print(
    'Verbs in infinitive form:',
    [tok.lemma_ for tok in doc if tok.pos_ == 'VERB'],
)

Noun phrases: ['Sebastian Thrun', 'self-driving cars', 'Google', 'few people', 'the company', 'him', 'I', 'you', 'very senior CEOs', 'major American car companies', 'my hand', 'I', 'Thrun', 'an interview', 'Recode']

--------------------------------------------------

Verbs in infinitive form: ['start', 'work', 'drive', 'take', 'can', 'tell', 'would', 'shake', 'turn', 'talk', 'say']


In [46]:
# Named entities: one line per entity span, giving its text and its label.
for ent in doc.ents:
    print(
        f'{ent.text} is a {ent.label_}'
    )

Sebastian Thrun is a PERSON
Google is a ORG
2007 is a DATE
American is a NORP
Thrun is a PERSON
Recode is a LOC
earlier this week is a DATE


### Tutorial

In [56]:
# Repeats the import and model load from the quick example above: same
# module, same `en_core_web_sm` model name, and the same `nlp` variable.
# NOTE(review): presumably duplicated so this section can be run on its own —
# confirm, or drop in favour of the single setup cell at the top.
import spacy
nlp = spacy.load('en_core_web_sm')

In [57]:
# Sample passage for the tutorial section. The triple-quoted literal keeps its
# line breaks, including one right after the opening quotes and one before the
# closing quotes, so they are part of the string handed to the pipeline.
# NOTE: `text` and `doc` are rebound here, replacing the quick-example values
# assigned earlier in the notebook; re-running the earlier cells afterwards
# will show this passage instead.
text = """
Dave watched as the forest burned up on the hill,
only a few miles from his house. The car had
been hastily packed and Marta was inside trying to round
up the last of the pets. "Where could she be?" he wondered
as he continued to wait for Marta to appear with the pets.
"""
doc = nlp(text)

In [64]:
# Materialise the tokens of `doc` as a plain list and print it; iterating
# over `doc` yields its tokens in order.
tokens = list(doc)
print(tokens)

[
, Dave, watched, as, the, forest, burned, up, on, the, hill, ,, 
, only, a, few, miles, from, his, house, ., The, car, had, 
, been, hastily, packed, and, Marta, was, inside, trying, to, round, 
, up, the, last, of, the, pets, ., ", Where, could, she, be, ?, ", he, wondered, 
, as, he, continued, to, wait, for, Marta, to, appear, with, the, pets, ., 
]


In [79]:
# Keep the text of every token that is not flagged as a stop word, then print
# the survivors separated by single spaces. Line-break tokens and punctuation
# are not stop words, so they remain in the output.
filtered_tokens = [
    tok.text
    for tok in doc
    if not tok.is_stop
]
print(*filtered_tokens)


 Dave watched forest burned hill , 
 miles house . car 
 hastily packed Marta inside trying round 
 pets . " ? " wondered 
 continued wait Marta appear pets . 

