In [2]:
import spacy

# Load the `en_core_web_sm` pipeline; the following cells reuse `nlp`.
nlp = spacy.load("en_core_web_sm")

# Sample headline (note the typographic apostrophe in "It’s").
text = (
    "It’s official: Apple is the first U.S. public company "
    "to reach a $1 trillion market value"
)

# Run the pipeline over the headline to obtain a Doc.
doc = nlp(text)

# `doc.text` gives back the text of the processed document.
print(doc.text)

It’s official: Apple is the first U.S. public company to reach a $1 trillion market value


In [3]:
# Run the pipeline over `text` again (`nlp` and `text` come from the previous cell).
doc = nlp(text)

# Left-aligned, fixed-width columns: token text (12), POS tag (10), dependency label (10).
row_template = '{:<12}{:<10}{:<10}'

for token in doc:
    # Pull out the three attributes shown per token in one step.
    token_text, token_pos, token_dep = token.text, token.pos_, token.dep_
    print(row_template.format(token_text, token_pos, token_dep))

It          PRON      nsubj     
’s          VERB      ccomp     
official    ADJ       acomp     
:           PUNCT     punct     
Apple       PROPN     nsubj     
is          AUX       ROOT      
the         DET       det       
first       ADJ       amod      
U.S.        PROPN     nmod      
public      ADJ       amod      
company     NOUN      attr      
to          PART      aux       
reach       VERB      relcl     
a           DET       det       
$           SYM       quantmod  
1           NUM       compound  
trillion    NUM       nummod    
market      NOUN      compound  
value       NOUN      dobj      


In [4]:
# A second headline, this time used to inspect named entities.
text = "New iPhone X release date leaked as Apple reveals pre-orders by mistake"

# Run the pipeline; the predicted entity spans are exposed as `doc.ents`.
doc = nlp(text)

# One line per entity: its text followed by its label.
# (In the captured output below only "Apple" is reported; "iPhone X" is not.)
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Apple ORG


### Using Matcher

In [5]:
# Import the Matcher class from spaCy's matcher module
from spacy.matcher import Matcher

# Create a Matcher that uses the vocabulary of the `nlp` pipeline loaded in an
# earlier cell, so the matcher and the docs produced by `nlp` share one vocab.
# Patterns are registered on this object later via `matcher.add(...)`.
matcher = Matcher(nlp.vocab)

In [11]:
# Text to search: a mix of iOS release notes, forum questions and an app blurb.
# The literal is split across lines purely for readability; adjacent string
# literals are concatenated, so the value passed to `nlp` is unchanged.
doc = nlp(
    "After making the iOS update you won't notice a radical system-wide "
    "redesign: nothing like the aesthetic upheaval we got with iOS 7. "
    "Most of iOS 11's furniture remains the same as in iOS 10. "
    "But you will discover some tweaks once you delve a little deeper. "
    "i downloaded Fortnite on my laptop and can't open the game at all. "
    "Help? so when I was downloading Minecraft, I got the Windows version "
    "where it is the '.zip' folder and I used the default program to "
    "unpack it... do I also need to download Winzip? "
    "Features of the app include a beautiful design, smart search, "
    "automatic labels and optional voice responses."
)

# Full iOS versions ("iOS 7", "iOS 11", "iOS 10"): the literal token "iOS"
# followed by a token made up of digits only.
pattern1 = [{'TEXT': 'iOS'}, {'IS_DIGIT': True}]
# Any form of "download" (matched on the lemma) followed by a proper noun.
pattern2 = [{'LEMMA': "download"}, {'POS': 'PROPN'}]
# An adjective followed by one noun and, optionally ('OP': '?'), a second noun.
pattern3 = [{'POS': 'ADJ'}, {'POS': 'NOUN'}, {'POS': 'NOUN', 'OP': '?'}]
# Hyphenated compounds such as "system-wide": alphabetic token, "-", alphabetic token.
pattern4 = [{'IS_ALPHA': True}, {'TEXT': '-'}, {'IS_ALPHA': True}]

# Build a fresh matcher inside this cell. Matcher.add() extends the patterns
# already registered under a key, so adding to a matcher that outlives the cell
# would accumulate duplicate or stale patterns every time the cell is re-run
# or a pattern above is edited.
matcher = Matcher(nlp.vocab)
matcher.add('IOS_VERSION_PATTERN', [pattern1])
matcher.add('DOWNLOAD_THINGS_PATTERN', [pattern2])
matcher.add('ADJ_NOUN_PATTERN', [pattern3])
matcher.add('HYPHEN_PATTERN', [pattern4])

# Apply all registered patterns to the doc; each match is (match_id, start, end).
matches = matcher(doc)
print('Total matches found:', len(matches))

# `start` and `end` are token indices, so slicing the doc yields the matched span.
for match_id, start, end in matches:
    print('Match found:', doc[start:end].text)

Total matches found: 15
Match found: radical system
Match found: system-wide
Match found: wide redesign
Match found: aesthetic upheaval
Match found: iOS 7
Match found: iOS 11
Match found: iOS 10
Match found: downloaded Fortnite
Match found: downloading Minecraft
Match found: download Winzip
Match found: beautiful design
Match found: smart search
Match found: automatic labels
Match found: optional voice
Match found: optional voice responses


### Creating a Doc