### Using the matcher + matching patterns

In [1]:
import spacy

In [2]:
# Import the Matcher
from spacy.matcher import Matcher

In [3]:
# Load the "en_core_web_sm" pipeline and create the nlp object
# (the model package must already be installed in this environment)
nlp = spacy.load("en_core_web_sm")

In [4]:
# Initialize the matcher with the vocab shared by the nlp pipeline,
# so the matcher and the processed docs use the same vocabulary
matcher = Matcher(nlp.vocab)

In [5]:
# Two-token pattern: a token whose exact text is "iPhone",
# immediately followed by a token whose exact text is "X"
pattern = [
    {"TEXT": "iPhone"},
    {"TEXT": "X"},
]

# Register the pattern under the "IPHONE_PATTERN" key
matcher.add("IPHONE_PATTERN", [pattern])

In [6]:
# Process some text: run the pipeline on the string to get a doc
doc = nlp("Upcoming iPhone X release date leaked")

In [7]:
# Call the matcher on the doc; the result is iterated below as
# (match_id, start, end) tuples
matches = matcher(doc)

In [8]:
# Each match is a (match_id, start, end) tuple; slicing the doc with
# start:end yields the matched span, whose text is printed
for match_id, start, end in matches:
    print(doc[start:end].text)

iPhone X


### Matching lexical attributes

In [9]:
# Start from a fresh matcher so this section only reports matches for its
# own pattern, and so re-running this cell does not add the pattern again
matcher = Matcher(nlp.vocab)

# Pattern: a digit token, then the tokens "fifa", "world", "cup"
# (compared on their lowercase form), then a punctuation token
pattern = [
    {"IS_DIGIT": True},
    {"LOWER": "fifa"},
    {"LOWER": "world"},
    {"LOWER": "cup"},
    {"IS_PUNCT": True},
]

# Add the pattern to the matcher
matcher.add("FIFA_PATTERN", [pattern])

In [10]:
# Process some text: run the pipeline on the string to get a doc
doc = nlp("2018 FIFA World Cup: France won!")

In [11]:
# Call the matcher on the doc; the result is iterated below as
# (match_id, start, end) tuples
matches = matcher(doc)

In [12]:
# Each match is a (match_id, start, end) tuple; slicing the doc with
# start:end yields the matched span, whose text is printed
for match_id, start, end in matches:
    print(doc[start:end].text)

2018 FIFA World Cup:


### Matching other token attributes

In [13]:
# Start from a fresh matcher so this section only reports matches for its
# own pattern, and so re-running this cell does not add the pattern again
matcher = Matcher(nlp.vocab)

# Pattern: a verb whose lemma is "love", followed by a noun.
# LEMMA and POS are compared against the values the pipeline assigned
# to each token when the text was processed.
pattern = [
    {"LEMMA": "love", "POS": "VERB"},
    {"POS": "NOUN"},
]

# Add the pattern to the matcher
matcher.add("PETS_PATTERN", [pattern])

In [14]:
# Process some text: run the pipeline on the string to get a doc
doc = nlp("I loved dogs but now I love cats more.")

In [15]:
# Call the matcher on the doc; the result is iterated below as
# (match_id, start, end) tuples
matches = matcher(doc)

In [16]:
# Each match is a (match_id, start, end) tuple; slicing the doc with
# start:end yields the matched span, whose text is printed
for match_id, start, end in matches:
    print(doc[start:end].text)

loved dogs
love cats


### Matching using operators and quantifiers

In [17]:
# Start from a fresh matcher so this section only reports matches for its
# own pattern, and so re-running this cell does not add the pattern again
matcher = Matcher(nlp.vocab)

# Pattern: a token whose lemma is "buy", an optional determiner, then a noun
pattern = [
    {"LEMMA": "buy"},
    {"POS": "DET", "OP": "?"},  # optional: match 0 or 1 times
    {"POS": "NOUN"},
]

# Add the pattern to the matcher
matcher.add("BUYING_PATTERN", [pattern])

In [18]:
# Process some text: run the pipeline on the string to get a doc
doc = nlp("I bought a smartphone. Now I'm buying apps.")

In [19]:
# Call the matcher on the doc; the result is iterated below as
# (match_id, start, end) tuples
matches = matcher(doc)

In [20]:
# Each match is a (match_id, start, end) tuple; slicing the doc with
# start:end yields the matched span, whose text is printed
for match_id, start, end in matches:
    print(doc[start:end].text)

bought a smartphone
buying apps
