# Compose Matcher Patterns
This notebook is used to compose patterns to add the the spaCy Dependency Matcher

In [1]:
import spacy
from spacy.matcher import DependencyMatcher

In [2]:
# Present simple active, minus modals and auxiliaries
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

present_simple_active = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"tag": {"IN": ["VBP", "VBZ"]}},
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"dep": "nsubj"}
    }
]

patterns = [present_simple_active]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I go",
         "He goes",
         "He does go",
         "He did go"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I go
Matched Phrase: He goes


In [3]:
# Present simple active  with auxiliaries, minus modal verbs
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

present_simple_active_aux = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"tag": "VB"},
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": {"IN": ["VBP","VBZ"]}}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [present_simple_active_aux]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I don't work",
         "He doesn't work",
         "He can't go"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I do work
Matched Phrase: He does work


In [59]:
# Present simple active modal
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present simple tense
present_simple_active_modal = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"tag": "VB"},
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "MD"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [present_simple_active_modal]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I don't work",
         "He doesn't work",
         "He can't go",
         "Can you come?",
         "I will try",
         'I would make',
         "They should take it",
         "We could try"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

go
come
try
make
take
try


In [49]:
# Present simple passive, includes aux
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present simple tense
present_simple_passive = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"},
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": {"IN": ["VBP","VBZ"]}, "DEP": "auxpass"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"dep": "nsubjpass"}
    }
]

patterns = [present_simple_passive]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["It isn't made here.",
         "It should be made here.",
         "You get called"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

made
called


In [6]:
# Present simple passive modal
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present simple tense
present_simple_passive_modal = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"},
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": "VB", "DEP": "auxpass"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "modal",
        "RIGHT_ATTRS": {"TAG": "MD", "LEMMA": {"NOT_IN": ["would","could"]}}   
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"dep": "nsubjpass"}
    }
]

patterns = [present_simple_passive_modal]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["It can be made here",
         "It won't be done.",
         "It could have been done."
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

made
done


In [7]:
# Past simple active
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
past_simple_active = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBD"},
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [past_simple_active]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I went",
         "I did go",
         "I should try",
         "Did I go?"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

went


In [8]:
# Past simple active aux
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
past_simple_active_aux = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VB"},
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "VBD"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [past_simple_active_aux]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I went",
         "I did go",
         "I should try",
         "Did I go?",
         "I didn't see it",
         "Did you see it?",
         "Should we try?"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

go
go
see
see


In [9]:
# Past simple passive
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
past_simple_passive = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"},
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": "VBD", "DEP": "auxpass"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [past_simple_passive]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["Where were you raised?",
         "I was born in Saskatoon",
         "I wasn't born there",
         "I had seen it",
         "We had been removed."
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

raised
born
born


In [10]:
import spacy
from spacy.matcher import DependencyMatcher

# Present continuous active
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_continuous_active = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": {"IN": ["VBZ","VBP"]}, "LEMMA": "be"}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [present_continuous_active]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I am not swimming",
         "He is not swimming",
         "He should be swimming",
         "Should he be swimming?",
         "Lambert is cooking",
         "Is he driving?",
         "He isn't driving",
         "Isn't he talking?",
         "Aren't I talking?"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I am swimming
Matched Phrase: He is swimming
Matched Phrase: Lambert is cooking
Matched Phrase: Is he driving
Matched Phrase: He is driving
Matched Phrase: Is he talking
Matched Phrase: Are I talking


In [11]:
import spacy
from spacy.matcher import DependencyMatcher

# Present continuous active with modal
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_continuous_active_modal = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": "VB", "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "modal",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "MD", "LEMMA": {"NOT_IN": ["would","could"]}}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [present_continuous_active_modal]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I am not swimming",
         "He is not swimming",
         "He should be swimming",
         "Should he be swimming?",
         "Lambert is cooking",
         "Is he driving?",
         "He isn't driving",
         "Isn't he talking?",
         "Aren't I talking?"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: He should be swimming
Matched Phrase: Should he be swimming


In [51]:
import spacy
from spacy.matcher import DependencyMatcher

# Present continuous passive
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

present_continuous_passive = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_ing",
        "RIGHT_ATTRS": {"DEP": "auxpass", "TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": {"IN": ["VBP","VBZ"]}, "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [present_continuous_passive]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = [
         "I'm not being called",
    "I'm getting called"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I 'm being called
Matched Phrase: I 'm getting called


In [48]:
import spacy
from spacy.matcher import DependencyMatcher

# Present continuous passive with modal
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

present_continuous_passive_modal = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_ing",
        "RIGHT_ATTRS": {"DEP": "auxpass", "TAG": "VBG", "LEMMA": {"IN": ["be","getting"]}}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "VB", "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "modal",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "MD", "LEMMA": {"NOT_IN": ["would","could"]}}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [present_continuous_passive_modal]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I'm not being called",
         "I should be getting called",
         "Should I be getting called?",
         "I will be getting called",
         "I could be getting called"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I should be getting called
Matched Phrase: Should I be getting called
Matched Phrase: I will be getting called


In [54]:
# Present perfect active, without modals
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_perfect_active = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": {"IN": ["VBZ","VBP"]} , "LEMMA": "have"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [present_perfect_active]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I have arrived",
         "He has arrived",
         "He has already been there",
         "Have you never been there before?"
         "He should have arrived",
         "You should have seen it"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I have arrived
Matched Phrase: He has arrived
Matched Phrase: He has been
Matched Phrase: Have you been


In [63]:
# Present perfect active with modals
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_perfect_active_modal = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": "VB" , "LEMMA": "have"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "modal",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "MD", "LEMMA": {"NOT_IN": ["would","could"]}}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [present_perfect_active_modal]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I should have left",
         "I shouldn't have left",
         "He should have left",
         "Should he have left?",
         "He has left"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I should have left
Matched Phrase: I should have left
Matched Phrase: He should have left
Matched Phrase: Should he have left


In [74]:
# Present perfect passive
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_perfect_passive = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"DEP":"auxpass", "TAG": "VBN", "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": {"IN":["VBP","VBZ"]}, "LEMMA": "have"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [present_perfect_passive]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["He has been banned",
         "Has he been banned?",
         "I have been banned.",
         "I have been getting banned",
         "I haven't been banned"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: He has been banned
Matched Phrase: Has he been banned
Matched Phrase: I have been banned
Matched Phrase: I have been banned


In [82]:
# Present perfect passive with modal
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

present_perfect_passive_modal = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"DEP":"auxpass", "TAG": "VBN", "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": "VB", "LEMMA": "have"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "modal",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "MD", "LEMMA": {"NOT_IN": ["would","could"]}}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [present_perfect_passive_modal]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I should have been banned",
         "Should I have been banned?",
         "He should have been banned.",
         "Should he have been banned?",
         "I shouldn't have been banned.",
         "He shouldn't have been banned"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I should have been banned
Matched Phrase: Should I have been banned
Matched Phrase: He should have been banned
Matched Phrase: Should he have been banned
Matched Phrase: I should have been banned
Matched Phrase: He should have been banned


In [3]:
# Present perfect-continuous active
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_perfect_continuous_active = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"TAG": "VBN", "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": {"IN": ["VBZ","VBP"]} , "LEMMA": "have"}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [present_perfect_continuous_active]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I should have been working",
         "He has been getting called.",
         "You have been studying.",
         "Has he been studying?",
         "I haven't been studying"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

studying
studying
studying


In [7]:
# Present perfect-continuous active with modals
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_perfect_continuous_active_modal = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"TAG": "VBN", "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": "VB" , "LEMMA": "have"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "modal",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "MD", "LEMMA": {"NOT_IN": ["would","could"]}}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [present_perfect_continuous_active_modal]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I should have been working",
         "He has been getting called.",
         "You have been studying.",
         "Has he been studying?",
         "I haven't been studying",
         "I might have been working",
         "He might have been working",
         "Might they have been working?"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

working
working
working


In [17]:
# Present perfect-continuous passive
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_perfect_continuous_passive = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_ing",
        "RIGHT_ATTRS": {"DEP": "auxpass", "TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"TAG": "VBN" , "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": {"IN": ["VBZ","VBP"]} , "LEMMA": "have"}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [present_perfect_continuous_passive]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["They have been getting burned",
         "He has been getting treated",
         "have they been getting burned?",
         "Have I been getting paid?",
         "He hasn't been getting paid"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

burned
treated
burned
paid
paid


In [23]:
# Present perfect-continuous passive with modal
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
present_perfect_continuous_passive_modal = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_ing",
        "RIGHT_ATTRS": {"DEP": "auxpass", "TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"TAG": "VBN" , "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": "VB" , "LEMMA": "have"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "modal",
        "RIGHT_ATTRS": {"DEP": "aux", "TAG": "MD", "LEMMA": {"NOT_IN": ["would","could"]}}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [present_perfect_continuous_passive_modal]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["He should have been getting paid",
         "Should she have been getting paid?",
         "I should not have been getting paid"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

paid
paid
paid


In [31]:
import spacy
from spacy.matcher import DependencyMatcher

# Past continuous active
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
past_continuous_active = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": "VBD", "LEMMA": "be"}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [past_continuous_active]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["He was swimming",
         "She was swimming",
         "He had been swimming",
         "I wasn't swimming",
         "I was getting bored",
         "I was getting paid",
         "While I was working"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: He was swimming
Matched Phrase: She was swimming
Matched Phrase: I was swimming
Matched Phrase: I was getting
Matched Phrase: I was working


In [34]:
import spacy
from spacy.matcher import DependencyMatcher

# Past continuous active
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for present continuous tense
past_continuous_passive = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_ing",
        "RIGHT_ATTRS": {"DEP": "auxpass", "TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": "VBD", "LEMMA": "be"}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [past_continuous_passive]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["He was swimming",
         "She was swimming",
         "He had been swimming",
         "I wasn't swimming",
         "I was getting bored",
         "I was getting paid",
         "While I was working",
         "Was he getting paid?",
         "He wasn't getting paid"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I was getting paid
Matched Phrase: Was he getting paid
Matched Phrase: He was getting paid


In [41]:
# Past perfect active
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

# Define the pattern for past perfect active
past_perfect_active = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux",
        "RIGHT_ATTRS": {"TAG": "VBD" , "LEMMA": "have"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [past_perfect_active]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I had been there",
         "I had been working",
         "He had been there",
         "Had he gone?",
         "They hadn't left"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: I had been
Matched Phrase: He had been
Matched Phrase: Had he gone
Matched Phrase: They had left


In [45]:
# Past perfect passive
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

past_perfect_passive = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"DEP":"auxpass", "TAG": "VBN", "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": "VBD", "LEMMA": "have"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [past_perfect_passive]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["He had been duped",
         "They had been saved",
         "Had I been ignored?",
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_phrase = [doc[i].text for i in sorted(token_ids)]
        print("Matched Phrase:", " ".join(matched_phrase))

Matched Phrase: He had been duped
Matched Phrase: They had been saved
Matched Phrase: Had I been ignored


In [52]:
# Past perfect continuous
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

past_perfect_continuous_active = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"TAG": "VBN", "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": "VBD", "LEMMA": "have"}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"}
    }
]

patterns = [past_perfect_continuous_active]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["I had been working a lot",
         "She hadn't been working",
         "Had he been studying?"
         "Hadn't they been eating?"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

working
working


In [58]:
# Past perfect-continuous passive
nlp = spacy.load("en_core_web_sm")
matcher = DependencyMatcher(nlp.vocab)

past_perfect_continuous_passive = [
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {"TAG": "VBN"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_ing",
        "RIGHT_ATTRS": {"DEP": "auxpass", "TAG": "VBG"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_be",
        "RIGHT_ATTRS": {"TAG": "VBN" , "LEMMA": "be"}
    },
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "aux_have",
        "RIGHT_ATTRS": {"TAG": "VBD" , "LEMMA": "have"}
    },
    {
        "REL_OP": ">",
        "LEFT_ID": "verb",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubjpass"}
    }
]

patterns = [past_perfect_continuous_passive]

matcher.add("Patterns", patterns)  # Removed the extra brackets []

# Example sentences
texts = ["They had been getting attacked",
         "You had been being paid",
         "Had he been paid?",
         "Had she been being paid?"
        ]

for text in texts:
    doc = nlp(text)

    # Apply the matcher to the example sentence
    matches = matcher(doc)

    for match_id, token_ids in matches:
        matched_span = doc[token_ids[0]]
        print(matched_span.text)

attacked
paid
paid
