In [1]:
import spacy
import en_core_web_sm

from spacy.matcher import PhraseMatcher

In [2]:
nlp = en_core_web_sm.load()

In [3]:
doc = nlp('I like Samsung, but using Apple as the most popular brand. Buying Google and Huawei are also high quality brands.'
         'Mercedes and Amazon are spoken for. You need to get your hands on a Tesla.')

In [4]:
terms = ['Samsung', 'Apple', 'Google', 'Huawei', 'Mercedes', 'Amazon', 'Tesla']
terms

['Samsung', 'Apple', 'Google', 'Huawei', 'Mercedes', 'Amazon', 'Tesla']

In [5]:
print('Token\t\t\tLemma\t\t\tIs_Stopword')
for token in doc:
    print(f'{token}\t\t{token.lemma_}\t\t{token.is_stop}')

Token			Lemma			Is_Stopword
I		-PRON-		True
like		like		False
Samsung		Samsung		False
,		,		False
but		but		True
using		use		True
Apple		Apple		False
as		as		True
the		the		True
most		most		True
popular		popular		False
brand		brand		False
.		.		False
Buying		Buying		False
Google		Google		False
and		and		True
Huawei		Huawei		False
are		be		True
also		also		True
high		high		False
quality		quality		False
brands		brand		False
.		.		False
Mercedes		Mercedes		False
and		and		True
Amazon		Amazon		False
are		be		True
spoken		speak		False
for		for		True
.		.		False
You		-PRON-		True
need		need		False
to		to		True
get		get		True
your		-PRON-		True
hands		hand		False
on		on		True
a		a		True
Tesla		Tesla		False
.		.		False


In [6]:
matcher = PhraseMatcher(nlp.vocab, attr='LOWER')

In [7]:
patterns = [nlp(term) for term in terms]
patterns

[Samsung, Apple, Google, Huawei, Mercedes, Amazon, Tesla]

In [8]:
matcher.add('Terminology', patterns)

In [9]:
matches = matcher(doc)
matches

[(18086124541977399096, 2, 3),
 (18086124541977399096, 6, 7),
 (18086124541977399096, 14, 15),
 (18086124541977399096, 16, 17),
 (18086124541977399096, 23, 24),
 (18086124541977399096, 25, 26),
 (18086124541977399096, 38, 39)]

In [10]:
match_id, start, end = matches[3]

In [11]:
for match in matches:
    match_id, start, end = match[0], match[1], match[2]
    print(nlp.vocab.strings[match_id], doc[start:end])

Terminology Samsung
Terminology Apple
Terminology Google
Terminology Huawei
Terminology Mercedes
Terminology Amazon
Terminology Tesla


In [12]:
print(nlp.vocab.strings[match_id], doc[start:end])

Terminology Tesla


In [13]:
def get_matches(list):
    result = []
    for match in list:
        match_id, start, end = match[0], match[1], match[2]
        item = nlp.vocab.strings[match_id], doc[start:end]
        result.append(item)
        
    return result

In [14]:
get_matches(matches)

[('Terminology', Samsung),
 ('Terminology', Apple),
 ('Terminology', Google),
 ('Terminology', Huawei),
 ('Terminology', Mercedes),
 ('Terminology', Amazon),
 ('Terminology', Tesla)]