In [12]:
import spacy
nlp=spacy.load('en_core_web_sm')

nlp.pipe_names

['tagger', 'parser', 'ner']

In [14]:
input_text=("""This tutorial is about "Natural Language Processing" and it's very exciting.""")
doc=nlp(input_text)
print([token.text for token in doc])

['This', 'tutorial', 'is', 'about', '"', 'Natural', 'Language', 'Processing', '"', 'and', 'it', "'s", 'very', 'exciting', '.']


In [15]:
print([token.text for token in doc if not token.is_stop])

['tutorial', '"', 'Natural', 'Language', 'Processing', '"', 'exciting', '.']


In [16]:
cook = "cook cooking cooked"
lemma_doc=nlp(cook)
print([token.lemma_ for token in lemma_doc])

['cook', 'cook', 'cook']


In [22]:
sentence=("""William Shakespeare was an English playwright, poet, and actor,' 
          'widely regarded as the greatest writer in the English language '
          'and the world's greatest dramatist.""")
pos_sentence=nlp(sentence)
print([(token.text,token.tag_,token.pos_,spacy.explain(token.tag_)) for token in pos_sentence])

[('William', 'NNP', 'PROPN', 'noun, proper singular'), ('Shakespeare', 'NNP', 'PROPN', 'noun, proper singular'), ('was', 'VBD', 'AUX', 'verb, past tense'), ('an', 'DT', 'DET', 'determiner'), ('English', 'JJ', 'ADJ', 'adjective'), ('playwright', 'NN', 'NOUN', 'noun, singular or mass'), (',', ',', 'PUNCT', 'punctuation mark, comma'), ('poet', 'NN', 'NOUN', 'noun, singular or mass'), (',', ',', 'PUNCT', 'punctuation mark, comma'), ('and', 'CC', 'CCONJ', 'conjunction, coordinating'), ('actor', 'NN', 'NOUN', 'noun, singular or mass'), (',', ',', 'PUNCT', 'punctuation mark, comma'), ("'", '``', 'PUNCT', 'opening quotation mark'), ('\n          ', '_SP', 'SPACE', None), ("'", "''", 'PUNCT', 'closing quotation mark'), ('widely', 'RB', 'ADV', 'adverb'), ('regarded', 'VBN', 'VERB', 'verb, past participle'), ('as', 'IN', 'SCONJ', 'conjunction, subordinating or preposition'), ('the', 'DT', 'DET', 'determiner'), ('greatest', 'JJS', 'ADJ', 'adjective, superlative'), ('writer', 'NN', 'NOUN', 'noun, s

In [27]:
sentence=("""William Shakespeare was an English playwright, poet, and actor,' 
          'widely regarded as the greatest writer in the English language '
          'and the world's greatest dramatist. His phone number was +918900393436.""")
ner_sentence=nlp(sentence)
print([(ent.text,ent.start_char,ent.end_char,ent.label_,spacy.explain(ent.label_)) for ent in ner_sentence.ents])

[('William Shakespeare', 0, 19, 'PERSON', 'People, including fictional'), ('English', 27, 34, 'NORP', 'Nationalities or religious or political groups'), ('English', 123, 130, 'LANGUAGE', 'Any named language')]


In [34]:
sentence=("""William Shakespeare was an English playwright, poet, and actor,' 
          'widely regarded as the greatest writer in the English language. He'
          'was the world's greatest dramatist.""")
recog_sentence=nlp(sentence)
sentences=list(recog_sentence.sents)
print('Total number of sentences:',len(sentences))

print([lines for lines in sentences])


Total number of sentences: 2
[William Shakespeare was an English playwright, poet, and actor,' 
          'widely regarded as the greatest writer in the English language., He'
          'was the world's greatest dramatist.]


In [93]:
from spacy.matcher import Matcher
matcher= Matcher(nlp.vocab)
input_text=('My friend can be reached at (123) 456-789 . I tried to call her'
                'from my number (234) 567-890')

def match_phone_number(doc):
    pattern=[{'ORTH':'('},{'SHAPE':'ddd'},
             {'ORTH':')'},{'SHAPE':'ddd'},
             {'ORTH':'-'},{'SHAPE':'ddd'}]
    matcher.add('PHONE_NUMBERS',None,pattern)
    matches=matcher(doc)
    for match_id,start,end in matches:
        span=doc[start:end]
        print(span.text)
    
input_text_matcher=nlp(input_text)
match_phone_number(input_text_matcher)

(123) 456-789
(234) 567-890


In [82]:
input_text=(""" The helpline number of police is 911 in USA and 100 in India. I tried to call them'
                'from my number +919000011111""")
input_text_matcher=nlp(input_text)
print([token.text for token in input_text_matcher])

[' ', 'The', 'helpline', 'number', 'of', 'police', 'is', '911', 'in', 'USA', 'and', '100', 'in', 'India', '.', 'I', 'tried', 'to', 'call', 'them', "'", '\n                ', "'", 'from', 'my', 'number', '+919000011111']
