In [1]:
import spacy

# Text to tokenize
x = "Embracing and analyzing self failures (of however multitude) is a virtue of nobelmen."

# Load the small English pipeline
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over the text; the resulting doc is iterated token by token below
doc = nlp(x)

# Gather the raw text of every token and show the list
tokens = [word.text for word in doc]
print(tokens)

['Embracing', 'and', 'analyzing', 'self', 'failures', '(', 'of', 'however', 'multitude', ')', 'is', 'a', 'virtue', 'of', 'nobelmen', '.']


In [2]:
# Start from a freshly loaded pipeline so this cell can be re-run safely:
# the component added below is registered under the name "sentencizer", and
# registering that name a second time on the same pipeline would fail.
nlp = spacy.load("en_core_web_sm")

# Register the rule-based sentence splitter in the pipeline.
# `nlp.create_pipe('sentencizer')` only constructs the component and returns
# it; the previous code discarded that return value, so nothing was added.
# NOTE(review): this is the spaCy v3 call form (component referenced by its
# string name) -- confirm the installed spaCy version.
nlp.add_pipe("sentencizer", before="parser")

x = "Embracing and analyzing self failures (of however multitude) is a virtue of nobelmen. And nobility is a treasure few possess."

# Creating the doc object carrying our sentence boundaries
doc = nlp(x)

# Collect one span per sentence (the name `tokens` is kept for compatibility,
# although each element here is a whole sentence rather than a single token)
tokens = [sentence for sentence in doc.sents]
print(tokens)

[Embracing and analyzing self failures (of however multitude) is a virtue of nobelmen., And nobility is a treasure few possess.]


In [3]:
import spacy

# This cell defines neither a pipeline nor a sample text of its own: it
# reuses the `nlp` object and the text `x` left in scope by earlier cells.
doc = nlp(x)

# Build [text, lemma] pairs, one per token, and show them
tokens = [[word.text, word.lemma_] for word in doc]
print(tokens)

[['Embracing', 'embrace'], ['and', 'and'], ['analyzing', 'analyze'], ['self', 'self'], ['failures', 'failure'], ['(', '('], ['of', 'of'], ['however', 'however'], ['multitude', 'multitude'], [')', ')'], ['is', 'be'], ['a', 'a'], ['virtue', 'virtue'], ['of', 'of'], ['nobelmen', 'nobelman'], ['.', '.'], ['And', 'and'], ['nobility', 'nobility'], ['is', 'be'], ['a', 'a'], ['treasure', 'treasure'], ['few', 'few'], ['possess', 'possess'], ['.', '.']]


In [4]:
from spacy.lang.en.stop_words import STOP_WORDS
# Bind the imported English stop-word set to a shorter name and print it in full
stop = STOP_WORDS
print(stop)

{'along', 'get', '’d', 'next', 'sometime', 'back', 'have', 'was', 'not', 'hereafter', 'are', 'fifty', 'but', 'down', 'himself', 'ever', 'becoming', 'for', 'make', 'above', 'thru', 'had', 'three', 'without', 'do', 'his', 'amount', 'up', 'some', 'last', 'anyhow', 'behind', 'must', 'against', 'whose', 'hereupon', 'what', 'move', 'them', 'often', 'on', 'sixty', 'whereafter', 'see', 'side', 'latter', 'meanwhile', 'herein', 'while', 'bottom', 'quite', 'too', '‘d', 'will', 'before', 'sometimes', 'into', 'formerly', 'were', 'many', 'both', 'my', 'become', 'toward', 'whole', 'however', 'nothing', 'using', 'been', 'latterly', 'hers', 'noone', 'due', 'the', 'ten', 'doing', 'well', 'how', 'has', 'so', 'this', 'always', 'via', 'used', 'made', 'same', 'eleven', 'again', 'an', 'five', '‘ve', 'whether', 'does', 'mostly', 'as', 'somewhere', 'wherein', 'one', 'therein', 'fifteen', 'in', 'already', 'out', 'part', 'yourself', 'he', 'full', 'several', 'every', 'of', 'throughout', 'who', 'once', "'s", 'yet'

In [5]:
# Keep the text of every token in `doc` that is not flagged as a stop word.
# The flag is tested directly with `not` rather than compared against False
# with `==`, which is the idiomatic way to test a boolean.
filtered = [token.text for token in doc if not token.is_stop]
print(filtered)

['Embracing', 'analyzing', 'self', 'failures', '(', 'multitude', ')', 'virtue', 'nobelmen', '.', 'nobility', 'treasure', 'possess', '.']


In [6]:
# Pair each token's text with its part-of-speech label (`pos_`) and show the pairs
pos = [[word.text, word.pos_] for word in doc]
print(pos)

[['Embracing', 'VERB'], ['and', 'CCONJ'], ['analyzing', 'VERB'], ['self', 'NOUN'], ['failures', 'NOUN'], ['(', 'PUNCT'], ['of', 'ADP'], ['however', 'ADV'], ['multitude', 'NOUN'], [')', 'PUNCT'], ['is', 'AUX'], ['a', 'DET'], ['virtue', 'NOUN'], ['of', 'ADP'], ['nobelmen', 'NOUN'], ['.', 'PUNCT'], ['And', 'CCONJ'], ['nobility', 'NOUN'], ['is', 'AUX'], ['a', 'DET'], ['treasure', 'NOUN'], ['few', 'ADJ'], ['possess', 'NOUN'], ['.', 'PUNCT']]


In [9]:
# NOTE(review): the literal below opens with four double quotes, so the text
# itself begins with a stray '"' character. It is left unchanged here because
# altering the text could change the model's predictions -- confirm whether
# the extra quote is intended.
x = """"India is considering a proposal to guarantee as much as 3  
trillion rupees ($39 billion) of loans to small businesses as part of a plan to 
restart Asia's third-largest economy, which is reeling under the impact of a 40-
day lockdown, people with knowledge of the matter said."""

# Run the pipeline once and bind the result to `bloomberg`, the name the
# entity extraction below reads. Previously only `doc` was assigned and
# `bloomberg` was never defined in the visible cells, so the extraction
# raised NameError on a fresh kernel.
bloomberg = nlp(x)
doc = bloomberg  # `doc` stays bound to the same parsed text, as before

# For every named entity collect (span, label string, numeric label id)
entities = [(ent, ent.label_, ent.label) for ent in bloomberg.ents]
print(entities)

[(India, 'GPE', 384), (as much as, 'CARDINAL', 397), ($39 billion, 'MONEY', 394), (Asia, 'LOC', 385), (third, 'ORDINAL', 396)]


In [16]:
from spacy import displacy
# Render the named entities of the most recently parsed text inline in the
# notebook. The previous argument, `bloomberg`, is never assigned in the
# visible cells, so `doc` (set by the entity-extraction cell) is used instead.
displacy.render(doc, style="ent", jupyter=True)

Extracting email addresses separately

In [23]:
# Free-form employee records: each line holds a name, an age and an email address
employee_text="""name : Koushiki age: 45 email : koushiki@gmail.com
                 name : Gayathri age: 34 email: gayathri1999@gmail.com
                 name : Ardra age: 60 email : ardra@gmail.com
                 name : pratham parmar age: 15 email : parmar15@yahoo.com
                 name : Shashank age: 54 email: shank@rediffmail.com
                 name : Utkarsh age: 46 email :utkarsh@gmail.com
                 name : Arman age: 49 email :arman@gmail.com"""

# Parse the records with the pipeline already in scope
employee_doc = nlp(employee_text)

# Walk the tokens, skipping everything that does not look like an email
# address, and print the ones that do (one per line)
for token in employee_doc:
    if not token.like_email:
        continue
    print(token.text)

koushiki@gmail.com
gayathri1999@gmail.com
ardra@gmail.com
parmar15@yahoo.com
shank@rediffmail.com
utkarsh@gmail.com
arman@gmail.com
