In [None]:
# spaCy is written in Cython, so it is very fast.

In [None]:
# Batteries included:
# 1. Index-preserving tokenization (details about this later)
# 2. Models for part-of-speech tagging, named entity recognition and dependency parsing
# 3. Supports 8 languages out of the box
# 4. Easy and beautiful visualizations
# 5. Pretrained word vectors

In [None]:
# Extensible:
# It plays nicely with the other existing tools that you know and love: scikit-learn, TensorFlow, gensim.

In [None]:
#Notice that the installation doesn’t automatically download the English model. We need to do that ourselves.

In [7]:
import spacy

# Load the English model from an explicit on-disk location.
# A raw string is used so that every backslash is literal; the previous literal
# mixed "\\" with the invalid escapes "\D" and "\s", which Python only tolerates
# with a deprecation warning. The resulting path is unchanged.
# NOTE(review): this absolute path is specific to one machine. If the model
# package is installed in the environment, spacy.load('en_core_web_sm') is the
# portable form -- confirm before switching.
nlp = spacy.load(r'C:\Users\DELL\Anaconda3\Lib\site-packages\en_core_web_sm\en_core_web_sm-2.2.5')

# Tokenize a short text and print each token wrapped in single quotes.
doc = nlp("Hello world")
for token in doc:
    print("'" + token.text + "'")

'Hello'
'world'


In [None]:
# Notice the index-preserving tokenization in action.
# Rather than only keeping the words, spaCy keeps the spaces too.
# This is helpful for situations when you need to replace words in the original text or add some annotations.
# With NLTK tokenization, there’s no way to know exactly where a tokenized word is in the original raw text.
# spaCy preserves this “link” between the word and its place in the raw text. Here’s how to get the exact index of a word:

In [9]:
import spacy

# Reload the English model so this cell can run on its own.
# A raw string keeps every backslash literal; the previous literal relied on the
# invalid escapes "\D" and "\s", which Python only tolerates with a deprecation
# warning. The resulting path is unchanged.
# NOTE(review): this absolute path is specific to one machine -- confirm it, or
# load the installed package by name instead.
nlp = spacy.load(r'C:\Users\DELL\Anaconda3\Lib\site-packages\en_core_web_sm\en_core_web_sm-2.2.5')

# Print each token in double quotes followed by token.idx, its character offset
# in the original text. The run of spaces is kept as a token of its own.
doc = nlp('Hello     World!')
for token in doc:
    print('"' + token.text + '"', token.idx)

# Expected output:
# "Hello" 0
# "    " 6
# "World" 10
# "!" 15

"Hello" 0
"    " 6
"World" 10
"!" 15


In [None]:
# The Token class exposes a lot of word-level attributes. Here are a few examples:

In [10]:
# Print one tab-separated row of word-level attributes per token.
doc = nlp("Next week I'll   be in Madrid.")
for tok in doc:
    fields = (
        tok.text,      # the token text as it appears in the input
        tok.idx,       # character offset of the token in the input
        tok.lemma_,    # lemma (e.g. "will" for "'ll")
        tok.is_punct,  # True for punctuation tokens
        tok.is_space,  # True for whitespace-only tokens
        tok.shape_,    # letter-case/digit pattern (e.g. "Xxxx")
        tok.pos_,      # coarse part-of-speech label
        tok.tag_,      # fine-grained part-of-speech tag
    )
    print("\t".join(str(value) for value in fields))

Next	0	next	False	False	Xxxx	ADJ	JJ
week	5	week	False	False	xxxx	NOUN	NN
I	10	-PRON-	False	False	X	PRON	PRP
'll	11	will	False	False	'xx	VERB	MD
  	15	  	False	True	  	SPACE	_SP
be	17	be	False	False	xx	AUX	VB
in	20	in	False	False	xx	ADP	IN
Madrid	23	Madrid	False	False	Xxxxx	PROPN	NNP
.	29	.	True	False	.	PUNCT	.


In [None]:
# Sentence detection
#Here’s how to achieve one of the most common NLP tasks with spaCy:

In [11]:
# Sentence detection: iterate over the sentence spans exposed by doc.sents
# and print each one on its own line.
doc = nlp("These are apples. These are oranges.")

for sentence in doc.sents:
    print(sentence)

# Expected output:
# These are apples.
# These are oranges.
 

These are apples.
These are oranges.


In [None]:
# Part Of Speech Tagging
#We’ve already seen how this works but let’s have another look:

In [12]:
# Part-of-speech tagging: pair every token's text with its fine-grained tag
# and print the whole list at once.
doc = nlp("Next week I'll be in Madrid.")
tagged_tokens = [(tok.text, tok.tag_) for tok in doc]
print(tagged_tokens)

# Expected output:
# [('Next', 'JJ'), ('week', 'NN'), ('I', 'PRP'), ("'ll", 'MD'), ('be', 'VB'), ('in', 'IN'), ('Madrid', 'NNP'), ('.', '.')]
 

[('Next', 'JJ'), ('week', 'NN'), ('I', 'PRP'), ("'ll", 'MD'), ('be', 'VB'), ('in', 'IN'), ('Madrid', 'NNP'), ('.', '.')]


In [None]:
# Named Entity Recognition
#Doing NER with spaCy is super easy and the pretrained model performs pretty well:

In [13]:
# Named entity recognition: print each detected entity span followed by its label.
doc = nlp("Next week I'll be in Madrid.")
for entity in doc.ents:
    print(" ".join((entity.text, entity.label_)))

# Expected output:
# Next week DATE
# Madrid GPE
 

Next week DATE
Madrid GPE


In [14]:
# The pretrained NER model recognises a healthy variety of entity types.
# The full list of labels is given in spaCy's named-entity annotation documentation.

sentence = "I just bought 2 shares at 9 a.m. because the stock went up 30% in just 2 days according to the WSJ"
doc = nlp(sentence)
for entity in doc.ents:
    print(" ".join((entity.text, entity.label_)))

# Expected output:
# 2 CARDINAL
# 9 a.m. TIME
# 30% PERCENT
# just 2 days DATE
# WSJ ORG

2 CARDINAL
9 a.m. TIME
30% PERCENT
just 2 days DATE
WSJ ORG


In [None]:
# Let’s use displaCy to view a beautiful visualization of the Named Entity annotated sentence:

In [15]:
from spacy import displacy

# Render the named-entity annotations of the sentence inline in the notebook.
sentence = 'I just bought 2 shares at 9 a.m. because the stock went up 30% in just 2 days according to the WSJ'
doc = nlp(sentence)
displacy.render(
    doc,
    style='ent',    # entity-highlighting view
    jupyter=True,   # display the markup in the notebook output
)

In [None]:
# Chunking
#spaCy automatically detects noun-phrases as well

In [16]:
# Chunking: for every noun phrase print its text, its label and the text of
# its root token.
doc = nlp("Wall Street Journal just published an interesting piece on crypto currencies")
for phrase in doc.noun_chunks:
    columns = (phrase.text, phrase.label_, phrase.root.text)
    print(" ".join(columns))

# Expected output:
# Wall Street Journal NP Journal
# an interesting piece NP piece
# crypto currencies NP currencies
 

Wall Street Journal NP Journal
an interesting piece NP piece
crypto currencies NP currencies


In [None]:
# Source: https://nlpforhackers.io/complete-guide-to-spacy/