In [1]:
import spacy #Importing the spacy library

In [2]:
from spacy.lang.en import English #Importing all the language specific rules for text processing

In [3]:
# Create the English nlp object: it contains the processing pipeline and the language-specific rules used to process text
nlp = English()

In [4]:
# Calling nlp on a string processes the text and returns a Doc object (not a file); all further operations work on this Doc
doc = nlp('Hello world! I am spacy.')

In [5]:
# The Token object
# A Doc can be iterated over just like a list; each iteration yields one
# token of the text, in order.

for token in doc:
    # .text holds the text of the token
    print(token.text)

Hello
world
!
I
am
spacy
.


![doc.png](attachment:doc.png)

In [6]:
# A Doc supports indexing like a list; a negative index counts from the end,
# so -2 selects the second-to-last token (the one before the final '.').
single_token = doc[-2]
print(single_token)

spacy


In [7]:
# Some lexical attributes of a token. They describe the token itself and do
# not depend on the context the word appears in.
# See the example and the image below for more clarity.
#
#   1. Index of the token -    token.i
#   2. Text of the token  -    token.text
#   3. Alphabetic         -    token.is_alpha
#   4. Punctuation        -    token.is_punct
#   5. Numerical          -    token.like_num

doc = nlp('I have 10 pets and #1 is my favorite')

# One list per attribute, with one entry per token, so the rows line up
# column-by-column with the tokens in the 'Text' row.
print('Index:   ', [token.i for token in doc])
print('Text:   ', [token.text for token in doc])
print('Alpha:   ', [token.is_alpha for token in doc])
print('Punct:   ', [token.is_punct for token in doc])
print('like_num:   ', [token.like_num for token in doc])

Index:    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Text:    ['I', 'have', '10', 'pets', 'and', '#', '1', 'is', 'my', 'favorite']
Alpha:    [True, True, False, True, True, False, False, True, True, True]
Punct:    [False, False, False, False, False, True, False, False, False, False]
like_num:    [False, False, True, False, False, False, True, False, False, False]


![Lexical%20Attributes.PNG](attachment:Lexical%20Attributes.PNG)

### Statistical Models


 Statistical models are used to make context-specific predictions about text, for example what function a word has in a sentence.
 These predictions include part-of-speech tags, syntactic dependencies and named entities.
 spaCy provides a number of pre-trained model packages that you can download using the "spacy download" command.
 For example, the "en_core_web_sm" package is a small English model that supports all core capabilities and is 
 trained on web text.

    $ python -m spacy download en_core_web_sm 

    import spacy
    nlp = spacy.load('en_core_web_sm')


 A model package includes:

 1. Binary weights that allow spaCy to make predictions.
 2. A vocabulary to identify which language to use.
 3. Meta information that tells spaCy how to configure the processing pipeline.



In [8]:
# Predicting the part-of-speech tag of each token in the text

# Load the small English model by package name through the spacy module that
# is already imported at the top of the notebook. The package must have been
# installed first with: python -m spacy download en_core_web_sm
nlp = spacy.load('en_core_web_sm')

doc = nlp('She ate the pizza')

# token.pos_ is the predicted part-of-speech tag as a string
for token in doc:
    print(token.text, token.pos_)

She PRON
ate VERB
the DET
pizza NOUN


In [12]:
# Predicting the syntactic dependencies of the words in a sentence

doc = nlp('I am hungry!')

# Per token: its text, part-of-speech tag, dependency label, and the text of
# the head token it is attached to.
for token in doc:
    print(f"{token.text} {token.pos_} {token.dep_} {token.head.text}")

I PRON nsubj am
am AUX ROOT am
hungry ADJ acomp am
! PUNCT punct am


![dependency%20label%20scheme.PNG](attachment:dependency%20label%20scheme.PNG)

In [14]:
# Predicting named entities,
# i.e. predicting what words refer to in the real world: a person, money, a country, etc.

doc = nlp('India is going to invest on apple, peache, avocado and $31 Million more than Italy')

# doc.ents holds the entity spans found in the text; label_ is the entity type as a string
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

India GPE
$31 Million MONEY
Italy GPE


![predicting%20named%20entities.PNG](attachment:predicting%20named%20entities.PNG)

In [16]:
# Look up a human-readable description of the 'GPE' entity label printed above
spacy.explain('GPE')

'Countries, cities, states'