# Parts of Speech

In [1]:
import spacy

In [2]:
# Load the small English pipeline used to parse the example sentences.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Parse the example sentence into a Doc (the u"" prefix is redundant on Python 3).
doc1 = nlp("The Quick Brown Fox jump over the lazy dog's back.")

In [4]:
# Show the raw text held by the Doc.
print(doc1.text)

The Quick Brown Fox jump over the lazy dog's back.


In [5]:
# Coarse part-of-speech label (pos_) of the token at index 4.
print(doc1[4].pos_)

NOUN


In [6]:
# Fine-grained tag (tag_) of the token at index 4.
print(doc1[4].tag_)

NN


In [9]:
# One row per token: text, coarse POS, fine-grained tag and the tag's description,
# each left-aligned in a column at least 10 characters wide.
for token in doc1:
    tag_description = spacy.explain(token.tag_)
    print(f"{token.text:10} {token.pos_:10} {token.tag_:10} {tag_description:10}")

The        DET        DT         determiner
Quick      PROPN      NNP        noun, proper singular
Brown      PROPN      NNP        noun, proper singular
Fox        PROPN      NNP        noun, proper singular
jump       NOUN       NN         noun, singular or mass
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective 
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [None]:
# Second example sentence. The leading space in the literal is kept on purpose:
# it is part of the input text, so removing it could shift token indices.
doc2 = nlp(" I read a book on NLP")

In [None]:
# Coarse POS of the token at index 2 (recorded output: 'VERB').
# NOTE(review): presumably this is "read", with the leading space of the text
# taking index 0 - confirm by printing the tokens.
doc2[2].pos_

'VERB'

In [None]:
# Token table again: text, coarse POS, fine-grained tag, tag description.
# NOTE(review): this iterates doc1 even though doc2 was just created; doc2 may
# have been intended - confirm before changing, the saved output matches doc1.
for token in doc1:
    tag_description = spacy.explain(token.tag_)
    print(f"{token.text:10} {token.pos_:10} {token.tag_:10} {tag_description:10}")

The        DET        DT         determiner
Quick      PROPN      NNP        noun, proper singular
Brown      PROPN      NNP        noun, proper singular
Fox        PROPN      NNP        noun, proper singular
jump       NOUN       NN         noun, singular or mass
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective 
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [None]:
# Separate Doc of the same sentence, used for the frequency counts below.
doc3 = nlp("The Quick Brown Fox jump over the lazy dog's back.")

In [None]:
# Frequency of each coarse POS in doc3, keyed by the attribute's integer ID.
pos_counts = doc3.count_by(spacy.attrs.POS)

In [None]:
# Show how often each part of speech occurs (keys are integer POS IDs).
# The variable defined by the previous cell is `pos_counts`; the former
# `pos_count` spelling raised NameError on a fresh kernel.
print(pos_counts)

{90: 2, 96: 3, 92: 3, 85: 1, 84: 1, 94: 1, 97: 1}


In [None]:
# Translate a numeric key from the counts back to text: look up ID 96 in the vocab.
doc3.vocab[96].text


'PROPN'

In [None]:
# Coarse POS label of the token at index 3, for comparison with the vocab lookup.
doc3[3].pos_

'PROPN'

In [None]:
# List every POS count in ascending ID order: numeric ID, label, frequency.
for pos_id, frequency in sorted(pos_counts.items()):
    pos_label = doc3.vocab[pos_id].text
    print(f"{pos_id} {pos_label:6}{frequency:6}")

84 ADJ        1
85 ADP        1
90 DET        2
92 NOUN       3
94 PART       1
96 PROPN      3
97 PUNCT      1


In [None]:
# Same frequency table for the fine-grained tags: numeric ID, tag, frequency.
tag_counts = doc3.count_by(spacy.attrs.TAG)
for tag_id, frequency in sorted(tag_counts.items()):
    tag_label = doc3.vocab[tag_id].text
    print(f"{tag_id}   {tag_label:6}{frequency:6}")

74   POS        1
1292078113972184607   IN         1
10554686591937588953   JJ         1
12646065887601541794   .          1
15267657372422890137   DT         2
15308085513773655218   NN         3
15794550382381185553   NNP        3


In [None]:
# Re-parse the example sentence so the visualisation below has its own Doc.
doc1 = nlp("The Quick Brown Fox jump over the lazy dog's back.")

In [None]:
#To visualize
from spacy import displacy

In [None]:
# Styling for the dependency-parse visualisation.
# displaCy's typeface option is named 'font'; the earlier 'fonts' key is not a
# documented option, so the requested typeface was not being applied.
options = {
    'distance': 60,     # spacing between words
    'compact': True,    # compact arrow layout
    'color': 'green',   # text colour
    'bg': '#09a3d5',    # background colour
    'font': 'Times',    # typeface
}

# Draw the dependency parse of doc1 inline in the notebook.
displacy.render(doc1, style="dep", jupyter=True, options=options)

# Word Embedding

In [None]:
from tensorflow.keras.preprocessing.text import one_hot

In [None]:
# Toy corpus for the word-embedding demo: one short sentence per entry.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

In [None]:
# Show the corpus as a plain list.
print(sent)

In [None]:
# Vocabulary size passed to one_hot and to the Embedding layer.
voc_size = 10_000

In [None]:
# Encode every sentence with one_hot, using voc_size as the vocabulary size.
onehot_repr = [one_hot(sentence, voc_size) for sentence in sent]
print(onehot_repr)

In [None]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [None]:
import numpy

In [None]:
# Bring every encoded sentence to a common length of sent_length, padding at the front.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

In [None]:
# Number of dimensions of each word vector in the embedding demo.
dim = 10

In [None]:
# Minimal model: a single Embedding layer that maps each word index
# (0 .. voc_size - 1) to a dim-sized vector, for inputs of sent_length indices.
# `Sequential` is the imported class; the lowercase `sequential` raised NameError.
model = Sequential()
# Use the `dim` constant from the previous cell instead of repeating the literal 10.
model.add(Embedding(voc_size, dim, input_length=sent_length))
model.compile('adam', 'mse')

In [None]:
# Print the model's layer summary.
model.summary()

In [None]:
# Run the padded index sequences through the embedding-only model.
# NOTE(review): output is presumably one vector per word position, i.e. shape
# (len(sent), sent_length, embedding dim) - confirm.
model.predict(embedded_docs)

# Stop Words

In [None]:
# Load the small English pipeline again for the stop-word section.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Print all default stop words.
# Stop words are words that add little meaning to a sentence, so they are usually removed.
print(nlp.Defaults.stop_words)

In [None]:
# Number of entries in the default stop-word set.
len(nlp.Defaults.stop_words)

In [None]:
# Check whether a given word is flagged as a stop word.
nlp.vocab["Furqan"].is_stop

In [None]:
# Chat shorthand can be treated as a stop word too, e.g. "pfa"
# ("please find the attachment"): add it to the default stop-word set.
nlp.Defaults.stop_words.add("pfa")

In [None]:
# Also set the stop-word flag on the vocab entry itself.
nlp.vocab["pfa"].is_stop = True

In [None]:
# Take a word out of the default stop-word set.
# discard() instead of remove(): re-running this cell after 'among' is already
# gone must not raise KeyError.
nlp.Defaults.stop_words.discard('among')

In [None]:
# Clear the stop-word flag on the vocab entry as well.
nlp.vocab["among"].is_stop = False

In [None]:
# Read the flag back to verify the change.
nlp.vocab["among"].is_stop