In [None]:
import spacy
spacy.__version__

In [10]:
nlp = spacy.load('en_core_web_sm')
doc = nlp(u'I am learning hot to build chatbots')
for token in doc:
    print(token.text, token.pos_)

I PRON
am AUX
learning VERB
hot ADJ
to PART
build VERB
chatbots NOUN


In [12]:
doc = nlp('I am going to LONDON next week for a meeting.')
for token in doc:
    print(token.text, token.pos_)

I PRON
am AUX
going VERB
to ADP
LONDON PROPN
next ADJ
week NOUN
for ADP
a DET
meeting NOUN
. PUNCT


In [14]:
for token in doc:
    print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop)

I I PRON PRP nsubj X True True
am be AUX VBP aux xx True True
going go VERB VBG ROOT xxxx True False
to to ADP IN prep xx True True
LONDON LONDON PROPN NNP pobj XXXX True False
next next ADJ JJ amod xxxx True True
week week NOUN NN npadvmod xxxx True False
for for ADP IN prep xxx True True
a a DET DT det x True True
meeting meeting NOUN NN pobj xxxx True False
. . PUNCT . punct . False False


In [18]:
import nltk
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer

porter_stemmer = PorterStemmer()
snowball_stemmer = SnowballStemmer('english')

print(porter_stemmer.stem('fastest'))
print(snowball_stemmer.stem('fastest'))

fastest
fastest


In [35]:
# Named entity recognition
my_string = u"I flew to New York last Sunday for a conference."
doc = nlp(my_string)
for ent in doc.ents:
    print(ent.text, ent.label_)

New York GPE
last Sunday DATE


In [36]:
from spacy.lang.en.stop_words import STOP_WORDS
print(STOP_WORDS)

{'front', 'is', 'a', 'here', 'herself', 'at', 'down', 'from', 'none', 'anywhere', 'mostly', 'elsewhere', 'three', 'hundred', 'show', 'ever', 'have', 'throughout', 'name', 'had', "'d", 'should', 'thru', 'into', 'they', 'much', 'will', 'done', 'make', 'because', 'toward', 'everywhere', 'alone', 'while', 'due', 'else', 'seemed', 'four', 'two', 'these', 'top', 'their', 'used', 'whom', 'anything', 'say', '‘s', 'several', 'has', 'whereafter', 'if', 'ca', 'are', 'sixty', 'can', 'full', 'there', 'whereas', 'becoming', 'so', 'every', 'how', 'everyone', 'to', 'afterwards', '’ve', 'n’t', 'some', 'both', 'up', 'be', 'on', 'call', 'when', 'own', 're', 'across', '‘m', '’re', 'been', 'still', 'onto', 'others', 'ourselves', 'amongst', 'everything', 'now', 'towards', 'became', 'being', 'whose', 'made', 'yours', 'well', 'n‘t', 'yourselves', 'hereby', 'first', 'otherwise', 'move', 'before', 'yet', 'whoever', 'anyhow', 'what', 'i', 'latterly', "'ll", 'or', 'except', 'whither', 'between', 'she', 'would', '

In [41]:
# dependency parsing
from spacy import displacy
sentence = "I am learning how to build chatbots."
doc = nlp(sentence)
# displacy.serve(doc, style="dep", port=8889) # Server already running as its a notebook, therefore we can render only.
displacy.render(doc, style="dep")


In [44]:
# noun chunks example
sentence = "Autonomous cars are moving towards becoming a reality faster than expected."

# Process the sentence using SpaCy
doc = nlp(sentence)

# Extract and print noun chunks
for chunk in doc.noun_chunks:
    print(f"{chunk.text} --> Root: {chunk.root.text}, Dep: {chunk.root.dep_}, Head: {chunk.root.head.text}")


Autonomous cars --> Root: cars, Dep: nsubj, Head: moving
a reality --> Root: reality, Dep: attr, Head: becoming


In [49]:
# finding similarity using word vectors

# pick a model that has word vectors
nlp = spacy.load("en_core_web_md")

# Example words
word1 = nlp("king")
word2 = nlp("queen")
word3 = nlp("apple")

# Print word vectors for each word
print("Vector for 'king':", word1.vector)

# Compute similarity between words
print(f"Similarity between 'king' and 'queen': {word1.similarity(word2):.3f}")
print(f"Similarity between 'king' and 'apple': {word1.similarity(word3):.3f}")


Vector for 'king': [-6.0644e-01 -5.1205e-01  6.4921e-03 -2.9194e-01 -5.6515e-01 -1.1523e-01
  7.7274e-02  3.3561e-01  1.1593e-01  2.3516e+00  5.1773e-02 -5.4229e-01
 -5.7972e-01  1.3220e-01  2.8430e-01 -7.9592e-02 -2.6762e-01  1.8301e-01
 -4.1264e-01  2.0459e-01  1.4436e-01 -1.8714e-01 -3.1393e-01  1.7821e-01
 -1.0997e-01 -2.5584e-01 -1.1149e-01  9.6212e-02 -1.6168e-01  4.0055e-01
 -2.6115e-01  5.3777e-01 -5.2382e-01  2.7637e-01  7.2191e-01  6.0405e-02
 -1.7922e-01  1.8020e-01 -1.4381e-01 -1.4795e-01 -8.1394e-02  5.8282e-02
  2.2964e-02 -2.6374e-01  1.0704e-01 -4.5425e-01 -1.9964e-01  3.7720e-01
 -9.7784e-02 -3.1999e-01 -7.8509e-02  6.1502e-01  7.1643e-02 -3.0930e-02
  2.1508e-01  2.5280e-01 -3.1643e-01  6.6698e-01  1.9813e-02 -3.2311e-01
  2.9266e-02 -4.1403e-02  2.8346e-01 -7.9143e-01  1.3327e-01  7.7231e-02
 -1.8724e-01 -3.3146e-01 -2.0797e-01 -6.9326e-01 -2.3412e-01 -6.8752e-02
  3.8252e-02 -3.2459e-01 -8.3609e-03  1.2945e-01 -2.8316e-01 -5.7546e-01
  2.4336e-01  5.6433e-01 -7.1285

In [52]:
# word vector
sentence1 = nlp("The cat sat on the mat.")
sentence2 = nlp("A dog is lying on the carpet.")

# Compute sentence similarity
print(f"Similarity between sentences: {sentence1.similarity(sentence2):.3f}")


Similarity between sentences: 0.828


In [54]:
# tokenization example
# Input text
text = "SpaCy is an NLP library. It's designed for processing text efficiently!"

# Process the text
doc = nlp(text)

# Iterate over tokens and print them
print("Tokens:")
for token in doc:
    print(token.text)


Tokens:
SpaCy
is
an
NLP
library
.
It
's
designed
for
processing
text
efficiently
!


In [55]:
!rasa --version

  Base: DeclarativeMeta = declarative_base()
Rasa Version      :         3.6.21
Minimum Compatible Version: 3.6.21
Rasa SDK Version  :         3.6.2
Python Version    :         3.9.6
Operating System  :         macOS-15.1-arm64-arm-64bit
Python Path       :         /Users/mayankarora/dib/poc/chatbot-book/.venv/bin/python
