In [2]:
!pip install nltk spacy beautifulsoup4 kaggle
!python -m spacy download en_core_web_sm

import nltk
from nltk.tokenize import word_tokenize, RegexpTokenizer, sent_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk import pos_tag
import spacy
from spacy.lang.en import English
from bs4 import BeautifulSoup
import pandas as pd
import requests
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from gensim.models import Word2Vec


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
# Initialize nltk components
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')


# Load spaCy models
nlp_spacy = spacy.load('en_core_web_sm')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# **The corpus we created in HW1**

In [4]:
# Use BeautifulSoup to scrape text data from a public page on one of your social media profiles.
url = 'https://en.wikipedia.org/wiki/English_Springer_Spaniel'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
print(soup.title)

scraped_text = ' '.join(p.get_text() for p in soup.find_all('p'))
print("Scraped Text:", scraped_text[:500])  # Print the first 500 characters to verify


<title>English Springer Spaniel - Wikipedia</title>
Scraped Text: 
 The English Springer Spaniel is a breed of gun dog in the Spaniel group traditionally used for flushing and retrieving game. They are descended from the Norfolk or Shropshire Spaniels of the mid-19th century; the breed has diverged into separate show and working lines. It is closely related to the Welsh Springer Spaniel and very closely to the English Cocker Spaniel; less than a century ago, springers and cockers would come from the same litter. The smaller "cockers" were used in woodcock hunt


# **Tokenization**


In [5]:
# White space tokenizer function
def white_space_tokenizer(text):
    return text.split()


# Example usage:
tokens_whitespace =  white_space_tokenizer(scraped_text)
print(tokens_whitespace[:100])

['The', 'English', 'Springer', 'Spaniel', 'is', 'a', 'breed', 'of', 'gun', 'dog', 'in', 'the', 'Spaniel', 'group', 'traditionally', 'used', 'for', 'flushing', 'and', 'retrieving', 'game.', 'They', 'are', 'descended', 'from', 'the', 'Norfolk', 'or', 'Shropshire', 'Spaniels', 'of', 'the', 'mid-19th', 'century;', 'the', 'breed', 'has', 'diverged', 'into', 'separate', 'show', 'and', 'working', 'lines.', 'It', 'is', 'closely', 'related', 'to', 'the', 'Welsh', 'Springer', 'Spaniel', 'and', 'very', 'closely', 'to', 'the', 'English', 'Cocker', 'Spaniel;', 'less', 'than', 'a', 'century', 'ago,', 'springers', 'and', 'cockers', 'would', 'come', 'from', 'the', 'same', 'litter.', 'The', 'smaller', '"cockers"', 'were', 'used', 'in', 'woodcock', 'hunting', 'while', 'their', 'larger', 'littermates', 'were', 'used', 'to', 'flush,', 'or', '"spring",', 'other', 'game', 'birds,', 'hence', 'the', 'name.', 'In']


In [6]:
# Initialize regex tokenizer
tokenizer_regex = RegexpTokenizer(r'\w+')

# Example usage:
regex_tokens = tokenizer_regex.tokenize(scraped_text)

print(regex_tokens)


['The', 'English', 'Springer', 'Spaniel', 'is', 'a', 'breed', 'of', 'gun', 'dog', 'in', 'the', 'Spaniel', 'group', 'traditionally', 'used', 'for', 'flushing', 'and', 'retrieving', 'game', 'They', 'are', 'descended', 'from', 'the', 'Norfolk', 'or', 'Shropshire', 'Spaniels', 'of', 'the', 'mid', '19th', 'century', 'the', 'breed', 'has', 'diverged', 'into', 'separate', 'show', 'and', 'working', 'lines', 'It', 'is', 'closely', 'related', 'to', 'the', 'Welsh', 'Springer', 'Spaniel', 'and', 'very', 'closely', 'to', 'the', 'English', 'Cocker', 'Spaniel', 'less', 'than', 'a', 'century', 'ago', 'springers', 'and', 'cockers', 'would', 'come', 'from', 'the', 'same', 'litter', 'The', 'smaller', 'cockers', 'were', 'used', 'in', 'woodcock', 'hunting', 'while', 'their', 'larger', 'littermates', 'were', 'used', 'to', 'flush', 'or', 'spring', 'other', 'game', 'birds', 'hence', 'the', 'name', 'In', '1902', 'The', 'Kennel', 'Club', 'recognized', 'the', 'English', 'Springer', 'Spaniel', 'as', 'a', 'distinc

In [7]:
tokens_nltk = word_tokenize(scraped_text)
print(tokens_nltk[:100])


['The', 'English', 'Springer', 'Spaniel', 'is', 'a', 'breed', 'of', 'gun', 'dog', 'in', 'the', 'Spaniel', 'group', 'traditionally', 'used', 'for', 'flushing', 'and', 'retrieving', 'game', '.', 'They', 'are', 'descended', 'from', 'the', 'Norfolk', 'or', 'Shropshire', 'Spaniels', 'of', 'the', 'mid-19th', 'century', ';', 'the', 'breed', 'has', 'diverged', 'into', 'separate', 'show', 'and', 'working', 'lines', '.', 'It', 'is', 'closely', 'related', 'to', 'the', 'Welsh', 'Springer', 'Spaniel', 'and', 'very', 'closely', 'to', 'the', 'English', 'Cocker', 'Spaniel', ';', 'less', 'than', 'a', 'century', 'ago', ',', 'springers', 'and', 'cockers', 'would', 'come', 'from', 'the', 'same', 'litter', '.', 'The', 'smaller', '``', 'cockers', "''", 'were', 'used', 'in', 'woodcock', 'hunting', 'while', 'their', 'larger', 'littermates', 'were', 'used', 'to', 'flush', ',']


In [8]:
from nltk.tokenize import sent_tokenize

def sentence_tokenize(text):
    sentences = sent_tokenize(text)
    return sentences
# Example usage:
sentences = sentence_tokenize(scraped_text)
print(sentences[:20])

['\n The English Springer Spaniel is a breed of gun dog in the Spaniel group traditionally used for flushing and retrieving game.', 'They are descended from the Norfolk or Shropshire Spaniels of the mid-19th century; the breed has diverged into separate show and working lines.', 'It is closely related to the Welsh Springer Spaniel and very closely to the English Cocker Spaniel; less than a century ago, springers and cockers would come from the same litter.', 'The smaller "cockers" were used in woodcock hunting while their larger littermates were used to flush, or "spring", other game birds, hence the name.', 'In 1902, The Kennel Club recognized the English Springer Spaniel as a distinct breed.', '[1] They are used as sniffer dogs on a widespread basis.', 'The English Springer Spaniel is a medium-sized compact dog.', 'Its coat is moderately long with feathering on the legs and tail.', 'It is a well proportioned, balanced dog with a gentle expression.', 'This breed represents perhaps the

## ** Apply Normalization:**

In [9]:
#Stemming is a process in natural language processing (NLP) where words are reduced to their root form - For example, the words "connection," "connected," "connecting," and "connects" all stem to the word "connect."
stemmer = PorterStemmer()

# Split the text into words
words = scraped_text.split()

# Applying the stemmer to each word
stemmed_words = [stemmer.stem(word) for word in words]

# Print the results
print(stemmed_words)

['the', 'english', 'springer', 'spaniel', 'is', 'a', 'breed', 'of', 'gun', 'dog', 'in', 'the', 'spaniel', 'group', 'tradit', 'use', 'for', 'flush', 'and', 'retriev', 'game.', 'they', 'are', 'descend', 'from', 'the', 'norfolk', 'or', 'shropshir', 'spaniel', 'of', 'the', 'mid-19th', 'century;', 'the', 'breed', 'ha', 'diverg', 'into', 'separ', 'show', 'and', 'work', 'lines.', 'it', 'is', 'close', 'relat', 'to', 'the', 'welsh', 'springer', 'spaniel', 'and', 'veri', 'close', 'to', 'the', 'english', 'cocker', 'spaniel;', 'less', 'than', 'a', 'centuri', 'ago,', 'springer', 'and', 'cocker', 'would', 'come', 'from', 'the', 'same', 'litter.', 'the', 'smaller', '"cockers"', 'were', 'use', 'in', 'woodcock', 'hunt', 'while', 'their', 'larger', 'litterm', 'were', 'use', 'to', 'flush,', 'or', '"spring",', 'other', 'game', 'birds,', 'henc', 'the', 'name.', 'in', '1902,', 'the', 'kennel', 'club', 'recogn', 'the', 'english', 'springer', 'spaniel', 'as', 'a', 'distinct', 'breed.[1]', 'they', 'are', 'use'

In [10]:
# Create an instance of WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# Helper function to convert nltk tag to wordnet tag
def nltk_tag_to_wordnet_tag(nltk_tag):
    if nltk_tag.startswith('J'):
        return wordnet.ADJ
    elif nltk_tag.startswith('V'):
        return wordnet.VERB
    elif nltk_tag.startswith('N'):
        return wordnet.NOUN
    elif nltk_tag.startswith('R'):
        return wordnet.ADV
    else:
        return None

# Tokenize the text and get parts of speech tags
tokens = word_tokenize(scraped_text)
tagged_tokens = pos_tag(tokens)

# Lemmatize each token with its correct part of speech
lemmatized_text = [
    lemmatizer.lemmatize(token, nltk_tag_to_wordnet_tag(tag)) if nltk_tag_to_wordnet_tag(tag) else token
    for token, tag in tagged_tokens
]

# Print the lemmatized text
print(" ".join(lemmatized_text))

The English Springer Spaniel be a breed of gun dog in the Spaniel group traditionally use for flush and retrieve game . They be descend from the Norfolk or Shropshire Spaniels of the mid-19th century ; the breed have diverge into separate show and work line . It be closely relate to the Welsh Springer Spaniel and very closely to the English Cocker Spaniel ; less than a century ago , springer and cocker would come from the same litter . The small `` cocker '' be use in woodcock hunt while their large littermates be use to flush , or `` spring '' , other game bird , hence the name . In 1902 , The Kennel Club recognize the English Springer Spaniel as a distinct breed . [ 1 ] They be use as sniffer dog on a widespread basis . The English Springer Spaniel be a medium-sized compact dog . Its coat be moderately long with feather on the leg and tail . It be a well proportion , balance dog with a gentle expression . This breed represent perhaps the great divergence between work and show line of

 **Remove Stop Words**

In [11]:
stop_words = set(stopwords.words('english'))

# Tokenize the text
words = word_tokenize(scraped_text)

# Filter out the stop words
filtered_words = [word for word in words if word.lower() not in stop_words]

# Print the filtered text
print(" ".join(filtered_words))

English Springer Spaniel breed gun dog Spaniel group traditionally used flushing retrieving game . descended Norfolk Shropshire Spaniels mid-19th century ; breed diverged separate show working lines . closely related Welsh Springer Spaniel closely English Cocker Spaniel ; less century ago , springers cockers would come litter . smaller `` cockers '' used woodcock hunting larger littermates used flush , `` spring '' , game birds , hence name . 1902 , Kennel Club recognized English Springer Spaniel distinct breed . [ 1 ] used sniffer dogs widespread basis . English Springer Spaniel medium-sized compact dog . coat moderately long feathering legs tail . well proportioned , balanced dog gentle expression . breed represents perhaps greatest divergence working show lines breed dog . field-bred dog show-bred dog appear different breeds registered together . gene pools almost completely segregated least 70 years . [ 2 ] field-bred dog would competitive modern dog show , show dog would speed sta

**BOW**

In [12]:
vectorizer = CountVectorizer()
vectorizer.fit([scraped_text])
vector = vectorizer.transform([scraped_text])
print("Vocabulary: ", vectorizer.vocabulary_)
#print("Vector: ", vector.toarray())

Vocabulary:  {'the': 487, 'english': 199, 'springer': 458, 'spaniel': 452, 'is': 281, 'breed': 102, 'of': 357, 'gun': 247, 'dog': 188, 'in': 271, 'group': 246, 'traditionally': 502, 'used': 518, 'for': 227, 'flushing': 224, 'and': 65, 'retrieving': 410, 'game': 238, 'they': 492, 'are': 73, 'descended': 167, 'from': 232, 'norfolk': 349, 'or': 363, 'shropshire': 438, 'spaniels': 453, 'mid': 333, '19th': 18, 'century': 121, 'has': 249, 'diverged': 184, 'into': 278, 'separate': 425, 'show': 435, 'working': 546, 'lines': 310, 'it': 283, 'closely': 127, 'related': 406, 'to': 500, 'welsh': 531, 'very': 523, 'cocker': 133, 'less': 303, 'than': 485, 'ago': 57, 'springers': 459, 'cockers': 134, 'would': 547, 'come': 138, 'same': 419, 'litter': 311, 'smaller': 447, 'were': 532, 'woodcock': 544, 'hunting': 268, 'while': 535, 'their': 488, 'larger': 296, 'littermates': 312, 'flush': 223, 'spring': 457, 'other': 365, 'birds': 89, 'hence': 257, 'name': 344, '1902': 15, 'kennel': 290, 'club': 128, 're

### **TF-IDF**

In [13]:
vectorizer = TfidfVectorizer()
# Fit and transform the text
tfidf_matrix = vectorizer.fit_transform([scraped_text])

# Get feature names to use as dataframe column headers
feature_names = vectorizer.get_feature_names_out()

# View the TF-IDF dense matrix
dense = tfidf_matrix.todense()
denselist = dense.tolist()

df = pd.DataFrame(denselist, columns=feature_names)

print(df)

         10        11        12        13      13th        14        15  \
0  0.019451  0.019451  0.019451  0.012967  0.006484  0.006484  0.012967   

       1576        16        17  ...  willingness      wiry      with  \
0  0.006484  0.012967  0.019451  ...     0.006484  0.006484  0.051868   

    withers  woodcock      work   working     would     years   younger  
0  0.012967  0.012967  0.012967  0.019451  0.032418  0.032418  0.006484  

[1 rows x 550 columns]


# Word embedding by WORD2VEC

In [14]:
# Tokenize the corpus
tokenized_corpus = [word_tokenize(doc.lower()) for doc in filtered_words]

# Define corpus
corpus = tokenized_corpus


# Train the Word2Vec model
model = Word2Vec(
    sentences=corpus,      # The corpus to train the model on
    vector_size=100,       # The size of the word vectors to be learned
    window=5,              # The size of the window of words to be considered
    min_count=5,           # The minimum frequency required for a word to be included in the vocabulary
    sg=0,                  # 0 for CBOW, 1 for skip-gram
    negative=5,            # The number of negative samples to use for negative sampling
    ns_exponent=0.75,      # The exponent used to shape the negative sampling distribution
    alpha=0.03,            # The initial learning rate
    min_alpha=0.0007,      # The minimum learning rate to which the learning rate will be linearly reduced
    epochs=30,             # The number of epochs (iterations) over the corpus
    workers=4,             # The number of worker threads to use for training the model
    seed=42,               # The seed for the random number generator
    max_vocab_size=None    # The maximum vocabulary size (None means no limit)
)

# Get the vector representation of a word
vector = model.wv['dog']

# Find the most similar words to a given word
similar_words = model.wv.most_similar('dog')

# Print the vector and similar words
print("Vector for 'dog':", vector)
print("Most similar words to 'dog':", similar_words)

Vector for 'dog': [ 0.00041095  0.00232331 -0.00664561 -0.00657417 -0.0074132   0.00129901
 -0.00984293  0.00144861 -0.00048143 -0.0006803   0.00358552  0.00045264
  0.00455497  0.00527847  0.00840236  0.00598489  0.00035412 -0.00015694
 -0.00528583  0.00199187 -0.00721729  0.00862472  0.00690089 -0.00760533
  0.00571912 -0.00765793  0.0044245  -0.00824582 -0.00023613  0.00315727
  0.00457622 -0.00162783  0.00843247  0.00548643 -0.00583471  0.00342463
  0.00461069 -0.00332725  0.00476463  0.00796733  0.00634228  0.00525064
  0.00268995 -0.0045893   0.00590446 -0.00271616  0.00064394 -0.0037112
 -0.00373966 -0.00684777  0.00922595 -0.00704433 -0.00827482  0.00872255
  0.00110026 -0.00124192  0.00474983 -0.0023336  -0.00744157  0.00459371
 -0.00648001  0.00105986  0.00898132  0.0087228  -0.00278434  0.00560603
  0.00225352 -0.00041261 -0.00720144 -0.00247281  0.00303836  0.00973263
 -0.00990219  0.0043552   0.00692674  0.00902389 -0.00152393 -0.00763043
 -0.00458066  0.00701067  0.007558


**GloVe** (Global Vectors for Word Representation) is an unsupervised learning algorithm for obtaining vector representations for words. Training is performed on aggregated global word-word co-occurrence statistics from a corpus, and the resulting representations showcase linear substructures of the word vector space.

In [17]:

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:

import os
import numpy as np

# Path to the directory in Google Drive
directory_path = '/content/drive/My Drive/glove.6B'
file_name = 'glove.6B.100d.txt'  # Update this with the correct file name

# Full file path
file_path = os.path.join(directory_path, file_name)

# Verify file existence
if os.path.exists(file_path):
    print("File exists.")
    # Attempt to read the file
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            first_line = f.readline()
        print("File read successfully. First line:")
        print(first_line)
    except Exception as e:
        print(f"Error reading file: {e}")
else:
    print("File not found.")


# Function to load GloVe model
def load_glove_model(glove_file):
    print("Loading Glove Model")
    glove_model = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            split_line = line.split()
            word = split_line[0]
            embedding = np.array([float(val) for val in split_line[1:]])
            glove_model[word] = embedding
    print(f"Done. {len(glove_model)} words loaded!")
    return glove_model

# Load the GloVe model
glove_model = load_glove_model(file_path)

# Function to get the GloVe embedding for a word
def get_glove_embedding(word, glove_model):
    return glove_model.get(word, np.zeros(100))

# Example usage
word = 'dog'
word_embedding = get_glove_embedding(word, glove_model)
print(f"GloVe vector for '{word}':\n", word_embedding)


File exists.
File read successfully. First line:
the -0.038194 -0.24487 0.72812 -0.39961 0.083172 0.043953 -0.39141 0.3344 -0.57545 0.087459 0.28787 -0.06731 0.30906 -0.26384 -0.13231 -0.20757 0.33395 -0.33848 -0.31743 -0.48336 0.1464 -0.37304 0.34577 0.052041 0.44946 -0.46971 0.02628 -0.54155 -0.15518 -0.14107 -0.039722 0.28277 0.14393 0.23464 -0.31021 0.086173 0.20397 0.52624 0.17164 -0.082378 -0.71787 -0.41531 0.20335 -0.12763 0.41367 0.55187 0.57908 -0.33477 -0.36559 -0.54857 -0.062892 0.26584 0.30205 0.99775 -0.80481 -3.0243 0.01254 -0.36942 2.2167 0.72201 -0.24978 0.92136 0.034514 0.46745 1.1079 -0.19358 -0.074575 0.23353 -0.052062 -0.22044 0.057162 -0.15806 -0.30798 -0.41625 0.37972 0.15006 -0.53212 -0.2055 -1.2526 0.071624 0.70565 0.49744 -0.42063 0.26148 -1.538 -0.30223 -0.073438 -0.28312 0.37104 -0.25217 0.016215 -0.017099 -0.38984 0.87424 -0.72569 -0.51058 -0.52028 -0.1459 0.8278 0.27062

Loading Glove Model
Done. 400000 words loaded!
GloVe vector for 'dog':
 [ 0.30817    0.

# **CYK**

In [19]:
import numpy as np
import pandas as pd

# Define the CFG in CNF
cfg = {
    'S': [['NP', 'VP']],
    'PP': [['P', 'NP']],
    'NP': [['Det', 'N'], ['Det', 'N', 'PP'], ['I']],
    'VP': [['V', 'NP'], ['VP', 'PP']],
    'Det': ['an', 'my'],
    'N': ['elephant', 'pajamas'],
    'V': ['shot'],
    'P': ['in']
}

# Function to check if a production exists in the CFG
def check_production(cfg, rhs):
    return [lhs for lhs, prods in cfg.items() if rhs in prods]

# CYK algorithm implementation
def cyk_parse(words, cfg):
    n = len(words)
    table = [[set() for _ in range(n)] for _ in range(n)]

    # Fill the table
    for j in range(1, n+1):
        for lhs, rhs in cfg.items():
            if words[j-1] in rhs:
                table[j-1][j-1].add(lhs)
        for i in range(j-2, -1, -1):
            for k in range(i+1, j):
                for lhs, rhs in cfg.items():
                    for production in rhs:
                        if len(production) == 2:
                            B, C = production
                            if B in table[i][k-1] and C in table[k][j-1]:
                                table[i][j-1].add(lhs)

    return table

# Define the sentences
sentences = [
    "I shot an elephant",
    "I shot an elephant in my pajamas",
    "I shot my elephant in pajamas",
    "I shot an elephant in pajamas",
    "I shot my elephant"
]

# Parse each sentence
for sentence in sentences:
    words = sentence.split()
    table = cyk_parse(words, cfg)
    print(f"\nSentence: {sentence}")
    df = pd.DataFrame(table)
    print(df)



Sentence: I shot an elephant
    0    1      2     3
0  {}   {}     {}    {}
1  {}  {V}     {}  {VP}
2  {}   {}  {Det}  {NP}
3  {}   {}     {}   {N}

Sentence: I shot an elephant in my pajamas
    0    1      2     3    4      5     6
0  {}   {}     {}    {}   {}     {}    {}
1  {}  {V}     {}  {VP}   {}     {}  {VP}
2  {}   {}  {Det}  {NP}   {}     {}    {}
3  {}   {}     {}   {N}   {}     {}    {}
4  {}   {}     {}    {}  {P}     {}  {PP}
5  {}   {}     {}    {}   {}  {Det}  {NP}
6  {}   {}     {}    {}   {}     {}   {N}

Sentence: I shot my elephant in pajamas
    0    1      2     3    4    5
0  {}   {}     {}    {}   {}   {}
1  {}  {V}     {}  {VP}   {}   {}
2  {}   {}  {Det}  {NP}   {}   {}
3  {}   {}     {}   {N}   {}   {}
4  {}   {}     {}    {}  {P}   {}
5  {}   {}     {}    {}   {}  {N}

Sentence: I shot an elephant in pajamas
    0    1      2     3    4    5
0  {}   {}     {}    {}   {}   {}
1  {}  {V}     {}  {VP}   {}   {}
2  {}   {}  {Det}  {NP}   {}   {}
3  {}   {}    