In [None]:
# Import needed packages
import nltk
# Fetch the NLTK data packages first: the corpus imports further down run
# after these calls, so the data is on disk by the time it is needed.
# Lexical resources used through `wn` and `swn`.
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('sentiwordnet')
# Text corpora; presumably the ones `nltk.book` loads when it is imported -- confirm.
nltk.download('gutenberg')
nltk.download('genesis')
nltk.download('inaugural')
nltk.download('nps_chat')
nltk.download('webtext')
nltk.download('treebank')
# Presumably the tokenizer models behind `word_tokenize` -- confirm.
nltk.download('punkt')
# Word lists read through `stopwords.words(...)`.
nltk.download('stopwords')
from nltk.corpus import wordnet as wn
from nltk.wsd import lesk
from nltk.corpus import sentiwordnet as swn
from nltk.book import text4
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package sentiwordnet to /root/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!
[nltk_data] Downloading package gutenberg to /root/nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!
[nltk_data] Downloading package genesis to /root/nltk_data...
[nltk_data]   Package genesis is already up-to-date!
[nltk_data] Downloading package inaugural to /root/nltk_data...
[nltk_data]   Package inaugural is already up-to-date!
[nltk_data] Downloading package nps_chat to /root/nltk_data...
[nltk_data]   Package nps_chat is already up-to-date!
[nltk_data] Downloading package webtext to /root/nltk_data...
[nltk_data]   Package webtext is already up-to-date!
[nltk_data] Downloading package treebank to /root/nltk_data.

In [None]:
""" 
1. WordNet
WordNet is a database that links English words together based on their semantics, i.e. their meaning. It is similar
to a thesaurus, except that it is more specific about the relationships between words rather than simply throwing
them into a group with other words. Therefore, some words may be semantically closer to each other than others.
"""

# 2. Outputting all synsets for a chosen noun
print('\nChosen noun: cat')
print(wn.synsets('cat'))
# Study the 'big_cat' sense rather than the first synset in the list above.
cat = wn.synset('big_cat.n.01')

# 3a. Extracting definition, usage examples, and lemmas
print('Definition:', cat.definition())
print('Examples:', cat.examples())
print('Lemmas:', cat.lemmas())

# 3b. Traversing up WordNet hierarchy
# Print each level of hypernyms, always climbing through the first hypernym
# of the current level. The loop stops as soon as a level is empty, so a
# synset without any hypernyms prints nothing instead of raising IndexError.
print('Moving up WordNet Hierarchy:')
hypernym_level = cat.hypernyms()
while hypernym_level:
  print('\t', hypernym_level)
  hypernym_level = hypernym_level[0].hypernyms()

"""
Most nouns seem to eventually end at the synset 'entity'.
The further up the WordNet hierarchy we go, the less specific the noun is.
This shows an "is-a" relationship of decreasing exclusivity.
"""

# 4. Extracting hypernyms, hyponyms, meronyms, holonyms, antonyms
# Collect every relation of the chosen noun synset first, then report them in
# a fixed order. Meronyms are the part-meronyms and holonyms the
# member-holonyms; antonyms are looked up on the synset's first lemma.
cat_relations = (
  ('Hypernyms:', cat.hypernyms()),
  ('Hyponyms:', cat.hyponyms()),
  ('Meronyms:', cat.part_meronyms()),
  ('Holonyms:', cat.member_holonyms()),
  ('Antonyms:', cat.lemmas()[0].antonyms()),
)
for relation_label, related_items in cat_relations:
  print(relation_label, related_items)

# 5. Outputting all synsets for a chosen verb
print('\nChosen verb: sculpt')
print(wn.synsets('sculpt'))
# Study the second verb sense from the list above.
sculpt = wn.synset('sculpt.v.02')

# 6a. Extracting definition, usage examples, and lemmas
print('Definition:', sculpt.definition())
print('Examples:', sculpt.examples())
print('Lemmas:', sculpt.lemmas())

# 6b. Traversing up WordNet hierarchy
# Print each level of hypernyms, always climbing through the first hypernym
# of the current level. The loop stops as soon as a level is empty, so a
# verb synset without any hypernyms prints nothing instead of raising
# IndexError.
print('Moving up WordNet Hierarchy:')
hypernym_level = sculpt.hypernyms()
while hypernym_level:
  print('\t', hypernym_level)
  hypernym_level = hypernym_level[0].hypernyms()

"""
Similar to nouns, verbs also seem to be organized in a manner that decreases in specificity moving up
the WordNet hierarchy. Unlike nouns, verbs seem to be a bit more vague and less extensive in their connections.
"""

# 7. Finding other forms using Morphy
# Ask WordNet's morphy for the verb base form of each candidate word and show
# the result next to the word that was looked up.
print('Alternative word forms:')
for word_form in ('sculpted', 'sculpting', 'sculpts', 'skelp'):
  print('\t' + word_form + ' ->', wn.morphy(word_form, wn.VERB))

# 8. Wu-Palmer and Lesk on two "similar" words
print('\nChoose two similar words: fashionable and stylish')
# Display the candidate synsets of both words before picking one sense of
# each, mirroring what steps 2 and 5 do for the noun and the verb.
print(wn.synsets('fashionable'))
print(wn.synsets('stylish'))
fashionable = wn.synset('fashionable.a.01')
stylish = wn.synset('stylish.a.01')

# Wu-Palmer similarity between the two chosen adjective senses.
print("Wu-Palmer:", fashionable.wup_similarity(stylish))
print('Example sentence: She is ostentatiously fashionable/stylish.')

# Lesk receives the sentence as a list of whitespace-separated tokens plus
# the word to disambiguate, and returns the synset it selects for that word.
sentence = 'She is ostentatiously fashionable.'.split()
print('Lesk (fashionable):', lesk(sentence, 'fashionable'))

sentence = 'She is ostentatiously stylish.'.split()
print('Lesk (stylish):', lesk(sentence, 'stylish'))

"""
The Wu-Palmer algorithm works by comparing the depth of each synset with the depth of their lowest common hypernym.
The similarity score lies in the range 0 < s <= 1. The Lesk algorithm returns the synset that best matches the context
in which a word is used in a sentence. When I put the two words in the sentence, they gave the same context.
"""

""" 
SentiWordNet
SWN is a resource for analyzing the sentiment of words, whether they are positive, neutral, or negative.
It can be used by businesses or governments to make general conclusions about the feedback a populace provides.
"""

# 9. Pick emotionally charged word
print('\nChosen emotion word: elated')
elated_list = list(swn.senti_synsets('elated'))
for senti_synset in elated_list:
  print(senti_synset)

sentence='Coffee sounds perfect right now'
print('\nChosen sentence:', sentence)
# Lesk expects the context as a sequence of words. Passing the raw string
# would make it compare single characters against the synset definitions,
# so the sentence is split into tokens once and reused for every word.
sentence_tokens = sentence.split()
for word in sentence_tokens:
  context_word = lesk(sentence_tokens, word)
  if context_word is None:
    # Lesk found no synset for this word, so there is no sentiment to look up.
    continue
  word_ss = swn.senti_synset(context_word.name())
  if word_ss:
    print(word_ss)

"""
I used the Lesk algorithm to determine the correct synset from which to derive the senti-synset. It seems that there were
some errors which affected the sentiment analysis. Seeing this, there are probably still a lot of improvements to make
in this complex endeavor.
"""

""" 
Collocations are words that are often used together. 
A few common examples are "high temperature" or "heavy rain."
"""

# 10. Collocation study with text4
# Use the corpus tokens directly. str(text4) is only the Text object's short
# description, so tokenizing it yields a handful of title words instead of
# the inaugural addresses themselves.
tokens = text4.tokens

stop_words = set(stopwords.words('english'))
# Lower-case before the stop-word test so that capitalised stop words such as
# 'The' or 'We' are filtered out as well; keep purely alphabetic tokens only.
filtered_tokens = []
for token in tokens:
  lowered = token.lower()
  if lowered.isalpha() and lowered not in stop_words:
    filtered_tokens.append(lowered)

biagram_collocation = BigramCollocationFinder.from_words(filtered_tokens)
print('Top 5 bigrams by PMI:', biagram_collocation.nbest(BigramAssocMeasures.pmi, 5))
# score_ngrams returns every bigram with its score, ordered from the highest
# score to the lowest; only the head of that list is shown.
print('Highest PMI scores:')
for bigram, score in biagram_collocation.score_ngrams(BigramAssocMeasures.pmi)[:10]:
  print('\t', bigram, score)

"""
PMI (pointwise mutual information) is the log (base 2) of the probability that two words occur together
divided by the product of their individual probabilities, i.e. it checks whether the co-occurrence is more
than a coincidence. The higher the score is, the more strongly the two words are connected; a score near 0
means the pair occurs together about as often as chance alone would predict.
NOTE(review): PMI tends to favour rare word pairs, so the top-ranked bigrams may be pairs that occur only
once or twice; consider apply_freq_filter on the finder if more frequent collocations are wanted.
"""




Chosen noun: cat
[Synset('cat.n.01'), Synset('guy.n.01'), Synset('cat.n.03'), Synset('kat.n.01'), Synset('cat-o'-nine-tails.n.01'), Synset('caterpillar.n.02'), Synset('big_cat.n.01'), Synset('computerized_tomography.n.01'), Synset('cat.v.01'), Synset('vomit.v.01')]
Definition: any of several large cats typically able to roar and living in the wild
Examples: []
Lemmas: [Lemma('big_cat.n.01.big_cat'), Lemma('big_cat.n.01.cat')]
Moving up WordNet Hierarchy:
	 [Synset('feline.n.01')]
	 [Synset('carnivore.n.01')]
	 [Synset('placental.n.01')]
	 [Synset('mammal.n.01')]
	 [Synset('vertebrate.n.01')]
	 [Synset('chordate.n.01')]
	 [Synset('animal.n.01')]
	 [Synset('organism.n.01')]
	 [Synset('living_thing.n.01')]
	 [Synset('whole.n.02')]
	 [Synset('object.n.01')]
	 [Synset('physical_entity.n.01')]
	 [Synset('entity.n.01')]
Hypernyms: [Synset('feline.n.01')]
Hyponyms: [Synset('cheetah.n.01'), Synset('jaguar.n.01'), Synset('leopard.n.02'), Synset('liger.n.01'), Synset('lion.n.01'), Synset('saber-

[(('address', 'corpus'), 2.0),
 (('inaugural', 'address'), 2.0),
 (('text', 'inaugural'), 2.0)]