<a href="https://colab.research.google.com/github/dhruv21csu155/nlp/blob/main/nlp_exp_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer


In [10]:
nltk.download('all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_eng is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_r

True

In [11]:
def preprocess(text):
  tokens = word_tokenize(text)
  words = [word.lower() for word in tokens if word.isalpha()]
  lemmatizer = WordNetLemmatizer()
  words = [lemmatizer.lemmatize(word) for word in words]

  return words

In [12]:
def word_sense_disambiguation(word):
  synsets = wordnet.synsets(word)
  if synsets:
    return synsets[0]
  else:
    return None

In [13]:
def get_word_info(word):
  sense = word_sense_disambiguation(word)

  if sense:
    synonyms = [lemma.name() for lemma in sense.lemmas()]
    antonyms = []

    for lemma in sense.lemmas():
      antonyms.extend(lemma.antonyms())

    antonyms = [antonym.name() for antonym in antonyms]

    hypernyms = [hypernym.name() for hypernym in sense.hypernyms()]

    hyponyms = [hyponym.name() for hyponym in sense.hyponyms()]

    return {
        'Synonyms': synonyms,
        'Antonyms': antonyms,
        'Hypernyms': hypernyms,
        'Hyponyms': hyponyms
    }

  else:
    return None

In [14]:
def main(file_path):
  with open(file_path, 'r', encoding = 'utf-8') as file:
    text = file.read()

  words = preprocess(text)

  word_info_dict = {}
  for word in words:
    if word not in word_info_dict:
      word_info = get_word_info(word)
      if word_info:
        word_info_dict[word] = word_info

  for word, info in word_info_dict.items():
    print(f"Word: {word}")
    print(f"Synonyms: {','.join(info['Synonyms'])}")
    print(f"Antonyms: {','.join(info['Antonyms'])}")
    print(f"Hypernyms: {','.join(info['Hypernyms'])}")
    print(f"Hyponyms: {','.join(info['Hyponyms'])}")
    print("\n")

In [18]:
if __name__ == "__main__":
  main('/content/nlp.txt')

Word: nlp
Synonyms: natural_language_processing,NLP,human_language_technology
Antonyms: 
Hypernyms: information_science.n.01
Hyponyms: 


Word: or
Synonyms: Oregon,Beaver_State,OR
Antonyms: 
Hypernyms: 
Hyponyms: 


Word: natural
Synonyms: natural
Antonyms: 
Hypernyms: achiever.n.01
Hyponyms: 


Word: language
Synonyms: language,linguistic_communication
Antonyms: 
Hypernyms: communication.n.02
Hyponyms: artificial_language.n.01,barrage.n.01,dead_language.n.01,indigenous_language.n.01,lingua_franca.n.01,metalanguage.n.01,native_language.n.01,natural_language.n.01,object_language.n.02,sign_language.n.01,slanguage.n.01,source_language.n.01,string_of_words.n.01,superstrate.n.02,usage.n.03,words.n.03


Word: processing
Synonyms: processing
Antonyms: 
Hypernyms: process.n.06
Hyponyms: blowing.n.01,data_processing.n.01,development.n.08,refining.n.01,vulcanization.n.01


Word: is
Synonyms: be
Antonyms: 
Hypernyms: 
Hyponyms: abound.v.01,accept.v.08,account.v.01,account_for.v.01,act.v.06,answer