In [0]:
import io
import nltk
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize, pos_tag
import numpy as np
import pandas as pd
from nltk.corpus import wordnet as wn
import spacy
from nltk.corpus import stopwords
from nltk.wsd import lesk

In [2]:
# Fetches the complete NLTK data collection ('all').
# NOTE(review): the code visible in this notebook appears to use only the
# tokenizer, POS tagger and WordNet data, so a narrower download may be
# enough -- confirm before changing.
nltk.download('all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping corpora/biocreative_ppi.zip.
[nltk_data]    | Downloading package brown to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/brown.zip.
[nltk_data]    | Downloading package brown_tei to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/brown_tei.zip.
[nltk_data]    | Downloading package cess_cat to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/cess_cat.zip.
[nltk_data]    | Downloading package cess_esp to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/cess_esp.zip.
[nltk_data]    | Downloading package chat80 to /root/nltk_data...
[nltk_data]    |   Unzipp

True

In [0]:
def penn_to_wn(tag):
    """Map a Penn Treebank POS tag onto WordNet's single-letter POS code.

    Tags beginning with 'N', 'V', 'J' or 'R' yield 'n', 'v', 'a' or 'r'
    respectively; every other tag yields None.
    """
    # (Treebank prefix, WordNet code) pairs, checked in order.
    prefix_to_wordnet = (
        ('N', 'n'),
        ('V', 'v'),
        ('J', 'a'),
        ('R', 'r'),
    )
    for treebank_prefix, wordnet_code in prefix_to_wordnet:
        if tag.startswith(treebank_prefix):
            return wordnet_code
    return None

In [0]:
def task2(sentence):
  """Run a series of NLP analyses on ``sentence`` and print each result.

  Printed in order: tokens, lemmas without POS information, Penn Treebank
  POS tags, the WordNet-style POS tags, lemmas using those POS tags, the
  spaCy dependency parse, the WordNet synsets of every token, the Lesk
  word sense chosen for every token, and finally the hypernyms, hyponyms,
  part/substance holonyms and part/substance meronyms of each chosen sense.

  Args:
    sentence: The raw sentence to analyse, as a single string.

  Returns:
    None. Every result is written to stdout.
  """
  # Loading the spaCy model
  nlp = spacy.load("en_core_web_sm")

  # Tokenization
  tokens = nltk.word_tokenize(sentence)
  print("\n Tokens: \n", tokens)

  # Lemmatization without POS tags
  lemmatizer = WordNetLemmatizer()
  lems = [lemmatizer.lemmatize(word) for word in tokens]
  print("\n Lemmatization of sentence without POS tags: \n",lems)

  # POS tagging
  pos_sen = pos_tag(tokens)
  print("\n POS Tags: \n",pos_sen)

  pos_wn = [(word, penn_to_wn(tag)) for word, tag in pos_sen]
  print("\n POS Tags for wordnet: \n",pos_wn)

  # Lemmatization with POS tags. Tokens without a WordNet POS fall back to
  # 'n', which is the lemmatizer's own default.
  lems_pos = [lemmatizer.lemmatize(word, pos=wn_pos or 'n')
              for word, wn_pos in pos_wn]
  print("\n Lemmatization by taking into account the pos tags: \n")
  print(lems_pos)

  # Dependency parse tree, printed as relation(head-index, word-index)
  # with 1-based token positions.
  sen = nlp(sentence)
  print("\n Dependency parse tree: \n")
  for token in sen:
    print("{dep}({head}-{head_pos}, {word}-{word_pos})".format(
        dep=token.dep_,
        head=token.head.text,
        head_pos=token.head.i + 1,
        word=token.text,
        word_pos=token.i + 1))

  # Synsets of every token. This must iterate over the tokens: iterating
  # over the sentence string would look up individual characters.
  synset_sen = [(word, wn.synsets(word)) for word in tokens]
  print("\n The synsets of all the words in the sentence: \n")
  for word, synsets in synset_sen:
    print(word," : ",synsets)

  # Word sense disambiguation. lesk() builds its context from an iterable
  # of words, so it is given the token list; the raw string would turn the
  # context into a set of characters. pos=None means no POS filtering.
  word_sense = [(word, lesk(tokens, word, pos=wn_pos))
                for word, wn_pos in pos_wn]

  print("\n disambiguated word senses: \n")
  print(word_sense)

  # Finding relations: (printed label, Synset method name), in print order.
  relations = (
      ("Hypernyms", "hypernyms"),
      ("Hyponyms", "hyponyms"),
      ("part_Holonyms", "part_holonyms"),
      ("substance_Holonyms", "substance_holonyms"),
      ("part_Meronyms", "part_meronyms"),
      ("substance_Meronyms", "substance_meronyms"),
  )
  for label, method_name in relations:
    # Words for which Lesk found no sense are reported with None.
    related = [(word, getattr(sense, method_name)() if sense else None)
               for word, sense in word_sense]
    print("\n {}: \n".format(label))
    print(related)


    


 
   






In [5]:
# Demo run of task2 on a sample sentence; every analysis result is printed.
task2("My name is Divya Sharma and I love fruits and table and cycle handle")


 Tokens: 
 ['My', 'name', 'is', 'Divya', 'Sharma', 'and', 'I', 'love', 'fruits', 'and', 'table', 'and', 'cycle', 'handle']

 Lemmatization of sentence without POS tags: 
 ['My', 'name', 'is', 'Divya', 'Sharma', 'and', 'I', 'love', 'fruit', 'and', 'table', 'and', 'cycle', 'handle']

 POS Tags: 
 [('My', 'PRP$'), ('name', 'NN'), ('is', 'VBZ'), ('Divya', 'NNP'), ('Sharma', 'NNP'), ('and', 'CC'), ('I', 'PRP'), ('love', 'VBP'), ('fruits', 'NNS'), ('and', 'CC'), ('table', 'JJ'), ('and', 'CC'), ('cycle', 'NN'), ('handle', 'NN')]

 POS Tags for wordnet: 
 [('My', None), ('name', 'n'), ('is', 'v'), ('Divya', 'n'), ('Sharma', 'n'), ('and', None), ('I', None), ('love', 'v'), ('fruits', 'n'), ('and', None), ('table', 'a'), ('and', None), ('cycle', 'n'), ('handle', 'n')]

 Lemmatization by taking into account the pos tags: 

['My', 'name', 'be', 'Divya', 'Sharma', 'and', 'I', 'love', 'fruit', 'and', 'table', 'and', 'cycle', 'handle']

 Dependency parse tree: 

poss(name-2, My-1)
nsubj(is-3, name-2