## Triplet Extraction 

In [104]:
# Import spaCy 
import spacy
nlp = spacy.load("en_core_web_sm")
# Forming the object doc which include our text
doc = nlp("The dog jumped and picked up the bone")
# object and subject constants
OBJECT_DEPS = {"dobj", "dative", "attr", "oprd"}
SUBJECT_DEPS = {"nsubj", "nsubjpass", "csubj", "agent", "expl"}

# extract the subject, object and verb from the input
def extract_svo(doc):
    sub = []
    at = []
    ve = []
    for token in doc:
# is this a verb?
        if token.pos_ == "VERB":
            ve.append(token.text)
# is this the object?
        if token.dep_ in OBJECT_DEPS or token.head.dep_ in OBJECT_DEPS:
            at.append(token.text)
        # is this the subject?
        if token.dep_ in SUBJECT_DEPS or token.head.dep_ in SUBJECT_DEPS:
            sub.append(token.text)
    return " ".join(sub).strip().lower(), " ".join(ve).strip().lower(), " ".join(at).strip().lower()



In [105]:

# Print out input 
subject, verb, attribute = extract_svo(doc)

print("svo:, subject: {}, verb: {}, attribute: {}".format(subject, verb, attribute))


svo:, subject: the dog, verb: jumped picked, attribute: the bone


# WordNet Task
##performing the lexical relations

In [50]:
#Import/ download nltk and then import wordnet
import nltk
nltk.download('all')
from nltk.corpus import wordnet

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping corpora/biocreative_ppi.zip.
[nltk_data]    | Downloading package brown to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/brown.zip.
[nltk_data]    | Downloading package brown_tei to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/brown_tei.zip.
[nltk_data]    | Downloading package cess_cat to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/cess_cat.zip.
[nltk_data]    | Downloading package cess_esp to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/cess_esp.zip.
[nltk_data]    | Downloading package chat80 to /root/nltk_data...
[nltk_data]    |   Unzipp

In [63]:
# From nltk corpus we import WordNet as wn then we pass car to look for synonyms
from nltk.corpus import wordnet as wn
wn.synsets('car') 

[Synset('car.n.01'),
 Synset('car.n.02'),
 Synset('car.n.03'),
 Synset('car.n.04'),
 Synset('cable_car.n.01')]

In [64]:
# Looking for hyponyms 
wn.synset('car.n.01').hyponyms() 

[Synset('ambulance.n.01'),
 Synset('beach_wagon.n.01'),
 Synset('bus.n.04'),
 Synset('cab.n.03'),
 Synset('compact.n.03'),
 Synset('convertible.n.01'),
 Synset('coupe.n.01'),
 Synset('cruiser.n.01'),
 Synset('electric.n.01'),
 Synset('gas_guzzler.n.01'),
 Synset('hardtop.n.01'),
 Synset('hatchback.n.01'),
 Synset('horseless_carriage.n.01'),
 Synset('hot_rod.n.01'),
 Synset('jeep.n.01'),
 Synset('limousine.n.01'),
 Synset('loaner.n.02'),
 Synset('minicar.n.01'),
 Synset('minivan.n.01'),
 Synset('model_t.n.01'),
 Synset('pace_car.n.01'),
 Synset('racer.n.02'),
 Synset('roadster.n.01'),
 Synset('sedan.n.01'),
 Synset('sport_utility.n.01'),
 Synset('sports_car.n.01'),
 Synset('stanley_steamer.n.01'),
 Synset('stock_car.n.01'),
 Synset('subcompact.n.01'),
 Synset('touring_car.n.01'),
 Synset('used-car.n.01')]

In [66]:
# Looking for hypernyms
wn.synset('car.n.01').hypernyms()


[Synset('motor_vehicle.n.01')]

In [71]:
# Synonyms of bread
wn.synsets('bread')

[Synset('bread.n.01'), Synset('boodle.n.01'), Synset('bread.v.01')]

In [72]:
# Holonyms of bread
wn.synset('bread.n.01').part_holonyms() 

[Synset('sandwich.n.01')]

In [74]:
# Meronyms of sandwich
wn.synset('sandwich.n.01').part_meronyms() 

[Synset('bread.n.01')]

In [102]:
#Synonyms of walk
wn.synsets('walk')

[Synset('walk.n.01'),
 Synset('base_on_balls.n.01'),
 Synset('walk.n.03'),
 Synset('walk.n.04'),
 Synset('walk.n.05'),
 Synset('walk.n.06'),
 Synset('walk_of_life.n.01'),
 Synset('walk.v.01'),
 Synset('walk.v.02'),
 Synset('walk.v.03'),
 Synset('walk.v.04'),
 Synset('walk.v.05'),
 Synset('walk.v.06'),
 Synset('walk.v.07'),
 Synset('walk.v.08'),
 Synset('walk.v.09'),
 Synset('walk.v.10')]

In [103]:
#Entailments of walk
wn.synset('walk.v.01').entailments()

[Synset('step.v.01')]