### use term embeddings to encode hypernymy properties

**training embedding on probase**

In [1]:
from EmbedTrainer import EmbeddingTrainer

trainer = EmbeddingTrainer(embedding_size=50, verbose=2)
trainer.load_data("./data/probase", minimum_count=5, minimum_frequency=10)
save_location = ("./hypernym_embedding.txt", "./hyponym_embedding.txt")
trainer.train(epochs=5, batch_size=32, lr=0.01, gpu=False, save_location=save_location)  # epoch=20

File loaded.
Data filtered.
Duplicate words removed.
Index dictionary created.
Average cost in epoch 1: 1.4422178268432617
Embeddings saved.
Average cost in epoch 2: 0.39279404282569885
Embeddings saved.
Average cost in epoch 3: 0.19924098253250122
Embeddings saved.
Average cost in epoch 4: 0.12274154275655746
Embeddings saved.
Average cost in epoch 5: 0.08650821447372437
Embeddings saved.


**training svm on BLESS**

In [2]:
from preprocess import *

hypernym_pairs, cohypernym_pairs, meronym_pairs, random_pairs = process_bless("./data/BLESS.txt")
print(hypernym_pairs)

[('animal', 'alligator'), ('beast', 'alligator'), ('carnivore', 'alligator'), ('chordate', 'alligator'), ('creature', 'alligator'), ('predator', 'alligator'), ('reptile', 'alligator'), ('vertebrate', 'alligator'), ('amphibian', 'frog'), ('animal', 'frog'), ('beast', 'frog'), ('chordate', 'frog'), ('creature', 'frog'), ('vertebrate', 'frog'), ('animal', 'lizard'), ('beast', 'lizard'), ('carnivore', 'lizard'), ('chordate', 'lizard'), ('creature', 'lizard'), ('reptile', 'lizard'), ('vertebrate', 'lizard'), ('animal', 'snake'), ('beast', 'snake'), ('chordate', 'snake'), ('creature', 'snake'), ('reptile', 'snake'), ('vertebrate', 'snake'), ('amphibian', 'turtle'), ('animal', 'turtle'), ('beast', 'turtle'), ('chordate', 'turtle'), ('creature', 'turtle'), ('food', 'turtle'), ('pet', 'turtle'), ('reptile', 'turtle'), ('vertebrate', 'turtle'), ('appliance', 'dishwasher'), ('artefact', 'dishwasher'), ('artifact', 'dishwasher'), ('commodity', 'dishwasher'), ('device', 'dishwasher'), ('good', 'dis

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

from models import DynamicMarginModel

X = []
X.extend(hypernym_pairs)
X.extend(cohypernym_pairs)
X.extend(meronym_pairs)
X.extend(random_pairs)

y = []
y.extend([1 for _ in range(len(hypernym_pairs))])
y.extend([0 for _ in range(len(cohypernym_pairs))])
y.extend([0 for _ in range(len(meronym_pairs))])
y.extend([0 for _ in range(len(random_pairs))])

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, test_size=0.2)

model = DynamicMarginModel("./hypernym_embedding.txt",\
                 "./hyponym_embedding.txt", C=8, class_weight='balanced')

model.fit(X_train, y_train)
print('Train score: {}'.format(model.score(X_train, y_train)))
print('Test score: {}'.format(model.score(X_test, y_test)))
print(classification_report(y_test, model.predict(X_test)))
with open("./results.txt", "w") as res:
    res.write('Train score: {}\n'.format(model.score(X_train, y_train)))
    res.write('Test score: {}\n'.format(model.score(X_test, y_test)))
    for line in classification_report(y_test, model.predict(X_test)):
        res.write(line)
res.close()

model.fit(X, y)
joblib.dump(model, './trained_model.pkl')

Train score: 0.9301366331528744
Test score: 0.9206185567010309
              precision    recall  f1-score   support

           0       0.96      0.95      0.96      2641
           1       0.57      0.61      0.59       269

    accuracy                           0.92      2910
   macro avg       0.76      0.78      0.77      2910
weighted avg       0.92      0.92      0.92      2910



['./trained_model.pkl']

In [15]:
"""Check if given two words form hypernym hyponym relationship.
Usage: python3 test.py word1 word2
Returns if word1 is hypernym of word2.
"""

import sys

import joblib

# if __name__ == "__main__":
#     if len(sys.argv) != 3:
#         print('Usage: {} word1 word2', sys.argv[0])
#         sys.exit()
sys.path.append('..')
        
model = joblib.load('./trained_model.pkl')
    
if model.predict([('animal', 'dog')]) == 1:
    print("{} is a hypernym of {}.\n".format('animal', 'dog'))
else:
    print("{} is not a hypernym of {}.\n".format('animal', 'dog'))

    
# ````````````````````````````````````````````````````````````
# Write prediction into file
with open("../Results/term_embed_result.txt", "w", encoding="utf-8") as f:
    f.write("hyper\thypo\tisA\n")
    for pair in X_test:
        f.write(pair[0]+"\t")
        f.write(pair[1]+"\t")
        f.write(str(model.predict([(pair[0], pair[1])])[0])+"\n")
f.close()

animal is a hypernym of dog.

