In [None]:
# NOTE(review): AmpliGraph is installed without a version pin, while the cells
# below import TransE/DistMult/ComplEx from `ampligraph.latent_features` and run
# on TensorFlow 1.14 — consider pinning a release known to match; TODO confirm which.
!pip install ampligraph

In [None]:
# TensorFlow is pinned on purpose: later cells call `tf.logging` and report
# 1.14.0 as the active version.
!pip install tensorflow==1.14

# Link Prediction

In [None]:
import numpy as np
import pandas as pd
import requests
import ampligraph
from ampligraph.datasets import load_from_csv
ampligraph.__version__

In [None]:
# Download the Game of Thrones triples into the working directory, then parse
# them with AmpliGraph's CSV loader.
url = 'https://ampligraph.s3-eu-west-1.amazonaws.com/datasets/GoT.csv'
response = requests.get(url)
# Fail loudly on an HTTP error instead of saving an error page as the dataset.
response.raise_for_status()
# The context manager closes (and therefore flushes) the file before it is
# read back by load_from_csv.
with open('GoT.csv', 'wb') as csv_file:
    csv_file.write(response.content)
X = load_from_csv('.', 'GoT.csv', sep=',')

In [None]:
type(X)

numpy.ndarray

## 1. Dataset exploration

In [None]:
X

array([['Smithyton', 'SEAT_OF', 'House Shermer of Smithyton'],
       ['House Mormont of Bear Island', 'LED_BY', 'Maege Mormont'],
       ['Margaery Tyrell', 'SPOUSE', 'Joffrey Baratheon'],
       ...,
       ['Extinct', 'SEAT_OF', 'House Hook'],
       ['House Marsh', 'SWORN_TO', 'House Bolton of the Dreadfort'],
       ['House Marbrand of Ashemark', 'IN_REGION', 'The Westerlands']],
      dtype=object)

In [None]:
# Entity vocabulary: every distinct label that occurs in the subject column (0)
# or the object column (2). np.unique flattens the two selected columns.
entities = np.unique(X[:, [0, 2]])
entities

array(['Abelar Hightower', 'Acorn Hall', 'Addam Frey', ..., 'the Antlers',
       'the Paps', 'unnamed tower'], dtype=object)

In [None]:
# Relation vocabulary: the distinct values of the middle column of the triples.
relations = np.unique(X[:, 1])
relations

array(['ALLIED_WITH', 'BRANCH_OF', 'FOUNDED_BY', 'HEIR_TO', 'IN_REGION',
       'LED_BY', 'PARENT_OF', 'SEAT_OF', 'SPOUSE', 'SWORN_TO'],
      dtype=object)

## 2. Defining train and test datasets

In [None]:
len(X)

3175

In [None]:
# Naive positional split, kept for reference only:
# X_train, X_test = X[:3000], X[3000:]
from ampligraph.evaluation import train_test_split_no_unseen 

# Hold out 175 of the triples for testing; the rest become the training set.
# NOTE(review): going by its name, the helper presumably guarantees that the
# test set contains no entity/relation unseen in training — confirm in the docs.
X_train, X_test = train_test_split_no_unseen(X, test_size=175) 

In [None]:
print('Train set size: ', X_train.shape)
print('Test set size: ', X_test.shape)

Train set size:  (3000, 3)
Test set size:  (175, 3)


## 3. Training a model

In [None]:
#!pip install tensorflow==1.14

In [None]:
#%tensorflow_version 1.x
import tensorflow
print(tensorflow.__version__)

1.14.0


In [None]:
from ampligraph.latent_features import TransE, DistMult, ComplEx

In [None]:
# ComplEx model. The hyperparameters are identical to the ones used for the
# TransE (`model2`) and DistMult (`model3`) cells so the three can be compared.
model = ComplEx(
    k=150,
    eta=5,
    epochs=200,
    batches_count=100,
    seed=0,
    loss='multiclass_nll',
    optimizer='adam',
    optimizer_params={'lr': 1e-3},
    regularizer='LP',
    regularizer_params={'p': 3, 'lambda': 1e-5},
    verbose=True,
)

In [None]:
# TransE model. The hyperparameters are identical to the ones used for the
# ComplEx (`model`) and DistMult (`model3`) cells so the three can be compared.
model2 = TransE(
    k=150,
    eta=5,
    epochs=200,
    batches_count=100,
    seed=0,
    loss='multiclass_nll',
    optimizer='adam',
    optimizer_params={'lr': 1e-3},
    regularizer='LP',
    regularizer_params={'p': 3, 'lambda': 1e-5},
    verbose=True,
)

In [None]:
# DistMult model. The hyperparameters are identical to the ones used for the
# ComplEx (`model`) and TransE (`model2`) cells so the three can be compared.
model3 = DistMult(
    k=150,
    eta=5,
    epochs=200,
    batches_count=100,
    seed=0,
    loss='multiclass_nll',
    optimizer='adam',
    optimizer_params={'lr': 1e-3},
    regularizer='LP',
    regularizer_params={'p': 3, 'lambda': 1e-5},
    verbose=True,
)

## 4. Fitting the model

In [None]:
import tensorflow as tf
tf.__version__

'1.14.0'

In [None]:
# Restrict TensorFlow logging to ERROR level before training starts.
tf.logging.set_verbosity(tf.logging.ERROR)

# Train the ComplEx model on the training triples, with early stopping disabled.
model.fit(X_train, early_stopping = False)

Average ComplEx Loss:   0.018880: 100%|██████████| 200/200 [09:05<00:00,  2.73s/epoch]


In [None]:
model2.fit(X_train, early_stopping = False)

Average TransE Loss:   0.021648: 100%|██████████| 200/200 [04:38<00:00,  1.39s/epoch]


In [None]:
model3.fit(X_train, early_stopping = False)

Average DistMult Loss:   0.019057: 100%|██████████| 200/200 [04:50<00:00,  1.45s/epoch]


## 5. Saving and restoring a model

In [None]:
#from ampligraph.latent_features import save_model, restore_model

In [None]:
#save_model(model, './best_model.pkl')

## 6. Evaluating a model

In [None]:
from ampligraph.evaluation import evaluate_performance

In [None]:
positives_filter = X

In [None]:
# Evaluate the ComplEx model on the held-out test triples.
ranks = evaluate_performance(
    X_test,
    model=model,
    filter_triples=positives_filter,  # the complete triple set X, assigned in the previous cell
    use_default_protocol=True,  # corrupt subj and obj separately while evaluating
    verbose=True,
)



100%|██████████| 175/175 [00:03<00:00, 51.82it/s]


In [None]:
from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

# Aggregate the ComplEx ranks into MRR and Hits@N first ...
mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
hits_3 = hits_at_n_score(ranks, n=3)
hits_1 = hits_at_n_score(ranks, n=1)

# ... then report them together, rounded to two decimals.
print('ComplEx:')
print("MRR: %.2f" % mrr)
print("Hits@10: %.2f" % hits_10)
print("Hits@3: %.2f" % hits_3)
print("Hits@1: %.2f" % hits_1)

ComplEx:
MRR: 0.42
Hits@10: 0.55
Hits@3: 0.43
Hits@1: 0.35


In [None]:
# Evaluate TransE (`model2`) on the held-out triples with the same settings as
# the ComplEx evaluation, then report the same metrics.
ranks = evaluate_performance(
    X_test,
    model=model2,
    filter_triples=positives_filter,  # the complete triple set X
    use_default_protocol=True,  # corrupt subj and obj separately while evaluating
    verbose=True,
)

mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
hits_3 = hits_at_n_score(ranks, n=3)
hits_1 = hits_at_n_score(ranks, n=1)

print('TransE:')
print("MRR: %.2f" % mrr)
print("Hits@10: %.2f" % hits_10)
print("Hits@3: %.2f" % hits_3)
print("Hits@1: %.2f" % hits_1)



100%|██████████| 175/175 [00:00<00:00, 175.63it/s]


TransE:
MRR: 0.26
Hits@10: 0.39
Hits@3: 0.32
Hits@1: 0.17


In [None]:
# Evaluate DistMult (`model3`) on the held-out triples with the same settings
# as the ComplEx and TransE evaluations, then report the same metrics.
ranks = evaluate_performance(
    X_test,
    model=model3,
    filter_triples=positives_filter,  # the complete triple set X
    use_default_protocol=True,  # corrupt subj and obj separately while evaluating
    verbose=True,
)

mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
hits_3 = hits_at_n_score(ranks, n=3)
hits_1 = hits_at_n_score(ranks, n=1)

print('DistMult:')
print("MRR: %.2f" % mrr)
print("Hits@10: %.2f" % hits_10)
print("Hits@3: %.2f" % hits_3)
print("Hits@1: %.2f" % hits_1)



100%|██████████| 175/175 [00:01<00:00, 100.56it/s]


DistMult:
MRR: 0.42
Hits@10: 0.56
Hits@3: 0.46
Hits@1: 0.34


## 7. Predicting New Links

In [None]:
X_unseen = np.array([
    ['Jorah Mormont', 'SPOUSE', 'Daenerys Targaryen'],
    ['Tyrion Lannister', 'SPOUSE', 'Missandei'],
    ["King's Landing", 'SEAT_OF', 'House Lannister of Casterly Rock'],
    ['Sansa Stark', 'SPOUSE', 'Petyr Baelish'],
    ['Daenerys Targaryen', 'SPOUSE', 'Jon Snow'],
    ['Daenerys Targaryen', 'SPOUSE', 'Craster'],
    ['House Stark of Winterfell', 'IN_REGION', 'The North'],
    ['House Stark of Winterfell', 'IN_REGION', 'Dorne'],
    ['House Tyrell of Highgarden', 'IN_REGION', 'Beyond the Wall'],
    ['Brandon Stark', 'ALLIED_WITH', 'House Stark of Winterfell']
])

In [None]:
# Filter for the hypothetical triples: the known triples stacked with X_unseen
# itself. Going through a set of tuples drops duplicate rows.
unseen_filter = np.array(
    list(set(map(tuple, np.vstack((positives_filter, X_unseen)))))
)

In [None]:
# Rank the hypothetical triples with the ComplEx model.
ranks_unseen = evaluate_performance(
    X_unseen, 
    model=model, 
    filter_triples=unseen_filter,   # known triples plus X_unseen itself (built in the previous cell)
    corrupt_side = 's+o',
    # Default protocol is switched off here, unlike the test-set evaluations above,
    # so corrupt_side='s+o' applies and each statement gets a single rank (see the
    # one `rank` column in the table below).
    # NOTE(review): confirm exact 's+o' semantics in the AmpliGraph docs.
    use_default_protocol=False,
    verbose=True
)

100%|██████████| 10/10 [00:00<00:00, 21.01it/s]


In [None]:
scores = model.predict(X_unseen)

In [None]:
scores

array([-0.41005313, -0.7544201 ,  0.64468706,  2.448595  , -0.4941705 ,
        0.28912663,  3.023992  ,  0.7066382 , -0.5839975 ,  4.652188  ],
      dtype=float32)

---

In [None]:
from scipy.special import expit

# Squash the raw scores through the logistic sigmoid.
probs = expit(scores)

# One row per hypothetical triple: readable statement, rank, raw score and
# sigmoid-transformed score; rows are shown ordered by ascending score.
statements = [' '.join(triple) for triple in X_unseen]
rows = list(zip(statements, ranks_unseen, np.squeeze(scores), np.squeeze(probs)))
pd.DataFrame(rows, columns=['statement', 'rank', 'score', 'prob']).sort_values("score")

Unnamed: 0,statement,rank,score,prob
1,Tyrion Lannister SPOUSE Missandei,3394,-0.75442,0.319859
8,House Tyrell of Highgarden IN_REGION Beyond th...,3203,-0.583997,0.358013
4,Daenerys Targaryen SPOUSE Jon Snow,3140,-0.49417,0.378912
0,Jorah Mormont SPOUSE Daenerys Targaryen,2759,-0.410053,0.398899
5,Daenerys Targaryen SPOUSE Craster,1558,0.289127,0.571782
2,King's Landing SEAT_OF House Lannister of Cast...,741,0.644687,0.655812
7,House Stark of Winterfell IN_REGION Dorne,578,0.706638,0.669658
3,Sansa Stark SPOUSE Petyr Baelish,34,2.448595,0.920459
6,House Stark of Winterfell IN_REGION The North,7,3.023992,0.953646
9,Brandon Stark ALLIED_WITH House Stark of Winte...,1,4.652188,0.99055


# Task2-2 Relation Prediction

In [None]:
entities

array(['Abelar Hightower', 'Acorn Hall', 'Addam Frey', ..., 'the Antlers',
       'the Paps', 'unnamed tower'], dtype=object)

In [None]:
relations

array(['ALLIED_WITH', 'BRANCH_OF', 'FOUNDED_BY', 'HEIR_TO', 'IN_REGION',
       'LED_BY', 'PARENT_OF', 'SEAT_OF', 'SPOUSE', 'SWORN_TO'],
      dtype=object)

In [None]:
def _predict_relation(kge_model, s, o):
    """Return the relation that ``kge_model`` scores highest for the pair (s, o).

    One candidate triple ``[s, r, o]`` is built for every ``r`` in the
    notebook-level ``relations`` array, all candidates are scored with a single
    ``predict`` call, and the relation of the top-scoring candidate is returned.

    Args:
        kge_model: model object exposing ``predict(triples)``.
        s: subject entity label.
        o: object entity label.

    Returns:
        The relation label of the highest-scoring candidate; on ties the first
        one in ``relations`` order wins, as determined by ``np.argmax``.
    """
    candidates = np.array([[s, relation, o] for relation in relations])
    candidate_scores = kge_model.predict(candidates)
    # Column 1 of a candidate row is its relation label.
    return candidates[np.argmax(candidate_scores)][1]


def relation_ComplEx(s, o):
    """Best-scoring relation between ``s`` and ``o`` according to ComplEx (``model``)."""
    return _predict_relation(model, s, o)


def relation_TransE(s, o):
    """Best-scoring relation between ``s`` and ``o`` according to TransE (``model2``)."""
    return _predict_relation(model2, s, o)


def relation_DistMult(s, o):
    """Best-scoring relation between ``s`` and ``o`` according to DistMult (``model3``)."""
    return _predict_relation(model3, s, o)

print('ComplEx: ', relation_ComplEx('Jorah Mormont', 'Daenerys Targaryen'))
print('TransE: ', relation_TransE('Jorah Mormont', 'Daenerys Targaryen'))
print('DistMult: ', relation_DistMult('Jorah Mormont', 'Daenerys Targaryen'))

ComplEx:  BRANCH_OF
TransE:  SPOUSE
DistMult:  SPOUSE


# Task2-3 Nearest Neighbor Search

In [None]:
# Look up the exact entity labels that contain "Arya".
[name for name in entities if 'Arya' in name]

['Arya Flint', 'Arya Stark']

In [None]:
from ampligraph.discovery import find_nearest_neighbours

In [None]:
#ComplEx
neighbors, dist = find_nearest_neighbours(model,
                      entities=['Arya Stark'],
                      n_neighbors=5,
                      entities_subset=entities)
print('ComplEx: ', neighbors)

#TransE
neighbors, dist = find_nearest_neighbours(model2,
                      entities=['Arya Stark'],
                      n_neighbors=5,
                      entities_subset=entities)
print('TransE: ', neighbors)

#DistMult
neighbors, dist = find_nearest_neighbours(model3,
                      entities=['Arya Stark'],
                      n_neighbors=5,
                      entities_subset=entities)
print('DistMult: ', neighbors)

ComplEx:  [['Arya Stark' 'Rickon Stark' 'Skittrick' 'Donnis' 'TomToo']]
TransE:  [['Arya Stark' 'Skittrick' 'Nan' 'Porther' 'Edwyn Stark']]
DistMult:  [['Arya Stark' 'Rickon Stark' 'Farlen' 'TomToo' 'Porther']]
