In [1]:
!pip install ampligraph

Collecting ampligraph
  Downloading ampligraph-2.0.1-py3-none-any.whl (204 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m204.0/204.0 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting myst-parser==0.18.0 (from ampligraph)
  Downloading myst_parser-0.18.0-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.8/57.8 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docutils<0.18 (from ampligraph)
  Downloading docutils-0.17.1-py2.py3-none-any.whl (575 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m575.5/575.5 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sphinx-rtd-theme==1.0.0 (from ampligraph)
  Downloading sphinx_rtd_theme-1.0.0-py2.py3-none-any.whl (2.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sphinxcontrib-bibtex==2.4.2 (from ampligraph)
  Downloading sphinxcontrib_bibte

In [1]:
import tensorflow as tf
import ampligraph as ag
import numpy as np
import pandas as pd
from ampligraph.datasets import load_from_csv
from ampligraph.latent_features import ScoringBasedEmbeddingModel
from ampligraph.evaluation import train_test_split_no_unseen
from ampligraph.evaluation import mr_score, hits_at_n_score
from ampligraph.latent_features.loss_functions import get as get_loss
from ampligraph.latent_features.regularizers import get as get_regularizer
from ampligraph.utils import create_tensorboard_visualizations
from ampligraph.utils import save_model, restore_model
from ampligraph.datasets import load_wn18

tf_version = tf.__version__
ag_version = ag.__version__

print('Tensorflow', tf_version)
print('Ampligraph', ag_version)

Tensorflow 2.15.0
Ampligraph 2.0.1


In [2]:
data = load_from_csv('./sample_data', 'medical_knowledge_graph.csv', sep=',')[1:]
X_train_valid, X_test = train_test_split_no_unseen(data, test_size = int(len(data)*0.15), seed=42)
X_train, X_valid = train_test_split_no_unseen(X_train_valid, test_size = int(len(data)*0.10))
X = {
    'train' : X_train,
    'test' : X_test,
    'valid': X_valid
}
print('Train Set Size', len(X['train']))
print('Test Set Size', len(X['test']))
print('Validation Set Size', len(X['valid']))

Train Set Size 375
Test Set Size 75
Validation Set Size 50


In [3]:
'''
TransE - Custom
'''

model = 'TransE'
k = 10
epochs = 50
eta = 4
loss = 'pairwise'
loss_params = {'margin': 0.1}
regularizer = 'LP'
regularizer_params = {'lambda': 0.0001, 'p': 2}
optimizer = 'adam'
optimizer_params = {'learning_rate': 0.0001}

model = ScoringBasedEmbeddingModel(k=k, eta=eta, scoring_type=model)
optim = tf.keras.optimizers.get(optimizer)
loss = get_loss(loss, loss_params)
regularizer = get_regularizer(regularizer, regularizer_params)
model.compile(optimizer=optim, loss=loss, entity_relation_regularizer=regularizer)

model.fit(X['train'],
          batch_size=int(X['train'].shape[0] / 10),
          epochs=epochs,
          verbose=False
          )

ranks = model.evaluate(X['test'],
                       use_filter=None,
                       corrupt_side='s,o',
                       verbose=False)

CD_transE_mean = round(mr_score(ranks), 3)
CD_transE_hits_10 = round(hits_at_n_score(ranks, n=10), 3)

In [5]:
'''
DistMult - Custom
'''

model = 'DistMult'
k = 10
epochs = 50
eta = 4
loss = 'pairwise'
loss_params = {'margin': 0.1}
regularizer = 'LP'
regularizer_params = {'lambda': 0.0001, 'p': 2}
optimizer = 'adam'
optimizer_params = {'learning_rate': 0.0001}

model = ScoringBasedEmbeddingModel(k=k, eta=eta, scoring_type=model)
optim = tf.keras.optimizers.get(optimizer)
loss = get_loss(loss, loss_params)
regularizer = get_regularizer(regularizer, regularizer_params)
model.compile(optimizer=optim, loss=loss, entity_relation_regularizer=regularizer)

model.fit(X['train'],
          batch_size=int(X['train'].shape[0] / 10),
          epochs=epochs,
          verbose=False
          )

ranks = model.evaluate(X['test'],
                       use_filter=None,
                       corrupt_side='s,o',
                       verbose=False)

CD_DistMult_mean = round(mr_score(ranks), 3)
CD_DistMult_hits_10 = round(hits_at_n_score(ranks, n=10), 3)

In [9]:
'''
ComplEx - Custom
'''

model = 'ComplEx'
k = 10
epochs = 50
eta = 4
loss = 'pairwise'
loss_params = {'margin': 0.1}
regularizer = 'LP'
regularizer_params = {'lambda': 0.0001, 'p': 2}
optimizer = 'adam'
optimizer_params = {'learning_rate': 0.0001}

model = ScoringBasedEmbeddingModel(k=k, eta=eta, scoring_type=model)
optim = tf.keras.optimizers.get(optimizer)
loss = get_loss(loss, loss_params)
regularizer = get_regularizer(regularizer, regularizer_params)
model.compile(optimizer=optim, loss=loss, entity_relation_regularizer=regularizer)

model.fit(X['train'],
          batch_size=int(X['train'].shape[0] / 10),
          epochs=epochs,
          verbose=False
          )

ranks = model.evaluate(X['test'],
                       use_filter=None,
                       corrupt_side='s,o',
                       verbose=False)

CD_ComplEx_mean = round(mr_score(ranks), 3)
CD_ComplEx_hits_10 = round(hits_at_n_score(ranks, n=10), 3)

In [13]:
from ampligraph.latent_features import ConvE

'''
ConvE - Custom
'''

model = 'ConvE'
k = 10
epochs = 50
eta = 4
loss = 'pairwise'
loss_params = {'margin': 0.1}
regularizer = 'LP'
regularizer_params = {'lambda': 0.0001, 'p': 2}
optimizer = 'adam'
optimizer_params = {'learning_rate': 0.0001}

model = ScoringBasedEmbeddingModel(k=k, eta=eta, scoring_type=model)
optim = tf.keras.optimizers.get(optimizer)
loss = get_loss(loss, loss_params)
regularizer = get_regularizer(regularizer, regularizer_params)
model.compile(optimizer=optim, loss=loss, entity_relation_regularizer=regularizer)

model.fit(X['train'],
          batch_size=int(X['train'].shape[0] / 10),
          epochs=epochs,
          verbose=False
          )

ranks = model.evaluate(X['test'],
                       use_filter=None,
                       corrupt_side='s,o',
                       verbose=False)

CD_ComplEx_mean = round(mr_score(ranks), 3)
CD_ComplEx_hits_10 = round(hits_at_n_score(ranks, n=10), 3)

ImportError: cannot import name 'ConvE' from 'ampligraph.latent_features' (/usr/local/lib/python3.10/dist-packages/ampligraph/latent_features/__init__.py)

In [10]:
# Define the algorithms
models = ["TransE", "DistMult", "ComplEx", "ConvE"]

# Placeholder metrics - replace None with actual values
metrics = {
    "Custom Data (hits10/mean)": [f'{CD_transE_hits_10}/{CD_transE_mean}',
                                  f'{CD_DistMult_hits_10}/{CD_DistMult_mean}',
                                  f'{CD_ComplEx_hits_10}/{CD_ComplEx_mean}',
                                  f'{CD_ConvE_hits_10}/{CD_ConvE_mean}'],
}

# Create the DataFrame
eval_df = pd.DataFrame(metrics, index=models)

# Display the DataFrame
print(eval_df)

         Custom Data (hits10/mean)
TransE                0.333/14.913
DistMult              0.387/15.073
ComplEx                 0.213/18.5
ConvE                    None/None
