In [1]:
import os
import numpy as np
from ampligraph.evaluation import train_test_split_no_unseen
from ampligraph.evaluation import evaluate_performance
from ampligraph.latent_features import TransE
from ampligraph.datasets import load_from_csv
from ampligraph.utils import create_tensorboard_visualizations
from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score
from ampligraph.discovery import *

In [2]:
datasetPath = os.getcwd()
datasetCsvFilename = 'output.csv'
separator = ','

In [3]:
X = load_from_csv(datasetPath, datasetCsvFilename, sep=separator)

In [4]:
num_test = int(len(X) * (20 / 100)) #20% test, 80% train

In [5]:
X_train, X_test = train_test_split_no_unseen(X, test_size=num_test)

In [6]:
model = TransE(batches_count=1, seed=555, epochs=20, k=2, loss='pairwise', loss_params={'margin':5})



In [7]:
model.fit(X_train)

In [8]:
model.predict(X_test)

array([-0.78648734, -0.45819777, -0.5543292 , ..., -0.6693086 ,
       -0.28723234, -0.4304322 ], dtype=float32)

Both entities below are _Malicious_

In [9]:
model.get_embeddings(['CUmrqr4svHuSXJy5z7','CH98aB3s1kJeq6SFOc'], embedding_type='entity')

array([[-0.00515619, -0.01402817],
       [-0.00719168, -0.0088463 ]], dtype=float32)

## Evaluate performance of fitted X_Test data

In [10]:
positives_filter = X

In [11]:
ranks = evaluate_performance(X_test, 
                             model=model,
                             filter_triples=positives_filter,   # Corruption strategy filter defined above 
                             use_default_protocol=True, # corrupt subj and obj separately while evaluating
                             verbose=True)

    protocol. This may be unnecessary and will lead to a 'harder' task. Besides, it will lead to a much slower
    evaluation procedure. We recommended to set the 'corruption_entities' argument to a reasonably sized set
    of entities. The size of corruption_entities depends on your domain-specific task.


    protocol. This may be unnecessary and will lead to a 'harder' task. Besides, it will lead to a much slower
    evaluation procedure. We recommended to set the 'corruption_entities' argument to a reasonably sized set
    of entities. The size of corruption_entities depends on your domain-specific task.


### Metrics

In [12]:
mrr = mrr_score(ranks)
print("MRR: %.2f" % (mrr))

hits_10 = hits_at_n_score(ranks, n=10)
print("Hits@10: %.2f" % (hits_10))
hits_3 = hits_at_n_score(ranks, n=3)
print("Hits@3: %.2f" % (hits_3))
hits_1 = hits_at_n_score(ranks, n=1)
print("Hits@1: %.2f" % (hits_1))

MRR: 0.01
Hits@10: 0.02
Hits@3: 0.01
Hits@1: 0.01


## Generate Files for Visualization
After running the next cell, you may upload the `embeddings_projector.tsv` and `metadata.tsv` to [https://projector.tensorflow.org/](https://projector.tensorflow.org/)

In [13]:
create_tensorboard_visualizations(model, 'tensorboard_files')

## Discovering Facts

In [14]:
discover_facts(X_test, model, top_n=10, target_rel='label')

    protocol. This may be unnecessary and will lead to a 'harder' task. Besides, it will lead to a much slower
    evaluation procedure. We recommended to set the 'corruption_entities' argument to a reasonably sized set
    of entities. The size of corruption_entities depends on your domain-specific task.


(array([], shape=(0, 3), dtype=object), array([], dtype=float64))