In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so that
# later cells can read the ontology from it and write the trained model to it.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip install rdflib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rdflib
  Downloading rdflib-6.1.1-py3-none-any.whl (482 kB)
[K     |████████████████████████████████| 482 kB 22.7 MB/s 
Collecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 542 kB/s 
Installing collected packages: isodate, rdflib
Successfully installed isodate-0.6.1 rdflib-6.1.1


In [None]:
!pip install "tensorflow-gpu>=1.15.2,<2.0" ampligraph

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-gpu<2.0,>=1.15.2
  Downloading tensorflow_gpu-1.15.5-cp37-cp37m-manylinux2010_x86_64.whl (411.0 MB)
[K     |████████████████████████████████| 411.0 MB 25 kB/s 
[?25hCollecting ampligraph
  Downloading ampligraph-1.4.0-py3-none-any.whl (168 kB)
[K     |████████████████████████████████| 168 kB 63.8 MB/s 
Collecting numpy<1.19.0,>=1.16.0
  Downloading numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl (20.1 MB)
[K     |████████████████████████████████| 20.1 MB 1.1 MB/s 
Collecting h5py<=2.10.0
  Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 6.9 MB/s 
Collecting gast==0.2.2
  Downloading gast-0.2.2.tar.gz (10 kB)
Collecting keras-applications>=1.0.8
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 4.1 MB/s 
[?25hCollecting tensorboard<1.16

In [None]:
import ampligraph

# Display the AmpliGraph version that is actually importable in this runtime.
ampligraph.__version__

'1.4.0'

In [None]:
import tensorflow as tf

# Report whether TensorFlow can see a GPU in this runtime.
tf.test.is_gpu_available()

False

In [None]:
import rdflib

# Location of the ontology file on the mounted Drive.
ontology_path = '/content/gdrive/MyDrive/Графы/filled_games_cut.owl'

# Load the ontology into a fresh rdflib graph; no explicit serialization
# format is passed to parse().
graph = rdflib.Graph()
graph.parse(source=ontology_path)

<Graph identifier=Nc7159a0b97094231b68e0cc78abf5b9c (<class 'rdflib.graph.Graph'>)>

In [None]:
import numpy as np
import pandas as pd

# Materialise every triple yielded by iterating over the graph.
triples = [triple for triple in graph]

# One row per triple; rows with missing values are discarded.
column_names = ['subject', 'predicate', 'object']
triples_df = pd.DataFrame(triples, columns=column_names)
triples_df = triples_df.dropna()

In [108]:
# Preview the triples table (pandas truncates the display for long frames).
triples_df

Unnamed: 0,subject,predicate,object
0,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...,"The game is mechanically, fairly simple, but s..."
1,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...,Many of the most critical reviews below are ve...
2,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...
3,http://www.semanticweb.org/naymoll/ontologies/...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.semanticweb.org/naymoll/ontologies/...
4,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...
...,...,...,...
30150,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...,2022-05-26T23:27:55
30151,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...,1
30152,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...,5
30153,http://www.semanticweb.org/naymoll/ontologies/...,http://www.semanticweb.org/naymoll/ontologies/...,2


In [None]:
import re

# Matches everything from a leading "http" up to the LAST '/' or '#', i.e. the
# namespace part of an IRI (the match is greedy).
prefix_reg = re.compile('^http.+[/#]')

def remove_prefixes(row):
  """Strip the namespace prefix from each term of one triple.

  Args:
    row: iterable with the three terms of a triple (subject, predicate,
      object). Each term is converted with ``str()`` before processing.

  Returns:
    pd.Series indexed by ``['subject', 'predicate', 'object']`` holding the
    shortened terms.

  Terms are returned unchanged (as strings) when:
    * they do not start with "http";
    * they contain whitespace - such values are treated as free text rather
      than IRIs, so text that merely starts with a URL is not truncated;
    * stripping the prefix would leave an empty name (the term ends in '/' or
      '#'), which would otherwise merge distinct IRIs into one '' entity.
  """
  new_items = []
  for item in row:
    item_str = str(item)

    # Free text (contains whitespace) is kept verbatim.
    if any(ch.isspace() for ch in item_str):
      new_items.append(item_str)
      continue

    local_name = prefix_reg.split(item_str)[-1]
    # Fall back to the full string when nothing is left after the prefix.
    new_items.append(local_name if local_name else item_str)

  return pd.Series(new_items, index = ['subject', 'predicate', 'object'])

In [None]:
# Shorten every term row by row: each row (one triple) is passed to
# remove_prefixes.
triples_df = triples_df.apply(remove_prefixes, axis='columns')

In [None]:
from ampligraph.evaluation import train_test_split_no_unseen

# Number of triples held out for validation.
holdout_size = 10000

# Convert the table to a plain array and split it with AmpliGraph's
# "no unseen" splitter.
mapped_triples = np.array(triples_df)
X_train, X_valid = train_test_split_no_unseen(mapped_triples,
                                              test_size=holdout_size)

In [None]:
from ampligraph.latent_features import ComplEx

# Hyper-parameters collected in one mapping so they can be inspected or
# tweaked in a single place before the model is built.
complex_params = dict(
  batches_count=50,
  epochs=300,
  k=100,
  eta=20,
  optimizer='adam',
  optimizer_params={'lr': 1e-4},
  loss='multiclass_nll',
  regularizer='LP',
  regularizer_params={'p': 3, 'lambda': 1e-5},
  seed=0,
  verbose=True,
)

model = ComplEx(**complex_params)

In [None]:
# Train the ComplEx model on the training split only.
model.fit(X_train)

Average ComplEx Loss:   0.081710: 100%|██████████| 300/300 [17:45<00:00,  3.55s/epoch]


In [None]:
# Training and validation triples stacked row-wise; this array is handed to
# evaluate_performance as `filter_triples`.
filter_triples = np.concatenate([X_train, X_valid], axis=0)

In [None]:
from ampligraph.evaluation import evaluate_performance

# Compute ranks for the validation triples with the trained model.
# `filter_triples` supplies the known triples for the filtered evaluation
# setting (see the AmpliGraph evaluate_performance documentation).
ranks = evaluate_performance(X_valid,
                             model=model,
                             filter_triples=filter_triples,
                             use_default_protocol=True,
                             verbose=True)



100%|██████████| 10000/10000 [07:56<00:00, 20.97it/s]


In [None]:
from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

# Derive every ranking metric from `ranks` first ...
mr = mr_score(ranks)
mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
hits_3 = hits_at_n_score(ranks, n=3)
hits_1 = hits_at_n_score(ranks, n=1)

# ... then report them, two decimals each, in a fixed order.
report = (
  ("MRR: %.2f", mrr),
  ("MR: %.2f", mr),
  ("Hits@10: %.2f", hits_10),
  ("Hits@3: %.2f", hits_3),
  ("Hits@1: %.2f", hits_1),
)
for template, score in report:
  print(template % (score))

MRR: 0.37
MR: 647.55
Hits@10: 0.47
Hits@3: 0.40
Hits@1: 0.31


In [None]:
# Import save_model explicitly: `import ampligraph` alone does not guarantee
# that the `ampligraph.utils` submodule is loaded, so attribute access on the
# package only works if some other import happened to pull it in.
from ampligraph.utils import save_model

# Persist the trained model to Google Drive.
save_model(model, model_name_path='/content/gdrive/MyDrive/Графы/games.pkl')