### 1. Framework imports

In [None]:
import os
import time
import json
import argparse
import pandas as pd

import torch
import numpy as np

from pykeen.pipeline import pipeline
from pykeen.datasets import get_dataset, dataset_resolver




### 2. Defining function for training and testing

In [None]:
def run_experiment(model_name: str, dataset_name: str, epochs: int = 100,
                   batch_size: int = 256, device: str = "cuda",
                   inference_batch_size: int = 1024, seed: int = 42,
                   n_tests: int = 1) -> pd.DataFrame:
    metrics = []
    models = []
    for i in range(n_tests):
        seed = seed + i
        np.random.seed(seed)
        # --- Treino + avaliação do PyKEEN
        result = pipeline(
            model=model_name,
            dataset=dataset_name,
            epochs=epochs,
            device=device,
            random_seed=seed,
            training_kwargs=dict(batch_size=batch_size, use_tqdm_batch=False),
        )

        # extrair tempos do pipeline
        train_time = getattr(result, "train_seconds", None)
        eval_time = getattr(result, "evaluate_seconds", None)

        # métricas do teste
        mrr = result.metric_results.get_metric('both.realistic.inverse_harmonic_mean_rank')
        hits10 = result.metric_results.get_metric('both.realistic.hits_at_10')

        # --- Tempo de inferência pura 
        dataset = dataset_resolver.lookup(dataset_name)()
        triples = dataset.testing.mapped_triples
        n_test = triples.shape[0]
        print(eval_time/n_test)

        model = result.model
        device_torch = model.device
        triples_tensor = torch.tensor(triples, dtype=torch.long, device=device_torch)

        with torch.inference_mode():
            infer_t0 = time.perf_counter()
            for i in range(0, n_test, inference_batch_size):
                batch = triples_tensor[i:i+inference_batch_size]
                _ = model.score_hrt(batch)
            torch.cuda.synchronize()
            infer_t1 = time.perf_counter()
        infer_time = infer_t1 - infer_t0
        infer_time = infer_time/n_test
        print(infer_time)

        # --- montar resultado em forma de DataFrame (1 linha)
        df = pd.DataFrame([{
            "model": model_name,
            "dataset": dataset_name,
            "seed": seed,
            "epochs": epochs,
            "train_time": train_time,
            "eval_time": eval_time,
            "inference_time": infer_time,
            "mrr": mrr,
            "hits@10": hits10,
        }])
        metrics.append(df)
        models.append(result)
        
        
    return pd.concat(metrics, ignore_index=True), models

# Exemplo de uso
if __name__ == "__main__":
    df, models = run_experiment(model_name="QuatE", dataset_name="Nations", epochs=100, device="cuda", seed=1, n_tests = 1, inference_batch_size=1)
    df

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: cuda
INFO:pykeen.nn.representation:Inferred unique=False for Embedding(
  (regularizer): LpRegularizer()
)
INFO:pykeen.nn.representation:Inferred unique=False for Embedding(
  (regularizer): LpRegularizer()
)


Training epochs on cuda:0:   0%|          | 0/100 [00:00<?, ?epoch/s]

Evaluating on cuda:0:   0%|          | 0.00/201 [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 0.06s seconds


0.00031957341663873016
0.0005511323381586009


  triples_tensor = torch.tensor(triples, dtype=torch.long, device=device_torch)


In [228]:
models[0]

PipelineResult(random_seed=1, model=QuatE(
  (loss): MarginRankingLoss(
    (margin_activation): ReLU()
  )
  (interaction): QuatEInteraction()
  (entity_representations): ModuleList(
    (0): Embedding(
      (regularizer): LpRegularizer()
      (_embeddings): Embedding(14, 400)
    )
  )
  (relation_representations): ModuleList(
    (0): Embedding(
      (regularizer): LpRegularizer()
      (_embeddings): Embedding(55, 400)
    )
  )
  (weight_regularizers): ModuleList()
), training=TriplesFactory(num_entities=14, num_relations=55, create_inverse_triples=False, num_triples=1592, path="C:\Users\grkremer\miniconda3\Lib\site-packages\pykeen\datasets\nations\train.txt"), training_loop=<pykeen.training.slcwa.SLCWATrainingLoop object at 0x000002B28CA774D0>, losses=[1.0397261551448278, 1.0138601575578963, 1.000816285610199, 0.9945494106837681, 0.9850840057645526, 0.9731997421809605, 0.9576020070484706, 0.942274911063058, 0.9318392361913409, 0.9124024254935128, 0.8972616195678711, 0.87063996

In [230]:
triples.shape[0]

201