In [1]:
import os
import time
import json
import argparse

import torch
import numpy as np

from pykeen.pipeline import pipeline
from pykeen.datasets import get_dataset


In [2]:
def train_and_measure(model_name: str, dataset_name: str, epochs: int = 50,
                      batch_size: int = 256, device: str | None = None,
                      inference_batch_size: int = 1024,
                      output_dir: str = "./pykeen_results"):
    os.makedirs(output_dir, exist_ok=True)

    print(f"Training model={model_name!r} on dataset={dataset_name!r} | epochs={epochs} | batch_size={batch_size} | device={device}")

    # 1) TREINO: tempo "wall-clock" medido externamente + pipeline() (que também retorna train_seconds)
    t0 = time.perf_counter()
    result = pipeline(
        model=model_name,
        dataset=dataset_name,
        epochs=epochs,  # atalho para training_kwargs['num_epochs']
        device=device,  # aceita 'cuda' / 'cpu' / torch.device
        training_kwargs=dict(batch_size=batch_size, use_tqdm_batch=False),  # use_tqdm_batch=False para logs mais limpos
    )
    t1 = time.perf_counter()
    wallclock_train_seconds = t1 - t0

    # pipeline returns PipelineResult; try extrair os tempos informados internamente
    train_seconds_reported = getattr(result, "train_seconds", None)
    evaluate_seconds_reported = getattr(result, "evaluate_seconds", None)

    print(f"Wall-clock total (pipeline call) = {wallclock_train_seconds:.3f} s")
    if train_seconds_reported is not None:
        print(f"PipelineResult.train_seconds = {train_seconds_reported:.3f} s")
    if evaluate_seconds_reported is not None:
        print(f"PipelineResult.evaluate_seconds = {evaluate_seconds_reported:.3f} s")

In [6]:
# Time ConvE on the Nations dataset (CPU, 50 epochs).
# output_dir is relative to the working directory, matching the function's
# default, instead of pointing at the filesystem root.
# NOTE(review): the captured output shows PyKEEN warning that ConvE should be
# trained with inverse triples; this call does not enable them - confirm
# whether that is intended for the timing comparison.
train_and_measure(
    model_name='ConvE',
    dataset_name='Nations',
    epochs=50,
    batch_size=256,
    device='cpu',
    inference_batch_size=1024,
    output_dir='./pykeen_results',
)

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: cpu
The ConvE model should be trained with inverse triples.
This can be done by defining the TriplesFactory class with the _create_inverse_triples_ parameter set to true.
INFO:pykeen.nn.modules:Resolving None * None * None = 200.
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()
INFO:pykeen.nn.representation:Inferred u

Training model='ConvE' on dataset='Nations' | epochs=50 | batch_size=256 | device=cpu




Training epochs on cpu:   0%|          | 0/50 [00:00<?, ?epoch/s]

INFO:pykeen.training.training_loop:Dropping last (incomplete) batch each epoch (1/6 (16.67%) batches).


Evaluating on cpu:   0%|          | 0.00/201 [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 0.16s seconds


Wall-clock total (pipeline call) = 22.824 s
PipelineResult.train_seconds = 22.603 s
PipelineResult.evaluate_seconds = 0.163 s


In [7]:
# Time TransE on the Nations dataset with the same settings as the ConvE run.
# output_dir is relative to the working directory, matching the function's
# default, instead of pointing at the filesystem root.
train_and_measure(
    model_name='TransE',
    dataset_name='Nations',
    epochs=50,
    batch_size=256,
    device='cpu',
    inference_batch_size=1024,
    output_dir='./pykeen_results',
)

INFO:pykeen.datasets.utils:Loading cached preprocessed dataset from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/training
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/testing
INFO:pykeen.triples.triples_factory:Loading from file:///C:/Users/grkremer/.data/pykeen/datasets/nations/cache/47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM/validation
INFO:pykeen.pipeline.api:Using device: cpu
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()


Training model='TransE' on dataset='Nations' | epochs=50 | batch_size=256 | device=cpu


Training epochs on cpu:   0%|          | 0/50 [00:00<?, ?epoch/s]



Evaluating on cpu:   0%|          | 0.00/201 [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 0.08s seconds


Wall-clock total (pipeline call) = 6.294 s
PipelineResult.train_seconds = 6.172 s
PipelineResult.evaluate_seconds = 0.082 s
