In [1]:
! pip install git+https://github.com/pykeen/pykeen.git

Collecting git+https://github.com/pykeen/pykeen.git
  Cloning https://github.com/pykeen/pykeen.git to /tmp/pip-req-build-bgryqlur
  Running command git clone --filter=blob:none --quiet https://github.com/pykeen/pykeen.git /tmp/pip-req-build-bgryqlur
  Resolved https://github.com/pykeen/pykeen.git to commit dbdfb0261002d1f4405710aba190f298424c72e6
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting dataclasses-json (from pykeen==1.11.2.dev0)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting click_default_group (from pykeen==1.11.2.dev0)
  Downloading click_default_group-1.2.4-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting optuna>=2.0.0 (from pykeen==1.11.2.dev0)
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting more_click (from pykeen==1.11.2.dev0)
  Downloading more_click-0.1.2-py3-none-any.whl.metadat

In [2]:
!pip install pykeen

Collecting pykeen
  Downloading pykeen-1.11.1-py3-none-any.whl.metadata (85 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/85.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.9/85.9 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dataclasses-json (from pykeen)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting click_default_group (from pykeen)
  Downloading click_default_group-1.2.4-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting optuna>=2.0.0 (from pykeen)
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting more_click (from pykeen)
  Downloading more_click-0.1.2-py3-none-any.whl.metadata (4.3 kB)
Collecting pystow>=0.4.3 (from pykeen)
  Downloading pystow-0.7.8-py3-none-any.whl.metadata (17 kB)
Collecting docdata>=0.0.5 (from pykeen)
  Downloading docdata-0.0.5-py3-none-any.whl.metadata (13 kB)
Collecting class_resolver>=0.6.0 (from 

In [1]:
import torch
print(torch.cuda.is_available())  # should return True
print(torch.cuda.get_device_name(0))  # prints the GPU model

True
NVIDIA A100-SXM4-40GB


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import warnings
warnings.filterwarnings("ignore", category=FutureWarning, message="use_inf_as_na option is deprecated")

import os, time, json, random, math
from typing import Dict, Optional, List, Tuple
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from pykeen.triples import TriplesFactory
from pykeen.pipeline import pipeline
from pykeen.evaluation import RankBasedEvaluator

# ======================
# User paths
# ======================
TRAIN_CSV = "/content/drive/MyDrive/transD_dif/positive_dataset.csv"
TEST_CSV  = "/content/drive/MyDrive/transD_dif/test_dataset_with_pubmed_count.csv"
BASE_PARENT = "/content/drive/MyDrive/transD_dif/GRID_MIN"  # parent folder for all runs

# The specific rows you want to track (present in test CSV as 'TotalHits')
SELECTED_HITS = [45809, 36808, 19658, 16575, 11737]

# ======================
# Hyperparameter variants
# ======================

CONFIGS = [
  dict(seed=73, embedding_dim=256, relation_dim=128, epochs_transd=300,
       denoiser_epochs=600, denoiser_lr=5e-4, batch_size_denoiser=8192,
       sigma_min=0.01, sigma_max=0.50, ema_decay=0.9995,
       refine_sigmas=(0.50, 0.25, 0.12, 0.06, 0.03),
       refine_steps_per_sigma=2, refine_step_scale=0.15)
]

# ======================
# Global switches
# ======================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Using device: {device}")

# In most PyKEEN versions, higher score = more plausible.
SCORE_HIGHER_IS_BETTER = True

def ensure_dir(p: str):
    os.makedirs(p, exist_ok=True)

def set_deterministic(seed: int):
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def _pick(flat: Dict[str, float], *keys, default=float("nan")) -> float:
    for k in keys:
        if k in flat:
            return flat[k]
    return default

def round4(x):
    try:
        return None if x is None else float(np.round(x, 4))
    except Exception:
        return x

def evaluate_model(mdl, label: str, test_factory, train_factory) -> Dict[str, float]:
    evaluator = RankBasedEvaluator()
    metrics = evaluator.evaluate(
        model=mdl,
        mapped_triples=test_factory.mapped_triples.to(device),
        additional_filter_triples=[train_factory.mapped_triples.to(device)],
    )
    flat = metrics.to_flat_dict()
    return {
        "label": label,
        "mrr":     _pick(flat, "both.realistic.mrr", "both.realistic.mean_reciprocal_rank"),
        "hits@1":  _pick(flat, "both.realistic.hits_at_k.1", "both.realistic.hits_at_1", "both.realistic.hits@1"),
        "hits@3":  _pick(flat, "both.realistic.hits_at_k.3", "both.realistic.hits_at_3", "both.realistic.hits@3"),
        "hits@10": _pick(flat, "both.realistic.hits_at_k.10", "both.realistic.hits_at_10", "both.realistic.hits@10"),
        "mr":      _pick(flat, "both.realistic.mr", "both.realistic.arithmetic_mean_rank"),
        "raw": flat,
    }

@torch.no_grad()
def get_entity_base_matrix(mdl) -> torch.Tensor:
    return mdl.entity_representations[0](indices=None).detach()

def set_entity_base_matrix_(mdl, new_weights: torch.Tensor):
    rep = mdl.entity_representations[0]
    rep._embeddings.weight.data = new_weights.to(rep._embeddings.weight.device)

def write_scored_csv(csv_path: str, mdl, test_factory, df_original: pd.DataFrame,
                     normalize: bool = True, sort_by_score: bool = True,
                     score_col: str = "Score", raw_col: str = "ScoreRaw"):
    mapped = test_factory.mapped_triples.to(device)
    with torch.no_grad():
        scores = mdl.score_hrt(mapped).detach().cpu().numpy().astype(float)

    if normalize:
        smin, smax = float(np.min(scores)), float(np.max(scores))
        scores_norm = 2 * (scores - smin) / (smax - smin) - 1.0 if smax > smin else np.zeros_like(scores, dtype=float)
    else:
        scores_norm = scores.copy()

    df_out = df_original.copy()
    df_out[raw_col] = scores
    df_out[score_col] = scores_norm

    if sort_by_score:
        df_out = df_out.sort_values(score_col, ascending=not SCORE_HIGHER_IS_BETTER).reset_index(drop=True)

    ensure_dir(os.path.dirname(csv_path))
    df_out.to_csv(csv_path, index=False)
    print(f"[INFO] Wrote {csv_path} | shape={df_out.shape}")

def extract_ranks_and_scores(csv_path: str, selected: List[int],
                             score_col: str = "Score") -> Dict[int, Dict[str, Optional[float]]]:
    df = pd.read_csv(csv_path).sort_values(score_col, ascending=not SCORE_HIGHER_IS_BETTER).reset_index(drop=True)
    if "TotalHits" not in df.columns:
        raise ValueError(f"'TotalHits' column not found in {csv_path}")
    results = {}
    for hit in selected:
        match = df[df["TotalHits"] == hit]
        if not match.empty:
            row = match.iloc[0]
            rank = int(row.name + 1)  # 1-based
            results[hit] = {"rank": rank, "score": float(row[score_col])}
        else:
            results[hit] = {"rank": None, "score": None}
    return results

# ======================
# Diffusion-style denoiser
# ======================

class SinusoidalTimeEmbedding(nn.Module):
    """Standard Fourier features for time/noise level."""
    def __init__(self, dim: int = 64, max_period: float = 10000.0):
        super().__init__()
        self.dim = dim
        self.max_period = max_period

    def forward(self, t: torch.Tensor) -> torch.Tensor:
        # t is any positive scalar per-sample: we feed log_sigma to the embedder
        half = self.dim // 2
        freqs = torch.exp(-math.log(self.max_period) * torch.arange(half, device=t.device) / (half - 1 + 1e-12))
        args = t[:, None] * freqs[None, :]
        emb = torch.cat([torch.sin(args), torch.cos(args)], dim=-1)
        if self.dim % 2 == 1:
            emb = torch.nn.functional.pad(emb, (0,1))
        return emb

class ResBlock(nn.Module):
    def __init__(self, dim, t_dim, hidden):
        super().__init__()
        self.fc1 = nn.Linear(dim, hidden)
        self.fc2 = nn.Linear(hidden, dim)
        self.tproj = nn.Linear(t_dim, hidden)
        self.norm1 = nn.LayerNorm(hidden)
        self.act = nn.SiLU()

    def forward(self, x, temb):
        h = self.fc1(x) + self.tproj(temb)
        h = self.norm1(h)
        h = self.act(h)
        h = self.fc2(h)
        return x + h

class DiffusionDenoiser(nn.Module):
    """
    Predicts epsilon (noise) at multiple noise levels sigma.
    Score estimate: s_theta(x, sigma) ~= -eps_theta(x, sigma)/sigma
    """
    def __init__(self, dim: int, t_dim: int = 64, hidden: int = 512, n_blocks: int = 4):
        super().__init__()
        self.time_emb = SinusoidalTimeEmbedding(t_dim)
        self.inp = nn.Linear(dim, dim)
        self.blocks = nn.ModuleList([ResBlock(dim, t_dim, hidden) for _ in range(n_blocks)])
        self.out = nn.Linear(dim, dim)

        # init
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight); nn.init.zeros_(m.bias)

    def forward(self, x: torch.Tensor, sigma: torch.Tensor) -> torch.Tensor:
        # sigma: (N,) positive; use log-sigma for embedding stability
        log_sigma = torch.log(sigma.clamp_min(1e-8))
        temb = self.time_emb(log_sigma)  # (N, t_dim)
        h = self.inp(x)
        for blk in self.blocks:
            h = blk(h, temb)
        eps_pred = self.out(torch.nn.functional.silu(h))
        return eps_pred

class EMA:
    """Exponential Moving Average wrapper."""
    def __init__(self, model: nn.Module, decay: float = 0.999):
        self.decay = decay
        self.shadow = {k: v.detach().clone() for k, v in model.state_dict().items()}
        self.device = next(iter(model.parameters())).device

    @torch.no_grad()
    def update(self, model: nn.Module):
        for k, v in model.state_dict().items():
            if v.dtype.is_floating_point:
                self.shadow[k].mul_(self.decay).add_(v.detach(), alpha=1.0 - self.decay)

    @torch.no_grad()
    def copy_to(self, model: nn.Module):
        model.load_state_dict(self.shadow, strict=True)

def _sample_sigmas(n: int, sigma_min: float, sigma_max: float, device) -> torch.Tensor:
    # log-uniform between sigma_min and sigma_max
    u = torch.rand(n, device=device)
    return sigma_min * (sigma_max / sigma_min) ** u

def train_diffusion_denoiser(
    x_clean: torch.Tensor,
    epochs: int = 400,
    lr: float = 1e-3,
    batch_size: int = 4096,
    sigma_min: float = 0.02,
    sigma_max: float = 0.6,
    ema_decay: float = 0.999,
    verbose: bool = True,
) -> Tuple[DiffusionDenoiser, DiffusionDenoiser]:
    """
    Returns (model, ema_model). Use ema_model for inference.
    """
    x = x_clean.detach().to(device)
    n, d = x.shape
    model = DiffusionDenoiser(dim=d).to(device)
    ema = EMA(model, decay=ema_decay)

    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    mse = nn.MSELoss()

    steps_per_epoch = max(1, (n + batch_size - 1)//batch_size)
    for ep in range(1, epochs+1):
        perm = torch.randperm(n, device=device)
        total = 0.0
        model.train()
        for i in range(steps_per_epoch):
            idx = perm[i*batch_size : (i+1)*batch_size]
            clean = x[idx]                                         # (B, d)
            sigma = _sample_sigmas(clean.shape[0], sigma_min, sigma_max, device)  # (B,)
            noise = torch.randn_like(clean)                        # epsilon ~ N(0, I)
            x_noisy = clean + sigma[:, None] * noise               # x_sigma

            eps_pred = model(x_noisy, sigma)                       # predict epsilon
            loss = mse(eps_pred, noise)

            opt.zero_grad(); loss.backward(); opt.step()
            ema.update(model)

            total += loss.item()
        if verbose and (ep == 1 or ep % max(1, epochs//10) == 0):
            print(f"[DIFF] epoch {ep:4d}/{epochs} | loss={total/steps_per_epoch:.6f}")

    # build an ema model copy for inference
    ema_model = DiffusionDenoiser(dim=d).to(device)
    ema.copy_to(ema_model)
    ema_model.eval()
    return model.eval(), ema_model.eval()

@torch.no_grad()
def refine_embeddings_via_score(
    ema_model: DiffusionDenoiser,
    x_init: torch.Tensor,
    sigmas: Tuple[float, ...] = (0.5, 0.25, 0.12, 0.06, 0.03),
    steps_per_sigma: int = 1,
    step_scale: float = 0.1,
) -> torch.Tensor:
    """
    Annealed score refinement:
      x <- x + step_scale * sigma^2 * s_theta(x, sigma),  s_theta ≈ -eps_theta / sigma
    Small # of steps; very cheap; strong regularization toward learned prior.
    """
    x = x_init.detach().to(device)
    for s in sigmas:
        sigma = torch.full((x.shape[0],), float(s), device=device)
        for _ in range(steps_per_sigma):
            eps = ema_model(x, sigma)                      # predict epsilon
            score = -eps / sigma[:, None]                  # score approx
            step = step_scale * (s ** 2)
            x = x + step * score
    return x

# ======================
# Load data ONCE
# ======================
df_pos        = pd.read_csv(TRAIN_CSV)[['SUBJECT_CUI','PREDICATE','OBJECT_CUI']]
df_test_full  = pd.read_csv(TEST_CSV)
df_test_triples = df_test_full[['SUBJECT_CUI','PREDICATE','OBJECT_CUI']]

combined_triples = pd.concat([df_pos, df_test_triples], axis=0).values
combined_factory = TriplesFactory.from_labeled_triples(triples=combined_triples)

timestamp = time.strftime("%Y%m%d-%H%M%S")
BATCH_DIR = os.path.join(BASE_PARENT, f"run-{timestamp}")
ensure_dir(BATCH_DIR)
print(f"[INFO] Batch outputs -> {BATCH_DIR}")

summary_rows = []
batch_json = {"batch_dir": BATCH_DIR, "runs": []}

# ======================
# Loop over configs
# ======================
for i, cfg in enumerate(CONFIGS, start=1):
    print("\n" + "="*90)
    print(f"[RUN {i}/{len(CONFIGS)}] cfg={cfg}")
    print("="*90)

    EMBED_DIM      = int(cfg["embedding_dim"])
    REL_DIM        = int(cfg["relation_dim"])
    EPOCHS         = int(cfg["epochs_transd"])
    DENOISE_EPOCHS = int(cfg["denoiser_epochs"])
    DENOISE_LR     = float(cfg["denoiser_lr"])
    DENOISE_BATCH  = int(cfg["batch_size_denoiser"])
    SIGMA_MIN      = float(cfg["sigma_min"])
    SIGMA_MAX      = float(cfg["sigma_max"])
    EMA_DECAY      = float(cfg["ema_decay"])
    REFINE_SIGMAS  = tuple(cfg["refine_sigmas"])
    REFINE_STEPS   = int(cfg["refine_steps_per_sigma"])
    REFINE_SCALE   = float(cfg["refine_step_scale"])
    SEED           = int(cfg["seed"])

    set_deterministic(SEED)

    # Factories (no inverse triples)
    train_factory = TriplesFactory.from_labeled_triples(
        triples=df_pos.values,
        entity_to_id=combined_factory.entity_to_id,
        relation_to_id=combined_factory.relation_to_id
    )
    test_factory = TriplesFactory.from_labeled_triples(
        triples=df_test_triples.values,
        entity_to_id=combined_factory.entity_to_id,
        relation_to_id=combined_factory.relation_to_id
    )

    tag = (f"emb{EMBED_DIM}_rel{REL_DIM}_ep{EPOCHS}"
           f"_diffEp{DENOISE_EPOCHS}_sig{SIGMA_MIN}-{SIGMA_MAX}"
           f"_lr{DENOISE_LR}_bs{DENOISE_BATCH}_seed{SEED}")
    OUT_DIR = os.path.join(BATCH_DIR, tag)
    ensure_dir(OUT_DIR)

    RESULTS_JSON = os.path.join(OUT_DIR, "results.json")
    BASELINE_SCORES_CSV = os.path.join(OUT_DIR, "test_triple_scores_transd.csv")
    DENOISED_SCORES_CSV = os.path.join(OUT_DIR, "test_triple_scores_transd_diffusion.csv")

    # ---------------------
    # Train TransD
    # ---------------------
    print("[INFO] Training TransD …")
    transd_result = pipeline(
        training=train_factory,
        testing=test_factory,
        model="TransD",
        model_kwargs=dict(embedding_dim=EMBED_DIM, relation_dim=REL_DIM),
        training_kwargs=dict(num_epochs=EPOCHS, batch_size=1024),
        random_seed=SEED,
        device=device,
    )
    model = transd_result.model.to(device).eval()

    # ---------------------
    # Evaluate baseline + write scored CSV
    # ---------------------
    print("[INFO] Evaluating TransD baseline …")
    metrics_transd = evaluate_model(model, "TransD", test_factory, train_factory)
    write_scored_csv(
        BASELINE_SCORES_CSV,
        model,
        test_factory,
        df_original=df_test_full,
        normalize=True,
        sort_by_score=True,
    )
    ranks_baseline = extract_ranks_and_scores(BASELINE_SCORES_CSV, SELECTED_HITS)

    # ---------------------
    # Train diffusion denoiser on entity table
    # ---------------------
    print("[INFO] Training diffusion-style denoiser …")
    E_clean = get_entity_base_matrix(model)  # (num_entities, d)
    diff_model, diff_model_ema = train_diffusion_denoiser(
        x_clean=E_clean,
        epochs=DENOISE_EPOCHS,
        lr=DENOISE_LR,
        batch_size=DENOISE_BATCH,
        sigma_min=SIGMA_MIN,
        sigma_max=SIGMA_MAX,
        ema_decay=EMA_DECAY,
        verbose=True,
    )
    # save denoiser
    torch.save(diff_model.state_dict(), os.path.join(OUT_DIR, "diffusion_denoiser.pt"))
    torch.save(diff_model_ema.state_dict(), os.path.join(OUT_DIR, "diffusion_denoiser_ema.pt"))

    # ---------------------
    # Refine embeddings (cheap multi-sigma score steps)
    # ---------------------
    print("[INFO] Refining embeddings via annealed score steps …")
    with torch.no_grad():
        E_refined = refine_embeddings_via_score(
            ema_model=diff_model_ema,
            x_init=E_clean.to(device),
            sigmas=REFINE_SIGMAS,
            steps_per_sigma=REFINE_STEPS,
            step_scale=REFINE_SCALE,
        ).detach()

    # ---------------------
    # Evaluate diffusion-denoised + write CSV
    # ---------------------
    print("[INFO] Evaluating TransD + diffusion denoising …")
    E_backup = E_clean.clone().detach()
    set_entity_base_matrix_(model, E_refined)
    metrics_denoised = evaluate_model(model, "TransD+diffusion", test_factory, train_factory)
    write_scored_csv(
        DENOISED_SCORES_CSV,
        model,
        test_factory,
        df_original=df_test_full,
        normalize=True,
        sort_by_score=True,
    )
    ranks_denoised = extract_ranks_and_scores(DENOISED_SCORES_CSV, SELECTED_HITS)
    set_entity_base_matrix_(model, E_backup)  # restore

    # ---------------------
    # Summaries + score comparison
    # ---------------------
    print("\n=== RESULTS (TransD vs TransD+diffusion) ===")
    for k in ["mrr","hits@1","hits@3","hits@10","mr"]:
        a = round4(metrics_transd.get(k))
        b = round4(metrics_denoised.get(k))
        print(f"{k:>8}: {a}  →  {b}")

    per_hit = []
    all_improved = True
    for hit in SELECTED_HITS:
        base_entry = ranks_baseline[hit]
        deno_entry = ranks_denoised[hit]

        s_base = base_entry["score"]
        s_deno = deno_entry["score"]

        if SCORE_HIGHER_IS_BETTER:
            improved = (s_base is not None and s_deno is not None and s_deno > s_base)
        else:
            improved = (s_base is not None and s_deno is not None and s_deno < s_base)

        if not improved:
            all_improved = False

        per_hit.append(dict(
            TotalHits=hit,
            score_transd=round4(s_base),
            score_denoised=round4(s_deno),
            improved=bool(improved),
        ))
        print(f" - {hit}: {s_base} → {s_deno} (improved={improved})")

    print(f"[SELECTED_HITS] all improved? {all_improved}")

    # Per-run JSON
    run_json = {
        "out_dir": OUT_DIR,
        "config": cfg,
        "metrics": {
            "transd": {k: round4(metrics_transd.get(k)) for k in ["mrr","hits@1","hits@3","hits@10","mr"]},
            "transd_denoised": {k: round4(metrics_denoised.get(k)) for k in ["mrr","hits@1","hits@3","hits@10","mr"]},
        },
        "selected_hits": per_hit,
        "all_selected_scores_improved": bool(all_improved),
        "files": {
            "baseline_csv": BASELINE_SCORES_CSV,
            "denoised_csv": DENOISED_SCORES_CSV,
        }
    }
    with open(RESULTS_JSON, "w") as f:
        json.dump(run_json, f, indent=2)

    # Row for master CSV
    row = dict(
        out_dir=OUT_DIR, seed=cfg["seed"],
        embedding_dim=cfg["embedding_dim"], relation_dim=cfg["relation_dim"],
        epochs_transd=cfg["epochs_transd"],
        denoiser_epochs=cfg["denoiser_epochs"],
        sigma_min=cfg["sigma_min"], sigma_max=cfg["sigma_max"],
        denoiser_lr=cfg["denoiser_lr"], batch_size_denoiser=cfg["batch_size_denoiser"],
        mrr_transd=round4(metrics_transd.get("mrr")),
        mrr_denoised=round4(metrics_denoised.get("mrr")),
        hits1_transd=round4(metrics_transd.get("hits@1")),
        hits1_denoised=round4(metrics_denoised.get("hits@1")),
        hits10_transd=round4(metrics_transd.get("hits@10")),
        hits10_denoised=round4(metrics_denoised.get("hits@10")),
        mr_transd=round4(metrics_transd.get("mr")),
        mr_denoised=round4(metrics_denoised.get("mr")),
        all_selected_scores_improved=bool(all_improved),
    )
    for r in per_hit:
        h = r["TotalHits"]
        row[f"score_base_{h}"] = r["score_transd"]
        row[f"score_deno_{h}"] = r["score_denoised"]
        row[f"improved_{h}"]   = r["improved"]

    summary_rows.append(row)
    batch_json["runs"].append(run_json)

# ======================
# Batch-level summary files
# ======================
summary_csv_path = os.path.join(BATCH_DIR, "summary_runs.csv")
summary_json_path = os.path.join(BATCH_DIR, "summary_runs.json")
pd.DataFrame(summary_rows).to_csv(summary_csv_path, index=False)
with open(summary_json_path, "w") as f:
    json.dump(batch_json, f, indent=2)

print("\n" + "#"*90)
print(f"[DONE] Master summary:\n - {summary_csv_path}\n - {summary_json_path}")
print("#"*90)


INFO:pykeen.utils:Using opt_einsum


[INFO] Using device: cuda


INFO:pykeen.pipeline.api:Using device: cuda
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()


[INFO] Batch outputs -> /content/drive/MyDrive/transD_dif/GRID_MIN/run-20250827-154605

[RUN 1/1] cfg={'seed': 73, 'embedding_dim': 256, 'relation_dim': 128, 'epochs_transd': 300, 'denoiser_epochs': 600, 'denoiser_lr': 0.0005, 'batch_size_denoiser': 8192, 'sigma_min': 0.01, 'sigma_max': 0.5, 'ema_decay': 0.9995, 'refine_sigmas': (0.5, 0.25, 0.12, 0.06, 0.03), 'refine_steps_per_sigma': 2, 'refine_step_scale': 0.15}
[INFO] Training TransD …


INFO:pykeen.nn.representation:Inferred unique=False for Embedding()
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()


Training epochs on cuda:0:   0%|          | 0/300 [00:00<?, ?epoch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Training batches on cuda:0:   0%|          | 0.00/84.0 [00:00<?, ?batch/s]

Evaluating on cuda:0:   0%|          | 0.00/1.49k [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 1.64s seconds


[INFO] Evaluating TransD baseline …


Evaluating on cuda:0:   0%|          | 0.00/1.49k [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 1.17s seconds


[INFO] Wrote /content/drive/MyDrive/transD_dif/GRID_MIN/run-20250827-154605/emb256_rel128_ep300_diffEp600_sig0.01-0.5_lr0.0005_bs8192_seed73/test_triple_scores_transd.csv | shape=(1492, 13)
[INFO] Training diffusion-style denoiser …
[DIFF] epoch    1/600 | loss=1.316293
[DIFF] epoch   60/600 | loss=0.739423
[DIFF] epoch  120/600 | loss=0.663636
[DIFF] epoch  180/600 | loss=0.634040
[DIFF] epoch  240/600 | loss=0.608454
[DIFF] epoch  300/600 | loss=0.592027
[DIFF] epoch  360/600 | loss=0.575096
[DIFF] epoch  420/600 | loss=0.561261
[DIFF] epoch  480/600 | loss=0.547163
[DIFF] epoch  540/600 | loss=0.533418
[DIFF] epoch  600/600 | loss=0.525484
[INFO] Refining embeddings via annealed score steps …
[INFO] Evaluating TransD + diffusion denoising …


Evaluating on cuda:0:   0%|          | 0.00/1.49k [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 0.95s seconds


[INFO] Wrote /content/drive/MyDrive/transD_dif/GRID_MIN/run-20250827-154605/emb256_rel128_ep300_diffEp600_sig0.01-0.5_lr0.0005_bs8192_seed73/test_triple_scores_transd_diffusion.csv | shape=(1492, 13)

=== RESULTS (TransD vs TransD+diffusion) ===
     mrr: nan  →  nan
  hits@1: 0.0067  →  0.0144
  hits@3: 0.0191  →  0.0305
 hits@10: 0.0603  →  0.0804
      mr: 2057.9314  →  1644.4388
 - 45809: -0.1959022850098168 → -0.077313251145787 (improved=True)
 - 36808: 0.0650976251425876 → 0.1518595888282656 (improved=True)
 - 19658: 0.1138169645895159 → 0.2043606590095188 (improved=True)
 - 16575: 0.7141433627978224 → 0.7890972672458576 (improved=True)
 - 11737: 0.4054424673982366 → 0.4722958921233557 (improved=True)
[SELECTED_HITS] all improved? True

##########################################################################################
[DONE] Master summary:
 - /content/drive/MyDrive/transD_dif/GRID_MIN/run-20250827-154605/summary_runs.csv
 - /content/drive/MyDrive/transD_dif/GRID_MIN/run-