## Full evaluation on CCP

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from copy import deepcopy
import os
import json
import numpy as np
import pandas as pd

from lib.problems import ProblemDataset
from baselines.utils import eval_method
from baselines.CPP import methods_registry
from baselines.CPP.methods_registry import CUDA_METHODS
from lib.ltr.utils import load_model
from lib.ltr.ccp.method import CKMeans

In [None]:
# ---- problem / sampling ---------------------------------------------------
N = 200                 # problem size tag, interpolated into SAVE_DIR and DS_PTH
SIZE = 100              # forwarded to the dataset as `sample_size`
smp_cfg = dict(sample_size=SIZE)

# ---- repetitions ----------------------------------------------------------
SEED = 1                # first seed; the evaluation seeds are SEED, SEED+1, ...
NSEEDS = 3              # number of seeds every method is evaluated with

# ---- execution ------------------------------------------------------------
CUDA = False            # passed as `cuda=` to eval_method
K_NOT_KNOWN = False     # passed as `k_not_known=` to eval_method
CORES = 4               # passed as `num_cores=` to the parallel baselines
T_LIM = 3 * 60          # time limit in seconds (3min)
NUM_INIT = 8            # passed as `num_init=` to the multi-start methods

# Sentinel cost used to detect infeasible runs.
INF = float("inf")

# ---- paths ----------------------------------------------------------------
SAVE_DIR = f"./outputs_eval/ccp{N}/"
BL_DIR = os.path.join(SAVE_DIR, "baselines")    # baseline results
M_DIR = os.path.join(SAVE_DIR, "model")         # learned-model results

DS_PTH = f"data/CCP/CCP{N}/test_gm_n{N}_k3-12_s100_cap1_1_seed4321.npz"
CKPT = "outputs/final/ccp_200/gnn_pool_pointwise/2023-01-09_11-19-49_057495/checkpoints/epoch=198_val_acc=0.9858.ckpt"

In [None]:
# Containers filled by the evaluation cells: raw per-run results and the
# (possibly adapted) summary of every method, both keyed by the method id.
RESULTS = {}
metrics = {}

# Consecutive seeds starting at SEED, one per repetition.
seeds = list(range(SEED, SEED + NSEEDS))

# Load the CCP test data and draw the evaluation sample configured in smp_cfg.
ds = ProblemDataset(problem="CCP", seed=SEED, data_pth=DS_PTH).sample(**smp_cfg)

In [None]:
# Baseline "random_select": evaluated as-is, its summary is stored unmodified.
mthd = "random_select"
method_fn = getattr(methods_registry, mthd)
result, smry = eval_method(
    method=method_fn,
    dataset=ds,
    seeds=seeds,
    save_dir=BL_DIR,
    cuda=CUDA,
    k_not_known=K_NOT_KNOWN,
    sample_cfg=smp_cfg,
    method_str=mthd,
)
# The method id carries a "_cuda" suffix only for CUDA runs of CUDA-capable methods.
m_id = mthd + ('_cuda' if CUDA and mthd in CUDA_METHODS else '')
RESULTS[m_id] = result
metrics[m_id] = smry
print(smry)
#

In [None]:
# Baseline "random_center_knn".
# Besides its own summary, this cell produces `rnd_mean_cost`: the per-instance
# mean cost of the random baseline, which the following cells substitute for
# infeasible (cost == INF) runs of the other methods.
mthd = "random_center_knn"
result, smry = eval_method(
    method=getattr(methods_registry, mthd),
    dataset=ds,
    seeds=seeds,
    save_dir=BL_DIR,
    cuda=CUDA,
    k_not_known=K_NOT_KNOWN,
    sample_cfg=smp_cfg,
    method_str=mthd,
    verbose=False,
)
m_id = f"{mthd}{'_cuda' if CUDA and mthd in CUDA_METHODS else ''}"
RESULTS[m_id] = result
rnd_res = deepcopy(result)

# One row per seed, one column per instance (layout implied by the reshape).
costs = np.array([r['tot_center_dist'] for r in rnd_res]).reshape(NSEEDS, -1)

# Runs with a non-finite cost (inf / NaN) count as infeasible.
feasible = np.isfinite(costs)

# Worst feasible cost over all runs; used as the penalty for instances that
# were infeasible for every seed. Falls back to INF when no run at all was
# feasible instead of failing on the max of an empty array.
max_cost = costs[feasible].max() if feasible.any() else INF

# Instances for which no seed produced a feasible solution.
is_inf = ~feasible.any(axis=0)
print(f"inf: {is_inf.sum()}")

# Mean over the *feasible* seeds of every instance. np.nanmean only skips NaN,
# not inf, so a single infeasible seed would otherwise turn the instance mean
# (and every value derived from it) into inf.
n_feasible = feasible.sum(axis=0)
feasible_sum = np.where(feasible, costs, 0.0).sum(axis=0)
rnd_mean_cost = np.full(costs.shape[1], max_cost, dtype=float)
rnd_mean_cost[~is_inf] = feasible_sum[~is_inf] / n_feasible[~is_inf]

smry['center_dist_mean'] = rnd_mean_cost.mean()
print(smry)
metrics[m_id] = smry

In [None]:
# Baseline "topk_center_knn"; skipped on CUDA runs if the method has no CUDA variant.
mthd = "topk_center_knn"
if (not CUDA) or (mthd in CUDA_METHODS):
    method_fn = getattr(methods_registry, mthd)
    result, smry = eval_method(
        method=method_fn,
        dataset=ds,
        seeds=seeds,
        save_dir=BL_DIR,
        cuda=CUDA,
        k_not_known=K_NOT_KNOWN,
        sample_cfg=smp_cfg,
        method_str=mthd,
        verbose=False,
    )
    m_id = mthd + ('_cuda' if CUDA and mthd in CUDA_METHODS else '')
    RESULTS[m_id] = result

    # Cost matrix with one row per seed and one column per instance.
    res = deepcopy(result)
    costs = np.array([run['tot_center_dist'] for run in res]).reshape(NSEEDS, -1)
    # replace infeasible runs with mean cost of random method
    # (rnd_mean_cost holds one value per instance and broadcasts over the seeds)
    costs = np.where(costs == INF, rnd_mean_cost, costs)

    # Overwrite the summary statistics with the adapted costs:
    # overall mean, and the std over seeds averaged over the instances.
    smry['center_dist_mean'] = np.mean(costs)
    smry['center_dist_std'] = np.std(costs, axis=0).mean()
    print(f"adapted summary: {smry}")
    metrics[m_id] = smry



In [None]:
# Baseline "ccp_mh", run with the global time limit T_LIM and a tenth of it
# as local limit; skipped on CUDA runs if the method has no CUDA variant.
mthd = "ccp_mh"
if (not CUDA) or (mthd in CUDA_METHODS):
    method_fn = getattr(methods_registry, mthd)
    result, smry = eval_method(
        method=method_fn,
        dataset=ds,
        seeds=seeds,
        save_dir=BL_DIR,
        cuda=CUDA,
        k_not_known=K_NOT_KNOWN,
        sample_cfg=smp_cfg,
        num_init=NUM_INIT,
        num_cores=CORES,
        t_total=T_LIM,
        t_local=T_LIM//10,
        g_initial=25,
    )
    m_id = mthd + ('_cuda' if CUDA and mthd in CUDA_METHODS else '')
    RESULTS[m_id] = result

    # Cost matrix with one row per seed and one column per instance.
    res = deepcopy(result)
    costs = np.array([run['tot_center_dist'] for run in res]).reshape(NSEEDS, -1)
    # replace infeasible runs with mean cost of random method
    # (rnd_mean_cost holds one value per instance and broadcasts over the seeds)
    costs = np.where(costs == INF, rnd_mean_cost, costs)

    # Overwrite the summary statistics with the adapted costs:
    # overall mean, and the std over seeds averaged over the instances.
    smry['center_dist_mean'] = np.mean(costs)
    smry['center_dist_std'] = np.std(costs, axis=0).mean()
    print(f"adapted summary: {smry}")
    metrics[m_id] = smry

In [None]:
# Baseline "rpack", run with timeout T_LIM and a kill timeout of 2*T_LIM + 1;
# skipped on CUDA runs if the method has no CUDA variant.
mthd = "rpack"
if (not CUDA) or (mthd in CUDA_METHODS):
    method_fn = getattr(methods_registry, mthd)
    result, smry = eval_method(
        method=method_fn,
        dataset=ds,
        seeds=seeds,
        save_dir=BL_DIR,
        cuda=CUDA,
        k_not_known=K_NOT_KNOWN,
        sample_cfg=smp_cfg,
        num_init=NUM_INIT,
        num_cores=CORES,
        timeout=T_LIM,
        timeout_kill=(T_LIM*2)+1,
        verbose=False,
    )
    m_id = mthd + ('_cuda' if CUDA and mthd in CUDA_METHODS else '')
    RESULTS[m_id] = result

    # Cost matrix with one row per seed and one column per instance.
    res = deepcopy(result)
    costs = np.array([run['tot_center_dist'] for run in res]).reshape(NSEEDS, -1)
    # replace infeasible runs with mean cost of random method
    # (rnd_mean_cost holds one value per instance and broadcasts over the seeds)
    costs = np.where(costs == INF, rnd_mean_cost, costs)

    # Overwrite the summary statistics with the adapted costs:
    # overall mean, and the std over seeds averaged over the instances.
    smry['center_dist_mean'] = np.mean(costs)
    smry['center_dist_std'] = np.std(costs, axis=0).mean()
    print(f"adapted summary: {smry}")
    metrics[m_id] = smry

In [None]:
# Baseline "agglomerative" (with nn_selection enabled); skipped on CUDA runs
# if the method has no CUDA variant.
mthd = "agglomerative"
if (not CUDA) or (mthd in CUDA_METHODS):
    method_fn = getattr(methods_registry, mthd)
    result, smry = eval_method(
        method=method_fn,
        dataset=ds,
        seeds=seeds,
        save_dir=BL_DIR,
        cuda=CUDA,
        k_not_known=K_NOT_KNOWN,
        sample_cfg=smp_cfg,
        num_init=NUM_INIT,
        nn_selection=True,
    )
    m_id = mthd + ('_cuda' if CUDA and mthd in CUDA_METHODS else '')
    RESULTS[m_id] = result

    # Cost matrix with one row per seed and one column per instance.
    res = deepcopy(result)
    costs = np.array([run['tot_center_dist'] for run in res]).reshape(NSEEDS, -1)
    # replace infeasible runs with mean cost of random method
    # (rnd_mean_cost holds one value per instance and broadcasts over the seeds)
    costs = np.where(costs == INF, rnd_mean_cost, costs)

    # Overwrite the summary statistics with the adapted costs:
    # overall mean, and the std over seeds averaged over the instances.
    smry['center_dist_mean'] = np.mean(costs)
    smry['center_dist_std'] = np.std(costs, axis=0).mean()
    print(f"adapted summary: {smry}")
    metrics[m_id] = smry

### Results + Ablation



In [None]:
# Baseline "cap_kmeans" with "ckm++" initialisation; skipped on CUDA runs
# if the method has no CUDA variant.
mthd = "cap_kmeans"
if (not CUDA) or (mthd in CUDA_METHODS):
    method_fn = getattr(methods_registry, mthd)
    result, smry = eval_method(
        method=method_fn,
        dataset=ds,
        seeds=seeds,
        save_dir=BL_DIR,
        cuda=CUDA,
        k_not_known=K_NOT_KNOWN,
        sample_cfg=smp_cfg,
        verbose=False,
        num_init=NUM_INIT,
        tol=1e-4,
        max_iter=80,
        init_method="ckm++",
    )
    m_id = mthd + ('_cuda' if CUDA and mthd in CUDA_METHODS else '')
    RESULTS[m_id] = result

    # Cost matrix with one row per seed and one column per instance.
    res = deepcopy(result)
    costs = np.array([run['tot_center_dist'] for run in res]).reshape(NSEEDS, -1)
    # replace infeasible runs with mean cost of random method
    # (rnd_mean_cost holds one value per instance and broadcasts over the seeds)
    costs = np.where(costs == INF, rnd_mean_cost, costs)

    # Overwrite the summary statistics with the adapted costs:
    # overall mean, and the std over seeds averaged over the instances.
    smry['center_dist_mean'] = np.mean(costs)
    smry['center_dist_std'] = np.std(costs, axis=0).mean()
    print(f"adapted summary: {smry}")
    metrics[m_id] = smry

In [None]:
# "ncc_greedy": the last 'opt_last_frac' fraction of total nodes, ordered by
# their absolute priority, is greedily assigned to the closest center.

mthd = "ncc_greedy"
model = load_model("ccp", CKPT)

ckmeans = CKMeans(
    model=model,
    seed=SEED,
    init_method="ckm++",
    num_init=NUM_INIT,
    max_iter=50,
    pre_iter=0,
    tol=0.001,
    opt_last_frac=0.25,
    opt_last_samples=1,     # a single sample, i.e. no multiple samples
    opt_last_prio=True,
    verbose=False,
)

result, smry = eval_method(
    method=ckmeans.inference,
    dataset=ds,
    seeds=seeds,
    save_dir=M_DIR,
    cuda=CUDA,
    k_not_known=K_NOT_KNOWN,
    sample_cfg=smp_cfg,
    method_str=mthd,
)
m_id = mthd + ('_cuda' if CUDA and mthd in CUDA_METHODS else '')
RESULTS[m_id] = result

# Cost matrix with one row per seed and one column per instance.
res = deepcopy(result)
costs = np.array([run['tot_center_dist'] for run in res]).reshape(NSEEDS, -1)
# replace infeasible runs with mean cost of random method
# (rnd_mean_cost holds one value per instance and broadcasts over the seeds)
costs = np.where(costs == INF, rnd_mean_cost, costs)

# Overwrite the summary statistics with the adapted costs:
# overall mean, and the std over seeds averaged over the instances.
smry['center_dist_mean'] = np.mean(costs)
smry['center_dist_std'] = np.std(costs, axis=0).mean()
print(f"adapted summary: {smry}")
metrics[m_id] = smry


In [None]:
# "ncc_samp": multiple assignments are sampled for the last 'opt_last_frac'
# fraction of total nodes and the best one is selected.

mthd = "ncc_samp"
model = load_model("ccp", CKPT)

ckmeans = CKMeans(
    model=model,
    seed=SEED,
    init_method="ckm++",
    num_init=NUM_INIT,
    max_iter=50,
    pre_iter=0,
    tol=0.001,
    opt_last_frac=0.25,
    opt_last_samples=64,
    verbose=False,
)

result, smry = eval_method(
    method=ckmeans.inference,
    dataset=ds,
    seeds=seeds,
    save_dir=M_DIR,
    cuda=CUDA,
    k_not_known=K_NOT_KNOWN,
    sample_cfg=smp_cfg,
    method_str=mthd,
)
m_id = mthd + ('_cuda' if CUDA and mthd in CUDA_METHODS else '')
RESULTS[m_id] = result

# Cost matrix with one row per seed and one column per instance.
res = deepcopy(result)
costs = np.array([run['tot_center_dist'] for run in res]).reshape(NSEEDS, -1)
# replace infeasible runs with mean cost of random method
# (rnd_mean_cost holds one value per instance and broadcasts over the seeds)
costs = np.where(costs == INF, rnd_mean_cost, costs)

# Overwrite the summary statistics with the adapted costs:
# overall mean, and the std over seeds averaged over the instances.
smry['center_dist_mean'] = np.mean(costs)
smry['center_dist_std'] = np.std(costs, axis=0).mean()
print(f"adapted summary: {smry}")
metrics[m_id] = smry