In [185]:
import os
from glob import glob
# Root directory of the benchmark run whose per-task predictions are analysed below.
PATH = "results/predict_single_config/predict_benchmark.2022-11-21_13-23-34.files/"

# Collect every prediction.json below PATH, at any depth. glob gives no ordering
# guarantee, so the list is sorted to keep the instance order stable across runs.
prediction_files = sorted(glob(os.path.join(PATH, "**", "prediction.json"), recursive = True))

In [186]:
import json

# Task name of every prediction file, parallel to `predictions`.
instance_index = []
# Tool name -> column position inside each prediction row.
label_index    = {}

# Pass 1: read every file and register tool names in order of first appearance.
raw_predictions = []

for file in prediction_files:
    # The task name is the directory two levels above prediction.json.
    name = os.path.basename(os.path.dirname(os.path.dirname(file)))

    with open(file, 'r') as prediction_file:
        prediction = json.load(prediction_file)

    # The "features" entry is skipped; every other key is treated as a tool name.
    tool_scores = {tool_name: pred for tool_name, pred in prediction.items() if tool_name != "features"}
    for tool_name in tool_scores:
        if tool_name not in label_index: label_index[tool_name] = len(label_index)

    raw_predictions.append(tool_scores)
    instance_index.append(name)

# Pass 2: build the rows only once label_index is complete, so every row has the
# same width even when a tool first shows up in a later file.
predictions = []

for tool_scores in raw_predictions:
    # Tools missing from a file keep the filler value 0.
    output = [0] * len(label_index)
    for tool_name, pred in tool_scores.items():
        output[label_index[tool_name]] = pred
    predictions.append(output)



In [187]:
from pesco.data.utils import load_dataset
# Input files handed to load_dataset (an embedding file and a label file, judging by their names).
embedding_path = "../pesco_data/datasets/svcomp22_count_embedding_new.jsonl"
label_path     = "../pesco_data/cpachecker_labels_nobam.jsonl"

dataset = load_dataset(embedding_path, label_path, fill_unknown = True)
dataset.embedding.shape

(6494, 385)

In [188]:
import numpy as np

# One slot per dataset row; rows without a matching prediction file stay None.
aligned_prediction = [None] * dataset.embedding.shape[0]

# Dataset instances are looked up by the second "/"-separated component of their key.
instance_lookup = {k.split("/")[1]: i for i, k in enumerate(dataset.instance_index)}

for name, prediction_row in zip(instance_index, predictions):
    # NOTE(review): str.replace removes these substrings anywhere in the name, not
    # only as a trailing extension (e.g. ".c" inside ".cil") -- confirm that the
    # dataset keys were normalised the same way before changing this.
    name = name.replace(".yml", "").replace(".c", "").replace(".i", "")
    if name in instance_lookup:
        aligned_prediction[instance_lookup[name]] = prediction_row

# Boolean mask over dataset rows that received a prediction, followed by the
# dense matrix holding just those rows (in dataset order).
aligned_mask       = np.array([row is not None for row in aligned_prediction])
aligned_prediction = np.array([row for row in aligned_prediction if row is not None])

In [189]:
# Inspect the tool-name -> column mapping collected from the prediction files.
label_index

{'bmc': 0, 'ki': 1, 'pa': 2, 'symbolic': 3, 'va': 4, 'vaitp': 5}

In [190]:
# Re-order the prediction columns to follow dataset.label_index (presumably the
# column order of dataset.labels / dataset.runtimes -- verify against load_dataset).
# NOTE: aligned_prediction is overwritten, so re-running this cell permutes it again.
allowed_entries    = [label_index[f"{k}"] for k in dataset.label_index]
aligned_prediction = aligned_prediction[:, allowed_entries]

In [191]:
# Sanity check: (instances with a prediction, number of tool columns).
aligned_prediction.shape

(5568, 6)

In [192]:
# Keep only the dataset rows that received a prediction, so that labels and
# runtimes are row-aligned with aligned_prediction.
labels   = dataset.labels[aligned_mask]
runtimes = dataset.runtimes[aligned_mask]

In [193]:
# Sanity check: the three arrays are displayed side by side to compare their shapes.
aligned_prediction.shape, labels.shape, runtimes.shape

((5568, 6), (5568, 6), (5568, 6))

In [201]:
def _evaluate(candidate, y, runtimes):
    if isinstance(candidate, int):
        candidate = [(candidate, 900)]

    eval_results   = np.zeros((y.shape[0],))
    eval_runtimes  = np.zeros((y.shape[0],)) 

    running = np.ones((y.shape[0],))
    for tool, timelimit in candidate:
        labels, truntimes = y[:, tool], runtimes[:, tool]
        timemask = np.clip(timelimit - truntimes + 1, 0, 1).astype(int)

        cruntimes = timemask * truntimes + (1 - timemask) * timelimit

        eval_results  = (1 - running) * eval_results + running * timemask * labels
        eval_runtimes = (1 - running) * eval_runtimes + running * (eval_runtimes + cruntimes)
        running -= running * timemask * labels

    return eval_results, eval_runtimes

# Column j of aligned_prediction / labels / runtimes belongs to
# dataset.label_index[j] (the prediction columns were re-ordered to that order),
# so column names are resolved through dataset.label_index rather than through
# the file-derived label_index, whose order may differ.
lookup = dict(enumerate(dataset.label_index))
# Per instance, the tool with the smallest predicted score is selected.
aligned_selection = aligned_prediction.argmin(axis = 1)

solved = 0

for k, tool_name in lookup.items():
    index = aligned_selection == k
    if not np.any(index): continue

    _labels   = labels[index]
    _runtimes = runtimes[index]

    # A name may encode a sequence "tool:limit,tool:limit"; a plain tool name
    # is run with a limit of 900.
    candidate = tool_name.split(",")
    candidate = [x.split(":") if ":" in x else (x, 900) for x in candidate]
    candidate = [(dataset.label_index.index(x[0]), int(x[1])) for x in candidate]
    result, _eval_runtimes = _evaluate(candidate, _labels, _runtimes)
    _solved    = result.sum()
    _solvable  = _labels.max(axis = 1).sum()
    print(tool_name, int(_solved), "/", int(_solvable), "Times selected:", _labels.shape[0])
    solved += _solved

# NOTE(review): every dataset instance without an aligned prediction is counted
# as solved here -- confirm that this is the intended accounting.
pre_solved = dataset.labels.shape[0] - aligned_prediction.shape[0]
print("Score before:", solved, pre_solved, solved + pre_solved)
print("Max solvable", dataset.labels.max(axis = 1).sum())

bmc 501 / 508 Times selected: 736
ki 1288 / 1307 Times selected: 1756
pa 388 / 438 Times selected: 589
symbolic 484 / 501 Times selected: 707
va 181 / 182 Times selected: 233
vaitp 810 / 839 Times selected: 1547
Score before: 3652.0 926 4578.0
Max solvable 4563


In [195]:
# Score obtained by always running the top-ranked (lowest-score) tool.
aligned_selection = aligned_prediction.argmin(axis = 1)
row_ids           = np.arange(labels.shape[0])
selected_entries  = labels[row_ids, aligned_selection]

# Instances without an aligned prediction are added to the score as-is.
solved         = dataset.labels.shape[0] - aligned_prediction.shape[0]
selected_total = selected_entries.sum()
print("Score before:", selected_total, solved, selected_total + solved)
print("Max solvable", dataset.labels.max(axis = 1).sum())

Score before: 3652 926 4578
Max solvable 4563


In [202]:
# Greedy forward selection of a tool subset: starting from the empty set, add
# the tool whose inclusion maximises the mean label of the argmin-selected tool,
# and stop when no tool improves the score or subset_size tools are chosen.
scores = aligned_prediction
subset_size = 4  # -1 disables the size cap
num_verifier = scores.shape[1]
best_mask    = np.zeros(num_verifier)
best_score   = 0
row_ids      = np.arange(labels.shape[0])

for _ in range(num_verifier):
    current_mask  = None
    current_score = 0

    for cand in range(num_verifier):
        if best_mask[cand] == 1: continue

        test_mask = np.copy(best_mask)
        test_mask[cand] = 1
        # Excluded tools get +inf so argmin can never pick them. Filling with
        # the largest observed score could tie with an allowed column, and
        # argmin resolves ties in favour of the lowest column index.
        test_scores    = np.where(test_mask == 1, scores, np.inf)
        test_selection = test_scores.argmin(axis = 1)
        test_score     = labels[row_ids, test_selection].mean()

        if test_score > current_score:
            current_mask = test_mask
            current_score = test_score

    if current_score > best_score:
        best_score = current_score
        best_mask  = current_mask

        if subset_size != -1 and best_mask.sum() >= subset_size:
            break

    else:
        # No remaining tool improves the score: stop early.
        break

[l for i, l in enumerate(dataset.label_index) if best_mask[i] == 1]

['bmc', 'ki', 'va', 'vaitp']

In [197]:
# Manual override: allow every tool by setting the mask to all ones.
# NOTE(review): when the notebook is run top to bottom this replaces the mask
# chosen by the greedy selection cell above. The recorded outputs further down
# (tools mapped to "passthrough") appear to come from a run where this override
# was not active -- confirm whether this cell should stay.
best_mask = np.ones((labels.shape[1]))

In [203]:
# Tools outside best_mask are assigned +inf, so argmin can never select them
# regardless of how large the real scores get.
masked_scores = np.where(best_mask == 1, scores, np.inf)
masked_selection = masked_scores.argmin(axis = 1)
masked_selection

array([1, 1, 1, ..., 5, 5, 5])

In [248]:
%load_ext autoreload
%autoreload 2
from pesco.optim import optimize_portfolio

# Instances are grouped by the tool selected for them; for each group a
# sequential portfolio is optimised on the group's solvable instances.
cluster_assign = masked_selection

# _tools[k] is the portfolio, a sequence of (tool_index, timelimit) pairs, used
# for instances assigned to tool k.
_tools = []

solved = 0

for k in range(labels.shape[1]):
    index = cluster_assign == k
    if not np.any(index):
        # Tool k is never selected: fall back to running it alone with limit 900.
        _tools.append(((k, 900),))
        continue

    _labels   = labels[index]
    truntimes = runtimes[index]

    print("Times selected:", _labels.shape[0])

    # Only instances that at least one tool can solve take part in the optimisation.
    solvable  = _labels.max(axis = 1) == 1
    _labels   = _labels[solvable]
    truntimes = truntimes[solvable]

    # Solvable instances that the selected tool k alone does not solve.
    unsolved = (1 - _labels[:, k]).sum()

    candidate = optimize_portfolio(_labels, truntimes, max_runtime = 900, q = 60)
    solved += _labels.shape[0] - len(candidate.unsolved)
    print("Unsolved for k = %d:" % k, len(candidate.unsolved), "/", unsolved)
    _tools.append(candidate.portfolio)

# NOTE(review): instances without an aligned prediction are counted as solved.
pre_solved = dataset.labels.shape[0] - aligned_prediction.shape[0]
print("Solved tasks:", solved, pre_solved, solved + pre_solved)

for i, tool in enumerate(_tools):
    if best_mask[i] == 0: continue
    print(dataset.label_index[i], "-->", [(dataset.label_index[a], b) for a, b in tool])

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Times selected: 826
Unsolved for k = 0: 0 / 15
Times selected: 2054
Unsolved for k = 1: 6 / 53
Times selected: 431
Unsolved for k = 4: 1 / 4
Times selected: 2257
Unsolved for k = 5: 37 / 76
Solved tasks: 3731 926 4657
bmc --> [('vaitp', 60), ('symbolic', 60), ('pa', 60), ('bmc', 900)]
ki --> [('pa', 60), ('bmc', 60), ('symbolic', 60), ('ki', 900)]
va --> [('pa', 60), ('va', 900)]
vaitp --> [('symbolic', 60), ('pa', 60), ('vaitp', 900)]


In [229]:
# The bare expression below has no visible effect: only the last expression of
# a cell is displayed.
_tools
# Shallow copy so that the optimised portfolios in _tools stay untouched.
n_tools = list(_tools)
# Hand-written replacement portfolio for tool index 1, as (tool_index, timelimit) pairs.
n_tools[1] = ((2, 60), (3, 60), (0, 200), (1, 900))
n_tools

[((5, 60), (3, 60), (2, 60), (0, 900)),
 ((2, 60), (3, 60), (0, 200), (1, 900)),
 ((2, 900),),
 ((3, 900),),
 ((2, 60), (4, 900)),
 ((3, 60), (2, 60), (5, 900))]

In [249]:
def _evaluate(candidate, y, runtimes):
    if isinstance(candidate, int):
        candidate = [(candidate, 900)]

    eval_results   = np.zeros((y.shape[0],))
    eval_runtimes  = np.zeros((y.shape[0],)) 

    running = np.ones((y.shape[0],))
    for tool, timelimit in candidate:
        labels, truntimes = y[:, tool], runtimes[:, tool]
        timemask = np.clip(timelimit - truntimes + 1, 0, 1).astype(int)

        cruntimes = timemask * truntimes + (1 - timemask) * timelimit

        eval_results  = (1 - running) * eval_results + running * timemask * labels
        eval_runtimes = (1 - running) * eval_runtimes + running * (eval_runtimes + cruntimes)
        running -= running * timemask * labels

    return eval_results, eval_runtimes

# Re-evaluate the optimised portfolios on the solvable instances of each group.
solved = 0

for k in range(labels.shape[1]):
    index = masked_selection == k
    if not np.any(index): continue

    _labels   = labels[index]
    _runtimes = runtimes[index]

    # Restrict to instances that at least one tool can solve.
    solvable  = _labels.max(axis = 1) == 1
    _labels   = _labels[solvable]
    _runtimes = _runtimes[solvable]

    candidate = _tools[k]
    result, _eval_runtimes = _evaluate(candidate, _labels, _runtimes)
    _solved    = result.sum()
    _solvable  = _labels.max(axis = 1).sum()

    # Portfolio entries index the columns of labels, which follow
    # dataset.label_index, so names are resolved through it directly.
    named_cand = [(dataset.label_index[tool], limit) for tool, limit in candidate]
    print(named_cand, int(_solved), "/", int(_solvable))
    solved += _solved

# NOTE(review): instances without an aligned prediction are counted as solved.
pre_solved = dataset.labels.shape[0] - aligned_prediction.shape[0]
print("Score before:", solved, pre_solved, solved + pre_solved)
print("Max solvable", dataset.labels.max(axis = 1).sum())

[('vaitp', 60), ('symbolic', 60), ('pa', 60), ('bmc', 900)] 545 / 545
[('pa', 60), ('bmc', 60), ('symbolic', 60), ('ki', 900)] 1507 / 1513
[('pa', 60), ('va', 900)] 350 / 351
[('symbolic', 60), ('pa', 60), ('vaitp', 900)] 1332 / 1366
Score before: 3734.0 926 4660.0
Max solvable 4563


In [207]:
# Translate every tool name into the string form of its portfolio
# ("tool:limit,tool:limit,..."); tools excluded by best_mask map to "passthrough".
mapping = {}

for tool_id, portfolio in enumerate(_tools):
    tool_label = dataset.label_index[tool_id]
    if best_mask[tool_id] == 0:
        encoded = "passthrough"
    else:
        steps   = [f"{dataset.label_index[_tool]}:{_time}" for _tool, _time in portfolio]
        encoded = ",".join(steps)
    mapping[tool_label] = encoded

mapping

{'bmc': 'vaitp:60,symbolic:60,pa:60,bmc:900',
 'ki': 'pa:60,symbolic:60,ki:900',
 'pa': 'passthrough',
 'symbolic': 'passthrough',
 'va': 'pa:60,va:900',
 'vaitp': 'symbolic:60,pa:60,vaitp:900'}

In [208]:
# Persist the tool -> portfolio mapping as indented JSON.
with open("mapping.json", "w") as mapping_file:
    mapping_file.write(json.dumps(mapping, indent = 4))