In [None]:
import impmeas as imp 
import pandas as pd
import os

# generates a fixed set of random cnfs
def generate(ds,dsname):
    """Write random k-CNF instances for a dataset spec to ``cnfs/<dsname>.csv``.

    Parameters
    ----------
    ds : iterable of (n, m, k, count)
        For every tuple, ``count`` instances are drawn via
        ``imp.random_k_cnf(n, m, k)``.
    dsname : str
        Base name of the CSV file created inside the ``cnfs`` directory.

    Each CSV row holds a running instance id, the parameters n, m, k, the
    clause list serialised as a quoted ``[..],[..]`` string, and the solver's
    ``satcount`` divided by 2 to the power of the largest variable index that
    occurs in the CNF. An existing file with the same name is deleted first.
    """
    fname = f"cnfs/{dsname}.csv"
    # Remove a stale file up front: the exclusive-create mode below would
    # otherwise fail on an existing path.
    if os.path.exists(fname):
        os.remove(fname)
    solver = imp.GPMC()
    idx = 0
    with open(fname, "x") as file:
        file.write("instance,n,m,k,cnf,expectation\n")
        for n, m, k, count in ds:
            for _ in range(count):
                cnf, _unused = imp.random_k_cnf(n, m, k)
                # Serialise as "[l1,l2,...],[...]" wrapped in double quotes so
                # the embedded commas stay inside a single CSV field.
                clause_strs = [",".join(str(var) for var in clause) for clause in cnf]
                cnf_str = "\"[" + "],[".join(clause_strs) + "]\""
                model_count = solver.satcount(cnf)
                # Normalise by the number of assignments over variables up to
                # the highest index actually used in the CNF.
                highest_var = max(max(abs(lit) for lit in cl) for cl in cnf)
                expectation = model_count / 2**highest_var
                print(n,m,k, expectation)
                file.write(f"{idx},{n},{m},{k},{cnf_str},{expectation}\n")
                idx += 1

# Dataset specifications: one (n, m, k, count) tuple per value of n, in the
# shape that generate() iterates over.
ds1 = [
    (n, int(3 * n + 0.5), 7, 10)  # m = 3n (rounded), k = 7, count = 10
    for n in range(10, 47, 3)
]
ds2 = [
    (n, int(7 * n + 0.5), 4, 10)  # m = 7n (rounded), k = 4, count = 10
    for n in range(10, 64, 3)
]
# this overwrites the datasets!
# generate(ds1, "ds1")
# generate(ds2, "ds2")

In [None]:
import json
import time
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm

import impmeas as imp

def cnf2formula(cnf):
    """Parse a serialised CNF string into a nested-tuple formula.

    Parameters
    ----------
    cnf : str
        Clause list in the form ``"[1,-2],[3]"``: clauses separated by
        ``],[``, integer literals separated by commas.

    Returns
    -------
    (formula, clauses)
        ``clauses`` is the parsed list of lists of ints. ``formula`` is a
        nested tuple: a positive literal ``i`` becomes ``("V", "xi")``, a
        negative one ``("~", ("V", "xi"))``; literals of a clause are combined
        with ``"|"`` and clauses with ``"&"``. Each newly read element becomes
        the left operand of the accumulated tuple.
    """
    # Strip the outermost brackets, then split into clauses and literals.
    clauses = []
    for chunk in cnf[1:-1].split("],["):
        clauses.append([int(token) for token in chunk.split(",")])

    def literal_node(lit):
        if lit > 0:
            return ("V", f"x{lit}")
        return ("~", ("V", f"x{-lit}"))

    conjunction = None
    for literals in clauses:
        disjunction = None
        for lit in literals:
            node = literal_node(lit)
            disjunction = node if not disjunction else ("|", node, disjunction)
        conjunction = disjunction if not conjunction else ("&", disjunction, conjunction)
    return conjunction, clauses

def test_method(model_key, model, method_key, method, n, m, k, formula):
    start = time.time()
    if methodkey != "raw_satcount": f = model.parse(formula)
    else: f = formula
    end1 = time.time()
    method(f)
    end2 = time.time()
    methodtime, overalltime = end2-end1, end2-start
    return methodtime*1000, overalltime*1000

# approx hard instances for clauses / n = 3
# (see e.g. https://www.cs.cornell.edu/selman/papers/pdf/96.aij.generating.pdf)
# also a usual ratio used in the model counting competition
# Method name -> callable that runs the measurement on the input it is given.
# Variable-specific measures are always evaluated for "x1".
methodkey2method = dict(
    blame=lambda rep: imp.blame(rep, "x1", cutoff=0),
    mblame=lambda rep: imp.blame(rep, "x1", modified=True, cutoff=0),
    influence=lambda rep: imp.influence(rep, "x1"),
    dominating_cgm=lambda rep: imp.dominating_cgm(rep),
    banzhaf=lambda rep: imp.banzhaf(rep, "x1"),
    satcount=lambda rep: rep.expectation(),
    tseitin=lambda rep: rep.tseitin(),
    # receives the raw clause list instead of a parsed representation
    raw_satcount=lambda rep: imp.GPMC().satcount(rep),
)
# Model name -> class whose parse() builds the representation to be measured.
modelkey2model = dict(
    BDD=imp.BuddyNode,
    GPMC=imp.Formula,
)
# buddy setup
imp.set_pmc_solver(imp.representation.GPMC())
# variables x1..x100 are registered up front: we never use more than 100 variables
imp.buddy_initialize(
    [f"x{i}" for i in range(1, 101)],
    nodenum=2**27,
    cachesize=2**22,
)
imp.representation.set_dynamic_reordering()
# CSV file that accumulates the timing results
fpstat = "./results/ds_random_cnfs.csv"

# executes a method on a specific cnf and measures the time
def execute_sample(index):
    """Benchmark the currently selected model/method on one CNF instance.

    Reads the module-level selection (``ds_cnfs_name``, ``modelkey``,
    ``methodkey``) and the loaded dataset ``ds_cnfs``, times the method via
    ``test_method``, appends one row to the module-level ``ds_stat`` frame,
    writes the whole frame to ``fpstat`` and returns it.

    Parameters
    ----------
    index :
        Label of the instance row in ``ds_cnfs``.

    Returns
    -------
    pandas.DataFrame
        The updated ``ds_stat``, indexed by (dataset, instance, model, method)
        with columns ``n``, ``ot`` (overall ms) and ``mt`` (method ms).
    """
    # Only ds_stat is rebound here; every other module-level name is read-only.
    global ds_stat

    row = ds_cnfs.loc[index]
    n, m, k = row["n"], row["m"], row["k"]
    form, cnf = cnf2formula(row["cnf"])
    # raw_satcount works on the plain clause list rather than the formula tuple.
    method_input = cnf if methodkey == "raw_satcount" else form

    model = modelkey2model[modelkey]
    method = methodkey2method[methodkey]
    mt, ot = test_method(modelkey, model, methodkey, method, n, m, k, method_input)

    result_index = pd.MultiIndex.from_tuples(
        [(ds_cnfs_name, index, modelkey, methodkey)],
        names=["dataset","instance","model","method"],
    )
    result_row = pd.DataFrame([{"n": n, "ot": ot, "mt": mt}], index=result_index)

    # Persist after every sample so that finished measurements are on disk.
    ds_stat = pd.concat([ds_stat, result_row])
    ds_stat.to_csv(fpstat)
    return ds_stat

In [None]:
# Global parameters.
# limits[dataset][model][method] = [n_min, n_max]; instances whose n lies
# outside this inclusive range are skipped by the loop below. Combinations
# without an entry are run on every instance.
limits = {
    "ds1": {
        "BDD": {
            "influence": [10, 46],
            "blame": [10, 28],
            "mblame": [10, 28],
            "dominating_cgm": [10, 27],
            "satcount": [10, 46],
        },
        "GPMC": {
            "blame": [10, 19],
            "mblame": [10, 19],
            "satcount": [10, 46],
            "raw_satcount": [10, 46],
            "influence": [10, 46],
        },
    },
    "ds2": {
        "BDD": {
            "influence": [10, 40],
            "blame": [10, 25],
            "mblame": [10, 25],
            "dominating_cgm": [10, 40],
            "satcount": [10, 40],
        },
        "GPMC": {
            "blame": [10, 19],
            "mblame": [10, 19],
            "influence": [10, 61],
            "satcount": [10, 61],
            "raw_satcount": [10, 61],
        },
    },
}
# Selection of what to benchmark in this run.
modelkey, methodkey, ds_cnfs_name = "BDD", "dominating_cgm", "ds1"
# Continue from earlier results if the statistics file already exists.
if os.path.exists(fpstat):
    ds_stat = pd.read_csv(fpstat, index_col=["dataset", "instance","model","method"])
else:
    ds_stat = pd.DataFrame()
ds_cnfs = pd.read_csv(f"cnfs/{ds_cnfs_name}.csv", sep=",", index_col="instance")

# iterates over all cnfs of the chosen dataset and measures the method as specified above
tpls = list(ds_cnfs.itertuples())
pbar = tqdm(tpls)
for row in pbar:
    # Run the sample when no limit is configured for this combination, or when
    # n is not outside the configured [n_min, n_max] range.
    if (ds_cnfs_name not in limits
            or modelkey not in limits[ds_cnfs_name]
            or methodkey not in limits[ds_cnfs_name][modelkey]
            or not (limits[ds_cnfs_name][modelkey][methodkey][1] < row.n
                    or limits[ds_cnfs_name][modelkey][methodkey][0] > row.n)):
        pbar.set_description(f"{modelkey} {methodkey} n={row.n} m={row.m} k={row.k} idx={row.Index}")
        ds_stat = execute_sample(row.Index)