In [None]:
import pandas as pd

def parse_results(group_data, csv_path, sep='-'):
    """Summarise per-group accuracy statistics from a results CSV.

    Rows of the CSV are assigned to a group when their ``dataset`` value
    starts with ``f"{group['name']}{sep}"``. The separator is part of the
    prefix so that, for example, group ``uf20`` does not also capture
    rows belonging to ``uf200``.

    Parameters
    ----------
    group_data : iterable of dict
        One dict per group; each must provide the keys ``'name'``,
        ``'vars'`` and ``'clauses'``. ``'vars'`` and ``'clauses'`` are
        copied to the output unchanged.
    csv_path : str or path-like
        File passed to ``pandas.read_csv``. It must contain the columns
        ``'dataset'`` and ``'accuracy'``.
    sep : str, default '-'
        Text that follows the group name in the ``dataset`` values.

    Returns
    -------
    pandas.DataFrame
        One row per group, in the order of ``group_data``, with a fresh
        0..n-1 index and the columns ``Set``, ``#vars``, ``#clauses``,
        ``#instances`` (non-null accuracies), ``mean``, ``min``,
        ``#perfect`` (rows with accuracy exactly 1) and ``ratio perfect``
        (``#perfect`` divided by the number of matching rows, or 0 when
        no row matches). ``mean`` and ``min`` are NaN for a group without
        matching rows.
    """
    df = pd.read_csv(csv_path)

    header = ['Set', '#vars', '#clauses', '#instances', 'mean', 'min', '#perfect', 'ratio perfect']

    # Rows are collected in a plain list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0, and growing a
    # frame inside a loop copies it on every iteration.
    rows = []
    for group in group_data:
        prefix = f"{group['name']}{sep}"
        # na=False: a row whose dataset name is missing matches no group
        # (a NaN in the mask would make the selection below raise).
        mask = df['dataset'].str.startswith(prefix, na=False)
        acc = df.loc[mask, 'accuracy']

        matched = int(mask.sum())
        perfect_count = int((acc == 1).sum())
        ratio_perfect = perfect_count / matched if matched > 0 else 0

        rows.append([
            group['name'], group['vars'], group['clauses'],
            acc.count(), acc.mean(), acc.min(),
            perfect_count, ratio_perfect,
        ])

    return pd.DataFrame(rows, columns=header)

In [None]:
# Summary statistics for the uniform random 3-SAT instances at the phase transition (SATLIB "uf" sets).
# Benchmark description: https://www.cs.ubc.ca/~hoos/SATLIB/Benchmarks/SAT/RND3SAT/descr.html

'''
UF instances are prefixed by:
uf20: 20 variables, 91 clauses - 1000 instances, all sat
uf50: 50 variables, 218 clauses - 1000 instances, all sat
uf75: 75 variables, 325 clauses - 100 instances, all sat
uf100: 100 variables, 430 clauses - 1000 instances, all sat
uf125: 125 variables, 538 clauses - 100 instances, all sat
uf150: 150 variables, 645 clauses - 100 instances, all sat
uf175: 175 variables, 753 clauses - 100 instances, all sat
uf200: 200 variables, 860 clauses - 100 instances, all sat
uf225: 225 variables, 960 clauses - 100 instances, all sat
uf250: 250 variables, 1065 clauses - 100 instances, all sat
'''
# The three lists are parallel: entry i of each describes the same instance set.
var_count = [20, 50, 75, 100, 125, 150, 175, 200, 225, 250]
clause_count = [91, 218, 325, 430, 538, 645, 753, 860, 960, 1065]
# Each set is named "uf" followed by its number of variables.
group_prefixes = [f'uf{n_vars}' for n_vars in var_count]

# One descriptor per set, in the shape parse_results expects.
group_data = [
    dict(name=prefix, vars=n_vars, clauses=n_clauses)
    for prefix, n_vars, n_clauses in zip(group_prefixes, var_count, clause_count)
]
stats = parse_results(group_data, 'results_2020-08-04/satlib_uf_mlp.csv')
display(stats)


In [None]:
# Summary statistics for the uniform instances with controlled backbone size (SATLIB "CBS" sets).
# Benchmark description: https://www.cs.ubc.ca/~hoos/SATLIB/Benchmarks/SAT/CBS/descr_CBS.html
import pandas as pd
import itertools
'''
CBS instances are prefixed by
CBS_k3_nV_mM_bB, where V = 100, M = [403, 411, 418, 423, 429, 435, 441, 449] and B = [10, 30, 50, 70, 90]
'''

# A CBS group exists for every combination of the three parameter lists below.
var_count = [100]
clause_count = [403, 411, 418, 423, 429, 435, 441, 449]
backbone_sizes = [10, 30, 50, 70, 90]

# NOTE(review): `df`, `header` and `statistics` are not read again in this cell
# (parse_results loads the CSV itself); they stay bound in case other cells use them.
df = pd.read_csv('results_2020-08-04/satlib_cbs_mlp.csv')

header = ['instances', '#vars', '#clauses', 'count', 'mean', 'min', '#perfect', 'ratio perfect']
statistics = pd.DataFrame(columns=header)

# One descriptor per (variables, clauses, backbone size) combination; the
# backbone size only appears in the group name.
data = [
    {'name': f'CBS_k3_n{n_vars}_m{n_clauses}_b{backbone}', 'vars': n_vars, 'clauses': n_clauses}
    for n_vars, n_clauses, backbone in itertools.product(var_count, clause_count, backbone_sizes)
]

# The group name is followed by '_' in the dataset values of this CSV, hence sep='_'.
stats = parse_results(data, 'results_2020-08-04/satlib_cbs_mlp.csv', sep='_')
display(stats)


In [None]:
# Summary statistics for the graph colouring (SATLIB "GCP" / flat) instances.
# Benchmark description: https://www.cs.ubc.ca/~hoos/SATLIB/Benchmarks/SAT/GCP/descr.html
import pandas as pd
'''
GCP instances are prefixed by
flatN, where N is the number of vertices
'''

# Parallel lists: entry i of each describes the same flatN set.
vertices = [30, 50, 75, 100, 125, 150, 175, 200]
clauses = [300, 545, 840, 1117, 1403, 1680, 1951, 2237]

# The variable count is three times the vertex count
# (presumably one variable per vertex/colour pair with 3 colours - confirm against the description).
data = [
    dict(name=f'flat{n_vertices}', vars=3 * n_vertices, clauses=n_clauses)
    for n_vertices, n_clauses in zip(vertices, clauses)
]
stats = parse_results(data, 'results_2020-08-04/satlib_gcp_mlp.csv', sep='-')
display(stats)
#df = pd.read_csv('results_2020-08-04/satlib_gcp_mlp.csv')
#display(df)
