#! /usr/bin/python3
# This script helps to evaluate a given set of Yuck integration test runs.
#
# For each instance, the script retrieves the objective values of the best and worst
# known solutions in order to compute, for each given run, a penalty between 0 and 1
# (using feature scaling) where 0 means the run's solution is among the best and 1
# means it is among the worst. (In case there is no solution, the penalty is 1.)
#
# In the end the script prints, for each given run, the number of instances it failed on,
# and the penalties in terms of their minimum, maximum, mean, standard deviation, median,
# and a histogram.
#
# The database is expected to reside in the working directory under the name results.db.
#
# Notice that, by default, feature scaling uses all results that the database provides.
# To restrict the analysis to the given runs, use the -r option.
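#
# Example invocation (the run names below are placeholders and must match values of
# the run column in results.db):
#
#   ./eval-runs.py -r run-before run-after
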
import argparse
import json
import numpy
import sqlite3
import statistics
import sys
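
# Note: the queries below assume a result table that provides at least the columns
# run, problem, model, instance, problem_type, solved, quality, optimum, and high_score.
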
def evalRuns(cursor, args):
    jobQuery = 'SELECT DISTINCT problem, model, instance, problem_type FROM result'
    if args.problemType:
        jobQuery += ' WHERE problem_type = ?'
    jobQuery += ' ORDER BY problem, model, instance'
    jobs = list(cursor.execute(jobQuery, (args.problemType, )) if args.problemType else cursor.execute(jobQuery))
    if not jobs:
        print('No results found', file = sys.stderr)
        return {}
    runsInScope = args.runs if args.ignoreOtherRuns else list(map(lambda result: result[0], cursor.execute('SELECT DISTINCT run from result')))
    results = {}
    penalties = {}
    failures = {}
    for run in runsInScope:
        resultQuery = 'SELECT solved, quality FROM result WHERE run = ?'
        if args.problemType:
            resultQuery += ' AND problem_type = ?'
        resultQuery += ' ORDER BY problem, model, instance'
        results[run] = list(cursor.execute(resultQuery, (run, args.problemType)) if args.problemType else cursor.execute(resultQuery, (run,)))
        if len(results[run]) != len(jobs):
            print('Expected {} results for run {}, but found {}'.format(len(jobs), run, len(results[run])), file = sys.stderr)
            return {}
    for run in args.runs:
        penalties[run] = []
        failures[run] = 0
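    # For each instance, pool the qualities achieved by the runs in scope together with
    # any stored optima and high scores; their minimum and maximum define the scale used
    # for feature scaling below.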
    for i in range(0, len(jobs)):
        (problem, model, instance, problemType) = jobs[i]
        qualities = [int(result[1]) for result in [results[run][i] for run in runsInScope] if result[0] and result[1]]
        optima = [int(result[0]) for result in cursor.execute('SELECT optimum FROM result WHERE problem = ? AND model = ? AND instance = ? AND optimum IS NOT NULL', (problem, model, instance))]
        qualities += optima
        highScores = [int(result[0]) for result in cursor.execute('SELECT high_score FROM result WHERE problem = ? AND model = ? AND instance = ? AND high_score IS NOT NULL', (problem, model, instance))]
        qualities += highScores
        (low, high) = (None, None) if not qualities else (min(qualities), max(qualities))
        if args.verbose:
            print('-' * 80)
            print(problem, instance, problemType, low, high)
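        # Feature scaling over [low, high]: for MIN problems a lower quality is better,
        # so penalty = (quality - low) / (high - low); otherwise the scale is inverted.
        # A run that produced no solution for the instance gets the worst penalty, 1.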
        for run in args.runs:
            (solved, quality) = results[run][i]
            if solved:
                if high == low:
                    penalty = 0
                elif problemType == 'MIN':
                    penalty = (quality - low) / (high - low)
                else:
                    penalty = 1 - ((quality - low) / (high - low))
                if args.verbose:
                    print(run, quality, penalty)
            else:
                failures[run] += 1
                penalty = 1
            penalties[run] += [penalty]
    return {run: {'failures': failures[run], 'penalties': penalties[run]} for run in args.runs}
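
# Condenses the penalties collected by evalRuns for one run into summary statistics:
# minimum, maximum, mean, population standard deviation, median, and a histogram over
# 10 equal-width bins covering [0, 1].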
def postprocessResult(result):
    penalties = result['penalties']
    return {
        'failures': result['failures'],
        'penalty-min': min(penalties),
        'penalty-max': max(penalties),
        'penalty-mean': statistics.mean(penalties),
        'penalty-pstdev': statistics.pstdev(penalties),
        'penalty-median': statistics.median(penalties),
        'penalty-histogram': numpy.histogram(penalties, 10, (0, 1))[0].tolist()
    }
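
# Parses the command line, evaluates the given runs against results.db in the working
# directory, and prints a JSON object that maps each run to its summary statistics.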
def main():
    parser = argparse.ArgumentParser(description = 'Helps to evaluate a given set of Yuck integration test runs')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-r', '--ignore-other-runs', dest = 'ignoreOtherRuns', action='store_true', help = 'Ignore results from runs other than the given ones')
    parser.add_argument('-t', '--problem-type', dest = 'problemType', choices = ['SAT', 'MIN', 'MAX'], help = 'Restrict analysis to given problem type')
    parser.add_argument('runs', metavar = 'run', nargs = '+')
    args = parser.parse_args()
    with sqlite3.connect("results.db") as conn:
        cursor = conn.cursor()
        results = evalRuns(cursor, args)
        if results:
            postprocessedResults = {run: postprocessResult(results[run]) for run in results}
            print(json.dumps(postprocessedResults, sort_keys = True, indent = 4))

main()