Commit 5cc0455
Added tools for comparing integration test runs
Michael Marte committed on Jun 8, 2018
1 parent: d080016
Showing 22 changed files with 376 additions and 42 deletions.
New file with 44 added lines:
#! /usr/bin/python3

# This script compares the performance of Yuck on two given test runs.
#
# The result database is expected to reside in the working directory under the name results.db.

import argparse
import json
import numpy
import sqlite3
import statistics
import sys

def computeSpeedups(cursor, run1, run2):
    tasks = list(cursor.execute('SELECT DISTINCT problem, model, instance, problem_type FROM result ORDER BY problem, model, instance'))
    resultQuery = 'SELECT solved, moves_per_second FROM result WHERE run = ? AND problem = ? AND model = ? AND instance = ?'
    for problem, model, instance, problemType in tasks:
        result1 = cursor.execute(resultQuery, (run1, problem, model, instance))
        solved1, mps1 = result1.fetchone()
        result2 = cursor.execute(resultQuery, (run2, problem, model, instance))
        solved2, mps2 = result2.fetchone()
        yield {'problem': problem, 'model': model, 'instance': instance, 'speedup': mps2 / mps1 if mps1 and mps2 else None}

def main():
    parser = argparse.ArgumentParser(description = 'Compares the performance of Yuck on two given test runs')
    parser.add_argument('run1', metavar = 'run1')
    parser.add_argument('run2', metavar = 'run2')
    args = parser.parse_args()
    with sqlite3.connect("results.db") as conn:
        cursor = conn.cursor()
        results = list(computeSpeedups(cursor, args.run1, args.run2))
        # Skip instances without a computable speedup (None) to avoid a TypeError when sorting and aggregating.
        speedups = sorted([result['speedup'] for result in results if result['speedup'] is not None])
        stats = {
            'speedup-min': min(speedups),
            'speedup-max': max(speedups),
            'speedup-mean': statistics.mean(speedups),
            # harmonic mean might be more appropriate, but it is available only from Python 3.6
            'speedup-pstdev': statistics.pstdev(speedups),
            'speedup-median': statistics.median(speedups),
            'speedup-histogram': numpy.histogram(speedups, 'auto')[0].tolist()
        }
        print(json.dumps(stats, sort_keys = True, indent = 4))

main()
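For orientation, here is a minimal, self-contained sketch of the speedup computation above, run against an in-memory table that carries only the columns this script touches; the run labels, instance name, and moves-per-second figures are hypothetical.

import sqlite3

# Build a throwaway table with two runs of the same instance (hypothetical figures).
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE result (run TEXT, problem TEXT, model TEXT, instance TEXT, problem_type TEXT, solved INT, moves_per_second DOUBLE)')
rows = [
    ('run-a', 'tsp', 'tsp', 'br17', 'MIN', 1, 12000.0),
    ('run-b', 'tsp', 'tsp', 'br17', 'MIN', 1, 15000.0),
]
cursor.executemany('INSERT INTO result VALUES (?, ?, ?, ?, ?, ?, ?)', rows)

# Same query and ratio as in computeSpeedups above.
resultQuery = 'SELECT solved, moves_per_second FROM result WHERE run = ? AND problem = ? AND model = ? AND instance = ?'
solved1, mps1 = cursor.execute(resultQuery, ('run-a', 'tsp', 'tsp', 'br17')).fetchone()
solved2, mps2 = cursor.execute(resultQuery, ('run-b', 'tsp', 'tsp', 'br17')).fetchone()
print(mps2 / mps1 if mps1 and mps2 else None)  # 1.25: run-b performs 25% more moves per second than run-a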
New file with 51 added lines:
#! /usr/bin/python3

# This script creates a database of Yuck integration test results (results.db).
#
# To import results, call the script with a list of JSON result files on the command line.
# When the database already exists, the given results will be added unless they are
# already in the database.

import argparse
import json
import sqlite3
from itertools import repeat

def createDb(cursor):
    cursor.execute('CREATE TABLE IF NOT EXISTS result (run TEXT NOT NULL, suite TEXT NOT NULL, problem TEXT NOT NULL, model TEXT NOT NULL, instance TEXT NOT NULL, problem_type TEXT NOT NULL CONSTRAINT result_problem_type_constraint CHECK (problem_type IN ("MIN", "MAX", "SAT")), optimum INT, high_score INT, solved INT NOT NULL CONSTRAINT result_solved_constraint CHECK (solved in (0, 1)), violation INT CONSTRAINT result_violation_constraint CHECK (violation >= 0), quality INT, runtime_in_seconds DOUBLE CONSTRAINT result_runtime_in_seconds_constraint CHECK (runtime_in_seconds >= 0), moves_per_second DOUBLE CONSTRAINT result_moves_per_second_constraint CHECK (moves_per_second >= 0), CONSTRAINT result_unique_constraint UNIQUE (run, problem, model, instance) ON CONFLICT IGNORE)')
    cursor.execute('CREATE INDEX IF NOT EXISTS result_index ON result(run, problem, model, instance)')

def importResults(run, file, cursor):
    results = json.load(file)
    solverStatistics = results.get('solver-statistics')
    cursor.execute(
        'INSERT INTO result VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
        (run,
         results['task']['suite'],
         results['task']['problem'],
         results['task']['model'],
         results['task']['instance'],
         results['task']['problem-type'],
         results['task'].get('optimum'),
         results['task'].get('high-score'),
         results['result']['solved'],
         results['result'].get('violation'),
         results['result'].get('quality'),
         solverStatistics['runtime-in-seconds'] if solverStatistics else None,
         solverStatistics['moves-per-second'] if solverStatistics else None))

def main():
    parser = argparse.ArgumentParser(description = 'Puts Yuck integration test results into database')
    parser.add_argument('run', metavar = 'run')
    parser.add_argument('filenames', metavar = 'json-result-file', nargs = '+')
    args = parser.parse_args()
    with sqlite3.connect("results.db") as conn:
        cursor = conn.cursor()
        createDb(cursor)
        cursor.execute('PRAGMA foreign_keys = ON')
        for filename in args.filenames:
            with open(filename) as file:
                importResults(args.run, file, cursor)
        conn.commit()

main()
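The layout of the JSON result files can be read off importResults above. The following sketch writes one such file that the importer would accept; the file name and all values are hypothetical, and the fields marked optional may be absent.

import json

# One result file in the layout that importResults reads (hypothetical values).
result = {
    'task': {
        'suite': 'minizinc-benchmarks',   # hypothetical suite name
        'problem': 'tsp',
        'model': 'tsp',
        'instance': 'br17',
        'problem-type': 'MIN',
        'optimum': 39                     # optional; 'high-score' is optional, too
    },
    'result': {
        'solved': True,
        'violation': 0,                   # optional
        'quality': 42                     # optional
    },
    'solver-statistics': {                # optional as a whole
        'runtime-in-seconds': 300.0,
        'moves-per-second': 12000.0
    }
}

with open('tsp-br17.json', 'w') as file:  # hypothetical file name
    json.dump(result, file, indent = 4)

Given such files, the importer is called with a run label followed by the file names on the command line, as described in the script's header comment.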
New file with 104 added lines:
#! /usr/bin/python3

# This script helps to evaluate a given set of Yuck integration test runs.
#
# For each instance, the script retrieves the objective value of the best solution
# in order to compute, for each given run, a penalty between 0 and 1 (using feature
# scaling) where 0 means the solution is one of the best and 1 means it is one of the
# worst. (In case there is no solution, the penalty is 1.)
#
# In the end the script prints, for each given run, the number of instances it failed on,
# and the penalties in terms of their mean, standard deviation, and median.
#
# The database is expected to reside in the working directory under the name results.db.
#
# Notice that, by default, feature scaling uses all results that the database provides.
# To restrict the analysis to the given runs, use the -r option.
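#
# A worked example of the feature scaling (illustrative numbers, not taken from a real run):
# if the best known objective value for a minimization instance is 100 and the worst is 200,
# a run that found 150 gets penalty (150 - 100) / (200 - 100) = 0.5, a run that found 100 gets 0,
# and a run that found 200 (or found no solution at all) gets 1. For maximization problems the
# scale is flipped, so the highest objective value gets penalty 0.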

import argparse
import json
import numpy
import sqlite3
import statistics
import sys

def evalRuns(cursor, args):
    jobQuery = 'SELECT DISTINCT problem, model, instance, problem_type FROM result'
    if args.problemType:
        jobQuery += ' WHERE problem_type = ?'
    jobQuery += ' ORDER BY problem, model, instance'
    jobs = list(cursor.execute(jobQuery, (args.problemType, )) if args.problemType else cursor.execute(jobQuery))
    if not jobs:
        print('No results found', file = sys.stderr)
        return {}
    runsInScope = args.runs if args.ignoreOtherRuns else list(map(lambda result: result[0], cursor.execute('SELECT DISTINCT run from result')))
    results = {}
    penalties = {}
    failures = {}
    for run in runsInScope:
        resultQuery = 'SELECT solved, quality FROM result WHERE run = ?'
        if args.problemType:
            resultQuery += ' AND problem_type = ?'
        resultQuery += ' ORDER BY problem, model, instance'
        results[run] = list(cursor.execute(resultQuery, (run, args.problemType)) if args.problemType else cursor.execute(resultQuery, (run,)))
        if len(results[run]) != len(jobs):
            print('Expected {} results for run {}, but found {}'.format(len(jobs), run, len(results[run])), file = sys.stderr)
            return {}
    for run in args.runs:
        penalties[run] = []
        failures[run] = 0
    for i in range(0, len(jobs)):
        (problem, model, instance, problemType) = jobs[i]
        qualities = [int(result[1]) for result in [results[run][i] for run in runsInScope] if result[0] and result[1]]
        optima = [int(result[0]) for result in cursor.execute('SELECT optimum FROM result WHERE problem = ? AND model = ? AND instance = ? AND optimum IS NOT NULL', (problem, model, instance))]
        qualities += optima
        highScores = [int(result[0]) for result in cursor.execute('SELECT high_score FROM result WHERE problem = ? AND model = ? AND instance = ? AND high_score IS NOT NULL', (problem, model, instance))]
        qualities += highScores
        (low, high) = (None, None) if not qualities else (min(qualities), max(qualities))
        if args.verbose:
            print('-' * 80)
            print(problem, instance, problemType, low, high)
        for run in args.runs:
            (solved, quality) = results[run][i]
            if solved:
                if high == low:
                    penalty = 0
                elif problemType == 'MIN':
                    penalty = (quality - low) / (high - low)
                else:
                    penalty = 1 - ((quality - low) / (high - low))
                if args.verbose:
                    print(run, quality, penalty)
            else:
                failures[run] += 1
                penalty = 1
            penalties[run] += [penalty]
    return {run: {'failures': failures[run], 'penalties': penalties[run]} for run in args.runs}

def postprocessResult(result):
    penalties = result['penalties']
    return {
        'failures': result['failures'],
        'penalty-min': min(penalties),
        'penalty-max': max(penalties),
        'penalty-mean': statistics.mean(penalties),
        'penalty-pstdev': statistics.pstdev(penalties),
        'penalty-median': statistics.median(penalties),
        'penalty-histogram': numpy.histogram(penalties, 10, (0, 1))[0].tolist()
    }

def main():
    parser = argparse.ArgumentParser(description = 'Helps to evaluate a given set of Yuck integration test runs')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-r', '--ignore-other-runs', dest = 'ignoreOtherRuns', action='store_true', help = 'Ignore results from runs other than the given ones')
    parser.add_argument('-t', '--problem-type', dest = 'problemType', choices = ['SAT', 'MIN', 'MAX'], help = 'Restrict analysis to given problem type')
    parser.add_argument('runs', metavar = 'run', nargs = '+')
    args = parser.parse_args()
    with sqlite3.connect("results.db") as conn:
        cursor = conn.cursor()
        results = evalRuns(cursor, args)
        if results:
            postprocessedResults = {run: postprocessResult(results[run]) for run in results}
            print(json.dumps(postprocessedResults, sort_keys = True, indent = 4))

main()
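Since the script takes run names as positional arguments, it helps to know which runs the database holds. Below is a minimal sketch for listing them, followed by a hypothetical invocation; the script's file name is not part of this diff, while the -t and -r flags are the ones defined in main above.

import sqlite3

# List the run names available in results.db.
with sqlite3.connect('results.db') as conn:
    runs = [row[0] for row in conn.execute('SELECT DISTINCT run FROM result ORDER BY run')]
print(runs)

# Hypothetical invocation, restricted to minimization problems and to the given runs:
#   ./evaluate-runs.py -t MIN -r 2018-06-01 2018-06-08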
(The remaining changed files of this commit are not shown in this view.)