In [1]:
#python basic imports
import math
import multiprocessing
#3rd party imports (from packages, the environment)
import numpy as np
#custom (local) imports
import experiment.config as config
from util.database import Database
from util.logging import setupLogging, shutdownLogging
from IPython.display import clear_output, display, HTML
from util.worker import worker
from experiment.optimizers import MonteCarloSampling1M
import experiment.optimizers as optimizers

In [2]:
#Experimental Setup

if __name__ == '__main__':
    # Bring the logger up first so the evaluation hash is the first thing recorded.
    logger = setupLogging()
    logger.info("Evaluation %s" % (config.EVALUATION_HASH,))

    # Create the result store and fill it from the JSON file named in the config.
    db = Database()
    db.loadFromJson(config.DATABASE_PATH)

Start Logging
Evaluation ac7edd350532958d8d785b6af611fe427604846151319cc8a946360e087e35fb
could not find json file to load from: results.json


In [3]:
if __name__ == '__main__':
    # Per-problem aggregates collected from the raw results in `db` (all keyed by problem id).
    maxScoresDict = dict()          # highest 'bestscore' seen across all optimizers and runs
    bestAvgScoresDict = Database()  # per problem: best mean per target and the optimizer ids achieving it
    sumOfScoresDict = dict()        # running sum of all MonteCarloSampling1M scores
    noOfScoresDict = dict()         # running count of all MonteCarloSampling1M scores
    meanOfScoresDict = dict()       # not written in this cell; name kept in case other cells reference it
    targetValuesDict = dict()       # array with one target score per entry of config.TARGETS

    # ------------------------------------------------------------------
    # Pass 1: best score per problem and the Monte Carlo score statistics.
    # ------------------------------------------------------------------
    for optimizerId,optimizerValues in db.core.items():
        for problemId,problemValues in optimizerValues.items():
            for k,values in problemValues.items():
                score = values['bestscore']

                # Remember the best score any optimizer ever reached on this problem.
                if problemId not in maxScoresDict or score > maxScoresDict[problemId]:
                    maxScoresDict[problemId] = score

                #we calculate the mean only from the MonteCarloSampling1M optimizer.
                if ( optimizerId == MonteCarloSampling1M.__name__ ):
                    allScores = values['scores']
                    noOfScores = np.shape(allScores)[0]
                    sumOfScores = np.sum(allScores)

                    #because we do this for each k: accumulate over all runs of the sampler
                    if problemId in sumOfScoresDict:
                        sumOfScores += sumOfScoresDict[problemId]
                        noOfScores += noOfScoresDict[problemId]

                    sumOfScoresDict[problemId] = sumOfScores
                    noOfScoresDict[problemId] = noOfScores

    # ------------------------------------------------------------------
    # Pass 2: derive the target scores per problem.
    # ------------------------------------------------------------------
    #note: we only go over problems where we can calculate the mean values of f(x), e.g. where the monte carlo sampling exists.
    if ( len(sumOfScoresDict.keys()) == 0):
        logger.info("No MonteCarloSampling exists, that can happen if you use an incomplete result.json file. Don't panic, just run the Benchmark further to fix this issue")
    for problemId in sumOfScoresDict.keys():
        mx = maxScoresDict[problemId]
        sm = sumOfScoresDict[problemId]
        mean = sm / noOfScoresDict[problemId]

        def target(t):
            # Linear interpolation between the Monte Carlo mean (t=0) and the best known score (t=1).
            return mx - ( ( mx - mean ) * (1-t) )
        targetValues = np.array([ target(t) for t in config.TARGETS ])

        logger.info("mx: "+str(mx))
        logger.info("mean: "+str(mean))
        logger.info("target values of "+str(problemId)+" are: "+str(targetValues))
        targetValuesDict[problemId] = targetValues

    # ------------------------------------------------------------------
    # Pass 3: for every real optimizer, how many evaluations were needed per target.
    # ------------------------------------------------------------------
    refinedResults = Database()
    for optimizerId,optimizerValues in db.core.items():
        #dont take the dummy optimizer MonteCarloSampling1M into the final evaluation
        if ( optimizerId == MonteCarloSampling1M.__name__ ):
            continue
        for problemId in sumOfScoresDict.keys():
            if not(problemId in optimizerValues.keys()):
                continue
            problemValues = optimizerValues[problemId]
            # One row per run k, one column per target.
            indexesWhereTargetValuesWhereExceeded = np.zeros((config.K,len(config.TARGETS)))
            # Marks which rows were actually filled. Rows of runs missing from an incomplete
            # results file must not enter the statistics: a left-over zero would read as
            # "target hit on the very first evaluation".
            runIsPresent = np.zeros(config.K, dtype=bool)
            logger.info(optimizerId+" on "+problemId)
            valueCounter = 0#only for debugging / validation
            completionCounter = 0 #count how many runs actually have config.MAX_EVALUATIONS values in them
            for k,values in problemValues.items():
                allScores = values['scores']
                valueCounter += np.shape(allScores)[0]
                completionCounter += int( np.shape(allScores)[0] >= config.MAX_EVALUATIONS )
                #check, at which index the values are above the target values
                # mask[i, j] is True when score i of this run exceeds target j
                # (assumes allScores is a flat sequence of numbers -- TODO confirm against the writer of results.json)
                mask = np.expand_dims( np.array(allScores), axis=-1) > np.expand_dims(targetValuesDict[problemId],axis=-1).T
                # argmax over the score axis yields the first True per target (or 0 if there is none)
                indexWhereTargetValuesAreHit = np.argmax(mask, axis=0)
                #if the algorithm never reached the target value, it is set to the maximum number of iterations:
                neverHit = ~mask.any(axis=0)
                indexWhereTargetValuesAreHit[ neverHit ] = config.MAX_EVALUATIONS-1#since we count from index 0
                indexesWhereTargetValuesWhereExceeded[int(k),:] = indexWhereTargetValuesAreHit
                runIsPresent[int(k)] = True

            if not runIsPresent.any():
                # Nothing to average; storing a mean here would report a perfect score without any data.
                logger.info("no runs stored for "+optimizerId+" on "+problemId+", skipping")
                continue

            observedIndexes = indexesWhereTargetValuesWhereExceeded[runIsPresent]
            meanValue = np.mean(observedIndexes, axis=0) + 1 #(since we count from index 0)
            stdValue = np.std(observedIndexes, axis=0)

            refinedResults.store(optimizerId, problemId, 'mean', meanValue)
            refinedResults.store(optimizerId, problemId, 'std', stdValue)
            logger.info("mean: "+str( meanValue ) )
            logger.info("std: "+str( stdValue ) )
            logger.info("completed runs: "+str(100.0*completionCounter/config.K)+"%")
            logger.info("completed function calls: "+str(100.0*valueCounter/(config.K*config.MAX_EVALUATIONS))+"%")

    # ------------------------------------------------------------------
    # Pass 4: best optimizer(s) per problem and target.
    # ------------------------------------------------------------------
    #Determine which optimizer was the best on a problem in order to plot it thick
    for optimizerId,optimizerValues in refinedResults.core.items():
        for problemId,problemValues in optimizerValues.items():
            # Copy so that updating the running best never writes into refinedResults.
            meanValues = problemValues['mean'].copy()
            if  bestAvgScoresDict.exists(problemId):
                oldValues = bestAvgScoresDict.get(problemId, 'values')
                oldBestOptimizerIds = bestAvgScoresDict.get(problemId, 'optimizerId')
                # Fewer evaluations is better; ties are recorded as additional winners.
                mask = ( meanValues < oldValues )
                maskEqual = ( meanValues == oldValues )
                oldValues[mask] = meanValues[mask]

                for i in range(np.shape(meanValues)[0]):
                    if mask[i]: oldBestOptimizerIds[i] = [optimizerId]
                    if maskEqual[i]: oldBestOptimizerIds[i].append(optimizerId)

                bestAvgScoresDict.store(problemId, 'values', oldValues)
                bestAvgScoresDict.store(problemId, 'optimizerId', oldBestOptimizerIds)
            else:
                # First optimizer seen for this problem: it is the best for every target so far.
                bestAvgScoresDict.store(problemId, 'values', meanValues)
                bestAvgScoresDict.store(problemId, 'optimizerId',[[optimizerId] for i in range(np.shape(meanValues)[0])])


def applyFilter(currentId, filterDict):
    """Tell whether `currentId` should be skipped according to an allow-list.

    Args:
        currentId: identifier (e.g. a problem or optimizer name) to check.
        filterDict: container of identifiers that are allowed, or None to
            allow everything. Despite the name, any object supporting `in`
            works (list, set, dict, numpy array).

    Returns:
        True if `currentId` is filtered out (not contained in `filterDict`),
        False if it is kept or if no filter was given.
    """
    # Identity test on purpose: `== None` is evaluated elementwise by numpy
    # arrays and then fails with an ambiguous truth value.
    if filterDict is None:
        return False
    return currentId not in filterDict
    

No MonteCarloSampling exists, that can happen if you use an incomplete result.json file. Don't panic, just run the Benchmark further to fix this issue


In [4]:
#Render as html table
def createHTMLResultTable(tableString, index, problemFilter = None, optimizerFilter = None):
    """Append one HTML result table for a single target level to `tableString`.

    Reads the notebook globals `maxScoresDict` (problem columns),
    `refinedResults` (mean/std per optimizer and problem) and
    `bestAvgScoresDict` (winning optimizer ids per problem and target).

    Args:
        tableString: HTML built so far; the new table is appended to it.
        index: position of the target in config.TARGETS to render.
        problemFilter: optional allow-list of problem ids (None shows all).
        optimizerFilter: optional allow-list of optimizer ids (None shows all).

    Returns:
        `tableString` extended by one complete `<table>` element.
    """
    # Resolve the visible columns once so the header and every row share the same
    # order; otherwise cells can end up under the wrong problem.
    shownProblemIds = [problemId for problemId in maxScoresDict.keys()
                       if not applyFilter(problemId, problemFilter)]

    tableString += '<table>'
    tableString += '    <tr>'
    # round before int(): e.g. 0.57*100 evaluates to 56.99999999999999
    tableString += '    <td>'+str(int(round(config.TARGETS[index]*100)))+'% Target</td>'
    for problemId in shownProblemIds:
        tableString += '<td>'+problemId+'</td>'
    tableString += '</tr>'

    for optimizerId,optimizerValues in refinedResults.core.items():
        if ( applyFilter(optimizerId, optimizerFilter ) ): continue
        tableString += '<tr>'
        tableString += '<td>'+optimizerId+'</td>'

        for problemId in shownProblemIds:
            if problemId not in optimizerValues.keys():
                # No refined result for this pair: keep the column alignment with an empty cell.
                tableString += '<td></td>'
                continue
            problemValues = optimizerValues[problemId]

            # Winners (including ties) for this problem and target are printed in bold.
            isBestInCategory = ( optimizerId in (bestAvgScoresDict.get(problemId, 'optimizerId')[index]) )
            bestStartTag = '<b>' if isBestInCategory else ''
            bestEndTag = '</b>' if isBestInCategory else ''

            tableString += '<td>' + bestStartTag + str(problemValues['mean'][index]) + bestEndTag + '(+-' + ("%.2f" % problemValues['std'][index]) + ')' + '</td>'
        tableString += '</tr>'
    tableString += '</table>'
    return tableString

def renderHTMLTables(problemFilter = None, optimizerFilter = None):
    """Display one HTML result table per entry of config.TARGETS.

    Both filters are forwarded unchanged to createHTMLResultTable; None means
    that nothing is filtered.
    """
    combinedHtml = ""
    # Each call appends its table to the markup collected so far.
    for targetIndex, _ in enumerate(config.TARGETS):
        combinedHtml = createHTMLResultTable(combinedHtml, targetIndex, problemFilter, optimizerFilter)
    display(HTML(combinedHtml))
    
# Class names of every problem registered in the experiment configuration.
allProblems = [problemClass.__name__ for problemClass in config.problems.ALL.values()]
# Example of restricting the rendered optimizers (pass it as optimizerFilter below):
#optimizerFilter = [optimizers.PureRandomSearch.__name__, optimizers.BayesianOptimization.__name__, optimizers.HierarchicalOptimisticOptimization.__name__, optimizers.GradOpt.__name__]
print(allProblems)
renderHTMLTables(problemFilter = allProblems)

['AutoMPGHD', 'BreastCancerHD', 'SlumpHD', 'YachtHD', 'HousingHD', 'AutoMPG', 'BreastCancer', 'Slump', 'Yacht', 'Housing', 'HolderTable', 'Rosenbrock', 'Sphere', 'SphereHighDim', 'LinearSlope', 'DebN1', 'BraninHoo', 'Himmelblau', 'Styblinski', 'LevyN13', 'MishraN2', 'GriewankN4', 'TestProblem', 'TestProblem2']


0
90% Target

0
95% Target

0
99% Target


In [5]:



if __name__ == "__main__":
    # Final cell: shut the notebook's logging down again.
    shutdownLogging()

Shutdown logger, bye bye!
