In [1]:
#python basic imports
import math
import multiprocessing
#3rd party imports (from packages, the environment)
import numpy as np
#custom (local) imports
import experiment.config as config
from util.database import Database
from util.logging import setupLogging, shutdownLogging
from IPython.display import clear_output, display, HTML
from util.worker import worker
from experiment.optimizers import MonteCarloSampling1M
import experiment.optimizers as optimizers

In [2]:
#Experimental Setup

if __name__ == '__main__':
    logger = setupLogging()
    logger.info("Evaluation "+str(config.EVALUATION_HASH))
    db = Database()
    db.loadFromJson(config.DATABASE_PATH)

Start Logging
Evaluation 82909850012955443557adc2a05682e2ddd74e9e1421ea30ec2819dac6f9eb24


In [3]:
if __name__ == '__main__':
    maxScoresDict = dict()
    bestAvgScoresDict = Database()
    sumOfScoresDict = dict()
    noOfScoresDict = dict()
    meanOfScoresDict = dict()
    targetValuesDict = dict()
    
    for optimizerId,optimizerValues in db.core.items():
        for problemId,problemValues in optimizerValues.items():
            for k,values in problemValues.items():
                #logger.info(values.items())
                score = values['bestscore']
                #if ( (k == '1') and (optimizerId == 'GradOpt') ):
                    #logger.info(optimizerId + problemId + " k:"+k)
                    #logger.info(values['bestscore'])
                    #logger.info(values['bestparams'])

                
                if problemId in maxScoresDict:
                    if score > maxScoresDict[problemId]:
                        maxScoresDict[problemId] = score
                else:
                    maxScoresDict[problemId] = score
                
                #we calculate the mean only from the MonteCarloSampling1M optimizer.
                if ( optimizerId == MonteCarloSampling1M.__name__ ):
                    allScores = values['scores']
                    noOfScores = np.shape(allScores)[0]
                    sumOfScores = np.sum(allScores)
                    
                    #because we do this for each k:
                    if problemId in sumOfScoresDict:
                        sumOfScores += sumOfScoresDict[problemId]
                        noOfScores += noOfScoresDict[problemId]
                    
                    sumOfScoresDict[problemId] = sumOfScores
                    noOfScoresDict[problemId] = noOfScores

    #note: we only go over problems where we can calculate the mean values of f(x), e.g. where the monte carlo sampling exists.                
    if ( len(sumOfScoresDict.keys()) == 0):
        logger.info("No MonteCarloSampling exists, that can happen if you use an incomplete result.json file. Don't panic, just run the Benchmark further to fix this issue")
    for problemId in sumOfScoresDict.keys():
        mx = maxScoresDict[problemId]
        sm = sumOfScoresDict[problemId]
        mean = sm / noOfScoresDict[problemId]
        
        def target(t):
            return mx - ( ( mx - mean ) * (1-t) ) 
        targetValues = np.array([ target(t) for t in config.TARGETS ])
        
        logger.info("mx: "+str(mx))
        logger.info("mean: "+str(mean))    
        logger.info("target values of "+str(problemId)+" are: "+str(targetValues))
        targetValuesDict[problemId] = targetValues
    
    
    refinedResults = Database()
    for optimizerId,optimizerValues in db.core.items():
        #dont take the dummy optimizer MonteCarloSampling1M into the final evaluation
        if ( optimizerId == MonteCarloSampling1M.__name__ ):
            continue
        for problemId in sumOfScoresDict.keys():
            if not(problemId in optimizerValues.keys()):
                continue
            problemValues = optimizerValues[problemId]
            indexesWhereTargetValuesWhereExceeded = np.zeros((config.K,len(config.TARGETS)))
            logger.info(optimizerId+" on "+problemId)
            valueCounter = 0#only for debugging / validation
            completionCounter = 0 #count how many runs actually have config.MAX_EVALUATIONS values in them
            for k,values in problemValues.items():
                allScores = values['scores']
                valueCounter += np.shape(allScores)[0]
                completionCounter += int( np.shape(allScores)[0] >= config.MAX_EVALUATIONS )
                #check, at which index the values are above the target values
                mask = np.expand_dims( np.array(allScores), axis=-1) > np.expand_dims(targetValuesDict[problemId],axis=-1).T
                indexWhereTargetValuesAreHit = np.argmax(mask, axis=0)
                #if the algorithm never reached the target value, it is set to the maximum number of iterations:
                indexWhereTargetValuesAreHit[ np.diag( mask[indexWhereTargetValuesAreHit] ) == False ] = config.MAX_EVALUATIONS-1#since we count from index 0
                indexesWhereTargetValuesWhereExceeded[int(k),:] = indexWhereTargetValuesAreHit
            
            meanValue = np.mean(indexesWhereTargetValuesWhereExceeded, axis=0) + 1 #(since we count from index 0)
            stdValue = np.std(indexesWhereTargetValuesWhereExceeded, axis=0)
            
            refinedResults.store(optimizerId, problemId, 'mean', meanValue)
            refinedResults.store(optimizerId, problemId, 'std', stdValue)   
            logger.info("mean: "+str( meanValue ) )
            logger.info("std: "+str( stdValue ) )
            logger.info("completed runs: "+str(100.0*completionCounter/config.K)+"%")
            logger.info("completed function calls: "+str(100.0*valueCounter/(config.K*config.MAX_EVALUATIONS))+"%")
            
    
    #Determine, which optimizer was the best on on a problem in order to plot it thick
    for optimizerId,optimizerValues in refinedResults.core.items():
        for problemId,problemValues in optimizerValues.items():
            meanValues = problemValues['mean'].copy()
            if  bestAvgScoresDict.exists(problemId):
                oldValues = bestAvgScoresDict.get(problemId, 'values')
                oldBestOptimizerIds = bestAvgScoresDict.get(problemId, 'optimizerId')
                mask = ( meanValues < oldValues )
                maskEqual = ( meanValues == oldValues )
                oldValues[mask] = meanValues[mask]
                
                for i in range(np.shape(meanValues)[0]):
                    if mask[i]: oldBestOptimizerIds[i] = list([optimizerId])
                    if maskEqual[i]: oldBestOptimizerIds[i].append(optimizerId)

                bestAvgScoresDict.store(problemId, 'values', oldValues)
                bestAvgScoresDict.store(problemId, 'optimizerId', oldBestOptimizerIds)
            else:
                bestAvgScoresDict.store(problemId, 'values', meanValues)
                bestAvgScoresDict.store(problemId, 'optimizerId',[list([optimizerId]) for i in range(np.shape(meanValues)[0])])


def applyFilter(currentId, filterDict):
    if filterDict == None:
        return False
    if currentId in filterDict:
        return False
    return True
    

mx: -14.023527856747103
mean: -14.575129778839946
target values of Slump are: [-14.07868805 -14.05110795 -14.02904388]
mx: 0.09896543115652726
mean: -0.43568491983450164
target values of BreastCancer are: [0.0455004  0.07223291 0.09361893]
mx: 0.8849536315791152
mean: -0.1144000329123871
target values of Yacht are: [0.78501827 0.83498595 0.87496009]
mx: 0.998830589570028
mean: 0.12391307617569552
target values of YachtHD are: [0.91133884 0.95508471 0.99008141]
mx: -9.394761123171241
mean: -12.264340349767382
target values of SlumpHD are: [-9.68171905 -9.53824008 -9.42345692]
mx: 0.14304370708515907
mean: -0.45463565630672953
target values of BreastCancerHD are: [0.08327577 0.11315974 0.13706691]
mx: -10.683388833697906
mean: -16.169047113491903
target values of AutoMPG are: [-11.23195466 -10.95767175 -10.73824542]
mx: -1.214749793197588
mean: -6.167550156737873
target values of HousingHD are: [-1.71002983 -1.46238981 -1.2642778 ]
mx: -11.295189432095514
mean: -17.265616496150315
target

In [4]:
#Render as html table
def createHTMLResultTable(tableString, index, problemFilter = None, optimizerFilter = None):
    tableString += '<table>'
    tableString += '    <tr>'
    tableString += '    <td>'+str(int(config.TARGETS[index]*100))+'% Target</td>'

    for problemId in maxScoresDict.keys():
        if ( applyFilter(problemId, problemFilter ) ): continue
        tableString += '<td>'+problemId+'</td>'

    for optimizerId,optimizerValues in refinedResults.core.items():
        if ( applyFilter(optimizerId, optimizerFilter ) ): continue
        tableString += '<tr>'
        tableString += '<td>'+optimizerId+'</td>'

        for problemId,problemValues in optimizerValues.items():
            if ( applyFilter(problemId, problemFilter ) ): continue

            isBestInCategory = ( optimizerId in (bestAvgScoresDict.get(problemId, 'optimizerId')[index]) )
            bestStartTag = ''
            bestEndTag = ''
            if ( isBestInCategory ):
                bestStartTag = '<b>'
                bestEndTag = '</b>'
                
            tableString += '<td>' + bestStartTag + str(problemValues['mean'][index]) + bestEndTag + '(+-' +str(("%.2f" % problemValues['std'][index]))+')' + '</td>'
    tableString += '</table>'
    return tableString

def renderHTMLTables(problemFilter = None, optimizerFilter = None):
    tableString = ""
    #Create one table per target setting
    for i in range(len(config.TARGETS)):
        tableString = createHTMLResultTable(tableString, i, problemFilter, optimizerFilter)
    display(HTML(tableString))
    
allProblems = [cls.__name__ for cls in config.problems.ALL.values()]
#optimizerFilter = [optimizers.PureRandomSearch.__name__, optimizers.BayesianOptimization.__name__, optimizers.HierarchicalOptimisticOptimization.__name__, optimizers.GradOpt.__name__]
print(allProblems)
renderHTMLTables(problemFilter = allProblems)

['AutoMPGHD', 'BreastCancerHD', 'SlumpHD', 'YachtHD', 'HousingHD', 'AutoMPG', 'BreastCancer', 'Slump', 'Yacht', 'Housing', 'HolderTable', 'Rosenbrock', 'Sphere', 'SphereHighDim', 'LinearSlope', 'DebN1', 'BraninHoo', 'Himmelblau', 'Styblinski', 'LevyN13', 'MishraN2', 'GriewankN4', 'TestProblem', 'TestProblem2']


0,1,2,3,4,5,6,7,8,9,10
90% Target,Slump,Yacht,BreastCancer,YachtHD,AutoMPG,SlumpHD,BreastCancerHD,Housing,HousingHD,AutoMPGHD
AdaLipoTR,8.5(+-1.50),8.0(+-3.00),5.0(+-1.00),5.0(+-3.00),100.0(+-0.00),51.5(+-48.50),5.0(+-0.00),100.0(+-0.00),4.5(+-0.50),99.0(+-1.00)
AdaLipo,8.5(+-4.50),18.0(+-12.00),13.0(+-9.00),4.0(+-1.00),68.5(+-22.50),62.5(+-37.50),7.5(+-2.50),38.0(+-32.00),8.0(+-2.00),100.0(+-0.00)
PureRandomSearch,100.0(+-0.00),23.5(+-16.50),7.0(+-2.00),16.0(+-8.00),100.0(+-0.00),57.5(+-42.50),53.0(+-47.00),59.0(+-41.00),100.0(+-0.00),39.0(+-34.00)
HierarchicalOptimisticOptimization,100.0(+-0.00),19.5(+-4.50),7.5(+-0.50),2.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),92.5(+-7.50),100.0(+-0.00)
GradOpt,7.0(+-5.00),28.0(+-12.00),6.5(+-2.50),54.5(+-45.50),60.0(+-40.00),28.0(+-19.00),14.0(+-8.00),43.0(+-12.00),4.0(+-2.00),49.0(+-38.00)

0,1,2,3,4,5,6,7,8,9,10
95% Target,Slump,Yacht,BreastCancer,YachtHD,AutoMPG,SlumpHD,BreastCancerHD,Housing,HousingHD,AutoMPGHD
AdaLipoTR,8.5(+-1.50),8.0(+-3.00),5.0(+-1.00),5.0(+-3.00),100.0(+-0.00),87.5(+-12.50),5.0(+-0.00),100.0(+-0.00),6.0(+-1.00),99.0(+-1.00)
AdaLipo,10.0(+-6.00),34.5(+-28.50),13.0(+-9.00),5.5(+-0.50),73.0(+-27.00),62.5(+-37.50),16.5(+-3.50),38.0(+-32.00),19.5(+-9.50),100.0(+-0.00)
PureRandomSearch,100.0(+-0.00),23.5(+-16.50),31.5(+-26.50),46.0(+-22.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),59.0(+-41.00),100.0(+-0.00),100.0(+-0.00)
HierarchicalOptimisticOptimization,100.0(+-0.00),39.0(+-8.00),7.5(+-0.50),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00)
GradOpt,7.0(+-5.00),56.0(+-16.00),6.5(+-2.50),54.5(+-45.50),97.0(+-3.00),100.0(+-0.00),14.0(+-8.00),47.0(+-10.00),4.0(+-2.00),64.5(+-35.50)

0,1,2,3,4,5,6,7,8,9,10
99% Target,Slump,Yacht,BreastCancer,YachtHD,AutoMPG,SlumpHD,BreastCancerHD,Housing,HousingHD,AutoMPGHD
AdaLipoTR,9.0(+-2.00),9.0(+-4.00),7.0(+-1.00),34.0(+-32.00),100.0(+-0.00),100.0(+-0.00),5.0(+-0.00),100.0(+-0.00),6.0(+-1.00),99.0(+-1.00)
AdaLipo,10.0(+-6.00),53.5(+-46.50),18.0(+-4.00),44.0(+-14.00),100.0(+-0.00),62.5(+-37.50),28.5(+-8.50),100.0(+-0.00),50.0(+-3.00),100.0(+-0.00)
PureRandomSearch,100.0(+-0.00),36.5(+-3.50),43.0(+-15.00),80.5(+-19.50),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00)
HierarchicalOptimisticOptimization,100.0(+-0.00),39.0(+-8.00),73.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00)
GradOpt,10.5(+-1.50),86.0(+-14.00),24.5(+-20.50),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),14.0(+-8.00),100.0(+-0.00),4.0(+-2.00),100.0(+-0.00)


In [5]:



if __name__ == '__main__':
    shutdownLogging()

Shutdown logger, bye bye!
