In [1]:
#python basic imports
import math
import multiprocessing
#3rd party imports (from packages, the environment)
import numpy as np
#custom (local) imports
import experiment.config as config
from util.database import Database
from util.logging import setupLogging, shutdownLogging
from IPython.display import clear_output, display, HTML
from util.worker import worker
from experiment.optimizers import MonteCarloSampling1M
import experiment.optimizers as optimizers

In [2]:
#Experimental Setup

if __name__ == '__main__':
    #start logging first, so that the evaluation hash is written before anything else happens
    logger = setupLogging()
    logger.info("Evaluation "+str(config.EVALUATION_HASH))
    #load the stored results from config.DATABASE_PATH; the following cells read them via db.core
    db = Database()
    db.loadFromJson(config.DATABASE_PATH)

Start Logging
Evaluation a2222ebe605a0e47042bf1a11b9045613d7866f032032907b73d23fa9dc5c810


In [3]:
if __name__ == '__main__':
    maxScoresDict = dict()
    bestAvgScoresDict = Database()
    sumOfScoresDict = dict()
    noOfScoresDict = dict()
    meanOfScoresDict = dict()
    targetValuesDict = dict()
    
    for optimizerId,optimizerValues in db.core.items():
        for problemId,problemValues in optimizerValues.items():
            for k,values in problemValues.items():
                #logger.info(values.items())
                score = values['bestscore']

                
                if problemId in maxScoresDict:
                    if score > maxScoresDict[problemId]:
                        maxScoresDict[problemId] = score
                else:
                    maxScoresDict[problemId] = score
                
                #we calculate the mean only from the MonteCarloSampling1M optimizer.
                if ( optimizerId == MonteCarloSampling1M.__name__ ):
                    allScores = values['scores']
                    noOfScores = np.shape(allScores)[0]
                    sumOfScores = np.sum(allScores)
                    
                    #because we do this for each k:
                    if problemId in sumOfScoresDict:
                        sumOfScores += sumOfScoresDict[problemId]
                        noOfScores += noOfScoresDict[problemId]
                    
                    sumOfScoresDict[problemId] = sumOfScores
                    noOfScoresDict[problemId] = noOfScores

    #note: we only go over problems where we can calculate the mean values of f(x), e.g. where the monte carlo sampling exists.                
    if ( len(sumOfScoresDict.keys()) == 0):
        logger.info("No MonteCarloSampling exists, that can happen if you use an incomplete result.json file. Don't panic, just run the Benchmark further to fix this issue")
    for problemId in sumOfScoresDict.keys():
        mx = maxScoresDict[problemId]
        sm = sumOfScoresDict[problemId]
        mean = sm / noOfScoresDict[problemId]
        
        def target(t):
            return mx - ( ( mx - mean ) * (1-t) ) 
        targetValues = np.array([ target(t) for t in config.TARGETS ])
        
        logger.info("mx: "+str(mx))
        logger.info("mean: "+str(mean))    
        logger.info("target values of "+str(problemId)+" are: "+str(targetValues))
        targetValuesDict[problemId] = targetValues
    
    
    refinedResults = Database()
    for optimizerId,optimizerValues in db.core.items():
        #dont take the dummy optimizer MonteCarloSampling1M into the final evaluation
        if ( optimizerId == MonteCarloSampling1M.__name__ ):
            continue
        for problemId in sumOfScoresDict.keys():
            if not(problemId in optimizerValues.keys()):
                continue
            problemValues = optimizerValues[problemId]
            indexesWhereTargetValuesWhereExceeded = np.zeros((config.K,len(config.TARGETS)))
            logger.info(optimizerId+" on "+problemId)
            valueCounter = 0#only for debugging / validation
            completionCounter = 0 #count how many runs actually have config.MAX_EVALUATIONS values in them
            for k,values in problemValues.items():
                allScores = values['scores']
                valueCounter += np.shape(allScores)[0]
                completionCounter += int( np.shape(allScores)[0] >= config.MAX_EVALUATIONS )
                #check, at which index the values are above the target values
                mask = np.expand_dims( np.array(allScores), axis=-1) > np.expand_dims(targetValuesDict[problemId],axis=-1).T
                indexWhereTargetValuesAreHit = np.argmax(mask, axis=0)
                #if the algorithm never reached the target value, it is set to the maximum number of iterations:
                indexWhereTargetValuesAreHit[ np.diag( mask[indexWhereTargetValuesAreHit] ) == False ] = config.MAX_EVALUATIONS-1#since we count from index 0
                indexesWhereTargetValuesWhereExceeded[int(k),:] = indexWhereTargetValuesAreHit
            
            meanValue = np.mean(indexesWhereTargetValuesWhereExceeded, axis=0) + 1 #(since we count from index 0)
            stdValue = np.std(indexesWhereTargetValuesWhereExceeded, axis=0)
            
            refinedResults.store(optimizerId, problemId, 'mean', meanValue)
            refinedResults.store(optimizerId, problemId, 'std', stdValue)   
            logger.info("mean: "+str( meanValue ) )
            logger.info("std: "+str( stdValue ) )
            logger.info("completed runs: "+str(100.0*completionCounter/config.K)+"%")
            logger.info("completed function calls: "+str(100.0*valueCounter/(config.K*config.MAX_EVALUATIONS))+"%")
            
    
    #Determine, which optimizer was the best on on a problem in order to plot it thick
    for optimizerId,optimizerValues in refinedResults.core.items():
        for problemId,problemValues in optimizerValues.items():
            meanValues = problemValues['mean'].copy()
            if  bestAvgScoresDict.exists(problemId):
                oldValues = bestAvgScoresDict.get(problemId, 'values')
                oldBestOptimizerIds = bestAvgScoresDict.get(problemId, 'optimizerId')
                mask = ( meanValues < oldValues )
                maskEqual = ( meanValues == oldValues )
                oldValues[mask] = meanValues[mask]
                
                for i in range(np.shape(meanValues)[0]):
                    if mask[i]: oldBestOptimizerIds[i] = list([optimizerId])
                    if maskEqual[i]: oldBestOptimizerIds[i].append(optimizerId)

                bestAvgScoresDict.store(problemId, 'values', oldValues)
                bestAvgScoresDict.store(problemId, 'optimizerId', oldBestOptimizerIds)
            else:
                bestAvgScoresDict.store(problemId, 'values', meanValues)
                bestAvgScoresDict.store(problemId, 'optimizerId',[list([optimizerId]) for i in range(np.shape(meanValues)[0])])


def applyFilter(currentId, filterDict):
    """Tell whether `currentId` has to be skipped according to a whitelist.

    Parameters:
        currentId: the id to check.
        filterDict: container of the ids that are allowed, or None to allow everything.

    Returns:
        True if a filter is given and `currentId` is not contained in it
        (i.e. the caller should skip the id), False otherwise.
    """
    #identity check: `== None` breaks for containers with a custom / elementwise __eq__ (e.g. numpy arrays)
    if filterDict is None:
        return False
    return currentId not in filterDict
    

mx: 19.20850256788675
mean: 2.4500232848622496
target values of HolderTable are: [17.53265464 18.3705786  19.04091778]
mx: -0.0
mean: -983.9557922191774
target values of Rosenbrock are: [-98.39557922 -49.19778961  -9.83955792]
mx: -0.00021933193784503063
mean: -0.8039187927059791
target values of Sphere are: [-0.08058928 -0.0404043  -0.00825633]
mx: 0.0
mean: -32.37210788065871
target values of LinearSlope are: [-3.23721079 -1.61860539 -0.32372108]
mx: 1.0
mean: 0.31219509287791364
target values of DebN1 are: [0.93121951 0.96560975 0.99312195]
mx: -0.01626871174217684
mean: -1.3036676084799184
target values of SphereHighDim are: [-0.1450086  -0.08063866 -0.0291427 ]
mx: 308.12909601160663
mean: 55.155793790348106
target values of BraninHoo are: [282.83176579 295.4804309  305.59936299]
mx: -5.357231936605656e-22
mean: -137.00843324999283
target values of Himmelblau are: [-13.70084332  -6.85042166  -1.37008433]
mx: 78.33233140754284
mean: 8.093963766406791
target values of Styblinski are

completed runs: 100.0%
completed function calls: 100.0%
GradOpt on HolderTable
mean: [57.  91.6 96.9]
std: [35.23066846 17.50542773  9.3       ]
completed runs: 100.0%
completed function calls: 101.0%
GradOpt on Rosenbrock
mean: [32.4 49.2 68.2]
std: [20.74222746 31.22114668 30.79870127]
completed runs: 100.0%
completed function calls: 101.0%
GradOpt on Sphere
mean: [100. 100. 100.]
std: [0. 0. 0.]
completed runs: 100.0%
completed function calls: 101.0%
GradOpt on LinearSlope
mean: [100. 100. 100.]
std: [0. 0. 0.]
completed runs: 100.0%
completed function calls: 101.0%
GradOpt on DebN1
mean: [100. 100. 100.]
std: [0. 0. 0.]
completed runs: 100.0%
completed function calls: 101.0%
GradOpt on SphereHighDim
mean: [100. 100. 100.]
std: [0. 0. 0.]
completed runs: 100.0%
completed function calls: 101.0%
GradOpt on BraninHoo
mean: [22.5 23.3 26.4]
std: [20.98213526 21.82681837 21.86869909]
completed runs: 100.0%
completed function calls: 101.0%
GradOpt on Himmelblau
mean: [75.8 88.9 91.1]
std:

In [4]:
#Render as html table
def createHTMLResultTable(tableString, index, problemFilter = None, optimizerFilter = None):
    """Append one HTML result table for a single target setting to `tableString`.

    Reads the module level results `maxScoresDict`, `refinedResults` and
    `bestAvgScoresDict` that are filled by the evaluation cell.

    Parameters:
        tableString: the HTML generated so far, the new table is appended to it.
        index: position of the target in config.TARGETS that the table shows.
        problemFilter: container of problem ids to show, None shows all.
        optimizerFilter: container of optimizer ids to show, None shows all.

    Returns:
        `tableString` extended by the new table. The first row lists the problems,
        each further row holds mean(+-std) of one optimizer; the best mean per
        problem is printed bold, a missing result is shown as '-'.
    """
    #fix the columns once, so that the header and every optimizer row stay aligned
    problemIds = [problemId for problemId in maxScoresDict.keys() if not applyFilter(problemId, problemFilter)]

    tableString += '<table>'
    tableString += '    <tr>'
    #round instead of truncate: e.g. 0.57*100 is 56.99999999999999 in floating point
    tableString += '    <td>'+str(int(round(config.TARGETS[index]*100)))+'% Target</td>'
    for problemId in problemIds:
        tableString += '<td>'+problemId+'</td>'
    tableString += '</tr>'

    for optimizerId,optimizerValues in refinedResults.core.items():
        if ( applyFilter(optimizerId, optimizerFilter ) ): continue
        tableString += '<tr>'
        tableString += '<td>'+optimizerId+'</td>'

        for problemId in problemIds:
            #keep the column, otherwise all following cells would shift to the left
            if problemId not in optimizerValues:
                tableString += '<td>-</td>'
                continue
            problemValues = optimizerValues[problemId]

            isBestInCategory = ( optimizerId in (bestAvgScoresDict.get(problemId, 'optimizerId')[index]) )
            bestStartTag = ''
            bestEndTag = ''
            if ( isBestInCategory ):
                bestStartTag = '<b>'
                bestEndTag = '</b>'

            tableString += '<td>' + bestStartTag + str(problemValues['mean'][index]) + bestEndTag + '(+-' +str(("%.2f" % problemValues['std'][index]))+')' + '</td>'
        tableString += '</tr>'
    tableString += '</table>'
    return tableString

def renderHTMLTables(problemFilter = None, optimizerFilter = None):
    """Display the result tables of all targets in config.TARGETS below the cell.

    Parameters:
        problemFilter: passed on to createHTMLResultTable, None shows all problems.
        optimizerFilter: passed on to createHTMLResultTable, None shows all optimizers.
    """
    html = ""
    #every call appends the table of one target setting to the html collected so far
    for targetIndex, _ in enumerate(config.TARGETS):
        html = createHTMLResultTable(html, targetIndex, problemFilter, optimizerFilter)
    display(HTML(html))
    
#the class names of all problems registered in config.problems.ALL are used as problem ids
allProblems = []
for problemClass in config.problems.ALL.values():
    allProblems.append(problemClass.__name__)
#optimizerFilter = [optimizers.PureRandomSearch.__name__, optimizers.BayesianOptimization.__name__, optimizers.HierarchicalOptimisticOptimization.__name__, optimizers.GradOpt.__name__]
print(allProblems)
renderHTMLTables(problemFilter = allProblems)

['AutoMPG', 'BreastCancer', 'Slump', 'Yacht', 'Housing', 'HolderTable', 'Rosenbrock', 'Sphere', 'SphereHighDim', 'LinearSlope', 'DebN1', 'BraninHoo', 'Himmelblau', 'Styblinski', 'LevyN13', 'MishraN2', 'GriewankN4', 'TestProblem', 'TestProblem2']


0,1,2,3,4,5,6,7,8,9,10,11,12
90% Target,HolderTable,Rosenbrock,Sphere,LinearSlope,DebN1,SphereHighDim,BraninHoo,Himmelblau,Styblinski,LevyN13,MishraN2,GriewankN4
AdaLipoTR,44.7(+-27.34),1.0(+-0.00),26.1(+-4.99),6.6(+-0.80),88.0(+-17.41),83.7(+-18.99),20.7(+-24.13),10.9(+-5.97),35.1(+-19.02),1.0(+-0.00),6.1(+-1.81),9.9(+-2.12)
AdaLipo,29.1(+-14.06),1.0(+-0.00),42.3(+-15.23),8.7(+-2.45),100.0(+-0.00),100.0(+-0.00),19.6(+-27.63),22.7(+-7.01),49.8(+-19.86),1.0(+-0.00),4.8(+-2.48),22.7(+-4.34)
PureRandomSearch,64.6(+-37.27),9.1(+-7.65),94.6(+-16.20),100.0(+-0.00),99.1(+-2.70),100.0(+-0.00),100.0(+-0.00),12.9(+-12.98),54.5(+-38.66),4.1(+-3.24),7.6(+-6.20),56.6(+-36.52)
HierarchicalOptimisticOptimization,55.4(+-12.15),7.5(+-1.50),100.0(+-0.00),32.7(+-1.73),31.0(+-0.00),100.0(+-0.00),100.0(+-0.00),3.8(+-0.40),3.8(+-0.40),1.0(+-0.00),5.5(+-0.50),7.4(+-0.49)
GradOpt,57.0(+-35.23),32.4(+-20.74),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),22.5(+-20.98),75.8(+-36.04),79.6(+-33.26),1.0(+-0.00),8.2(+-3.46),87.3(+-29.10)

0,1,2,3,4,5,6,7,8,9,10,11,12
95% Target,HolderTable,Rosenbrock,Sphere,LinearSlope,DebN1,SphereHighDim,BraninHoo,Himmelblau,Styblinski,LevyN13,MishraN2,GriewankN4
AdaLipoTR,50.0(+-26.88),1.0(+-0.00),35.3(+-3.77),7.0(+-0.00),95.0(+-13.12),96.4(+-7.32),22.1(+-23.40),15.3(+-6.66),36.9(+-18.64),1.0(+-0.00),7.6(+-1.28),11.0(+-1.26)
AdaLipo,41.2(+-25.26),1.0(+-0.00),72.4(+-26.27),29.2(+-16.81),100.0(+-0.00),100.0(+-0.00),23.6(+-27.95),30.3(+-16.50),61.1(+-23.75),1.0(+-0.00),6.7(+-3.10),29.9(+-13.73)
PureRandomSearch,91.1(+-26.70),16.6(+-10.87),94.6(+-16.20),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),30.2(+-34.95),63.6(+-37.28),6.8(+-7.59),19.8(+-16.69),91.0(+-24.75)
HierarchicalOptimisticOptimization,57.7(+-11.86),7.5(+-1.50),100.0(+-0.00),45.9(+-1.97),31.0(+-0.00),100.0(+-0.00),100.0(+-0.00),25.0(+-4.80),47.0(+-18.58),1.0(+-0.00),9.4(+-0.49),13.2(+-0.40)
GradOpt,91.6(+-17.51),49.2(+-31.22),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),23.3(+-21.83),88.9(+-26.78),89.0(+-22.76),1.0(+-0.00),17.8(+-7.92),100.0(+-0.00)

0,1,2,3,4,5,6,7,8,9,10,11,12
99% Target,HolderTable,Rosenbrock,Sphere,LinearSlope,DebN1,SphereHighDim,BraninHoo,Himmelblau,Styblinski,LevyN13,MishraN2,GriewankN4
AdaLipoTR,52.9(+-24.37),1.0(+-0.00),61.8(+-9.43),7.4(+-0.80),96.6(+-10.20),98.2(+-4.02),22.4(+-23.72),20.8(+-6.10),40.8(+-17.19),6.6(+-2.62),27.7(+-22.37),34.0(+-9.18)
AdaLipo,79.1(+-28.88),1.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),64.8(+-25.72),49.3(+-27.28),89.8(+-16.55),7.9(+-1.45),16.2(+-8.82),96.9(+-9.30)
PureRandomSearch,100.0(+-0.00),57.8(+-36.49),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),71.0(+-36.03),93.1(+-20.70),14.3(+-8.71),87.8(+-25.84),100.0(+-0.00)
HierarchicalOptimisticOptimization,57.7(+-11.86),7.5(+-1.50),100.0(+-0.00),100.0(+-0.00),31.0(+-0.00),100.0(+-0.00),100.0(+-0.00),83.1(+-33.80),75.6(+-29.92),21.8(+-1.25),25.1(+-1.64),66.0(+-2.83)
GradOpt,96.9(+-9.30),68.2(+-30.80),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),100.0(+-0.00),26.4(+-21.87),91.1(+-26.70),100.0(+-0.00),18.1(+-22.86),65.6(+-28.33),100.0(+-0.00)


In [5]:



if __name__ == '__main__':
    #last cell of the notebook: shut the logging down again that the setup cell started via setupLogging()
    shutdownLogging()

Shutdown logger, bye bye!
