In [1]:
#python basic imports
import math
import multiprocessing
#3rd party imports (from packages, the environment)
import numpy as np
#custom (local) imports
import experiment.config as config
from util.database import Database
from util.logging import setupLogging, shutdownLogging
from IPython.display import clear_output, display, HTML
from util.worker import worker
from experiment.optimizers import MonteCarloSampling1M
import experiment.optimizers as optimizers

In [2]:
#Experimental Setup

if __name__ == '__main__':
    # Logging is brought up first so that the evaluation hash is the first thing recorded.
    logger = setupLogging()
    logger.info("Evaluation %s" % (config.EVALUATION_HASH,))

    # Load the benchmark results that all following cells evaluate.
    db = Database()
    db.loadFromJson(config.DATABASE_PATH)

Start Logging
Evaluation 3f6a02d78d7d3c2b804f1d2107bda68a81fdd56bddb66d8eb1474bc9517c692a


In [3]:
if __name__ == '__main__':
    # Evaluation pipeline of this cell:
    #   1. scan the raw results for the best score per problem and for the
    #      mean score of the MonteCarloSampling1M reference sampler,
    #   2. derive the per-problem target values from max and mean,
    #   3. for every real optimizer, find the evaluation index at which each
    #      target was first exceeded (mean / std over the K runs),
    #   4. remember, per problem and target, which optimizer(s) had the lowest mean.
    maxScoresDict = dict()          # problemId -> highest 'bestscore' over all optimizers and runs
    bestAvgScoresDict = Database()  # problemId -> 'values' (best mean per target) and 'optimizerId' (who achieved it)
    sumOfScoresDict = dict()        # problemId -> sum of all MonteCarloSampling1M scores
    noOfScoresDict = dict()         # problemId -> number of MonteCarloSampling1M scores
    meanOfScoresDict = dict()
    targetValuesDict = dict()       # problemId -> array with one target value per entry of config.TARGETS
    
    for optimizerId,optimizerValues in db.core.items():
        for problemId,problemValues in optimizerValues.items():
            for k,values in problemValues.items():
                score = values['bestscore']

                # running maximum over every optimizer and every run
                if problemId in maxScoresDict:
                    if score > maxScoresDict[problemId]:
                        maxScoresDict[problemId] = score
                else:
                    maxScoresDict[problemId] = score
                
                #we calculate the mean only from the MonteCarloSampling1M optimizer.
                if ( optimizerId == MonteCarloSampling1M.__name__ ):
                    allScores = values['scores']
                    noOfScores = np.shape(allScores)[0]
                    sumOfScores = np.sum(allScores)
                    
                    #because we do this for each k:
                    if problemId in sumOfScoresDict:
                        sumOfScores += sumOfScoresDict[problemId]
                        noOfScores += noOfScoresDict[problemId]
                    
                    sumOfScoresDict[problemId] = sumOfScores
                    noOfScoresDict[problemId] = noOfScores

    #note: we only go over problems where we can calculate the mean values of f(x), e.g. where the monte carlo sampling exists.                
    if ( len(sumOfScoresDict.keys()) == 0):
        logger.info("No MonteCarloSampling exists, that can happen if you use an incomplete result.json file. Don't panic, just run the Benchmark further to fix this issue")
    for problemId in sumOfScoresDict.keys():
        mx = maxScoresDict[problemId]
        sm = sumOfScoresDict[problemId]
        mean = sm / noOfScoresDict[problemId]
        
        # t = 0 yields the mean score, t = 1 yields the maximum; values in
        # between interpolate linearly from mean towards max.
        def target(t):
            return mx - ( ( mx - mean ) * (1-t) ) 
        targetValues = np.array([ target(t) for t in config.TARGETS ])
        
        logger.info("mx: "+str(mx))
        logger.info("mean: "+str(mean))    
        logger.info("target values of "+str(problemId)+" are: "+str(targetValues))
        targetValuesDict[problemId] = targetValues
    
    
    refinedResults = Database()
    for optimizerId,optimizerValues in db.core.items():
        #dont take the dummy optimizer MonteCarloSampling1M into the final evaluation
        if ( optimizerId == MonteCarloSampling1M.__name__ ):
            continue
        for problemId in sumOfScoresDict.keys():
            if not(problemId in optimizerValues.keys()):
                continue
            problemValues = optimizerValues[problemId]
            # One row per run, one column per target. Every slot starts at the
            # "never reached" index so that a run which is missing from the
            # database is treated like a run that never hit the target; a zero
            # initialisation would count it as "hit on the very first evaluation"
            # and bias mean/std downwards for incomplete result files.
            indexesWhereTargetValuesWhereExceeded = np.full((config.K,len(config.TARGETS)), config.MAX_EVALUATIONS-1, dtype=float)
            logger.info(optimizerId+" on "+problemId)
            valueCounter = 0#only for debugging / validation
            completionCounter = 0 #count how many runs actually have config.MAX_EVALUATIONS values in them
            for k,values in problemValues.items():
                allScores = values['scores']
                valueCounter += np.shape(allScores)[0]
                completionCounter += int( np.shape(allScores)[0] >= config.MAX_EVALUATIONS )
                #check, at which index the values are above the target values
                # broadcasting a score column against a target row gives mask[i, j] = (score i > target j)
                mask = np.expand_dims( np.array(allScores), axis=-1) > np.expand_dims(targetValuesDict[problemId],axis=-1).T
                # argmax over the score axis returns the first True per target (and 0 if there is none)
                indexWhereTargetValuesAreHit = np.argmax(mask, axis=0)
                #if the algorithm never reached the target value, it is set to the maximum number of iterations:
                indexWhereTargetValuesAreHit[ ~mask.any(axis=0) ] = config.MAX_EVALUATIONS-1#since we count from index 0
                indexesWhereTargetValuesWhereExceeded[int(k),:] = indexWhereTargetValuesAreHit
            
            meanValue = np.mean(indexesWhereTargetValuesWhereExceeded, axis=0) + 1 #(since we count from index 0)
            stdValue = np.std(indexesWhereTargetValuesWhereExceeded, axis=0)
            
            refinedResults.store(optimizerId, problemId, 'mean', meanValue)
            refinedResults.store(optimizerId, problemId, 'std', stdValue)   
            logger.info("mean: "+str( meanValue ) )
            logger.info("std: "+str( stdValue ) )
            logger.info("completed runs: "+str(100.0*completionCounter/config.K)+"%")
            logger.info("completed function calls: "+str(100.0*valueCounter/(config.K*config.MAX_EVALUATIONS))+"%")
            
    
    #Determine, which optimizer was the best on on a problem in order to plot it thick
    for optimizerId,optimizerValues in refinedResults.core.items():
        for problemId,problemValues in optimizerValues.items():
            # copy, because the stored best values are updated in place below
            meanValues = problemValues['mean'].copy()
            if  bestAvgScoresDict.exists(problemId):
                oldValues = bestAvgScoresDict.get(problemId, 'values')
                oldBestOptimizerIds = bestAvgScoresDict.get(problemId, 'optimizerId')
                # both masks are taken before oldValues is overwritten
                mask = ( meanValues < oldValues )
                maskEqual = ( meanValues == oldValues )
                oldValues[mask] = meanValues[mask]
                
                for i in range(np.shape(meanValues)[0]):
                    # strictly better: this optimizer becomes the only best one; tie: it joins the list
                    if mask[i]: oldBestOptimizerIds[i] = list([optimizerId])
                    if maskEqual[i]: oldBestOptimizerIds[i].append(optimizerId)

                bestAvgScoresDict.store(problemId, 'values', oldValues)
                bestAvgScoresDict.store(problemId, 'optimizerId', oldBestOptimizerIds)
            else:
                # first optimizer seen for this problem is the best one so far for every target
                bestAvgScoresDict.store(problemId, 'values', meanValues)
                bestAvgScoresDict.store(problemId, 'optimizerId',[list([optimizerId]) for i in range(np.shape(meanValues)[0])])


def applyFilter(currentId, filterDict):
    """Tell whether ``currentId`` has to be skipped.

    Args:
        currentId: identifier (e.g. a problem or optimizer name) to check.
        filterDict: container of identifiers that should be kept (anything
            supporting the ``in`` operator), or ``None`` to disable filtering.

    Returns:
        ``True`` if a filter is given and ``currentId`` is not contained in it
        (the caller should skip the entry), ``False`` otherwise.
    """
    # Identity test on purpose: ``== None`` would call the container's own
    # __eq__, which is elementwise for numpy arrays and arbitrary for custom types.
    if filterDict is None:
        return False
    return currentId not in filterDict
    

mx: 19.208502567886754
mean: 2.433610402835009
target values of HolderTable are: [17.53101335 18.36975796 19.04075365]
mx: -0.0
mean: -987.2333386651593
target values of Rosenbrock are: [-98.72333387 -49.36166693  -9.87233339]
mx: -1.3894930166106489e-14
mean: -0.8017325596802386
target values of Sphere are: [-0.08017326 -0.04008663 -0.00801733]
mx: 0.0
mean: -32.49756742146168
target values of LinearSlope are: [-3.24975674 -1.62487837 -0.32497567]
mx: 1.0
mean: 0.31244610813211277
target values of DebN1 are: [0.93124461 0.96562231 0.99312446]
mx: -8.966021318620746e-08
mean: -1.3039541468019893
target values of SphereHighDim are: [-0.1303955  -0.06519779 -0.01303963]
mx: 308.12909601160663
mean: 54.3437887356942
target values of BraninHoo are: [282.75056528 295.43983065 305.59124294]
mx: -0.0
mean: -136.5910690057624
target values of Himmelblau are: [-13.6591069   -6.82955345  -1.36591069]
mx: 78.33233140754285
mean: 8.423439483679575
target values of Styblinski are: [71.34144222 74.8

completed runs: 100.0%
completed function calls: 100.0%
PureRandomSearch on LevyN13
mean: [ 5.22  6.96 16.46]
std: [ 5.22796327  6.72297553 15.85460186]
completed runs: 100.0%
completed function calls: 100.0%
PureRandomSearch on MishraN2
mean: [  9.01  24.29 678.87]
std: [ 10.41488838  27.31420693 356.19072574]
completed runs: 100.0%
completed function calls: 100.0%
PureRandomSearch on GriewankN4
mean: [ 113.86  543.76 1000.  ]
std: [115.78903402 368.3313758    0.        ]
completed runs: 100.0%
completed function calls: 100.0%
PureRandomSearch on Housing
mean: [332.13 535.49 771.22]
std: [224.64223356 373.47890155 356.79359804]
completed runs: 100.0%
completed function calls: 100.0%
PureRandomSearch on Yacht
mean: [ 32.33  43.12 208.23]
std: [ 30.27145685  42.72242502 137.4215307 ]
completed runs: 100.0%
completed function calls: 100.0%
PureRandomSearch on Slump
mean: [363.41 601.05 936.42]
std: [295.77755476 338.67770446 171.79139559]
completed runs: 100.0%
completed function calls: 

In [4]:
#Render as html table
def createHTMLResultTable(tableString, index, problemFilter = None, optimizerFilter = None):
    """Append the HTML result table of one target level to ``tableString``.

    The table has one column per problem found in ``maxScoresDict`` and one
    row per optimizer found in ``refinedResults``. Each cell shows the mean
    with the standard deviation in parentheses; the mean is set in bold when
    the optimizer is listed in ``bestAvgScoresDict`` for that problem/target.

    Args:
        tableString: HTML generated so far; the new table is appended to it.
        index: position in ``config.TARGETS`` selecting the target level.
        problemFilter: container of problem ids to show, or None for all.
        optimizerFilter: container of optimizer ids to show, or None for all.

    Returns:
        ``tableString`` followed by the markup of the new table.
    """
    # The column order is fixed once and reused for every row, so each value
    # ends up under its own problem heading even if an optimizer stores its
    # problems in a different order or lacks some of them.
    columnIds = [problemId for problemId in maxScoresDict.keys()
                 if not applyFilter(problemId, problemFilter)]

    parts = [tableString, '<table>']

    # Header row. '%g' instead of int(): int() truncates, and float products
    # like 0.29*100 == 28.999999999999996 would be shown as 28.
    parts.append('    <tr>')
    parts.append('    <td>' + ('%g' % (config.TARGETS[index]*100)) + '% Target</td>')
    for problemId in columnIds:
        parts.append('<td>'+problemId+'</td>')
    parts.append('</tr>')

    for optimizerId,optimizerValues in refinedResults.core.items():
        if ( applyFilter(optimizerId, optimizerFilter ) ): continue
        parts.append('<tr>')
        parts.append('<td>'+optimizerId+'</td>')

        for problemId in columnIds:
            if problemId not in optimizerValues:
                # no result for this optimizer/problem pair: keep the column aligned
                parts.append('<td></td>')
                continue
            problemValues = optimizerValues[problemId]

            isBestInCategory = ( optimizerId in (bestAvgScoresDict.get(problemId, 'optimizerId')[index]) )
            bestStartTag = '<b>' if isBestInCategory else ''
            bestEndTag = '</b>' if isBestInCategory else ''

            parts.append('<td>' + bestStartTag + str(problemValues['mean'][index]) + bestEndTag + '(+-' + ("%.2f" % problemValues['std'][index]) + ')' + '</td>')
        parts.append('</tr>')

    parts.append('</table>')
    return ''.join(parts)

def renderHTMLTables(problemFilter = None, optimizerFilter = None):
    """Build one result table per entry of ``config.TARGETS`` and display them.

    Args:
        problemFilter: forwarded to createHTMLResultTable (None shows all problems).
        optimizerFilter: forwarded to createHTMLResultTable (None shows all optimizers).
    """
    html = ""
    # Each call appends the table of the next target level to the markup built so far.
    for targetIndex, _ in enumerate(config.TARGETS):
        html = createHTMLResultTable(html, targetIndex, problemFilter, optimizerFilter)
    display(HTML(html))
    
# Guarded like every other executable cell: the tables rely on globals that
# are only created under this guard, so running the call on import (e.g. in a
# spawned worker process) would fail with a NameError.
if __name__ == '__main__':
    # Class names of all problems registered in the configuration, used as column whitelist.
    allProblems = [cls.__name__ for cls in config.problems.ALL.values()]
    # To restrict the rows as well, pass an optimizer whitelist, e.g.:
    #optimizerFilter = [optimizers.PureRandomSearch.__name__, optimizers.BayesianOptimization.__name__, optimizers.HierarchicalOptimisticOptimization.__name__, optimizers.GradOpt.__name__]
    print(allProblems)
    renderHTMLTables(problemFilter = allProblems)

['AutoMPG', 'BreastCancer', 'Slump', 'Yacht', 'Housing', 'HolderTable', 'Rosenbrock', 'Sphere', 'SphereHighDim', 'LinearSlope', 'DebN1', 'BraninHoo', 'Himmelblau', 'Styblinski', 'LevyN13', 'MishraN2', 'GriewankN4', 'TestProblem', 'TestProblem2']


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
90% Target,HolderTable,Rosenbrock,Sphere,LinearSlope,DebN1,SphereHighDim,BraninHoo,Himmelblau,Styblinski,LevyN13,MishraN2,GriewankN4,Housing,Yacht,Slump,BreastCancer,AutoMPG
AdaLipoTR,39.0(+-32.41),1.0(+-0.00),26.3(+-4.50),6.74(+-0.52),147.29(+-151.51),106.1(+-20.86),21.24(+-34.15),14.58(+-6.08),30.6(+-20.03),1.0(+-0.00),5.74(+-2.23),10.51(+-1.69),6.03(+-1.63),9.35(+-16.16),6.74(+-2.18),23.25(+-51.37),5.38(+-1.40)
AdaLipo,49.34(+-31.49),1.0(+-0.00),49.59(+-14.61),11.11(+-4.99),961.54(+-147.68),995.31(+-46.56),21.91(+-27.34),19.52(+-12.12),49.9(+-26.64),1.0(+-0.00),5.03(+-2.33),21.69(+-7.47),8.1(+-4.92),12.4(+-25.08),12.46(+-9.52),98.48(+-159.73),9.48(+-14.62)
PureRandomSearch,181.73(+-176.39),8.99(+-8.06),905.36(+-227.61),918.51(+-214.26),987.34(+-91.65),1000.0(+-0.00),782.72(+-323.01),15.63(+-13.12),79.73(+-84.23),5.22(+-5.23),9.01(+-10.41),113.86(+-115.79),332.13(+-224.64),32.33(+-30.27),363.41(+-295.78),20.12(+-15.54),374.04(+-283.17)
HierarchicalOptimisticOptimization,56.48(+-10.38),7.6(+-1.32),952.47(+-48.50),32.96(+-1.79),31.0(+-0.00),1000.0(+-0.00),1000.0(+-0.00),3.52(+-0.50),3.5(+-0.50),1.0(+-0.00),5.51(+-0.50),7.53(+-0.50),94.27(+-10.35),7.56(+-0.50),1000.0(+-0.00),19.5(+-4.50),118.71(+-5.31)
GradOpt,86.46(+-84.77),29.01(+-29.09),490.13(+-119.59),1000.0(+-0.00),965.39(+-145.85),956.67(+-82.44),21.23(+-17.06),72.45(+-75.43),225.34(+-246.93),1.0(+-0.00),7.53(+-5.69),148.79(+-74.71),10.38(+-6.33),16.9(+-21.74),14.41(+-9.10),18.59(+-18.90),8.52(+-4.69)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
95% Target,HolderTable,Rosenbrock,Sphere,LinearSlope,DebN1,SphereHighDim,BraninHoo,Himmelblau,Styblinski,LevyN13,MishraN2,GriewankN4,Housing,Yacht,Slump,BreastCancer,AutoMPG
AdaLipoTR,45.23(+-31.87),1.0(+-0.00),35.9(+-7.12),6.97(+-0.22),156.82(+-151.31),155.08(+-44.92),21.8(+-34.08),18.73(+-6.32),32.71(+-19.79),1.0(+-0.00),7.07(+-2.15),11.72(+-1.76),6.21(+-1.68),10.26(+-16.01),6.95(+-2.21),24.23(+-51.32),5.51(+-1.49)
AdaLipo,79.57(+-44.78),1.0(+-0.00),94.55(+-39.81),36.05(+-22.36),995.88(+-29.09),1000.0(+-0.00),25.54(+-27.26),27.03(+-17.62),64.8(+-40.08),1.0(+-0.00),7.04(+-2.47),30.22(+-13.69),11.45(+-9.35),13.44(+-24.96),17.4(+-13.51),106.26(+-160.24),13.27(+-18.04)
PureRandomSearch,355.85(+-312.47),19.01(+-17.78),987.94(+-73.77),993.66(+-44.92),1000.0(+-0.00),1000.0(+-0.00),961.42(+-154.56),28.89(+-30.02),159.4(+-155.92),6.96(+-6.72),24.29(+-27.31),543.76(+-368.33),535.49(+-373.48),43.12(+-42.72),601.05(+-338.68),31.33(+-26.19),603.8(+-327.23)
HierarchicalOptimisticOptimization,58.7(+-10.19),7.6(+-1.32),1000.0(+-0.00),47.61(+-3.18),31.0(+-0.00),1000.0(+-0.00),1000.0(+-0.00),21.31(+-5.52),232.67(+-383.71),1.0(+-0.00),9.47(+-0.50),13.47(+-0.50),109.94(+-10.90),7.56(+-0.50),1000.0(+-0.00),38.79(+-7.80),133.81(+-6.02)
GradOpt,337.22(+-218.34),46.33(+-41.66),805.56(+-144.91),1000.0(+-0.00),992.92(+-70.45),1000.0(+-0.00),22.22(+-18.52),213.15(+-156.44),324.61(+-270.81),1.0(+-0.00),13.36(+-10.34),308.24(+-97.49),10.47(+-6.28),20.21(+-25.08),14.69(+-9.10),19.67(+-19.74),8.98(+-5.32)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
99% Target,HolderTable,Rosenbrock,Sphere,LinearSlope,DebN1,SphereHighDim,BraninHoo,Himmelblau,Styblinski,LevyN13,MishraN2,GriewankN4,Housing,Yacht,Slump,BreastCancer,AutoMPG
AdaLipoTR,50.4(+-31.05),1.0(+-0.00),59.24(+-10.72),7.04(+-0.28),182.3(+-149.89),271.52(+-55.02),22.33(+-34.11),24.38(+-7.44),37.0(+-19.45),7.18(+-2.01),16.4(+-14.50),38.77(+-97.50),6.42(+-1.70),12.93(+-15.92),7.15(+-2.34),26.61(+-51.27),5.71(+-1.48)
AdaLipo,137.38(+-52.15),1.0(+-0.00),979.69(+-93.92),960.99(+-153.26),1000.0(+-0.00),1000.0(+-0.00),151.95(+-150.37),55.88(+-30.82),106.0(+-76.53),8.24(+-5.38),22.15(+-21.23),792.97(+-298.09),36.31(+-24.66),22.45(+-30.12),45.38(+-26.40),132.36(+-168.33),35.08(+-25.44)
PureRandomSearch,809.73(+-319.79),97.86(+-103.00),1000.0(+-0.00),1000.0(+-0.00),1000.0(+-0.00),1000.0(+-0.00),1000.0(+-0.00),184.65(+-181.72),568.58(+-375.71),16.46(+-15.85),678.87(+-356.19),1000.0(+-0.00),771.22(+-356.79),208.23(+-137.42),936.42(+-171.79),70.69(+-87.75),918.51(+-178.79)
HierarchicalOptimisticOptimization,58.7(+-10.19),7.6(+-1.32),1000.0(+-0.00),152.15(+-9.42),31.0(+-0.00),1000.0(+-0.00),1000.0(+-0.00),468.79(+-490.28),730.59(+-432.02),21.96(+-1.17),25.31(+-1.80),68.36(+-7.16),608.82(+-300.89),62.0(+-9.95),1000.0(+-0.00),38.79(+-7.80),412.59(+-60.16)
GradOpt,671.45(+-280.82),130.77(+-107.98),999.06(+-9.35),1000.0(+-0.00),1000.0(+-0.00),1000.0(+-0.00),23.04(+-19.30),507.63(+-261.08),538.86(+-267.17),20.98(+-21.29),70.33(+-50.73),941.1(+-112.46),10.7(+-6.47),43.26(+-36.80),18.17(+-13.96),34.13(+-49.46),9.54(+-5.92)


In [6]:



if __name__ == '__main__':
    # Last cell of the notebook: calls the shutdown counterpart of setupLogging()
    # from util.logging (presumably flushes/closes the log handlers - confirm there).
    shutdownLogging()

Shutdown logger, bye bye!
