In [252]:
class ComputeResourceInformation:
    """Plain record of the hardware/software description of a compute resource.

    Every field is stored exactly as given and defaults to the empty string.
    """

    # (label used by as_map, attribute name), listed in the order as_map emits them.
    _MAP_LAYOUT = (
        ('Processor', 'processor'),
        ('Memory', 'memory'),
        ('Disk', 'disk'),
        ('OS', 'operating_system'),
        ('Kernel', 'kernel'),
        ('Compiler', 'compiler'),
        ('File System', 'file_system'),
        ('System Layer', 'system_layer'),
        ('Motherboard', 'motherboard'),
        ('Chipset', 'chipset'),
        ('Graphics', 'graphics'),
        ('Network', 'network_card'),
        ('Monitor', 'monitor'),
    )

    def __init__(self, processor="", memory="", disk="", operating_system="", kernel="", compiler="", file_system="", system_layer="", motherboard="", chipset="", graphics="", network_card="", monitor=""):
        """Store each description field on the instance under the parameter's name."""
        self.processor = processor
        self.memory = memory
        self.disk = disk
        self.operating_system = operating_system
        self.kernel = kernel
        self.compiler = compiler
        self.file_system = file_system
        self.system_layer = system_layer
        self.motherboard = motherboard
        self.chipset = chipset
        self.graphics = graphics
        self.network_card = network_card
        self.monitor = monitor

    def as_map(self):
        """Return a new dict mapping each display label to the current field value."""
        return {label: getattr(self, attribute) for label, attribute in self._MAP_LAYOUT}

    def __str__(self):
        """Return the string form of the dict built by as_map()."""
        return str(self.as_map())

In [253]:
class TestNotFound(Exception):
    """Raised when a requested test name has no recorded results.

    The message is kept in ``value``; ``str()`` returns its ``repr``.
    """

    def __init__(self, value="Test Not Found"):
        # Forward the message to Exception so that ``args`` (and therefore repr and
        # pickling) also carry it when the default value is used.
        super().__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)

In [254]:
import csv

class ComputeResourceTestResults:
    """Benchmark results of one compute resource plus the description of that resource.

    Results are kept as ``{test name: [values]}``; the resource description is a
    ComputeResourceInformation instance.
    """

    # First-column label of a description row -> update_resource_info() parameter name.
    _INFO_ROW_PARAMETERS = {
        'Processor': 'processor',
        'Memory': 'memory',
        'Disk': 'disk',
        'OS': 'operating_system',
        'Kernel': 'kernel',
        'Compiler': 'compiler',
        'File-System': 'file_system',
        'System Layer': 'system_layer',
        'Motherboard': 'motherboard',
        'Chipset': 'chipset',
        'Graphics': 'graphics',
        'Network': 'network_card',
    }

    # Description rows that are recognised but deliberately not stored.
    _IGNORED_ROWS = ('Screen Resolution', 'Monitor')

    def __init__(self, file = None):
        """Create an empty result set and, if ``file`` is given, load it right away."""
        self._results = {}
        self._info = ComputeResourceInformation()

        if file is not None:
            self.load_from_file(file)

    def update_resource_info(self, processor, memory, disk, operating_system, kernel, compiler, file_system,
                             system_layer='', motherboard='', chipset='', graphics='', network_card=''):
        """Overwrite the stored resource description with the given values.

        The ``monitor`` field of the description is not touched.
        """
        self._info.processor = processor
        self._info.memory = memory
        self._info.disk = disk
        self._info.operating_system = operating_system
        self._info.kernel = kernel
        self._info.compiler = compiler
        self._info.file_system = file_system
        self._info.system_layer = system_layer
        self._info.motherboard = motherboard
        self._info.chipset = chipset
        self._info.graphics = graphics
        self._info.network_card = network_card

    def add_test_results(self, test_name, test_results_values):
        """Append every value in ``test_results_values`` to the results of ``test_name``.

        The values are copied into an internal list, so the caller's sequence is
        never aliased and cannot be mutated by later additions.
        """
        self._results.setdefault(test_name, []).extend(test_results_values)

    def add_test_result(self, test_name, test_result_value):
        """Append a single value to the results of ``test_name``."""
        self.add_test_results(test_name, [test_result_value])

    def get_test_results(self, test_name):
        """Return the list of values recorded for ``test_name``.

        Raises:
            TestNotFound: if no results are stored under ``test_name``.
        """
        try:
            return self._results[test_name]
        except KeyError:
            raise TestNotFound("Test Not Found")

    def delete_test_results(self, test_name):
        """Remove all values recorded for ``test_name``.

        Raises:
            TestNotFound: if no results are stored under ``test_name``.
        """
        try:
            del self._results[test_name]
        except KeyError:
            raise TestNotFound("Test Not Found")

    def load_from_file(self, file):
        """Read a comma-separated result file.

        Rows with fewer than two cells, or whose first cell is a single space, are
        skipped. Rows whose first cell is a known description label update the
        resource description; 'Screen Resolution' and 'Monitor' rows are ignored.
        Every other row is a test: the first cell is the test name and the
        remaining cells are converted with ``float`` (a non-numeric cell raises
        ValueError) and appended to that test's results.

        The resource description is replaced as a whole: any field that does not
        appear in the file is reset to the empty string.
        """
        info_values = dict.fromkeys(self._INFO_ROW_PARAMETERS.values(), '')

        with open(file, newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                if len(row) < 2 or row[0] == ' ':
                    continue

                label = row[0]
                if label in self._INFO_ROW_PARAMETERS:
                    info_values[self._INFO_ROW_PARAMETERS[label]] = row[1]
                elif label in self._IGNORED_ROWS:
                    continue
                else:
                    self.add_test_results(label, [float(cell) for cell in row[1:]])

        self.update_resource_info(**info_values)
                

In [255]:
# Load the benchmark result file of every AWS instance under test.
# Each constructor call reads its CSV immediately (a non-None `file` triggers load_from_file).
aws_m1large_variable = ComputeResourceTestResults('./benchmarks/aws/m1large-aws-variable-ubuntu/merge-2092.csv')
aws_m4xlarge_variable = ComputeResourceTestResults('./benchmarks/aws/m44xlarge-aws-variable-ubuntu/merge-7274.csv')
aws_m4large_magnetic = ComputeResourceTestResults('./benchmarks/aws/m4large-aws-magnetic-ubuntu/merge-7288.csv')
aws_m4large_sdd = ComputeResourceTestResults('./benchmarks/aws/m4large-aws-sdd-ubuntu/merge-6914.csv')
aws_t2medium_magnetic = ComputeResourceTestResults('./benchmarks/aws/t2medium-aws-magnetic-ubuntu/merge-9347.csv')
aws_t2medium_variable = ComputeResourceTestResults('./benchmarks/aws/t2medium-aws-variable-ubuntu/merge-9691.csv')
aws_t2small_magnetic = ComputeResourceTestResults('./benchmarks/aws/t2small-aws-magnetic-ubuntu/merge-8438.csv')

In [286]:
# Load the benchmark result file of every CECAD resource under test.
# The variable names are reused as player names by the tournament cells further down.
cecad_2xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/2xlarge-gp-ubuntu/merge-5274.csv')
cecad_2xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/2xlarge-hpc-ubuntu/merge-4929.csv')
cecad_3xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/3xlarge-gp-ubuntu/merge-6495.csv')
cecad_3xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/3xlarge-hpc-ubuntu/merge-6359.csv')
cecad_4xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/4xlarge-hpc-ubuntu/merge-7671.csv')
cecad_5xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/5xlarge-hpc-ubuntu/merge-5104.csv')
cecad_6xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/6xlarge-hpc-ubuntu/merge-4492.csv')
cecad_7xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/7xlarge-hpc-ubuntu/merge-1826.csv')
cecad_8xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/8xlarge-hpc-ubuntu/merge-3615.csv')
cecad_dl1606g = ComputeResourceTestResults('./benchmarks/cecad/dl1606g-ubuntu/merge-6157.csv')
cecad_large_gp = ComputeResourceTestResults('./benchmarks/cecad/large-gp-ubuntu/merge-9893.csv')
cecad_large_hpc = ComputeResourceTestResults('./benchmarks/cecad/large-hpc-ubuntu/merge-7675.csv')
cecad_large_nova = ComputeResourceTestResults('./benchmarks/cecad/large-ubuntu/merge-9724.csv')
cecad_medium_gp = ComputeResourceTestResults('./benchmarks/cecad/medium-gp-ubuntu/merge-5312.csv')
cecad_medium_hpc = ComputeResourceTestResults('./benchmarks/cecad/medium-hpc-ubuntu/merge-8901.csv')
cecad_medium_nova = ComputeResourceTestResults('./benchmarks/cecad/medium-ubuntu/merge-2260.csv')
cecad_r610 = ComputeResourceTestResults('./benchmarks/cecad/r610-ubuntu/merge-8348.csv')
cecad_r900 = ComputeResourceTestResults('./benchmarks/cecad/r900-debian/merge-4836.csv')
cecad_small_gp = ComputeResourceTestResults('./benchmarks/cecad/small-gp-ubuntu/merge-3500.csv')
cecad_small_hpc = ComputeResourceTestResults('./benchmarks/cecad/small-hpc-ubuntu/merge-1569.csv')
cecad_small_test = ComputeResourceTestResults('./benchmarks/cecad/small-test-ubuntu/merge-1472.csv')
cecad_small_nova = ComputeResourceTestResults('./benchmarks/cecad/small-ubuntu/merge-4467.csv')
cecad_xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/xlarge-gp-ubuntu/merge-1562.csv')
cecad_xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/xlarge-hpc-ubuntu/merge-3341.csv')
cecad_xsmall_gp = ComputeResourceTestResults('./benchmarks/cecad/xsmall-gp-ubuntu/merge-3733.csv')
cecad_xsmall_hpc = ComputeResourceTestResults('./benchmarks/cecad/xsmall-hpc-ubuntu/merge-2556.csv')
cecad_xsmall_nova = ComputeResourceTestResults('./benchmarks/cecad/xsmall-ubuntu/merge-1877.csv')

In [287]:
# Sanity check: print the resource description parsed from one result file.
# 'Monitor' stays empty because load_from_file ignores 'Monitor' rows.
print(aws_m4large_sdd._info )

{'Disk': '158GB', 'File System': 'ext4', 'Memory': '8192MB', 'Processor': 'Intel Xeon E5-2676 v3 @ 2.39GHz (2 Cores)', 'Network': 'Intel 82599 Virtual Function', 'Graphics': 'Cirrus Logic GD 5446', 'Monitor': '', 'Chipset': 'Intel 440FX- 82441FX PMC', 'Compiler': 'GCC 4.8.4', 'Kernel': '3.13.0-48-generic (x86_64)', 'OS': 'Ubuntu 14.04', 'System Layer': 'Xen HVM domU 4.2.amazon', 'Motherboard': 'Xen HVM domU'}


In [288]:
class TestInformation:
    """Metadata of one benchmark test: name, units, category and ranking direction."""

    # Category codes; each one is an index into ``categories``.
    CPU_TEST = 0
    MEMORY_TEST = 1
    DISK_TEST = 2

    categories = ("CPU","MEMORY","DISK")

    def __init__(self, test_name, units, category = MEMORY_TEST, more_is_better=True):
        """Store the test metadata.

        Args:
            test_name: name of the test.
            units: units of the reported values.
            category: one of CPU_TEST, MEMORY_TEST or DISK_TEST.
            more_is_better: True when larger values rank better.
        """
        self.test_name = test_name
        self.units = units
        self.category = category
        self.more_is_better = more_is_better

    def as_map(self):
        """Return the metadata as a dict; the category is given by name, not by code.

        Raises:
            IndexError: if ``category`` is not a valid index into ``categories``.
        """
        return {
            'Test Name': self.test_name,
            'Units': self.units,
            'Category': self.categories[self.category],
            'More Is Better': self.more_is_better,
        }

    def __eq__(self,other):
        """Two tests are equal when both their name and their units match."""
        if not isinstance(other, TestInformation):
            # Let Python fall back to its default handling instead of raising
            # AttributeError on objects that lack test_name/units.
            return NotImplemented
        return self.test_name == other.test_name and self.units == other.units

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash the same
        # fields that __eq__ compares so equal objects hash equally.
        return hash((self.test_name, self.units))

    def __str__(self):
        """Return the string form of the dict built by as_map()."""
        return str( self.as_map() )

In [289]:
# Smoke test of TestInformation: the category code is rendered by name in the map.
test_info = TestInformation("SQLite", "MB/s", TestInformation.CPU_TEST, True)
test_info.as_map()

{'Category': 'CPU',
 'More Is Better': True,
 'Test Name': 'SQLite',
 'Units': 'MB/s'}

In [290]:
class TestSuite:
    """Catalogue of TestInformation objects indexed by test name."""

    def __init__(self, file = None):
        """Create an empty suite and, if ``file`` is given, load it right away."""
        self._tests = {}

        if file is not None:
            self.load_from_file(file)

    def add_test(self, test_name, units, category=TestInformation.MEMORY_TEST, more_is_better=True ):
        """Register a test; an existing entry with the same name is replaced."""
        test_info = TestInformation(test_name, units, category, more_is_better)
        self._tests[test_info.test_name] = test_info

    def load_from_file(self, file ):
        """Read test definitions from a comma-separated file.

        Each non-blank row is ``test name, units, category code, more-is-better``.
        The category code is parsed with ``int``; the last column is True only
        when it reads exactly "True" (surrounding whitespace ignored).

        Raises:
            IndexError: if a non-blank row has fewer than four columns.
            ValueError: if the category column is not an integer.
        """
        with open(file, newline='') as csvfile:

            for row in csv.reader(csvfile, delimiter=','):

                if not row:
                    # csv.reader yields an empty list for a blank line (e.g. a
                    # trailing newline at the end of the file); nothing to register.
                    continue

                test_name = row[0]
                units = row[1]
                category = int(row[2])
                more_is_better = row[3].strip() == "True"

                self.add_test(test_name, units, category, more_is_better)

    def get_test_info(self, test_name ):
        """Return the TestInformation registered under ``test_name``.

        Raises:
            KeyError: if no test with that name is registered.
        """
        return self._tests[test_name]

In [291]:
# Load the test catalogue and print one entry to check that it was parsed.
test_suite = TestSuite("./benchmarks/tests.csv")
print( test_suite._tests['7-Zip Compression - Compress Speed Test'] )

{'Test Name': '7-Zip Compression - Compress Speed Test', 'More Is Better': True, 'Category': 'CPU', 'Units': 'MIPS'}


In [292]:
import scipy as sp
from sklearn.cluster import KMeans

class ComputeResourceTestTournament:
    """Round-robin tournament between compute resources for one benchmark test.

    Each player contributes the first value recorded for the test. The values are
    grouped with K-Means; two players in the same cluster tie, otherwise the
    player whose cluster centre is better (larger or smaller, depending on
    ``more_is_better``) wins. Players without a result for the test are stored as
    NaN: they lose every match and are ranked after all clusters.
    """

    POINTS_PER_WIN = 3
    POINTS_PER_TIE = 1
    POINTS_PER_LOSE = 0

    # Marker for "no result / no cluster". Always test it with _is_missing():
    # an identity check only recognises one particular NaN object.
    _MISSING = float('nan')

    def __init__(self, test_name, more_is_better=True):
        """Prepare an empty tournament for ``test_name``.

        Args:
            test_name: name of the test whose results are compared.
            more_is_better: True when larger values win.
        """
        self._test_name = test_name
        self._more_is_better = more_is_better
        self._players = {}          # player name -> value (NaN when missing)
        self._km = None             # KMeans estimator created by clusterize_results
        self._cluster_results = {}  # player name -> cluster label (NaN when missing)
        self._tournament = {}       # player name -> [(opponent name, points), ...]
        self._results = {}          # player name -> [points, position]

    @staticmethod
    def _is_missing(value):
        """Return True when ``value`` is NaN (the only value that differs from itself)."""
        return bool(value != value)

    def add_player(self, player_name, player):
        """Register a player using the first value it recorded for the test.

        ``player`` must offer ``get_test_results(test_name)``. When that raises
        TestNotFound, or returns an empty sequence, the player is stored as NaN.
        """
        try:
            test_result = player.get_test_results( self._test_name )
        except TestNotFound:
            self._players[ player_name ] = self._MISSING
            return

        self._players[ player_name ] = test_result[0] if len(test_result) else self._MISSING

    def clusterize_results( self, number_clusters=10, init_mode='random', number_init=10, max_iterations=300 ):
        """Assign a K-Means cluster label to every player that has a value.

        Players stored as NaN are left out of the clustering and get NaN as their
        label. The number of clusters is capped at the number of distinct values,
        because K-Means cannot fit more clusters than it has samples. When no
        player has a value, the estimator is created but never fitted.

        Args:
            number_clusters: requested number of clusters.
            init_mode: ``init`` argument of KMeans.
            number_init: ``n_init`` argument of KMeans.
            max_iterations: ``max_iter`` argument of KMeans.
        """
        # One ordering, (value, name), drives both the samples and the label
        # assignment so they cannot get out of step. NaN is filtered out first
        # because sorting data that contains NaN yields an unreliable order.
        ranked = sorted(
            (value, name)
            for name, value in self._players.items()
            if not self._is_missing(value)
        )

        distinct_values = len({value for value, _ in ranked})
        effective_clusters = max(1, min(number_clusters, distinct_values))

        self._km = KMeans(n_clusters=effective_clusters, init=init_mode, n_init=number_init,
                          max_iter=max_iterations, random_state=0)

        cluster_results = {
            name: self._MISSING
            for name, value in self._players.items()
            if self._is_missing(value)
        }

        if ranked:
            # K-Means expects 2-D samples, so each value is duplicated into a point (v, v).
            samples = [[value, value] for value, _ in ranked]
            labels = self._km.fit_predict( samples )

            for (_, name), label in zip(ranked, labels):
                cluster_results[ name ] = label

        self._cluster_results = cluster_results

    def play_tournament(self):
        """Play every ordered pair of distinct players and record the points.

        Must be called after clusterize_results. Results of a previous call are
        discarded, so calling it again does not duplicate matches.
        """
        self._tournament = { name: [] for name in self._players }

        for key1 in self._players:
            cluster_player_1 = self._cluster_results[ key1 ]

            for key2 in self._players:
                if key1 == key2:
                    continue

                cluster_player_2 = self._cluster_results[ key2 ]

                if self._is_missing(cluster_player_1):
                    # Player has no result for the test: automatically loses.
                    points = self.POINTS_PER_LOSE
                elif self._is_missing(cluster_player_2):
                    # Opponent has no result for the test: automatically wins.
                    points = self.POINTS_PER_WIN
                elif cluster_player_1 == cluster_player_2:
                    points = self.POINTS_PER_TIE
                else:
                    centers = self._km.cluster_centers_
                    first_is_larger = bool( centers[cluster_player_1][0] > centers[cluster_player_2][0] )

                    # A larger centre wins only when more is better, and vice versa.
                    if first_is_larger == bool(self._more_is_better):
                        points = self.POINTS_PER_WIN
                    else:
                        points = self.POINTS_PER_LOSE

                self._tournament[key1].append( (key2, points) )

    def calculate_tournament_results(self ):
        """Compute ``[points, position]`` for every player.

        Points are the sum over the player's matches. Position is the 0-based
        rank of the player's cluster, clusters being ordered from best to worst
        centre. Players without a cluster get position ``number of clusters + 1``.
        """
        self._results = {
            player: [ sum(points for _, points in matches), 0 ]
            for player, matches in self._tournament.items()
        }

        # First coordinate of each centre, indexed by cluster label (empty when
        # the estimator was never fitted).
        centers = [ center[0] for center in getattr(self._km, 'cluster_centers_', ()) ]

        best_first = sorted( range(len(centers)), key=lambda label: centers[label],
                             reverse=bool(self._more_is_better) )
        position_of = { label: position for position, label in enumerate(best_first) }
        missing_position = len(centers) + 1

        for player, label in self._cluster_results.items():
            entry = self._results.setdefault( player, [0, 0] )

            if self._is_missing(label):
                entry[1] = missing_position
            else:
                entry[1] = position_of[ int(label) ]

    def get_points(self, player ):
        """Return the total points of ``player`` (after calculate_tournament_results)."""
        return self._results[player][0]

    def get_position(self, player):
        """Return the position of ``player`` (after calculate_tournament_results)."""
        return self._results[player][1]

In [293]:
# Build a tournament for the 'Tachyon - Total Time' test. The second argument is
# more_is_better=False, so players in clusters with a smaller centre win.
# Every loaded resource is registered under its variable name; the last expression
# displays the value recorded per player (nan where the test is missing).
test = ComputeResourceTestTournament('Tachyon - Total Time',False)
test.add_player( 'aws_m1large_variable', aws_m1large_variable )
test.add_player( 'aws_m4xlarge_variable', aws_m4xlarge_variable )
test.add_player( 'aws_m4large_magnetic', aws_m4large_magnetic )
test.add_player( 'aws_m4large_sdd', aws_m4large_sdd )
test.add_player( 'aws_t2medium_magnetic', aws_t2medium_magnetic )
test.add_player( 'aws_t2medium_variable', aws_t2medium_variable )
test.add_player( 'aws_t2small_magnetic', aws_t2small_magnetic )
test.add_player( 'cecad_2xlarge_gp', cecad_2xlarge_gp )
test.add_player( 'cecad_2xlarge_hpc', cecad_2xlarge_hpc )
test.add_player( 'cecad_3xlarge_gp', cecad_3xlarge_gp )
test.add_player( 'cecad_3xlarge_hpc', cecad_3xlarge_hpc )
test.add_player( 'cecad_4xlarge_hpc', cecad_4xlarge_hpc )
test.add_player( 'cecad_5xlarge_hpc', cecad_5xlarge_hpc )
test.add_player( 'cecad_6xlarge_hpc', cecad_6xlarge_hpc )
test.add_player( 'cecad_7xlarge_hpc', cecad_7xlarge_hpc )
test.add_player( 'cecad_8xlarge_hpc', cecad_8xlarge_hpc )
test.add_player( 'cecad_dl1606g', cecad_dl1606g )
test.add_player( 'cecad_large_gp', cecad_large_gp )
test.add_player( 'cecad_large_hpc', cecad_large_hpc )
test.add_player( 'cecad_large_nova', cecad_large_nova )
test.add_player( 'cecad_medium_gp', cecad_medium_gp )
test.add_player( 'cecad_medium_hpc', cecad_medium_hpc )
test.add_player( 'cecad_medium_nova', cecad_medium_nova )
test.add_player( 'cecad_r610', cecad_r610 )
test.add_player( 'cecad_r900', cecad_r900 )
test.add_player( 'cecad_small_gp', cecad_small_gp )
test.add_player( 'cecad_small_hpc', cecad_small_hpc )
test.add_player( 'cecad_small_test', cecad_small_test )
test.add_player( 'cecad_small_nova', cecad_small_nova )
test.add_player( 'cecad_xlarge_gp', cecad_xlarge_gp )
test.add_player( 'cecad_xlarge_hpc', cecad_xlarge_hpc )
test.add_player( 'cecad_xsmall_gp', cecad_xsmall_gp )
test.add_player( 'cecad_xsmall_hpc', cecad_xsmall_hpc )
test.add_player( 'cecad_xsmall_nova', cecad_xsmall_nova )
test._players

{'aws_m1large_variable': 71.18,
 'aws_m4large_magnetic': 68.47,
 'aws_m4large_sdd': 68.49,
 'aws_m4xlarge_variable': 8.66,
 'aws_t2medium_magnetic': 193.24,
 'aws_t2medium_variable': 188.99,
 'aws_t2small_magnetic': 389.23,
 'cecad_2xlarge_gp': 10.38,
 'cecad_2xlarge_hpc': 10.21,
 'cecad_3xlarge_gp': 10.92,
 'cecad_3xlarge_hpc': 10.23,
 'cecad_4xlarge_hpc': 22.7,
 'cecad_5xlarge_hpc': 11.55,
 'cecad_6xlarge_hpc': 7.98,
 'cecad_7xlarge_hpc': 7.27,
 'cecad_8xlarge_hpc': 8.06,
 'cecad_dl1606g': 19.3,
 'cecad_large_gp': 25.5,
 'cecad_large_hpc': 39.99,
 'cecad_large_nova': 24.58,
 'cecad_medium_gp': 47.3,
 'cecad_medium_hpc': 90.77,
 'cecad_medium_nova': 48.54,
 'cecad_r610': 10.31,
 'cecad_r900': nan,
 'cecad_small_gp': 93.48,
 'cecad_small_hpc': 181.27,
 'cecad_small_nova': nan,
 'cecad_small_test': 154.88,
 'cecad_xlarge_gp': 14.13,
 'cecad_xlarge_hpc': 20.08,
 'cecad_xsmall_gp': 93.34,
 'cecad_xsmall_hpc': 179.01,
 'cecad_xsmall_nova': 94.12}

In [294]:
# Cluster the players' values: 10 clusters, 'random' initialisation, 10 initialisations, at most 300 iterations.
test.clusterize_results( 10, 'random', 10, 300 )

In [295]:
# Cluster label assigned to each player (nan for players without a result).
test._cluster_results

{'aws_m1large_variable': 5,
 'aws_m4large_magnetic': 5,
 'aws_m4large_sdd': 5,
 'aws_m4xlarge_variable': 1,
 'aws_t2medium_magnetic': 9,
 'aws_t2medium_variable': 9,
 'aws_t2small_magnetic': 0,
 'cecad_2xlarge_gp': 1,
 'cecad_2xlarge_hpc': 1,
 'cecad_3xlarge_gp': 1,
 'cecad_3xlarge_hpc': 1,
 'cecad_4xlarge_hpc': 2,
 'cecad_5xlarge_hpc': 1,
 'cecad_6xlarge_hpc': 1,
 'cecad_7xlarge_hpc': 1,
 'cecad_8xlarge_hpc': 1,
 'cecad_dl1606g': 2,
 'cecad_large_gp': 2,
 'cecad_large_hpc': 4,
 'cecad_large_nova': 2,
 'cecad_medium_gp': 3,
 'cecad_medium_hpc': 6,
 'cecad_medium_nova': 3,
 'cecad_r610': 1,
 'cecad_r900': nan,
 'cecad_small_gp': 8,
 'cecad_small_hpc': 9,
 'cecad_small_nova': nan,
 'cecad_small_test': 7,
 'cecad_xlarge_gp': 1,
 'cecad_xlarge_hpc': 2,
 'cecad_xsmall_gp': 8,
 'cecad_xsmall_hpc': 9,
 'cecad_xsmall_nova': 8}

In [296]:
# Fitted cluster centres; both columns match because each value is clustered as the point (v, v).
test._km.cluster_centers_

array([[ 389.23      ,  389.23      ],
       [   9.97272727,    9.97272727],
       [  22.432     ,   22.432     ],
       [  47.92      ,   47.92      ],
       [  39.99      ,   39.99      ],
       [  69.38      ,   69.38      ],
       [  90.77      ,   90.77      ],
       [ 154.88      ,  154.88      ],
       [  93.64666667,   93.64666667],
       [ 185.6275    ,  185.6275    ]])

In [297]:
# Play every ordered pair of distinct players. Matches are appended to test._tournament, so run this cell only once per tournament object.
test.play_tournament()

In [298]:
# Matches of one player as (opponent, points): 3 = win, 1 = tie (same cluster), 0 = loss.
test._tournament["aws_m1large_variable"]

[('cecad_small_gp', 3),
 ('cecad_xsmall_nova', 3),
 ('cecad_2xlarge_hpc', 0),
 ('cecad_3xlarge_hpc', 0),
 ('cecad_xlarge_hpc', 0),
 ('cecad_xsmall_hpc', 3),
 ('cecad_6xlarge_hpc', 0),
 ('aws_m4xlarge_variable', 0),
 ('cecad_medium_gp', 0),
 ('cecad_3xlarge_gp', 0),
 ('cecad_large_gp', 0),
 ('aws_t2medium_magnetic', 3),
 ('cecad_large_hpc', 0),
 ('cecad_4xlarge_hpc', 0),
 ('cecad_2xlarge_gp', 0),
 ('cecad_r610', 0),
 ('cecad_7xlarge_hpc', 0),
 ('cecad_medium_nova', 0),
 ('cecad_medium_hpc', 3),
 ('cecad_xlarge_gp', 0),
 ('cecad_small_test', 3),
 ('cecad_large_nova', 0),
 ('cecad_dl1606g', 0),
 ('aws_m4large_magnetic', 1),
 ('aws_t2medium_variable', 3),
 ('cecad_small_nova', 3),
 ('cecad_r900', 3),
 ('aws_t2small_magnetic', 3),
 ('cecad_5xlarge_hpc', 0),
 ('aws_m4large_sdd', 1),
 ('cecad_xsmall_gp', 3),
 ('cecad_small_hpc', 3),
 ('cecad_8xlarge_hpc', 0)]

In [299]:
# Same player -> cluster label mapping, shown as (name, label) pairs.
test._cluster_results.items()

dict_items([('cecad_medium_hpc', 6), ('cecad_xlarge_hpc', 2), ('cecad_xsmall_nova', 8), ('cecad_2xlarge_hpc', 1), ('cecad_3xlarge_hpc', 1), ('cecad_small_gp', 8), ('cecad_xsmall_hpc', 9), ('aws_m1large_variable', 5), ('cecad_6xlarge_hpc', 1), ('aws_t2medium_magnetic', 9), ('aws_m4xlarge_variable', 1), ('cecad_medium_gp', 3), ('cecad_3xlarge_gp', 1), ('cecad_large_gp', 2), ('cecad_small_test', 7), ('cecad_large_hpc', 4), ('cecad_4xlarge_hpc', 2), ('cecad_2xlarge_gp', 1), ('cecad_7xlarge_hpc', 1), ('cecad_medium_nova', 3), ('cecad_r610', 1), ('cecad_xlarge_gp', 1), ('cecad_large_nova', 2), ('cecad_dl1606g', 2), ('aws_m4large_magnetic', 5), ('aws_t2medium_variable', 9), ('cecad_small_nova', nan), ('cecad_r900', nan), ('aws_t2small_magnetic', 0), ('cecad_5xlarge_hpc', 1), ('aws_m4large_sdd', 5), ('cecad_xsmall_gp', 8), ('cecad_small_hpc', 9), ('cecad_8xlarge_hpc', 1)])

In [300]:
# Sum the match points per player and derive positions from the cluster ranking,
# then look up the total points of one player.
test.calculate_tournament_results()
points = test.get_points("aws_m1large_variable")
points

38

In [301]:
# Total points of another player (overwrites `points` from the previous cell).
points = test.get_points("cecad_dl1606g")
points

58

In [302]:
# Total points of a player in the best-ranked cluster (overwrites `points` again).
points = test.get_points("cecad_r610")
points

79

In [303]:
# Final table: player -> [points, position]; position 0 is the best cluster.
test._results

{'aws_m1large_variable': [38, 4],
 'aws_m4large_magnetic': [38, 4],
 'aws_m4large_sdd': [38, 4],
 'aws_m4xlarge_variable': [79, 0],
 'aws_t2medium_magnetic': [12, 8],
 'aws_t2medium_variable': [12, 8],
 'aws_t2small_magnetic': [6, 9],
 'cecad_2xlarge_gp': [79, 0],
 'cecad_2xlarge_hpc': [79, 0],
 'cecad_3xlarge_gp': [79, 0],
 'cecad_3xlarge_hpc': [79, 0],
 'cecad_4xlarge_hpc': [58, 1],
 'cecad_5xlarge_hpc': [79, 0],
 'cecad_6xlarge_hpc': [79, 0],
 'cecad_7xlarge_hpc': [79, 0],
 'cecad_8xlarge_hpc': [79, 0],
 'cecad_dl1606g': [58, 1],
 'cecad_large_gp': [58, 1],
 'cecad_large_hpc': [51, 2],
 'cecad_large_nova': [58, 1],
 'cecad_medium_gp': [46, 3],
 'cecad_medium_hpc': [33, 5],
 'cecad_medium_nova': [46, 3],
 'cecad_r610': [79, 0],
 'cecad_r900': [0, 11],
 'cecad_small_gp': [26, 6],
 'cecad_small_hpc': [12, 8],
 'cecad_small_nova': [0, 11],
 'cecad_small_test': [21, 7],
 'cecad_xlarge_gp': [79, 0],
 'cecad_xlarge_hpc': [58, 1],
 'cecad_xsmall_gp': [26, 6],
 'cecad_xsmall_hpc': [12, 8],
 

In [274]:
# Scratch: rebuild the (name, value) list ordered by value, as clusterize_results does internally.
# NOTE(review): the values contain NaN, which compares False against everything, so the
# resulting order is not a reliable sort (the output shows both nan entries before 389.23).
sorted_players = [ (k,v) for v,k in sorted([(v,k) for k,v in test._players.items() ]) ]
sorted_players

[('cecad_7xlarge_hpc', 7.27),
 ('cecad_6xlarge_hpc', 7.98),
 ('cecad_8xlarge_hpc', 8.06),
 ('aws_m4xlarge_variable', 8.66),
 ('cecad_2xlarge_hpc', 10.21),
 ('cecad_3xlarge_hpc', 10.23),
 ('cecad_r610', 10.31),
 ('cecad_2xlarge_gp', 10.38),
 ('cecad_3xlarge_gp', 10.92),
 ('cecad_5xlarge_hpc', 11.55),
 ('cecad_xlarge_gp', 14.13),
 ('cecad_dl1606g', 19.3),
 ('cecad_large_hpc', 20.08),
 ('cecad_4xlarge_hpc', 22.7),
 ('cecad_large_nova', 24.58),
 ('cecad_large_gp', 25.5),
 ('cecad_medium_gp', 47.3),
 ('cecad_medium_nova', 48.54),
 ('aws_m4large_magnetic', 68.47),
 ('aws_m4large_sdd', 68.49),
 ('aws_m1large_variable', 71.18),
 ('cecad_medium_hpc', 90.77),
 ('cecad_xsmall_gp', 93.34),
 ('cecad_small_gp', 93.48),
 ('cecad_xsmall_nova', 94.12),
 ('cecad_small_test', 154.88),
 ('cecad_xsmall_hpc', 179.01),
 ('cecad_small_hpc', 181.27),
 ('aws_t2medium_variable', 188.99),
 ('aws_t2medium_magnetic', 193.24),
 ('cecad_r900', nan),
 ('cecad_small_nova', nan),
 ('aws_t2small_magnetic', 389.23)]

In [275]:
# Scratch: the players' values as a 1-D array, for plotting.
# `scipy` is already imported as `sp` in the tournament class cell; this import is redundant.
# NOTE(review): `sp.array` relies on SciPy re-exporting NumPy names at its top level — confirm it exists in the installed SciPy.
import scipy as sp
test_results = sp.array( sorted(list(test._players.values()) ) ) 
test_results

array([   7.27,    7.98,    8.06,    8.66,   10.21,   10.23,   10.31,
         10.38,   10.92,   11.55,   14.13,   19.3 ,   20.08,   22.7 ,
         24.58,   25.5 ,   47.3 ,   48.54,   68.47,   68.49,   71.18,
         90.77,   93.34,   93.48,   94.12,  154.88,  179.01,  181.27,
        188.99,  193.24,     nan,     nan,  389.23])

In [276]:
# Scratch: scatter each value against itself to eyeball how the results group.
import matplotlib.pyplot as plt
#plt.scatter([w for w in range( len( stream_copy._players  ) ) ],stream_copy_results,s=10)
plt.scatter(test_results, test_results,s=20)
plt.title(test._test_name)
# NOTE(review): the axis labels say "MB/s" but the plotted test is 'Tachyon - Total Time';
# the labels look left over from the commented-out stream_copy plot — confirm the units.
plt.xlabel("MB/s")
plt.ylabel("MB/s")
plt.autoscale(tight=True)
plt.grid(True, linestyle='-', color='0.75')
plt.show()

In [277]:
# Scratch: standalone K-Means estimator with the same settings used by clusterize_results.
# KMeans is already imported in the tournament class cell; random_state=0 fixes the seed.
from sklearn.cluster import KMeans
km = KMeans(n_clusters=10, init='random', n_init=10, max_iter=300, random_state=0)

In [278]:
# Scratch: duplicate each value into a 2-D point [v, v] for K-Means; NaN entries are still included.
results = [[w,w] for w in sorted(list(test._players.values()))]
results


[[7.27, 7.27],
 [7.98, 7.98],
 [8.06, 8.06],
 [8.66, 8.66],
 [10.21, 10.21],
 [10.23, 10.23],
 [10.31, 10.31],
 [10.38, 10.38],
 [10.92, 10.92],
 [11.55, 11.55],
 [14.13, 14.13],
 [19.3, 19.3],
 [20.08, 20.08],
 [22.7, 22.7],
 [24.58, 24.58],
 [25.5, 25.5],
 [47.3, 47.3],
 [48.54, 48.54],
 [68.47, 68.47],
 [68.49, 68.49],
 [71.18, 71.18],
 [90.77, 90.77],
 [93.34, 93.34],
 [93.48, 93.48],
 [94.12, 94.12],
 [154.88, 154.88],
 [179.01, 179.01],
 [181.27, 181.27],
 [188.99, 188.99],
 [193.24, 193.24],
 [nan, nan],
 [nan, nan],
 [389.23, 389.23]]

In [279]:
# Scratch: convert the list of points into a 2-D array, the input for fit_predict below.
data_to_cluster = sp.array(results)
data_to_cluster


array([[   7.27,    7.27],
       [   7.98,    7.98],
       [   8.06,    8.06],
       [   8.66,    8.66],
       [  10.21,   10.21],
       [  10.23,   10.23],
       [  10.31,   10.31],
       [  10.38,   10.38],
       [  10.92,   10.92],
       [  11.55,   11.55],
       [  14.13,   14.13],
       [  19.3 ,   19.3 ],
       [  20.08,   20.08],
       [  22.7 ,   22.7 ],
       [  24.58,   24.58],
       [  25.5 ,   25.5 ],
       [  47.3 ,   47.3 ],
       [  48.54,   48.54],
       [  68.47,   68.47],
       [  68.49,   68.49],
       [  71.18,   71.18],
       [  90.77,   90.77],
       [  93.34,   93.34],
       [  93.48,   93.48],
       [  94.12,   94.12],
       [ 154.88,  154.88],
       [ 179.01,  179.01],
       [ 181.27,  181.27],
       [ 188.99,  188.99],
       [ 193.24,  193.24],
       [    nan,     nan],
       [    nan,     nan],
       [ 389.23,  389.23]])

In [280]:
# Scratch: fit and label in one step.
# NOTE(review): this cell raises ValueError (see output) because data_to_cluster still contains
# NaN rows, so `y_km` is never assigned and the cells that follow cannot run.
y_km = km.fit_predict(data_to_cluster)
y_km

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
# Scratch (not executed): per-sample labels, available only after a successful fit.
km.labels_

In [None]:
# Scratch (not executed): cluster centres, available only after a successful fit.
km.cluster_centers_

In [None]:
# Scratch (not executed): number of labelled samples; depends on `y_km` from the failing fit_predict cell.
len(y_km)

In [None]:
# Scratch (not executed): map each player name to its label by position in sorted_players.
# This assumes y_km has one label per entry of sorted_players, in the same order.
clustering_results = {}
for i,item in enumerate(y_km) :
    clustering_results[ sorted_players[i][0] ] = y_km[i]
clustering_results

In [None]:
# Scratch (not executed): names of all registered players.
test._players.keys()

In [368]:
class EquivalenceTournament:
    """Run one tournament per registered test and aggregate the points per player.

    Players are stored by name, competitions are test descriptors exposing
    ``test_name`` and ``more_is_better``, and every competition is played as a
    separate ``ComputeResourceTestTournament``. The per-test points are then
    summed into an overall ``[points, position]`` entry per player.
    """

    def __init__(self):
        # Test descriptors to play, in registration order.
        self._competitions = []
        # Player name -> player object (as handed to add_player).
        self._players = {}
        # Test name -> ComputeResourceTestTournament that was played for it.
        self._tournament = {}
        # Player name -> [total points, overall position].
        self._results = {}

    def add_player(self, player_name, new_player):
        """Register ``new_player`` under ``player_name``, replacing any previous entry."""
        self._players[player_name] = new_player

    def add_competition(self, competition):
        """Append a single competition (test descriptor) to the list to be played."""
        self._competitions.append(competition)

    def add_competitions(self, competitions):
        """Append every competition from the iterable ``competitions``."""
        self._competitions.extend(competitions)

    def add_test_suite(self, file):
        """Load a ``TestSuite`` from ``file`` and register each of its tests."""
        test_suite = TestSuite(file)
        # Only the test descriptors are needed; the suite's keys are not used.
        for test_info in test_suite._tests.values():
            self.add_competition(test_info)

    def load_players_from_file(self, file):
        """Load players from a comma-separated file.

        Each non-empty row must hold the player name in column 0 and the path
        of that player's results file in column 1. Blank rows are skipped
        instead of raising ``IndexError``.
        """
        with open(file, newline='') as csvfile:
            player_result_reader = csv.reader(csvfile, delimiter=',')

            for row in player_result_reader:
                # csv.reader yields [] for blank lines (e.g. a stray trailing newline).
                if not row:
                    continue

                player_name = row[0]
                player_info_path = row[1]

                # The results object loads itself from the given path.
                new_player = ComputeResourceTestResults(player_info_path)

                self.add_player(player_name, new_player)

    def play_tournament_at_competition(self, players, competition):
        """Not implemented yet; currently does nothing and returns ``None``."""
        pass

    def play_tournament_at_competitions(self, players, competitions):
        """Not implemented yet; currently does nothing and returns ``None``."""
        pass

    def play_tournament_at_all_competitions(self, players):
        """Not implemented yet; currently does nothing and returns ``None``."""
        pass

    def play_complete_tournament(self, n_clusters=10, init='random', n_init=10, max_iter=300):
        """Play every registered competition with every registered player.

        For each competition a ``ComputeResourceTestTournament`` is created,
        filled with all players, clustered, played and scored; it is then
        stored in ``self._tournament`` under the test name.

        The four arguments are forwarded positionally, in this order, to
        ``clusterize_results``. Their names mirror the KMeans settings used
        elsewhere in this notebook -- presumed meaning, confirm against
        ``ComputeResourceTestTournament.clusterize_results``. The defaults
        reproduce the values that were previously hard-coded.
        """
        for test_info in self._competitions:
            test = ComputeResourceTestTournament(test_info.test_name, test_info.more_is_better)

            for name, player in self._players.items():
                test.add_player(name, player)

            test.clusterize_results(n_clusters, init, n_init, max_iter)
            test.play_tournament()
            test.calculate_tournament_results()

            self._tournament[test_info.test_name] = test

    def calculate_tournament_results(self):
        """Fill ``self._results`` with ``[total_points, position]`` per player.

        Total points are the sum of ``get_points(player)`` over all played
        tests. Positions are 0-based indices into the totals sorted in
        descending order; players with equal totals share the largest index
        of their tie group (the same outcome as the previous nested scan).
        """
        # First, total points per player.
        for player in self._players:
            points = sum(result.get_points(player) for result in self._tournament.values())
            self._results[player] = [points, 0]

        # Second, positions: one pass builds points -> last index in the
        # descending order, replacing the former players x players scan.
        sorted_points = sorted((entry[0] for entry in self._results.values()), reverse=True)
        position_of = {points: index for index, points in enumerate(sorted_points)}

        for entry in self._results.values():
            entry[1] = position_of[entry[0]]

    def get_tournament_positions(self):
        """Not implemented yet; currently does nothing and returns ``None``."""
        pass

    def get_tournament_results(self):
        """Not implemented yet; currently does nothing and returns ``None``."""
        pass

In [369]:
# Build the tournament: register every test from the suite CSV as a competition,
# then load one result set per player listed in the players CSV.
equivalence_tournament = EquivalenceTournament()
equivalence_tournament.add_test_suite("./benchmarks/tests.csv")
equivalence_tournament.load_players_from_file("./benchmarks/players.csv")

In [370]:
# Inspect the loaded players (player name -> ComputeResourceTestResults).
equivalence_tournament._players

{'aws_m1large_variable': <__main__.ComputeResourceTestResults at 0x7f20a8f80c88>,
 'aws_m4large_magnetic': <__main__.ComputeResourceTestResults at 0x7f20a7c8b128>,
 'aws_m4large_sdd': <__main__.ComputeResourceTestResults at 0x7f20a7c8b2e8>,
 'aws_m4xlarge_variable': <__main__.ComputeResourceTestResults at 0x7f20a865c898>,
 'aws_t2medium_magnetic': <__main__.ComputeResourceTestResults at 0x7f20a7c8b668>,
 'aws_t2medium_variable': <__main__.ComputeResourceTestResults at 0x7f20a7c8b550>,
 'aws_t2small_magnetic': <__main__.ComputeResourceTestResults at 0x7f20a7c8b320>,
 'cecad_2xlarge_gp': <__main__.ComputeResourceTestResults at 0x7f20a7c8b390>,
 'cecad_2xlarge_hpc': <__main__.ComputeResourceTestResults at 0x7f20a7c8b7f0>,
 'cecad_3xlarge_gp': <__main__.ComputeResourceTestResults at 0x7f20a7c8b470>,
 'cecad_3xlarge_hpc': <__main__.ComputeResourceTestResults at 0x7f20a7c8bb70>,
 'cecad_4xlarge_hpc': <__main__.ComputeResourceTestResults at 0x7f20a7c8bc88>,
 'cecad_5xlarge_hpc': <__main__.Com

In [371]:
# Number of loaded players.
len( equivalence_tournament._players )

34

In [372]:
# Number of registered competitions (tests).
len( equivalence_tournament._competitions )

110

In [373]:
# Play one per-test tournament for every registered competition.
equivalence_tournament.play_complete_tournament()

In [374]:
# Inspect the played per-test tournaments, keyed by test name.
equivalence_tournament._tournament

{'7-Zip Compression - Compress Speed Test': <__main__.ComputeResourceTestTournament at 0x7f20a76d9fd0>,
 'AIO-Stress - Random Write': <__main__.ComputeResourceTestTournament at 0x7f20a76d9f60>,
 'Apache Benchmark - Static Web Page Serving': <__main__.ComputeResourceTestTournament at 0x7f20a8f80a58>,
 'C-Ray - Total Time': <__main__.ComputeResourceTestTournament at 0x7f20a8f807f0>,
 'CacheBench - Read Cache': <__main__.ComputeResourceTestTournament at 0x7f20a7c2aa20>,
 'CacheBench - Write Cache': <__main__.ComputeResourceTestTournament at 0x7f20a79a5160>,
 'Crafty - Elapsed Time': <__main__.ComputeResourceTestTournament at 0x7f20a7c2a5c0>,
 'Dbench - 1 Clients': <__main__.ComputeResourceTestTournament at 0x7f20a7c2a860>,
 'Dbench - 12 Clients': <__main__.ComputeResourceTestTournament at 0x7f20a7c2aef0>,
 'Dbench - 128 Clients': <__main__.ComputeResourceTestTournament at 0x7f20a7c2a208>,
 'Dbench - 48 Clients': <__main__.ComputeResourceTestTournament at 0x7f20a79a5b00>,
 'FFmpeg - H.264 

In [375]:
# Results of a single test; each value is a two-element list
# (presumably [points, position] -- confirm against ComputeResourceTestTournament).
equivalence_tournament._tournament["7-Zip Compression - Compress Speed Test"]._results

{'aws_m1large_variable': [19, 8],
 'aws_m4large_magnetic': [35, 7],
 'aws_m4large_sdd': [35, 7],
 'aws_m4xlarge_variable': [99, 0],
 'aws_t2medium_magnetic': [35, 7],
 'aws_t2medium_variable': [35, 7],
 'aws_t2small_magnetic': [6, 9],
 'cecad_2xlarge_gp': [72, 4],
 'cecad_2xlarge_hpc': [82, 3],
 'cecad_3xlarge_gp': [72, 4],
 'cecad_3xlarge_hpc': [82, 3],
 'cecad_4xlarge_hpc': [60, 5],
 'cecad_5xlarge_hpc': [72, 4],
 'cecad_6xlarge_hpc': [89, 2],
 'cecad_7xlarge_hpc': [96, 1],
 'cecad_8xlarge_hpc': [89, 2],
 'cecad_dl1606g': [60, 5],
 'cecad_large_gp': [50, 6],
 'cecad_large_hpc': [50, 6],
 'cecad_large_nova': [50, 6],
 'cecad_medium_gp': [35, 7],
 'cecad_medium_hpc': [19, 8],
 'cecad_medium_nova': [35, 7],
 'cecad_r610': [72, 4],
 'cecad_r900': [89, 2],
 'cecad_small_gp': [19, 8],
 'cecad_small_hpc': [6, 9],
 'cecad_small_nova': [0, 11],
 'cecad_small_test': [6, 9],
 'cecad_xlarge_gp': [60, 5],
 'cecad_xlarge_hpc': [60, 5],
 'cecad_xsmall_gp': [19, 8],
 'cecad_xsmall_hpc': [6, 9],
 'ce

In [376]:
# Sum each player's points over all played tests and assign overall positions;
# every value in _results is [total_points, position].
equivalence_tournament.calculate_tournament_results()
equivalence_tournament._results

{'aws_m1large_variable': [2988, 29],
 'aws_m4large_magnetic': [7370, 4],
 'aws_m4large_sdd': [8568, 1],
 'aws_m4xlarge_variable': [8239, 2],
 'aws_t2medium_magnetic': [6765, 7],
 'aws_t2medium_variable': [2325, 31],
 'aws_t2small_magnetic': [4339, 24],
 'cecad_2xlarge_gp': [5736, 11],
 'cecad_2xlarge_hpc': [5942, 9],
 'cecad_3xlarge_gp': [5630, 14],
 'cecad_3xlarge_hpc': [5614, 15],
 'cecad_4xlarge_hpc': [4284, 25],
 'cecad_5xlarge_hpc': [4703, 23],
 'cecad_6xlarge_hpc': [4922, 20],
 'cecad_7xlarge_hpc': [5706, 12],
 'cecad_8xlarge_hpc': [5044, 19],
 'cecad_dl1606g': [5782, 10],
 'cecad_large_gp': [5346, 17],
 'cecad_large_hpc': [4879, 21],
 'cecad_large_nova': [7083, 5],
 'cecad_medium_gp': [4725, 22],
 'cecad_medium_hpc': [3093, 28],
 'cecad_medium_nova': [6810, 6],
 'cecad_r610': [8907, 0],
 'cecad_r900': [7439, 3],
 'cecad_small_gp': [3845, 26],
 'cecad_small_hpc': [2544, 30],
 'cecad_small_nova': [6382, 8],
 'cecad_small_test': [1343, 33],
 'cecad_xlarge_gp': [5676, 13],
 'cecad_x