In [1]:
class ComputeResourceInformation:
        
    def __init__(self, processor="", memory="", disk="", operating_system="", kernel="", compiler="", file_system="", system_layer="", motherboard="", chipset="", graphics="", network_card="", monitor=""):
        self.processor = processor
        self.memory = memory
        self.disk = disk
        self.operating_system = operating_system
        self.kernel = kernel
        self.compiler = compiler
        self.file_system = file_system
        self.system_layer = system_layer
        self.motherboard = motherboard
        self.chipset = chipset
        self.graphics = graphics
        self.network_card = network_card
        self.monitor = monitor
        
    def as_map(self):
        map_repr = {}
        
        map_repr['Processor'] = self.processor
        map_repr['Memory'] = self.memory
        map_repr['Disk'] = self.disk
        map_repr['OS'] = self.operating_system
        map_repr['Kernel'] = self.kernel
        map_repr['Compiler'] = self.compiler
        map_repr['File System'] = self.file_system
        map_repr['System Layer'] = self.system_layer
        map_repr['Motherboard'] = self.motherboard
        map_repr['Chipset'] = self.chipset
        map_repr['Graphics'] = self.graphics
        map_repr['Network'] = self.network_card
        map_repr['Monitor'] = self.monitor
        
        return map_repr
    
    def __str__(self):
        return str( self.as_map() )

In [2]:
class TestNotFound(Exception):
    def __init__(self, value="Test Not Found"):
        self.value = value
    def __str__(self):
        return repr(self.value)

In [3]:
import csv

class ComputeResourceTestResults:
    
    def __init__(self, file = None):
        
        self._results = {}
        self._info = ComputeResourceInformation()
        
        if file is not None:
            self.load_from_file(file)
        
    def update_resource_info(self, processor, memory, disk, operating_system, kernel, compiler, file_system, 
                             system_layer='', motherboard='', chipset='', graphics='', network_card=''):
            self._info.processor = processor
            self._info.memory = memory
            self._info.disk = disk
            self._info.operating_system = operating_system
            self._info.kernel = kernel
            self._info.compiler = compiler
            self._info.file_system = file_system
            self._info.system_layer = system_layer
            self._info.motherboard = motherboard
            self._info.chipset = chipset
            self._info.graphics = graphics
            self._info.network_card = network_card
            
    def add_test_results(self, test_name, test_results_values):
        
        if test_name not in self._results:
            self._results[test_name] = test_results_values
        else:
            self._results[test_name].extend( test_results_values )
    
    def add_test_result(self, test_name, test_result_value):
        
        results = []
        results.append(test_result_value)
        
        self.add_test_results(test_name, results)
    
    def get_test_results(self, test_name):
        try:
            return self._results[test_name]
        except KeyError as e:
            raise TestNotFound("Test Not Found")
    
    def delete_test_results(self, test_name):
        try:
            del self._results[test_name]
        except KeyError as e:
            raise TestNotFound("Test Not Found")
            
    def load_from_file(self, file):
        
        processor = ''
        memory = ''
        disk = ''
        operating_system = ''
        kernel = ''
        compiler = ''
        file_system = ''
        system_layer = ''
        motherboard = ''
        chipset = ''
        graphics = ''
        network_card = ''
        
        with open(file, newline='') as csvfile:
            
            test_result_reader = csv.reader(csvfile, delimiter=',')
             
            for row in test_result_reader:
                #print(row)
                if len(row) >= 2  and row[0] != ' ':
                    if row[0] == 'Processor':
                        processor = row[1] # Processor decoded
                    elif row[0] == 'Memory':
                        memory = row[1]  # Memory decoded
                    elif row[0] == 'Disk':
                        disk = row[1]    # Disk decoded
                    elif row[0] == 'OS':
                        operating_system = row[1]  ## OS decoded
                    elif row[0] == 'Kernel':
                        kernel = row[1]  # Kernel decoded
                    elif row[0] == 'Compiler':
                        compiler = row[1] # Compiler decoded
                    elif row[0] == 'File-System':
                        file_system = row[1] # File System decoded
                    elif row[0] == 'System Layer':
                        system_layer = row[1] # System layer decoded
                    elif row[0] == 'Motherboard':
                        motherboard = row[1] # Mother board decoded
                    elif row[0] == 'Chipset':
                        chipset = row[1]  # Chipset decoded
                    elif row[0] == 'Graphics':
                        graphics = row[1]  # Graphic card decoded
                    elif row[0] == 'Network':
                        network_card = row[1]  # Network card decoded
                    elif row[0] == 'Screen Resolution' or row[0] == 'Monitor':
                        pass
                    else:
                        # Fill test results
                        
                        self.add_test_results(row[0], [float(x) for x in row[1:]] ) 
        
        self.update_resource_info(processor, memory, disk, operating_system, kernel, compiler, file_system, 
                                  system_layer, motherboard, chipset, graphics, network_card)
                

In [4]:
aws_m1large_variable = ComputeResourceTestResults('./benchmarks/aws/m1large-aws-variable-ubuntu/merge-2092.csv')
aws_m4xlarge_variable = ComputeResourceTestResults('./benchmarks/aws/m44xlarge-aws-variable-ubuntu/merge-7274.csv')
aws_m4large_magnetic = ComputeResourceTestResults('./benchmarks/aws/m4large-aws-magnetic-ubuntu/merge-7288.csv')
aws_m4large_sdd = ComputeResourceTestResults('./benchmarks/aws/m4large-aws-sdd-ubuntu/merge-6914.csv')
aws_t2medium_magnetic = ComputeResourceTestResults('./benchmarks/aws/t2medium-aws-magnetic-ubuntu/merge-9347.csv')
aws_t2medium_variable = ComputeResourceTestResults('./benchmarks/aws/t2medium-aws-variable-ubuntu/merge-9691.csv')
aws_t2small_magnetic = ComputeResourceTestResults('./benchmarks/aws/t2small-aws-magnetic-ubuntu/merge-8438.csv')

In [18]:
cecad_2xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/2xlarge-gp-ubuntu/merge-5274.csv')
cecad_2xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/2xlarge-hpc-ubuntu/merge-4929.csv')
cecad_3xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/3xlarge-gp-ubuntu/merge-6495.csv')
cecad_3xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/3xlarge-hpc-ubuntu/merge-6359.csv')
cecad_4xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/4xlarge-hpc-ubuntu/merge-7671.csv')
cecad_5xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/5xlarge-hpc-ubuntu/merge-5104.csv')
cecad_6xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/6xlarge-hpc-ubuntu/merge-4492.csv')
cecad_7xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/7xlarge-hpc-ubuntu/merge-1826.csv')
cecad_8xlarge_hpc = ComputeResourceTestResults('./benchmarks/cecad/8xlarge-hpc-ubuntu/merge-3615.csv')
cecad_dl1606g = ComputeResourceTestResults('./benchmarks/cecad/dl1606g-ubuntu/merge-6157.csv')
cecad_large_gp = ComputeResourceTestResults('./benchmarks/cecad/large-gp-ubuntu/merge-9893.csv')
cecad_large_hpc = ComputeResourceTestResults('./benchmarks/cecad/large-hpc-ubuntu/merge-7675.csv')
cecad_large_nova = ComputeResourceTestResults('./benchmarks/cecad/large-ubuntu/merge-9724.csv')
cecad_medium_gp = ComputeResourceTestResults('./benchmarks/cecad/medium-gp-ubuntu/merge-5312.csv')
cecad_medium_hpc = ComputeResourceTestResults('./benchmarks/cecad/medium-hpc-ubuntu/merge-8901.csv')
cecad_medium_nova = ComputeResourceTestResults('./benchmarks/cecad/medium-ubuntu/merge-2260.csv')
cecad_r610 = ComputeResourceTestResults('./benchmarks/cecad/r610-ubuntu/merge-8348.csv')
cecad_r900 = ComputeResourceTestResults('./benchmarks/cecad/r900-debian/merge-4836.csv')
cecad_small_gp = ComputeResourceTestResults('./benchmarks/cecad/small-gp-ubuntu/merge-3500.csv')
cecad_small_hpc = ComputeResourceTestResults('./benchmarks/cecad/small-hpc-ubuntu/merge-1569.csv')
cecad_small_test = ComputeResourceTestResults('./benchmarks/cecad/small-test-ubuntu/merge-1472.csv')
cecad_small_nova = ComputeResourceTestResults('./benchmarks/cecad/small-ubuntu/merge-4467.csv')
cecad_xlarge_gp = ComputeResourceTestResults('./benchmarks/cecad/xlarge-gp-ubuntu/merge-1562.csv')
cecad_large_hpc = ComputeResourceTestResults('./benchmarks/cecad/xlarge-hpc-ubuntu/merge-3341.csv')
cecad_xsmall_gp = ComputeResourceTestResults('./benchmarks/cecad/xsmall-gp-ubuntu/merge-3733.csv')
cecad_xsmall_hpc = ComputeResourceTestResults('./benchmarks/cecad/xsmall-hpc-ubuntu/merge-2556.csv')
cecad_xsmall_nova = ComputeResourceTestResults('./benchmarks/cecad/xsmall-ubuntu/merge-1877.csv')

In [19]:
print(aws_m4large_sdd._info )

{'Disk': '158GB', 'File System': 'ext4', 'Memory': '8192MB', 'Processor': 'Intel Xeon E5-2676 v3 @ 2.39GHz (2 Cores)', 'Network': 'Intel 82599 Virtual Function', 'Graphics': 'Cirrus Logic GD 5446', 'Monitor': '', 'Chipset': 'Intel 440FX- 82441FX PMC', 'Compiler': 'GCC 4.8.4', 'Kernel': '3.13.0-48-generic (x86_64)', 'OS': 'Ubuntu 14.04', 'System Layer': 'Xen HVM domU 4.2.amazon', 'Motherboard': 'Xen HVM domU'}


In [20]:
class TestInformation:
    
    CPU_TEST = 0
    MEMORY_TEST = 1
    DISK_TEST = 2
    
    categories = ("CPU","MEMORY","DISK")
    
    def __init__(self, test_name, units, category = MEMORY_TEST, more_is_better=True):
        self.test_name = test_name
        self.units = units
        self.category = category
        self.more_is_better = more_is_better
    
    def as_map(self):
        map_repr = {}
        
        map_repr['Test Name'] = self.test_name
        map_repr['Units'] = self.units
        map_repr['Category'] = self.categories[self.category]
        map_repr['More Is Better'] = self.more_is_better
        
        return map_repr
    
    def __eq__(self,other):
        return self.test_name == other.test_name and self.units == other.units
    
    def __str__(self):
        return str( self.as_map() )

In [21]:
test_info = TestInformation("SQLite", "MB/s", TestInformation.CPU_TEST, True)
test_info.as_map()

{'Category': 'CPU',
 'More Is Better': True,
 'Test Name': 'SQLite',
 'Units': 'MB/s'}

In [231]:
class TestSuite:
    
    def __init__(self, file = None):
        
        self._tests = {}
        
        if file is not None:
            self.load_from_file(file)
    
    def add_test(self, test_name, units, category=TestInformation.MEMORY_TEST, more_is_better=True ):
        test_info = TestInformation(test_name, units, category, more_is_better)
        self._tests[test_info.test_name] = test_info
    
    def load_from_file(self, file ):
        
        with open(file, newline='') as csvfile:
            
            test_result_reader = csv.reader(csvfile, delimiter=',')
             
            for row in test_result_reader:
                
                test_name = row[0]
                units = row[1]
                category = int(row[2])
                more_is_better = False
                
                if( row[3] == "True" ):
                    more_is_better = True
                
                self.add_test(test_name, units, category, more_is_better)
    
    def get_test_info(self, test_name ):
        return self._tests[test_name]

In [232]:
test_suite = TestSuite("./benchmarks/tests.csv")
print( test_suite._tests['7-Zip Compression - Compress Speed Test'] )

{'Test Name': '7-Zip Compression - Compress Speed Test', 'More Is Better': True, 'Category': 'CPU', 'Units': 'MIPS'}


In [233]:
import scipy as sp
from sklearn.cluster import KMeans

class ComputeResourceTestTournament:
    
    POINTS_PER_WIN = 3
    POINTS_PER_TIE = 1
    POINTS_PER_LOSE = 0
    
    def __init__(self, test_name, more_is_better=True):
        self._test_name = test_name
        self._more_is_better = more_is_better
        self._players = {}
        self._km = None
        self._cluster_results = {}
        self._tournament = {}
        self._results = {}
    
    def add_player(self, player_name, player):
        try:
            test_result = player.get_test_results( self._test_name )
            self._players[ player_name ] = test_result[0]
        except TestNotFound as e:
            self._players[ player_name ] = sp.NAN
            
    def clusterize_results( self, number_clusters=10, init_mode='random', number_init=10, max_iterations=300 ):
        
        self._km = KMeans(n_clusters=number_clusters, init=init_mode, n_init=number_init, max_iter=max_iterations, random_state=0)
        
        # Sort players by value
        sorted_players = [ (k,v) for v,k in sorted([(v,k) for k,v in self._players.items() ]) ]
        
        try:
            
            # Create a 2-dimensional array for doing K-Means
            results = [[w,w] for w in sorted(list(self._players.values()))]
            results_as_np = sp.array(results)
        
            # Execute cluster algorithm
            y_km = self._km.fit_predict( results_as_np ) 
        
            # Create clustering results {(p_i,clusterid_i)}
            for i,item in enumerate(y_km) :
                self._cluster_results[ sorted_players[i][0] ] = y_km[i]
        
        except ValueError as e:  #Doesn't work if Nan values
            
            results = []
            for r in sorted(list(self._players.values())):
                if r is not sp.NAN:
                    results.append( [r,r] )
            
            results_as_np = sp.array(results)
                            
            y_km = self._km.fit_predict( results_as_np ) # Call clustering without Nan
            
            # Create clustering results {(p_i,clusterid_i)}
            
            assign_index = 0
            
            for info in sorted_players:
                player_name = info[0]
                
                if self._players[ player_name ] is not sp.NAN:
                    self._cluster_results[ player_name ] = y_km[assign_index]  
                    assign_index = assign_index + 1
                else:
                    self._cluster_results[ player_name ] = sp.NAN
  
    def play_tournament(self):
        
        for key1 in self._players:
            for key2 in self._players:
                
                if key1 != key2:
                    
                    cluster_player_1 = self._cluster_results[ key1 ]
                    cluster_player_2 = self._cluster_results[ key2 ]
                    
                    if cluster_player_1 is sp.NAN: #Player doesn't compete in tournamet. Automatically loses
                        points = self.POINTS_PER_LOSE
                    else:
                        if cluster_player_2 is sp.NAN: #Player doesn't compete in tournamet. Automatically wins
                            points = self.POINTS_PER_WIN
                        else:
                            
                            points = self.POINTS_PER_TIE

                            # Different cluster?
                            if cluster_player_1 != cluster_player_2:

                                if( self._km.cluster_centers_[cluster_player_1][0] > self._km.cluster_centers_[cluster_player_2][0] ):
                                    if self._more_is_better:
                                        points = self.POINTS_PER_WIN
                                    else:
                                        points = self.POINTS_PER_LOSE
                                else:
                                    if self._more_is_better:
                                        points = self.POINTS_PER_LOSE
                                    else:
                                        points = self.POINTS_PER_WIN

                    match_result = (key2, points)

                    if( key1 not in self._tournament):
                        self._tournament[key1] = []

                    self._tournament[key1].append( match_result )
            
    def calculate_tournament_results(self ):
        # results is a dictionary with the player as the key and a list [points,position] as the value
    
        # Calculate points and positions for every player
        # First, calculate points
    
        for player in self._tournament:
    
            points = 0
        
            for (op,p) in self._tournament[player]:
                points = points + p
        
            self._results[player] = [points,0]
            
        # Second, recalculate positions
        
        unsorted_cluster_centers = []
        for centers in self._km.cluster_centers_:
            unsorted_cluster_centers.append(centers[0])
        
        sorted_cluster_centers = sorted( unsorted_cluster_centers, reverse=(self._more_is_better) )
        
        for i,cc in enumerate(sorted_cluster_centers):
            # Get cluster index of this value
            index = unsorted_cluster_centers.index(cc)
            
            # Update positions
            
            for (k,v) in self._cluster_results.items():
                if index == v:
                    self._results[k][1] = i
                if v is sp.NAN:
                    self._results[k][1] = len( unsorted_cluster_centers ) +1

    def get_points(self, player ):
        return self._results[player][0]

    def get_position(self, player):
        return self._results[player][1]

In [220]:
test = ComputeResourceTestTournament('Tachyon - Total Time',False)
test.add_player( 'aws_m1large_variable', aws_m1large_variable )
test.add_player( 'aws_m4xlarge_variable', aws_m4xlarge_variable )
test.add_player( 'aws_m4large_magnetic', aws_m4large_magnetic )
test.add_player( 'aws_m4large_sdd', aws_m4large_sdd )
test.add_player( 'aws_t2medium_magnetic', aws_t2medium_magnetic )
test.add_player( 'aws_t2medium_variable', aws_t2medium_variable )
test.add_player( 'aws_t2small_magnetic', aws_t2small_magnetic )
test.add_player( 'cecad_2xlarge_gp', cecad_2xlarge_gp )
test.add_player( 'cecad_2xlarge_hpc', cecad_2xlarge_hpc )
test.add_player( 'cecad_3xlarge_gp', cecad_3xlarge_gp )
test.add_player( 'cecad_3xlarge_hpc', cecad_3xlarge_hpc )
test.add_player( 'cecad_4xlarge_hpc', cecad_4xlarge_hpc )
test.add_player( 'cecad_5xlarge_hpc', cecad_5xlarge_hpc )
test.add_player( 'cecad_6xlarge_hpc', cecad_6xlarge_hpc )
test.add_player( 'cecad_7xlarge_hpc', cecad_7xlarge_hpc )
test.add_player( 'cecad_8xlarge_hpc', cecad_8xlarge_hpc )
test.add_player( 'cecad_dl1606g', cecad_dl1606g )
test.add_player( 'cecad_large_gp', cecad_large_gp )
test.add_player( 'cecad_large_hpc', cecad_large_hpc )
test.add_player( 'cecad_large_nova', cecad_large_nova )
test.add_player( 'cecad_medium_gp', cecad_medium_gp )
test.add_player( 'cecad_medium_hpc', cecad_medium_hpc )
test.add_player( 'cecad_medium_nova', cecad_medium_nova )
test.add_player( 'cecad_r610', cecad_r610 )
test.add_player( 'cecad_r900', cecad_r900 )
test.add_player( 'cecad_small_gp', cecad_small_gp )
test.add_player( 'cecad_small_hpc', cecad_small_hpc )
test.add_player( 'cecad_small_test', cecad_small_test )
test.add_player( 'cecad_small_nova', cecad_small_nova )
test.add_player( 'cecad_xlarge_gp', cecad_xlarge_gp )
test.add_player( 'cecad_xsmall_gp', cecad_xsmall_gp )
test.add_player( 'cecad_xsmall_hpc', cecad_xsmall_hpc )
test.add_player( 'cecad_xsmall_nova', cecad_xsmall_nova )
test._players

{'aws_m1large_variable': 71.18,
 'aws_m4large_magnetic': 68.47,
 'aws_m4large_sdd': 68.49,
 'aws_m4xlarge_variable': 8.66,
 'aws_t2medium_magnetic': 193.24,
 'aws_t2medium_variable': 188.99,
 'aws_t2small_magnetic': 389.23,
 'cecad_2xlarge_gp': 10.38,
 'cecad_2xlarge_hpc': 10.21,
 'cecad_3xlarge_gp': 10.92,
 'cecad_3xlarge_hpc': 10.23,
 'cecad_4xlarge_hpc': 22.7,
 'cecad_5xlarge_hpc': 11.55,
 'cecad_6xlarge_hpc': 7.98,
 'cecad_7xlarge_hpc': 7.27,
 'cecad_8xlarge_hpc': 8.06,
 'cecad_dl1606g': 19.3,
 'cecad_large_gp': 25.5,
 'cecad_large_hpc': 20.08,
 'cecad_large_nova': 24.58,
 'cecad_medium_gp': 47.3,
 'cecad_medium_hpc': 90.77,
 'cecad_medium_nova': 48.54,
 'cecad_r610': 10.31,
 'cecad_r900': nan,
 'cecad_small_gp': 93.48,
 'cecad_small_hpc': 181.27,
 'cecad_small_nova': nan,
 'cecad_small_test': 154.88,
 'cecad_xlarge_gp': 14.13,
 'cecad_xsmall_gp': 93.34,
 'cecad_xsmall_hpc': 179.01,
 'cecad_xsmall_nova': 94.12}

In [221]:
test.clusterize_results( 10, 'random', 10, 300 )

In [222]:
test._cluster_results

{'aws_m1large_variable': 8,
 'aws_m4large_magnetic': 8,
 'aws_m4large_sdd': 8,
 'aws_m4xlarge_variable': 0,
 'aws_t2medium_magnetic': 4,
 'aws_t2medium_variable': 4,
 'aws_t2small_magnetic': 1,
 'cecad_2xlarge_gp': 0,
 'cecad_2xlarge_hpc': 0,
 'cecad_3xlarge_gp': 0,
 'cecad_3xlarge_hpc': 0,
 'cecad_4xlarge_hpc': 2,
 'cecad_5xlarge_hpc': 3,
 'cecad_6xlarge_hpc': 0,
 'cecad_7xlarge_hpc': 0,
 'cecad_8xlarge_hpc': 0,
 'cecad_dl1606g': 7,
 'cecad_large_gp': 2,
 'cecad_large_hpc': 7,
 'cecad_large_nova': 2,
 'cecad_medium_gp': 6,
 'cecad_medium_hpc': 9,
 'cecad_medium_nova': 6,
 'cecad_r610': 0,
 'cecad_r900': nan,
 'cecad_small_gp': 9,
 'cecad_small_hpc': 4,
 'cecad_small_nova': nan,
 'cecad_small_test': 5,
 'cecad_xlarge_gp': 3,
 'cecad_xsmall_gp': 9,
 'cecad_xsmall_hpc': 4,
 'cecad_xsmall_nova': 9}

In [223]:
test._km.cluster_centers_

array([[   9.33555556,    9.33555556],
       [ 389.23      ,  389.23      ],
       [  24.26      ,   24.26      ],
       [  12.84      ,   12.84      ],
       [ 185.6275    ,  185.6275    ],
       [ 154.88      ,  154.88      ],
       [  47.92      ,   47.92      ],
       [  19.69      ,   19.69      ],
       [  69.38      ,   69.38      ],
       [  92.9275    ,   92.9275    ]])

In [224]:
test.play_tournament()

In [225]:
test._tournament["aws_m1large_variable"]

[('cecad_small_gp', 3),
 ('cecad_xsmall_nova', 3),
 ('cecad_2xlarge_hpc', 0),
 ('cecad_3xlarge_hpc', 0),
 ('cecad_xsmall_hpc', 3),
 ('cecad_6xlarge_hpc', 0),
 ('aws_m4xlarge_variable', 0),
 ('cecad_medium_gp', 0),
 ('cecad_3xlarge_gp', 0),
 ('cecad_large_gp', 0),
 ('aws_t2medium_magnetic', 3),
 ('cecad_large_hpc', 0),
 ('cecad_4xlarge_hpc', 0),
 ('cecad_2xlarge_gp', 0),
 ('cecad_r610', 0),
 ('cecad_7xlarge_hpc', 0),
 ('cecad_medium_nova', 0),
 ('cecad_medium_hpc', 3),
 ('cecad_xlarge_gp', 0),
 ('cecad_small_test', 3),
 ('cecad_large_nova', 0),
 ('cecad_dl1606g', 0),
 ('aws_m4large_magnetic', 1),
 ('aws_t2medium_variable', 3),
 ('cecad_small_nova', 3),
 ('cecad_r900', 3),
 ('aws_t2small_magnetic', 3),
 ('cecad_5xlarge_hpc', 0),
 ('aws_m4large_sdd', 1),
 ('cecad_xsmall_gp', 3),
 ('cecad_small_hpc', 3),
 ('cecad_8xlarge_hpc', 0)]

In [226]:
test._cluster_results.items()

dict_items([('cecad_medium_hpc', 9), ('cecad_small_gp', 9), ('cecad_xsmall_nova', 9), ('cecad_2xlarge_hpc', 0), ('cecad_3xlarge_hpc', 0), ('cecad_xsmall_hpc', 4), ('cecad_6xlarge_hpc', 0), ('aws_m1large_variable', 8), ('aws_t2medium_magnetic', 4), ('aws_m4xlarge_variable', 0), ('cecad_medium_gp', 6), ('cecad_3xlarge_gp', 0), ('cecad_large_gp', 2), ('cecad_small_test', 5), ('cecad_large_hpc', 7), ('cecad_4xlarge_hpc', 2), ('cecad_2xlarge_gp', 0), ('cecad_7xlarge_hpc', 0), ('cecad_medium_nova', 6), ('cecad_r610', 0), ('cecad_xlarge_gp', 3), ('cecad_large_nova', 2), ('cecad_dl1606g', 7), ('aws_m4large_magnetic', 8), ('aws_t2medium_variable', 4), ('cecad_small_nova', nan), ('cecad_r900', nan), ('aws_t2small_magnetic', 1), ('cecad_5xlarge_hpc', 3), ('aws_m4large_sdd', 8), ('cecad_xsmall_gp', 9), ('cecad_small_hpc', 4), ('cecad_8xlarge_hpc', 0)])

In [227]:
test.calculate_tournament_results()
points = test.get_points("aws_m1large_variable")
points

38

In [228]:
points = test.get_points("cecad_dl1606g")
points

61

In [229]:
points = test.get_points("cecad_r610")
points

80

In [230]:
test._results

{'aws_m1large_variable': [38, 5],
 'aws_m4large_magnetic': [38, 5],
 'aws_m4large_sdd': [38, 5],
 'aws_m4xlarge_variable': [80, 0],
 'aws_t2medium_magnetic': [12, 8],
 'aws_t2medium_variable': [12, 8],
 'aws_t2small_magnetic': [6, 9],
 'cecad_2xlarge_gp': [80, 0],
 'cecad_2xlarge_hpc': [80, 0],
 'cecad_3xlarge_gp': [80, 0],
 'cecad_3xlarge_hpc': [80, 0],
 'cecad_4xlarge_hpc': [53, 3],
 'cecad_5xlarge_hpc': [67, 1],
 'cecad_6xlarge_hpc': [80, 0],
 'cecad_7xlarge_hpc': [80, 0],
 'cecad_8xlarge_hpc': [80, 0],
 'cecad_dl1606g': [61, 2],
 'cecad_large_gp': [53, 3],
 'cecad_large_hpc': [61, 2],
 'cecad_large_nova': [53, 3],
 'cecad_medium_gp': [46, 4],
 'cecad_medium_hpc': [27, 6],
 'cecad_medium_nova': [46, 4],
 'cecad_r610': [80, 0],
 'cecad_r900': [0, 11],
 'cecad_small_gp': [27, 6],
 'cecad_small_hpc': [12, 8],
 'cecad_small_nova': [0, 11],
 'cecad_small_test': [21, 7],
 'cecad_xlarge_gp': [67, 1],
 'cecad_xsmall_gp': [27, 6],
 'cecad_xsmall_hpc': [12, 8],
 'cecad_xsmall_nova': [27, 6]}

In [205]:
sorted_players = [ (k,v) for v,k in sorted([(v,k) for k,v in test._players.items() ]) ]
sorted_players

[('cecad_small_test', 544.39),
 ('cecad_xsmall_hpc', 1104.26),
 ('cecad_small_hpc', 1241.54),
 ('cecad_small_gp', 1491.12),
 ('cecad_xsmall_gp', 1529.08),
 ('cecad_medium_hpc', 1663.36),
 ('aws_m1large_variable', 1690.01),
 ('cecad_5xlarge_hpc', 1710.06),
 ('cecad_xlarge_gp', 1756.55),
 ('cecad_3xlarge_gp', 1934.58),
 ('cecad_2xlarge_gp', 2060.34),
 ('cecad_2xlarge_hpc', 2173.17),
 ('cecad_4xlarge_hpc', 2296.85),
 ('cecad_8xlarge_hpc', 2360.89),
 ('cecad_3xlarge_hpc', 2395.94),
 ('cecad_large_gp', 2406.96),
 ('cecad_7xlarge_hpc', 2459.69),
 ('cecad_6xlarge_hpc', 2573.65),
 ('cecad_large_hpc', 2799.15),
 ('cecad_xsmall_nova', 3237.65),
 ('cecad_small_nova', 3321.2),
 ('cecad_medium_gp', 3503.61),
 ('cecad_large_nova', 3863.2),
 ('aws_t2small_magnetic', 3905.83),
 ('cecad_medium_nova', 4139.85),
 ('aws_m4large_sdd', 4864.89),
 ('aws_m4large_magnetic', 5022.29),
 ('aws_t2medium_magnetic', 6315.54),
 ('cecad_dl1606g', 7205.38),
 ('cecad_r610', 8174.97),
 ('aws_m4xlarge_variable', 10792.74)

In [144]:
import scipy as sp
test_results = sp.array( sorted(list(test._players.values()) ) ) 
test_results

array([  1811.48,   2742.36,   3130.86,   3784.74,   3787.94,   3790.1 ,
         4317.39,   4366.38,   4529.83,   4884.07,   5419.52,   5530.85,
         5944.23,   6017.76,   6117.3 ,   6130.25,   6130.31,   6343.94,
         6350.59,   6400.35,   7748.59,   8430.88,   9901.69,  10181.49,
        10225.84,  10862.81,  11144.29,  12142.59,  12681.31,  18732.27,
        19448.22,  20845.24,  26801.54])

In [145]:
import matplotlib.pyplot as plt
#plt.scatter([w for w in range( len( stream_copy._players  ) ) ],stream_copy_results,s=10)
plt.scatter(test_results, test_results,s=20)
plt.title(test._test_name)
plt.xlabel("MB/s")
plt.ylabel("MB/s")
plt.autoscale(tight=True)
plt.grid(True, linestyle='-', color='0.75')
plt.show()

In [146]:
from sklearn.cluster import KMeans
km = KMeans(n_clusters=10, init='random', n_init=10, max_iter=300, random_state=0)

In [147]:
results = [[w,w] for w in sorted(list(test._players.values()))]
results


[[1811.48, 1811.48],
 [2742.36, 2742.36],
 [3130.86, 3130.86],
 [3784.74, 3784.74],
 [3787.94, 3787.94],
 [3790.1, 3790.1],
 [4317.39, 4317.39],
 [4366.38, 4366.38],
 [4529.83, 4529.83],
 [4884.07, 4884.07],
 [5419.52, 5419.52],
 [5530.85, 5530.85],
 [5944.23, 5944.23],
 [6017.76, 6017.76],
 [6117.3, 6117.3],
 [6130.25, 6130.25],
 [6130.31, 6130.31],
 [6343.94, 6343.94],
 [6350.59, 6350.59],
 [6400.35, 6400.35],
 [7748.59, 7748.59],
 [8430.88, 8430.88],
 [9901.69, 9901.69],
 [10181.49, 10181.49],
 [10225.84, 10225.84],
 [10862.81, 10862.81],
 [11144.29, 11144.29],
 [12142.59, 12142.59],
 [12681.31, 12681.31],
 [18732.27, 18732.27],
 [19448.22, 19448.22],
 [20845.24, 20845.24],
 [26801.54, 26801.54]]

In [148]:
data_to_cluster = sp.array(results)
data_to_cluster


array([[  1811.48,   1811.48],
       [  2742.36,   2742.36],
       [  3130.86,   3130.86],
       [  3784.74,   3784.74],
       [  3787.94,   3787.94],
       [  3790.1 ,   3790.1 ],
       [  4317.39,   4317.39],
       [  4366.38,   4366.38],
       [  4529.83,   4529.83],
       [  4884.07,   4884.07],
       [  5419.52,   5419.52],
       [  5530.85,   5530.85],
       [  5944.23,   5944.23],
       [  6017.76,   6017.76],
       [  6117.3 ,   6117.3 ],
       [  6130.25,   6130.25],
       [  6130.31,   6130.31],
       [  6343.94,   6343.94],
       [  6350.59,   6350.59],
       [  6400.35,   6400.35],
       [  7748.59,   7748.59],
       [  8430.88,   8430.88],
       [  9901.69,   9901.69],
       [ 10181.49,  10181.49],
       [ 10225.84,  10225.84],
       [ 10862.81,  10862.81],
       [ 11144.29,  11144.29],
       [ 12142.59,  12142.59],
       [ 12681.31,  12681.31],
       [ 18732.27,  18732.27],
       [ 19448.22,  19448.22],
       [ 20845.24,  20845.24],
       [

In [23]:
y_km = km.fit_predict(data_to_cluster)
y_km

array([0, 0, 0, 0, 0, 0, 0, 3, 3, 7, 7, 7, 2, 2, 2, 2, 2, 8, 8, 9, 9, 5, 5,
       5, 5, 4, 6, 6, 1, 1], dtype=int32)

In [24]:
km.labels_

array([0, 0, 0, 0, 0, 0, 0, 3, 3, 7, 7, 7, 2, 2, 2, 2, 2, 8, 8, 9, 9, 5, 5,
       5, 5, 4, 6, 6, 1, 1], dtype=int32)

In [151]:
km.cluster_centers_

array([[  20.81428571,   20.81428571],
       [ 645.455     ,  645.455     ],
       [ 141.272     ,  141.272     ],
       [  36.12      ,   36.12      ],
       [ 563.27      ,  563.27      ],
       [ 512.6675    ,  512.6675    ],
       [ 583.435     ,  583.435     ],
       [  51.72      ,   51.72      ],
       [ 187.055     ,  187.055     ],
       [ 444.555     ,  444.555     ]])

In [26]:
len(y_km)

30

In [57]:
clustering_results = {}
for i,item in enumerate(y_km) :
    clustering_results[ sorted_players[i][0] ] = y_km[i]
clustering_results

{'aws_m1large_variable': 3,
 'aws_m4large_magnetic': 7,
 'aws_m4large_sdd': 0,
 'aws_m4xlarge_variable': 0,
 'aws_t2medium_magnetic': 7,
 'aws_t2medium_variable': 3,
 'aws_t2small_magnetic': 7,
 'cecad_2xlarge_gp': 2,
 'cecad_2xlarge_hpc': 5,
 'cecad_3xlarge_gp': 2,
 'cecad_3xlarge_hpc': 6,
 'cecad_4xlarge_hpc': 5,
 'cecad_5xlarge_hpc': 6,
 'cecad_6xlarge_hpc': 5,
 'cecad_7xlarge_hpc': 9,
 'cecad_8xlarge_hpc': 9,
 'cecad_dl1606g': 4,
 'cecad_large_gp': 2,
 'cecad_large_hpc': 1,
 'cecad_large_nova': 0,
 'cecad_medium_gp': 8,
 'cecad_medium_nova': 0,
 'cecad_r610': 0,
 'cecad_small_gp': 2,
 'cecad_small_nova': 0,
 'cecad_small_test': 5,
 'cecad_xlarge_gp': 2,
 'cecad_xsmall_gp': 8,
 'cecad_xsmall_hpc': 1,
 'cecad_xsmall_nova': 0}

In [28]:
test._players.keys()

dict_keys(['aws_m4xlarge_variable', 'cecad_xsmall_hpc', 'cecad_5xlarge_hpc', 'cecad_2xlarge_hpc', 'cecad_dl1606g', 'cecad_medium_gp', 'cecad_7xlarge_hpc', 'cecad_xsmall_nova', 'aws_t2medium_variable', 'aws_t2medium_magnetic', 'cecad_large_hpc', 'cecad_small_gp', 'cecad_large_nova', 'cecad_6xlarge_hpc', 'cecad_8xlarge_hpc', 'cecad_3xlarge_hpc', 'cecad_small_test', 'cecad_2xlarge_gp', 'cecad_xlarge_gp', 'cecad_medium_nova', 'aws_m4large_magnetic', 'cecad_xsmall_gp', 'aws_m1large_variable', 'cecad_large_gp', 'cecad_3xlarge_gp', 'cecad_4xlarge_hpc', 'cecad_small_nova', 'aws_m4large_sdd', 'cecad_r610', 'aws_t2small_magnetic'])

In [237]:
class EquivalenceTournament:
        
    def __init__(self):
        self._competitions = []
        self._players = []
    
    def add_player(self, new_player):
        self._players.append( new_player )
    
    def add_players(self, new_players ):
        self._players.extend( new_players )
    
    def add_competition(self, competition):
        self._competitions.append( competition )
    
    def add_competitions(self, competitions):
        self._competitions.extend( competitions )
    
    def add_test_suite(self, file):
        test_suite = TestSuite(file)
        for key,test_info in test_suite._tests.items():
            self.add_competition(test_info)
    
    def load_players_from_file(self, file):
        
        with open(file, newline='') as csvfile:
            
            player_result_reader = csv.reader(csvfile, delimiter=',')
             
            for row in player_result_reader:
                
                player_name = row[0]
                player_info_path = row[1]
                
                # Create info 
                
                new_player = ComputeResourceTestResults(player_info_path)
                
                self.add_player(new_player)
        
    
    def play_tournament_at_competition(self, players, competition):
        pass
    
    def play_tournament_at_competitions(self, players, competitions):
        pass
    
    def play_tournament_at_all_competitions(self, players ):
        pass
    
    def play_complete_tournament(self):
        pass
    
    def get_tournament_positions(self):
        pass
    
    def get_tournament_results(self):
        pass
    

In [242]:
equivalence_tournament = EquivalenceTournament()
equivalence_tournament.add_test_suite("./benchmarks/tests.csv")
equivalence_tournament.load_players_from_file("./benchmarks/players.csv")

In [250]:
equivalence_tournament._players[33]._info.as_map()

{'Chipset': 'Intel 440FX- 82441FX PMC',
 'Compiler': 'GCC 4.8.4',
 'Disk': '40GB',
 'File System': 'ext4',
 'Graphics': 'Cirrus Logic GD 5446',
 'Kernel': '3.13.0-62-generic (x86_64)',
 'Memory': '1024MB',
 'Monitor': '',
 'Motherboard': 'OpenStack Foundation Nova v2014.2.2',
 'Network': 'Red Hat Virtio device',
 'OS': 'Ubuntu 14.04',
 'Processor': 'Intel Core i7 9xx @ 2.93GHz (1 Core)',
 'System Layer': ''}

In [251]:
len( equivalence_tournament._players )

34