In [1]:
import numpy as np
import pickle
import random
import json
import os
import plotly.graph_objects as go
import sklearn.cluster as cluster

from tqdm.notebook import tqdm

# A Process class which emulates a process in a server. It has a pid and a length measured in numbers of instructions.
# Furthermore, it possesses the __repr__ method which is used to print the object.
# Moreover, it possesses a to_json method which is used to convert the object to a json string and a from_json to convert it back.

class Process:
    """A process to be scheduled on a server.

    Attributes:
        pid: identifier of the process.
        length: size of the process, measured in number of instructions.
    """

    def __init__(self, pid, length):
        self.pid = pid
        self.length = length

    def __repr__(self):
        return f"Process(pid={self.pid}, length={self.length})"

    def to_json(self):
        """Serialise the instance attributes to a pretty-printed JSON string."""
        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)

    @staticmethod
    def from_json(json_string):
        """Rebuild a Process from a string produced by ``to_json``.

        Previously this returned the raw dict from ``json.loads``, so the
        round trip did not give back a Process object.

        Raises:
            KeyError: if the JSON object lacks ``pid`` or ``length``.
        """
        data = json.loads(json_string)
        return Process(data["pid"], data["length"])

In [2]:
# A Server class which emulates a server. It has a name, a number of cpus, the speed of its cpu measured in GHz and a workload list, which represents 
# the processes that are assigned to the server. The workload starts as empty. 
# Furthermore, it possesses the __repr__ method which is used to print the object.
# Moreover, it possesses a to_json method which is used to convert the object to a json string and a from_json to convert it back.

class Server:
    """A server that processes can be assigned to.

    Attributes:
        name: label of the server.
        cpus: number of CPUs.
        cpu_speed: speed of each CPU (GHz according to the cell comment).

    NOTE(review): the cell comment mentions a workload list, but this class
    does not store one; process assignments are kept in Solution instead.
    """

    def __init__(self, name, cpus, cpu_speed):
        self.name = name
        self.cpus = cpus
        self.cpu_speed = cpu_speed

    def __repr__(self):
        # The closing parenthesis was missing from the original format string.
        return f"Server(name={self.name}, cpus={self.cpus}, cpu_speed={self.cpu_speed})"

    def to_json(self):
        """Serialise the instance attributes to a pretty-printed JSON string."""
        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)

    @staticmethod
    def from_json(json_string):
        """Rebuild a Server from a string produced by ``to_json``.

        Previously this returned the raw dict from ``json.loads``, so the
        round trip did not give back a Server object.

        Raises:
            KeyError: if the JSON object lacks ``name``, ``cpus`` or ``cpu_speed``.
        """
        data = json.loads(json_string)
        return Server(data["name"], data["cpus"], data["cpu_speed"])


In [3]:
# Function to find the magnitude of a vector.
def magnitude(vector):
    """Return the Euclidean length of ``vector`` (square root of the sum of squares)."""
    sum_of_squares = np.square(vector).sum()
    return np.sqrt(sum_of_squares)

# Function to find the squared magnitude (sum of the squared components) of a vector.
def module(vector):
    """Return the sum of the squared components of ``vector``.

    Note that no square root is taken, so this is the squared Euclidean
    norm rather than the norm itself.
    """
    squares = np.square(vector)
    return squares.sum()

# function to rescale vector between 0 and 1 and return it as a list
def rescale_vector(vector):
    """Min-max rescale ``vector`` into the range [0, 1] and return it as a list.

    Args:
        vector: sequence (or array) of numbers.

    Returns:
        A list with the smallest input mapped to 0 and the largest to 1.
        An empty input yields an empty list. When every value is identical
        the range is zero, so all entries are mapped to 0.0 instead of
        dividing by zero and producing NaN (this happens, for instance, with
        a one-element fitness vector).
    """
    values = np.asarray(vector)
    if values.size == 0:
        return []

    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        # Degenerate range: avoid 0/0 and fall back to the lower bound.
        return list(np.zeros(values.shape))

    return list((values - lowest) / span)

In [4]:

# A Solution class, which represents a solution to the problem. It is made of a list of tuples in the form <server, associated processes>.
# The solution object possesses a fitness value, which is a vector of the fitness of each servers with respect to their associated processes. 
# The fitness is calculated as the sum of the length of the processes assigned to the server divided by the number of cpus of the server times their speed.
# It possesses the __repr__ method which is used to print the object.
# It possesses a to_json method which is used to convert the object to a json string and a from_json to convert it back.

class Solution:
    """An assignment of process groups to servers, with its rescaled fitness.

    Attributes:
        solution: list of ``(server, processes)`` pairs; each server exposes
            ``cpus`` and ``cpu_speed`` and each process exposes ``length``.
        fitness: the per-server values from ``calculate_fitness`` after being
            passed through ``rescale_vector``.
    """

    def __init__(self, solution):
        self.solution = solution
        self.fitness = rescale_vector(self.calculate_fitness())

    def __repr__(self):
        return f"Solution(solution={self.solution}, fitness={self.fitness})"

    def to_json(self):
        """Serialise the solution (servers, processes and fitness) to JSON."""
        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)

    @staticmethod
    def from_json(json_string):
        """Rebuild a Solution from a string produced by ``to_json``.

        Previously this returned the raw dict from ``json.loads``. The servers
        and processes are re-created and the fitness is recomputed from them;
        the ``fitness`` entry stored in the JSON is ignored.

        Raises:
            KeyError: if an expected key is missing from the JSON document.
        """
        data = json.loads(json_string)
        pairs = [
            (
                Server(server["name"], server["cpus"], server["cpu_speed"]),
                [Process(process["pid"], process["length"]) for process in processes],
            )
            for server, processes in data["solution"]
        ]
        return Solution(pairs)

    def calculate_fitness(self):
        """Return one raw fitness value per ``(server, processes)`` pair.

        Each value is the total process length multiplied by
        ``cpus * cpu_speed`` of the server.

        NOTE(review): the cell comment above this class describes the load
        being *divided* by ``cpus * cpu_speed``; the code multiplies. The
        formula is left unchanged here -- confirm which one is intended.
        """
        return [
            sum(process.length for process in processes) * (server.cpus * server.cpu_speed)
            for server, processes in self.solution
        ]

    def __lt__(self, other):
        """Order solutions by the magnitude of their fitness vectors.

        The original returned cmp-style 1 / -1 / 0. Since -1 is truthy,
        ``a < b`` evaluated as true whenever the magnitudes differed, which
        breaks ``sorted``/``min``/``max``. A real boolean is returned now.
        """
        return bool(magnitude(self.fitness) < magnitude(other.fitness))

In [5]:
# A clusterization algorithm which:
# 1. Clusterizes the processes in N subsets, with N equal to the number of servers
# 2. Assign the servers to a cluster in a greedy fashion.
#    > The server that fits best the cluster is assigned to it.
#    > The server that fits best the cluster is the one that has the highest fitness value.
#    > The fitness value is calculated as the sum of the lengths of the processes assigned to the server, multiplied by the number of CPUs of the server and by their speed.

class GreeedyClusterization:
    """Split the processes over the servers and wrap the result in a Solution.

    Attributes:
        processes: the processes to distribute.
        servers: the servers receiving them.
        solution: the Solution built by ``clusterize`` at construction time.
        name: the fixed label ``"greedy"``.

    NOTE(review): the cell comment describes a greedy best-fit matching, but
    the code below is a plain round-robin split by process position.
    """

    def __init__(self, servers, processes):
        self.processes = processes
        self.servers = servers
        self.solution = self.clusterize()
        self.name = "greedy"

    def clusterize(self):
        """Deal the processes out to the servers in turn and return a Solution."""
        server_count = len(self.servers)
        buckets = [[] for _ in range(server_count)]

        # Process number k lands in bucket k modulo the number of servers.
        for position, process in enumerate(self.processes):
            buckets[position % server_count].append(process)

        pairs = list(zip(self.servers, buckets))
        return Solution(pairs)

        

In [6]:
# Clusterization algorithm which:
# 1. Clusterizes the processes in N subsets, with N equal to the number of servers
# 2. Assign the servers to a cluster by using the k-means algorithm.
#    > The server that fits best the cluster is assigned to it.
#    > The server that fits best the cluster is the one that has the highest fitness value.
# Use of the kmeans algorithm from the sklearn library.

class KMeansClusterization:
    """Split the processes over the servers and wrap the result in a Solution.

    Attributes:
        processes: the processes to distribute.
        servers: the servers receiving them.
        solution: the Solution built by ``clusterize`` at construction time.
        name: the fixed label ``"kmeans"``.

    NOTE(review): despite the name, no k-means clustering is performed here;
    the processes are split round-robin by position, exactly as in the
    greedy variant.
    """

    def __init__(self, servers, processes):
        self.processes = processes
        self.servers = servers
        self.solution = self.clusterize()
        self.name = "kmeans"

    def clusterize(self):
        """Assign process ``k`` to server ``k % len(servers)`` and return a Solution."""
        total_servers = len(self.servers)
        groups = {slot: [] for slot in range(total_servers)}

        for index in range(len(self.processes)):
            groups[index % total_servers].append(self.processes[index])

        pairs = [(self.servers[slot], groups[slot]) for slot in range(total_servers)]
        return Solution(pairs)

In [7]:
def execute(ga, runs=10, data_dir="../Genetic"):
    """Build one Solution per pickled benchmark instance.

    Args:
        ga: which algorithm to use, ``"greedy"`` or ``"kmeans"``.
        runs: number of instances to load; files ``0.pickle`` to
            ``runs - 1`` are read. Defaults to the previously hard-coded 10.
        data_dir: directory holding the ``servers/`` and ``processes/``
            sub-folders. Defaults to the previously hard-coded path.

    Returns:
        A list with one Solution per instance, in file order.

    Raises:
        ValueError: if ``ga`` is not a known algorithm name. Previously an
            unknown name surfaced as an unbound ``algorithm`` variable after
            the first files had already been read.
    """
    if ga not in ("greedy", "kmeans"):
        raise ValueError(f"unknown algorithm {ga!r}; expected 'greedy' or 'kmeans'")
    algorithm_cls = GreeedyClusterization if ga == "greedy" else KMeansClusterization

    clusters = []
    for j in tqdm(range(runs)):
        # NOTE: pickle.load can execute arbitrary code -- only point this at
        # files generated by this project.
        with open(f"{data_dir}/servers/{j}.pickle", "rb") as f:
            servers = pickle.load(f)
        with open(f"{data_dir}/processes/{j}.pickle", "rb") as f:
            processes = pickle.load(f)
        # The constructor already runs clusterize() and stores the result in
        # .solution, so reuse it instead of clustering a second time.
        clusters.append(algorithm_cls(servers, processes).solution)
    return clusters

In [8]:
# One Solution per benchmark instance, produced by the "greedy" algorithm.
greed = execute(ga="greedy")

  0%|          | 0/10 [00:00<?, ?it/s]

In [13]:
# Walk through the greedy solutions; once the loop ends, `sol` refers to the
# last one, which leaves it available for ad-hoc inspection.
for solution in greed:
    sol = solution 
    # Debug aid (disabled): print the fitness magnitude of each solution.
    #print(magnitude(solution.fitness))

In [34]:
# Collect the rescaled fitness vector of every greedy solution (one row per run).
lst = [subgreed.fitness for subgreed in greed]

In [36]:
# Mean rescaled fitness per server position, averaged over all runs.
# `lst` is indexed as lst[run][server], so the outer loop must walk the server
# positions and the inner one the runs. The original had the two ranges
# swapped, which only worked when the number of runs equalled the number of
# servers.
server_count = len(lst[0]) if lst else 0
means = []
for i in range(server_count):
    submeans = [run_fitness[i] for run_fitness in lst]
    means.append(np.mean(submeans))

In [37]:
# Display the averaged fitness values computed in the previous cell.
means

[0.4467059764917848,
 0.4154749758621986,
 0.4439058637856622,
 0.20396144041707961,
 0.4194880778208465,
 0.3312832490457533,
 0.3942544274016749,
 0.4249287316496904,
 0.20844783619615764,
 0.4952326160868642]

In [12]:
# One Solution per benchmark instance, produced by the "kmeans" algorithm.
kmeans = execute(ga="kmeans")

  0%|          | 0/10 [00:00<?, ?it/s]