In [68]:
import numpy as np
import math
from itertools import combinations
import pandas as pd


In [69]:
class Client:
    def __init__(self, attributes):
        self.attributes = attributes
        self.clientID = attributes[0]

        self.distribution = np.array( attributes[1:-2])
        self.union_dist=np.full(len(self.distribution), 1 / len(self.distribution))
        self.kl_divergence = self.calculate_kl_divergence()
        self.completion_time = attributes[-1]

    def __hash__(self):
        return hash(self.clientID)

    def __eq__(self, other):
        if not isinstance(other, Client):
            return NotImplemented
        return self.clientID == other.clientID

    def get_distribution(self):
        return self.distribution

    def get_attributes(self):
        return self.attributes

    def get_clientID(self):
        return self.clientID

    def calculate_score(self):
        # Calculate score based on some of the attributes
        return np.sum(self.attributes[1:-2])
    
    def calculate_kl_divergence(self):
        """
        计算两个分布之间的KL散度。
        :param p: 分布P，一个概率分布数组。
        :param q: 分布Q，另一个概率分布数组。
        :return: P和Q之间的KL散度。
        """
        # 确保概率分布不包含0，因为log(0)是未定义的
        client_dist = self.distribution / sum(self.distribution)
        client_dist = np.maximum(client_dist, 1e-12)
        union_dist = np.maximum(self.union_dist, 1e-12)
        
        return np.sum(client_dist * np.log(client_dist / union_dist)) 
    
    def __str__(self) -> str:
        return f"Client ID:{self.clientID}. Distribution:{self.distribution}. KL Divergence:{self.kl_divergence}"
        # print("Client distribution:", self.distribution)

# Example usage
attributes = [1, 5, 4, 3, 7, 2, 8, 6]  # Example list of attributes
client = Client(attributes)
print("Client ID:", client.get_clientID())
print("Client distribution:", client.get_distribution())
print("Client kl divergence:", client.calculate_kl_divergence())


Client ID: 1
Client distribution: [5 4 3 7 2]
Client kl divergence: 0.08376639267074756


In [70]:
class Group:
    def __init__(self, clients, get_clients_per_group):
        if len(clients) != get_clients_per_group():
            raise ValueError(f"A complete group must have: {get_clients_per_group()} clients!")

        self.clients = clients
        # self.AD = self.calculate_ad()
        # self.GH = self.calculate_gh()
        # self.maxDistance = self.calculate_max_distance()

        self.group_distribution = self.calculate_group_distribution()
        self.union_dist=np.full(len(self.group_distribution), 1 / len(self.group_distribution))

        self.group_kl_divergence = self.calculate_group_kl_divergence()

        self.group_mean_kl_divergence = np.mean([client.kl_divergence for client in self.clients])

    def calculate_group_kl_divergence(self):
        """
        计算两个分布之间的KL散度。
        :param p: 分布P，一个概率分布数组。
        :param q: 分布Q，另一个概率分布数组。
        :return: P和Q之间的KL散度。
        """
        # 确保概率分布不包含0，因为log(0)是未定义的
        group_dist = np.maximum(self.group_distribution, 1e-12)
        union_dist = np.maximum(self.union_dist, 1e-12)
        
        return np.sum(group_dist * np.log(group_dist / union_dist))   

    def calculate_group_distribution(self):
        """
        计算组内所有客户的分布的平均值。
        :return: 组内所有客户的分布的平均值。
        """
        distribution_sum = np.sum([client.get_distribution() for client in self.clients], axis=0)

        return distribution_sum / sum(distribution_sum)


    def __eq__(self, other):
        if not isinstance(other, Group):
            return False
        if len(self.clients) != len(other.clients):
            return False
        return all(self.clients[i].get_clientID() == other.clients[i].get_clientID() for i in range(len(self.clients)))
    
    def get_order_clients(self):
        # 得到顺序排序的client
        return sorted(self.clients, key=lambda client: client.kl_divergence())

    # def calculate_ad(self):
    #     ordered_clients = sorted(self.clients, key=lambda client: client.get_score())
    #     return (ordered_clients[3].get_score() + ordered_clients[0].get_score()) / 2

    # def calculate_gh(self):
    #     ordered_clients = sorted(self.clients, key=lambda client: client.get_score())
    #     ad = self.calculate_ad()
    #     return (ordered_clients[3].get_score() - ordered_clients[0].get_score()) / \
    #            (1 + abs(ad - ordered_clients[1].get_score()) + abs(ad - ordered_clients[2].get_score()))

    # def calculate_max_distance(self):
    #     distances = [self.euclidean_distance(st1, st2) for st1, st2 in combinations(self.clients, 2)]
    #     return max(distances)

    # def euclidean_distance(self, s1, s2):
    #     s1_attributes = s1.get_attributes()
    #     s2_attributes = s2.get_attributes()
    #     return math.sqrt(sum((s1_attributes[i] - s2_attributes[i]) ** 2 for i in range(1, 8)))

    # Additional getters
    def get_clients(self):
        return self.clients

    # def get_gh(self):
    #     return self.GH

    # def get_ad(self):
    #     return self.AD

    # def get_max_distance(self):
    #     return self.maxDistance

# Assuming client class and a function get_clients_per_group() are defined
# Example usage
clients = [Client([i, i+1, i+2, i+3, i+4, i+5, i+6, i+7, i+8]) for i in range(4)]

group = Group(clients, lambda: 4)
print("Group KL Divergence:", group.group_kl_divergence)
print("Group Mean KL Divergence:", group.group_mean_kl_divergence)
print("Group Distribution:", group.group_distribution)



Group KL Divergence: 0.06050698143760225
Group Mean KL Divergence: 0.07242839102916594
Group Distribution: [0.08333333 0.11666667 0.15       0.18333333 0.21666667 0.25      ]


In [71]:
class Population:
    def __init__(self, groups):
        self.groups = groups
        self.populationFitness = self.calculate_population_fitness()
        # self.minEuclideanDistance = self.calculate_min_euclidean_distance()

    def get_population_fitness(self):
        return self.populationFitness

    # def get_min_euclidean_distance(self):
    #     return self.minEuclideanDistance

    def get_groups(self):
        return self.groups

    # def calculate_min_euclidean_distance(self):
    #     min_euclidean_distance = float('inf')
    #     for group in self.groups:
    #         if group.get_max_distance() < min_euclidean_distance:
    #             min_euclidean_distance = group.get_max_distance()
    #     return min_euclidean_distance

    def calculate_population_fitness(self):
        fitness = 0.0
        for group in self.groups:
            fitness += group.group_kl_divergence
        return fitness

    def get_fitness(self):
        return self.populationFitness

# Example usage
# Assuming that Group class and Student class are properly defined elsewhere
groups = [Group([Client([i, i+1, i+2, i+3, i+4, i+5, i+6, i+7, i+8]) for i in range(k, k+4)], lambda: 4) for k in range(0, 16, 4)]
population = Population(groups)
print("Population Fitness:", population.get_population_fitness())
# print("Min Euclidean Distance:", population.get_min_euclidean_distance())


Population Fitness: 0.09243746267811087


In [72]:
import random

class GeneticAlgorithm:
    MAX_ITERATIONS = 300
    MUTATION_RATE = 0.15
    INVERSION_RATE = 0.05
    NUMBER_OF_POPULATIONS = 300
    ELITISM = 2
    TOURNAMENT_SIZE = 2
    MAX_CONSECUTIVE = 500
    CLIENT_PER_GROUP = 4
    MIN_MEMBER_DIST = 2

    def __init__(self, data_df):
        self.client_info = data_df
        self.populations = []
        self.epoch_fitness = {0: 0.0}
        self.best_population = None
        self.epoch = 0

        if len(self.client_info) % self.CLIENT_PER_GROUP != 0:
            raise ValueError("Error: number of student records not divisible by number of groups")

        clients = self.create_clients()
        student_indexes = list(range(len(clients)))

        for _ in range(self.NUMBER_OF_POPULATIONS):
            random.shuffle(student_indexes)
            groups = []

            for i in range(0, len(student_indexes), self.CLIENT_PER_GROUP):
                group_clients = [clients[j] for j in student_indexes[i:i+self.CLIENT_PER_GROUP]]
                group = Group(group_clients,lambda: 4)
                groups.append(group)

            population = Population(groups)
            self.populations.append(population)

    def evolve(self):
        terminate = False
        while not terminate:
            self.epoch += 1
            new_populations = self.get_elite_populations()

            while len(new_populations) < self.NUMBER_OF_POPULATIONS:
                first_parent = random.choice(self.populations)
                second_parent = random.choice(self.populations)
                children = self.crossover(first_parent, second_parent)

                for child in children:
                    if random.random() < self.MUTATION_RATE:
                        child = self.mutate(child)
                    if random.random() < self.INVERSION_RATE:
                        child = self.invert(child)

                    new_populations.append(child)
                    if len(new_populations) >= self.NUMBER_OF_POPULATIONS:
                        break

            self.populations = new_populations
            self.update_best_population()
            terminate = self.terminate()

    def crossover(self, parent1, parent2):
        # Simplified crossover logic
        pass

    def mutate(self, population):
        # Simplified mutation logic
        pass

    def invert(self, population):
        # Simplified inversion logic
        pass

    def terminate(self):
        if self.epoch >= self.MAX_ITERATIONS:
            return True
        if len(self.epoch_fitness) > self.MAX_CONSECUTIVE and self.epoch_fitness[self.epoch] == self.epoch_fitness[self.epoch - self.MAX_CONSECUTIVE]:
            return True
        return False

    def get_elite_populations(self):
        sorted_populations = sorted(self.populations, key=lambda x: x.get_fitness(), reverse=True)
        return sorted_populations[:self.ELITISM]

    def update_best_population(self):
        current_best = max(self.populations, key=lambda x: x.get_fitness())
        if self.best_population is None or current_best.get_fitness() > self.best_population.get_fitness():
            self.best_population = current_best

    def create_clients(self):
        return [Client(attributes) for attributes in self.client_info.values]

# Classes Population, Group, and Student would need to be defined with appropriate methods.
df = pd.read_csv('clients_info.csv')
gga = GeneticAlgorithm(df)


In [73]:
# 对clients中的client进行排序，根据client的kl散度
for client in clients:
    print(client)
sorted_clients = sorted(clients, key=lambda client: client.kl_divergence)
for client in sorted_clients:
    print(client)

Client ID:0. Distribution:[1 2 3 4 5 6]. KL Divergence:0.1293825100352326
Client ID:1. Distribution:[2 3 4 5 6 7]. KL Divergence:0.07541986591001505
Client ID:2. Distribution:[3 4 5 6 7 8]. KL Divergence:0.04966597033274467
Client ID:3. Distribution:[4 5 6 7 8 9]. KL Divergence:0.035245217838671454
Client ID:3. Distribution:[4 5 6 7 8 9]. KL Divergence:0.035245217838671454
Client ID:2. Distribution:[3 4 5 6 7 8]. KL Divergence:0.04966597033274467
Client ID:1. Distribution:[2 3 4 5 6 7]. KL Divergence:0.07541986591001505
Client ID:0. Distribution:[1 2 3 4 5 6]. KL Divergence:0.1293825100352326


In [74]:
df = pd.read_csv('clients_info.csv')
df['0'].astype(int)

0       1
1       2
2       3
3       4
4       5
     ... 
95     96
96     97
97     98
98     99
99    100
Name: 0, Length: 100, dtype: int64

In [75]:
for index, row in df.iterrows():
    print(index, row)

0 0     1.0
1     0.0
2     0.0
3     0.0
4     0.5
5     0.5
6     0.0
7     0.0
8     0.0
9     0.0
10    0.0
11    5.0
Name: 0, dtype: float64
1 0     2.0
1     0.0
2     0.0
3     0.0
4     0.0
5     0.5
6     0.0
7     0.0
8     0.0
9     0.5
10    0.0
11    5.0
Name: 1, dtype: float64
2 0     3.0
1     0.5
2     0.5
3     0.0
4     0.0
5     0.0
6     0.0
7     0.0
8     0.0
9     0.0
10    0.0
11    1.0
Name: 2, dtype: float64
3 0     4.0
1     0.0
2     0.0
3     0.0
4     0.0
5     0.0
6     0.0
7     0.0
8     0.0
9     0.5
10    0.5
11    1.0
Name: 3, dtype: float64
4 0     5.0
1     0.0
2     0.5
3     0.0
4     0.0
5     0.0
6     0.0
7     0.5
8     0.0
9     0.0
10    0.0
11    4.0
Name: 4, dtype: float64
5 0     6.0
1     0.0
2     0.0
3     0.0
4     0.5
5     0.0
6     0.5
7     0.0
8     0.0
9     0.0
10    0.0
11    5.0
Name: 5, dtype: float64
6 0     7.0
1     0.0
2     0.0
3     0.0
4     0.5
5     0.5
6     0.0
7     0.0
8     0.0
9     0.0
10    0.0
11    4.0
Na

In [76]:
for v in df.values:
    print(v)

[1.  0.  0.  0.  0.5 0.5 0.  0.  0.  0.  0.  5. ]
[2.  0.  0.  0.  0.  0.5 0.  0.  0.  0.5 0.  5. ]
[3.  0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0.  1. ]
[4.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 1. ]
[5.  0.  0.5 0.  0.  0.  0.  0.5 0.  0.  0.  4. ]
[6.  0.  0.  0.  0.5 0.  0.5 0.  0.  0.  0.  5. ]
[7.  0.  0.  0.  0.5 0.5 0.  0.  0.  0.  0.  4. ]
[8.  0.  0.  0.  0.  0.  0.  0.  0.5 0.  0.5 4. ]
[9.  0.  0.  0.  0.  0.  0.5 0.  0.  0.5 0.  1. ]
[10.   0.   0.   0.   0.   0.   0.   0.5  0.   0.   0.5  1. ]
[11.   0.   0.5  0.   0.5  0.   0.   0.   0.   0.   0.   1. ]
[12.   0.   0.   0.   0.   0.5  0.   0.5  0.   0.   0.   3. ]
[13.   0.   0.   0.   0.   0.   0.5  0.   0.   0.5  0.   3. ]
[14.   0.5  0.   0.   0.   0.5  0.   0.   0.   0.   0.   3. ]
[15.   0.   0.   0.   0.5  0.   0.   0.   0.5  0.   0.   3. ]
[16.   0.   0.   0.   0.   0.5  0.   0.   0.5  0.   0.   5. ]
[17.   0.   0.   0.   0.   0.   0.   0.   0.5  0.   0.5  4. ]
[18.   0.   0.   0.   0.   0.5  0.   0.   0.   0.5  0.