<a href="https://colab.research.google.com/github/alimirash/AI_HW_Iris_Clustring/blob/main/AI_HW_Iris_Clustring.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
import numpy as np
import pandas as pd
from sklearn import datasets

iris = datasets.load_iris()

df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target

In [27]:
def initialize_population(pop_size, num_points, num_clusters):
    population = []
    for _ in range(pop_size):
        chromosome = np.random.randint(1, num_clusters + 1, num_points)
        population.append(chromosome)
    return population

In [28]:
def mutation(chromosome, mutation_rate, num_clusters):
    mutated_chromosome = chromosome.copy()
    for i in range(len(mutated_chromosome)):
        if np.random.rand() < mutation_rate:
            mutated_chromosome[i] = np.random.randint(1, num_clusters + 1)
    return mutated_chromosome


def crossover(parent1, parent2):
    crossover_point = np.random.randint(1, len(parent1))
    child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
    child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
    return child1, child2


In [29]:
from sklearn.metrics import pairwise_distances

def objective_function(chromosome, data, num_clusters):
    # Retrieve indices of data points in each cluster
    clusters = [np.where(chromosome == i)[0] for i in range(1, num_clusters + 1)]

    # Calculate centroids for each cluster
    centroids = [np.mean(data[cluster], axis=0) for cluster in clusters]

    # Calculate WCSS (Within-Cluster Sum of Squares)
    wcss = 0
    for i, cluster in enumerate(clusters):
        if len(cluster) > 0:
            distances = pairwise_distances(data[cluster], [centroids[i]], metric='euclidean')
            wcss += np.sum(distances ** 2)

    return wcss

In [30]:
def genetic_algorithm(data, num_clusters, pop_size, num_generations, mutation_rate):
    num_points = len(data)
    population = initialize_population(pop_size, num_points, num_clusters)
    for generation in range(num_generations):
        # Evaluate fitness of each chromosome
        fitness_scores = [objective_function(chromosome, data, num_clusters) for chromosome in population]

        # Select parents for crossover
        parents = np.random.choice(range(pop_size), size=pop_size, replace=True, p=np.flip(np.array(fitness_scores))/np.sum(fitness_scores))

        # Create new population through crossover and mutation
        new_population = []
        for i in range(0, pop_size, 2):
            child1, child2 = crossover(population[parents[i]], population[parents[i+1]])
            child1 = mutation(child1, mutation_rate, num_clusters)
            child2 = mutation(child2, mutation_rate, num_clusters)
            new_population.extend([child1, child2])
        population = new_population[:pop_size]

    # Select the best chromosome as the final clustering configuration
    best_chromosome = population[np.argmin([objective_function(chromosome, data, num_clusters) for chromosome in population])]
    return best_chromosome

population_size = 20
num_clusters = 3
num_generations = 50
mutation_rate = 0.1

best_clustering = genetic_algorithm(df.iloc[:, :-1].values, num_clusters, population_size, num_generations, mutation_rate)
print("Clustering Configuration:", best_clustering)

Clustering Configuration: [2 1 2 2 1 2 2 3 2 2 1 2 2 2 1 2 3 3 3 3 2 1 3 3 2 2 1 2 2 1 2 3 3 1 3 2 2
 3 2 2 2 2 2 1 3 3 1 2 3 2 3 2 3 2 3 3 1 3 2 1 2 2 2 1 2 2 1 3 2 2 3 3 3 3
 3 1 2 1 1 3 3 3 2 2 2 2 3 1 3 2 3 1 2 2 2 3 3 3 2 3 1 2 2 2 1 3 3 3 1 1 1
 1 1 1 2 2 1 2 1 1 1 2 3 3 3 2 3 2 3 2 2 2 2 1 3 1 2 3 2 1 3 2 1 2 1 1 1 3
 3 2]
