In [1]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from DataProcessor import DataProcessor
import numpy as np
import math
from Chromosome import *
from Recombination import Recombination
import random

In [2]:
# Create the project's DataProcessor for the DS02 dataset file.
dp = DataProcessor("Datasets/DS02.csv")

In [3]:
# Load the dataset; later cells read it through dp.dataset.
# NOTE(review): presumably this is what populates dp.dataset — confirm in DataProcessor.
dp.load_data()

In [4]:
# Problem data and algorithm hyper-parameters shared by the cells below.
num_objectives = 2
y = np.array(dp.dataset["LABEL"])  # target column
X = np.array(dp.dataset.drop("LABEL", axis=1, inplace=False))  # every column except LABEL
num_features = dp.num_features()
population_size = 100
maxFEs = 3000  # NSGAII keeps iterating while its offspring-evaluation counter is below this
Q = 5  # NOTE(review): presumably the number of crossover operators (crossover_keys lists five) — confirm
LP = 5 # number of generations between two calls to update_OSP inside NSGAII
RD = []  # NSGAII appends one per-operator reward dict per generation
PN = []  # NSGAII appends one per-operator penalty dict per generation
# Per-bit flip probability read by uniform_mutation.
# NOTE(review): with the value 1 the test `random.uniform(0, 1) < 1` is (almost) always
# true, so every bit of every offspring is flipped — confirm this is intended rather
# than something like 1 / num_features.
mutation_probability = 1
sigma = 0.0001  # update_OSP substitutes this for a zero reward total in its denominator

In [5]:
# Operator names used as dictionary keys for the reward/penalty bookkeeping;
# credit_assignment indexes these dicts with the crossover function's __name__.
crossover_keys = [
    "onepoint_crossover",
    "twopoint_crossover",
    "uniform_crossover",
    "shuffle_crossover",
    "reduced_surrogate_crossover",
]
default_value = 0
# Two independent counter dicts, one entry per operator, all starting at default_value.
nReward = dict.fromkeys(crossover_keys, default_value)
nPenalty = dict.fromkeys(crossover_keys, default_value)


The crowding distance is initialized to 0 for every individual.

The individuals in the front are then sorted based on each objective function.

The crowding distance of the individuals at the boundaries of the front (the extreme values of an objective) is set to infinity.

For every other individual, the crowding distance is the sum, over all objectives, of the normalized distance between its two neighbors in the front sorted by that objective.

In [6]:
def fitness_function_1(chromosome):
    """Classification error of a 3-NN classifier restricted to the selected features.

    Parameters
    ----------
    chromosome : sequence of 0/1
        Feature mask; positions equal to 1 select the corresponding column of
        the module-level feature matrix ``X``.

    Returns
    -------
    float
        ``1 - mean accuracy`` over a 3-fold cross-validation against the
        module-level labels ``y``. An empty selection returns ``1.0``.
    """
    mask = np.asarray(chromosome) == 1
    if not mask.any():
        # A chromosome with no selected feature yields a zero-column matrix,
        # which scikit-learn refuses to fit. Score it as the worst possible
        # error instead of aborting the whole run.
        return 1.0
    selected_features = X[:, mask]
    classifier = KNeighborsClassifier(n_neighbors=3)
    accuracy_scores = cross_val_score(classifier, selected_features, y, cv=3)
    return 1 - np.mean(accuracy_scores)

def fitness_function_2(chromosome):
    """Return the sum of the chromosome's entries (the count of 1s for a 0/1 mask)."""
    genes = np.asarray(chromosome)
    return genes.sum()

In [7]:
# Objective order: [classification error, sum of the feature mask].
# Chromosome.objectives lists are built in this same order.
objective_functions = [fitness_function_1,fitness_function_2]

In [8]:
def initialize_population(num_features, population_size):
    """Build ``population_size`` random chromosomes and evaluate each one.

    Every chromosome gets ``num_features`` genes drawn uniformly from {0, 1}
    and its ``objectives`` attribute filled from the module-level
    ``objective_functions`` list.
    """
    population = []
    for _ in range(population_size):
        genes = list(np.random.randint(2, size=num_features))
        individual = Chromosome(genes)
        individual.objectives = [objective(genes) for objective in objective_functions]
        population.append(individual)
    return population

In [9]:
def environmental_selection(population):
    """Keep at most ``population_size`` individuals, front by front.

    Fronts returned by ``fast_non_dominated_sort`` are accepted whole while
    they are strictly smaller than the number of free slots. The first front
    that does not fit that way is passed to ``crowding_distance``, sorted in
    place by descending ``crowding_distance`` attribute, and truncated to the
    remaining slots.
    """
    survivors = []
    slots_left = population_size
    for front in fast_non_dominated_sort(population):
        if len(front) < slots_left:
            survivors.extend(front)
            slots_left -= len(front)
            continue
        # This front fills (or overflows) the remaining slots: rank its members.
        crowding_distance(front)
        front.sort(key=lambda member: member.crowding_distance, reverse=True)
        survivors.extend(front[:slots_left])
        break
    return survivors


In [10]:
def init_OSP():
    """Return the initial operator-selection probabilities: equal for every key in ``crossover_keys``."""
    probabilities = {}
    for operator_name in crossover_keys:
        probabilities[operator_name] = 1 / len(crossover_keys)
    return probabilities

In [11]:
def update_OSP():
    """Recompute the crossover-operator selection probabilities from ``RD`` and ``PN``.

    For every operator the score is ``reward / (reward + penalty)``, where
    ``reward`` and ``penalty`` are that operator's totals over all rows of the
    module-level ``RD`` and ``PN`` lists. When ``reward`` is 0, ``sigma``
    replaces it in the denominator only, so the score is 0 without dividing
    by zero. Scores are then normalized to sum to 1.

    Returns
    -------
    dict
        Operator name -> selection probability. If every score is 0 (no
        operator earned a reward, or no statistics were recorded yet), the
        uniform distribution is returned instead of raising
        ``ZeroDivisionError``.
    """
    reward_totals = {key: sum(row[key] for row in RD) for key in crossover_keys}
    penalty_totals = {key: sum(row[key] for row in PN) for key in crossover_keys}

    scores = {}
    for key in crossover_keys:
        reward = reward_totals[key]
        denominator = (reward if reward != 0 else sigma) + penalty_totals[key]
        scores[key] = reward / denominator

    total = sum(scores.values())
    if total == 0:
        # Nothing to learn from: fall back to an unbiased choice.
        return {key: 1 / len(crossover_keys) for key in crossover_keys}
    return {key: score / total for key, score in scores.items()}

In [12]:
def parent_selection(population):
    """Pick two parents uniformly at random from ``population``.

    The two parents are distinct individuals whenever the population holds at
    least two members; a single-member population yields that member twice.
    Indices are sampled rather than the individuals themselves, so the
    population is never coerced into a NumPy array.

    Returns
    -------
    list
        ``[parent1, parent2]``.

    Raises
    ------
    ValueError
        If ``population`` is empty (raised by ``np.random.choice``).
    """
    size = len(population)
    # Sampling without replacement needs at least two candidates.
    first, second = np.random.choice(size, 2, replace=size < 2)
    return [population[first], population[second]]

In [13]:
def credit_assignment(parents, offsprings, crossover, nReward, nPenalty):
    """Reward or penalize a crossover operator according to its offspring.

    * If at least one parent is dominated by another parent, every offspring
      is compared with each non-dominated parent: a penalty is counted when
      that parent dominates the offspring, a reward otherwise.
    * If no parent is dominated, each offspring counts once: a reward when no
      parent dominates it, a penalty otherwise.

    Parameters
    ----------
    parents, offsprings : sequences of chromosomes
    crossover : callable
        The operator that produced ``offsprings``; its ``__name__`` is the key
        updated in the counters.
    nReward, nPenalty : dict
        Per-operator counters, updated in place.

    Returns
    -------
    tuple
        ``(nReward, nPenalty)``, the same dict objects that were passed in.
    """
    operator_name = crossover.__name__
    p_nd, p_d = dominance_comparison(parents)
    # dominance_comparison returns lists, so the dominated set has to be
    # tested for emptiness; comparing it with None is always true.
    if p_d:
        for parent in p_nd:
            for offspring in offsprings:
                if dominate(parent, offspring):
                    nPenalty[operator_name] += 1
                else:
                    nReward[operator_name] += 1
    else:
        for offspring in offsprings:
            if any(dominate(parent, offspring) for parent in parents):
                nPenalty[operator_name] += 1
            else:
                nReward[operator_name] += 1
    return nReward, nPenalty

In [14]:
def dominance_comparison(chromosomes):
    """Split ``chromosomes`` into ``(non_dominated_set, dominated_set)``.

    A solution goes to the dominated set as soon as ``dominate(other, solution)``
    is true for some solution at a different position; otherwise it goes to
    the non-dominated set. Input order is preserved inside each list.
    """
    non_dominated_set, dominated_set = [], []
    for position, candidate in enumerate(chromosomes):
        for other_position, rival in enumerate(chromosomes):
            if other_position != position and dominate(rival, candidate):
                dominated_set.append(candidate)
                break
        else:
            # Loop finished without a break: nobody dominates the candidate.
            non_dominated_set.append(candidate)
    return non_dominated_set, dominated_set

In [15]:
def uniform_mutation(chromosome):
    """Return a copy of ``chromosome`` with bits flipped at random.

    One ``random.uniform(0, 1)`` draw is made per gene, in order; the gene is
    replaced by ``1 - gene`` when the draw is below the module-level
    ``mutation_probability``. The input is not modified.
    """
    mutated_chromosome = chromosome.copy()
    for position, gene in enumerate(mutated_chromosome):
        draw = random.uniform(0, 1)
        if draw < mutation_probability:
            mutated_chromosome[position] = 1 - gene
    return mutated_chromosome

In [16]:
def find_non_dominated_solution(population):
    """Return the first front produced by ``fast_non_dominated_sort(population)``."""
    first_front = fast_non_dominated_sort(population)[0]
    return first_front

In [25]:
def _unique_by_values(pool):
    """Return the members of ``pool`` with distinct ``values``, keeping first occurrences in order."""
    seen = set()
    unique = []
    for member in pool:
        signature = tuple(member.values)
        if signature not in seen:
            seen.add(signature)
            unique.append(member)
    return unique


def NSGAII(population_size, LP, num_features, objective_functions):
    """Run NSGA-II with adaptive crossover-operator selection.

    Each generation creates ``population_size // 2`` offspring pairs: two
    parents are selected, a crossover operator is drawn by roulette wheel
    from the current selection probabilities, both offspring are mutated and
    evaluated, and the operator is credited through ``credit_assignment``.
    Offspring and current population are merged, duplicates (same ``values``)
    are dropped, and ``environmental_selection`` picks the next population.
    Every ``LP`` generations the selection probabilities are recomputed with
    ``update_OSP``.

    Parameters
    ----------
    population_size : int
        Number of individuals kept per generation; must be at least 2.
    LP : int
        Learning period: number of generations between probability updates.
    num_features : int
        Chromosome length.
    objective_functions : list of callables
        Applied to each offspring's ``values``. The initial population is
        evaluated by ``initialize_population``, which reads the module-level
        list of the same name.

    Returns
    -------
    list
        The first non-dominated front of the final population.

    Raises
    ------
    ValueError
        If ``population_size`` is below 2; no offspring would ever be
        evaluated and the evaluation budget could never be reached.

    Notes
    -----
    The loop stops once the number of offspring evaluations reaches the
    module-level ``maxFEs``. The module-level ``RD`` and ``PN`` lists are
    emptied at the start of a run and after every probability update, so each
    update only sees the generations of the learning period that just ended
    and repeated runs in the same kernel do not share statistics.
    """
    if population_size < 2:
        raise ValueError("population_size must be at least 2")

    # Drop statistics left over from a previous run in the same kernel.
    RD.clear()
    PN.clear()

    count_evaluation = 0
    k = 0
    population = initialize_population(num_features, population_size)
    crossover_probability = init_OSP()
    while count_evaluation < maxFEs:
        nReward = dict.fromkeys(crossover_keys, 0)
        nPenalty = dict.fromkeys(crossover_keys, 0)
        new_population = []
        for _ in range(population_size // 2):
            parent1, parent2 = parent_selection(population)
            rc = Recombination(parent1.values, parent2.values)
            crossover = rc.roulette_wheel_selection(crossover_probability)
            offspring1, offspring2 = rc.execute_crossover(crossover)
            offspring1 = uniform_mutation(offspring1)
            offspring2 = uniform_mutation(offspring2)
            offspring1, offspring2 = Chromosome(offspring1), Chromosome(offspring2)
            offspring1.objectives = [f(offspring1.values) for f in objective_functions]
            offspring2.objectives = [f(offspring2.values) for f in objective_functions]
            count_evaluation += 2
            nReward, nPenalty = credit_assignment([parent1, parent2], [offspring1, offspring2], crossover, nReward, nPenalty)
            new_population += [offspring1, offspring2]
        k += 1
        RD.append(nReward)
        PN.append(nPenalty)
        if k == LP:
            crossover_probability = update_OSP()
            # Start a fresh learning period.
            RD.clear()
            PN.clear()
            k = 0
        if count_evaluation % 100000 == 0:
            print(count_evaluation)

        current_pool = new_population + population
        population = environmental_selection(_unique_by_values(current_pool))

    # Computed once at the end so the result is defined even if the loop never ran.
    return find_non_dominated_solution(population)

In [26]:
# Run the algorithm with the settings defined above; the result is the first
# non-dominated front of the final population.
current_solutions = NSGAII(population_size, LP, num_features, objective_functions)

In [27]:
# Print each returned solution's objectives as [classification error, sum of feature mask].
for solution in current_solutions:
    print("obejective: ", solution.objectives)

obejective:  [0.22222222222222232, 33]
obejective:  [0.22222222222222232, 33]
obejective:  [0.18518518518518523, 35]
obejective:  [0.2962962962962964, 15]
obejective:  [0.5555555555555556, 13]
obejective:  [0.22222222222222232, 33]
obejective:  [0.4814814814814815, 14]
obejective:  [0.2962962962962963, 22]
obejective:  [0.2592592592592592, 23]


In [30]:
# Solutions 1 and 5 print identical objective values above; check whether
# they are also the same feature mask.
current_solutions[1].values == current_solutions[5].values

False