In [1]:
# genetic algorithm search for continuous function optimization
from numpy.random import randint
from numpy.random import rand
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from datetime import timedelta

import pandas as pd
import time
import numpy as np

In [16]:
# Algoritma Genetika
def create_individual(gen_length):
    genotype_max_value = 2 ** gen_length
    random_genotype = randint(0, genotype_max_value)
    return random_genotype

In [47]:
# fitness function
def fitness(gen):
    # Convert the integer to its binary representation
    binary_string = bin(gen)[2:]
    padded_binary_string_bin = binary_string.zfill(24)
        
    boolean_mask = [bool(int(x)) for x in padded_binary_string_bin]
    X_train_selected = X_train[:, boolean_mask]
    X_test_selected = X_test[:, boolean_mask]

    svm = SVC(kernel='linear', decision_function_shape='ovr', random_state=42)
    svm.fit(X_train_selected, y_train)
    y_pred = svm.predict(X_test_selected)
    acc = accuracy_score(y_test, y_pred)

    return acc

In [9]:
def roulette_wheel_selection(population_with_fitness):
    total_fitness = sum(fitness for _, fitness in population_with_fitness)
    selection_probs = [fitness/total_fitness for _, fitness in population_with_fitness]
    parent_indices = np.random.choice(len(population_with_fitness), size=2, p=selection_probs, replace=False)
    return [population_with_fitness[i][0] for i in parent_indices]

In [57]:
def one_point_crossover(p1, p2):
    # children are copies of parents by default
    c1, c2 = p1, p2
    # check for recombination
    
    # select crossover point that is not on the end of the string
    crossover_point = randint(1, 24-2) #24 = jumlah maksimal kromosom

    # Convert the integers to binary strings and pad with leading zeros
    bin1 = format(c1, '05b')
    bin2 = format(c2, '05b')

    # Perform the crossover
    bin1_new = bin1[:crossover_point] + bin2[crossover_point:]
    bin2_new = bin2[:crossover_point] + bin1[crossover_point:]

    # Convert the new binary strings back to integers
    new_int1 = int(bin1_new, 2)
    new_int2 = int(bin2_new, 2)
    
    return [new_int1, new_int2]

In [74]:
def mutate(individual):
    #pilih bagian gen random yang dimutasi
    mutation_point = randint(0, 24-1) #24 jumlah maks gen

    #ubah int jadi biner
    binary_string = bin(individual)[2:]
    padded_binary_string_bin = binary_string.zfill(24)

    # Convert the binary string to a list of binary
    binary_list = [int(digit) for digit in padded_binary_string_bin]

    # Flip the bit at the specified position
    if binary_list[mutation_point] == '0':
        binary_list[mutation_point] = '1'
    else:
        binary_list[mutation_point] = '0'

    new_binary_string = ''.join(str(digit) for digit in binary_list)
    new_individual = int(new_binary_string, 2)
    return new_individual

In [2]:
#load dataset
DATA_PATH = "glcm_kidney_illness.csv"
df = pd.read_csv(DATA_PATH)
X = df.iloc[:,1:-1]
y = df.label

#normalize
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [87]:
population_size = 8
generations = 10
gen_length = 24

population = [create_individual(gen_length) for _ in range(population_size)]

In [88]:
start_time = time.time()
best_fitness = []
best_individuals = []
for generation in range(generations):
    generation_start_time = time.time()
    print(f"\n=== Generation {generation+1}/{generations} ===")

    # Hitung nilai fitness untuk setiap individu dalam populasi
    population_with_fitness = []
    for individual in population:
        fitness_value = fitness(individual)
        population_with_fitness.append((individual, fitness_value))
        # Mencetak parameter individu saat ini
        print(f"Pelatihan Dengan Individu Pilihan: "
              f"gen: {bin(individual)[2:]}, "
              f"Fitness: {fitness_value}")

    # Pilih orang tua menggunakan roulette wheel selection
    selected_parents = roulette_wheel_selection(population_with_fitness)
    print("\nMemilih Parents Untuk Generasi Selanjutnya:")
    for parent in selected_parents:
        print(bin(parent)[2:])

    # Buat populasi baru melalui crossover dan mutasi
    new_population = []
    while len(new_population) < population_size:
        parent1, parent2 = selected_parents
        child1, child2 = one_point_crossover(parent1, parent2)
        child1 = mutate(child1)
        child2 = mutate(child2)
        new_population.extend([child1, child2])
        # Mencetak parameter anak-anak baru yang dihasilkan
        print(f"Membuat Child dengan Crossover dan Mutasi:"
              f"{bin(child1)[2:]}, {bin(child2)[2:]}")
        print("")
        print("")

    # Potong populasi baru agar sesuai dengan ukuran populasi yang ditentukan
    new_population = new_population[:population_size]

    # Perbarui populasi dengan populasi baru
    population = new_population
    generation_end_time = time.time()
    generation_duration = generation_end_time - generation_start_time
    print(f"Waktu yang Dibutuhkan untuk Generasi {generation+1}: {str(timedelta(seconds=generation_duration))}")

    #catat individu terbaik
    current_best_individual = max(population, key=lambda ind: fitness(ind))
    current_best_fitness = fitness(current_best_individual)
    best_individuals.append(current_best_individual)
    best_fitness.append(current_best_fitness)

# Evaluasi individu terbaik setelah loop generasi selesai
best_individual = max(population, key=lambda ind: fitness(ind))
best_acc = fitness(best_individual)
print(f"Individu terbaik : "
      f"gen: {bin(best_individual)[2:]}, "
      f"Accuracy: {best_acc}")
end_time = time.time()


=== Generation 1/10 ===
Pelatihan Dengan Individu Pilihan: gen: 100011100010001001100000, Fitness: 0.6919678714859437
Pelatihan Dengan Individu Pilihan: gen: 110011100000101000001100, Fitness: 0.7024096385542169
Pelatihan Dengan Individu Pilihan: gen: 111110110110001110111101, Fitness: 0.7518072289156627
Pelatihan Dengan Individu Pilihan: gen: 11010100000010011101000, Fitness: 0.7040160642570281
Pelatihan Dengan Individu Pilihan: gen: 11111101100111101100000, Fitness: 0.7670682730923695
Pelatihan Dengan Individu Pilihan: gen: 101111101000010101100001, Fitness: 0.751004016064257
Pelatihan Dengan Individu Pilihan: gen: 10100100110011100101101, Fitness: 0.729718875502008
Pelatihan Dengan Individu Pilihan: gen: 11001001110011011110010, Fitness: 0.714859437751004

Memilih Parents Untuk Generasi Selanjutnya:
111110110110001110111101
10100100110011100101101
Membuat Child dengan Crossover dan Mutasi:1111011011000111011101, 101001001100111001011101


Membuat Child dengan Crossover dan Mutasi:1

In [89]:
#waktu untuk train dan prediksi tanpa seleksi fitur
start_time = time.time()

svm = SVC(kernel='linear', decision_function_shape='ovr', random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Akurasi tanpa seleksi fitur: ", acc)

end_time = time.time()
total_duration = end_time - start_time
print(f"\nTotal Waktu yang Dibutuhkan untuk train tanpa seleksi fitur: {str(timedelta(seconds=total_duration))}")

Akurasi tanpa seleksi fitur:  0.7726907630522089

Total Waktu yang Dibutuhkan untuk train tanpa seleksi fitur: 0:00:08.091132


In [90]:
#seleksi
binary_string = bin(best_individual)[2:]
padded_binary_string_bin = binary_string.zfill(24)
boolean_mask = [bool(int(x)) for x in padded_binary_string_bin]
X_train_selected = X_train[:, boolean_mask]
X_test_selected = X_test[:, boolean_mask]

#waktu untuk train dan prediksi dengan seleksi fitur
start_time = time.time()

svm = SVC(kernel='linear', decision_function_shape='ovr', random_state=42)
svm.fit(X_train_selected, y_train)
y_pred = svm.predict(X_test_selected)
acc = accuracy_score(y_test, y_pred)
print(f"Akurasi dengan seleksi fitur: ", acc)

end_time = time.time()
total_duration = end_time - start_time
print(f"\nTotal Waktu yang Dibutuhkan untuk train tanpa seleksi fitur: {str(timedelta(seconds=total_duration))}")

Akurasi dengan seleksi fitur:  0.695582329317269

Total Waktu yang Dibutuhkan untuk train tanpa seleksi fitur: 0:00:09.867352
