In [1]:
# genetic algorithm search for continuous function optimization
from numpy.random import randint
from numpy.random import rand
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from datetime import timedelta

import pandas as pd
import time
import numpy as np
import random

In [2]:
# Algoritma Genetika
def create_individual(length):
    return ''.join(random.choice('01') for _ in range(length))

In [3]:
# fitness function
def fitness(gen):        
    boolean_mask = [bool(int(x)) for x in gen]
    X_train_selected = X_train[:, boolean_mask]
    X_test_selected = X_test[:, boolean_mask]

    svm = SVC(kernel='linear', decision_function_shape='ovr', random_state=42)
    svm.fit(X_train_selected, y_train)
    y_pred = svm.predict(X_test_selected)
    acc = accuracy_score(y_test, y_pred)

    return acc

In [5]:
def roulette_wheel_selection(population_with_fitness):
    total_fitness = sum(fitness for _, fitness in population_with_fitness)
    selection_probs = [fitness/total_fitness for _, fitness in population_with_fitness]
    parent_indices = np.random.choice(len(population_with_fitness), size=2, p=selection_probs, replace=False)
    return [population_with_fitness[i][0] for i in parent_indices]

In [6]:
def one_point_crossover(p1, p2):
    # children are copies of parents by default
    c1, c2 = p1, p2
    # check for recombination
    
    # select crossover point that is not on the end of the string
    crossover_point = randint(1, 120-2) #120 = jumlah maksimal kromosom

    # Convert the integers to binary strings and pad with leading zeros
    # bin1 = format(c1, '05b')
    # bin2 = format(c2, '05b')

    # Perform the crossover
    c1_new = c1[:crossover_point] + c2[crossover_point:]
    c2_new = c2[:crossover_point] + c1[crossover_point:]

    # Convert the new binary strings back to integers
    # new_int1 = int(bin1_new, 2)
    # new_int2 = int(bin2_new, 2)
    
    return [c1_new, c2_new]

In [7]:
def mutate(individual):
    #pilih bagian gen random yang dimutasi
    mutation_point = randint(0, 120-1) #120 jumlah maks gen

    # Convert the binary string to a list of binary
    binary_list = [int(digit) for digit in individual]

    # Flip the bit at the specified position
    if binary_list[mutation_point] == '0':
        binary_list[mutation_point] = '1'
    else:
        binary_list[mutation_point] = '0'

    new_binary_string = ''.join(str(digit) for digit in binary_list)
    return new_binary_string

In [11]:
#load dataset
DATA_PATH = "glcm_kidney_illness_extended_dists.csv"
df = pd.read_csv(DATA_PATH)
X = df.iloc[:,1:-1]
y = df.label

#normalize
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
population_size = 4
generations = 4
gen_length = 120 #1 fitur ada 4 sudut (0, 45, 90, 135) dan distance=1, 2, 3, 4, 5

population = [create_individual(gen_length) for _ in range(population_size)]

In [16]:
start_time = time.time()
best_fitness = []
best_individuals = []
for generation in range(generations):
    generation_start_time = time.time()
    print(f"\n=== Generation {generation+1}/{generations} ===")

    # Hitung nilai fitness untuk setiap individu dalam populasi
    population_with_fitness = []
    for individual in population:
        fitness_value = fitness(individual)
        population_with_fitness.append((individual, fitness_value))
        # Mencetak parameter individu saat ini
        print(f"Pelatihan Dengan Individu Pilihan: "
              f"gen: {individual}, "
              f"Fitness: {fitness_value}")

    # Pilih orang tua menggunakan roulette wheel selection
    selected_parents = roulette_wheel_selection(population_with_fitness)
    print("\nMemilih Parents Untuk Generasi Selanjutnya:")
    for parent in selected_parents:
        print(parent)

    # Buat populasi baru melalui crossover dan mutasi
    new_population = []
    while len(new_population) < population_size:
        parent1, parent2 = selected_parents
        child1, child2 = one_point_crossover(parent1, parent2)
        child1 = mutate(child1)
        child2 = mutate(child2)
        new_population.extend([child1, child2])
        # Mencetak parameter anak-anak baru yang dihasilkan
        print(f"Membuat Child dengan Crossover dan Mutasi:"
              f"{child1}, {child2}")
        print("")
        print("")

    # Potong populasi baru agar sesuai dengan ukuran populasi yang ditentukan
    new_population = new_population[:population_size]

    # Perbarui populasi dengan populasi baru
    population = new_population
    generation_end_time = time.time()
    generation_duration = generation_end_time - generation_start_time
    print(f"Waktu yang Dibutuhkan untuk Generasi {generation+1}: {str(timedelta(seconds=generation_duration))}")

    #catat individu terbaik
    current_best_individual = max(population, key=lambda ind: fitness(ind))
    current_best_fitness = fitness(current_best_individual)
    best_individuals.append(current_best_individual)
    best_fitness.append(current_best_fitness)

# Evaluasi individu terbaik setelah loop generasi selesai
best_individual = max(population, key=lambda ind: fitness(ind))
best_acc = fitness(best_individual)
print(f"Individu terbaik : "
      f"gen: {best_individual}, "
      f"Accuracy: {best_acc}")
end_time = time.time()


=== Generation 1/4 ===
Pelatihan Dengan Individu Pilihan: gen: 001001111001111111101001111000111011011001111001100110111110111111000010001000100100110000111101010101000111111001110011, Fitness: 0.8726907630522088
Pelatihan Dengan Individu Pilihan: gen: 010100100001001111100000111100011001010111101100010010100011110111100110010000001001100001101011110111000010011001100100, Fitness: 0.8493975903614458
Pelatihan Dengan Individu Pilihan: gen: 000100101100010001110101100010010001100000001001011100110100110000011001111000001001110010100110100110100000100100101101, Fitness: 0.831726907630522
Pelatihan Dengan Individu Pilihan: gen: 001001110000010011011100011100110010110101100010111110100001111110101110000000101011111001101110001100011110010100100000, Fitness: 0.8558232931726908

Memilih Parents Untuk Generasi Selanjutnya:
000100101100010001110101100010010001100000001001011100110100110000011001111000001001110010100110100110100000100100101101
001001110000010011011100011100110010110101100010111

In [18]:
#waktu untuk train dan prediksi tanpa seleksi fitur
start_time = time.time()

svm = SVC(kernel='linear', decision_function_shape='ovr', random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Akurasi tanpa seleksi fitur: ", acc)

end_time = time.time()
total_duration = end_time - start_time
print(f"\nTotal Waktu yang Dibutuhkan untuk train tanpa seleksi fitur: {str(timedelta(seconds=total_duration))}")

Akurasi tanpa seleksi fitur:  0.8863453815261044

Total Waktu yang Dibutuhkan untuk train tanpa seleksi fitur: 0:00:12.750573


In [19]:
#seleksi
# binary_string = bin(best_individual)[2:]
# padded_binary_string_bin = binary_string.zfill(120)
boolean_mask = [bool(int(x)) for x in best_individual]
X_train_selected = X_train[:, boolean_mask]
X_test_selected = X_test[:, boolean_mask]

#waktu untuk train dan prediksi dengan seleksi fitur
start_time = time.time()

svm = SVC(kernel='linear', decision_function_shape='ovr', random_state=42)
svm.fit(X_train_selected, y_train)
y_pred = svm.predict(X_test_selected)
acc = accuracy_score(y_test, y_pred)
print(f"Akurasi dengan seleksi fitur: ", acc)

end_time = time.time()
total_duration = end_time - start_time
print(f"\nTotal Waktu yang Dibutuhkan untuk train dengan seleksi fitur: {str(timedelta(seconds=total_duration))}")

Akurasi dengan seleksi fitur:  0.8538152610441767

Total Waktu yang Dibutuhkan untuk train dengan seleksi fitur: 0:00:06.816623
