<a href="https://colab.research.google.com/github/Zlmknc/ML-CKD/blob/main/ckd_GA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Genetic Algorithms with Feature Selection

In [33]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
!pip install deap
from deap import base, creator, tools, algorithms
import random
import pickle  # Checkpoint için


X_original = df.drop('classification', axis=1)
y = df['classification']
scaler = StandardScaler()
X_scaled_original = scaler.fit_transform(X_original)

NUM_FEATURES = X_scaled_original.shape[1]
CHECKPOINT_PATH = "ga_feature_selection_checkpoint.pkl"  # Checkpoint dosyasının adı

# Uygunluk fonksiyonu (10 katlı çapraz doğrulama ile F1-skoru)
def evaluate_features(individual):
    selected_features = [i for i, bit in enumerate(individual) if bit == 1]
    if not selected_features:
        return 0.0,  # En az bir özellik seçilmeli

    X_selected = X_scaled_original[:, selected_features]
    model = KNeighborsClassifier(n_neighbors=5) # Örnek bir sınıflandırıcı
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    f1 = cross_val_score(model, X_selected, y, cv=skf, scoring='f1_weighted', error_score='raise')
    return np.mean(f1),

# DEAP kütüphanesi ile GA kurulumu
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=NUM_FEATURES)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate_features)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.5)  # Mutasyon eklendi
toolbox.register("select", tools.selTournament, tournsize=3) # Seçim eklendi

# GA Parametreleri
POP_SIZE = 50
CXPB = 0.7
MUTPB = 0.2
NGEN = 40
RANDOM_SEED = 42

random.seed(RANDOM_SEED)

# Başlangıç popülasyonu oluştur
population = toolbox.population(n=POP_SIZE)

# Uygunluklarını değerlendir
fitnesses = list(toolbox.map(toolbox.evaluate, population))
for ind, fit in zip(population, fitnesses):
    ind.fitness.values = fit

# Evrimsel Döngü
for gen in range(NGEN):
    print(f"Nesil {gen}")
    offspring = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)
    fitnesses = list(toolbox.map(toolbox.evaluate, offspring))
    for ind, fit in zip(offspring, fitnesses):
        ind.fitness.values = fit
    population = toolbox.select(offspring, k=POP_SIZE)

# En iyi bireyi al
best_ind = tools.selBest(population, k=1)[0]
best_features = [i for i, bit in enumerate(best_ind) if bit == 1]
print(f"En iyi öznitelikler (indeksler): {best_features}")
X_selected = X_scaled_original[:, best_features]

# Veriyi eğitim ve test kümelerine ayır
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=RANDOM_SEED, stratify=y)


# 9 algoritma ve isimleri
classifiers = {
    "KNN": KNeighborsClassifier(n_neighbors=3, weights='distance', metric='minkowski', p=2),
    "Naive Bayes (Gaussian)": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(criterion='entropy', splitter='best', max_depth=10, random_state=RANDOM_SEED),
    "SVM (RBF)": SVC(kernel='rbf', C=10, gamma=0.1, probability=True, random_state=RANDOM_SEED),
    "SVM (Polynomial)": SVC(kernel='poly', C=1, degree=3, gamma='scale', coef0=0, probability=True, random_state=RANDOM_SEED),
    "Logistic Regression": LogisticRegression(solver='liblinear', C=1, penalty='l1', class_weight='balanced', random_state=RANDOM_SEED, max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, max_depth=10, min_samples_split=5, min_samples_leaf=2, bootstrap=True, random_state=RANDOM_SEED),
    "MLP": MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', alpha=0.001, learning_rate='adaptive', max_iter=500, random_state=RANDOM_SEED)
}

results = {}
for name, clf in classifiers.items():
    print(f"Eğitiliyor ve değerlendiriliyor: {name}")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    mcc = matthews_corrcoef(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)

    results[name] = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "mcc": mcc,
        "confusion": confusion
    }

# Sonuçları yazdır
print("\nSonuçlar:")
for name, metrics in results.items():
    print(f"\n{name}:")
    for metric_name, value in metrics.items():
        if metric_name != "confusion":
            print(f" {metric_name}: {value:.4f}")
        else:
            print(f" {metric_name}:\n {value}")

Collecting deap
  Downloading deap-1.4.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading deap-1.4.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.6/135.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deap
Successfully installed deap-1.4.3
Nesil 0
Nesil 1
Nesil 2
Nesil 3
Nesil 4
Nesil 5
Nesil 6
Nesil 7
Nesil 8
Nesil 9
Nesil 10
Nesil 11
Nesil 12
Nesil 13
Nesil 14
Nesil 15
Nesil 16
Nesil 17
Nesil 18
Nesil 19
Nesil 20
Nesil 21
Nesil 22
Nesil 23
Nesil 24
Nesil 25
Nesil 26
Nesil 27
Nesil 28
Nesil 29
Nesil 30
Nesil 31
Nesil 32
Nesil 33
Nesil 34
Nesil 35
Nesil 36
Nesil 37
Nesil 38
Nesil 39
En iyi öznitelikler (indeksler): [2, 4, 5, 7, 8, 10, 12, 14, 15, 18, 21, 22, 23]
Eğitiliyor ve değerlendiriliyor: KNN
Eğitiliyor ve değerlendiriliyor: Naive Bayes (Gaus