In [6]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt


### Load Iris dataset

In [7]:
# Load the bundled Iris dataset once; X (feature matrix) and y (class labels)
# are read as module-level globals by the cells below.
iris = load_iris()
X, y = iris.data, iris.target


### Define fitness function

In [8]:
 

def predict_accuracy(individual):
    """Score a feature mask by the accuracy of a 5-nearest-neighbour classifier.

    Parameters
    ----------
    individual : sequence of truthy/falsy flags
        Position ``i`` being truthy means column ``i`` of the global ``X`` is
        used as an input feature.

    Returns
    -------
    int or float
        ``0`` when no feature is selected; otherwise the value returned by
        ``KNeighborsClassifier.score`` on the selected columns.

    Notes
    -----
    The classifier is fitted and scored on the same rows (global ``X``/``y``),
    so the returned value is a training-set accuracy, not a held-out estimate.
    """
    # Column indices whose flag is switched on.
    features = [column for column, flag in enumerate(individual) if flag]

    # Guard: an empty mask cannot be fitted, so it gets the worst fitness.
    if not features:
        return 0

    selected = X[:, features]
    classifier = KNeighborsClassifier(n_neighbors=5)
    classifier.fit(selected, y)
    return classifier.score(selected, y)


### Define genetic algorithm

In [9]:
 

def genetic_algorithm(pop_size, num_generations, mutation_rate, seed=None):
    """Search for a good feature mask with a simple generational genetic algorithm.

    Each individual is a 0/1 vector with one gene per column of the global
    ``X``; its fitness is ``predict_accuracy(individual)``.

    Every generation:
      1. parents are drawn with replacement, with probability proportional to
         fitness (uniformly if every fitness is zero);
      2. child ``i`` takes the genes before a random cut point from parent
         ``i`` and the rest from parent ``i + 1`` (wrapping around);
      3. each gene of the child is flipped with probability ``mutation_rate``;
      4. the offspring replace the whole population.

    Parameters
    ----------
    pop_size : int
        Number of individuals per generation; must be at least 1.
    num_generations : int
        Number of generations to evolve.
    mutation_rate : float
        Per-gene probability of flipping a bit in a child.
    seed : int or None, optional
        When given, a private ``np.random.RandomState(seed)`` drives all random
        draws so the run is repeatable. When ``None`` (default) the global
        ``np.random`` state is used, as before.

    Returns
    -------
    numpy.ndarray
        The individual of the final population with the highest fitness.

    Raises
    ------
    ValueError
        If ``pop_size`` is smaller than 1.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")

    # RandomState offers the same randint/choice/rand calls as the np.random
    # module, so both can be used interchangeably below.
    rng = np.random if seed is None else np.random.RandomState(seed)
    num_features = X.shape[1]

    # Initialize population with random 0/1 genes.
    population = rng.randint(2, size=(pop_size, num_features))

    for _ in range(num_generations):
        # Calculate fitness scores
        fitness_scores = np.array(
            [predict_accuracy(individual) for individual in population],
            dtype=float,
        )

        # Select parents. If every individual scored 0 (e.g. all masks are
        # empty) the proportional weights would be 0/0 = NaN and choice()
        # would raise; p=None makes the draw uniform instead.
        total_fitness = fitness_scores.sum()
        selection_probs = fitness_scores / total_fitness if total_fitness > 0 else None
        parent_indices = rng.choice(pop_size, size=pop_size, replace=True, p=selection_probs)
        parents = population[parent_indices]

        # Create offspring
        offspring = np.zeros_like(parents)
        for i in range(pop_size):
            # Single-point crossover with the next selected parent. A cut
            # point of 0 copies that next parent unchanged.
            crossover_point = rng.randint(num_features)
            offspring[i] = np.concatenate(
                (parents[i, :crossover_point], parents[(i + 1) % pop_size, crossover_point:])
            )
            # Bit-flip mutation, one independent draw per gene.
            flip_mask = rng.rand(num_features) < mutation_rate
            offspring[i, flip_mask] = 1 - offspring[i, flip_mask]

        # Replace population with offspring
        population = offspring

    # Return individual with highest fitness score in the final population.
    final_scores = np.array([predict_accuracy(individual) for individual in population])
    return population[np.argmax(final_scores)]


### Run genetic algorithm

In [10]:
# Evolve feature masks and keep the best one from the final generation.
best_individual = genetic_algorithm(pop_size=100, num_generations=100, mutation_rate=0.01)

# Translate the 0/1 mask into the list of selected column indices.
best_features = [column for column, flag in enumerate(best_individual) if flag]
print("Best features: {}".format(best_features))

# Refit on the chosen columns and report accuracy on the same data it was
# fitted on (i.e. training accuracy).
selected_columns = X[:, best_features]
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(selected_columns, y)
print("Accuracy: {:.2f}%".format(knn.score(selected_columns, y)*100))

Best features: [0, 2, 3]
Accuracy: 97.33%
