In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# Load the Iris dataset: a 2-D feature matrix and one integer class label per row.
iris = datasets.load_iris()
data, labels = iris.data, iris.target
num_samples, num_features = data.shape[:2]
print(f'num_samples,num_features = {(num_samples,num_features)}')

# Random 40/60 split; the row indices are split alongside the data.
indices = np.arange(num_samples)
(
    X_train, X_test,
    y_train, y_test,
    train_indices, test_indices,
) = train_test_split(data, labels, indices, test_size=0.6, random_state=42)

# NOTE(review): the indices produced by train_test_split are replaced right
# here by fixed strided index sets (every 4th row starting at 1 for training,
# every 4th row starting at 0 for testing). Confirm which split is intended.
train_indices = np.arange(1, num_samples, 4)
test_indices = np.arange(0, num_samples, 4)
print(f'train_indices ={train_indices.shape},test_indices ={test_indices.shape}')

In [None]:

from numpy.random import randint
from numpy.random import rand


def select_features(elem, features):
    """Return the columns of ``features`` switched on by the bit mask ``elem``.

    Args:
        elem: 1-D array of 0/1 flags, one per column of ``features``.
        features: 2-D array indexed as ``[sample, feature]``.

    Returns:
        A 2-D array holding only the columns whose flag equals 1 (it has zero
        columns when no flag is set).
    """
    chosen_columns = np.nonzero(elem == 1)[0]
    return features[:, chosen_columns]

def classification_accuracy(labels, preds):
    """Return the fraction of entries in ``preds`` that equal ``labels``.

    Args:
        labels: 1-D array of true class labels.
        preds: 1-D array of predicted class labels, same length as ``labels``.

    Returns:
        Number of matching positions divided by the number of labels.
    """
    hits = np.nonzero(labels == preds)[0]
    return len(hits) / len(labels)

def objective(pop, data, labels, train_ind, test_ind):
    """Score every individual in ``pop`` by hold-out classification accuracy.

    Each individual is a 0/1 mask over the columns of ``data``. For each mask a
    logistic-regression classifier is fitted on the selected columns of the
    training rows and evaluated on the test rows.

    Args:
        pop: 2-D array, one bit-mask individual per row.
        data: 2-D feature matrix indexed as ``[sample, feature]``.
        labels: 1-D array of class labels, one per row of ``data``.
        train_ind: Row indices used for fitting.
        test_ind: Row indices used for scoring.

    Returns:
        1-D float array where entry ``i`` is the test accuracy of ``pop[i]``.
        Individuals that cannot be evaluated (no feature selected, or an empty
        train/test split) keep a score of 0.
    """
    accuracies = np.zeros(pop.shape[0])

    # The label subsets do not depend on the individual, so build them once.
    # They come from the function's own parameters, not from module globals.
    train_labels = labels[train_ind]
    test_labels = labels[test_ind]

    # enumerate keeps each score aligned with its individual even when an
    # individual is skipped below.
    for idx, elem in enumerate(pop):
        selected_features = select_features(elem, data)
        train_data = selected_features[train_ind, :]
        test_data = selected_features[test_ind, :]

        # Nothing to fit or predict on: leave this individual's score at 0.
        if train_data.size == 0 or test_data.size == 0:
            continue

        LR_classifier = LR(random_state=0)
        LR_classifier.fit(X=train_data, y=train_labels)
        predictions = LR_classifier.predict(test_data)
        accuracies[idx] = classification_accuracy(test_labels, predictions)
    return accuracies

def parent_selection(pop, n_pop, scores, k=3):
    """Choose ``n_pop`` parents from ``pop`` by k-way tournament selection.

    For every parent slot, ``k`` individuals are drawn uniformly at random
    (with replacement) and the one with the highest score wins. Scores are
    treated as fitness to maximise, matching how the caller reports the best
    individual with ``np.max``.

    Args:
        pop: Sequence/array of individuals.
        n_pop: Number of parents to select.
        scores: Fitness value for each individual, aligned with ``pop``.
        k: Tournament size (number of candidates compared per slot).

    Returns:
        A list with ``n_pop`` selected individuals (entries of ``pop``).
    """
    selected = []
    pop_len = len(pop)
    for _ in range(n_pop):
        # First candidate, then k-1 challengers.
        idx = randint(pop_len)
        for ix in randint(0, pop_len, k - 1):
            # A challenger wins only with a strictly higher fitness.
            if scores[ix] > scores[idx]:
                idx = ix
        selected.append(pop[idx])
    return selected

def crossover(p1, p2, r_cross):
    """Produce two children from two parents by single-point crossover.

    With probability ``r_cross`` a cut point is drawn uniformly from
    ``1 .. len(p1) - 1`` and the parents' tails are swapped; otherwise the
    children are plain copies of the parents. Parents with fewer than two
    genes have no valid cut point and are always copied.

    Args:
        p1: First parent (1-D bit array).
        p2: Second parent (1-D bit array, same length as ``p1``).
        r_cross: Crossover probability in ``[0, 1]``.

    Returns:
        A two-element list ``[child1, child2]`` of new NumPy arrays; the
        parents are never modified.
    """
    # np.array copies, so the children never alias the parents.
    c1 = np.array(p1)
    c2 = np.array(p2)
    n_genes = len(p1)
    if n_genes >= 2 and rand() < r_cross:
        # randint's upper bound is exclusive, so this yields 1..n_genes-1 and
        # both children always receive genes from both parents.
        pt = randint(1, n_genes)
        c1 = np.concatenate((p1[:pt], p2[pt:]))
        c2 = np.concatenate((p2[:pt], p1[pt:]))
    return [c1, c2]

def mutation(bitstring, r_mut):
    """Flip each bit of ``bitstring`` in place with probability ``r_mut``.

    Args:
        bitstring: Mutable sequence of 0/1 values; it is modified in place.
        r_mut: Per-bit flip probability.

    Returns:
        The same ``bitstring`` object that was passed in.
    """
    for position, bit in enumerate(bitstring):
        # One independent random draw per bit decides whether it flips.
        if rand() < r_mut:
            bitstring[position] = 1 - bit
    return bitstring

def get_children(selected_parents, n_pop, r_cross, r_mut):
    """Build the next generation from the selected parents.

    Parents are taken in consecutive pairs; each pair is recombined with
    ``crossover`` and both offspring are passed through ``mutation``.

    Args:
        selected_parents: Sequence of at least ``n_pop`` parent bit arrays.
        n_pop: Number of children to produce.
        r_cross: Crossover probability forwarded to ``crossover``.
        r_mut: Per-bit mutation probability forwarded to ``mutation``.

    Returns:
        Array with exactly ``n_pop`` children, one per row.
    """
    children = []
    n_parents = len(selected_parents)
    for i in range(0, n_pop, 2):
        p1 = selected_parents[i]
        # With an odd number of parents the last one has no neighbour; wrap
        # around to the first parent instead of indexing past the end.
        p2 = selected_parents[(i + 1) % n_parents]
        for c in crossover(p1, p2, r_cross):
            # mutation works in place on the freshly created child.
            mutation(c, r_mut)
            children.append(c)
    # An odd n_pop yields one surplus child from the last pair; drop it so the
    # population size stays constant.
    return np.array(children[:n_pop])

def genetic_algorithm(epochs, data, labels, train_indices, test_indices):
    """Run a genetic algorithm that searches for a good feature subset.

    Individuals are 0/1 masks over the columns of ``data``; fitness is the
    accuracy returned by ``objective``. Every generation is fully replaced by
    the children of tournament-selected parents. Progress is printed per
    generation, the best mask of the final population is printed, and the
    best fitness per generation is plotted.

    Args:
        epochs: Number of generations to evolve.
        data: 2-D feature matrix indexed as ``[sample, feature]``.
        labels: 1-D array of class labels, one per row of ``data``.
        train_indices: Row indices used to fit the classifier.
        test_indices: Row indices used to score the classifier.

    Returns:
        None. Results are reported via ``print`` and a matplotlib plot.
    """
    pop_size = 10
    k = 4  # tournament size used for parent selection
    r_cross = 0.9
    # NOTE(review): the mutation rate is tied to the population size here; a
    # per-bit rate of 1/num_features is the more common choice - confirm.
    r_mut = 1/pop_size
    # Derive the mask length from the data actually passed in rather than
    # from a module-level global.
    num_features = data.shape[1]
    pop_shape = (pop_size, num_features)

    # Initial population: uniformly random bit masks.
    new_population = np.random.randint(low=0, high=2, size=pop_shape)
    best_outputs = []
    num_generations = epochs
    for gen in range(num_generations):
        # Measure the fitness of each member of the population.
        scores = objective(new_population, data, labels, train_indices, test_indices)

        # Record and report the current best.
        best_outputs.append(np.max(scores))
        print(f"Gen: {gen} => Best result : {best_outputs[-1]}")

        # Select parents from the current population.
        selected = parent_selection(new_population, pop_size, scores, k=k)

        # Recombine and mutate them into the next generation.
        children = get_children(selected, pop_size, r_cross, r_mut)

        # Replace the old population.
        new_population = children

    # Score the final population before reporting it, so the last data point
    # reflects the population that is actually returned by the search (and so
    # that epochs == 0 still works).
    scores = objective(new_population, data, labels, train_indices, test_indices)
    best_outputs.append(np.max(scores))
    print(f"Gen: {num_generations} => Best result : {best_outputs[-1]}")

    # Index of the first individual that reaches the best fitness.
    best_match_idx = np.argmax(scores)
    print(f'np.max(scores) ={np.max(scores)}')
    best_solution = new_population[best_match_idx, :]
    best_solution_indices = np.where(best_solution == 1)[0]
    best_solution_num_elements = best_solution_indices.shape[0]
    best_solution_fitness = scores[best_match_idx]

    print("best_match_idx : ", best_match_idx)
    print("best_solution : ", best_solution)
    print("Selected indices : ", best_solution_indices)
    print("Number of selected elements : ", best_solution_num_elements)
    print("Best solution fitness : ", best_solution_fitness)

    plt.plot(best_outputs)
    plt.xlabel("Iteration")
    plt.ylabel("Fitness")
    plt.show()

# Evolve feature masks for 100 generations on the data loaded above.
genetic_algorithm(
    epochs=100,
    data=data,
    labels=labels,
    train_indices=train_indices,
    test_indices=test_indices,
)