In [1]:
from ucimlrepo import fetch_ucirepo 

# fetch dataset 
optical_recognition_of_handwritten_digits = fetch_ucirepo(id=80) 
  
# data (as pandas dataframes) 
X = optical_recognition_of_handwritten_digits.data.features 
y = optical_recognition_of_handwritten_digits.data.targets 
  
# metadata 
print(optical_recognition_of_handwritten_digits.metadata) 

# variable information 
print(optical_recognition_of_handwritten_digits.variables) 

{'uci_id': 80, 'name': 'Optical Recognition of Handwritten Digits', 'repository_url': 'https://archive.ics.uci.edu/dataset/80/optical+recognition+of+handwritten+digits', 'data_url': 'https://archive.ics.uci.edu/static/public/80/data.csv', 'abstract': 'Two versions of this database available; see folder', 'area': 'Computer Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 5620, 'num_features': 64, 'feature_types': ['Integer'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1998, 'last_updated': 'Wed Aug 23 2023', 'dataset_doi': '10.24432/C50P49', 'creators': ['E. Alpaydin', 'C. Kaynak'], 'intro_paper': {'title': 'Methods of Combining Multiple Classifiers and Their Applications to Handwritten Digit Recognition', 'authors': 'C. Kaynak', 'published_in': 'MSc Thesis, Institute of Graduate Studies in Science and Engineering, Bogazici University', 

In [2]:
y= y.to_numpy()
y =y.ravel()

In [3]:
import numpy as np
import random

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9750889679715302


In [6]:
def cal_pop_fitness(pop, X_train, y_train, X_test, y_test):
    fitness = []
    for i in range(len(pop)):
        columns = np.unique(pop[i])
        
        X_train_ind = X_train.iloc[:,columns]
        rf_classifier.fit(X_train_ind, y_train)

        y_pred = rf_classifier.predict(X_test.iloc[:,columns])

        accuracy = accuracy_score(y_test, y_pred)

        fitness.append(accuracy)

    return fitness

def select_mating_pool(pop, fitness, num_parents):
    # Selecting the best individuals in the current generation as parents for producing the offspring of the next generation.
    parents = np.empty((num_parents, pop.shape[1]))
    for parent_num in range(num_parents):
        max_fitness_idx = np.where(fitness == np.max(fitness))
        max_fitness_idx = max_fitness_idx[0][0]
        parents[parent_num, :] = pop[max_fitness_idx, :]
        fitness[max_fitness_idx] = -1
    return parents

def crossover(parents, offspring_size):
    offspring = np.empty(offspring_size)

    for k in range(offspring_size[0]):
        # Always perform one-point crossover.
        crossover_point = np.random.randint(1, parents.shape[1])
        parent1_idx = k % parents.shape[0]
        parent2_idx = (k + 1) % parents.shape[0]
        offspring[k, :crossover_point] = parents[parent1_idx, :crossover_point]
        offspring[k, crossover_point:] = parents[parent2_idx, crossover_point:]

    return offspring

def mutation(offspring_crossover):
    # Mutation changes a single gene in each offspring randomly.
    for idx in range(offspring_crossover.shape[0]):
        # The random value to be added to the gene.
        random_value = np.random.uniform(-1.0, 1.0, 1)
        offspring_crossover[idx, 4] = offspring_crossover[idx, 4] + random_value
    return offspring_crossover

In [7]:
col_len = 32

sol_per_pop = 8
num_parents_mating = 4

# Defining the population size.
pop_size = (sol_per_pop,col_len) # The population will have sol_per_pop chromosome where each chromosome has num_weights genes.
#Creating the initial population.
new_population = np.random.randint(63, size=pop_size)
print(new_population)

[[48  3 26  7 19  4 12  3  1 29 52  6 16 19 38 32 10 27 60 59 19 19 14 49
   7 33  7 39  4 49 14 27]
 [20 56 11  2 49 56 46 27 21 14  4 10 20  7 17 58 28 45  0 28 14 31 48 41
  38 55 60 44 27  9 30 24]
 [18  7 35 43 48 52 43 56 41 24 28 54 58  9 57 45  6 12 46 56 21 37 34 51
  15  9 35 16 57  4 62 35]
 [62 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16 23
  16 21  5 52 32 13 10 42]
 [35 61  0 37 45 30  9 31  8 36 62 50 59 22 58 35 25 33 45 50 41 27 39  4
  33 32 38 18 44 28 23  6]
 [13 56 16 33 59  1 45 17 49 60 51 42 22 45  9 51 36 30 13 29  4 16 36 56
  39 16 31 47 30 56 11 10]
 [44 20  5  8 45  0 58  2 12  0 33 41 26 24 44 54  6 26  5 36 46 62 53 48
  47  8 26 55 26 18 42  0]
 [17 31  2 60 41 45 15 19 12 13 47 49 42 33 31 58 43 49 32 36  6  5 41 14
   1  9 56 22 12  8 56 38]]


In [8]:
num_generations = 10
for generation in range(num_generations):
    # Measing the fitness of each chromosome in the population.
    fitness = cal_pop_fitness(new_population, X_train,y_train,X_test,y_test)

    print(fitness)

    # Selecting the best parents in the population for mating.
    parents = select_mating_pool(new_population, fitness, 
                                      num_parents_mating)
    
    # print(parents)

    # Generating next generation using crossover.
    offspring_crossover = crossover(parents,
                                       offspring_size=(pop_size[0]-parents.shape[0], col_len))


    # # Adding some variations to the offsrping using mutation.
    # offspring_mutation = mutation(offspring_crossover)

    # print(offspring_mutation)

    # Creating the new population based on the parents and offspring.
    new_population[0:parents.shape[0], :] = parents
    new_population[parents.shape[0]:, :] = offspring_crossover  # should be offspring_mutation

    # new_population[0:parents.shape[0], :] = offspring_crossover

    # The best result in the current iteration.
    if generation%10 ==0:
        print("Generation : ", generation)
        # print(offspring_crossover)
        print()

[0.8896797153024911, 0.9211150652431791, 0.9537366548042705, 0.9561091340450771, 0.9448398576512456, 0.9098457888493475, 0.9306049822064056, 0.9276393831553974]
Generation :  0

[0.9561091340450771, 0.9537366548042705, 0.9448398576512456, 0.9306049822064056, 0.9584816132858838, 0.9466192170818505, 0.9288256227758007, 0.9495848161328588]
[0.9584816132858838, 0.9561091340450771, 0.9537366548042705, 0.9495848161328588, 0.9561091340450771, 0.9395017793594306, 0.9377224199288257, 0.9584816132858838]
[0.9584816132858838, 0.9584816132858838, 0.9561091340450771, 0.9561091340450771, 0.9584816132858838, 0.9578884934756821, 0.9561091340450771, 0.9584816132858838]
[0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838]
[0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838, 0.9584816132858838]
[0.95848161328588

In [9]:
# Getting the best solution after iterating finishing all generations.
#At first, the fitness is calculated for each solution in the final generation.
fitness = cal_pop_fitness(new_population, X_train, y_train, X_test,y_test)
# Then return the index of that solution corresponding to the best fitness.
best_match_idx = np.where(fitness == np.max(fitness))

print("Best solution : ", new_population[best_match_idx, :])
print("Best solution fitness : ", fitness[best_match_idx[0][0]])

Best solution :  [[[62 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16
   23 16 21  5 52 32 13 10 35]
  [44 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16
   23 16 21  5 52 32 13 10 35]
  [62 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16
   23 16 21  5 52 32 13 10 35]
  [62 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16
   23 16 21  5 52 32 13 10 35]
  [62 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16
   23 16 21  5 52 32 13 10 35]
  [44 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16
   23 16 21  5 52 32 13 10 35]
  [62 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16
   23 16 21  5 52 32 13 10 35]
  [62 51 32  1 36 61 60 38 62 26 19 45 25 29  4 62 57 42 30 61 38 50 16
   23 16 21  5 52 32 13 10 35]]]
Best solution fitness :  0.9584816132858838
