In [2]:
import numpy as np
import pandas as pd
import LinReg

In [3]:
myRNG = np.random.default_rng()
regressor = LinReg.LinReg()

def generate_initial_population(population_size, num_features):
    return myRNG.integers(0, 1, size=(population_size, num_features), endpoint=True)

def greate_fitness_scores(data, population):
    fitness_scores = []
    for i in range(population.shape[0]):
        X = regressor.get_columns(data.values, population[i])
        fitness = regressor.get_fitness(X[:,:-1], X[:,-1])
        fitness_scores.append((fitness, i))
    return fitness_scores

def parent_selection(data, population, num_parents):
    fitness_scores = greate_fitness_scores(data, population)
    fitness_scores.sort()
    print(fitness_scores)
    selected_parents = fitness_scores[:num_parents]
    selected_parents = [population[i] for fitness, i in selected_parents]
    return np.array(selected_parents)

In [4]:
data = pd.read_csv('dataset.txt', header=None)
num_features = data.shape[1] 
population_size = 20
generation_number = 50
crossover_rate = 0.8


mutation_rate = 0.1

In [5]:
initial_pop = generate_initial_population(population_size, num_features)
initial_pop.shape

(20, 102)

In [6]:
initial_pop

array([[0, 1, 0, ..., 1, 1, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 1, 1, 0],
       ...,
       [1, 1, 1, ..., 1, 1, 0],
       [0, 0, 0, ..., 1, 1, 1],
       [1, 0, 0, ..., 1, 1, 0]])

In [7]:
num_parents = 5
parents = parent_selection(data, initial_pop, num_parents)

[(0.06487976679667405, 16), (0.07909784701283304, 7), (0.08620099350711309, 4), (0.1097077108935049, 1), (0.1333763683937697, 3), (0.13585193156296563, 10), (0.13706345274032256, 5), (0.14029583941771845, 14), (0.1433401866086899, 15), (0.14575354845207508, 9), (0.14830777356542713, 6), (0.14853369974784247, 18), (0.15428112488372472, 13), (0.18443714145421317, 2), (0.18944891589724716, 8), (0.19296806973135142, 0), (0.19313164704919136, 17), (0.1975985461251567, 12), (0.21324160267153974, 19), (0.21766408209155855, 11)]


In [8]:
myRNG.random()

0.42113121355602945

In [9]:
np.random.rand()

0.26845150290695796

In [10]:
def mutate_gene(bit_string, mutation_rate):
    for i in range(len(bit_string)):
        if myRNG.random() < mutation_rate:
            bit_string[i] = 1 - bit_string[i]

    return bit_string

In [11]:
def generate_offspring(parent1, parent2, crossover_rate):
    if myRNG.random() < crossover_rate:
        crossover_point = myRNG.integers(1, len(parent1))
        print(crossover_point)
        offspring1 = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
        offspring2 = np.concatenate([parent2[:crossover_point], parent1[crossover_point:]])
        return offspring1, offspring2
    else:
        return parent1, parent2

In [12]:
a = np.array([1, 1, 0, 0, 0])
b = np.array([1, 0, 1, 0, 1])

generate_offspring(a, b, 0.8)

2


(array([1, 1, 1, 0, 1]), array([1, 0, 0, 0, 0]))

In [13]:
parents2 = greate_fitness_scores(data, initial_pop)

In [14]:
parents2

[(0.2121887583141375, 0),
 (0.11436765934096178, 1),
 (0.2007598989460437, 2),
 (0.12898028947778298, 3),
 (0.08107661106604484, 4),
 (0.14293281803094057, 5),
 (0.13668209566080874, 6),
 (0.06740839831511619, 7),
 (0.1908755061762512, 8),
 (0.14844357859689838, 9),
 (0.13746348797219018, 10),
 (0.20202522613236035, 11),
 (0.19741685973791187, 12),
 (0.13882715559530212, 13),
 (0.14806707793272061, 14),
 (0.13543255657502137, 15),
 (0.06535580274384307, 16),
 (0.20426643707765943, 17),
 (0.1289056752303825, 18),
 (0.20330477515890233, 19)]

In [15]:
parents_sum = sum([fitness for fitness, i in parents2])

In [16]:
parents_sum

2.98478066808128

In [29]:
a = np.array([ 1, 0, 0, 0, 0, 0, 0, 0])

In [30]:
a_int = int(''.join(map(str, a)), 2)
a_int

128

In [31]:
a_scaled = a_int * (128 / 2**len(a))
a_scaled

64.0

In [32]:
np.sin(a_scaled)

0.9200260381967906

In [21]:
initial_pop

array([[0, 1, 0, ..., 1, 1, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 1, 1, 0],
       ...,
       [1, 1, 1, ..., 1, 1, 0],
       [0, 0, 0, ..., 1, 1, 1],
       [1, 0, 0, ..., 1, 1, 0]])

In [22]:
def greate_fitness_scores(population):
    fitness_scores = []
    for i in range(population.shape[0]):
        int_value = int(''.join(map(str, population[i])), 2)
        scaled_value = int_value * (128 / 2**population.shape[1])

        fitness = np.sin(scaled_value)
        fitness_scores.append(fitness)
    return np.array(fitness_scores)

greate_fitness_scores(initial_pop)

array([-0.70755766,  0.06447822, -0.63437175,  0.974942  , -0.4817675 ,
        0.99999284, -0.56262089, -0.27605425, -0.99983207, -0.00271308,
       -0.98503992,  0.92008323,  0.98754703, -0.77442813,  0.48679361,
       -0.18356465, -0.67134659, -0.23603311,  0.96521266,  0.11116076])

In [23]:
def select_best_two(population):
    fitness_scores = greate_fitness_scores(population)
    best_two = np.argsort(fitness_scores)[-2:]
    return population[best_two]

select_best_two(initial_pop)

array([[0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
        0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,
        1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0],
       [0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0,
        0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1,
        0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1]])