In [358]:
import pandas as pd
import numpy as np
import copy

In [359]:
# Load the spreadsheet, drop the 'Unnamed: 0' column and rename
# 'Unnamed: 19' to 'Reference'.
df = (
	pd.read_excel('data.xlsx')
	.drop(columns=['Unnamed: 0'])
	.rename(columns={'Unnamed: 19': 'Reference'})
)

# Reference column holding the 'A' / 'I' labels.
df_ref_column = df['Reference']

# Number of rows labelled 'A' and number of rows labelled 'I'.
reference_counts = df["Reference"].value_counts()
total_A = reference_counts["A"]
total_I = reference_counts["I"]

# Operative DataFrame: the columns from X11 to X92, without the
# Reference column.
df_op = df.loc[:, "X11":"X92"]

# Number of operative columns (between X11 and X92).
nbr_operatives_columns = df_op.shape[1]

df.head()

Unnamed: 0,X11,X12,X21,X22,X31,X32,X41,X42,X51,X52,X61,X62,X71,X72,X81,X82,X91,X92,Reference
0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
2,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
3,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
4,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,A


In [360]:
# Create firstborns (chromosomes).

# Set the seed for reproducibility
#np.random.seed(42)

# Each chromosome holds one gene per operative column, +1 for the scalar.
genes_per_chromosome = nbr_operatives_columns + 1

# Draw 6 random chromosomes, every gene uniform in [-1, 1).
(chromosome1, chromosome2, chromosome3,
 chromosome4, chromosome5, chromosome6) = (
	-1 + 2 * np.random.random(genes_per_chromosome) for _ in range(6)
)

# Re-usable DataFrame with the current lineage, one column per chromosome.
firstborns = (chromosome1, chromosome2, chromosome3,
			  chromosome4, chromosome5, chromosome6)
df_current_lineage = pd.DataFrame(
	{f'chromo_{number}': genes for number, genes in enumerate(firstborns, start=1)}
)

print(df_current_lineage)

    chromo_1  chromo_2  chromo_3  chromo_4  chromo_5  chromo_6
0   0.250630 -0.894212 -0.162126 -0.532154  0.469313 -0.411255
1  -0.550753  0.333661 -0.459144 -0.792580  0.643102 -0.484152
2  -0.825353 -0.111995 -0.860215 -0.302605 -0.648352 -0.244718
3   0.163822 -0.483008 -0.949391  0.534962  0.242638 -0.938342
4   0.862706  0.212899  0.346454 -0.652017  0.329212  0.364422
5   0.772397  0.250929 -0.300332 -0.734909  0.258549  0.228383
6   0.939819  0.598308  0.831544  0.475357  0.535732 -0.531825
7   0.939716  0.389978 -0.130931 -0.049132 -0.196409 -0.504604
8   0.599719 -0.588415  0.373574 -0.698852 -0.865570 -0.129012
9   0.834616 -0.003825  0.728812 -0.868898 -0.917026 -0.674530
10 -0.883539 -0.407177 -0.126668 -0.769401 -0.989423  0.805290
11 -0.830665 -0.899993  0.718583 -0.916913 -0.003561  0.935551
12  0.343487 -0.808200 -0.905600  0.120421  0.086255  0.469641
13 -0.595049 -0.655594 -0.669224 -0.914664 -0.239396  0.740107
14  0.408580 -0.316980  0.475053 -0.826029 -0.269891 -0

In [361]:
# Function to apply one chromosome to one row of the DataFrame
# and return (row * chromosome).sum() + scalar.
def chromo_action(row, chromosome, scalar, trash):
	"""Score a single data row with a single chromosome.

	row        -- values of one data row.
	chromosome -- per-column weights, multiplied element-wise with `row`.
	scalar     -- offset added to the weighted sum.
	trash      -- unused; kept so existing call sites stay valid.

	Returns the sum of `row * chromosome` plus `scalar`.
	"""
	weighted_genes = row * chromosome
	return weighted_genes.sum() + scalar

# Score every row of the database with every chromosome of df_current_lineage.
trash = None  # chromo_action does not use its fourth argument
df_current_lineage_fitness = pd.DataFrame()
for column_position in range(len(df_current_lineage.columns)):
	chromosome_column = df_current_lineage.iloc[:, column_position]
	# First entry is the scalar, the remaining entries are the weights.
	scalar = chromosome_column.iloc[0]
	current_chromosome = chromosome_column.iloc[1:].values
	df_current_lineage_fitness[f'Chromo {column_position + 1} result'] = df_op.apply(
		func=chromo_action, axis=1, args=(current_chromosome, scalar, trash)
	)

# Turn each score into a label: 'A' when the score is strictly
# positive, 'I' otherwise.
df_current_lineage_fitness = df_current_lineage_fitness.applymap(lambda score: 'A' if score > 0 else 'I')

# Count how many A's and how many I's a chromosome labelled correctly.
def chromo_count_matches(column):
	"""Count the labels in `column` that agree with `df_ref_column`.

	Returns a Series with two entries: 'Right A' (positions where both
	`column` and `df_ref_column` are 'A') and 'Right I' (positions where
	both are 'I').
	"""
	hits = {
		f'Right {label}': ((column == label) & (df_ref_column == label)).sum()
		for label in ('A', 'I')
	}
	return pd.Series(hits)

# Replace the per-row labels with one 'Right A' / 'Right I' count pair
# per chromosome column.
df_current_lineage_fitness = df_current_lineage_fitness.apply(chromo_count_matches)

# Fitness of one chromosome, computed from its column of match counts.
def fitness_calculation(column):
	"""Return the product of the values in `column` divided by total_A * total_I."""
	return np.prod(column) / (total_A * total_I)

# Append the fitness of every chromosome as an extra 'Fitness' row.
fitness_values = df_current_lineage_fitness.apply(fitness_calculation, axis=0)
df_current_lineage_fitness.loc['Fitness'] = fitness_values

# The A and I count rows are obsolete now; keep only the 'Fitness' row.
i_remove = ['Right A', 'Right I']
df_current_lineage_fitness = df_current_lineage_fitness.drop(index=i_remove)

df_current_lineage_fitness

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Fitness,0.078947,0.037281,0.427632,0.0,0.052632,0.298246


## Raffle

In [362]:
# Get two raffle points to choose two chromosomes.
# The chosen chromosomes are going to be crossed.

# Array with the fitness value of every chromosome in the lineage.
fitness_array = df_current_lineage_fitness.iloc[0, :].to_numpy(dtype=float)

# Total fitness, used to turn each fitness into a share of the raffle.
fitness_array_sum = fitness_array.sum()

# A raffle cannot be built when no chromosome has any fitness (the
# division below would produce NaN), so fail with an explicit message.
if not fitness_array_sum > 0:
	raise ValueError("Total fitness is zero: cannot build the raffle bounds.")

# Cumulative upper bounds of each chromosome's slot, as integer percentages.
# Accumulate first and round afterwards: rounding each share before summing
# lets the last bound drift away from 100. This works for any number of
# chromosomes, not only six.
norm_fitness_array = np.rint(
	np.cumsum(fitness_array) / fitness_array_sum * 100
).astype(int)
# Sanity check only: norm_fitness_array[-1] is 100, the upper end of the
# raffle range.
# Raffle function: it returns a chromosome position given a raffle number.
def get_raffle_point(raffle, cumulative=None):
	"""Return the index of the slot that the raffle number falls into.

	raffle     -- the drawn number.
	cumulative -- non-decreasing upper bounds of the slots; defaults to the
	              module-level `norm_fitness_array`.

	Slot 0 covers raffle <= cumulative[0]; slot k (k > 0) covers
	cumulative[k - 1] < raffle <= cumulative[k]. A slot of zero width
	(two equal consecutive bounds) is never selected. A raffle above the
	last bound is assigned to the last slot instead of running past the
	end of the array.

	Raises ValueError when there are no slots.
	"""
	if cumulative is None:
		cumulative = norm_fitness_array
	bounds = np.asarray(cumulative)
	if bounds.size == 0:
		raise ValueError("No raffle bounds to choose from.")
	# First position whose upper bound is >= raffle. The previous
	# implementation returned the position *before* the matching slot, so
	# the last chromosome could never be picked.
	slot = int(np.searchsorted(bounds, raffle, side='left'))
	return min(slot, bounds.size - 1)

# Draw two random raffle numbers and turn each into a raffle point.
raffle_1, raffle_2 = (round(np.random.random() * 100) for _ in range(2))
raffle_point_1, raffle_point_2 = map(get_raffle_point, (raffle_1, raffle_2))

# Select (from the raffle points above) a father and a mother chromosome.
father_chromosome = df_current_lineage.iloc[:, raffle_point_1]
mother_chromosome = df_current_lineage.iloc[:, raffle_point_2]

## Cross

In [363]:
father_chromosome.values

array([ 0.46931317,  0.64310223, -0.64835169,  0.24263779,  0.32921216,
        0.2585495 ,  0.53573197, -0.19640881, -0.86557005, -0.91702557,
       -0.98942259, -0.00356106,  0.08625538, -0.23939617, -0.26989112,
       -0.41491679,  0.39652823, -0.71607572,  0.12285726])

In [364]:
mother_chromosome.values

array([ 0.46931317,  0.64310223, -0.64835169,  0.24263779,  0.32921216,
        0.2585495 ,  0.53573197, -0.19640881, -0.86557005, -0.91702557,
       -0.98942259, -0.00356106,  0.08625538, -0.23939617, -0.26989112,
       -0.41491679,  0.39652823, -0.71607572,  0.12285726])

In [365]:
# Cross function: it crosses a father and a mother and generates a newborn
# that is part mother and part father. The crossing point is random.
def cross_father_mother(father, mother):
	"""Return a child built from the head of `mother` and the tail of `father`.

	father, mother -- array-like chromosomes; callers pass equal lengths.

	The cross point is drawn uniformly from 1 .. len(father) - 1, so the
	child always takes at least one gene from each parent. The previous
	`round(18 * random)` assumed 19-gene chromosomes, was biased against
	the end points, and could return a pure copy of the father.
	Chromosomes with fewer than two genes cannot be split; a copy of the
	father is returned for them.
	"""
	father = np.asarray(father)
	mother = np.asarray(mother)
	n_genes = len(father)
	# randint's upper bound is exclusive, so this yields 1 .. n_genes - 1.
	cross_point = np.random.randint(1, n_genes) if n_genes >= 2 else 0
	maternal_egg = mother[:cross_point]
	paternal_sperm = father[cross_point:]
	return np.hstack((maternal_egg, paternal_sperm))

# Create 3 newborns from the same father and mother.
new_born_1, new_born_2, new_born_3 = (
	cross_father_mother(father_chromosome.values, mother_chromosome.values)
	for _ in range(3)
)

# Show the three newborns separated by a blank line.
print(new_born_1, new_born_2, new_born_3, sep='\n\n')

[ 0.46931317  0.64310223 -0.64835169  0.24263779  0.32921216  0.2585495
  0.53573197 -0.19640881 -0.86557005 -0.91702557 -0.98942259 -0.00356106
  0.08625538 -0.23939617 -0.26989112 -0.41491679  0.39652823 -0.71607572
  0.12285726]

[ 0.46931317  0.64310223 -0.64835169  0.24263779  0.32921216  0.2585495
  0.53573197 -0.19640881 -0.86557005 -0.91702557 -0.98942259 -0.00356106
  0.08625538 -0.23939617 -0.26989112 -0.41491679  0.39652823 -0.71607572
  0.12285726]

[ 0.46931317  0.64310223 -0.64835169  0.24263779  0.32921216  0.2585495
  0.53573197 -0.19640881 -0.86557005 -0.91702557 -0.98942259 -0.00356106
  0.08625538 -0.23939617 -0.26989112 -0.41491679  0.39652823 -0.71607572
  0.12285726]


## Mutate

In [366]:
df_current_lineage

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.25063,-0.894212,-0.162126,-0.532154,0.469313,-0.411255
1,-0.550753,0.333661,-0.459144,-0.79258,0.643102,-0.484152
2,-0.825353,-0.111995,-0.860215,-0.302605,-0.648352,-0.244718
3,0.163822,-0.483008,-0.949391,0.534962,0.242638,-0.938342
4,0.862706,0.212899,0.346454,-0.652017,0.329212,0.364422
5,0.772397,0.250929,-0.300332,-0.734909,0.258549,0.228383
6,0.939819,0.598308,0.831544,0.475357,0.535732,-0.531825
7,0.939716,0.389978,-0.130931,-0.049132,-0.196409,-0.504604
8,0.599719,-0.588415,0.373574,-0.698852,-0.86557,-0.129012
9,0.834616,-0.003825,0.728812,-0.868898,-0.917026,-0.67453


In [367]:
# Get a single random value from the whole current_lineage DataFrame.
def get_random_value(df_current_lineage):
	"""Return one value taken from a random row and a random column."""
	# One randomly sampled row, kept as a one-row DataFrame.
	picked_row = df_current_lineage.sample()
	# One randomly chosen column label.
	picked_label = np.random.choice(df_current_lineage.columns)
	# The single cell where the sampled row meets the chosen column.
	return picked_row[picked_label].values[0]

In [368]:
# Function to mutate a given newborn.
def mutate_newborn(new_born, random_value):
	"""Return a copy of `new_born` with one randomly chosen gene replaced.

	new_born     -- chromosome to mutate; it is not modified.
	random_value -- value written at the mutation point.

	The mutation point is drawn uniformly over all positions of the
	chromosome. The previous `round(18 * random)` assumed 19 genes, was
	biased against the first and last gene, and raised IndexError on
	shorter chromosomes. An empty chromosome is returned unchanged.
	"""
	mutated_new_born = copy.deepcopy(new_born)
	n_genes = len(mutated_new_born)
	if n_genes == 0:
		return mutated_new_born
	# randint's upper bound is exclusive: positions 0 .. n_genes - 1.
	mutation_point = np.random.randint(n_genes)
	mutated_new_born[mutation_point] = random_value
	return mutated_new_born

In [369]:
# Mutate the three newborns, drawing a fresh random value for each one.
mutated = []
for child in (new_born_1, new_born_2, new_born_3):
	random_value = get_random_value(df_current_lineage)
	mutated.append(mutate_newborn(child, random_value))
mutated_new_born_1, mutated_new_born_2, mutated_new_born_3 = mutated

# Show the mutated newborns, one per line.
print(*mutated, sep='\n')

[ 0.46931317  0.64310223 -0.64835169  0.24263779  0.64310223  0.2585495
  0.53573197 -0.19640881 -0.86557005 -0.91702557 -0.98942259 -0.00356106
  0.08625538 -0.23939617 -0.26989112 -0.41491679  0.39652823 -0.71607572
  0.12285726]
[ 0.46931317  0.64310223 -0.64835169  0.24263779  0.32921216  0.2585495
  0.53573197 -0.19640881 -0.86557005  0.93981905 -0.98942259 -0.00356106
  0.08625538 -0.23939617 -0.26989112 -0.41491679  0.39652823 -0.71607572
  0.12285726]
[ 0.64310223  0.64310223 -0.64835169  0.24263779  0.32921216  0.2585495
  0.53573197 -0.19640881 -0.86557005 -0.91702557 -0.98942259 -0.00356106
  0.08625538 -0.23939617 -0.26989112 -0.41491679  0.39652823 -0.71607572
  0.12285726]


## Form new lineage

In [370]:
# Gather the three mutated newborns in one DataFrame, one column each.
newborn_columns = {
	f'Newborn {number}': genes
	for number, genes in enumerate(
		(mutated_new_born_1, mutated_new_born_2, mutated_new_born_3), start=1
	)
}
df_three_newborn = pd.DataFrame(newborn_columns)
df_three_newborn

Unnamed: 0,Newborn 1,Newborn 2,Newborn 3
0,0.469313,0.469313,0.643102
1,0.643102,0.643102,0.643102
2,-0.648352,-0.648352,-0.648352
3,0.242638,0.242638,0.242638
4,0.643102,0.329212,0.329212
5,0.258549,0.258549,0.258549
6,0.535732,0.535732,0.535732
7,-0.196409,-0.196409,-0.196409
8,-0.86557,-0.86557,-0.86557
9,-0.917026,0.939819,-0.917026


In [371]:
# Apply the same fitness flow to the DataFrame holding the newborns.

# Score every row of the database with every newborn of df_three_newborn.
trash = None  # chromo_action does not use its fourth argument
df_three_newborn_fitness = pd.DataFrame()
for column_position in range(len(df_three_newborn.columns)):
	newborn_column = df_three_newborn.iloc[:, column_position]
	# First entry is the scalar, the remaining entries are the weights.
	scalar = newborn_column.iloc[0]
	current_chromosome = newborn_column.iloc[1:].values
	df_three_newborn_fitness[f'Newborn {column_position + 1} result'] = df_op.apply(
		func=chromo_action, axis=1, args=(current_chromosome, scalar, trash)
	)

# Turn each score into a label: 'A' when the score is strictly
# positive, 'I' otherwise.
df_three_newborn_fitness = df_three_newborn_fitness.applymap(lambda score: 'A' if score > 0 else 'I')

# Count how many A's and how many I's each newborn labelled correctly.
df_three_newborn_fitness = df_three_newborn_fitness.apply(chromo_count_matches)

# Append the fitness of every newborn as an extra 'Fitness' row.
fitness_values = df_three_newborn_fitness.apply(fitness_calculation, axis=0)
df_three_newborn_fitness.loc['Fitness'] = fitness_values

# The A and I count rows are obsolete now; keep only the 'Fitness' row.
i_remove = ['Right A', 'Right I']
df_three_newborn_fitness = df_three_newborn_fitness.drop(index=i_remove)

df_three_newborn_fitness

Unnamed: 0,Newborn 1 result,Newborn 2 result,Newborn 3 result
Fitness,0.061404,0.02193,0.087719


In [372]:
# Label of the newborn fitness column with the lowest value.
worst_newborn_label = df_three_newborn_fitness.min().idxmin()
# Position of that column; the newborn at the same position is discarded.
min_column_name = df_three_newborn_fitness.columns.get_loc(worst_newborn_label)
df_three_newborn_less_one = df_three_newborn.drop(
	columns=df_three_newborn.columns[min_column_name]
)
print(df_three_newborn_less_one)

    Newborn 1  Newborn 3
0    0.469313   0.643102
1    0.643102   0.643102
2   -0.648352  -0.648352
3    0.242638   0.242638
4    0.643102   0.329212
5    0.258549   0.258549
6    0.535732   0.535732
7   -0.196409  -0.196409
8   -0.865570  -0.865570
9   -0.917026  -0.917026
10  -0.989423  -0.989423
11  -0.003561  -0.003561
12   0.086255   0.086255
13  -0.239396  -0.239396
14  -0.269891  -0.269891
15  -0.414917  -0.414917
16   0.396528   0.396528
17  -0.716076  -0.716076
18   0.122857   0.122857


In [373]:
df_current_lineage_fitness

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Fitness,0.078947,0.037281,0.427632,0.0,0.052632,0.298246


In [383]:
# Find the chromosome with the lowest fitness and remember its position
# in the full lineage.
min_column_name = df_current_lineage_fitness.min().idxmin()
min_column_index_one = df_current_lineage_fitness.columns.get_loc(min_column_name)
df_current_lineage_fitness_less_one = df_current_lineage_fitness.drop(columns=min_column_name)

print(df_current_lineage_fitness_less_one)

# Find the second-lowest fitness among the remaining chromosomes.
min_column_name = df_current_lineage_fitness_less_one.min().idxmin()
# Look the position up in the FULL fitness frame, not in the reduced one:
# positions in the reduced frame are shifted by one for every column that
# sits after the column already removed, so using them against
# df_current_lineage would drop the wrong chromosome (or the same one twice).
min_column_index_two = df_current_lineage_fitness.columns.get_loc(min_column_name)

# Remove the two weakest chromosomes from the lineage by position.
df_current_lineage_less_two = df_current_lineage.drop(
	df_current_lineage.columns[[min_column_index_one, min_column_index_two]], axis=1
)

print(df_current_lineage_less_two)

         Chromo 1 result  Chromo 2 result  Chromo 3 result  Chromo 5 result  \
Fitness         0.078947         0.037281         0.427632         0.052632   

         Chromo 6 result  
Fitness         0.298246  
    chromo_1  chromo_3  chromo_5  chromo_6
0   0.250630 -0.162126  0.469313 -0.411255
1  -0.550753 -0.459144  0.643102 -0.484152
2  -0.825353 -0.860215 -0.648352 -0.244718
3   0.163822 -0.949391  0.242638 -0.938342
4   0.862706  0.346454  0.329212  0.364422
5   0.772397 -0.300332  0.258549  0.228383
6   0.939819  0.831544  0.535732 -0.531825
7   0.939716 -0.130931 -0.196409 -0.504604
8   0.599719  0.373574 -0.865570 -0.129012
9   0.834616  0.728812 -0.917026 -0.674530
10 -0.883539 -0.126668 -0.989423  0.805290
11 -0.830665  0.718583 -0.003561  0.935551
12  0.343487 -0.905600  0.086255  0.469641
13 -0.595049 -0.669224 -0.239396  0.740107
14  0.408580  0.475053 -0.269891 -0.565989
15 -0.706663  0.472890 -0.414917 -0.751920
16  0.441504  0.395751  0.396528 -0.701334
17  0.615599 

In [385]:
# Put the surviving chromosomes and the surviving newborns side by side
# and rename the columns chromo_1 .. chromo_6.
surviving_frames = [df_current_lineage_less_two, df_three_newborn_less_one]
df_new_lineage = pd.concat(surviving_frames, axis=1)
new_columns_names = [f'chromo_{number}' for number in range(1, 7)]
df_new_lineage.columns = new_columns_names
print(df_new_lineage)

    chromo_1  chromo_2  chromo_3  chromo_4  chromo_5  chromo_6
0   0.250630 -0.162126  0.469313 -0.411255  0.469313  0.643102
1  -0.550753 -0.459144  0.643102 -0.484152  0.643102  0.643102
2  -0.825353 -0.860215 -0.648352 -0.244718 -0.648352 -0.648352
3   0.163822 -0.949391  0.242638 -0.938342  0.242638  0.242638
4   0.862706  0.346454  0.329212  0.364422  0.643102  0.329212
5   0.772397 -0.300332  0.258549  0.228383  0.258549  0.258549
6   0.939819  0.831544  0.535732 -0.531825  0.535732  0.535732
7   0.939716 -0.130931 -0.196409 -0.504604 -0.196409 -0.196409
8   0.599719  0.373574 -0.865570 -0.129012 -0.865570 -0.865570
9   0.834616  0.728812 -0.917026 -0.674530 -0.917026 -0.917026
10 -0.883539 -0.126668 -0.989423  0.805290 -0.989423 -0.989423
11 -0.830665  0.718583 -0.003561  0.935551 -0.003561 -0.003561
12  0.343487 -0.905600  0.086255  0.469641  0.086255  0.086255
13 -0.595049 -0.669224 -0.239396  0.740107 -0.239396 -0.239396
14  0.408580  0.475053 -0.269891 -0.565989 -0.269891 -0