In [68]:
import pandas as pd
import numpy as np
import copy

In [69]:
# Load the spreadsheet, drop the 'Unnamed: 0' column and rename
# 'Unnamed: 19' to 'Reference' in a single chain.
df = (
	pd.read_excel('data.xlsx')
	.drop(columns=['Unnamed: 0'])
	.rename(columns={'Unnamed: 19': 'Reference'})
)

# Reference labels ('A' / 'I'), one per row of the data.
df_ref_column = df['Reference']

# Number of rows labelled 'A' and number of rows labelled 'I'.
reference_counts = df_ref_column.value_counts()
total_A = reference_counts["A"]
total_I = reference_counts["I"]

# Operative DataFrame: only the columns from X11 to X92
# (the Reference column is not part of it).
df_op = df.loc[:, "X11": "X92"]

# Number of operative columns (between X11 and X92).
nbr_operatives_columns = df_op.shape[1]

df.head()

Unnamed: 0,X11,X12,X21,X22,X31,X32,X41,X42,X51,X52,X61,X62,X71,X72,X81,X82,X91,X92,Reference
0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
2,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
3,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
4,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,A


In [70]:
# Create the first generation of chromosomes (the "firstborns").

# Uncomment to make the random draws reproducible.
#np.random.seed(42)

# Each chromosome has one gene per operative column plus one extra entry
# for the scalar; every entry is drawn uniformly from [-1, 1).
genes_per_chromosome = nbr_operatives_columns + 1
(chromosome1, chromosome2, chromosome3,
 chromosome4, chromosome5, chromosome6) = (
	-1 + 2 * np.random.random(genes_per_chromosome) for _ in range(6)
)

# Re-usable DataFrame holding the current lineage, one column per chromosome.
df_current_lineage = pd.DataFrame({
	f'chromo_{position}': genes
	for position, genes in enumerate(
		(chromosome1, chromosome2, chromosome3,
		 chromosome4, chromosome5, chromosome6),
		start=1,
	)
})

print(df_current_lineage)

    chromo_1  chromo_2  chromo_3  chromo_4  chromo_5  chromo_6
0   0.225699  0.206762 -0.882324 -0.246591  0.313724  0.788192
1   0.484741  0.836177  0.194184 -0.694950 -0.623641  0.626074
2  -0.380300 -0.988987  0.401273 -0.156678  0.841206  0.474705
3   0.123742  0.661210  0.072325 -0.694553  0.073595 -0.078076
4  -0.864996 -0.553009  0.559548 -0.421123 -0.717663  0.012047
5   0.099024  0.790608  0.343171  0.082034  0.280572 -0.128902
6   0.435291  0.084518  0.301262  0.779074 -0.060964 -0.709309
7  -0.205059 -0.937848 -0.285459 -0.382372  0.564361 -0.892575
8   0.241838 -0.336696  0.289784 -0.816599  0.428484  0.766985
9  -0.660815  0.765284 -0.639188 -0.412982  0.386769 -0.073098
10  0.180842 -0.440678 -0.598121  0.162395 -0.518764 -0.873013
11 -0.960721  0.909938 -0.828986  0.186571  0.013701 -0.793213
12 -0.805164  0.836611 -0.869685 -0.792085  0.869060  0.486652
13  0.137076 -0.578705  0.365188  0.234773  0.025763  0.030046
14  0.016361 -0.288497 -0.770359  0.590215 -0.265809  0

In [71]:
# Function that applies one chromosome to one row of the DataFrame
# and returns (row * chromosome).sum() + scalar.
def chromo_action(row, chromosome, scalar, trash):
	"""Score a single data row with a chromosome.

	Parameters
	----------
	row : array-like
		Values of one data row.
	chromosome : array-like
		Genes, multiplied element-wise with ``row``.
	scalar : number
		Constant added to the summed products.
	trash : any
		Not used by the function.

	Returns
	-------
	number
		Sum of the element-wise products plus ``scalar``.
	"""
	return (row * chromosome).sum() + scalar

# Score every row of df_op with each chromosome of the current lineage.
# In a lineage column, row 0 is the scalar and rows 1.. are the genes.
trash = None
df_chromo_result = pd.DataFrame()
max_i = len(df_current_lineage.columns)
for i in range(1, max_i + 1):
	lineage_column = df_current_lineage.iloc[:, i - 1]
	scalar = lineage_column.iloc[0]
	current_chromosome = lineage_column.iloc[1:].values
	chromo_result = df_op.apply(
		func=chromo_action,
		axis=1,
		args=(current_chromosome, scalar, trash),
	)
	df_chromo_result[f'Chromo {i} result'] = chromo_result

# Turn the scores into predicted labels: 'A' when the score is strictly
# positive, 'I' otherwise (a score of exactly 0 therefore becomes 'I').
df_chromo_result = df_chromo_result.applymap(
	lambda score: 'A' if score > 0 else 'I'
)

# Count how many A's and how many I's a chromosome predicted correctly.
def chromo_count_matches(column):
	"""Count the predictions in ``column`` that agree with ``df_ref_column``.

	Parameters
	----------
	column : pandas.Series
		Predicted labels ('A' or 'I'), compared against the module-level
		``df_ref_column``.

	Returns
	-------
	pandas.Series
		Two entries: 'Right A' (positions where both are 'A') and
		'Right I' (positions where both are 'I').
	"""
	predicted_A = column.eq('A')
	predicted_I = column.eq('I')
	actual_A = df_ref_column.eq('A')
	actual_I = df_ref_column.eq('I')
	return pd.Series({
		'Right A': (predicted_A & actual_A).sum(),
		'Right I': (predicted_I & actual_I).sum(),
	})

# Replace the per-row 'A'/'I' predictions with, for each chromosome column,
# the two counts returned by chromo_count_matches ('Right A' and 'Right I').
df_chromo_result = df_chromo_result.apply(chromo_count_matches)

# Calculate the fitness of one chromosome.
def fitness_calculation(column):
	"""Return the fitness of a chromosome from its match counts.

	The fitness is the product of all entries of ``column`` (in this
	notebook: the 'Right A' and 'Right I' counts) divided by
	``total_A * total_I``, both read from module level.

	Parameters
	----------
	column : array-like
		Match counts of one chromosome.

	Returns
	-------
	number
		``prod(column) / (total_A * total_I)``.
	"""
	return np.prod(column) / (total_A * total_I)

# Rows that only served to compute the fitness and are dropped below.
i_remove = ['Right A', 'Right I']

# One fitness value per chromosome column, stored as a 'Fitness' row.
fitness_values = df_chromo_result.apply(fitness_calculation)
df_chromo_result.loc['Fitness'] = fitness_values

# Keep only the 'Fitness' row.
df_chromo_result = df_chromo_result.drop(index=i_remove)

df_chromo_result

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Fitness,0.0,0.035088,0.0,0.041667,0.153509,0.140351


## Raffle

In [72]:
# Build the raffle table used to choose two chromosomes.
# The chosen chromosomes are going to be crossed.

# Fitness values of the current lineage, one per chromosome.
fitness_array = df_chromo_result.iloc[0, :].to_numpy(dtype=float)

# Total fitness, used to turn each fitness into a share of the raffle.
fitness_array_sum = fitness_array.sum()

# Share of the raffle owned by each chromosome. If every fitness is zero
# there is nothing to normalise by, so all chromosomes get the same share.
if fitness_array_sum > 0:
	selection_share = fitness_array / fitness_array_sum
else:
	selection_share = np.full(len(fitness_array), 1 / len(fitness_array))

# Cumulative upper bounds on a 0-100 scale: chromosome k owns the raffle
# numbers in (norm_fitness_array[k - 1], norm_fitness_array[k]].
# The running total is rounded (instead of summing rounded shares) so that
# rounding errors do not accumulate, and the last bound is pinned to 100 so
# that every raffle number from 0 to 100 falls inside some slot.
norm_fitness_array = np.round(np.cumsum(selection_share) * 100).astype(int)
norm_fitness_array[-1] = 100

# raffle function: it returns a position (0-based chromosome index)
# given a raffle number.
def get_raffle_point(raffle, cumulative=None):
	"""Return the index of the chromosome whose slot contains ``raffle``.

	Chromosome ``k`` owns the raffle numbers in
	``(cumulative[k - 1], cumulative[k]]``; chromosome 0 owns everything up
	to and including ``cumulative[0]``.

	Parameters
	----------
	raffle : number
		The drawn raffle number.
	cumulative : array-like, optional
		Non-decreasing cumulative fitness bounds. Defaults to the
		module-level ``norm_fitness_array``.

	Returns
	-------
	int
		Position of the selected chromosome. A raffle above the last bound
		is assigned to the last chromosome instead of running past the end
		of the array.

	Raises
	------
	ValueError
		If ``cumulative`` is empty.
	"""
	if cumulative is None:
		cumulative = norm_fitness_array
	cumulative = np.asarray(cumulative)
	if cumulative.size == 0:
		raise ValueError("cumulative fitness array is empty")
	# First position whose bound is >= raffle, i.e. the owner of the slot.
	slot = int(np.searchsorted(cumulative, raffle, side='left'))
	# Clamp so a raffle beyond the last bound still selects a chromosome.
	return min(slot, cumulative.size - 1)

# Draw two random raffle numbers (integers between 0 and 100) and look up
# the chromosome position each one selects.
raffle_1, raffle_2 = (round(np.random.random() * 100) for _ in range(2))
raffle_point_1, raffle_point_2 = map(get_raffle_point, (raffle_1, raffle_2))

# Select (from the raffle points above) a father and a mother chromosome.
father_chromosome = df_current_lineage.iloc[:, raffle_point_1]
mother_chromosome = df_current_lineage.iloc[:, raffle_point_2]

## Cross

In [73]:
# Show the values of the chromosome selected as father.
father_chromosome.values

array([ 0.31372402, -0.62364097,  0.84120565,  0.07359514, -0.71766268,
        0.2805722 , -0.06096403,  0.56436125,  0.4284836 ,  0.38676871,
       -0.51876353,  0.01370116,  0.86905963,  0.02576328, -0.26580853,
       -0.67724325, -0.56016347,  0.42609284, -0.0635323 ])

In [74]:
# Show the values of the chromosome selected as mother.
mother_chromosome.values

array([-0.24659072, -0.69494993, -0.15667761, -0.69455252, -0.42112349,
        0.08203356,  0.77907371, -0.38237247, -0.81659913, -0.41298204,
        0.16239528,  0.18657061, -0.79208515,  0.23477262,  0.5902149 ,
        0.3277841 , -0.56502544, -0.18662032, -0.14102995])

In [75]:
# cross function: it crosses a father and a mother and generates a new born
# that is part mother and part father. The crossing point is random.
def cross_father_mother(father, mother):
	"""Create a child from the head of ``mother`` and the tail of ``father``.

	The crossing point is drawn uniformly from ``1 .. len(father) - 1`` so
	that it adapts to the chromosome length and both parents contribute at
	least one gene.

	Parameters
	----------
	father : array-like
		Chromosome providing the genes from the crossing point onwards.
	mother : array-like
		Chromosome providing the genes before the crossing point.

	Returns
	-------
	numpy.ndarray
		New chromosome with the same length as the parents.

	Raises
	------
	ValueError
		If the parents do not have the same number of genes.
	"""
	n_genes = len(father)
	if len(mother) != n_genes:
		raise ValueError("father and mother must have the same number of genes")
	if n_genes < 2:
		# Too short to split between two parents: return a copy of the father.
		return np.array(father, copy=True)
	# randint's upper bound is exclusive, so the point lies in 1 .. n_genes - 1.
	cross_point = np.random.randint(1, n_genes)
	maternal_egg = mother[:cross_point]
	paternal_sperm = father[cross_point:]
	new_born = np.hstack((maternal_egg, paternal_sperm))
	return new_born

# Breed three new borns from the same father and mother
# (one call to cross_father_mother per child).
new_born_1, new_born_2, new_born_3 = (
	cross_father_mother(father_chromosome.values, mother_chromosome.values)
	for _ in range(3)
)

# Print the three children separated by blank lines.
print(new_born_1, new_born_2, new_born_3, sep='\n\n')

[-0.24659072 -0.69494993 -0.15667761 -0.69455252 -0.42112349  0.08203356
  0.77907371 -0.38237247 -0.81659913 -0.41298204  0.16239528  0.18657061
 -0.79208515  0.02576328 -0.26580853 -0.67724325 -0.56016347  0.42609284
 -0.0635323 ]

[-0.24659072 -0.69494993 -0.15667761 -0.69455252 -0.42112349  0.08203356
  0.77907371 -0.38237247 -0.81659913 -0.41298204  0.16239528  0.18657061
  0.86905963  0.02576328 -0.26580853 -0.67724325 -0.56016347  0.42609284
 -0.0635323 ]

[-0.24659072 -0.69494993 -0.15667761 -0.69455252 -0.42112349  0.08203356
  0.77907371 -0.38237247 -0.81659913 -0.41298204  0.16239528  0.18657061
 -0.79208515  0.23477262  0.5902149   0.3277841  -0.56016347  0.42609284
 -0.0635323 ]


## Mutate

In [76]:
# Show the current lineage (one column per chromosome); the mutation step
# below picks its replacement values from this table.
df_current_lineage

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.225699,0.206762,-0.882324,-0.246591,0.313724,0.788192
1,0.484741,0.836177,0.194184,-0.69495,-0.623641,0.626074
2,-0.3803,-0.988987,0.401273,-0.156678,0.841206,0.474705
3,0.123742,0.66121,0.072325,-0.694553,0.073595,-0.078076
4,-0.864996,-0.553009,0.559548,-0.421123,-0.717663,0.012047
5,0.099024,0.790608,0.343171,0.082034,0.280572,-0.128902
6,0.435291,0.084518,0.301262,0.779074,-0.060964,-0.709309
7,-0.205059,-0.937848,-0.285459,-0.382372,0.564361,-0.892575
8,0.241838,-0.336696,0.289784,-0.816599,0.428484,0.766985
9,-0.660815,0.765284,-0.639188,-0.412982,0.386769,-0.073098


In [77]:
# Get a single random value out of the whole current_lineage table.
def get_random_value(df_current_lineage):
	"""Return one randomly chosen cell of ``df_current_lineage``.

	A random row is sampled first, then a random column label is chosen,
	and the value found at their intersection is returned.

	Parameters
	----------
	df_current_lineage : pandas.DataFrame
		Table to pick the value from.

	Returns
	-------
	scalar
		The value stored at the randomly chosen row and column.
	"""
	# One-row DataFrame holding the randomly sampled row.
	picked_row = df_current_lineage.sample()
	# Randomly chosen column label.
	picked_label = np.random.choice(df_current_lineage.columns)
	return picked_row[picked_label].values[0]

In [78]:
# Function to mutate a given new born.
def mutate_newborn(new_born, random_value):
	"""Return a copy of ``new_born`` with one random gene replaced.

	The mutated position is drawn uniformly over the whole chromosome, so
	the function works for any chromosome length.

	Parameters
	----------
	new_born : array-like
		Chromosome to mutate; it is not modified.
	random_value : number
		Value written at the mutated position.

	Returns
	-------
	same type as ``new_born``
		Deep copy of ``new_born`` with exactly one position overwritten.

	Raises
	------
	ValueError
		If ``new_born`` is empty.
	"""
	n_genes = len(new_born)
	if n_genes == 0:
		raise ValueError("cannot mutate an empty chromosome")
	# randint's upper bound is exclusive: positions 0 .. n_genes - 1.
	mutation_point = np.random.randint(n_genes)
	# Work on a copy so the caller's chromosome stays untouched.
	mutated_new_born = copy.deepcopy(new_born)
	mutated_new_born[mutation_point] = random_value
	return mutated_new_born

In [79]:
# Mutate the three new borns; each one gets its own freshly drawn
# replacement value from the current lineage.
mutated = []
for child in (new_born_1, new_born_2, new_born_3):
	random_value = get_random_value(df_current_lineage)
	mutated.append(mutate_newborn(child, random_value))
mutated_new_born_1, mutated_new_born_2, mutated_new_born_3 = mutated

# Print the mutated chromosomes one after the other.
for mutated_child in mutated:
	print(mutated_child)

[-0.24659072  0.23477262 -0.15667761 -0.69455252 -0.42112349  0.08203356
  0.77907371 -0.38237247 -0.81659913 -0.41298204  0.16239528  0.18657061
 -0.79208515  0.02576328 -0.26580853 -0.67724325 -0.56016347  0.42609284
 -0.0635323 ]
[-0.24659072 -0.69494993 -0.15667761 -0.69455252 -0.42112349  0.08203356
  0.77907371 -0.38237247 -0.81659913 -0.41298204  0.16239528 -0.66081541
  0.86905963  0.02576328 -0.26580853 -0.67724325 -0.56016347  0.42609284
 -0.0635323 ]
[-0.24659072 -0.90328174 -0.15667761 -0.69455252 -0.42112349  0.08203356
  0.77907371 -0.38237247 -0.81659913 -0.41298204  0.16239528  0.18657061
 -0.79208515  0.23477262  0.5902149   0.3277841  -0.56016347  0.42609284
 -0.0635323 ]


## Form new lineage

In [80]:
# Show the current lineage (one column per chromosome).
df_current_lineage

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.225699,0.206762,-0.882324,-0.246591,0.313724,0.788192
1,0.484741,0.836177,0.194184,-0.69495,-0.623641,0.626074
2,-0.3803,-0.988987,0.401273,-0.156678,0.841206,0.474705
3,0.123742,0.66121,0.072325,-0.694553,0.073595,-0.078076
4,-0.864996,-0.553009,0.559548,-0.421123,-0.717663,0.012047
5,0.099024,0.790608,0.343171,0.082034,0.280572,-0.128902
6,0.435291,0.084518,0.301262,0.779074,-0.060964,-0.709309
7,-0.205059,-0.937848,-0.285459,-0.382372,0.564361,-0.892575
8,0.241838,-0.336696,0.289784,-0.816599,0.428484,0.766985
9,-0.660815,0.765284,-0.639188,-0.412982,0.386769,-0.073098


In [81]:
# Show the 'Fitness' row: one fitness value per chromosome of the lineage.
df_chromo_result

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Fitness,0.0,0.035088,0.0,0.041667,0.153509,0.140351
