In [246]:
import pandas as pd
import numpy as np
import copy

## Receive database

In [247]:
def oil_database(df):
	"""Split the raw spreadsheet frame into its working pieces.

	The 'Unnamed: 0' column is discarded and 'Unnamed: 19' is exposed under
	the name 'Reference'.

	Returns a 5-tuple, in this order:
		- the slice of columns from 'X11' to 'X92' (no 'Reference' column),
		- the 'Reference' column,
		- how many 'Reference' entries equal 'A',
		- how many 'Reference' entries equal 'I',
		- the number of columns in the 'X11'..'X92' slice.
	"""
	cleaned = (
		df
		.drop(columns=['Unnamed: 0'])
		.rename(columns={'Unnamed: 19': 'Reference'})
	)

	# Labels column and the number of occurrences of each label.
	labels = cleaned['Reference']
	label_counts = labels.value_counts()
	total_A = label_counts["A"]
	total_I = label_counts["I"]

	# Operative columns only: everything between X11 and X92 (inclusive).
	operative = cleaned.loc[:, "X11":"X92"]
	return operative, labels, total_A, total_I, len(operative.columns)

## Create firstborns

In [248]:
def create_firstborns(nbr_operatives_columns):
	"""Build the initial lineage: six random chromosomes as DataFrame columns.

	Each chromosome has ``nbr_operatives_columns + 1`` entries (the extra one
	is for the scalar), each computed as ``-1 + 2 * np.random.random(...)``.
	The columns are named 'chromo_1' .. 'chromo_6'.
	"""
	gene_count = nbr_operatives_columns + 1  # +1 for the scalar
	lineage = {}
	for position in range(1, 7):
		# One independent draw per chromosome, in column order.
		lineage[f'chromo_{position}'] = -1 + 2 * np.random.random(gene_count)

	# Re-usable DataFrame holding the current lineage.
	return pd.DataFrame(lineage)

## Calculate chromosomes fitness

In [249]:
# Apply one chromosome to one line of the DataFrame:
# the result is (line * chromosome).sum() plus the scalar.
def chromo_action(row, current_chromosome, scalar):
	"""Return the sum of the element-wise product of ``row`` and
	``current_chromosome``, shifted by ``scalar``."""
	return (row * current_chromosome).sum() + scalar

# Count how many A's and how many I's were predicted correctly.
def chromo_count_matches(column, ref_column):
	"""Compare predicted labels with reference labels.

	Returns a Series with two entries: 'Right A' (positions where both
	``column`` and ``ref_column`` are 'A') and 'Right I' (positions where both
	are 'I').
	"""
	label_for_key = {'Right A': 'A', 'Right I': 'I'}
	return pd.Series({
		key: ((column == label) & (ref_column == label)).sum()
		for key, label in label_for_key.items()
	})

# Fitness of one chromosome from its counts of correct A's and I's.
def fitness_calculation(column, total_A, total_I):
	"""Return the product of the entries of ``column`` divided by
	``total_A * total_I``."""
	return np.prod(column) / (total_A * total_I)

def calculate_chromosomes_fitnes(df_op, df_current_lineage, df_ref_column, total_A, total_I):
	"""Compute the fitness of every chromosome (column) of a lineage.

	For each column of ``df_current_lineage``, row 0 is used as the scalar and
	the remaining rows as the chromosome applied to every row of ``df_op`` via
	``chromo_action``. A result greater than 0 is labelled 'A', anything else
	'I'. The labels are compared with ``df_ref_column`` and turned into a
	fitness by ``fitness_calculation``.

	Parameters:
		df_op: operative columns of the database (one row per sample).
		df_current_lineage: one chromosome per column.
		df_ref_column: reference 'A'/'I' labels.
		total_A, total_I: number of 'A' and 'I' entries in the reference.

	Returns:
		A DataFrame with a single row labelled 'Fitness' and one column per
		chromosome, named 'Chromo <k> result' (k starting at 1).
	"""
	# Raw result of every chromosome on every database row.
	raw_results = {}
	for position in range(len(df_current_lineage.columns)):
		scalar = df_current_lineage.iloc[0, position]
		current_chromosome = df_current_lineage.iloc[1:, position].values
		raw_results[f'Chromo {position + 1} result'] = df_op.apply(
			func=chromo_action, axis=1, args=(current_chromosome, scalar)
		)
	df_results = pd.DataFrame(raw_results, index=df_op.index)

	# 'A' if the result is > 0, 'I' otherwise. np.where is used instead of
	# DataFrame.applymap, which is deprecated since pandas 2.1.
	df_predicted = pd.DataFrame(
		np.where(df_results > 0, 'A', 'I'),
		index=df_results.index,
		columns=df_results.columns,
	)

	# Correct A's / I's per chromosome, then one fitness value per chromosome.
	df_matches = df_predicted.apply(chromo_count_matches, ref_column=df_ref_column)
	fitness_values = df_matches.apply(fitness_calculation, args=(total_A, total_I))

	# Only the fitness row is returned; the match counts are intermediate.
	return fitness_values.to_frame(name='Fitness').T

## Raffle

In [250]:
# Raffle function: it returns a position given a raffle number.
def get_raffle_point(raffle, norm_fitness_array):
	"""Map a raffle number onto a position of a cumulative fitness array.

	``norm_fitness_array`` holds cumulative upper bounds, so position ``k``
	owns the interval ``(norm_fitness_array[k - 1], norm_fitness_array[k]]``
	(position 0 owns everything up to ``norm_fitness_array[0]``).

	Parameters:
		raffle: the drawn number.
		norm_fitness_array: sequence of cumulative upper bounds.

	Returns:
		The first position whose upper bound is >= ``raffle``. A raffle above
		every bound (possible when rounding makes the last bound fall short)
		maps to the last position. An empty array yields 0.
	"""
	last_position = len(norm_fitness_array) - 1
	if last_position < 0:
		return 0
	for position, upper_bound in enumerate(norm_fitness_array):
		# The first bound that reaches the raffle identifies the owner;
		# returning the previous position here would make the last
		# chromosome unselectable.
		if raffle <= upper_bound:
			return position
	return last_position

def elect_father_and_mother(df_current_lineage, df_current_lineage_fitness):
	"""Pick two chromosomes of the lineage by raffle, weighted by fitness.

	The fitness values (row 0 of ``df_current_lineage_fitness``) are turned
	into rounded percentage shares and accumulated. Two raffle numbers in
	0..100 are drawn and mapped to positions with ``get_raffle_point``.
	The chosen chromosomes are going to be crossed.

	Parameters:
		df_current_lineage: one chromosome per column.
		df_current_lineage_fitness: fitness values in row 0, one column per
			chromosome, in the same column order as ``df_current_lineage``.

	Returns:
		A tuple ``(father_chromosome, mother_chromosome)`` of columns of
		``df_current_lineage``. Both may be the same column.
	"""
	# Array with the fitness values and its sum.
	fitness_array = np.asarray(df_current_lineage_fitness.iloc[0, :].values, dtype=float)
	fitness_array_sum = fitness_array.sum()

	if fitness_array_sum > 0:
		shares = fitness_array / fitness_array_sum
	else:
		# No chromosome has a usable fitness: dividing by the sum would give
		# NaN, so every chromosome gets the same share instead.
		shares = np.full(len(fitness_array), 1.0 / len(fitness_array))

	# Cumulative sum of the rounded percentages. The last value is close to
	# 100 (rounding can make it slightly smaller or larger).
	norm_fitness_array = np.cumsum(np.round(shares * 100))

	# Get two raffle points given a random raffle.
	raffle_1 = round(np.random.random() * 100)
	raffle_point_1 = get_raffle_point(raffle_1, norm_fitness_array)
	raffle_2 = round(np.random.random() * 100)
	raffle_point_2 = get_raffle_point(raffle_2, norm_fitness_array)

	# Select (from the raffle above) a father and a mother chromosome.
	father_chromosome = df_current_lineage.iloc[:, raffle_point_1]
	mother_chromosome = df_current_lineage.iloc[:, raffle_point_2]
	return (father_chromosome, mother_chromosome)

## Cross

In [251]:
# Cross function: it crosses a father and a mother and generates a newborn
# that is part mother and part father. The crossing point is random.
def cross_father_mother(father, mother):
	"""Create a newborn from the head of ``mother`` and the tail of ``father``.

	The crossing point is drawn at random between 0 and the last position of
	``father`` (derived from its length rather than hard-coded, so chromosomes
	of any size work). A crossing point of 0 yields a copy of the father.

	Parameters:
		father, mother: sliceable sequences of genes (e.g. numpy arrays).

	Returns:
		A numpy array: ``mother[:cross_point]`` followed by
		``father[cross_point:]``.
	"""
	last_position = max(len(father) - 1, 0)
	cross_point = round(last_position * np.random.random())
	paternal_sperm = father[cross_point:]
	maternal_egg = mother[:cross_point]
	return np.hstack((maternal_egg, paternal_sperm))

In [252]:
def cross_and_birth_newborns(father_chromosome, mother_chromosome):
	"""Cross the same father and mother three times.

	Each crossing goes through ``cross_father_mother`` on the ``.values`` of
	the two chromosomes. Returns the three newborns as a tuple.
	"""
	offspring = [
		cross_father_mother(father_chromosome.values, mother_chromosome.values)
		for _ in range(3)
	]
	return tuple(offspring)

## Mutate

In [253]:
# Pick a single random value out of the whole current_lineage DataFrame.
def get_random_value(df_current_lineage):
	"""Return the value found at a random row and a random column of
	``df_current_lineage``."""
	# The row is sampled first, then the column label is drawn.
	sampled_row = df_current_lineage.sample()
	chosen_label = np.random.choice(df_current_lineage.columns)
	# The sampled frame has one row, so the first entry is the value.
	return sampled_row.loc[:, chosen_label].to_numpy()[0]

In [254]:
# Function to mutate a given newborn.
def mutate_newborn(new_born, random_value):
	"""Return a copy of ``new_born`` with one randomly chosen gene replaced.

	The mutation point is drawn between 0 and the last position of
	``new_born`` (derived from its length rather than hard-coded, so a
	chromosome of any size can be mutated without indexing past its end).

	Parameters:
		new_born: indexable sequence of genes (e.g. a numpy array).
		random_value: the value written at the mutation point.

	Returns:
		The mutated copy; ``new_born`` itself is left untouched.
	"""
	last_position = max(len(new_born) - 1, 0)
	mutation_point = round(last_position * np.random.random())
	mutated_new_born = copy.deepcopy(new_born)
	mutated_new_born[mutation_point] = random_value
	return mutated_new_born

In [255]:
def mutate_the_three_newborns(df_current_lineage, new_born_1, new_born_2, new_born_3):
	"""Mutate each of the three newborns once.

	For every newborn, a fresh value is fetched with ``get_random_value`` from
	``df_current_lineage`` and handed to ``mutate_newborn``. Returns the three
	mutated newborns as a tuple, in the order they were given.
	"""
	mutated = []
	for new_born in (new_born_1, new_born_2, new_born_3):
		# A new random value is fetched right before each mutation.
		random_value = get_random_value(df_current_lineage)
		mutated.append(mutate_newborn(new_born, random_value))
	return tuple(mutated)

## Form new lineage

In [256]:
def calculate_three_newborn_fitness(df_op, df_ref_column, total_A, total_I, mutated_new_born_1, mutated_new_born_2, mutated_new_born_3):
	"""Gather the three mutated newborns in a DataFrame and compute their fitness.

	The fitness flow is the same one used for the current lineage, so it is
	delegated to ``calculate_chromosomes_fitnes`` instead of being repeated
	here. Only the column labels of the result differ.

	Parameters:
		df_op: operative columns of the database.
		df_ref_column: reference 'A'/'I' labels.
		total_A, total_I: number of 'A' and 'I' entries in the reference.
		mutated_new_born_1, mutated_new_born_2, mutated_new_born_3: the
			newborn chromosomes (scalar in position 0).

	Returns:
		A tuple ``(df_three_newborn, df_three_newborn_fitness)``: the newborns
		as columns 'Newborn 1'..'Newborn 3', and a one-row ('Fitness') frame
		with columns 'Newborn 1 result'..'Newborn 3 result'.
	"""
	# DataFrame with the three mutated newborns, one per column.
	df_three_newborn = pd.DataFrame({'Newborn 1': mutated_new_born_1, 'Newborn 2': mutated_new_born_2, 'Newborn 3': mutated_new_born_3})

	# Apply the same fitness flow used for the lineage to the newborns.
	df_three_newborn_fitness = calculate_chromosomes_fitnes(df_op, df_three_newborn, df_ref_column, total_A, total_I)

	# Keep the newborn-specific labels on the fitness columns.
	df_three_newborn_fitness.columns = [
		f'Newborn {position} result'
		for position in range(1, len(df_three_newborn.columns) + 1)
	]
	return df_three_newborn, df_three_newborn_fitness

In [257]:
def select_new_lineage(df_current_lineage, df_current_lineage_fitness, df_three_newborn, df_three_newborn_fitness):
	"""Form the next lineage from the current one and the newborns.

	The newborn with the lowest fitness and the two chromosomes of the current
	lineage with the lowest fitness are discarded. The survivors of the
	current lineage come first, followed by the surviving newborns, and the
	columns are renamed 'chromo_1', 'chromo_2', ... in that order.

	Fitness columns are matched to chromosome columns by position, so each
	fitness frame must list its columns in the same order as the frame of
	chromosomes it describes.

	Parameters:
		df_current_lineage: one chromosome per column.
		df_current_lineage_fitness: fitness of each current chromosome.
		df_three_newborn: one newborn per column.
		df_three_newborn_fitness: fitness of each newborn.

	Returns:
		The new lineage as a DataFrame.
	"""
	# Drop the worst newborn.
	newborn_scores = df_three_newborn_fitness.min()
	worst_newborn_position = df_three_newborn_fitness.columns.get_loc(newborn_scores.idxmin())
	df_three_newborn_less_one = df_three_newborn.drop(columns=df_three_newborn.columns[worst_newborn_position])

	# Drop the two worst chromosomes of the current lineage. Working on the
	# per-column minimum guarantees two distinct columns are selected.
	lineage_scores = df_current_lineage_fitness.min()
	worst_labels = lineage_scores.nsmallest(2).index
	worst_positions = df_current_lineage_fitness.columns.get_indexer(worst_labels)
	df_current_lineage_less_two = df_current_lineage.drop(columns=df_current_lineage.columns[worst_positions])

	# Join the survivors and rename the columns after the actual size of the
	# new lineage, so the function is not tied to a population of six.
	df_new_lineage = pd.concat([df_current_lineage_less_two, df_three_newborn_less_one], axis=1)
	df_new_lineage.columns = [f'chromo_{position}' for position in range(1, df_new_lineage.shape[1] + 1)]
	return df_new_lineage


In [258]:
# Read the Excel file into a DataFrame.
df = pd.read_excel('data.xlsx')

# Set the seed for reproducibility
# np.random.seed(40)

# Number of generations the lineage is evolved for.
N_GENERATIONS = 100

df_op, df_ref_column, total_A, total_I, nbr_operatives_columns = oil_database(df)
df_current_lineage = create_firstborns(nbr_operatives_columns)

print(f"df_current_lineage before iteration:\n{df_current_lineage}\n")

# One generation: evaluate the lineage, pick two parents, breed and mutate
# three newborns, then keep the four best chromosomes and the two best newborns.
for generation in range(N_GENERATIONS):

	df_current_lineage_fitness = calculate_chromosomes_fitnes(df_op, df_current_lineage, df_ref_column, total_A, total_I)
	father_chromosome, mother_chromosome = elect_father_and_mother(df_current_lineage, df_current_lineage_fitness)
	new_born_1, new_born_2, new_born_3 = cross_and_birth_newborns(father_chromosome, mother_chromosome)
	mutated_new_born_1, mutated_new_born_2, mutated_new_born_3 = mutate_the_three_newborns(df_current_lineage, new_born_1, new_born_2, new_born_3)
	df_three_newborn, df_three_newborn_fitness = calculate_three_newborn_fitness(df_op, df_ref_column, total_A, total_I, mutated_new_born_1, mutated_new_born_2, mutated_new_born_3)
	df_new_lineage = select_new_lineage(df_current_lineage, df_current_lineage_fitness, df_three_newborn, df_three_newborn_fitness)
	df_current_lineage = df_new_lineage

# The fitness computed inside the loop belongs to the lineage *before* the
# last replacement, so it is evaluated once more for the final lineage.
df_current_lineage_fitness = calculate_chromosomes_fitnes(df_op, df_current_lineage, df_ref_column, total_A, total_I)

print(f"df_current_lineage_fitness after iteration:\n{df_current_lineage_fitness}\n")


df_current_lineage before iteration:
    chromo_1  chromo_2  chromo_3  chromo_4  chromo_5  chromo_6
0  -0.392284 -0.458825  0.905813 -0.626523 -0.632654  0.381176
1   0.658339 -0.362057  0.633987  0.450608  0.328886  0.474140
2  -0.415806  0.185862  0.081046  0.584772 -0.315927  0.876117
3   0.217986  0.342841 -0.983822  0.914117  0.740008  0.132998
4   0.259299  0.898555 -0.614288 -0.758502 -0.167325 -0.640479
5  -0.682090  0.595374  0.031896 -0.027648 -0.769300 -0.222677
6   0.806703 -0.660572  0.308540 -0.538600 -0.176831 -0.608445
7   0.813925  0.134899 -0.309454  0.543788 -0.898280 -0.809393
8   0.725918  0.094412 -0.850527  0.131937  0.647962 -0.974007
9  -0.036696  0.678995  0.393235  0.230812  0.783981 -0.688847
10  0.545780  0.640083 -0.261750  0.208492 -0.875413  0.917901
11  0.360272  0.368887 -0.571951 -0.656512  0.342873 -0.379230
12  0.526828 -0.005822  0.758334 -0.320632  0.421185 -0.166017
13  0.638829  0.616290 -0.917103  0.165775  0.513346  0.468699
14  0.851545  0.86