In [735]:
import pandas as pd
import numpy as np
import copy

## Receive database

In [736]:
def oil_database(df):
	"""Split the raw spreadsheet DataFrame into the pieces the algorithm uses.

	The 'Unnamed: 0' column is dropped and 'Unnamed: 5' is renamed to
	'Reference'. The cleaned frame and the operative frame are printed.

	Returns a tuple with:
	  - df_op: the columns from "X11" through "X22" (label slice, inclusive),
	  - df_ref_column: the 'Reference' column,
	  - total_A / total_I: how many 'A' and 'I' labels 'Reference' holds,
	  - nbr_operatives_columns: the number of columns in df_op.
	"""
	# Clean up the header: discard the unnamed leading column and give the
	# label column a meaningful name.
	df = df.drop(columns=['Unnamed: 0']).rename(columns={'Unnamed: 5': 'Reference'})

	# Reference labels ('A' / 'I') and how often each one occurs.
	df_ref_column = df['Reference']
	label_counts = df_ref_column.value_counts()
	total_A, total_I = label_counts["A"], label_counts["I"]

	# Operative DataFrame: only the feature columns, without 'Reference'.
	df_op = df.loc[:, "X11": "X22"]
	nbr_operatives_columns = df_op.shape[1]

	print(f"original dataframe:\n{df}\n")
	print(f"operative dataframe:\n{df_op}\n")
	return df_op, df_ref_column, total_A, total_I, nbr_operatives_columns

## Create firstborns

In [737]:
def create_firstborns(nbr_operatives_columns, nbr_chromosomes=6):
	"""Create the first generation of random chromosomes.

	Parameters:
	  nbr_operatives_columns: number of operative (feature) columns; each
	    chromosome gets one gene per column plus one extra gene for the scalar.
	  nbr_chromosomes: how many chromosomes to generate (default 6, the
	    population size this function originally hard-coded).

	Returns a DataFrame with one column per chromosome, named 'chromo_1',
	'chromo_2', ..., whose values are drawn uniformly from [-1, 1).
	The DataFrame is also printed.
	"""
	genes_per_chromosome = nbr_operatives_columns + 1  # +1 for the scalar

	# The comprehension draws the chromosomes one after another, so the random
	# stream is consumed in the same order as six explicit calls would.
	df_current_lineage = pd.DataFrame({
		f'chromo_{number}': -1 + 2 * np.random.random(genes_per_chromosome)
		for number in range(1, nbr_chromosomes + 1)
	})

	print(f"df_current_lineage:\n{df_current_lineage}\n")

	return df_current_lineage

## Calculate chromosomes fitness

In [738]:
# Function to apply one chromosome to one row of the DataFrame
# and return the result: (row * chromosome).sum() + scalar.
def chromo_action(row, current_chromosome, scalar):
	"""Score one row with one chromosome.

	Multiplies `row` element-wise by `current_chromosome`, sums the products
	and adds `scalar`. Returns that single number.
	"""
	weighted_genes = row * current_chromosome
	return weighted_genes.sum() + scalar

# Calculate how many A's and how many I's were correct.
def chromo_count_matches(column, ref_column):
	"""Count the predictions in `column` that agree with `ref_column`.

	Returns a Series with two entries: 'Right A' (positions where both hold
	'A') and 'Right I' (positions where both hold 'I').
	"""
	predicted_A, predicted_I = column == 'A', column == 'I'
	expected_A, expected_I = ref_column == 'A', ref_column == 'I'
	return pd.Series({
		'Right A': (predicted_A & expected_A).sum(),
		'Right I': (predicted_I & expected_I).sum(),
	})

# Calculate the fitness of one chromosome from its counts of correct A's and I's.
def fitness_calculation(column, total_A, total_I):
	"""Return the fitness for one column of match counts.

	The fitness is the product of the values in `column` divided by
	`total_A * total_I`.
	"""
	denominator = total_A * total_I
	return np.prod(column) / denominator

def calculate_chromosomes_fitnes(df_op, df_current_lineage, df_ref_column, total_A, total_I):
	"""Compute the fitness of every chromosome in the current lineage.

	Parameters:
	  df_op: operative DataFrame; each row is scored by every chromosome.
	  df_current_lineage: one chromosome per column. Row 0 is used as the
	    scalar term and the remaining rows as the per-column weights.
	  df_ref_column: reference labels ('A' / 'I') compared against the
	    predictions.
	  total_A, total_I: label totals used to normalise the fitness.

	Returns a one-row DataFrame (index 'Fitness') with one column per
	chromosome, named 'Chromo <n> result'. The DataFrame is also printed.
	"""
	# Score every row of df_op with every chromosome.
	raw_results = {}
	for position in range(len(df_current_lineage.columns)):
		current_chromosome = df_current_lineage.iloc[1:, position].values
		scalar = df_current_lineage.iloc[0, position]
		raw_results[f'Chromo {position + 1} result'] = df_op.apply(
			func=chromo_action, axis=1, args=(current_chromosome, scalar))
	df_results = pd.DataFrame(raw_results)

	# Turn the scores into labels: 'A' when the score is > 0, otherwise 'I'
	# (a score of exactly 0 counts as 'I'). np.where is vectorized and avoids
	# DataFrame.applymap, which is deprecated in recent pandas releases.
	df_labels = pd.DataFrame(
		np.where(df_results > 0, 'A', 'I'),
		index=df_results.index,
		columns=df_results.columns,
	)

	# Count the correct A's and I's per chromosome, then reduce to a fitness.
	df_matches = df_labels.apply(chromo_count_matches, ref_column=df_ref_column)
	fitness_values = df_matches.apply(fitness_calculation, args=(total_A, total_I))

	# Keep only the fitness row; the intermediate match counts are not returned.
	df_current_lineage_fitness = fitness_values.to_frame(name='Fitness').T

	print(f"df_current_lineage_fitness:\n{df_current_lineage_fitness}\n")

	return df_current_lineage_fitness

## Raffle

In [739]:
# Raffle function: it returns a position given a raffle number.
def get_raffle_point(raffle, norm_fitness_array):
	"""Map a raffle number to a position on the cumulative fitness wheel.

	Parameters:
	  raffle: the drawn number.
	  norm_fitness_array: cumulative upper bounds, one per chromosome, in
	    non-decreasing order.

	Returns the index of the first bound that is >= `raffle`: position 0 owns
	everything up to the first bound, and position k owns the interval
	(bound[k-1], bound[k]]. A raffle above the last bound is clamped to the
	last position instead of indexing past the end of the array.

	Raises ValueError when `norm_fitness_array` is empty.
	"""
	if len(norm_fitness_array) == 0:
		raise ValueError("norm_fitness_array must not be empty")

	for position, upper_bound in enumerate(norm_fitness_array):
		if raffle <= upper_bound:
			return position

	# The raffle is above every bound (possible when the rounded bounds do not
	# reach the maximum raffle value): give it to the last chromosome.
	return len(norm_fitness_array) - 1

In [740]:
def elect_father_and_mother(df_current_lineage, df_current_lineage_fitness):
	"""Pick a father and a mother chromosome by fitness-weighted raffle.

	Parameters:
	  df_current_lineage: one chromosome per column.
	  df_current_lineage_fitness: DataFrame whose first row holds one fitness
	    value per chromosome, in the same column order.

	Returns a tuple (father_chromosome, mother_chromosome), each a column of
	`df_current_lineage`. The same chromosome may be drawn for both roles.
	The cumulative wheel and both selected chromosomes are printed.

	Raises ValueError when there are no fitness values.
	"""
	# Fitness values of the current lineage, one per chromosome.
	fitness_array = np.asarray(df_current_lineage_fitness.iloc[0, :].values, dtype=float)
	if len(fitness_array) == 0:
		raise ValueError("df_current_lineage_fitness must hold at least one chromosome")

	fitness_array_sum = fitness_array.sum()
	if fitness_array_sum > 0:
		shares = fitness_array / fitness_array_sum
	else:
		# No chromosome scored anything (or the sum is not a usable number):
		# dividing would produce NaN, so give every chromosome an equal share.
		shares = np.full(len(fitness_array), 1 / len(fitness_array))

	# Cumulative wheel in whole percent, for any population size.
	norm_fitness_array = np.cumsum(np.round(shares * 100)).astype(int)
	# Rounding each share separately can make the running total drift away
	# from 100; cap it and pin the last bound so every raffle in 0..100
	# falls on the wheel.
	norm_fitness_array = np.minimum(norm_fitness_array, 100)
	norm_fitness_array[-1] = 100

	# Get two raffle points given a random raffle.
	raffle_1 = round(np.random.random() * 100)
	raffle_point_1 = get_raffle_point(raffle_1, norm_fitness_array)
	raffle_2 = round(np.random.random() * 100)
	raffle_point_2 = get_raffle_point(raffle_2, norm_fitness_array)

	# Select the father and mother chromosomes at the drawn positions.
	father_chromosome = df_current_lineage.iloc[:, raffle_point_1]
	mother_chromosome = df_current_lineage.iloc[:, raffle_point_2]

	print(f"norm_fitness_array:\n{norm_fitness_array}\n")
	print(f"father_chromosome:\n{father_chromosome}\n")
	print(f"mother_chromosome:\n{mother_chromosome}\n")

	return (father_chromosome, mother_chromosome)

## Cross

In [741]:
# Cross function: it crosses a father and a mother and generates a newborn that
# is part mother and part father. The crossing point is random.
def cross_father_mother(father, mother):
	"""Cross two chromosomes at a random point.

	Parameters:
	  father, mother: sequences of genes with the same length.

	Returns an array made of the mother's genes before the cross point
	followed by the father's genes from the cross point on. The cross point
	is drawn from 0..len(father) inclusive, so a point of 0 yields a copy of
	the father and a point of len(father) yields a copy of the mother.

	Raises ValueError when the two parents differ in length.
	"""
	if len(father) != len(mother):
		raise ValueError("father and mother must have the same number of genes")

	# Scale by the real chromosome length rather than a fixed 5, so longer
	# chromosomes can be cut anywhere (identical draws for 5-gene parents).
	cross_point = round(len(father) * np.random.random())
	maternal_egg = mother[:cross_point]
	paternal_sperm = father[cross_point:]
	return np.hstack((maternal_egg, paternal_sperm))

In [742]:
def cross_and_birth_newborns(father_chromosome, mother_chromosome):
	"""Breed three newborns from the same father and mother.

	Each newborn comes from its own call to cross_father_mother on the
	parents' underlying value arrays. The three newborns are printed and
	returned as a tuple.
	"""
	paternal_genes = father_chromosome.values
	maternal_genes = mother_chromosome.values

	# Three independent crossings of the same pair of parents.
	first_child, second_child, third_child = (
		cross_father_mother(paternal_genes, maternal_genes) for _ in range(3)
	)

	print(f"new born 1:\n{first_child}\n")
	print(f"new born 2:\n{second_child}\n")
	print(f"new born 3:\n{third_child}\n")

	return (first_child, second_child, third_child)

## Mutate

In [743]:
# Get a single random value from the whole current_lineage DataFrame.
def get_random_value(df_current_lineage):
	"""Return one value picked at random from `df_current_lineage`.

	A random row is sampled first, then a random column label is chosen, and
	the value at their intersection is returned.
	"""
	# The row is sampled before the column is chosen; this order determines
	# how the random stream is consumed.
	sampled_row = df_current_lineage.sample()
	chosen_column = np.random.choice(df_current_lineage.columns)
	return sampled_row[chosen_column].values[0]

In [744]:
# Function to mutate a given new born.
def mutate_newborn(new_born, random_value):
	"""Return a copy of `new_born` with one randomly chosen gene replaced.

	Parameters:
	  new_born: the chromosome to mutate; it is not modified.
	  random_value: the value written at the mutation point.

	Returns the mutated copy.

	Raises ValueError when `new_born` has no genes.
	"""
	gene_count = len(new_born)
	if gene_count == 0:
		raise ValueError("new_born must contain at least one gene")

	# Draw a valid index in 0..gene_count-1. Scaling a random number by a
	# fixed 5 and rounding could yield index 5, which is out of range for a
	# 5-gene chromosome, and never reached genes beyond that on longer ones.
	mutation_point = np.random.randint(gene_count)

	mutated_new_born = copy.deepcopy(new_born)
	mutated_new_born[mutation_point] = random_value
	return mutated_new_born

In [745]:
def mutate_the_three_newborns(df_current_lineage, new_born_1, new_born_2, new_born_3):
	"""Mutate each of the three newborns with its own random value.

	For every newborn, a value is drawn from `df_current_lineage` with
	get_random_value and then passed to mutate_newborn. The three mutated
	newborns are printed and returned as a tuple.
	"""
	# Each argument is evaluated before its call, so the draw for a newborn
	# always happens right before that newborn is mutated.
	first_mutant = mutate_newborn(new_born_1, get_random_value(df_current_lineage))
	second_mutant = mutate_newborn(new_born_2, get_random_value(df_current_lineage))
	third_mutant = mutate_newborn(new_born_3, get_random_value(df_current_lineage))

	print(f"mutated new born 1:\n{first_mutant}\n")
	print(f"mutated new born 2:\n{second_mutant}\n")
	print(f"mutated new born 3:\n{third_mutant}\n")

	return first_mutant, second_mutant, third_mutant

## Form new lineage

In [746]:
def calculate_three_newborn_fitness(df_op, df_ref_column, total_A, total_I, mutated_new_born_1, mutated_new_born_2, mutated_new_born_3):
	"""Compute the fitness of the three mutated newborns.

	Parameters:
	  df_op: operative DataFrame; each row is scored by every newborn.
	  df_ref_column: reference labels ('A' / 'I') compared against the
	    predictions.
	  total_A, total_I: label totals used to normalise the fitness.
	  mutated_new_born_1..3: the newborn chromosomes. Element 0 is used as the
	    scalar term and the remaining elements as the per-column weights.

	Returns a tuple (df_three_newborn, df_three_newborn_fitness):
	  - df_three_newborn: the newborns as columns 'Newborn 1'..'Newborn 3',
	  - df_three_newborn_fitness: a one-row DataFrame (index 'Fitness') with
	    columns 'Newborn <n> result'.
	Both DataFrames are also printed.
	"""
	# Create dataframe with the three mutated newborns
	df_three_newborn = pd.DataFrame({'Newborn 1': mutated_new_born_1, 'Newborn 2': mutated_new_born_2, 'Newborn 3': mutated_new_born_3})

	# Score every row of df_op with every newborn.
	raw_results = {}
	for position in range(len(df_three_newborn.columns)):
		current_chromosome = df_three_newborn.iloc[1:, position].values
		scalar = df_three_newborn.iloc[0, position]
		raw_results[f'Newborn {position + 1} result'] = df_op.apply(
			func=chromo_action, axis=1, args=(current_chromosome, scalar))
	df_results = pd.DataFrame(raw_results)

	# Turn the scores into labels: 'A' when the score is > 0, otherwise 'I'
	# (a score of exactly 0 counts as 'I'). np.where is vectorized and avoids
	# DataFrame.applymap, which is deprecated in recent pandas releases.
	df_labels = pd.DataFrame(
		np.where(df_results > 0, 'A', 'I'),
		index=df_results.index,
		columns=df_results.columns,
	)

	# Count the correct A's and I's per newborn, then reduce to a fitness.
	df_matches = df_labels.apply(chromo_count_matches, ref_column=df_ref_column)
	fitness_values = df_matches.apply(fitness_calculation, args=(total_A, total_I))

	# Keep only the fitness row; the intermediate match counts are not returned.
	df_three_newborn_fitness = fitness_values.to_frame(name='Fitness').T

	print(f"df_three_newborn:\n{df_three_newborn}\n")
	print(f"df_three_newborn_fitness:\n{df_three_newborn_fitness}\n")

	return df_three_newborn, df_three_newborn_fitness

In [747]:
def select_new_lineage(df_current_lineage, df_current_lineage_fitness, df_three_newborn, df_three_newborn_fitness):
	"""Build the next lineage from the current lineage and the newborns.

	The newborn with the lowest fitness and the two current chromosomes with
	the lowest fitness are dropped; the survivors are joined side by side
	(current lineage first, newborns last) and renamed 'chromo_1',
	'chromo_2', ... in that order.

	Parameters:
	  df_current_lineage / df_three_newborn: one chromosome per column.
	  df_current_lineage_fitness / df_three_newborn_fitness: fitness per
	    chromosome, with columns in the same order as the matching chromosome
	    DataFrame (their labels differ, so columns are matched by position).

	Returns the new lineage DataFrame. Intermediate and final DataFrames are
	printed.
	"""
	# Choose the best two between the three newborns: drop the least fit one.
	newborn_scores = df_three_newborn_fitness.min()
	worst_newborn_position = df_three_newborn_fitness.columns.get_loc(newborn_scores.idxmin())
	df_three_newborn_less_one = df_three_newborn.drop(
		columns=df_three_newborn.columns[worst_newborn_position])

	print(f"df_three_newborn_less_one:\n{df_three_newborn_less_one}\n")

	# Remove the worst two from the current lineage. Working on the
	# per-column minimum avoids DataFrame.stack(), whose legacy implementation
	# emits a FutureWarning in recent pandas releases.
	lineage_scores = df_current_lineage_fitness.min()
	worst_labels = lineage_scores.nsmallest(2).index
	worst_positions = [df_current_lineage_fitness.columns.get_loc(label) for label in worst_labels]
	df_current_lineage_less_two = df_current_lineage.drop(
		columns=df_current_lineage.columns[worst_positions])

	print(f"df_current_lineage_less_two:\n{df_current_lineage_less_two}\n")

	# Join the survivors and renumber the columns, whatever the population size.
	df_new_lineage = pd.concat([df_current_lineage_less_two, df_three_newborn_less_one], axis=1)
	df_new_lineage.columns = [f'chromo_{number}' for number in range(1, df_new_lineage.shape[1] + 1)]

	print(f"df_new_lineage:\n{df_new_lineage}\n")

	return df_new_lineage


In [748]:
# Read Excel file into a DataFrame.
df = pd.read_excel('data_super_simple.xlsx')

# Set the seed for reproducibility
np.random.seed(40)

# Number of generations to evolve; raise it to run the algorithm for longer.
NBR_GENERATIONS = 1

df_op, df_ref_column, total_A, total_I, nbr_operatives_columns = oil_database(df)
df_current_lineage = create_firstborns(nbr_operatives_columns)

# One pass per generation: score the lineage, pick parents, breed and mutate
# three newborns, score them, and select the survivors as the next lineage.
for i in range(NBR_GENERATIONS):

	df_current_lineage_fitness = calculate_chromosomes_fitnes(df_op, df_current_lineage, df_ref_column, total_A, total_I)
	father_chromosome, mother_chromosome = elect_father_and_mother(df_current_lineage, df_current_lineage_fitness)
	new_born_1, new_born_2, new_born_3 = cross_and_birth_newborns(father_chromosome, mother_chromosome)
	mutated_new_born_1, mutated_new_born_2, mutated_new_born_3 = mutate_the_three_newborns(df_current_lineage, new_born_1, new_born_2, new_born_3)
	df_three_newborn, df_three_newborn_fitness = calculate_three_newborn_fitness(df_op, df_ref_column, total_A, total_I, mutated_new_born_1, mutated_new_born_2, mutated_new_born_3)
	df_new_lineage = select_new_lineage(df_current_lineage, df_current_lineage_fitness, df_three_newborn, df_three_newborn_fitness)

	# The selected lineage becomes the input of the next generation.
	df_current_lineage = df_new_lineage

	print(df_current_lineage)


original dataframe:
   X11  X12  X21  X22 Reference
0    1    0    0    1         A
1    0    1    0    1         A
2    1    0    1    0         I
3    1    0    0    1         I
4    0    1    0    1         A

operative dataframe:
   X11  X12  X21  X22
0    1    0    0    1
1    0    1    0    1
2    1    0    1    0
3    1    0    0    1
4    0    1    0    1

df_current_lineage:
   chromo_1  chromo_2  chromo_3  chromo_4  chromo_5  chromo_6
0 -0.184626 -0.392175  0.961878  0.808534 -0.651595  0.016620
1 -0.889268  0.052799  0.201632 -0.100190 -0.101706  0.260047
2  0.577070  0.247624  0.627937 -0.762151  0.339590  0.670069
3 -0.425390  0.553551  0.417290  0.670600  0.914798 -0.105337
4 -0.099299  0.372483 -0.944931 -0.595504  0.666667  0.524581

df_current_lineage_fitness:
         Chromo 1 result  Chromo 2 result  Chromo 3 result  Chromo 4 result  \
Fitness         0.666667              0.0              0.0              0.0   

         Chromo 5 result  Chromo 6 result  
Fitness  

In [749]:
# Manual check of chromosome 1 against every row of df_op.
chromo_1 = np.array([-0.184626, -0.889268,  0.577070, -0.425390, -0.099299])
print(chromo_1)
print(df_op)

# The first gene is the scalar term; the rest are the per-column weights.
chromo_scalar, chromo_op = chromo_1[0], chromo_1[1:]

# The first five rows of the operative DataFrame as plain arrays.
lesson_1, lesson_2, lesson_3, lesson_4, lesson_5 = (
	df_op.iloc[row_position, :].values for row_position in range(5)
)

# Score each row by hand: weighted sum plus the scalar.
for lesson in (lesson_1, lesson_2, lesson_3, lesson_4, lesson_5):
	print((chromo_op * lesson).sum() + chromo_scalar)
print(df_ref_column)

print(total_A, total_I)
print((2*2)/(3*2))

[-0.18 -0.89  0.58 -0.43 -0.1 ]
   X11  X12  X21  X22
0    1    0    0    1
1    0    1    0    1
2    1    0    1    0
3    1    0    0    1
4    0    1    0    1
-1.173193
0.29314499999999993
-1.4992839999999998
-1.173193
0.29314499999999993
0    A
1    A
2    I
3    I
4    A
Name: Reference, dtype: object
3 2
0.6666666666666666


In [750]:
def my_func(row, chromo):
	"""Score one row with a chromosome and print the row's values.

	`chromo[0]` is the scalar term and `chromo[1:]` the weights multiplied
	element-wise with the row. Returns the weighted sum plus the scalar.
	"""
	features = row.values
	print(features)
	bias, weights = chromo[0], chromo[1:]
	return (weights * features).sum() + bias
# Apply my_func to every row of df_op, passing chromo_1 positionally.
result = df_op.apply(my_func, axis=1, args=(chromo_1,))

print(result)

[1 0 0 1]
[0 1 0 1]
[1 0 1 0]
[1 0 0 1]
[0 1 0 1]
0   -1.173193
1    0.293145
2   -1.499284
3   -1.173193
4    0.293145
dtype: float64


In [751]:
# Sample Series
series1 = pd.Series([1, 2, 3], name='Series 1')
series2 = pd.Series([4, 5, 6], name='Series 2')
series3 = pd.Series([7, 8, 9], name='Series 3')

series = [series1, series2, series3]

# Build the DataFrame in one step: one column per Series, named by position.
df = pd.DataFrame({f'column {position}': serie for position, serie in enumerate(series)})

# Print the DataFrame
print(df)

   column 0  column 1  column 2
0         1         4         7
1         2         5         8
2         3         6         9


In [752]:
# Sample Series
series1 = pd.Series([1, 2, 3], name='Series 1')
series2 = pd.Series([4, 5, 6], name='Series 2')
series3 = pd.Series([7, 8, 9], name='Series 3')

# List of Series
series = [series1, series2, series3]

# Pair every Series with its positional column name and build the DataFrame
# from that mapping.
column_names = [f'Column {position}' for position in range(len(series))]
df = pd.DataFrame(dict(zip(column_names, series)))

# Print the DataFrame
print(df)

   Column 0  Column 1  Column 2
0         1         4         7
1         2         5         8
2         3         6         9
