In [241]:
import pandas as pd
import numpy as np
import copy

In [242]:
# Read Excel file into a DataFrame.
df = pd.read_excel('data.xlsx')

# Remove column.
df = df.drop(columns=['Unnamed: 0'])

# Change column name.
df = df.rename(columns={'Unnamed: 19': 'Reference'})

# Create column of reference with A and I.
df_ref_column = df['Reference']

# Store the DataFrame reference of A's and I's in a int.
total_A = df["Reference"].value_counts()["A"]
total_I = df["Reference"].value_counts()["I"]

# Define operative DataFrame:
# with the columns from X11 to X92, without Reference column.
df_op = df.loc[:, "X11": "X92"]

# Count the number of operatives columns (between X11 and X92).
nbr_operatives_columns = len(df_op.columns)

df.head()

Unnamed: 0,X11,X12,X21,X22,X31,X32,X41,X42,X51,X52,X61,X62,X71,X72,X81,X82,X91,X92,Reference
0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
2,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
3,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
4,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,A


In [243]:
# Create firstborns (chromosomes).

# Set the seed for reproducibility
#np.random.seed(42)

# Generate 6 random chromosomes.
chromosome1 = -1 + 2 * np.random.random(nbr_operatives_columns + 1) # +1 for the scalar
chromosome2 = -1 + 2 * np.random.random(nbr_operatives_columns + 1)
chromosome3 = -1 + 2 * np.random.random(nbr_operatives_columns + 1)
chromosome4 = -1 + 2 * np.random.random(nbr_operatives_columns + 1)
chromosome5 = -1 + 2 * np.random.random(nbr_operatives_columns + 1)
chromosome6 = -1 + 2 * np.random.random(nbr_operatives_columns + 1)

# Create re-usable DataFrame with the current lineage
df_current_lineage = pd.DataFrame({'chromo_1':chromosome1,
								   'chromo_2':chromosome2,
								   'chromo_3':chromosome3,
								   'chromo_4':chromosome4,
								   'chromo_5':chromosome5,
								   'chromo_6':chromosome6})

print(df_current_lineage)

    chromo_1  chromo_2  chromo_3  chromo_4  chromo_5  chromo_6
0  -0.893240  0.449923  0.829539 -0.576998 -0.113690 -0.702904
1   0.085454  0.351816 -0.707908 -0.519866  0.696768 -0.377718
2   0.939442  0.257263  0.988512  0.979540  0.985483 -0.227283
3  -0.389501  0.859917 -0.662081  0.677834 -0.073306  0.420463
4  -0.820535  0.772733 -0.095299  0.234220 -0.037589 -0.615950
5   0.649892 -0.669799 -0.365390  0.943343  0.364025 -0.975094
6   0.396295  0.439642  0.551867  0.799154 -0.323626  0.715258
7   0.890326 -0.223737 -0.286404 -0.515554 -0.173726 -0.944360
8   0.254737 -0.917037 -0.897913  0.091098 -0.686596  0.872575
9   0.468091 -0.073335  0.276966 -0.972560 -0.243196  0.368229
10  0.371519  0.064836  0.707771 -0.427511  0.869035 -0.283971
11  0.565102 -0.617108  0.997575  0.148116 -0.646251 -0.688151
12  0.216616  0.211433 -0.921014  0.106721  0.539866 -0.018545
13 -0.652786 -0.951113 -0.831782  0.970634 -0.366530 -0.577157
14  0.460491 -0.582048  0.452024 -0.428367  0.060972 -0

In [244]:
# Function to aply each chromosome for each line of the DataFrame
# and return the result (line * chromosome).sum().
def chromo_action(row, chromosome, scalar, trash):
	res_mult = row * chromosome
	res_sum = res_mult.sum() + scalar
	return (res_sum)

# Iterate df_current_lineage in database.
i = 0
trash = None
df_current_lineage_fitness = pd.DataFrame()
max_i = len(df_current_lineage.columns)
while i < max_i:
	current_chromosome = (df_current_lineage.iloc[1:, i]).values
	scalar = df_current_lineage.iloc[0, i]
	chromo_result = df_op.apply(func=chromo_action, axis=1, args=(current_chromosome, scalar, trash))
	df_current_lineage_fitness[f'Chromo {i+1} result'] = chromo_result
	i = i + 1

# Create chromo reference: a DataFrame that contain
# 'A' if number > 0 and 'I' if number < 0.
df_current_lineage_fitness = df_current_lineage_fitness.applymap(lambda x: 'A' if x > 0 else 'I')

# Calculate how many A's and how many I's were correct.
def chromo_count_matches(column):
	relative_A = ((column == 'A') & (df_ref_column == 'A')).sum()
	relative_I = ((column == 'I') & (df_ref_column == 'I')).sum()
	return pd.Series({'Right A': relative_A, 'Right I': relative_I})

df_current_lineage_fitness = df_current_lineage_fitness.apply(chromo_count_matches)

# Calculate the fitness for all the six chromosome
def fitness_calculation(column):
	relative_numerator = np.prod(column)
	fitness = relative_numerator / (total_A * total_I)
	return fitness

fitness_values = df_current_lineage_fitness.apply(fitness_calculation)
df_current_lineage_fitness.loc['Fitness'] = fitness_values

# Removing obsolete A and I rows
i_remove = ['Right A', 'Right I']
df_current_lineage_fitness = df_current_lineage_fitness.drop(i_remove)

df_current_lineage_fitness

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Fitness,0.151316,0.0,0.526316,0.421053,0.460526,0.0


## Raffle

In [245]:
# Get two raffle points to choose two chromosomes.
# The choosen chromosomes are going to be crossed.

# Create array with fitness values.
fitness_array = (df_current_lineage_fitness.iloc[0, :]).values

# Sum the array.
fitness_array_sum = fitness_array.sum()

# Create new array with cumulative sum.
norm_fit_0 = round((fitness_array[0]/ fitness_array_sum) * 100)
norm_fit_1 = round((fitness_array[1]/ fitness_array_sum) * 100) + norm_fit_0
norm_fit_2 = round((fitness_array[2]/ fitness_array_sum) * 100) + norm_fit_1
norm_fit_3 = round((fitness_array[3]/ fitness_array_sum) * 100) + norm_fit_2
norm_fit_4 = round((fitness_array[4]/ fitness_array_sum) * 100) + norm_fit_3
norm_fit_5 = round((fitness_array[5]/ fitness_array_sum) * 100) + norm_fit_4
norm_fitness_array = np.array([norm_fit_0, norm_fit_1, norm_fit_2, norm_fit_3, norm_fit_4, norm_fit_5])
# norm_fitness_array[5] = 1 (this value must be equal to one).
# This is not used, is just a check.

# raffle function: it resturn a position given a raffle number.
def get_raffle_point(raffle):
	max_i = len(norm_fitness_array)
	i = 0
	while i < max_i:
		if 0 <= raffle <= norm_fitness_array[0]:
			return 0
		elif norm_fitness_array[i] < raffle <= norm_fitness_array[i + 1]:
			return i
		i = i + 1

# Get two raffle points given a random raffle.
raffle_1 = round(np.random.random() * 100)
raffle_point_1 = get_raffle_point(raffle_1)
raffle_2 = round(np.random.random() * 100)
raffle_point_2 = get_raffle_point(raffle_2)

# Selec (from the random above) a mother and a father chromosome.
father_chromosome = df_current_lineage.iloc[:, raffle_point_1]
mother_chromosome = df_current_lineage.iloc[:, raffle_point_2]

## Cross

In [246]:
father_chromosome.values

array([-0.57699824, -0.51986574,  0.97954006,  0.67783361,  0.23422034,
        0.94334275,  0.79915371, -0.51555398,  0.09109828, -0.97256003,
       -0.42751077,  0.14811624,  0.10672114,  0.97063371, -0.42836678,
       -0.48228877, -0.84512656,  0.54250719,  0.10267428])

In [247]:
mother_chromosome.values

array([-0.89323996,  0.08545443,  0.93944214, -0.38950124, -0.82053537,
        0.64989177,  0.39629522,  0.89032573,  0.25473713,  0.46809132,
        0.37151932,  0.56510175,  0.21661636, -0.65278614,  0.46049051,
       -0.06825958,  0.91795322,  0.76041578,  0.09786458])

In [248]:
# cross function: it cross a father and a mother and generates a new born that
# is part mother and part father. The crossing point is random.
def cross_father_mother(father, mother):
	cross_point = round(18 * np.random.random())
	paternal_sperm  = father[cross_point:]
	maternal_egg = mother[:cross_point]
	new_born = np.hstack((maternal_egg, paternal_sperm))
	return (new_born)

# Create 3 new borns with the same father and mother.
new_born_1 = cross_father_mother(father_chromosome.values, mother_chromosome.values)
new_born_2 = cross_father_mother(father_chromosome.values, mother_chromosome.values)
new_born_3 = cross_father_mother(father_chromosome.values, mother_chromosome.values)

print(new_born_1)
print()
print(new_born_2)
print()
print(new_born_3)

[-0.89323996  0.08545443  0.93944214 -0.38950124 -0.82053537  0.64989177
  0.39629522  0.89032573  0.25473713  0.46809132  0.37151932  0.56510175
  0.21661636  0.97063371 -0.42836678 -0.48228877 -0.84512656  0.54250719
  0.10267428]

[-0.89323996  0.08545443  0.93944214 -0.38950124 -0.82053537  0.64989177
  0.39629522  0.89032573  0.25473713  0.46809132  0.37151932  0.56510175
  0.21661636 -0.65278614 -0.42836678 -0.48228877 -0.84512656  0.54250719
  0.10267428]

[-0.89323996  0.08545443  0.93944214 -0.38950124 -0.82053537  0.64989177
  0.39629522  0.89032573  0.25473713  0.46809132 -0.42751077  0.14811624
  0.10672114  0.97063371 -0.42836678 -0.48228877 -0.84512656  0.54250719
  0.10267428]


## Mutate

In [249]:
df_current_lineage

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,-0.89324,0.449923,0.829539,-0.576998,-0.11369,-0.702904
1,0.085454,0.351816,-0.707908,-0.519866,0.696768,-0.377718
2,0.939442,0.257263,0.988512,0.97954,0.985483,-0.227283
3,-0.389501,0.859917,-0.662081,0.677834,-0.073306,0.420463
4,-0.820535,0.772733,-0.095299,0.23422,-0.037589,-0.61595
5,0.649892,-0.669799,-0.36539,0.943343,0.364025,-0.975094
6,0.396295,0.439642,0.551867,0.799154,-0.323626,0.715258
7,0.890326,-0.223737,-0.286404,-0.515554,-0.173726,-0.94436
8,0.254737,-0.917037,-0.897913,0.091098,-0.686596,0.872575
9,0.468091,-0.073335,0.276966,-0.97256,-0.243196,0.368229


In [250]:
# Get random a single value from all the current_lineage database.
def get_random_value(df_current_lineage):
	# Select a random row
	random_row = df_current_lineage.sample()
	# Select a random column
	random_column = np.random.choice(df_current_lineage.columns)
	# Get the value at the random row and random column
	random_value = random_row[random_column].values[0]
	return random_value

In [251]:
# Function to mutate a given new born.
def mutate_newborn(new_born, random_value):
	mutation_point = round(18 * np.random.random())
	mutated_new_born = copy.deepcopy(new_born)
	mutated_new_born[mutation_point] = random_value
	return mutated_new_born

In [252]:
# Mutating the three new borns
random_value = get_random_value(df_current_lineage)
mutated_new_born_1 = mutate_newborn(new_born_1, random_value)
random_value = get_random_value(df_current_lineage)
mutated_new_born_2 = mutate_newborn(new_born_2, random_value)
random_value = get_random_value(df_current_lineage)
mutated_new_born_3 = mutate_newborn(new_born_3, random_value)

print(mutated_new_born_1)
print(mutated_new_born_2)
print(mutated_new_born_3)

[-0.89323996  0.08545443  0.93944214 -0.38950124 -0.82053537  0.64989177
  0.39629522  0.89032573  0.25473713  0.46809132  0.37151932  0.56510175
  0.21661636  0.97063371 -0.42836678 -0.48228877 -0.84512656  0.54250719
  0.10267428]
[-0.89323996  0.08545443  0.93944214 -0.38950124 -0.82053537  0.64989177
  0.39629522  0.89032573  0.25473713  0.46809132  0.37151932  0.56510175
  0.21661636 -0.65278614 -0.42836678  0.64989177 -0.84512656  0.54250719
  0.10267428]
[-0.28640379  0.08545443  0.93944214 -0.38950124 -0.82053537  0.64989177
  0.39629522  0.89032573  0.25473713  0.46809132 -0.42751077  0.14811624
  0.10672114  0.97063371 -0.42836678 -0.48228877 -0.84512656  0.54250719
  0.10267428]


## Form new lineage

In [253]:
# Create dataframe with the three mutated newborns
df_three_newborn = pd.DataFrame({'Newborn 1':mutated_new_born_1, 'Newborn 2':mutated_new_born_2, 'Newborn 3':mutated_new_born_3})
df_three_newborn

Unnamed: 0,Newborn 1,Newborn 2,Newborn 3
0,-0.89324,-0.89324,-0.286404
1,0.085454,0.085454,0.085454
2,0.939442,0.939442,0.939442
3,-0.389501,-0.389501,-0.389501
4,-0.820535,-0.820535,-0.820535
5,0.649892,0.649892,0.649892
6,0.396295,0.396295,0.396295
7,0.890326,0.890326,0.890326
8,0.254737,0.254737,0.254737
9,0.468091,0.468091,0.468091


In [254]:
# Apply the same fitness flow to the new dataframe with the newborns

# Iterate df_current_lineage in database.
i = 0
trash = None
df_three_newborn_fitness = pd.DataFrame()
max_i = len(df_three_newborn.columns)
while i < max_i:
	current_chromosome = (df_three_newborn.iloc[1:, i]).values
	scalar = df_three_newborn.iloc[0, i]
	chromo_result = df_op.apply(func=chromo_action, axis=1, args=(current_chromosome, scalar, trash))
	df_three_newborn_fitness[f'Newborn {i+1} result'] = chromo_result
	i = i + 1

# Create chromo reference: a DataFrame that contain
# 'A' if number > 0 and 'I' if number < 0.
df_three_newborn_fitness = df_three_newborn_fitness.applymap(lambda x: 'A' if x > 0 else 'I')

# Calculate how many A's and how many I's were correct.
df_three_newborn_fitness = df_three_newborn_fitness.apply(chromo_count_matches)

# Calculate the fitness for all the six chromosome
fitness_values = df_three_newborn_fitness.apply(fitness_calculation)
df_three_newborn_fitness.loc['Fitness'] = fitness_values

# Removing obsolete A and I rows
i_remove = ['Right A', 'Right I']
df_three_newborn_fitness = df_three_newborn_fitness.drop(i_remove)

df_three_newborn_fitness

Unnamed: 0,Newborn 1 result,Newborn 2 result,Newborn 3 result
Fitness,0.041667,0.385965,0.061404


In [258]:
largest_values = (df_three_newborn_fitness.iloc[0, :]).nsmallest(2)
largest_indices = largest_values.index
print(largest_indices)


Index(['Newborn 1 result', 'Newborn 3 result'], dtype='object')


In [255]:
df_current_lineage_fitness

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Fitness,0.151316,0.0,0.526316,0.421053,0.460526,0.0


In [256]:
smallest_values = (df_current_lineage_fitness.iloc[0, :]).nsmallest(2)
smallest_indices = smallest_values.index
print(smallest_indices)


Index(['Chromo 2 result', 'Chromo 6 result'], dtype='object')


In [257]:

# Sample Pandas Series
series = pd.Series([5, 1, 8, 2, 10, 0])

# Find the two smallest values and their indices
smallest_values = series.nsmallest(2)
smallest_indices = smallest_values.index

largest_values = series.nlargest(2)
largest_indices = largest_values.index

# Print the indices of the two smallest values
print("Indices of the two smallest values:", smallest_indices)
print("Indices of the two smallest values:", largest_indices)


Indices of the two smallest values: Index([5, 1], dtype='int64')
Indices of the two smallest values: Index([4, 2], dtype='int64')
