In [336]:
import pandas as pd
import numpy as np

In [337]:
# Read Excel file into a DataFrame.
df = pd.read_excel('data.xlsx')

# Remove column.
df = df.drop(columns=['Unnamed: 0'])

# Change column name.
df = df.rename(columns={'Unnamed: 19': 'Reference'})

# Create column of reference with A and I.
df_ref_column = df['Reference']

# Store the DataFrame reference of A's and I's in a int.
total_A = df["Reference"].value_counts()["A"]
total_I = df["Reference"].value_counts()["I"]

# Define operative DataFrame:
# with the columns from X11 to X92, without Reference column.
df_op = df.loc[:, "X11": "X92"]

# Count the number of operatives columns (between X11 and X92).
nbr_operatives_columns = len(df_op.columns)

df.head()

Unnamed: 0,X11,X12,X21,X22,X31,X32,X41,X42,X51,X52,X61,X62,X71,X72,X81,X82,X91,X92,Reference
0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
2,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
3,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
4,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,A


In [338]:
# Create firstborns (chromosomes).

# Set the seed for reproducibility
#np.random.seed(42)

# Generate 6 random chromosomes.
chromosome1 = -1 + 2 * np.random.random(nbr_operatives_columns)
chromosome2 = -1 + 2 * np.random.random(nbr_operatives_columns)
chromosome3 = -1 + 2 * np.random.random(nbr_operatives_columns)
chromosome4 = -1 + 2 * np.random.random(nbr_operatives_columns)
chromosome5 = -1 + 2 * np.random.random(nbr_operatives_columns)
chromosome6 = -1 + 2 * np.random.random(nbr_operatives_columns)

# Create re-usable DataFrame with the current lineage
df_current_lineage = pd.DataFrame({'chromo_1':chromosome1,
								   'chromo_2':chromosome2,
								   'chromo_3':chromosome3,
								   'chromo_4':chromosome4,
								   'chromo_5':chromosome5,
								   'chromo_6':chromosome6})

df_current_lineage.head()

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.797721,0.160114,-0.821126,-0.744858,0.074637,0.47998
1,0.316144,0.230988,-0.145695,-0.430663,-0.822228,-0.299697
2,-0.361811,-0.106485,-0.614226,0.755313,-0.694564,-0.660114
3,-0.952215,-0.880261,0.280308,-0.199523,0.657629,-0.784125
4,-0.045492,-0.082615,0.334298,0.070873,0.114818,-0.676314


In [339]:
# Function to aply each chromosome for each line of the DataFrame
# and return the result (line * chromosome).sum().
def chromo_action(row, chromosome, trash):
	res_mult = row * chromosome
	res_sum = res_mult.sum()
	return (res_sum)

# Iterate df_current_lineage in database.
i = 0
trash = None
df_chromo_result = pd.DataFrame()
max_i = len(df_current_lineage.columns)
while i < max_i:
	current_chromosome = (df_current_lineage.iloc[:, i]).values
	chromo_result = df_op.apply(func=chromo_action, axis=1, args=(current_chromosome, trash))
	df_chromo_result[f'Chromo {i+1} result'] = chromo_result
	i = i + 1

# Create chromo reference: a DataFrame that contain
# 'A' if number > 0 and 'I' if number < 0.
df_chromo_result = df_chromo_result.applymap(lambda x: 'A' if x > 0 else 'I')

# Calculate how many A's and how many I's were correct.
def chromo_count_matches(column):
	relative_A = ((column == 'A') & (df_ref_column == 'A')).sum()
	relative_I = ((column == 'I') & (df_ref_column == 'I')).sum()
	return pd.Series({'Right A': relative_A, 'Right I': relative_I})

df_chromo_result = df_chromo_result.apply(chromo_count_matches)

# Calculate the fitness for all the six chromosome
def fitness_calculation(column):
	relative_numerator = np.prod(column)
	fitness = relative_numerator / (total_A * total_I)
	return fitness

fitness_values = df_chromo_result.apply(fitness_calculation)
df_chromo_result.loc['Fitness'] = fitness_values

# Removing obsolete A and I rows
i_remove = ['Right A', 'Right I']
df_chromo_result = df_chromo_result.drop(i_remove)

df_chromo_result

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Fitness,0.052632,0.105263,0.041667,0.118421,0.105263,0.032895


In [340]:
# Get two raffle points to choose two chromosomes.
# The choosen chromosomes are going to be crossed.

fitness_array = (df_chromo_result.iloc[0, :]).values

fitness_array_sum = fitness_array.sum()

norm_fit_0 = round((fitness_array[0]/ fitness_array_sum) * 100)
norm_fit_1 = round((fitness_array[1]/ fitness_array_sum) * 100) + norm_fit_0
norm_fit_2 = round((fitness_array[2]/ fitness_array_sum) * 100) + norm_fit_1
norm_fit_3 = round((fitness_array[3]/ fitness_array_sum) * 100) + norm_fit_2
norm_fit_4 = round((fitness_array[4]/ fitness_array_sum) * 100) + norm_fit_3
norm_fit_5 = round((fitness_array[5]/ fitness_array_sum) * 100) + norm_fit_4

norm_fitness_array = np.array([norm_fit_0, norm_fit_1, norm_fit_2, norm_fit_3, norm_fit_4, norm_fit_5])

norm_fitness_array_sum = norm_fitness_array.sum()
print(norm_fitness_array)

def get_raffle_point(raffle):
	max_i = len(norm_fitness_array)
	i = 0
	while i < max_i:
		if 0 <= raffle <= norm_fitness_array[0]:
			return 0
		elif norm_fitness_array[i] < raffle <= norm_fitness_array[i + 1]:
			return i
		i = i + 1

raffle_1 = round(np.random.random() * 100)
print(raffle_1)
raffle_point_1 = get_raffle_point(raffle_1)
print(raffle_point_1)
raffle_2 = round(np.random.random() * 100)
print(raffle_2)
raffle_point_2 = get_raffle_point(raffle_2)
print(raffle_point_2)

[ 12  35  44  70  93 100]
63
2
78
3


In [341]:
df_current_lineage.head()

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.797721,0.160114,-0.821126,-0.744858,0.074637,0.47998
1,0.316144,0.230988,-0.145695,-0.430663,-0.822228,-0.299697
2,-0.361811,-0.106485,-0.614226,0.755313,-0.694564,-0.660114
3,-0.952215,-0.880261,0.280308,-0.199523,0.657629,-0.784125
4,-0.045492,-0.082615,0.334298,0.070873,0.114818,-0.676314


In [342]:
father_chromosome = df_current_lineage.iloc[:, raffle_point_1]
father_chromosome.head()

0   -0.821126
1   -0.145695
2   -0.614226
3    0.280308
4    0.334298
Name: chromo_3, dtype: float64

In [343]:
mother_chromosome = df_current_lineage.iloc[:, raffle_point_2]
mother_chromosome.head()

0   -0.744858
1   -0.430663
2    0.755313
3   -0.199523
4    0.070873
Name: chromo_4, dtype: float64