In [910]:
import pandas as pd
import numpy as np

In [911]:
# Load the spreadsheet, discard the 'Unnamed: 0' column and give the
# 'Unnamed: 19' column the name 'Reference'.
df = (
	pd.read_excel('data.xlsx')
	.drop(columns=['Unnamed: 0'])
	.rename(columns={'Unnamed: 19': 'Reference'})
)

# Reference labels ('A' / 'I'), one per row.
df_ref_column = df['Reference']

# Number of rows carrying each reference label.
# Indexing the counts raises KeyError when a label never occurs.
label_counts = df_ref_column.value_counts()
total_A = label_counts["A"]
total_I = label_counts["I"]

# Operative DataFrame: every column from X11 through X92 (label-based
# slice, both ends included), i.e. without the Reference column.
df_op = df.loc[:, "X11":"X92"]

# Number of operative columns (between X11 and X92).
nbr_op_columns = df_op.shape[1]

df.head()

Unnamed: 0,X11,X12,X21,X22,X31,X32,X41,X42,X51,X52,X61,X62,X71,X72,X81,X82,X91,X92,Reference
0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
2,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
3,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
4,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,A


In [912]:
# Define function to apply a chromosome to each line of the DataFrame
# and return the result (line * chromosome).sum().
def chromo_action(row, chromosome, trash=None):
	"""Return the chromosome-weighted sum of one row.

	Parameters
	----------
	row : pandas.Series
		One row of the operative DataFrame.
	chromosome : array-like
		One weight per element of ``row``; multiplied element-wise.
	trash : object, optional
		Unused placeholder, kept (now with a default) so existing calls
		that pass a third argument keep working.

	Returns
	-------
	scalar
		``(row * chromosome).sum()``.
	"""
	weighted = row * chromosome
	return weighted.sum()

In [913]:
# Create firstborns (chromosomes).

# Uncomment to make the random draw reproducible.
#np.random.seed(42)

# Draw the 6 founding chromosomes in order: one weight per operative
# column, each value taken from np.random.random and rescaled to [-1, 1).
chromosome1, chromosome2, chromosome3, chromosome4, chromosome5, chromosome6 = (
	-1 + 2 * np.random.random(nbr_op_columns) for _ in range(6)
)

# Re-usable DataFrame holding the current lineage, one column per chromosome
# ('chromo_1' ... 'chromo_6').
df_current_lineage = pd.DataFrame({
	f'chromo_{k}': member
	for k, member in enumerate(
		(chromosome1, chromosome2, chromosome3,
		chromosome4, chromosome5, chromosome6),
		start=1,
	)
})

df_current_lineage.head()

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.596611,0.565423,-0.794597,0.831577,-0.285833,-0.91163
1,0.794974,-0.968235,0.243709,0.04958,0.741248,0.224795
2,0.125348,-0.049867,0.641095,0.736567,0.942665,0.054944
3,0.821177,-0.893106,-0.514463,-0.204459,0.038678,-0.124089
4,-0.425033,0.226753,0.180072,-0.67093,-0.363598,0.397476


In [914]:
# Score every row of df_op against each chromosome of the current lineage.

# chromo_action takes a third, unused argument; `trash` is the placeholder
# passed for it. `trash` and `max_i` stay defined at notebook level in case
# later cells read them.
trash = None
max_i = len(df_current_lineage.columns)

# One result column per chromosome: lineage column k (0-based) produces
# 'chromo_result_{k+1}', holding chromo_action's value for every row of df_op.
# The frame is built in one shot instead of being grown column by column.
df_chromo_result = pd.DataFrame({
	f'chromo_result_{i + 1}': df_op.apply(
		chromo_action, axis=1, args=(chromosome.to_numpy(), trash)
	)
	for i, (_, chromosome) in enumerate(df_current_lineage.items())
})

df_chromo_result.head()

Unnamed: 0,chromo_result_1,chromo_result_2,chromo_result_3,chromo_result_4,chromo_result_5,chromo_result_6
0,2.384909,-0.880909,-0.850128,-0.282502,-0.83207,-2.074583
1,2.583271,-2.414566,0.188178,-1.064499,0.195011,-0.938158
2,-0.084048,1.110298,-0.287467,3.531235,-2.276139,-1.194989
3,0.611781,0.267059,-1.443025,2.59021,-3.180125,-1.374023
4,1.647919,-1.977017,1.339767,-0.288002,-1.429933,-2.458823


In [915]:
# Create chromo reference: a DataFrame with the same index and columns as
# df_chromo_result that contains 'A' where the score is > 0 and 'I'
# everywhere else (so a score of exactly 0 is labelled 'I').
# np.where is used instead of DataFrame.applymap, which is deprecated in
# recent pandas releases and evaluates a Python lambda per cell.
df_chromo_reference = pd.DataFrame(
	np.where((df_chromo_result > 0).to_numpy(), 'A', 'I'),
	index=df_chromo_result.index,
	columns=df_chromo_result.columns,
)

df_chromo_reference.head()

Unnamed: 0,chromo_result_1,chromo_result_2,chromo_result_3,chromo_result_4,chromo_result_5,chromo_result_6
0,A,I,I,I,I,I
1,A,I,A,I,A,I
2,I,A,I,A,I,I
3,A,A,I,A,I,I
4,A,I,A,I,I,I


In [916]:
# Calculate how many A's and how many I's were correct.
def chromo_count_matches(column, reference=None):
	"""Count the predictions in ``column`` that agree with the reference labels.

	Parameters
	----------
	column : pandas.Series
		Predicted labels ('A' or 'I'), one per row.
	reference : pandas.Series, optional
		Labels to compare against. When omitted, the notebook-level
		``df_ref_column`` is used, so ``DataFrame.apply(chromo_count_matches)``
		keeps working unchanged.

	Returns
	-------
	pandas.Series
		``Relative_A``: number of rows where both prediction and reference
		are 'A'; ``Relative_I``: number of rows where both are 'I'.
	"""
	if reference is None:
		# Fall back to the global reference column defined when loading the data.
		reference = df_ref_column
	matched_A = ((column == 'A') & (reference == 'A')).sum()
	matched_I = ((column == 'I') & (reference == 'I')).sum()
	return pd.Series({'Relative_A': matched_A, 'Relative_I': matched_I})

# Tally, column by column (one column per chromosome), how many 'A' and 'I'
# predictions match the reference labels.
df_chromo_relative = df_chromo_reference.apply(chromo_count_matches, axis=0)

df_chromo_relative

Unnamed: 0,chromo_result_1,chromo_result_2,chromo_result_3,chromo_result_4,chromo_result_5,chromo_result_6
Relative_A,21,3,12,11,4,2
Relative_I,3,7,13,2,19,12


In [917]:
# Calculate the fitness of each of the six chromosomes.
def fitness_calculation(column, ref_total_A=None, ref_total_I=None):
	"""Return the fitness of one chromosome from its match counts.

	The fitness is the product of all values in ``column`` divided by
	``ref_total_A * ref_total_I``.

	Parameters
	----------
	column : pandas.Series or array-like
		Match counts of one chromosome (the values are multiplied together).
	ref_total_A, ref_total_I : number, optional
		Denominator factors. When omitted, the notebook-level ``total_A`` and
		``total_I`` are used, so ``DataFrame.apply(fitness_calculation)``
		keeps working unchanged.

	Returns
	-------
	scalar
		``np.prod(column) / (ref_total_A * ref_total_I)``.
	"""
	if ref_total_A is None:
		ref_total_A = total_A
	if ref_total_I is None:
		ref_total_I = total_I
	relative_numerator = np.prod(column)
	return relative_numerator / (ref_total_A * ref_total_I)

# Compute the fitness from the two count rows only. Selecting them explicitly
# keeps this cell idempotent: once the 'Fitness' row has been appended below,
# re-running the cell would otherwise multiply the previous fitness into the
# product.
fitness_values = (
	df_chromo_relative
	.loc[['Relative_A', 'Relative_I']]
	.apply(fitness_calculation)
)

# Store the fitness as a third row (creating it, or overwriting it on re-run).
df_chromo_relative.loc['Fitness'] = fitness_values

df_chromo_relative

Unnamed: 0,chromo_result_1,chromo_result_2,chromo_result_3,chromo_result_4,chromo_result_5,chromo_result_6
Relative_A,21.0,3.0,12.0,11.0,4.0,2.0
Relative_I,3.0,7.0,13.0,2.0,19.0,12.0
Fitness,0.138158,0.046053,0.342105,0.048246,0.166667,0.052632
