In [188]:
import pandas as pd
import numpy as np

In [189]:
# Load the spreadsheet, drop the 'Unnamed: 0' column and rename
# 'Unnamed: 19' (the A/I labels) to 'Reference'.
df = (
	pd.read_excel('data.xlsx')
	.drop(columns=['Unnamed: 0'])
	.rename(columns={'Unnamed: 19': 'Reference'})
)

# Reference labels ('A' / 'I'), one per row.
df_ref_column = df['Reference']

# Number of rows carrying each reference label.
reference_counts = df["Reference"].value_counts()
total_A = reference_counts["A"]
total_I = reference_counts["I"]

# Operative DataFrame: the columns X11 through X92 (label-based slice,
# both ends included), i.e. everything except the Reference column.
df_op = df.loc[:, "X11":"X92"]

# Number of operative columns (X11 .. X92).
nbr_operatives_columns = df_op.shape[1]

df.head()

Unnamed: 0,X11,X12,X21,X22,X31,X32,X41,X42,X51,X52,X61,X62,X71,X72,X81,X82,X91,X92,Reference
0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
2,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
3,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
4,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,A


In [190]:
# Create firstborns (chromosomes).

# Uncomment to fix the seed and make the draw reproducible.
#np.random.seed(42)

# Draw 6 random chromosomes, one weight per operative column.
# np.random.random yields values in [0, 1), so each weight lies in [-1, 1).
# The generator is consumed left to right, so the draw order is
# chromosome1, chromosome2, ..., chromosome6.
(chromosome1, chromosome2, chromosome3,
 chromosome4, chromosome5, chromosome6) = (
	-1 + 2 * np.random.random(nbr_operatives_columns) for _ in range(6)
)

# Re-usable DataFrame holding the current lineage: one column per chromosome.
df_current_lineage = pd.DataFrame(
	{
		'chromo_1': chromosome1,
		'chromo_2': chromosome2,
		'chromo_3': chromosome3,
		'chromo_4': chromosome4,
		'chromo_5': chromosome5,
		'chromo_6': chromosome6,
	}
)

df_current_lineage.head()

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.958245,-0.617144,0.332301,0.456182,-0.459209,-0.147258
1,0.652952,-0.951075,0.153839,-0.436038,-0.714318,-0.85187
2,0.001497,0.099984,-0.612246,0.328038,-0.799956,0.345092
3,0.948392,-0.321915,0.340882,0.325058,0.653717,0.444818
4,-0.220991,0.209218,0.203082,0.021466,-0.464115,-0.265216


In [191]:
def chromo_action(row, chromosome, trash=None):
	"""Return the weighted sum of one data row under one chromosome.

	The row is multiplied element-wise by the chromosome and the products
	are summed: ``(row * chromosome).sum()``.

	Parameters
	----------
	row : pandas.Series or numpy.ndarray
		One row of the data (what ``DataFrame.apply(..., axis=1)`` passes in).
	chromosome : array-like
		Weights to multiply the row by, one per element of ``row``.
	trash : any, optional
		Never read. Kept so existing calls that pass a third positional
		argument keep working; it now defaults to ``None`` so new callers
		can omit it (e.g. ``args=(chromosome,)``).

	Returns
	-------
	scalar
		Sum of the element-wise products.
	"""
	return (row * chromosome).sum()

# Apply every chromosome of df_current_lineage to every row of df_op.
# Each chromosome produces one result column holding the per-row weighted sum.
trash = None  # dummy value for chromo_action's unused third parameter
df_chromo_result = pd.DataFrame()
for chromo_index in range(len(df_current_lineage.columns)):
	current_chromosome = df_current_lineage.iloc[:, chromo_index].values
	df_chromo_result[f'Chromo {chromo_index + 1} result'] = df_op.apply(
		func=chromo_action, axis=1, args=(current_chromosome, trash)
	)

df_chromo_result.head()

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
0,3.065409,-0.405989,1.261868,0.324124,-0.730617,-0.534756
1,2.760116,-0.739921,1.083406,-0.568096,-0.985726,-1.239367
2,1.509497,-0.166514,-1.270971,-1.08442,-2.478895,0.383901
3,2.456392,-0.588413,-0.317843,-1.087399,-1.025222,0.483627
4,3.369059,-0.885901,0.993875,-1.919903,-0.375692,-2.180054


In [192]:
# Create chromo reference: label every score 'A' when it is strictly
# positive and 'I' otherwise (a score of exactly 0, or NaN, becomes 'I').
# np.where thresholds the whole frame in one vectorized step and avoids
# DataFrame.applymap, which is deprecated since pandas 2.1.
df_chromo_result = pd.DataFrame(
	np.where(df_chromo_result > 0, 'A', 'I'),
	index=df_chromo_result.index,
	columns=df_chromo_result.columns,
)

df_chromo_result.head()

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
0,A,I,A,A,I,I
1,A,I,A,I,I,I
2,A,I,I,I,I,A
3,A,I,I,I,I,A
4,A,I,A,I,I,I


In [193]:
def chromo_count_matches(column, reference=None):
	"""Count how many 'A' and how many 'I' predictions match the reference.

	Parameters
	----------
	column : pandas.Series
		Predicted labels ('A' or 'I') produced by one chromosome.
	reference : pandas.Series, optional
		Labels to compare against. When omitted, the notebook-level
		``df_ref_column`` is used, so the function can still be passed to
		``DataFrame.apply`` with no extra arguments.

	Returns
	-------
	pandas.Series
		Two entries: 'Right A' (positions where both ``column`` and the
		reference are 'A') and 'Right I' (positions where both are 'I').
	"""
	if reference is None:
		# Fall back to the reference column defined when the data was loaded.
		reference = df_ref_column
	relative_A = ((column == 'A') & (reference == 'A')).sum()
	relative_I = ((column == 'I') & (reference == 'I')).sum()
	return pd.Series({'Right A': relative_A, 'Right I': relative_I})

# Column by column, replace each chromosome's labels with its two match
# counts (rows 'Right A' and 'Right I').
df_chromo_result = df_chromo_result.apply(chromo_count_matches, axis=0)

df_chromo_result

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Right A,24,2,19,6,0,4
Right I,0,15,16,11,18,5


In [194]:
def fitness_calculation(column, count_A=None, count_I=None):
	"""Compute the fitness of one chromosome from its match counts.

	The fitness is the product of the values in ``column`` divided by
	``count_A * count_I``.

	Parameters
	----------
	column : pandas.Series or array-like
		Match counts of one chromosome (in this notebook: the 'Right A'
		and 'Right I' values).
	count_A : number, optional
		Total number of 'A' labels in the reference. Defaults to the
		notebook-level ``total_A``.
	count_I : number, optional
		Total number of 'I' labels in the reference. Defaults to the
		notebook-level ``total_I``.

	Returns
	-------
	float
		``prod(column) / (count_A * count_I)``.
	"""
	# Fall back to the totals computed when the data was loaded, so the
	# function can still be passed to DataFrame.apply with no extra arguments.
	if count_A is None:
		count_A = total_A
	if count_I is None:
		count_I = total_I
	relative_numerator = np.prod(column)
	return relative_numerator / (count_A * count_I)

# One fitness value per chromosome (i.e. per column of df_chromo_result).
fitness_values = df_chromo_result.apply(fitness_calculation, axis=0)

# Append the fitness values as a new 'Fitness' row below the match counts.
df_chromo_result.loc['Fitness'] = fitness_values

df_chromo_result

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Right A,24.0,2.0,19.0,6.0,0.0,4.0
Right I,0.0,15.0,16.0,11.0,18.0,5.0
Fitness,0.0,0.065789,0.666667,0.144737,0.0,0.04386


In [195]:
# Remove the 'Right A' and 'Right I' rows, which are obsolete once the
# fitness has been computed.
i_remove = ['Right A', 'Right I']
df_chromo_result = df_chromo_result.drop(index=i_remove)

df_chromo_result

Unnamed: 0,Chromo 1 result,Chromo 2 result,Chromo 3 result,Chromo 4 result,Chromo 5 result,Chromo 6 result
Fitness,0.0,0.065789,0.666667,0.144737,0.0,0.04386
