In [590]:
import pandas as pd
import numpy as np

In [591]:
# Load the spreadsheet, discard the 'Unnamed: 0' column and give the
# 'Unnamed: 19' column the name 'Reference'.
df = (
    pd.read_excel('data.xlsx')
    .drop(columns=['Unnamed: 0'])
    .rename(columns={'Unnamed: 19': 'Reference'})
)

# Reference labels ('A' / 'I'), kept as a standalone Series.
df_ref_column = df['Reference']

# Number of rows carrying each label. Indexing the counts raises KeyError
# when a label never occurs in the column.
reference_counts = df["Reference"].value_counts()
total_A = reference_counts["A"]
total_I = reference_counts["I"]

# Operative DataFrame: the columns from X11 through X92 (label-based slice,
# so both ends are included); the Reference column is left out.
df_op = df.loc[:, "X11":"X92"]

# How many operative columns there are (between X11 and X92).
nbr_op_columns = len(df_op.columns)

df.head()

Unnamed: 0,X11,X12,X21,X22,X31,X32,X41,X42,X51,X52,X61,X62,X71,X72,X81,X82,X91,X92,Reference
0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
2,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
3,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
4,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,A


In [592]:
# Apply a chromosome to one line of the DataFrame.
def chromo_action(row, chromosome, trash=None):
	"""Return the sum of ``row`` multiplied element-wise by ``chromosome``.

	Parameters
	----------
	row
		One line of the DataFrame (what ``DataFrame.apply(..., axis=1)``
		hands to the function).
	chromosome
		Weights multiplied with ``row``; the result of ``row * chromosome``
		must support ``.sum()``.
	trash
		Unused. Kept only so existing calls that pass
		``args=(chromosome, trash)`` keep working. It now defaults to
		``None``, so ``args=(chromosome,)`` (note the trailing comma that
		makes it a tuple) is sufficient.

	Returns
	-------
	The value of ``(row * chromosome).sum()``.
	"""
	return (row * chromosome).sum()

In [593]:
# Create firstborns (chromosomes).

# To make the draws reproducible, seed the generator first:
#np.random.seed(42)

# Draw 6 random chromosomes, each with one weight per operative column.
# Every weight is computed as -1 + 2 * (a sample from np.random.random).
firstborns = [-1 + 2 * np.random.random(nbr_op_columns) for _draw in range(6)]
chromosome1, chromosome2, chromosome3, chromosome4, chromosome5, chromosome6 = firstborns

# Re-usable DataFrame with the current lineage: one column per chromosome,
# named chromo_1 ... chromo_6 in draw order.
df_current_lineage = pd.DataFrame(
    {f'chromo_{number}': weights for number, weights in enumerate(firstborns, start=1)}
)

df_current_lineage.head()

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.646675,0.613311,-0.62941,-0.348061,-0.267303,-0.83673
1,0.921231,-0.862268,-0.837095,-0.771628,-0.83151,0.073984
2,0.857785,0.093892,0.034567,0.903091,-0.100186,0.693112
3,0.475362,-0.048199,-0.123472,0.919086,0.14906,0.077185
4,0.961767,0.889847,0.383753,0.438085,-0.383553,-0.409753


In [594]:
# Score every line of df_op against each chromosome of df_current_lineage,
# collecting one result column per chromosome (chromo_result_1, ...).
trash = None  # placeholder for the third argument chromo_action is declared with
df_chromo_result = pd.DataFrame()
for position, (column_name, lineage_column) in enumerate(df_current_lineage.items(), start=1):
	current_chromosome = lineage_column.values
	chromo_result = df_op.apply(func=chromo_action, axis=1, args=(current_chromosome, trash))
	df_chromo_result[f'chromo_result_{position}'] = chromo_result

print(df_chromo_result)

    chromo_result_1  chromo_result_2  chromo_result_3  chromo_result_4  \
0          0.224990         3.606891         2.822826         1.658318   
1          0.499547         2.131312         2.615141         1.234752   
2          3.716545        -2.322068        -1.421011         1.471229   
3          3.334123        -2.464159        -1.579049         1.487224   
4          1.350886         1.145173         2.003998         1.950988   
5          2.893120         1.000089         1.378942        -0.800213   
6          1.068409        -2.354567         1.171633         0.699471   
7          2.865205        -1.335928        -0.809867         0.754992   
8          3.716545        -2.322068        -1.421011         1.471229   
9          1.410791        -1.249617         1.092523         1.562962   
10         1.734860         2.154833         1.869279        -0.442740   
11         2.805300         1.058861         0.101607         1.143018   
12         0.933815         1.266737  

In [595]:
# Score every line of df_op against the first chromosome only.
# chromo_action is declared with a third parameter, so a placeholder
# (trash) is handed over together with the chromosome.
trash = None

chromo_result = df_op.apply(lambda row: chromo_action(row, chromosome1, trash), axis=1)

chromo_result

0     0.224990
1     0.499547
2     3.716545
3     3.334123
4     1.350886
5     2.893120
6     1.068409
7     2.865205
8     3.716545
9     1.410791
10    1.734860
11    2.805300
12    0.933815
13    2.927269
14    3.656640
15    0.499547
16   -0.257239
17    1.212591
18    1.928024
19    2.339913
20    3.051290
21    3.139407
22    3.003500
23    4.090908
24    1.729482
25    0.385293
26    1.495308
27    1.260907
28    1.729482
29    2.987675
30    1.350886
31    1.350886
32    0.876933
33    3.482028
34    2.865205
35    2.865205
36    0.459507
37    2.103845
38    0.837835
39    3.099606
40    0.841929
41    5.260938
42    2.103845
dtype: float64

In [596]:
# Create chromo reference: a column (Series) that contains 'A' where the
# score is strictly greater than 0 and 'I' everywhere else (zero included).
chromo_reference = chromo_result.map(lambda score: 'I' if not score > 0 else 'A')

chromo_reference

0     A
1     A
2     A
3     A
4     A
5     A
6     A
7     A
8     A
9     A
10    A
11    A
12    A
13    A
14    A
15    A
16    I
17    A
18    A
19    A
20    A
21    A
22    A
23    A
24    A
25    A
26    A
27    A
28    A
29    A
30    A
31    A
32    A
33    A
34    A
35    A
36    A
37    A
38    A
39    A
40    A
41    A
42    A
dtype: object

In [597]:
# Count the correct predictions per label: lines where both the reference
# column and the chromosome's reference say 'A', and likewise for 'I'.
relative_A = (df_ref_column.eq('A') & chromo_reference.eq('A')).sum()
relative_I = (df_ref_column.eq('I') & chromo_reference.eq('I')).sum()

print(relative_A, relative_I, sep='\n')

24
1


In [598]:
# Calculate the fitness of this chromosome: the product of the correctly
# predicted A and I counts divided by the product of the total A and I
# counts (a ratio, not a percentage).

correct_product = relative_A * relative_I
total_product = total_A * total_I
fitness = correct_product / total_product
print(fitness)

0.05263157894736842


In [599]:
# Sample DataFrame
# NOTE(review): this rebinds `df`, the name the earlier cells use for the
# data read from the Excel file.
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})

# Sample pandas Series
series = pd.Series([10, 20])

# Multiply each row of the DataFrame element-wise by the Series values:
# the length-2 array is broadcast across the 3 rows.
frame_values = df.values
series_values = series.values
result = frame_values * series_values

# Show both operands and the product, separated by blank lines.
print(frame_values, series_values, result, sep="\n\n")

# Create a new DataFrame from the result, reusing the original column labels
result_df = pd.DataFrame(result, columns=df.columns)

# Print the resulting DataFrame
print(result_df.head())

[[1 4]
 [2 5]
 [3 6]]

[10 20]

[[ 10  80]
 [ 20 100]
 [ 30 120]]
    A    B
0  10   80
1  20  100
2  30  120
