In [803]:
import pandas as pd
import numpy as np

In [804]:
# Read Excel file into a DataFrame.
df = pd.read_excel('data.xlsx')

# Remove column.
df = df.drop(columns=['Unnamed: 0'])

# Change column name.
df = df.rename(columns={'Unnamed: 19': 'Reference'})

# Create column of reference with A and I.
df_ref_column = df['Reference']

# Store the DataFrame reference of A's and I's in a int.
total_A = df["Reference"].value_counts()["A"]
total_I = df["Reference"].value_counts()["I"]

# Define operative DataFrame: 
# with the columns from X11 to X92, without Reference column.
df_op = df.loc[:, "X11": "X92"]

# Count the number of operatives columns (between X11 and X92).
nbr_op_columns = len(df_op.columns)

df.head()

Unnamed: 0,X11,X12,X21,X22,X31,X32,X41,X42,X51,X52,X61,X62,X71,X72,X81,X82,X91,X92,Reference
0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,A
2,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
3,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,I
4,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,A


In [805]:
# Define function to aply chromosome in each line of the DataFrame
# and return the result (line * chromosome).sum().
def chromo_action(row, chromosome, trash):
	res_mult = row * chromosome
	res_sum = res_mult.sum()
	return (res_sum)

In [806]:
# Create firstborns (chromosomes).

# Set the seed for reproducibility
#np.random.seed(42)

# Generate 6 random chromosomes.
chromosome1 = -1 + 2 * np.random.random(nbr_op_columns)
chromosome2 = -1 + 2 * np.random.random(nbr_op_columns)
chromosome3 = -1 + 2 * np.random.random(nbr_op_columns)
chromosome4 = -1 + 2 * np.random.random(nbr_op_columns)
chromosome5 = -1 + 2 * np.random.random(nbr_op_columns)
chromosome6 = -1 + 2 * np.random.random(nbr_op_columns)

# Create re-usable DataFrame with the current lineage
df_current_lineage = pd.DataFrame({'chromo_1':chromosome1,
								   'chromo_2':chromosome2,
								   'chromo_3':chromosome3,
								   'chromo_4':chromosome4,
								   'chromo_5':chromosome5,
								   'chromo_6':chromosome6})

df_current_lineage.head()

Unnamed: 0,chromo_1,chromo_2,chromo_3,chromo_4,chromo_5,chromo_6
0,0.601195,0.24608,-0.203217,0.697305,0.894317,0.383074
1,0.677345,0.11307,0.646312,-0.030483,0.793026,0.405768
2,-0.371704,-0.023513,-0.998553,0.336033,0.221963,-0.663522
3,-0.698869,0.126424,-0.334934,-0.375646,-0.586378,0.924941
4,0.862897,-0.099331,-0.473267,0.3504,-0.210552,-0.681385


In [807]:
# Iterate df_current_lineage in database.
i = 0
trash = None
df_chromo_result = pd.DataFrame()
max_i = len(df_current_lineage.columns)
while i < max_i:
	current_chromosome = (df_current_lineage.iloc[:, i]).values
	chromo_result = df_op.apply(func=chromo_action, axis=1, args=(current_chromosome, trash))
	df_chromo_result[f'chromo_result_{i+1}'] = chromo_result
	i = i + 1

df_chromo_result.head()

Unnamed: 0,chromo_result_1,chromo_result_2,chromo_result_3,chromo_result_4,chromo_result_5,chromo_result_6
0,-1.771214,-0.661132,-0.390112,1.810522,0.059537,-0.263767
1,-1.695065,-0.794143,0.459416,1.082734,-0.041754,-0.241073
2,-0.109456,-2.234861,0.321619,0.915455,3.981142,2.871861
3,-0.436621,-2.084924,0.985237,0.203776,3.172802,4.460324
4,-1.508709,-1.271263,-0.653245,-0.0305,0.424172,0.866151


In [819]:
# Create chromo reference: a DataFrame that contain
# 'A' if number > 0 and 'I' if number < 0.
df_chromo_reference = df_chromo_result.applymap(lambda x: 'A' if x > 0 else 'I')

df_chromo_reference.head()

Unnamed: 0,chromo_result_1,chromo_result_2,chromo_result_3,chromo_result_4,chromo_result_5,chromo_result_6
0,I,I,I,A,A,I
1,I,I,A,A,I,I
2,I,I,A,A,A,A
3,I,I,A,A,A,A
4,I,I,I,I,A,A


In [809]:
def chromo_count_matches(column):
	relative_A = ((column == 'A') & (df_ref_column == 'A')).sum()
	relative_I = ((column == 'I') & (df_ref_column == 'I')).sum()
	return pd.Series({'Relative_A': relative_A, 'Relative_I': relative_I})

In [810]:
unknown_result = df_chromo_reference.apply(chromo_count_matches)

unknown_result

Unnamed: 0,chromo_result_1,chromo_result_2,chromo_result_3,chromo_result_4,chromo_result_5,chromo_result_6
Relative_A,3,0,11,17,20,16
Relative_I,18,19,3,4,0,1


In [811]:
# Calculate how many A's and how many I's were correct.
# relative_A = ((df_ref_column == 'A') & (df_chromo_reference == 'A')).sum()
# relative_I = ((df_ref_column == 'I') & (df_chromo_reference == 'I')).sum()

# print(relative_A)
# print(relative_I)

In [812]:
# Caulculate the fitness: the percentage of rightness of this chromosome.

fitness = (relative_A * relative_I) / (total_A * total_I)
print(fitness)

0    0.096491
1    0.078947
2    0.078947
dtype: float64


In [813]:
# Sample DataFrame
df = pd.DataFrame({'A': [1, 2, 3],
                   'B': [4, 5, 6]})

# Sample pandas Series
series = pd.Series([10, 20])

# Perform the operation to add the Series to each row of the DataFrame
result = df.values * series.values
print(df.values)
print()
print(series.values)
print()
print(result)

# Create a new DataFrame from the result
result_df = pd.DataFrame(result, columns=df.columns)

# Print the resulting DataFrame
print(result_df.head())

[[1 4]
 [2 5]
 [3 6]]

[10 20]

[[ 10  80]
 [ 20 100]
 [ 30 120]]
    A    B
0  10   80
1  20  100
2  30  120


In [814]:
# Sample DataFrame (10x3)
df = pd.DataFrame(np.random.choice(['A', 'I'], size=(5, 3)))

print(df)
print()

# Sample pandas Series with 10 elements
series = pd.Series(np.random.choice(['A', 'I'], size=5))

print(series)
print()

# Define a function to count occurrences of 'A' and 'I' in each column
def count_matches(column):
    relative_A = ((column == 'A') & (series == 'A')).sum()
    relative_I = ((column == 'I') & (series == 'I')).sum()
    return pd.Series({'Relative_A': relative_A, 'Relative_I': relative_I})

# Apply the function to each column of the DataFrame
result_df = df.apply(count_matches)

# Print the resulting DataFrame
print(result_df)

   0  1  2
0  I  A  A
1  I  A  A
2  A  I  A
3  A  A  I
4  A  A  A

0    A
1    I
2    A
3    I
4    I
dtype: object

            0  1  2
Relative_A  1  1  2
Relative_I  1  0  1


In [815]:
# Sample DataFrame (10x3)
df = pd.DataFrame(np.random.choice(['A', 'I'], size=(5, 3)))

# Sample pandas Series with 10 elements
series = pd.Series(np.random.choice(['A', 'I'], size=5))

# Compare each element of the DataFrame with the corresponding element of the Series
comparison_result = df.eq(series, axis=0)

# Count the occurrences of 'A' and 'I' for each column
relative_A = comparison_result.sum()
relative_I = comparison_result.size - relative_A

# Create a new DataFrame from the counts
result_df = pd.DataFrame({'Relative_A': relative_A, 'Relative_I': relative_I})

# Print the resulting DataFrame
print(result_df)

   Relative_A  Relative_I
0           2          13
1           4          11
2           1          14
