In [1]:
import pandas as pd
import numpy as np

# Load Excel files
fplm = pd.read_excel(r"C:\Users\incha\MTdata\Indian_Community\Sorted_FPLM_Core_FVA_match.xlsx")
fpl = pd.read_excel(r"C:\Users\incha\MTdata\Indian_Community\Sorted_FPL_Core_FVA_match.xlsx")
flm = pd.read_excel(r"C:\Users\incha\MTdata\Indian_Community\Sorted_FLM_Core_FVA_match.xlsx")
fpm = pd.read_excel(r"C:\Users\incha\MTdata\Indian_Community\Sorted_FPM_Core_FVA_match.xlsx")
plm = pd.read_excel(r"C:\Users\incha\MTdata\Indian_Community\Sorted_PLM_Core_FVA_match.xlsx")

# Use 'Core Function' and 'Pathway' from FPLM for annotation
fplm = fplm[['reactions', 'minimum', 'maximum', 'Core Function', 'Pathway']].rename(columns={
    'minimum': 'min_FPLM', 'maximum': 'max_FPLM'
})
fpl = fpl[['reactions', 'minimum', 'maximum']].rename(columns={'minimum': 'min_FPL', 'maximum': 'max_FPL'})
flm = flm[['reactions', 'minimum', 'maximum']].rename(columns={'minimum': 'min_FLM', 'maximum': 'max_FLM'})
fpm = fpm[['reactions', 'minimum', 'maximum']].rename(columns={'minimum': 'min_FPM', 'maximum': 'max_FPM'})
plm = plm[['reactions', 'minimum', 'maximum']].rename(columns={'minimum': 'min_PLM', 'maximum': 'max_PLM'})

# Merge all files
merged = fplm.merge(fpl, on='reactions', how='outer') \
             .merge(flm, on='reactions', how='outer') \
             .merge(fpm, on='reactions', how='outer') \
             .merge(plm, on='reactions', how='outer')

# Fill NaNs with 0 for comparison
merged_filled = merged.fillna(0)

# Compute % differences from FPLM
merged_filled['diff_min_FPL'] = np.abs(merged_filled['min_FPL'] - merged_filled['min_FPLM']) / (np.abs(merged_filled['min_FPLM']) + 1e-6)
merged_filled['diff_max_FPL'] = np.abs(merged_filled['max_FPL'] - merged_filled['max_FPLM']) / (np.abs(merged_filled['max_FPLM']) + 1e-6)

merged_filled['diff_min_FLM'] = np.abs(merged_filled['min_FLM'] - merged_filled['min_FPLM']) / (np.abs(merged_filled['min_FPLM']) + 1e-6)
merged_filled['diff_max_FLM'] = np.abs(merged_filled['max_FLM'] - merged_filled['max_FPLM']) / (np.abs(merged_filled['max_FPLM']) + 1e-6)

merged_filled['diff_min_FPM'] = np.abs(merged_filled['min_FPM'] - merged_filled['min_FPLM']) / (np.abs(merged_filled['min_FPLM']) + 1e-6)
merged_filled['diff_max_FPM'] = np.abs(merged_filled['max_FPM'] - merged_filled['max_FPLM']) / (np.abs(merged_filled['max_FPLM']) + 1e-6)

merged_filled['diff_min_PLM'] = np.abs(merged_filled['min_PLM'] - merged_filled['min_FPLM']) / (np.abs(merged_filled['min_FPLM']) + 1e-6)
merged_filled['diff_max_PLM'] = np.abs(merged_filled['max_PLM'] - merged_filled['max_FPLM']) / (np.abs(merged_filled['max_FPLM']) + 1e-6)

# Identify reactions differing by more than 10%
threshold = 0.10

diff_FPL = merged_filled[
    (merged_filled['diff_min_FPL'] > threshold) |
    (merged_filled['diff_max_FPL'] > threshold)
][['reactions', 'Core Function', 'Pathway']]

diff_FLM = merged_filled[
    (merged_filled['diff_min_FLM'] > threshold) |
    (merged_filled['diff_max_FLM'] > threshold)
][['reactions', 'Core Function', 'Pathway']]

diff_FPM = merged_filled[
    (merged_filled['diff_min_FPM'] > threshold) |
    (merged_filled['diff_max_FPM'] > threshold)
][['reactions', 'Core Function', 'Pathway']]

diff_PLM = merged_filled[
    (merged_filled['diff_min_PLM'] > threshold) |
    (merged_filled['diff_max_PLM'] > threshold)
][['reactions', 'Core Function', 'Pathway']]

# Reactions only in FPLM
only_in_FPLM = merged[
    merged['min_FPLM'].notna() &
    merged[['min_FPL', 'min_FLM', 'min_FPM', 'min_PLM']].isna().all(axis=1)
][['reactions', 'Core Function', 'Pathway']]

# Display summary with core function and pathway
print("\n--- Reactions Significantly Different from FPLM ---")
print(f"FPL: {len(diff_FPL)} reactions\n", diff_FPL.head(10))
print(f"FLM: {len(diff_FLM)} reactions\n", diff_FLM.head(10))
print(f"FPM: {len(diff_FPM)} reactions\n", diff_FPM.head(10))
print(f"PLM: {len(diff_PLM)} reactions\n", diff_PLM.head(10))

print("\n--- Reactions Only in FPLM ---")
print(f"{len(only_in_FPLM)} reactions\n", only_in_FPLM.head(10))



--- Reactions Significantly Different from FPLM ---
FPL: 12772602 reactions
                reactions Core Function             Pathway
1525201  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525202  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525203  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525204  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525205  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525206  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525207  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525208  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525209  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
1525210  Fp_ACBIPGT_Fp_c             0  Metabolic pathways
FLM: 19058313 reactions
          reactions Core Function               Pathway
0  Fp_ACACT1r_Fp_c             0  Butanoate metabolism
1  Fp_ACACT1r_Fp_c             0  Butanoate metabolism
2  Fp_ACACT1r_Fp_c             0  Butanoate metabolism
3  Fp_ACACT1r_Fp_c          