In [None]:
"""
Code function:
1) This code merges a compound database (with previously determined physicochemical properties and chirality using the code:
   'Physicochemical properties and chirality.ipynb') with another set of new compounds whose descriptors were calculated separately 
   with the same code.

Requirements/Notes:
1) Both databases are merged based on their 'ID'. Therefore, both databases MUST have:
    - A column with the header 'ID'
    - Matching IDs for the compounds to be updated.

    Original database (example format, first 5 rows):
        ID      Smiles        SlogP
        001     CCO           0.77
        002     C1CCO1        0.85
        003     CCN(CC)C      2.54
        004     COC           <empty>
        005     CC(=O)O       <empty>

        - Contains compounds with properties already calculated (IDs 001–003).
        - Also contains new compounds (IDs 004–005) with missing properties.
        - Keeps other metadata fields (not shown in this example).

    New compounds (results file example format, corresponding to missing ones):
        ID      Smiles        SlogP
        004     COC           1.12
        005     CC(=O)O       -0.34

        - Only the calculated descriptors and chirality information of the new compounds.
        - Does not need extra metadata.

    1.1) Before applying this code, both databases should have their descriptors calculated with 
         'Physicochemical properties and chirality.ipynb'.

2) The descriptors of the 'new compounds' MUST be calculated with 
   'Physicochemical properties and chirality.ipynb' in a separate file from the 'original database'.

3) Once both databases have the physicochemical properties calculated with the code mentioned above,
   use this script to merge them.

4) The output file 'database merged.xlsx' contains the complete original database (all rows and metadata columns),
   with the calculated descriptors and chirality information of the new compounds filled in.
"""

import pandas as pd

# === 1. Input files ===
results_file = "Input the name of the new compounds file.xlsx"  # New compounds (properties already calculated)
database_file = "Input the name of the Original database file.xlsx"  # Original database
output_file = "database merged.xlsx"

# === 2. Define properties columns to update ===
properties_cols = [
    "SlogP", "TPSA", "MW", "NumRotatableBonds",
    "NumHBD", "NumHBA", "Chirality",
    "NumStereocenters", "NumUnspecifiedStereocenters"
]

# Keep ID as key
key_col = "ID"


def merge_properties(df_database: pd.DataFrame, df_results: pd.DataFrame,
                     properties_cols: list, key_col: str = "ID") -> tuple:
    """Copy property values from ``df_results`` into ``df_database``, matching rows on ``key_col``.

    Both frames are modified in place: ``df_database`` receives the new values
    and keeps ``key_col`` as a regular column, while ``df_results`` ends up
    indexed by ``key_col``. All validation happens before anything is mutated,
    so a raised error leaves both frames untouched.

    Args:
        df_database: Original database; must contain ``key_col``.
        df_results: Newly calculated compounds; must contain ``key_col`` and
            every column listed in ``properties_cols``.
        properties_cols: Names of the columns to copy over.
        key_col: Name of the identifier column shared by both frames.

    Returns:
        A tuple ``(unmatched_ids, skipped_cols)``: the IDs of ``df_results``
        that do not exist in ``df_database`` (their rows are not merged), and
        the entries of ``properties_cols`` that ``df_database`` does not have
        (their values are not merged).

    Raises:
        KeyError: if ``key_col`` is missing from either frame, or a property
            column is missing from ``df_results``.
        ValueError: if ``df_results`` has duplicated IDs, or none of its IDs
            exists in ``df_database``.
    """
    # --- Validate first, so a failure never leaves the frames half-modified ---
    for label, frame in (("original database", df_database),
                         ("new compounds file", df_results)):
        if key_col not in frame.columns:
            raise KeyError(f"Column '{key_col}' not found in the {label}.")

    missing_cols = [col for col in properties_cols if col not in df_results.columns]
    if missing_cols:
        raise KeyError(
            f"Property columns missing from the new compounds file: {missing_cols}"
        )

    result_ids = df_results[key_col]
    duplicated = result_ids[result_ids.duplicated()].unique().tolist()
    if duplicated:
        # With repeated IDs it is ambiguous which row should supply the values.
        raise ValueError(
            f"Duplicated '{key_col}' values in the new compounds file: {duplicated}"
        )

    matched = result_ids.isin(df_database[key_col])
    if not matched.any():
        # Typical cause: IDs stored as text in one workbook and as numbers in
        # the other (e.g. '004' vs 4), so nothing lines up.
        raise ValueError(
            f"No '{key_col}' value of the new compounds file exists in the "
            "original database; nothing to merge."
        )
    unmatched_ids = result_ids[~matched].tolist()

    # Columns the database does not have are not created by DataFrame.update.
    skipped_cols = [col for col in properties_cols if col not in df_database.columns]

    # --- Merge: align both frames on the key and overwrite the properties ---
    df_database.set_index(key_col, inplace=True)
    df_results.set_index(key_col, inplace=True)
    # DataFrame.update only copies non-NA values, so empty cells in the
    # results never erase values already present in the database.
    df_database.update(df_results[properties_cols])
    df_database.reset_index(inplace=True)

    return unmatched_ids, skipped_cols


# The guard keeps the file I/O from running on import; executed as a script or
# as a notebook cell, everything below runs exactly as before.
if __name__ == "__main__":
    # === 3. Load data ===
    df_results = pd.read_excel(results_file)
    df_database = pd.read_excel(database_file)

    # === 4. Update only the property columns in the database ===
    unmatched_ids, skipped_cols = merge_properties(
        df_database, df_results, properties_cols, key_col
    )
    if skipped_cols:
        print(f"WARNING: columns not present in the original database were skipped: {skipped_cols}")
    if unmatched_ids:
        print(f"WARNING: IDs not found in the original database were not merged: {unmatched_ids}")

    # === 5. Save updated database ===
    df_database.to_excel(output_file, index=False)

    print(f"✅ Database updated in-place saved as '{output_file}'")
