In [None]:
"""
This script requires an input Excel file named 'input_file.xlsx' (read from the current working directory) with at least the following columns:
- 'ID': A unique identifier for each molecule.
- 'Smiles': The SMILES string representing the molecule.

The script calculates the size-normalized Spacial Score (nSPS), 
as reported in https://doi.org/10.1021/acs.jmedchem.3c00689. 

Documentation for the RDKit implementation of this index can be found at:
https://rdkit.org/docs/source/rdkit.Chem.SpacialScore.html

It can also be used to update an existing database that already contains 
precomputed values in the 'Spacial Score' column. The code automatically detects 
cells with empty values in 'Spacial Score' and calculates results only for those, 
leaving previously computed values unchanged. Otherwise, if no values exist in 
'Spacial Score', the script will calculate results for the entire database.

Example of an input file structure for updating a database:

ID          Smiles        Spacial Score
CMPD001     CCO           3.45
CMPD002     C1CCO1        2.87
CMPD003     CCN(CC)C      4.12
CMPD004     COC           <empty>
CMPD005     CC(=O)O       <empty>

The results are saved in a new Excel file named 'results.xlsx' 
(or 'results_updated.xlsx' if the input already contained values).
"""

import sys
import pandas as pd
from rdkit import Chem
from rdkit.Chem import SpacialScore

# Read the molecule table from the input workbook
df = pd.read_excel('input_file.xlsx')

# Guarantee that a 'Spacial Score' column is present; when the input has none,
# start with an all-empty column so every row counts as "missing".
if 'Spacial Score' not in df:
    df['Spacial Score'] = None

# Function to calculate the size-normalized Spacial Score (nSPS)
def calculate_spacial_score(smiles):
    """Return the size-normalized Spacial Score (nSPS) for a SMILES string.

    Args:
        smiles: SMILES string describing the molecule. Non-string values
            (e.g. the NaN pandas produces for an empty Excel cell) and
            blank strings are treated as "no molecule".

    Returns:
        The nSPS rounded to two decimals, or None when the input is not a
        usable SMILES string or RDKit cannot parse it.
    """
    # MolFromSmiles raises on non-string arguments; without this guard a
    # single empty 'Smiles' cell would abort the whole run before saving.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None.
        return None

    score = SpacialScore.SPS(mol, normalize=True)  # normalize=True -> nSPS
    return round(score, 2)  # Round to 2 decimals

# Remember whether the input already carried scores; this only decides the
# output filename further down.
already_has_values = df['Spacial Score'].notna().any()

# Calculate only for rows where 'Spacial Score' is missing (NaN or None).
# The filtered frame is a separate object, so writing into df while looping
# over it is safe.
rows_to_score = df[df['Spacial Score'].isna()]
failed_ids = []  # IDs for which no score could be computed

for idx, row in rows_to_score.iterrows():
    score = calculate_spacial_score(row['Smiles'])
    df.at[idx, 'Spacial Score'] = score
    if score is None:
        # Do not report this row as successful; collect it for the summary.
        failed_ids.append(row['ID'])
        continue
    # Update the last successfully processed ID in real time (overwriting in the same line)
    sys.stdout.write(f"\rLast successful ID processed: {row['ID']}")
    sys.stdout.flush()

# Choose output filename depending on whether there were preexisting values
output_file = "results_updated.xlsx" if already_has_values else "results.xlsx"
df.to_excel(output_file, index=False)

print(f"\nProcessing completed. Results saved to '{output_file}'.")

# Make failures visible instead of leaving silent empty cells in the output.
if failed_ids:
    failed_list = ", ".join(str(mol_id) for mol_id in failed_ids)
    print(f"No score could be computed for {len(failed_ids)} row(s): {failed_list}")


In [None]:
# Optional: Display the resulting dataframe
#df