# In [None]:
"""
Code function:
1) This code calculates the following descriptors: SlogP, TPSA, MW, NumRotatableBonds, NumHBD and NumHBA.
2) It also calculates two parameters related to chirality: NumStereocenters and NumUnspecifiedStereocenters.
    2.1) The compounds are classified according to their chirality: chirality annotated, chirality not annotated and achiral.
3) This information is exported to an Excel file.


Requirements/Notes: 
1) The column containing the SMILES strings must have the header: Smiles
2) Set the name of the input file in the script (the `input_file` variable).
3) The file results.xlsx contains the calculated descriptors and chirality information for all the compounds.
"""

import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, Crippen, Lipinski, rdMolDescriptors

# === 1. Read Excel file ===
# Output workbook written at the end of the script.
output_file = "results.xlsx"
# Workbook to process; edit this name before running.
input_file = "Name of the input file.xlsx"

df = pd.read_excel(input_file)

# Echo the headers so a mislabeled SMILES column is easy to spot.
available_columns = df.columns
print("Columns in file:", available_columns)

# Everything below looks the structures up by this exact header, so stop
# right away when it is absent.
has_smiles_column = "Smiles" in available_columns
if not has_smiles_column:
    missing_column_message = (
        "❌ Column 'Smiles' not found in Excel file. Found columns: "
        + str(available_columns)
    )
    raise ValueError(missing_column_message)

# === 2. Function to calculate properties ===
def calculate_properties(smiles):
    """Compute descriptors and chirality information for one SMILES value.

    Args:
        smiles: Cell value from the ``Smiles`` column. Missing values
            (``None``/NaN) and blank strings are treated as "no structure".

    Returns:
        A list of nine values, in this order: SlogP, TPSA, MW,
        NumRotatableBonds, NumHBD, NumHBA, Chirality, NumStereocenters,
        NumUnspecifiedStereocenters. Chirality is one of "achiral",
        "chirality not annotated" (at least one stereocenter without an
        assigned configuration) or "chirality annotated". All nine entries
        are ``None`` when the input is missing, blank, cannot be parsed, or
        any calculation fails.

    The function is best-effort per row: problems are printed and reported
    as an all-``None`` row instead of being raised, so one bad structure
    does not abort the whole table.
    """
    try:
        if pd.isna(smiles):
            return [None] * 9

        smiles_text = str(smiles).strip()
        if not smiles_text:
            # RDKit turns an empty string into a zero-atom molecule rather
            # than None; without this guard a blank cell would be reported
            # as a real "achiral" compound with all-zero descriptors.
            return [None] * 9

        mol = Chem.MolFromSmiles(smiles_text)
        if mol is None:
            print(f"⚠️ Invalid SMILES: {smiles}")
            return [None] * 9

        # Properties
        slogp = Crippen.MolLogP(mol)
        tpsa = rdMolDescriptors.CalcTPSA(mol)
        mw = Descriptors.MolWt(mol)
        num_rot_bonds = Lipinski.NumRotatableBonds(mol)
        num_hbd = Lipinski.NumHDonors(mol)
        num_hba = Lipinski.NumHAcceptors(mol)

        # Stereochemistry: perceive the centers once so that the two counts
        # and the classification are always derived from the same result.
        try:
            chiral_centers = Chem.FindMolChiralCenters(
                mol, force=True, includeUnassigned=True,
                useLegacyImplementation=False,
            )
        except TypeError:
            # RDKit builds that do not know the useLegacyImplementation
            # keyword reject it; fall back to their default perception.
            chiral_centers = Chem.FindMolChiralCenters(
                mol, force=True, includeUnassigned=True
            )

        # Each entry is an (atom_idx, label) pair; "?" marks a center whose
        # configuration is not specified in the input.
        num_stereocenters = len(chiral_centers)
        num_unspecified = sum(
            1 for _atom_idx, label in chiral_centers if label == "?"
        )

        # === Chirality classification ===
        if num_stereocenters == 0:
            chirality = "achiral"
        elif num_unspecified > 0:
            chirality = "chirality not annotated"
        else:
            chirality = "chirality annotated"

        return [
            slogp, tpsa, mw, num_rot_bonds, num_hbd, num_hba,
            chirality, num_stereocenters, num_unspecified
        ]

    except Exception as e:
        # Deliberate catch-all at the row boundary: report and keep going.
        print(f"❌ Error with SMILES {smiles}: {e}")
        return [None] * 9

# === 3. Apply function ===
# Names for the nine values returned by calculate_properties, in the same
# order as that function returns them.
property_columns = [
    "SlogP", "TPSA", "MW", "NumRotatableBonds", "NumHBD", "NumHBA",
    "Chirality", "NumStereocenters", "NumUnspecifiedStereocenters",
]
results = df["Smiles"].apply(calculate_properties)
# Passing the column names to the constructor keeps the frame nine columns
# wide even when the sheet has no data rows; without them an empty sheet
# yields a zero-column frame and the assignment fails with
# "Columns must be same length as key".
df[property_columns] = pd.DataFrame(
    results.tolist(), index=df.index, columns=property_columns
)

# === 4. Save output ===
df.to_excel(output_file, index=False)

print(f"✅ Done. File '{output_file}' generated successfully.\n")

# === 5. Show Chirality distribution (percentages) ===
# value_counts skips missing values, so rows without a chirality label
# (missing or unparsable SMILES) are not part of the percentages.
chirality_counts = df["Chirality"].value_counts(normalize=True) * 100
print("📊 Chirality distribution (%):")
print(chirality_counts.round(2).to_string())
