In [1]:
from utils.protein_logic import prepare_protein
from utils.ligand_logic import prepare_ligands
import yaml

# Load the pipeline settings. The encoding is pinned to UTF-8 (the encoding
# YAML files are normally written in) instead of relying on the
# locale-dependent default of open(), which is not UTF-8 on many Windows
# setups and would mis-decode non-ASCII paths or column names.
with open("config.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Protein: input structure, output path and pH all come from the `protein`
# section of the config.
prepare_protein(
    cfg["protein"]["input_pdb"],
    cfg["protein"]["output_pdb"],
    cfg["protein"]["pH"]
)

# Ligands: spreadsheet location, column names and output SDF come from the
# `ligands` section; the embedding seed comes from the `embedding` section.
# NOTE(review): the ligand pH is read from the *protein* section -- presumably
# on purpose so both are prepared at the same pH; confirm.
n = prepare_ligands(
    excel_file=cfg["ligands"]["input_excel"],
    smiles_col=cfg["ligands"]["smiles_column"],
    id_col=cfg["ligands"]["id_column"],
    output_sdf=cfg["ligands"]["output_sdf"],
    ph=cfg["protein"]["pH"],
    seed=cfg["embedding"]["random_seed"]
)


# `n` is the value returned by prepare_ligands, reported here as the number
# of ligands that were prepared successfully.
print(f"{n} ligands successfully prepared.")


[15:27:02] Can't kekulize mol.  Unkekulized atoms: 4 5 7 8 10 11 14 15 19 20 24
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 4 5 7 8 10 11 14 15 19 20 24
[15:27:02] Explicit valence for atom # 3 O, 3, is greater than permitted
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 6 8
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 7 9
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 7 9
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 6 8
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 6 8
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 7 9
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 7 9
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 6 8
[15:27:02] Explicit valence for atom # 3 O, 3, is greater than permitted
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 7 9
[15:27:02] Can't kekulize mol.  Unkekulized atoms: 1 4 5 7 9
[15:27:02] Explicit valence for atom # 3 O, 3, is greater than permitted
[15:27:02] 

Prepared 8 ligands
Failed   0 ligands
8 ligands successfully prepared.


In [None]:
# This cell checks the validity of the ligand output written by the
# preparation step: every record must load and sanitize in RDKit and must
# carry coordinates with a non-negligible extent along Z.
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDLogger

# (Optional) silence RDKit warning spam if desired
# RDLogger.DisableLog('rdApp.warning')

# QC the file the preparation cell wrote (the same config value is passed to
# prepare_ligands as `output_sdf`) rather than a hard-coded absolute path that
# only exists on one machine. Requires `cfg` from the preparation cell, so run
# that cell first.
SDF_PATH = cfg["ligands"]["output_sdf"]

# Minimum extent along Z for a ligand to count as 3D, in the units of the SDF
# coordinates (the report below labels the span with the same unit suffix).
MIN_Z_SPAN = 0.1

# removeHs=False keeps the hydrogens stored in the SDF on the loaded molecules.
suppl = Chem.SDMolSupplier(SDF_PATH, removeHs=False)

total = 0
sanitize_failed = 0
not_3d = 0
passed = 0

print("=== Ligand QC report ===")

for idx, mol in enumerate(suppl, start=1):
    total += 1

    # A record that could not be read comes back as None; it is counted
    # together with the sanitize failures.
    if mol is None:
        print(f"[LIG {idx}] ‚ùå Mol is None (read failed)")
        sanitize_failed += 1
        continue

    # -------------------------
    # STEP 1: Final sanitize
    # -------------------------
    try:
        Chem.SanitizeMol(mol)
    except Exception as e:
        print(f"[LIG {idx}] ‚ùå Sanitize FAILED: {e}")
        sanitize_failed += 1
        continue

    # -------------------------
    # STEP 2: 3D confirmation
    # -------------------------
    # Ask explicitly whether a conformer exists instead of calling
    # GetConformer() and catching every exception, which could hide
    # unrelated errors.
    if mol.GetNumConformers() == 0:
        print(f"[LIG {idx}] ‚ùå No conformer found (not 3D)")
        not_3d += 1
        continue

    conf = mol.GetConformer()
    zs = [conf.GetAtomPosition(i).z for i in range(mol.GetNumAtoms())]
    # An atom-less record has no extent at all; treat its span as 0 so it is
    # reported as 2D instead of max()/min() raising on an empty list and
    # aborting the whole report.
    z_span = max(zs) - min(zs) if zs else 0.0

    if z_span < MIN_Z_SPAN:
        print(f"[LIG {idx}] ‚ùå Effectively 2D (Z-span = {z_span:.3f} √Ö)")
        not_3d += 1
        continue

    # -------------------------
    # PASSED
    # -------------------------
    print(f"[LIG {idx}] ‚úÖ PASS | Z-span = {z_span:.3f} √Ö")
    passed += 1

# -------------------------
# SUMMARY
# -------------------------
print("\n=== SUMMARY ===")
print(f"Total ligands      : {total}")
print(f"Sanitize FAILED    : {sanitize_failed}")
print(f"Not real 3D        : {not_3d}")
print(f"PASSED (GNINA-safe): {passed}")

# The all-clear message is printed only when no ligand failed either check.
if sanitize_failed == 0 and not_3d == 0:
    print("\nüü¢ ALL ligands are RDKit-valid and docking-ready.")
else:
    print("\nüü° Some ligands need attention (see log above).")




=== Ligand QC report ===
[LIG 1] ‚úÖ PASS | Z-span = 7.606 √Ö
[LIG 2] ‚úÖ PASS | Z-span = 5.367 √Ö
[LIG 3] ‚úÖ PASS | Z-span = 2.675 √Ö
[LIG 4] ‚úÖ PASS | Z-span = 5.981 √Ö
[LIG 5] ‚úÖ PASS | Z-span = 6.724 √Ö
[LIG 6] ‚úÖ PASS | Z-span = 7.119 √Ö
[LIG 7] ‚úÖ PASS | Z-span = 7.527 √Ö
[LIG 8] ‚úÖ PASS | Z-span = 8.117 √Ö

=== SUMMARY ===
Total ligands      : 8
Sanitize FAILED    : 0
Not real 3D        : 0
PASSED (GNINA-safe): 8

üü¢ ALL ligands are RDKit-valid and docking-ready.
