In [1]:
import pandas as pd
import numpy as np
from rdkit import Chem
from chemopy import ChemoPy
import sys
# Drug table: read with header=None (no row is consumed as a header), then
# label the two columns explicitly.
smiles_df = (
    pd.read_csv("smiles.csv", header=None)
    .set_axis(['Drug', 'SMILES'], axis=1)
)

# Label table: strip stray whitespace around the header names so later
# column lookups by name match.
labels_df = pd.read_csv("labels.csv").rename(columns=str.strip)
def preprocess_smiles_2d(smiles):
    """Parse a SMILES string into an RDKit molecule with explicit hydrogens.

    Parameters
    ----------
    smiles : str
        SMILES text for a single compound. Anything that is not a non-blank
        string (``None``, a float NaN from an empty CSV cell, ``""``) is
        treated as "no structure available".

    Returns
    -------
    rdkit.Chem.Mol or None
        The parsed molecule after ``Chem.AddHs``, or ``None`` when the input
        is missing/blank or RDKit cannot parse it.
    """
    # Missing CSV cells arrive as float NaN; passing a non-string to
    # MolFromSmiles raises instead of returning None, so reject it up front.
    # Blank strings are rejected too: RDKit appears to parse "" into an
    # atom-less molecule rather than None, which is useless for descriptors.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None.
        return None
    return Chem.AddHs(mol)
# Parse every SMILES once up front; preprocess_smiles_2d returns None for
# entries RDKit cannot parse, and that None is what gets stored in 'Mol'.
smiles_df['Mol'] = smiles_df['SMILES'].map(preprocess_smiles_2d)

# Shared descriptor calculator.
# NOTE(review): presumably ignore_3D skips 3D descriptors and include_fps=False
# skips fingerprints - confirm against the ChemoPy documentation.
cmp = ChemoPy(
    ignore_3D=True,
    include_fps=False,
)
class DummyFile:
    """Minimal write-only sink that discards everything sent to it.

    Intended as a stand-in for ``sys.stdout`` while noisy code runs.
    """

    def write(self, x):
        """Accept text and drop it; returns None."""
        return None

    def flush(self):
        """Nothing is buffered, so flushing is a no-op; returns None."""
        return None
def _is_missing(value):
    """Return True for the placeholders (None or float NaN) that mark an absent value."""
    return value is None or (isinstance(value, float) and np.isnan(value))


# --- Per-drug descriptors ---------------------------------------------------
# stdout is swapped for a discarding sink while cmp.calculate runs.
# The try/finally guarantees the real stdout comes back even if a
# calculation raises; otherwise every later print in the kernel would vanish.
save_stdout = sys.stdout
sys.stdout = DummyFile()
drug_desc_map = {}
# Initialised explicitly so a stale value from an earlier run is never reused.
desc_names = None
try:
    for drug, mol in zip(smiles_df['Drug'], smiles_df['Mol']):
        if _is_missing(mol):
            # No usable structure: remember the drug but with no descriptors.
            drug_desc_map[drug] = None
            continue
        desc_df = cmp.calculate([mol])
        drug_desc_map[drug] = desc_df.iloc[0].values
        if desc_names is None:
            # Column names are taken from the first successful calculation.
            desc_names = list(desc_df.columns)
finally:
    sys.stdout = save_stdout

if desc_names is None:
    raise ValueError(
        "No descriptors could be computed: smiles.csv contains no parsable molecule"
    )

# --- Per-pair feature rows --------------------------------------------------
combined_names = [f"A_{n}" for n in desc_names] + [f"B_{n}" for n in desc_names]
# Series.map leaves NaN for drug names absent from drug_desc_map, and None for
# drugs whose SMILES did not parse; both cases are handled by _is_missing.
labels_df['DrugA_desc'] = labels_df['drug_a_name'].map(drug_desc_map)
labels_df['DrugB_desc'] = labels_df['drug_b_name'].map(drug_desc_map)


def _combine_pair(row):
    """Concatenate the A and B descriptor vectors, or None if either is absent."""
    desc_a, desc_b = row['DrugA_desc'], row['DrugB_desc']
    if _is_missing(desc_a) or _is_missing(desc_b):
        return None
    return np.concatenate([desc_a, desc_b])


labels_df['Combined_chem_desc'] = labels_df.apply(_combine_pair, axis=1)

# Pairs without descriptors become all-NaN rows so the table keeps one row per
# label row (a None entry would break DataFrame construction outright).
missing_row = np.full(len(combined_names), np.nan)
feature_rows = [
    missing_row if _is_missing(vec) else vec
    for vec in labels_df['Combined_chem_desc']
]
n_missing = sum(_is_missing(vec) for vec in labels_df['Combined_chem_desc'])

chem_desc_df = pd.DataFrame(feature_rows, columns=combined_names)
# Assign positionally: feature_rows was built in labels_df row order.
chem_desc_df['synergy'] = labels_df['synergy'].values
chem_desc_df.to_csv("drug_comb_chem_descriptors_named.csv", index=False)

if n_missing:
    print(
        f"Warning: {n_missing} of {len(labels_df)} drug pairs lack descriptors; "
        "their descriptor columns are NaN"
    )
print("Chemical descriptors saved successfully")

Chemical descriptors with names saved successfully as 'drug_comb_chem_descriptors_named.csv'!


Loaded SMILES file:
         Drug                                             SMILES
0        5-FU                            O=c1[nH]cc(F)c(=O)[nH]1
1     ABT-888                 CC1(c2nc3c(C(N)=O)cccc3[nH]2)CCCN1
2     AZD1775  C=CCn1c(=O)c2cnc(Nc3ccc(N4CCN(C)CC4)cc3)nc2n1-...
3     BEZ-235  Cn1c(=O)n(-c2ccc(C(C)(C)C#N)cc2)c2c3cc(-c4cnc5...
4  BORTEZOMIB     CC(C)CC(NC(=O)C(Cc1ccccc1)NC(=O)c1cnccn1)B(O)O
Loaded SMARTS alerts:
                        Name                    SMARTS
0  R1 Reactive alkyl halides     [Br,Cl,I][CX4;CH,CH2]
1            R2 Acid halides  [S,C](=[O,S])[F,Br,Cl,I]
2              R3 Carbazides            O=CN=[N+]=[N-]
3         R4 Sulphate esters             COS(=O)O[C,c]
4             R5 Sulphonates          COS(=O)(=O)[C,c]
Valid SMARTS patterns: 1251
Toxicophore features saved as drug_toxicophore_features_descriptive.csv
Name  R1 Reactive alkyl halides  R2 Acid halides  R3 Carbazides  \
0                             0                0              0   
1   