### Get molecular properties of the late-stage diversified systems

In [1]:
from rdkit import Chem
import pandas as pd
import numpy as np

from rdkit.Chem import AllChem, rdMolDescriptors, Descriptors
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
from tqdm.notebook import tqdm

from rdkit import RDLogger
lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL)

import multiprocessing as mp
import molprops_utils

### Physicochemical properties for joint distribution plots

In [2]:
def get_df_props(input_filename,output_filename,smiles_column):
    """Compute molecular properties for every SMILES in a CSV and save them.

    Reads ``input_filename`` with pandas, calls
    ``molprops_utils.getallprops`` on each entry of ``smiles_column``, and
    assigns the returned values positionally to the columns LogP, MW, HBD,
    HBA, PSA, ROTB, FSP3, SSSR and QED. The augmented table is written to
    ``output_filename`` without the index.

    Args:
        input_filename: Path of the CSV file to read.
        output_filename: Path of the CSV file to write.
        smiles_column: Name of the column holding the SMILES strings.

    Returns:
        None. The CSV written to ``output_filename`` is the only result.
    """
    prop_columns = ["LogP","MW","HBD","HBA","PSA","ROTB","FSP3","SSSR","QED"]
    table = pd.read_csv(input_filename)

    # One getallprops() result per row, in row order; tqdm only draws progress.
    per_molecule = [
        molprops_utils.getallprops(smi) for smi in tqdm(table[smiles_column])
    ]

    # Park the raw results in a scratch column, fan them out into one named
    # column per property, then discard the scratch column before saving.
    # NOTE(review): assumes getallprops yields nine values per molecule, in
    # the order of prop_columns - confirm against molprops_utils.
    table["allprops"] = per_molecule
    table[prop_columns] = table["allprops"].tolist()
    table = table.drop(columns="allprops")
    table.to_csv(output_filename, index=False)
    
    

### For the sp3 system

In [7]:
# Property run over the "largest_cleaned" SMILES column of the SMILES table.
smiles_column = "largest_cleaned"
input_filename = "./ac3drug_am3small/ac3drug_am3small_smiles.csv"
output_filename = "./ac3drug_am3small/ac3drug_am3small_props.csv"

In [8]:
# Writes the property-augmented table to output_filename; returns nothing.
get_df_props(
    input_filename=input_filename,
    output_filename=output_filename,
    smiles_column=smiles_column,
)

  0%|          | 0/1443 [00:00<?, ?it/s]

In [9]:
# Property run over the "pdt_cleaned" SMILES column of the reaction-center table.
smiles_column = "pdt_cleaned"
input_filename = "./ac3drug_am3small/ac3drug_am3small_rxn_centers.csv"
output_filename = "./ac3drug_am3small/ac3drug_am3small_rxn_centers_props.csv"

In [10]:
# Writes the property-augmented table to output_filename; returns nothing.
get_df_props(
    input_filename=input_filename,
    output_filename=output_filename,
    smiles_column=smiles_column,
)

  0%|          | 0/1443 [00:00<?, ?it/s]

## PMI

### full molecule

In [21]:
# PMI run over the "largest_cleaned" SMILES column of the SMILES table.
smiles_column = "largest_cleaned"
input_filename = "./ac3drug_am3small/ac3drug_am3small_smiles.csv"
output_filename = "./ac3drug_am3small/ac3drug_am3small_pmi.csv"

In [22]:
# Load the input table; the PMI loop below reads SMILES from data[smiles_column].
data = pd.read_csv(input_filename)

In [26]:
# One cal_pmi() result per SMILES, in row order; tqdm only draws the progress bar.
result = [molprops_utils.cal_pmi(smi) for smi in tqdm(data[smiles_column])]

  0%|          | 0/1443 [00:00<?, ?it/s]

In [27]:
# Stack the per-molecule results once; columns 0 and 1 become pmi_x and pmi_y.
pmi_values = np.array(result)
data["pmi_x"] = pmi_values[:, 0]
data["pmi_y"] = pmi_values[:, 1]
data.to_csv(output_filename, index=False)

### reaction center

In [31]:
# PMI run over the "pdt_cleaned" SMILES column of the reaction-center table.
smiles_column = "pdt_cleaned"
input_filename = "./ac3drug_am3small/ac3drug_am3small_rxn_centers.csv"
output_filename = "./ac3drug_am3small/ac3drug_am3small_rxnctr_pmi.csv"

In [32]:
# Load the input table; the PMI loop below reads SMILES from data[smiles_column].
data = pd.read_csv(input_filename)

In [33]:
# One cal_pmi() result per SMILES, in row order; tqdm only draws the progress bar.
result = [molprops_utils.cal_pmi(smi) for smi in tqdm(data[smiles_column])]

  0%|          | 0/1443 [00:00<?, ?it/s]

In [34]:
# Stack the per-molecule results once; columns 0 and 1 become pmi_x and pmi_y.
pmi_values = np.array(result)
data["pmi_x"] = pmi_values[:, 0]
data["pmi_y"] = pmi_values[:, 1]
data.to_csv(output_filename, index=False)