In [166]:
from chembl_webresource_client.new_client import new_client
from chembl_webresource_client.query_set import QuerySet
import pandas as pd
from os import mkdir
from loguru import logger

# Create the output directory tree one level at a time. Each directory gets
# its own guard so that an already-existing parent (e.g. "results" left over
# from a previous run of this cell) does not stop its children from being
# created, which is what happened when a single try block wrapped all calls.
for directory in ("results", "results/compounds", "results/compounds/molfiles"):
    try:
        mkdir(directory)
    except FileExistsError:
        # Already present; re-running the cell must stay harmless. Any other
        # OSError (permissions, missing parent, ...) is deliberately not
        # swallowed so that real problems surface here instead of later.
        pass

In [167]:
# type: ignore

def DataFrameMolfilesFromIdList(molecule_chembl_id_list: list[str]) -> pd.DataFrame:
    """Fetch the molfile of every requested molecule into a DataFrame.

    Queries ``new_client.molecule`` for the given ids, requesting only the
    ``molecule_chembl_id`` and ``molecule_structures`` fields, and pulls the
    ``'molfile'`` entry out of each structures record.

    Args:
        molecule_chembl_id_list: Molecule ids passed to the
            ``molecule_chembl_id__in`` filter.

    Returns:
        A DataFrame with exactly the columns ``molecule_chembl_id`` and
        ``molfile``, in that order. ``molfile`` is ``None`` for rows whose
        structures record is not a dict. When the query yields no rows an
        empty DataFrame with the same two columns is returned.
    """
    columns = ['molecule_chembl_id', 'molfile']

    data: QuerySet = new_client.molecule.filter(
        molecule_chembl_id__in=molecule_chembl_id_list
    ).only(['molecule_chembl_id', 'molecule_structures'])
    df = pd.DataFrame(data)

    # An empty result produces a frame without any columns, so indexing
    # 'molecule_structures' below would raise KeyError. Hand back a
    # well-formed empty frame instead.
    if df.empty:
        return pd.DataFrame(columns=columns)

    df['molfile'] = df['molecule_structures'].apply(
        lambda x: x['molfile'] if isinstance(x, dict) else None)

    # Select the columns explicitly: consumers unpack rows positionally as
    # (molecule_chembl_id, molfile), so the order must not depend on the order
    # in which the service happens to return the fields.
    return df[columns]

In [168]:
def SaveMolfilesToSDF(df: pd.DataFrame, file_name: str) -> None:
    """Write the molfiles in ``df`` to ``file_name`` as SDF records.

    Each record is the molecule id immediately followed by the molfile text,
    a blank line, and the ``$$$$`` record terminator.

    Args:
        df: Two-column frame whose rows are ``(molecule_chembl_id, molfile)``,
            in that order (rows are unpacked positionally).
        file_name: Path of the file to write; it is opened in ``'w'`` mode and
            therefore overwritten if it already exists.

    Rows whose molfile is missing (``None``/NaN) are skipped: formatting them
    would put the literal text ``None`` where the structure belongs and yield
    a corrupt record.
    """
    with open(file_name, 'w') as f:
        for molecule_chembl_id, molfile in df.itertuples(index=False, name=None):
            if pd.isna(molfile):
                continue
            f.write(f"{molecule_chembl_id}{molfile}\n\n$$$$\n")

In [169]:
def SaveMolfilesToSDFByIdList(molecule_chembl_id_list: list[str], file_name: str,
                              extra_data: pd.DataFrame = pd.DataFrame(), print_to_console: bool = False) -> None:
    """Download molfiles for the given ids and write them to ``<file_name>.sdf``.

    The molfiles are fetched through ``new_client.molecule`` and written one
    SDF record per molecule. When ``extra_data`` is non-empty, every non-missing
    value of the matching ``extra_data`` row is appended to the record as a
    ``> <column>`` data item.

    Args:
        molecule_chembl_id_list: Molecule ids to fetch.
        file_name: Output path WITHOUT extension; ``.sdf`` is appended.
        extra_data: Optional table with a ``molecule_chembl_id`` column whose
            remaining columns are written as data items. Ids that do not occur
            in it simply get no data items. If an id occurs more than once,
            only its first row is used.
        print_to_console: When true, progress is reported through ``logger``.

    Raises:
        KeyError: If ``extra_data`` is non-empty but has no
            ``molecule_chembl_id`` column.
    """
    def DataFrameMolfilesFromIdList(molecule_chembl_id_list: list[str]) -> pd.DataFrame:
        # Fetch (molecule_chembl_id, molfile) rows for the requested ids.
        columns = ['molecule_chembl_id', 'molfile']

        qs_data: QuerySet = new_client.molecule.filter(
            molecule_chembl_id__in=molecule_chembl_id_list).only([
                'molecule_chembl_id', 'molecule_structures'])

        data = pd.DataFrame(qs_data)

        # No rows -> frame without columns; indexing 'molecule_structures'
        # would raise KeyError, so return a well-formed empty frame instead.
        if data.empty:
            return pd.DataFrame(columns=columns)

        data['molfile'] = data['molecule_structures'].apply(
            lambda x: x['molfile'] if isinstance(x, dict) else None)

        # Fixed column order; the writer relies on these two columns.
        return data[columns]

    def IsMissing(cell) -> bool:
        # True for NaN/None/NA-like scalars and for the textual forms the
        # original string-based check already filtered out.
        if pd.api.types.is_scalar(cell) and pd.isna(cell):
            return True
        return str(cell) in ("nan", "None")

    def SaveMolfilesToSDF(data: pd.DataFrame, file_name: str,
                          extra_data: pd.DataFrame = pd.DataFrame(),
                          print_to_console: bool = False) -> None:
        # Write one SDF record per row of `data` to "<file_name>.sdf".
        if print_to_console:
            logger.info(f"Opening {file_name}...".ljust(77))

        # Index the extra data once, up front, instead of re-indexing it for
        # every molecule inside the loop.
        extra_by_id = None
        if not extra_data.empty:
            extra_by_id = extra_data.set_index("molecule_chembl_id")
            # NOTE(review): with repeated ids a label lookup returns several
            # rows and their multi-line repr would end up in the file. The
            # first row per id is kept here; confirm this is the wanted
            # policy for tables holding several measurements per molecule.
            extra_by_id = extra_by_id[~extra_by_id.index.duplicated(keep="first")]

        with open(f"{file_name}.sdf", 'w') as f:
            if print_to_console:
                logger.success(f"Opening {file_name}".ljust(77))

            rows = data[["molecule_chembl_id", "molfile"]].itertuples(index=False, name=None)
            for molecule_chembl_id, molfile in rows:
                # A missing structure cannot be written as a record; without
                # this guard the literal text "None" would be emitted.
                if pd.isna(molfile):
                    if print_to_console:
                        logger.warning(
                            f"Skipping {molecule_chembl_id}: no molfile available".ljust(77))
                    continue

                f.write(f"{molecule_chembl_id}{molfile}\n\n")

                # Ids absent from the extra table get no data items rather
                # than aborting the whole export with a KeyError.
                if extra_by_id is not None and molecule_chembl_id in extra_by_id.index:
                    for column in extra_by_id.columns:
                        # Per-cell lookup keeps each column's own dtype, so
                        # the text form of the value is not altered by row
                        # upcasting.
                        cell = extra_by_id.at[molecule_chembl_id, column]

                        if IsMissing(cell):
                            continue

                        f.write(f"> <{column}>\n")
                        f.write(f"{cell}\n\n")

                f.write("$$$$\n")

                if print_to_console:
                    logger.info(
                        f"Writing {molecule_chembl_id} data to .sdf file...".ljust(77))

    if print_to_console:
        logger.info("Collecting molfiles to pandas.DataFrame()...".ljust(77))

    data = DataFrameMolfilesFromIdList(molecule_chembl_id_list)

    if print_to_console:
        logger.success("Collecting molfiles to pandas.DataFrame()".ljust(77))

    SaveMolfilesToSDF(data=data, file_name=file_name, extra_data=extra_data,
                      print_to_console=print_to_console)


In [170]:
# Load the semicolon-separated activities table; it is handed to the exporter
# as `extra_data`, so its columns are attached to each molecule's record.
df = pd.read_csv("../results/activities/CHEMBL1951_IC50_activities.csv", sep=';')

# Export the two molecules with progress logging enabled. The exporter
# appends ".sdf" itself, so the file written is "results/molfiles.sdf".
SaveMolfilesToSDFByIdList(
    molecule_chembl_id_list=["CHEMBL156630", "CHEMBL155754"],
    file_name="results/molfiles",
    extra_data=df,
    print_to_console=True,
)

[32m2024-10-23 14:51:33.742[0m | [1mINFO    [0m | [36m__main__[0m:[36mSaveMolfilesToSDFByIdList[0m:[36m49[0m - [1mCollecting molfiles to pandas.DataFrame()...                                 [0m


[32m2024-10-23 14:51:33.751[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mSaveMolfilesToSDFByIdList[0m:[36m54[0m - [32m[1mCollecting molfiles to pandas.DataFrame()                                    [0m
[32m2024-10-23 14:51:33.751[0m | [1mINFO    [0m | [36m__main__[0m:[36mSaveMolfilesToSDF[0m:[36m21[0m - [1mOpening results/molfiles...                                                  [0m
[32m2024-10-23 14:51:33.751[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mSaveMolfilesToSDF[0m:[36m25[0m - [32m[1mOpening results/molfiles                                                     [0m
[32m2024-10-23 14:51:33.751[0m | [1mINFO    [0m | [36m__main__[0m:[36mSaveMolfilesToSDF[0m:[36m45[0m - [1mWriting CHEMBL155754 data to .sdf file...                                    [0m
[32m2024-10-23 14:51:33.751[0m | [1mINFO    [0m | [36m__main__[0m:[36mSaveMolfilesToSDF[0m:[36m45[0m - [1mWriting CHEMBL156630 data to .sdf file...                  