In [1]:
# Importing some important libraries
from rdkit import Chem
import rdkit
import pandas as pd
from IPython.utils import io

In [2]:
# Open the SDF file 'structures.sdf' (path is relative to the working directory)
# with RDKit's SDMolSupplier. The supplier is indexed record by record in the
# extraction loop below.
dsMol = Chem.SDMolSupplier('structures.sdf')

In [4]:
# Maps each output column name to the SDF property tag it is read from.
# NOTE: the "Mol_wheight" spelling is kept on purpose: it is the column name
# that ends up in the exported data, so renaming it would break consumers.
BMDB_PROPERTY_TAGS = {
    "DB_ID": "DATABASE_ID",
    "SMILES_Struct": "SMILES",
    "Mol_wheight": "MOLECULAR_WEIGHT",
    "Chem_LogP": "JCHEM_LOGP",
    "Acceptors_Num": "JCHEM_ACCEPTOR_COUNT",
    "Donors_Num": "JCHEM_DONOR_COUNT",
    "Formula": "FORMULA",
    "Rotatables_Count": "JCHEM_ROTATABLE_BOND_COUNT",
    "Rings_Num": "JCHEM_NUMBER_OF_RINGS",
    "Total_PSA": "JCHEM_POLAR_SURFACE_AREA",
}

# One dict per usable record, in file order. Records that are empty or that
# lack any of the tags above are reported and left out.
BMDB_all_dicts = []

for x in range(len(dsMol)):
    try:
        # Suppress the output generated while a database record is accessed.
        with io.capture_output():
            # Fetch the record once per iteration and reuse it for every tag.
            mol = dsMol[x]
            # A record that could not be read comes back as None; every other
            # record is turned into a plain dict of property strings.
            record = None if mol is None else {
                column: mol.GetProp(tag)
                for column, tag in BMDB_PROPERTY_TAGS.items()
            }
    except KeyError:
        # Raised by GetProp when the record lacks one of the requested tags.
        print(f"Element {x}: Invalid Value(s)")
        continue

    if record is None:
        print(f"Element {x}: Empty")
        continue

    BMDB_all_dicts.append(record)
  

Element 700: Empty
Element 825: Invalid Value(s)
Element 994: Invalid Value(s)
Element 1037: Invalid Value(s)
Element 1038: Invalid Value(s)
Element 1064: Invalid Value(s)
Element 1317: Invalid Value(s)
Element 1349: Empty
Element 1744: Invalid Value(s)
Element 1758: Invalid Value(s)
Element 1963: Invalid Value(s)
Element 2572: Empty
Element 2577: Empty
Element 2595: Invalid Value(s)
Element 2864: Empty
Element 38276: Invalid Value(s)


In [None]:
# Build the DataFrame straight from the list of record dicts: one row per dict,
# one column per key. This avoids creating a Series per record and transposing,
# and it also works when the list is empty.
# fillna(0) replaces any missing value with 0, as before.
df = pd.DataFrame(BMDB_all_dicts).fillna(0)

# Finally, export the DataFrame to 'BMDB_items.csv'.
# Fields are separated by ";" and the row index is written as the first column.
df.to_csv('BMDB_items.csv', encoding = 'utf-8', sep = ";", header = True)

In [None]:
# Show the DataFrame as the cell's output: a bare expression on the last line
# of a cell is rendered by the notebook.
df

Unnamed: 0,DB_ID,SMILES_Struct,Mol_wheight,Chem_LogP,Acceptors_Num,Donors_Num,Formula,Rotatables_Count,Rings_Num,Total_PSA
0,BMDB0000001,CN1C=NC(C[C@H](N)C(O)=O)=C1,169.1811,-3.0704187479965057,4,2,C7H11N3O2,3,1,81.14000000000001
1,BMDB0000002,NCCCN,74.1249,-1.362482935,2,2,C3H10N2,2,0,52.04
2,BMDB0000005,CCC(=O)C(O)=O,102.0886,0.7664098003333332,3,1,C4H6O3,2,0,54.37
3,BMDB0000008,CC[C@H](O)C(O)=O,104.105,0.050692876999999914,3,2,C4H8O3,2,0,57.53
4,BMDB0000010,[H][C@@]12CCC(=O)[C@@]1(C)CC[C@]1([H])C3=C(CC[...,300.3921,4.151357768333334,3,1,C19H24O3,1,4,46.53
...,...,...,...,...,...,...,...,...,...,...
51679,BMDB0109641,CCCCCCC\C=C/CCCCCCCC(O)=O,268.441,6.339229786,2,1,C17H32O2,14,0,37.3
51680,BMDB0109642,CCC(C)CCCCCCCCCCC(O)=O,242.403,5.6544645440000005,2,1,C15H30O2,12,0,37.3
51681,BMDB0109643,CC(C)CCCCCCCCCCCC(O)=O,242.3975,5.6544645440000005,2,1,C15H30O2,12,0,37.3
51682,BMDB0109644,CC(C)CCCCCCCCCCCCC(O)=O,256.4241,6.099033209,2,1,C16H32O2,13,0,37.3
