In [1]:
import sys
from google.colab import drive
drive.mount('/content/gdrive')

%cd "/content/gdrive/MyDrive/ARDF_SSD"

Mounted at /content/gdrive
/content/gdrive/.shortcut-targets-by-id/1qYHA6AgfkviPjK9bxK3OfHqP1cktD-Up/ARDF_SSD


In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
! pip install rdkit -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.1/33.1 MB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd

curated_df = pd.read_csv("Analysis&Modeling/Integrated Analysis/1.Life_Stage_Analysis/DataSet/5.LifeStageData-CompoundsCurated.csv")

In [None]:
curated_df = curated_df[~curated_df['Curated_SMILES'].isna()]

# Descriptors

In [11]:
# %%writefile "Analysis&Modeling/Integrated Analysis/Integrative-SSD-Modeling-in-Ecotox-Risk-Assessment/Integrative_Analysis/2.Curation Workflows/2.4-Descriptors_Calculation_Workflow.py"
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors, rdmolops, AllChem
import logging
import numpy as np

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class DescriptorCalculator:
    """
    A class to calculate various molecular descriptors using RDKit,
    excluding properties already extracted by PhysChemPropertiesExtractor.
    """

    def __init__(self, sdf_file):
        """
        Initialize the DescriptorCalculator with an SDF file.

        Args:
            sdf_file (str): Path to the SDF file containing chemical structures.
        """
        self.sdf_file = sdf_file
        self.smiles_df = self.read_smiles_from_sdf()
        self.descriptors_df = None

    def read_smiles_from_sdf(self):
        """
        Reads SMILES strings from an SDF file and returns them as a DataFrame.

        Returns:
            pd.DataFrame: A DataFrame containing the SMILES strings.
        """
        logger.info(f"Reading SMILES from SDF file: {self.sdf_file}")
        suppl = Chem.SDMolSupplier(self.sdf_file)
        smiles_list = []

        for mol in suppl:
            if mol is not None:
                smiles = Chem.MolToSmiles(mol)
                smiles_list.append(smiles)

        logger.info(f"Read {len(smiles_list)} SMILES from the SDF file.")
        return pd.DataFrame(smiles_list, columns=["SMILES"])

    def calculate_constitutional_descriptors(self, mol):
        """
        Calculate constitutional descriptors for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            dict: Dictionary of constitutional descriptors.
        """
        logger.debug("Calculating constitutional descriptors.")

        # These descriptors describe the basic structure of a molecule,
        # including the number of atoms, bonds, and aromaticity.
        # Important for understanding molecular size and complexity,
        # which can influence the bioavailability and toxicity of compounds.

        # Count the number of aromatic atoms manually
        num_aromatic_atoms = sum(1 for atom in mol.GetAtoms() if atom.GetIsAromatic())

        return {
            'NumAtoms': mol.GetNumAtoms(),  # Total number of atoms in the molecule
            'NumBonds': mol.GetNumBonds(),  # Total number of bonds in the molecule
            'NumAromaticAtoms': num_aromatic_atoms,  # Number of aromatic atoms, related to stability and reactivity
            'NumRotatableBonds': Descriptors.NumRotatableBonds(mol)  # Flexibility of the molecule, affecting interaction with biological targets
        }

    def calculate_topological_descriptors(self, mol):
        """
        Calculate topological descriptors for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            dict: Dictionary of topological descriptors.
        """
        logger.debug("Calculating topological descriptors.")

        # Topological descriptors capture the connectivity of the molecule,
        # providing insights into the molecular shape and branching, which
        # are crucial for understanding how a molecule interacts with its environment.

        # Calculate the Wiener Index
        wiener_index = self.calculate_wiener_index(mol)

        # Calculate the Balaban J Index
        balaban_j_index = self.calculate_balaban_j_index(mol)

        # Calculate the Harary Index
        harary_index = self.calculate_harary_index(mol)

        # Calculate the Zagreb Indices
        zagreb_indices = self.calculate_zagreb_indices(mol)

        return {
            'WienerIndex': wiener_index,  # Sum of all distances between atoms, related to molecular branching
            'BalabanJ': balaban_j_index,  # A measure of molecular connectivity and branching
            'HararyIndex': harary_index,  # Sum of reciprocal distances, indicating molecular compactness
            'FirstZagrebIndex': zagreb_indices['FirstZagrebIndex'],  # Reflects molecular branching
            'SecondZagrebIndex': zagreb_indices['SecondZagrebIndex']  # Relates to the interaction of adjacent atoms
        }

    def calculate_wiener_index(self, mol):
        """
        Calculate the Wiener index for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            float: Wiener index.
        """
        # The Wiener Index is a topological descriptor that sums the distances
        # between all pairs of atoms. It is related to molecular branching and
        # can be predictive of physical properties like boiling point.
        distance_matrix = rdmolops.GetDistanceMatrix(mol)
        wiener_index = distance_matrix.sum() / 2  # Sum of all distances divided by 2 to avoid double-counting
        return wiener_index

    def calculate_balaban_j_index(self, mol):
        """
        Calculate the Balaban J index for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            float: Balaban J index.
        """
        # The Balaban J index is another measure of molecular connectivity and branching,
        # providing insights into molecular shape, which can influence how the molecule
        # interacts with biological targets.
        num_atoms = mol.GetNumAtoms()
        num_bonds = mol.GetNumBonds()

        if num_bonds == 0 or num_atoms <= 2:
            return 0.0

        distance_matrix = rdmolops.GetDistanceMatrix(mol)
        diameter = np.max(distance_matrix)  # Maximum distance in the distance matrix

        balaban_j = (num_bonds / (num_atoms - 1)) * (1 / diameter) * np.sum(1 / distance_matrix[distance_matrix > 0])
        return balaban_j

    def calculate_harary_index(self, mol):
        """
        Calculate the Harary index for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            float: Harary index.
        """
        # The Harary Index sums the reciprocals of all distances between atoms,
        # providing a measure of molecular compactness. Compact molecules may have
        # different biological interactions compared to more extended structures.
        distance_matrix = rdmolops.GetDistanceMatrix(mol)
        with np.errstate(divide='ignore', invalid='ignore'):  # Handle division by zero and invalid values
            reciprocal_distances = np.where(distance_matrix > 0, 1 / distance_matrix, 0)
        harary_index = np.sum(reciprocal_distances) / 2  # Sum of reciprocals divided by 2 to avoid double-counting
        return harary_index

    def calculate_zagreb_indices(self, mol):
        """
        Calculate the first and second Zagreb indices for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            dict: Dictionary containing the first and second Zagreb indices.
        """
        # The Zagreb Indices reflect the molecular branching and are related
        # to the stability and reactivity of the molecule. These indices can
        # help predict how a molecule behaves in different environments.

        # First Zagreb Index (M1): Sum of the squares of the degrees of the vertices
        first_zagreb_index = 0
        for atom in mol.GetAtoms():
            degree = atom.GetDegree()
            first_zagreb_index += degree ** 2

        # Second Zagreb Index (M2): Sum of the products of degrees of adjacent vertices
        second_zagreb_index = 0
        for bond in mol.GetBonds():
            degree1 = bond.GetBeginAtom().GetDegree()
            degree2 = bond.GetEndAtom().GetDegree()
            second_zagreb_index += degree1 * degree2

        return {
            'FirstZagrebIndex': first_zagreb_index,
            'SecondZagrebIndex': second_zagreb_index
        }

    def calculate_electronic_descriptors(self, mol):
        """
        Calculate electronic descriptors for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            dict: Dictionary of electronic descriptors.
        """
        logger.debug("Calculating electronic descriptors.")

        # Electronic descriptors, like partial charges, provide information on
        # how electrons are distributed in the molecule. These descriptors are
        # crucial for understanding how the molecule will interact with biological
        # systems, as charge distribution affects binding to proteins and other targets.
        return {
            'MaxAbsPartialCharge': Descriptors.MaxAbsPartialCharge(mol),  # Maximum absolute partial charge
            'MinAbsPartialCharge': Descriptors.MinAbsPartialCharge(mol),  # Minimum absolute partial charge
            'MaxPartialCharge': Descriptors.MaxPartialCharge(mol),  # Maximum partial charge
            'MinPartialCharge': Descriptors.MinPartialCharge(mol),  # Minimum partial charge
        }

    def calculate_bcut_descriptors(self, mol):
        """
        Calculate BCUT descriptors for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            dict: Dictionary of BCUT descriptors.
        """
        logger.debug("Calculating BCUT descriptors.")

        # BCUT descriptors capture both the atomic properties (like charges) and
        # the molecular connectivity. These descriptors help predict how the molecule
        # will interact with biological systems and environmental factors.

        # Generate atomic charges (e.g., Gasteiger charges)
        AllChem.ComputeGasteigerCharges(mol)

        # Retrieve atomic charges
        atomic_charges = np.array([float(atom.GetProp('_GasteigerCharge')) for atom in mol.GetAtoms()])

        # Check for NaNs or infinities in atomic charges
        if not np.all(np.isfinite(atomic_charges)):
            logger.error("Invalid atomic charges (NaNs or infinities) detected in the molecule. Skipping BCUT calculation.")
            return {
                'BCUT2D_MWLOW': np.nan,
                'BCUT2D_MWUP': np.nan
            }

        # Build an adjacency matrix
        adjacency_matrix = rdmolops.GetAdjacencyMatrix(mol)

        # Create a diagonal matrix of atomic properties
        atomic_property_matrix = np.diag(atomic_charges)

        # Calculate the BCUT matrix: adjacency matrix weighted by atomic properties
        bcut_matrix = np.dot(atomic_property_matrix, adjacency_matrix)

        # Compute eigenvalues of the BCUT matrix
        try:
            eigenvalues = np.linalg.eigvals(bcut_matrix)
        except np.linalg.LinAlgError as e:
            logger.error(f"Linear algebra error during BCUT calculation: {e}")
            return {
                'BCUT2D_MWLOW': np.nan,
                'BCUT2D_MWUP': np.nan
            }

        # Sort eigenvalues to obtain the low and high values
        bcut_mwlow = np.min(eigenvalues)
        bcut_mwup = np.max(eigenvalues)

        return {
            'BCUT2D_MWLOW': bcut_mwlow,  # Lower eigenvalue of the BCUT matrix, related to molecular interaction properties
            'BCUT2D_MWUP': bcut_mwup  # Upper eigenvalue of the BCUT matrix, related to molecular interaction properties
        }

    def calculate_hybrid_descriptors(self, mol):
        """
        Calculate hybrid descriptors for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            dict: Dictionary of hybrid descriptors.
        """
        logger.debug("Calculating hybrid descriptors.")

        # Hybrid descriptors combine multiple types of molecular properties,
        # like electronic and steric properties, providing a comprehensive
        # view of how the molecule might interact in biological systems.

        # Calculate BCUT descriptors using custom implementation
        bcut_descriptors = self.calculate_bcut_descriptors(mol)

        return {
            'EState_VSA1': Descriptors.EState_VSA1(mol),  # Electronic state surface area descriptor
            'EState_VSA2': Descriptors.EState_VSA2(mol),  # Another electronic state surface area descriptor
            'BCUT2D_MWLOW': bcut_descriptors['BCUT2D_MWLOW'],  # Lower BCUT eigenvalue
            'BCUT2D_MWUP': bcut_descriptors['BCUT2D_MWUP']  # Upper BCUT eigenvalue
        }

    def calculate_fragment_based_descriptors(self, mol):
        """
        Calculate fragment-based descriptors for a given RDKit molecule.

        Args:
            mol (rdkit.Chem.rdchem.Mol): RDKit molecule object.

        Returns:
            dict: Dictionary of fragment-based descriptors.
        """
        logger.debug("Calculating fragment-based descriptors.")

        # Fragment-based descriptors count specific substructures, such as
        # aromatic and aliphatic rings, which are important for understanding
        # molecular stability and interaction with biological systems.
        return {
            'NumAromaticRings': Descriptors.NumAromaticRings(mol),  # Number of aromatic rings, related to molecular stability and reactivity
            'NumAliphaticRings': Descriptors.NumAliphaticRings(mol)  # Number of aliphatic rings, related to molecular flexibility and reactivity
        }

    def calculate_all_descriptors(self, smiles):
        """
        Calculate all descriptors for a given SMILES string.

        Args:
            smiles (str): SMILES string of the molecule.

        Returns:
            dict: Dictionary of all descriptors or None if molecule is invalid.
        """
        logger.info(f"Calculating descriptors for SMILES: {smiles}")
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            logger.warning(f"Invalid SMILES string: {smiles}")
            return None

        descriptors = {}
        descriptors.update(self.calculate_constitutional_descriptors(mol))
        descriptors.update(self.calculate_topological_descriptors(mol))
        descriptors.update(self.calculate_electronic_descriptors(mol))
        descriptors.update(self.calculate_hybrid_descriptors(mol))
        descriptors.update(self.calculate_fragment_based_descriptors(mol))
        print(descriptors)

        return descriptors

    def process_smiles(self):
        """
        Calculate descriptors for each SMILES string in the DataFrame and store the results.

        Returns:
            None
        """
        logger.info("Processing SMILES to calculate descriptors.")
        smiles_list = self.smiles_df['SMILES']
        descriptors_list = [self.calculate_all_descriptors(smiles) for smiles in smiles_list]
        self.descriptors_df = pd.DataFrame(descriptors_list, index=smiles_list).reset_index().rename(columns={"index": "SMILES"})
        logger.info("Descriptor calculation completed.")

    def get_final_dataframe(self):
        """
        Run the full process and return the final DataFrame with SMILES and their descriptors.

        Returns:
            pd.DataFrame: DataFrame containing SMILES and their corresponding descriptors.
        """
        logger.info("Generating final DataFrame with descriptors.")
        self.process_smiles()
        logger.info("Final DataFrame is ready.")
        self.descriptors_df.to_csv('Analysis&Modeling/Integrated Analysis/1.Life_Stage_Analysis/DataSet/descriptors.csv', index=False)
        return self.descriptors_df

# Example usage:
calculator = DescriptorCalculator('Analysis&Modeling/Integrated Analysis/1.Life_Stage_Analysis/DataSet/5.LifeStageData-CompoundsCurated.sdf')
final_df = calculator.get_final_dataframe()
final_df

{'NumAtoms': 14, 'NumBonds': 14, 'NumAromaticAtoms': 0, 'NumRotatableBonds': 5, 'WienerIndex': 301.0, 'BalabanJ': 11.641758241758241, 'HararyIndex': 37.83571428571429, 'FirstZagrebIndex': 64, 'SecondZagrebIndex': 72, 'MaxAbsPartialCharge': 0.3429836890948602, 'MinAbsPartialCharge': 0.30596634526037897, 'MaxPartialCharge': 0.3429836890948602, 'MinPartialCharge': -0.30596634526037897, 'EState_VSA1': 7.670279738874966, 'EState_VSA2': 0.0, 'BCUT2D_MWLOW': (-0.01094596634507425-0.07166715550167674j), 'BCUT2D_MWUP': (0.010945966345074252+0.07166715550167672j), 'NumAromaticRings': 0, 'NumAliphaticRings': 1}
{'NumAtoms': 19, 'NumBonds': 20, 'NumAromaticAtoms': 12, 'NumRotatableBonds': 2, 'WienerIndex': 678.0, 'BalabanJ': 13.773456790123458, 'HararyIndex': 61.980555555555554, 'FirstZagrebIndex': 98, 'SecondZagrebIndex': 112, 'MaxAbsPartialCharge': 0.2008590133369853, 'MinAbsPartialCharge': 0.08434412664200656, 'MaxPartialCharge': 0.2008590133369853, 'MinPartialCharge': -0.08434412664200656, 'ES

ERROR:__main__:Invalid atomic charges (NaNs or infinities) detected in the molecule. Skipping BCUT calculation.
ERROR:__main__:Invalid atomic charges (NaNs or infinities) detected in the molecule. Skipping BCUT calculation.


{'NumAtoms': 9, 'NumBonds': 8, 'NumAromaticAtoms': 0, 'NumRotatableBonds': 4, 'WienerIndex': 114.0, 'BalabanJ': 4.8102040816326515, 'HararyIndex': 16.835714285714285, 'FirstZagrebIndex': 32, 'SecondZagrebIndex': 30, 'MaxAbsPartialCharge': 0.4658776881575565, 'MinAbsPartialCharge': 0.3021228679209696, 'MaxPartialCharge': 0.3021228679209696, 'MinPartialCharge': -0.4658776881575565, 'EState_VSA1': 0.0, 'EState_VSA2': 5.969305287951849, 'BCUT2D_MWLOW': (-0.08301806549959183+0j), 'BCUT2D_MWUP': (0.08301806549959179+0j), 'NumAromaticRings': 0, 'NumAliphaticRings': 0}
{'NumAtoms': 10, 'NumBonds': 10, 'NumAromaticAtoms': 6, 'NumRotatableBonds': 0, 'WienerIndex': 110.0, 'BalabanJ': 10.348148148148148, 'HararyIndex': 23.283333333333335, 'FirstZagrebIndex': 48, 'SecondZagrebIndex': 54, 'MaxAbsPartialCharge': 0.08423147288483936, 'MinAbsPartialCharge': 0.0779030660373676, 'MaxPartialCharge': 0.0779030660373676, 'MinPartialCharge': -0.08423147288483936, 'EState_VSA1': 0.0, 'EState_VSA2': 0.0, 'BCUT

ERROR:__main__:Invalid atomic charges (NaNs or infinities) detected in the molecule. Skipping BCUT calculation.


{'NumAtoms': 28, 'NumBonds': 29, 'NumAromaticAtoms': 12, 'NumRotatableBonds': 8, 'WienerIndex': 2322.0, 'BalabanJ': 12.946037643846179, 'HararyIndex': 102.45226342285166, 'FirstZagrebIndex': 134, 'SecondZagrebIndex': 153, 'MaxAbsPartialCharge': 0.42653754742916117, 'MinAbsPartialCharge': 0.31039666304800784, 'MaxPartialCharge': 0.31039666304800784, 'MinPartialCharge': -0.42653754742916117, 'EState_VSA1': 0.0, 'EState_VSA2': 11.938610575903699, 'BCUT2D_MWLOW': (-0.07694170898645177+0j), 'BCUT2D_MWUP': (0.07694170898645153+0j), 'NumAromaticRings': 2, 'NumAliphaticRings': 0}
{'NumAtoms': 20, 'NumBonds': 20, 'NumAromaticAtoms': 6, 'NumRotatableBonds': 6, 'WienerIndex': 880.0, 'BalabanJ': 12.055165461864027, 'HararyIndex': 62.98823953823954, 'FirstZagrebIndex': 94, 'SecondZagrebIndex': 106, 'MaxAbsPartialCharge': 0.3941232075520646, 'MinAbsPartialCharge': 0.26894513302429984, 'MaxPartialCharge': 0.26894513302429984, 'MinPartialCharge': -0.3941232075520646, 'EState_VSA1': 34.41944964982841, 

ERROR:__main__:Invalid atomic charges (NaNs or infinities) detected in the molecule. Skipping BCUT calculation.
ERROR:__main__:Invalid atomic charges (NaNs or infinities) detected in the molecule. Skipping BCUT calculation.
ERROR:__main__:Invalid atomic charges (NaNs or infinities) detected in the molecule. Skipping BCUT calculation.


{'NumAtoms': 24, 'NumBonds': 27, 'NumAromaticAtoms': 10, 'NumRotatableBonds': 3, 'WienerIndex': 1233.0, 'BalabanJ': 19.370342898208516, 'HararyIndex': 90.75364357864358, 'FirstZagrebIndex': 136, 'SecondZagrebIndex': 167, 'MaxAbsPartialCharge': 0.47749701156478486, 'MinAbsPartialCharge': 0.34072512112828185, 'MaxPartialCharge': 0.34072512112828185, 'MinPartialCharge': -0.47749701156478486, 'EState_VSA1': 17.215316520898284, 'EState_VSA2': 28.851220142292743, 'BCUT2D_MWLOW': (-0.07570212108759132+0j), 'BCUT2D_MWUP': (0.0767405215960273+0j), 'NumAromaticRings': 2, 'NumAliphaticRings': 2}
{'NumAtoms': 17, 'NumBonds': 16, 'NumAromaticAtoms': 5, 'NumRotatableBonds': 3, 'WienerIndex': 7000000168.0, 'BalabanJ': 7.061904901904762e-07, 'HararyIndex': 35.309524509523804, 'FirstZagrebIndex': 86, 'SecondZagrebIndex': 83, 'MaxAbsPartialCharge': nan, 'MinAbsPartialCharge': nan, 'MaxPartialCharge': nan, 'MinPartialCharge': nan, 'EState_VSA1': 7.808509083810693, 'EState_VSA2': 0.0, 'BCUT2D_MWLOW': nan,

ERROR:__main__:Invalid atomic charges (NaNs or infinities) detected in the molecule. Skipping BCUT calculation.


{'NumAtoms': 11, 'NumBonds': 11, 'NumAromaticAtoms': 6, 'NumRotatableBonds': 1, 'WienerIndex': 152.0, 'BalabanJ': 9.716666666666667, 'HararyIndex': 26.5, 'FirstZagrebIndex': 52, 'SecondZagrebIndex': 58, 'MaxAbsPartialCharge': 0.5042595843842756, 'MinAbsPartialCharge': 0.33532252564441734, 'MaxPartialCharge': 0.33532252564441734, 'MinPartialCharge': -0.5042595843842756, 'EState_VSA1': 11.718817121235755, 'EState_VSA2': 11.3129633249809, 'BCUT2D_MWLOW': (-0.07866818468446252-0.2729428820620899j), 'BCUT2D_MWUP': (0.07866818468446257+0.2729428820620899j), 'NumAromaticRings': 1, 'NumAliphaticRings': 0}
{'NumAtoms': 10, 'NumBonds': 10, 'NumAromaticAtoms': 6, 'NumRotatableBonds': 1, 'WienerIndex': 120.0, 'BalabanJ': 8.37037037037037, 'HararyIndex': 22.6, 'FirstZagrebIndex': 46, 'SecondZagrebIndex': 50, 'MaxAbsPartialCharge': 0.507966178790397, 'MinAbsPartialCharge': 0.33518285963636446, 'MaxPartialCharge': 0.33518285963636446, 'MinPartialCharge': -0.507966178790397, 'EState_VSA1': 5.969305287

Unnamed: 0,SMILES,NumAtoms,NumBonds,NumAromaticAtoms,NumRotatableBonds,WienerIndex,BalabanJ,HararyIndex,FirstZagrebIndex,SecondZagrebIndex,MaxAbsPartialCharge,MinAbsPartialCharge,MaxPartialCharge,MinPartialCharge,EState_VSA1,EState_VSA2,BCUT2D_MWLOW,BCUT2D_MWUP,NumAromaticRings,NumAliphaticRings
0,O=P1(N(CCCl)CCCl)NCCCO1,14,14,0,5,301.0,11.641758,37.835714,64,72,0.342984,0.305966,0.342984,-0.305966,7.670280,0.000000,-0.010946-0.071667j,0.010946+0.071667j,0,1
1,Clc1ccc(C(c2ccc(Cl)cc2)C(Cl)(Cl)Cl)cc1,19,20,12,2,678.0,13.773457,61.980556,98,112,0.200859,0.084344,0.200859,-0.084344,3.792536,5.917906,-0.076704+0.000000j,0.076704+0.000000j,2,0
2,OCC(O)c1oc(O)c(O)c1O,12,12,5,2,188.0,11.145455,30.650000,58,68,0.501663,0.329934,0.329934,-0.501663,35.916427,0.000000,-0.145903-0.207875j,0.146135+0.334819j,1,0
3,CCCCOCCOCCOCc1cc2c(cc1CCC)OCO2,24,25,6,13,1767.0,10.478616,77.122615,110,122,0.453585,0.230801,0.230801,-0.453585,0.000000,0.000000,-0.082825-0.264801j,0.110881+0.000000j,1,1
4,CC(=O)Nc1ccc2c(c1)Cc1ccccc1-2,17,19,12,1,513.0,14.053131,53.253968,92,109,0.326328,0.220748,0.220748,-0.326328,0.000000,5.907180,-0.102957+0.000000j,0.102955+0.000000j,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1460,CC(C)(CC(=O)O)C(=O)O,10,9,0,3,120.0,8.786667,21.966667,44,46,0.481204,0.309158,0.309158,-0.481204,17.353601,6.420822,-0.003302-0.452057j,0.003302+0.452057j,0,0
1461,CC(C)(C)c1cc([N+](=O)[O-])cc([N+](=O)[O-])c1O,17,17,6,2,472.0,18.735417,52.900000,86,99,0.501880,0.317381,0.317381,-0.501880,32.385897,5.563451,-0.308313+0.000000j,0.308313+0.000000j,1,0
1462,O=C1NC(=O)C(c2ccccc2)(c2ccc(O)cc2)N1,20,22,12,2,723.0,17.938587,69.715873,108,131,0.507967,0.322409,0.322409,-0.507967,17.477219,5.749512,-0.107028+0.000000j,0.107009+0.000000j,2,1
1463,CCCCCCCCN(C)C,11,10,0,7,212.0,5.026367,22.618651,40,38,0.309400,0.002484,-0.002484,-0.309400,0.000000,0.000000,-0.099616+0.000000j,0.099616+0.000000j,0,0
