In [1]:
import pandas as pd
# Load the COCONUT CSV export (per the file name). low_memory=False makes pandas
# parse the file in one pass, avoiding mixed-type inference across chunks.
df = pd.read_csv("coconut_csv-11-2025.csv", low_memory=False)

In [2]:
# Show the column labels of the raw table.
# Other quick-look calls to swap in while exploring: df.info(), df.shape,
# df.dtypes, df.head(), df.sample(8), df.describe(), df.isnull().sum(),
# df.columns.to_list()
df.columns

Index(['identifier', 'canonical_smiles', 'standard_inchi',
       'standard_inchi_key', 'name', 'iupac_name', 'annotation_level',
       'total_atom_count', 'heavy_atom_count', 'molecular_weight',
       'exact_molecular_weight', 'molecular_formula', 'alogp',
       'topological_polar_surface_area', 'rotatable_bond_count',
       'hydrogen_bond_acceptors', 'hydrogen_bond_donors',
       'hydrogen_bond_acceptors_lipinski', 'hydrogen_bond_donors_lipinski',
       'lipinski_rule_of_five_violations', 'aromatic_rings_count',
       'qed_drug_likeliness', 'formal_charge', 'fractioncsp3',
       'number_of_minimal_rings', 'van_der_walls_volume', 'contains_sugar',
       'contains_ring_sugars', 'contains_linear_sugars', 'murcko_framework',
       'np_likeness', 'chemical_class', 'chemical_sub_class',
       'chemical_super_class', 'direct_parent_classification',
       'np_classifier_pathway', 'np_classifier_superclass',
       'np_classifier_class', 'np_classifier_is_glycoside', 'organisms',


In [2]:
# Keep only the rows whose "collections" field mentions Phyto4Health
# (case-insensitive; rows with a missing value are excluded via na=False).
# .copy() makes df_filtrado an independent frame, so adding a column to it
# later is not an assignment on a slice of df (SettingWithCopyWarning).
df_filtrado = df[df["collections"].str.contains("Phyto4Health", case=False, na=False)].copy()

In [3]:
# Print the column labels of the filtered frame (row filtering leaves the columns unchanged).
print(df_filtrado.columns)

Index(['identifier', 'canonical_smiles', 'standard_inchi',
       'standard_inchi_key', 'name', 'iupac_name', 'annotation_level',
       'total_atom_count', 'heavy_atom_count', 'molecular_weight',
       'exact_molecular_weight', 'molecular_formula', 'alogp',
       'topological_polar_surface_area', 'rotatable_bond_count',
       'hydrogen_bond_acceptors', 'hydrogen_bond_donors',
       'hydrogen_bond_acceptors_lipinski', 'hydrogen_bond_donors_lipinski',
       'lipinski_rule_of_five_violations', 'aromatic_rings_count',
       'qed_drug_likeliness', 'formal_charge', 'fractioncsp3',
       'number_of_minimal_rings', 'van_der_walls_volume', 'contains_sugar',
       'contains_ring_sugars', 'contains_linear_sugars', 'murcko_framework',
       'np_likeness', 'chemical_class', 'chemical_sub_class',
       'chemical_super_class', 'direct_parent_classification',
       'np_classifier_pathway', 'np_classifier_superclass',
       'np_classifier_class', 'np_classifier_is_glycoside', 'organisms',


In [4]:
# Five most frequent "collections" strings among the filtered rows.
# (Alternative quick looks: df_filtrado.head(), df_filtrado.shape)
df_filtrado["collections"].value_counts().nlargest(n=5)

collections
CMAUP (cCollective molecular activities of useful plants)|NPASS|Phyto4Health|Super Natural II|WikiData Natural Products                                                                 134
CMAUP (cCollective molecular activities of useful plants)|NPASS|Phyto4Health|WikiData Natural Products                                                                                   87
CMAUP (cCollective molecular activities of useful plants)|NPASS|Phyto4Health                                                                                                             54
CMAUP (cCollective molecular activities of useful plants)|NPASS|Phyto4Health|Super Natural II|TCMDB-Taiwan (Traditional Chinese Medicine database)|WikiData Natural Products|ZINC NP     42
NPASS|Phyto4Health|WikiData Natural Products                                                                                                                                             25
Name: count, dtype: int64

In [5]:
# Row counts per NP Classifier pathway, limited to the 10 most frequent values
# (value_counts leaves out missing values by default).
df_filtrado['np_classifier_pathway'].value_counts().head(10)

np_classifier_pathway
Shikimates and Phenylpropanoids    1105
Terpenoids                          822
Alkaloids                           332
Fatty acids                         256
Polyketides                         181
Amino acids and Peptides             76
Carbohydrates                        38
Name: count, dtype: int64

In [6]:
# Import the RDKit and MolVS tools used for structure curation
from rdkit import Chem
from rdkit.Chem import PandasTools
from rdkit.Chem import rdMolDescriptors
from molvs.standardize import Standardizer
from molvs.charge import Uncharger, Reionizer
from molvs.fragment import LargestFragmentChooser
from molvs.tautomer import TautomerCanonicalizer
from rdkit.Chem.rdmolops import GetFormalCharge, RemoveStereochemistry

In [7]:
# Instantiate the MolVS helpers once; pretreatment() calls each instance directly on an RDKit Mol.
STD = Standardizer() # Runs the MolVS standardization pipeline on a molecule.
LFC = LargestFragmentChooser() # Keeps the largest fragment of a multi-fragment molecule (e.g. drops the counter-ion of a salt).
UC = Uncharger() # Neutralizes ionized acids and bases by adding/removing hydrogens.
RI = Reionizer() # Fixes charge placement so that the strongest acids are the ones ionized.
TC = TautomerCanonicalizer()  # Returns a canonical tautomer that is "reasonable" from a chemist's point of view, but is not guaranteed to be the most energetically favourable.

In [8]:
def pretreatment(notations):
    """Curate one SMILES string and return its standardized SMILES.

    Steps, in order: parse with RDKit, standardize (STD), keep the largest
    fragment (LFC), reject molecules containing elements outside the allowed
    set, neutralize (UC), reionize (RI), strip stereochemistry and
    canonicalize the tautomer (TC).

    Parameters
    ----------
    notations : str
        SMILES string to curate.

    Returns
    -------
    str
        The curated SMILES on success, otherwise one of the sentinel strings
        that the downstream filtering looks for:
        "Error 1" - RDKit could not parse the input,
        "Error 2" - the molecule contains an element outside the allowed set,
        "Nope"    - any other exception raised while processing.
    """
    allowed_elements = {"H", "B", "C", "N", "O", "F", "Si", "P", "S", "Cl", "Se", "Br", "I"}
    try:
        mol = Chem.MolFromSmiles(notations)
        # MolFromSmiles signals a parse failure by returning None; compare by
        # identity rather than with ==.
        if mol is None:
            return "Error 1"

        mol = STD(mol)
        mol = LFC(mol)

        # The element check runs after the largest-fragment step, so it only
        # looks at the fragment that was kept.
        actual_elements = {atom.GetSymbol() for atom in mol.GetAtoms()}
        if not actual_elements <= allowed_elements:
            return "Error 2"

        mol = UC(mol)
        mol = RI(mol)
        RemoveStereochemistry(mol)  # works in place; the return value is not used
        mol = TC(mol)
        return Chem.MolToSmiles(mol)
    except Exception:
        # Best effort: one bad record must not abort the whole column. Unlike a
        # bare "except:", this lets KeyboardInterrupt/SystemExit propagate.
        return "Nope"

In [9]:
# Run the curation on every canonical SMILES and store the result (curated
# SMILES or an error sentinel) in a new column, then display the frame.
df_filtrado["Smiles standarization"] = df_filtrado["canonical_smiles"].map(pretreatment)
df_filtrado

[02:44:31] Explicit valence for atom # 13 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 17 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 13 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 17 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 13 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 17 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 13 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 17 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 13 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 17 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 13 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 17 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom # 13 P, 7, is greater than permitted
[02:44:31] Explicit valence for atom #

Unnamed: 0,identifier,canonical_smiles,standard_inchi,standard_inchi_key,name,iupac_name,annotation_level,total_atom_count,heavy_atom_count,molecular_weight,...,np_classifier_pathway,np_classifier_superclass,np_classifier_class,np_classifier_is_glycoside,organisms,collections,dois,synonyms,cas,Smiles standarization
18,CNP0286854.0,C=CCC1=CC(OC)=C(OC)C(OC)=C1,InChI=1S/C12H16O3/c1-5-6-9-7-10(13-2)12(15-4)1...,BPLQKQKXWHCZSS-UHFFFAOYSA-N,Elemicin,"5-allyl-1,2,3-trimethoxy-benzene",5,31,15,208.26,...,Shikimates and Phenylpropanoids,Phenylpropanoids (C6-C3),Cinnamic acids and derivatives,False,Aconitum episcopale|Aconitum leucostomum|Acoru...,ANPDB|Australian natural products|BIOFACQUIM|C...,10.1002/(SICI)1099-1026(199909/10)14:5<312::AI...,487-11-6|5-Allyl-1|2|3-trimethoxybenzene|Elemi...,487-11-6,C=CCc1cc(OC)c(OC)c(OC)c1
42,CNP0292843.0,COC1=CC(OC)=C2C=CC(=O)OC2=C1,InChI=1S/C11H10O4/c1-13-7-5-9(14-2)8-3-4-11(12...,NXJCRELRQHZBQA-UHFFFAOYSA-N,citropten,"5,7-dimethoxychromen-2-one",5,25,15,206.20,...,Shikimates and Phenylpropanoids,Coumarins,Simple coumarins,False,Adenia fruticosa|Aeromonas hydrophila|Artemisi...,AnalytiCon Discovery NPs|Australian natural pr...,10.1016/0031-9422(89)85042-3|10.1016/0031-9422...,{0:5|7-Dimethoxycoumarin|1:Limettin|3:487-06-9...,487-06-9,COc1cc(OC)c2ccc(=O)oc2c1
45,CNP0203265.1,CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C=C/C=C(C)/C=C/C=...,InChI=1S/C40H56O3/c1-29(17-13-19-31(3)21-23-36...,VYIRVAXUEZSDNC-RDJLEWNRSA-N,Capsanthin,"(2~{E},4~{E},6~{E},8~{E},10~{E},12~{E},14~{E},...",5,99,43,584.89,...,Terpenoids,Carotenoids (C40),"Carotenoids (C40, β-κ)",False,Asparagus officinalis|Berberis spp.|Capsicum a...,CMAUP (cCollective molecular activities of use...,10.1002/CHIN.198644308|10.1002/HLCA.1985068062...,465-42-9|all-trans-capsanthin|Capsanthin\/caps...,465-42-9,CC(C=CC=C(C)C=CC(=O)C1(C)CC(O)CC1(C)C)=CC=CC=C...
62,CNP0239078.0,COC1=CC(C=CCO)=CC=C1O,InChI=1S/C10H12O3/c1-13-10-7-8(3-2-6-11)4-5-9(...,JMFRWRFFLBVWSI-UHFFFAOYSA-N,"Phenol, 4-(3-hydroxy-1-propenyl)-2-methoxy-",4-(3-hydroxyprop-1-enyl)-2-methoxy-phenol,5,25,13,180.20,...,Shikimates and Phenylpropanoids,Phenylpropanoids (C6-C3),Cinnamic acids and derivatives,False,Acorus gramineus|Acorus gramineus|Acorus tatar...,ANPDB|Australian natural products|CMAUP (cColl...,10.1002/CHIN.200533256|10.1002/OMS.1210270317|...,DTXSID9060029|Oprea1_201369|AKOS028108458|DB-0...,,COc1cc(C=CCO)ccc1O
63,CNP0305890.0,O=C(C=CC1=CC=C(O)C=C1)C1=C(O)C=C(O)C=C1O,InChI=1S/C15H12O5/c16-10-4-1-9(2-5-10)3-6-12(1...,YQHMWTPYORBCMF-UHFFFAOYSA-N,"3-(4-hydroxyphenyl)-1-(2,4,6-trihydroxyphenyl)...","3-(4-hydroxyphenyl)-1-(2,4,6-trihydroxyphenyl)...",5,32,20,272.26,...,Shikimates and Phenylpropanoids,Flavonoids,Chalcones,False,Absidia repens|Abuta panurensis|Adoxophyes ora...,AnalytiCon Discovery NPs|Australian natural pr...,10.1016/0305-1978(91)90071-7|10.1016/J.INDCROP...,{0:Chalconaringenin|1:Chalcononaringenin|2:Iso...,,O=C(C=Cc1ccc(O)cc1)c1c(O)cc(O)cc1O
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715924,CNP0407964.3,O=C1OC[C@H]2OC(O)[C@@H]3OC(=O)C4=CC(O)=C(O)C(O...,InChI=1S/C34H24O22/c35-10-1-6-15(23(43)19(10)3...,IYMHVUYNBVWXKH-ZITZVVOASA-N,Pedunculagin,"(1~{R},2~{S},19~{R},22~{R})-7,8,9,12,13,14,20,...",5,80,56,784.54,...,Shikimates and Phenylpropanoids,Phenolic acids (C6-C1),Gallotannins,True,Agrimonia pilosa|Alchemilla xanthochlora|Alnus...,ANPDB|Australian natural products|CMAUP (cColl...,10.1002/JCCS.199700025|10.1002/PCA.548|10.1002...,7045-42-3|CHEBI:7948|CHEMBL506204|(1R|2S|19R|2...,7045-42-3,O=C1OCC2OC(O)C3OC(=O)c4cc(O)c(O)c(O)c4-c4c(cc(...
715955,CNP0110439.1,O=C1C[C@H](O)[C@@H](CO)O1,"InChI=1S/C5H8O4/c6-2-4-3(7)1-5(8)9-4/h3-4,6-7H...",YIXDEYPPAGPYDP-IUYQGCFVSA-N,34371-14-7,"(4~{S},5~{R})-4-hydroxy-5-(hydroxymethyl)tetra...",5,17,9,132.11,...,,,,False,Actinomadura hibisca|Anastrepha suspensa|Aneth...,CMAUP (cCollective molecular activities of use...,10.1002/HLCA.200690079|10.1016/S0031-9422(01)0...,(4S|5R)-4-hydroxy-5-(hydroxymethyl)dihydrofura...,73209-20-8,O=C1CC(O)C(CO)O1
715978,CNP0089019.0,CC(=O)C1=CC=C(C(=O)O)C=C1,InChI=1S/C9H8O3/c1-6(10)7-2-4-8(5-3-7)9(11)12/...,QBHDSQZASIBAAI-UHFFFAOYSA-N,4-ACETYLBENZOIC ACID,4-acetylbenzoic acid,4,20,12,164.16,...,Shikimates and Phenylpropanoids,Phenolic acids (C6-C1),Simple phenolic acids,False,Citrus reticulata|Citrus unshiu,CMAUP (cCollective molecular activities of use...,,586-89-0|Benzoic acid| 4-acetyl-|p-acetylbenzo...,586-89-0,CC(=O)c1ccc(C(=O)O)cc1
716293,CNP0180604.0,COC1=CC=C2C(=C1O)OC1=C(OC)C(OC)=C(O)C=C12,InChI=1S/C15H14O6/c1-18-10-5-4-7-8-6-9(16)14(1...,BWNWAKZIGLEKRI-UHFFFAOYSA-N,epsilon-cotonefuran,"3,4,7-trimethoxydibenzofuran-2,6-diol",5,35,21,290.27,...,Shikimates and Phenylpropanoids,Terphenyls,p-Terphenyls,False,Aplidium meridianum|Astragalus pterocarpus|Ber...,CMAUP (cCollective molecular activities of use...,10.1016/0031-9422(94)00636-8|10.1021/NP900460J...,CHEMBL1080658|epsilon-Cotonefuran,,COc1ccc2c(oc3c(OC)c(OC)c(O)cc32)c1O


In [10]:
# Drop every row whose curation failed. The sentinel values are:
#   "Error 1" - RDKit could not read the SMILES
#   "Error 2" - the molecule contains an element that is not allowed
#   "Nope"    - any other error
failed_curation = df_filtrado["Smiles standarization"].isin(["Error 1", "Error 2", "Nope"])
df_filtrado = df_filtrado[~failed_curation].reset_index(drop=True)

In [11]:
# Collapse rows that share the same curated SMILES, keeping the first
# occurrence, and renumber the rows from 0.
df_filtrado = df_filtrado.drop_duplicates(subset="Smiles standarization", ignore_index=True)
df_filtrado.head(3)

Unnamed: 0,identifier,canonical_smiles,standard_inchi,standard_inchi_key,name,iupac_name,annotation_level,total_atom_count,heavy_atom_count,molecular_weight,...,np_classifier_pathway,np_classifier_superclass,np_classifier_class,np_classifier_is_glycoside,organisms,collections,dois,synonyms,cas,Smiles standarization
0,CNP0286854.0,C=CCC1=CC(OC)=C(OC)C(OC)=C1,InChI=1S/C12H16O3/c1-5-6-9-7-10(13-2)12(15-4)1...,BPLQKQKXWHCZSS-UHFFFAOYSA-N,Elemicin,"5-allyl-1,2,3-trimethoxy-benzene",5,31,15,208.26,...,Shikimates and Phenylpropanoids,Phenylpropanoids (C6-C3),Cinnamic acids and derivatives,False,Aconitum episcopale|Aconitum leucostomum|Acoru...,ANPDB|Australian natural products|BIOFACQUIM|C...,10.1002/(SICI)1099-1026(199909/10)14:5<312::AI...,487-11-6|5-Allyl-1|2|3-trimethoxybenzene|Elemi...,487-11-6,C=CCc1cc(OC)c(OC)c(OC)c1
1,CNP0292843.0,COC1=CC(OC)=C2C=CC(=O)OC2=C1,InChI=1S/C11H10O4/c1-13-7-5-9(14-2)8-3-4-11(12...,NXJCRELRQHZBQA-UHFFFAOYSA-N,citropten,"5,7-dimethoxychromen-2-one",5,25,15,206.2,...,Shikimates and Phenylpropanoids,Coumarins,Simple coumarins,False,Adenia fruticosa|Aeromonas hydrophila|Artemisi...,AnalytiCon Discovery NPs|Australian natural pr...,10.1016/0031-9422(89)85042-3|10.1016/0031-9422...,{0:5|7-Dimethoxycoumarin|1:Limettin|3:487-06-9...,487-06-9,COc1cc(OC)c2ccc(=O)oc2c1
2,CNP0203265.1,CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C=C/C=C(C)/C=C/C=...,InChI=1S/C40H56O3/c1-29(17-13-19-31(3)21-23-36...,VYIRVAXUEZSDNC-RDJLEWNRSA-N,Capsanthin,"(2~{E},4~{E},6~{E},8~{E},10~{E},12~{E},14~{E},...",5,99,43,584.89,...,Terpenoids,Carotenoids (C40),"Carotenoids (C40, β-κ)",False,Asparagus officinalis|Berberis spp.|Capsicum a...,CMAUP (cCollective molecular activities of use...,10.1002/CHIN.198644308|10.1002/HLCA.1985068062...,465-42-9|all-trans-capsanthin|Capsanthin\/caps...,465-42-9,CC(C=CC=C(C)C=CC(=O)C1(C)CC(O)CC1(C)C)=CC=CC=C...


In [12]:
# Drop every row that has a missing value (NaN/None) in ANY column.
# NOTE(review): this is an all-column filter, so a row that only lacks e.g. a
# "cas" value is removed too; the recorded run ends with 1257 rows whose index
# labels still run up to 2599. Confirm this is intended, or restrict the check
# with dropna(subset=[...]).
df_filtrado = df_filtrado.dropna(axis="index", how="any")

In [13]:
# Save the curated table as comma-separated text, without writing the index.
df_filtrado.to_csv(path_or_buf="curada.csv", sep=",", index=False)

In [15]:
# (rows, columns) of the curated frame.
df_filtrado.shape

(1257, 45)

In [16]:
# Per-column non-null counts and dtypes of the curated frame.
df_filtrado.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1257 entries, 0 to 2599
Data columns (total 45 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   identifier                        1257 non-null   object 
 1   canonical_smiles                  1257 non-null   object 
 2   standard_inchi                    1257 non-null   object 
 3   standard_inchi_key                1257 non-null   object 
 4   name                              1257 non-null   object 
 5   iupac_name                        1257 non-null   object 
 6   annotation_level                  1257 non-null   int64  
 7   total_atom_count                  1257 non-null   int64  
 8   heavy_atom_count                  1257 non-null   int64  
 9   molecular_weight                  1257 non-null   float64
 10  exact_molecular_weight            1257 non-null   float64
 11  molecular_formula                 1257 non-null   object 
 12  alogp      

In [17]:
# Row counts per NP Classifier pathway in the curated frame, most frequent first.
df_filtrado["np_classifier_pathway"].value_counts()

np_classifier_pathway
Shikimates and Phenylpropanoids    573
Terpenoids                         347
Alkaloids                          197
Polyketides                         99
Amino acids and Peptides            21
Carbohydrates                       15
Fatty acids                          5
Name: count, dtype: int64

In [20]:
# Subset of curated compounds whose pathway is "Shikimates and Phenylpropanoids"
# (compared case-insensitively by lower-casing the column).
is_shikimate = df_filtrado["np_classifier_pathway"].str.lower().eq("shikimates and phenylpropanoids")
df_sk = df_filtrado[is_shikimate]

In [23]:
# Display the shikimate/phenylpropanoid subset (row labels are those kept from df_filtrado).
df_sk

Unnamed: 0,identifier,canonical_smiles,standard_inchi,standard_inchi_key,name,iupac_name,annotation_level,total_atom_count,heavy_atom_count,molecular_weight,...,np_classifier_pathway,np_classifier_superclass,np_classifier_class,np_classifier_is_glycoside,organisms,collections,dois,synonyms,cas,Smiles standarization
0,CNP0286854.0,C=CCC1=CC(OC)=C(OC)C(OC)=C1,InChI=1S/C12H16O3/c1-5-6-9-7-10(13-2)12(15-4)1...,BPLQKQKXWHCZSS-UHFFFAOYSA-N,Elemicin,"5-allyl-1,2,3-trimethoxy-benzene",5,31,15,208.26,...,Shikimates and Phenylpropanoids,Phenylpropanoids (C6-C3),Cinnamic acids and derivatives,False,Aconitum episcopale|Aconitum leucostomum|Acoru...,ANPDB|Australian natural products|BIOFACQUIM|C...,10.1002/(SICI)1099-1026(199909/10)14:5<312::AI...,487-11-6|5-Allyl-1|2|3-trimethoxybenzene|Elemi...,487-11-6,C=CCc1cc(OC)c(OC)c(OC)c1
1,CNP0292843.0,COC1=CC(OC)=C2C=CC(=O)OC2=C1,InChI=1S/C11H10O4/c1-13-7-5-9(14-2)8-3-4-11(12...,NXJCRELRQHZBQA-UHFFFAOYSA-N,citropten,"5,7-dimethoxychromen-2-one",5,25,15,206.20,...,Shikimates and Phenylpropanoids,Coumarins,Simple coumarins,False,Adenia fruticosa|Aeromonas hydrophila|Artemisi...,AnalytiCon Discovery NPs|Australian natural pr...,10.1016/0031-9422(89)85042-3|10.1016/0031-9422...,{0:5|7-Dimethoxycoumarin|1:Limettin|3:487-06-9...,487-06-9,COc1cc(OC)c2ccc(=O)oc2c1
10,CNP0270767.0,CC(C)=CCOC1=C2OC=CC2=CC2=C1OC(=O)C=C2,InChI=1S/C16H14O4/c1-10(2)5-7-19-16-14-12(6-8-...,OLOOJGVNMBJLLR-UHFFFAOYSA-N,IMPERATORIN,"9-(3-methylbut-2-enoxy)furo[3,2-g]chromen-7-one",5,34,20,270.28,...,Shikimates and Phenylpropanoids,Coumarins,Furocoumarins,False,Acanthus montanus|Adlumia fungosa|Aegle marmel...,ANPDB|AfroCancer|AnalytiCon Discovery NPs|Aust...,10.1002/(SICI)1099-1565(199811/12)9:6<283::AID...,{0:Ammidin|1:8-Isopentenyloxypsoralene|2:Marme...,482-44-0,CC(C)=CCOc1c2occc2cc2ccc(=O)oc12
19,CNP0201090.0,CC(C)=CCC1=C2OC(C)(C)C=CC2=C2OC=C(C3=CC=C(O)C(...,InChI=1S/C25H24O6/c1-13(2)5-7-15-21(28)20-22(2...,GHCZYXUOYFOXIP-UHFFFAOYSA-N,pomiferin,"3-(3,4-dihydroxyphenyl)-5-hydroxy-8,8-dimethyl...",4,55,31,420.46,...,Shikimates and Phenylpropanoids,Isoflavonoids,Isoflavanones,False,Acacia horrida|Andropogon zizanioides|Derris m...,AnalytiCon Discovery NPs|CMAUP (cCollective mo...,10.1002/JPS.2600640121|10.1002/JPS.3030410912|...,572-03-2|CHEBI:8329|NSC5113|74YIS40APM|MLS0027...,572-03-2,CC(C)=CCc1c2c(c3occ(-c4ccc(O)c(O)c4)c(=O)c3c1O...
24,CNP0289559.0,CC(C)=CCC1=C(O)C(CC=C(C)C)=C2CCC3=CC(O)=C(O)C=...,InChI=1S/C24H28O4/c1-13(2)5-8-17-16-10-7-15-11...,YJJXCOSDPIJFJR-UHFFFAOYSA-N,Gancaonin U,"6,8-bis(3-methylbut-2-enyl)-9,10-dihydrophenan...",4,56,28,380.48,...,Shikimates and Phenylpropanoids,Phenanthrenoids,Phenanthrenes,False,Glycyrrhiza uralensis|Glycyrrhiza uralensis|Le...,CMAUP (cCollective molecular activities of use...,10.1016/S0031-9422(00)95210-5,134958-56-8|8-(3-Methyl-but-2-enyl)-6-((E)-3-m...,134958-56-8,CC(C)=CCc1c(O)c(CC=C(C)C)c2c(c1O)-c1cc(O)c(O)c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2581,CNP0177600.1,C=C(C)[C@@H](O)COC1=C2C=COC2=CC2=C1C=CC(=O)O2,InChI=1S/C16H14O5/c1-9(2)12(17)8-20-16-10-3-4-...,BVMOMQJYQYBMKL-LBPRGKRZSA-N,Pangelin,4-[(2~{R})-2-hydroxy-3-methyl-but-3-enoxy]furo...,4,35,21,286.28,...,Shikimates and Phenylpropanoids,Coumarins,Furocoumarins,False,Angelica dahurica|Angelica dahurica|Angelica j...,CMAUP (cCollective molecular activities of use...,10.1007/BF02975883|10.1016/0031-9422(83)80174-...,33783-80-1|7H-Furo(3|2-g)(1)benzopyran-7-one| ...,33783-80-1,C=C(C)C(O)COc1c2ccoc2cc2oc(=O)ccc12
2590,CNP0127115.0,COC1=CC2=C(C(O)=C1OC)C(=O)C=C(C1=CC=C(O)C(O)=C...,InChI=1S/C17H14O7/c1-22-14-7-13-15(16(21)17(14...,IMEYGBIXGJLUIS-UHFFFAOYSA-N,Cirsiliol,"2-(3,4-dihydroxyphenyl)-5-hydroxy-6,7-dimethox...",5,38,24,330.29,...,Shikimates and Phenylpropanoids,Flavonoids,Flavones,False,Achillea ageratum|Achillea fragrantissima|Achi...,ANPDB|AnalytiCon Discovery NPs|Australian natu...,10.1002/ARDP.19713040802|10.1007/BF00580042|10...,34334-69-5|6|7-DIMETHOXY-3'|4'|5-TRIHYDROXYFLA...,34334-69-5,COc1cc2oc(-c3ccc(O)c(O)c3)cc(=O)c2c(O)c1OC
2596,CNP0208109.1,C/C=C(/C)C(=O)O[C@@H]1C2=C(C=CC3=C2OC(=O)C=C3)...,InChI=1S/C21H22O7/c1-6-11(2)20(24)27-18-16-14(...,FFCDTHIJWHJUQJ-JZWAJAMXSA-N,Edultin,"[(8~{S},9~{R})-8-(1-acetoxy-1-methyl-ethyl)-2-...",4,50,28,386.40,...,Shikimates and Phenylpropanoids,Coumarins,Furocoumarins,False,Angelica edulis|Cnidium monieri|Cnidium monnie...,CMAUP (cCollective molecular activities of use...,10.1007/BF00569581|10.1007/BF00598313|10.1016/...,15591-75-0|Cnidimine|(8S|9R)-8-(2-acetyloxypro...,15591-75-0,CC=C(C)C(=O)OC1c2c(ccc3ccc(=O)oc23)OC1C(C)(C)O...
2597,CNP0301421.1,O=C(O)[C@H]1O[C@@H](OC2=C(C3=CC(O)=C(O)C(O)=C3...,InChI=1S/C21H18O14/c22-6-3-7(23)11-10(4-6)33-1...,MBWOCQLTCWTIJE-ZUGPOPFOSA-N,Myricetin 3-O-glucuronide,"(2~{S},3~{S},4~{S},5~{R},6~{S})-6-[5,7-dihydro...",5,53,35,494.36,...,Shikimates and Phenylpropanoids,Flavonoids,Flavonols,True,Ajuga genevensis|Boronia pinnata|Chamerion dod...,ANPDB|Australian natural products|CMAUP (cColl...,10.1002/CHIN.200117207|10.1007/S10600-011-0025...,77363-65-6|Myricetin-3-O-beta-D-glucuronide|CH...,77363-65-6,O=C(O)C1OC(Oc2c(-c3cc(O)c(O)c(O)c3)oc3cc(O)cc(...
