In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

# Create a list of Mol objects
amino_acid_supplier = Chem.SDMolSupplier('data/amino_acids/amino_acids-nat20.sdf', sanitize=False, removeHs=False, strictParsing=False)

# Set flags for a limited sanitization (full sanitization results in bad aromaticity detection).
s_flags = Chem.rdmolops.SanitizeFlags.SANITIZE_CLEANUP|Chem.rdmolops.SanitizeFlags.SANITIZE_FINDRADICALS|Chem.rdmolops.SanitizeFlags.SANITIZE_CLEANUPCHIRALITY|Chem.rdmolops.SanitizeFlags.SANITIZE_PROPERTIES|Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS|Chem.rdmolops.SanitizeFlags.SANITIZE_SETHYBRIDIZATION

# Make a list of the molecule names, instantiated Mol objects and sanitize using the flags set
mol_list = []
amino_acids = []
for mol in amino_acid_supplier:
    mol_name = mol.GetProp('_Name')
    mol_list += [mol_name]
    newmol = Chem.Mol(mol)    # See "Creating, Visualizing, ..." for why this is necessary
    amino_acids += [newmol]
    Chem.SanitizeMol(mol, sanitizeOps=s_flags, catchErrors=True)

In [16]:
def get_inchi(mol):
    inchi_plus = Chem.rdinchi.MolToInchi(mol)
    inchi = inchi_plus[0]
    inchi_key = Chem.rdinchi.InchiToInchiKey(inchi)
    
    return inchi_key, inchi

inchi_list = []
inchi_keys = []
for i, mol in enumerate(amino_acids):    
    key, inchi = get_inchi(mol)
    inchi_list += inchi
    inchi_keys += [key]
    print(f"{mol_list[i]}:\t{key}")

Ala:	QNAYBMKLOCPYGJ-REOHCLBHSA-N
Arg:	ODKSFYDXXFIFQN-BYPYZUCNSA-N
Asn:	DCXYFEDJOCDNAF-REOHCLBHSA-N
Asp:	CKLJMWTZIZZHCS-REOHCLBHSA-N
Cys:	XUJNEKJLAYXESH-REOHCLBHSA-N
Gln:	ZDXPYRJPNDTMRX-VKHMYHEASA-N
Glu:	WHUUTDBJXJRKMK-VKHMYHEASA-N
Gly:	DHMQDGOQFOQNFH-UHFFFAOYSA-N
His:	HNDVDQJCIGZPNO-YFKPBYRVSA-N
Ile:	AGPKZVBTJJNPAG-UHNVWZDZSA-N
Leu:	ROHFNLRQFUQHCH-YFKPBYRVSA-N
Lys:	KDXKERNSBIXSRK-YFKPBYRVSA-N
Met:	FFEARJCKVFRZRR-BYPYZUCNSA-N
Phe:	COLNVLDHVKWLRT-QMMMGPOBSA-N
Pro:	ONIBWKKTOPOVIA-BYPYZUCNSA-N
Ser:	MTCFGRXMJLQNBG-REOHCLBHSA-N
Thr:	AYFVYJQAPQTCCC-GBXIJSLDSA-N
Trp:	QIVBCDIJIAJPQS-VIFPVBQESA-N
Tyr:	OUYCCCASQSFEME-QMMMGPOBSA-N
Val:	KZSNJWFQEVHDMF-BYPYZUCNSA-N


In [17]:
import requests

In [18]:
print(inchi_keys[0])

QNAYBMKLOCPYGJ-REOHCLBHSA-N


In [26]:
baseurl = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug'
inp = '/compound/inchikey/' + inchi_keys[0]
ops = '/cids/json'
url = baseurl + inp + ops 
r = requests.get(url)
r.status_code

200

In [27]:
r.json()

{'IdentifierList': {'CID': [5950, 7311724]}}

In [32]:
url = baseurl + '_view/data/compound/5950/json'
r2 = requests.get(url)
r2.status_code

200

In [50]:
dump = r2.json()
def get_props_section(record):
    for i, sec in enumerate(record['Record']['Section']):
        if sec['TOCHeading'] == 'Chemical and Physical Properties':
            section = record['Record']['Section'][i]
            print(section)
    return section

In [51]:
props = get_props_section(dump)

{'TOCHeading': 'Chemical and Physical Properties', 'Description': 'Various chemical and physical properties that are experimentally determined for this compound.  See also the Safety and Hazard Properties section (if available), which has additional properties pertinent to chemical safety and hazards.', 'Section': [{'TOCHeading': 'Computed Properties', 'Description': 'Properties of this compound computed from its molecular formula and structure.', 'DisplayControls': {'CreateTable': {'FromInformationIn': 'Subsections', 'NumberOfColumns': 3, 'ColumnHeadings': ['Property Name', 'Property Value', 'Reference'], 'ColumnContents': ['Name', 'Value', 'Reference']}}, 'Section': [{'TOCHeading': 'Molecular Weight', 'Description': 'Molecular weight or molecular mass refers to the mass of a molecule. It is calculated as the sum of the mass of each constituent atom multiplied by the number of atoms of that element in the molecular formula.  The molecular weight is also called the relative molar mass,

In [48]:
def getpath(nested_dict, value, prepath=()):
    for k, v in nested_dict.items():
        path = prepath + (k,)
        if v == value: # found value
            return path
        elif hasattr(v, 'items'): # v is a dict
            p = getpath(v, value, path) # recursive call
            if p is not None:
                return p

In [52]:
path = getpath(props, 'Experimental Properties')
print(path)

None


In [59]:
for i, sec in enumerate(props['Section']):
    print(i, sec['TOCHeading'])

0 Computed Properties
1 Experimental Properties


In [60]:
print(props['Section'][1]['Section'])

[{'TOCHeading': 'Physical Description', 'Description': 'The appearance or features of this compound, including color, odor, state, taste and more in general.', 'Information': [{'ReferenceNumber': 14, 'Value': {'StringWithMarkup': [{'String': 'Other Solid'}]}}, {'ReferenceNumber': 23, 'Value': {'StringWithMarkup': [{'String': 'Solid; [Merck Index] White odorless solid; [HSDB] White crystalline solid; [Sigma-Aldrich MSDS]'}]}}, {'ReferenceNumber': 25, 'Value': {'StringWithMarkup': [{'String': 'Solid'}]}}]}, {'TOCHeading': 'Color/Form', 'Description': 'Color is the aspect of any object that may be described in terms of hue, lightness, and saturation.', 'URL': 'https://www.britannica.com/science/color', 'Information': [{'ReferenceNumber': 24, 'Description': 'PEER REVIEWED', 'Reference': ["O'Neil, M.J. (ed.). The Merck Index - An Encyclopedia of  Chemicals, Drugs, and Biologicals. 13th Edition, Whitehouse  Station, NJ:  Merck and Co., Inc., 2001., p. 39"], 'Value': {'StringWithMarkup': [{'S

In [61]:
melting_point = requests.get(f"https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/6322/XML?heading=Melting+Point")
xml_melting = melting_point.content

In [62]:
print(xml_melting)

b'<?xml version="1.0" encoding="UTF-8"?>\n<Record\n    xmlns="http://pubchem.ncbi.nlm.nih.gov/pug_view"\n    xmlns:xs="http://www.w3.org/2001/XMLSchema-instance"\n    xs:schemaLocation="http://pubchem.ncbi.nlm.nih.gov/pug_view https://pubchem.ncbi.nlm.nih.gov/pug_view/pug_view.xsd"\n>\n  <RecordType>CID</RecordType>\n  <RecordNumber>6322</RecordNumber>\n  <RecordTitle>Arginine</RecordTitle>\n  <Section>\n    <TOCHeading>Chemical and Physical Properties</TOCHeading>\n    <Description>Various chemical and physical properties that are experimentally determined for this compound.  See also the Safety and Hazard Properties section (if available), which has additional properties pertinent to chemical safety and hazards.</Description>\n    <Section>\n      <TOCHeading>Experimental Properties</TOCHeading>\n      <Description>Various experimentally determined properties for this compound.  See also the Safety and Hazard Properties section (if available), which has additional properties pertinen