In [1]:
import requests
import openpyxl
import os
import time

# Every workbook path in this notebook is relative, so switch into the folder
# that holds the spreadsheets before opening anything.
os.chdir('/home/vinay/Documents')
wb = openpyxl.load_workbook(filename='62_compounslist_after_ADMET.xlsx')

# Worksheet the lookup loop below reads compound names from and writes into.
sheet = wb['Sheet1']

def get_smile(name):
    """Fetch the CanonicalSMILES property for a compound name from PubChem PUG REST.

    Args:
        name: Compound name to look up. It is percent-encoded before being
            placed in the URL path, so characters such as ``#``, ``?`` or
            ``%`` cannot truncate or corrupt the request.

    Returns:
        The raw ``TXT`` response body as a string, exactly as received
        (the caller splits it on newlines).

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            the timeout.
    """
    from urllib.parse import quote

    encoded_name = quote(str(name), safe='')
    url = (
        'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
        f'{encoded_name}/property/CanonicalSMILES/TXT'
    )
    # Without a timeout a stalled connection would block the calling loop forever.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    return res.text

def get_molecular_formula(name):
    """Fetch the MolecularFormula property for a compound name from PubChem PUG REST.

    Args:
        name: Compound name to look up. It is percent-encoded before being
            placed in the URL path, so characters such as ``#``, ``?`` or
            ``%`` cannot truncate or corrupt the request.

    Returns:
        The raw ``TXT`` response body as a string, exactly as received
        (the caller splits it on newlines).

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            the timeout.
    """
    from urllib.parse import quote

    encoded_name = quote(str(name), safe='')
    url = (
        'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
        f'{encoded_name}/property/MolecularFormula/TXT'
    )
    # Without a timeout a stalled connection would block the calling loop forever.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    return res.text

def get_molecular_weight(name):
    """Fetch the MolecularWeight property for a compound name from PubChem PUG REST.

    Args:
        name: Compound name to look up. It is percent-encoded before being
            placed in the URL path, so characters such as ``#``, ``?`` or
            ``%`` cannot truncate or corrupt the request.

    Returns:
        The raw ``TXT`` response body as a string, exactly as received
        (the caller splits it on newlines).

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            the timeout.
    """
    from urllib.parse import quote

    encoded_name = quote(str(name), safe='')
    url = (
        'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
        f'{encoded_name}/property/MolecularWeight/TXT'
    )
    # Without a timeout a stalled connection would block the calling loop forever.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    return res.text

def _consensus_value(text):
    """Choose the value to record from a PubChem TXT property response.

    The response is split on newlines (presumably one line per matching
    record, followed by a trailing empty piece). With more than two pieces
    the first two lines must agree for the value to be trusted; with two or
    fewer the whole text is used.

    Returns:
        The stripped value, or ``None`` when the first two lines disagree
        and nothing should be written.
    """
    lines = text.split('\n')
    if len(lines) > 2:
        if lines[0] == lines[1]:
            return lines[0].strip()
        return None
    return text.strip()


# For every name in column B (rows 5-1496) look up formula, weight and SMILES
# on PubChem and store them in columns C, D and E of the same row.
for row in range(5, 1497):
    name = sheet.cell(row=row, column=2).value
    print(name)
    if name is None:
        continue
    if not isinstance(name, str):
        # A numeric/date cell is not a compound name; calling .strip() on it
        # used to abort the whole run with AttributeError.
        print(f'row {row}: skipped, name cell is not text: {name!r}')
        continue
    name = name.strip()
    if not name:
        continue
    try:
        smile = get_smile(name)
        time.sleep(2)  # pause between consecutive PubChem requests
        formula = get_molecular_formula(name)
        time.sleep(2)
        mw = get_molecular_weight(name)
    except Exception as exc:
        # Best effort: a failed lookup skips the row, but say so instead of
        # silently leaving the cells empty.
        print(f'row {row}: lookup failed for {name!r}: {exc}')
        continue

    for column, text in (('C', formula), ('D', mw), ('E', smile)):
        value = _consensus_value(text)
        if value is None:
            # Ambiguous response (first two results differ): leave the cell untouched.
            continue
        print(value, row)
        sheet[f'{column}{row}'] = value

    # Saved after every processed row, presumably so an interrupted run keeps
    # the rows already fetched.
    wb.save('GC-MS_LIST_OF_57_Med_PLANTS.xlsx')

In [None]:
import re
import requests
import openpyxl

# Maps each short result key to the [opening, closing] XML tag pair that
# encloses the matching value in a ChEMBL molecule response. Key order is
# significant: it is the order in which results are collected and printed.
lipinski = {
    key: [f'<{tag}>', f'</{tag}>']
    for key, tag in (
        ('hbd', 'hbd_lipinski'),
        ('algop', 'alogp'),
        ('mol_species', 'molecular_species'),
        ('hba', 'hba_lipinski'),
        ('violations', 'num_lipinski_ro5_violations'),
        ('cx', 'cx_logp'),
        ('logd', 'cx_logd'),
        ('smile', 'canonical_smiles'),
        ('mol_formula', 'full_molformula'),
        ('mol_weight', 'full_mwt'),
    )
}

def get_inchi_key(name):
    """Fetch the InChIKey property for a compound name from PubChem PUG REST.

    Args:
        name: Compound name to look up. It is percent-encoded before being
            placed in the URL path, so characters such as ``#``, ``?`` or
            ``%`` cannot truncate or corrupt the request.

    Returns:
        The raw ``TXT`` response body as a string, exactly as received. It is
        not stripped here, so it may end with a newline or span several lines.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            the timeout.
    """
    from urllib.parse import quote

    encoded_name = quote(str(name), safe='')
    url = (
        'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
        f'{encoded_name}/property/InChIKey/TXT'
    )
    # Without a timeout a stalled connection would block the calling loop forever.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    return res.text

def _extract_lipinski_values(xml_text):
    """Pull the pKa and ``lipinski`` tag values out of a ChEMBL XML response.

    Each value is the text between the first occurrence of its opening tag
    and the nearest closing tag. Tags that are absent keep the empty-string
    default instead of aborting the whole record.

    Raises:
        LookupError: If none of the expected tags is present, so callers that
            fall back to another lookup on failure still do so.
    """
    results = {key: '' for key in lipinski.keys()}

    acidic = re.search('<cx_most_apka>(.*?)</cx_most_apka>', xml_text)
    basic = re.search('<cx_most_bpka>(.*?)</cx_most_bpka>', xml_text)
    # Precedence kept from the original: an acidic pKa, when present, is
    # reported alone; the basic pKa is only reported when no acidic one exists.
    if acidic:
        results['acidic_pka'] = acidic.group(1)
        results['basic_pka'] = '--'
    elif basic:
        results['acidic_pka'] = '--'
        results['basic_pka'] = basic.group(1)
    else:
        # No pKa at all used to raise on None and drop the compound.
        results['acidic_pka'] = '--'
        results['basic_pka'] = '--'

    found_any = bool(acidic or basic)
    for key, (open_tag, close_tag) in lipinski.items():
        # Non-greedy so a single-line response holding several records cannot
        # make one match run from the first opening tag to the last closing tag.
        match = re.search(f'{open_tag}(.*?){close_tag}', xml_text)
        if match:
            results[key] = match.group(1)
            found_any = True

    if not found_any:
        raise LookupError('no ChEMBL molecule properties found in response')
    return results


def get_lipinski_values(inchi_key):
    """Look up a molecule on ChEMBL by identifier and return its property values.

    Args:
        inchi_key: Identifier for the ChEMBL ``molecule`` endpoint. Only the
            first whitespace-separated token is used, so the raw text from
            ``get_inchi_key`` (trailing newline, possibly several lines) is
            accepted as-is.

    Returns:
        dict with one entry per ``lipinski`` key plus ``acidic_pka`` and
        ``basic_pka``; values not present in the response are ``''``
        (``'--'`` for the pKa entries).

    Raises:
        ValueError: If ``inchi_key`` contains no identifier.
        LookupError: If the response contains none of the expected values.
        requests.exceptions.RequestException: On HTTP errors or timeout.
    """
    from urllib.parse import quote

    tokens = inchi_key.split()
    if not tokens:
        # An empty key would hit the collection endpoint and return an
        # unrelated molecule.
        raise ValueError('empty InChIKey')
    res = requests.get(
        f'https://www.ebi.ac.uk/chembl/api/data/molecule/{quote(tokens[0], safe="")}',
        timeout=30,
    )
    res.raise_for_status()
    return _extract_lipinski_values(res.text)


def get_lipinski_values_by_name(name):
    """Search ChEMBL molecules by name and return the property values found.

    Args:
        name: Free-text search term; surrounding whitespace is stripped and
            the term is sent as the URL-encoded ``q`` query parameter.

    Returns:
        dict with one entry per ``lipinski`` key plus ``acidic_pka`` and
        ``basic_pka``; values not present in the response are ``''``
        (``'--'`` for the pKa entries). Each value is the first occurrence of
        its tag anywhere in the search response.

    Raises:
        LookupError: If the response contains none of the expected values.
        requests.exceptions.RequestException: On HTTP errors or timeout.
    """
    res = requests.get(
        'https://www.ebi.ac.uk/chembl/api/data/molecule/search',
        params={'q': name.strip()},
        timeout=30,
    )
    res.raise_for_status()
    return _extract_lipinski_values(res.text)

def update_sheet(results, row):
    """Copy one compound's values from ``results`` into columns B-M of ``row``.

    Writes go to the module-level ``sheet``. A key missing from ``results``
    raises ``KeyError`` at that column; columns before it are already written.
    NOTE(review): column B is also where ``get_name`` reads the compound name
    from - confirm that overwriting it with the formula is intended.
    """
    column_keys = (
        ('B', 'mol_formula'),
        ('C', 'mol_weight'),
        ('D', 'smile'),
        ('E', 'algop'),
        ('F', 'mol_species'),
        ('G', 'hba'),
        ('H', 'hbd'),
        ('I', 'violations'),
        ('J', 'cx'),
        ('K', 'acidic_pka'),
        ('L', 'basic_pka'),
        ('M', 'logd'),
    )
    for column, key in column_keys:
        sheet[f'{column}{row}'] = results[key]

# For every compound name in column B (rows 8-540) fetch the ChEMBL property
# values and write them into the same row.
for row in range(8, 541):
    # Read the cell directly: the get_name() helper is only defined in a later
    # cell, so calling it here fails with NameError on a fresh top-to-bottom run.
    name = sheet[f'B{row}'].value
    if not isinstance(name, str) or not name.strip():
        # Empty or non-text cell: nothing to look up, avoid pointless requests.
        continue
    name = name.strip()
    try:
        results = get_lipinski_values_by_name(name)
    except Exception as search_error:
        # Name search failed; fall back to resolving an InChIKey via PubChem
        # and querying ChEMBL with that instead.
        try:
            inchi_key = get_inchi_key(name)
            results = get_lipinski_values(inchi_key)
        except Exception as key_error:
            # Best effort: skip the row, but report why rather than failing silently.
            print(f'row {row}: no data for {name!r} '
                  f'(search: {search_error}; InChIKey lookup: {key_error})')
            continue
    print(row, results, '\n')
    update_sheet(results, row)
    # Saved after every processed row, presumably so an interrupted run keeps
    # the rows already fetched.
    wb.save('unknowFor_findout_GCMS.xlsx')

In [7]:
import openpyxl
import requests

def get_name(row):
    """Return the value of the column-B cell in ``row`` of the module-level ``sheet``."""
    name_cell = sheet[f'B{row}']
    return name_cell.value
def get_smiles(row):
    """Return the value of the column-C cell in ``row`` of the module-level ``sheet``."""
    smiles_cell = sheet[f'C{row}']
    return smiles_cell.value
def get_sdf(smiles):
    """Download the SDF record PubChem PUG REST returns for a SMILES string.

    Args:
        smiles: SMILES string identifying the compound.

    Returns:
        The response body (SDF text) as a string.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            the timeout.
    """
    # The SMILES travels in the POST body instead of the URL path: '#', '/'
    # and backslash are ordinary SMILES characters but are URL syntax, e.g.
    # '#' starts a fragment and silently truncated the old request.
    res = requests.post(
        'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/SDF',
        data={'smiles': smiles},
        timeout=30,  # a stalled connection must not hang the download loop
    )
    res.raise_for_status()
    return res.text
def write_sdf(sdf, stem=None):
    """Write SDF text to ``<stem>.sdf`` inside ``/home/vinay/Documents/SDF``.

    Args:
        sdf: SDF content to write.
        stem: File name without extension. When omitted, the module-level
            variable ``name`` is used, which is what the single-argument
            call ``write_sdf(sdf)`` has always relied on.
    """
    out_dir = '/home/vinay/Documents/SDF'
    if stem is None:
        stem = name
    # A path separator inside a compound name would be read as a sub-directory.
    safe_stem = str(stem).replace('/', '_').replace(os.sep, '_')
    os.makedirs(out_dir, exist_ok=True)
    # Use a full path instead of os.chdir(): changing the working directory
    # here redirected every later relative save (e.g. wb.save) into the SDF folder.
    with open(os.path.join(out_dir, safe_stem + '.sdf'), 'w') as f:
        f.write(sdf)

# Download an SDF file for every compound listed in rows 2-63.
for row in range(2, 64):
    raw_smiles = get_smiles(row)
    raw_name = get_name(row)
    if not isinstance(raw_smiles, str) or not isinstance(raw_name, str):
        # Empty cells come back as None; .strip() on them used to abort the loop.
        print(f'row {row}: skipped, missing name or SMILES')
        continue
    smiles = raw_smiles.strip()
    # `name` must stay a module-level variable: write_sdf(sdf) reads it to
    # build the output file name.
    name = raw_name.strip()
    try:
        sdf = get_sdf(smiles)
    except Exception as exc:
        # Not a bare `except:` - that also swallowed KeyboardInterrupt, making
        # the loop impossible to stop from the keyboard.
        print(f'row {row}: SDF download failed for {name!r}: {exc}')
        continue
    print(name, row)
    write_sdf(sdf)