In [2]:
import pandas as pd
from pubchempy import get_compounds, Compound

In [78]:
# Load the tab-separated test set into `df` and preview its first three rows.
df = pd.read_csv('test_data.csv', sep='\t')
df.head(3)

Unnamed: 0,Name,CAS,SMILES
0,α-hANP,85637-73-6,N1[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(...
1,Astragaloside IV,84687-43-4,CC(C)(O)[C@@H]1CC[C@@](C)(O1)[C@H]2[C@@H](O)C[...
2,Carocainide,66203-00-7,C1CCN(C1)CCOc1c(OC)c2occc2c(c1NC(NC)=O)OC


# 1. Convert name/CAS into SMILES

In [65]:
def name2Smiles(df, name_col, smiles_col, verbose=True):
    """Look up a SMILES string on PubChem for every value in ``df[name_col]``.

    Each name is passed to ``get_compounds(..., namespace='name')``. When at
    least one compound comes back, the ``canonical_smiles`` of the first hit is
    written to ``df.loc[index, smiles_col]``. ``df`` is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the names; receives the looked-up SMILES.
    name_col : str
        Column whose values are used as the query.
    smiles_col : str
        Column the SMILES of the first hit is written to.
    verbose : bool, default True
        Print one progress line per row.

    Returns
    -------
    list
        Index labels of the rows for which no SMILES was obtained, either
        because the query returned nothing or because the name cell was
        missing/blank.
    """
    error_list = []
    for index, row in df.iterrows():
        name = row[name_col]

        # A missing (NaN/None) or blank cell is not a usable query: sending it
        # on would either fail or match an unrelated compound literally named
        # "nan". Record the row as failed without querying.
        if isinstance(name, str):
            is_missing = not name.strip()
        else:
            is_missing = bool(pd.isna(name))

        com = [] if is_missing else get_compounds(name, namespace='name')

        if len(com) == 0:
            if verbose:
                print('No results found! {},{}'.format(index, name))
            error_list.append(index)
            continue

        # Only the first hit is used.
        df.loc[index, smiles_col] = com[0].canonical_smiles
        if verbose:
            print('Finish {}, {}'.format(index, name))
    return error_list

In [66]:
# Fill `new_smiles` from the `Name` column; `error` receives the list of
# index labels that name2Smiles returns for rows without a result.
error = name2Smiles(df, 'Name', 'new_smiles')

No results found! 0,α-hANP
Finish 1, Astragaloside IV
Finish 2, Carocainide
Finish 3, Clindamycin
Finish 4, Dimetindene
Finish 5, Fenspiride
Finish 6, Hexobarbital
Finish 7, Lanreotide
Finish 8, Metronidazole
Finish 9, Olsalazine
Finish 10, PR-104
Finish 11, Safinamide
Finish 12, Teriparatide


* View results

In [67]:
# Preview the first three rows, now including the `new_smiles` column.
df.head(3)

Unnamed: 0,Name,CAS,SMILES,new_smiles
0,α-hANP,85637-73-6,N1[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(...,
1,Astragaloside IV,84687-43-4,CC(C)(O)[C@@H]1CC[C@@](C)(O1)[C@H]2[C@@H](O)C[...,CC1(C(CCC23C1C(CC4C2(C3)CCC5(C4(CC(C5C6(CCC(O6...
2,Carocainide,66203-00-7,C1CCN(C1)CCOc1c(OC)c2occc2c(c1NC(NC)=O)OC,CNC(=O)NC1=C(C2=C(C(=C1OCCN3CCCC3)OC)OC=C2)OC


* Failed results

In [70]:
# Select the rows whose index labels were collected in `error`.
df.loc[error]

Unnamed: 0,Name,CAS,SMILES,new_smiles
0,α-hANP,85637-73-6,N1[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(...,


# 2. Convert SMILES into synonyms / IUPAC name

In [79]:
def smiles2Name(df, name_col, smiles_col, kind='synonyms', verbose=True):
    """Look up a name on PubChem for every SMILES in ``df[smiles_col]``.

    Each SMILES is passed to ``get_compounds(..., namespace='smiles')``. From
    the first hit, either ``synonyms`` or ``iupac_name`` (selected by ``kind``)
    is stored in ``df[name_col]``. ``df`` is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the SMILES; receives the looked-up names.
    name_col : str
        Column the result is written to. It is created (filled with NaN) if
        absent and converted to object dtype so a cell can hold a list.
    smiles_col : str
        Column whose values are used as the query.
    kind : {'synonyms', 'iupac_name'}, default 'synonyms'
        Attribute of the first hit to store.
    verbose : bool, default True
        Print one progress line per row.

    Returns
    -------
    list
        Index labels of the rows for which no name was obtained, either
        because the query returned nothing or because the SMILES cell was
        missing/blank.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values.
    """
    # Reject an unsupported `kind` before any query is sent; otherwise the
    # value to store would be undefined (or left over from a previous row).
    if kind not in ('synonyms', 'iupac_name'):
        raise ValueError(
            "kind must be 'synonyms' or 'iupac_name', got {!r}".format(kind)
        )

    # An object-dtype target column is needed so that a whole list (as stored
    # for kind='synonyms') fits in one cell.
    if name_col not in df.columns:
        df[name_col] = float('nan')
    df[name_col] = df[name_col].astype(object)

    error_list = []
    for index, row in df.iterrows():
        smiles = row[smiles_col]

        # Missing or blank cells are not valid queries; treat them as misses.
        if isinstance(smiles, str) and smiles.strip():
            com = get_compounds(smiles, namespace='smiles')
        else:
            com = []

        if len(com) == 0:
            if verbose:
                # Report the query itself; no name exists for a failed row.
                print('No results found! {},{}'.format(index, smiles))
            error_list.append(index)
            continue

        # Only the first hit is used.
        name = com[0].synonyms if kind == 'synonyms' else com[0].iupac_name
        # `.at` stores the value as a single cell; `.loc` would try to
        # broadcast a list across the selection.
        df.at[index, name_col] = name
        if verbose:
            print('Finish {}, {}'.format(index, smiles))
    return error_list

In [None]:
# Fill `new_name` with the IUPAC name looked up from the `SMILES` column;
# verbose=False turns off the per-row progress printing. `error` is
# overwritten with the list returned by smiles2Name.
error = smiles2Name(df, 'new_name', 'SMILES', kind='iupac_name', verbose=False)

* View results

In [81]:
# Preview the first three rows, now including the `new_name` column.
df.head(3)

Unnamed: 0,Name,CAS,SMILES,new_name
0,α-hANP,85637-73-6,N1[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(...,(2S)-2-[[(2S)-2-[[(2S)-2-[[(2S)-2-[[(2S)-4-ami...
1,Astragaloside IV,84687-43-4,CC(C)(O)[C@@H]1CC[C@@](C)(O1)[C@H]2[C@@H](O)C[...,"(2R,3R,4S,5S,6R)-2-[[(1S,3R,6S,8R,9S,11S,12S,1..."
2,Carocainide,66203-00-7,C1CCN(C1)CCOc1c(OC)c2occc2c(c1NC(NC)=O)OC,"1-[4,7-dimethoxy-6-(2-pyrrolidin-1-ylethoxy)-1..."
