In [None]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem as Chem
from standardiser import standardise as std
from chembl_webresource_client.unichem import unichem_client as unichem
from requests.exceptions import RetryError
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the DrugBank approved structure-links table; the path is relative to
# the current working directory.
data = pd.read_csv("./drugbank_approved_structure_links_v5_1_5.csv")
# Sanity check: print (rows, columns), then show the first rows as the cell output.
print(data.shape)
data.head()

In [None]:
# Standardise SMILES strings and derive their InChIKeys

def _standardise_one(smile, use_chiral):
    """Standardise one SMILES string and compute the InChIKey of the result.

    Parameters
    ----------
    smile : str
        Input SMILES string.
    use_chiral : int
        Forwarded to ``Chem.CanonSmiles(..., useChiral=...)``: 1 keeps
        stereochemistry in the output SMILES, 0 drops it.

    Returns
    -------
    tuple
        ``(std_smile, inchi_key)``. ``std_smile`` is ``None`` when
        standardisation fails. ``inchi_key`` is ``None`` when standardisation
        fails, when the standardised SMILES cannot be parsed back into a
        molecule, or when InChIKey generation raises.
    """
    std_smile = None
    try:
        std_smile = Chem.CanonSmiles(std.run(smile), useChiral=use_chiral)
        # Build the molecule from the SMILES that is actually returned, so the
        # InChIKey always describes the same structure as the SMILES column.
        mol = Chem.MolFromSmiles(std_smile)
        if mol is None:
            print ("Error in molfrom smiles -> skipping this molecule")
            return std_smile, None
        return std_smile, Chem.inchi.MolToInchiKey(mol)
    except std.StandardiseException:
        print ('hey')
        return None, None
    except Chem.inchi.InchiReadWriteError:
        # Keep the SMILES computed so far; only the key is unavailable.
        return std_smile, None


def std_smile_and_inchi(smiles_list):
    """Standardise a list of SMILES and compute InChIKeys for each entry.

    Every input entry produces exactly one entry in each of the four returned
    lists, so the lists can be assigned directly as DataFrame columns.

    Each molecule is processed twice: once keeping stereochemistry
    ("isomeric") and once with stereochemistry removed ("canonical").

    Parameters
    ----------
    smiles_list : sequence
        SMILES strings. ``None`` and float entries (e.g. the NaN pandas uses
        for missing values) are treated as missing and yield ``None`` in
        every output list.

    Returns
    -------
    tuple of four lists
        ``(std_smiles, std_smiles_canonical, inchi_keys,
        inchi_keys_canonical)``, each the same length as ``smiles_list``.
    """
    std_smiles = []
    std_smiles_canonical = []
    inchi_keys = []
    inchi_keys_canonical = []
    length = len(smiles_list)

    for i, smile in enumerate(smiles_list, start=1):
        # isinstance (rather than an exact type check) also catches float
        # subclasses such as numpy.float64 NaN.
        if smile is None or isinstance(smile, float):
            std_smile, inchi_key = None, None
            std_smile_can, inchi_key_can = None, None
        else:
            # Isomeric
            std_smile, inchi_key = _standardise_one(smile, 1)
            # Canonical (stereochemistry removed)
            std_smile_can, inchi_key_can = _standardise_one(smile, 0)

        # Exactly one append per list per molecule keeps the lists aligned.
        std_smiles.append(std_smile)
        inchi_keys.append(inchi_key)
        std_smiles_canonical.append(std_smile_can)
        inchi_keys_canonical.append(inchi_key_can)

        print("std compound SMILE out of: ",i, length)

    return std_smiles, std_smiles_canonical, inchi_keys, inchi_keys_canonical 

In [None]:
# Pull the raw SMILES column out as a plain Python list.
smis = data['SMILES'].tolist()

# Standardise every structure; the four result lists are index-aligned with `smis`.
(
    std_smiles,
    std_smiles_canonical,
    inchi_keys,
    inchi_keys_canonical,
) = std_smile_and_inchi(smis)

# Attach the results as new columns. The two "inchi" columns hold the values
# returned as `inchi_keys` / `inchi_keys_canonical` by std_smile_and_inchi.
data['std_SMILES'] = std_smiles
data['std_SMILES_canonical'] = std_smiles_canonical
data['std_inchi'] = inchi_keys
data['std_inchi_canonical'] = inchi_keys_canonical

In [None]:
# Sanity check: number of canonical InChIKey entries produced; compare with
# the row count printed when `data` was loaded.
print(len(inchi_keys_canonical))

In [None]:
# Persist the table, including the added std_* columns, to a CSV in the
# current working directory.
# NOTE(review): no `index=` argument is passed, so pandas' default index
# handling applies — confirm whether the index column is wanted in the output.
data.to_csv("./DrugBank_v5_1_5_processed.csv")