# Molecule processing
**In this notebook, the processing of the molecules will be carried out in order to obtain a
similarity table of all the molecules belonging to the chosen data set.**


## Import Libraries

In [1]:
import pandas as pd
import sys

from io import StringIO
from drugSimilarityTable import SimilarityTable
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem

## Load the CSV File

In [2]:
path_similarities = 'linked_drugs_debug.csv'
data_types_dictionary = {
    'compound-id': 'int',
    'chembl-id': 'str',
    'chembl-name': 'str',
    'smiles': 'str',
    'drugbank-id': 'str',
    'drugbank-name': 'str'
}
drugs_csv = pd.read_csv(path_similarities, dtype=data_types_dictionary)

### Handling errors from RDKit

In [3]:
Chem.WrapLogs()
sio = sys.stderr = StringIO()

## Data analysis

The null values ​​of all the data are verified, there would exist errors if any of the smiles values ​​of the compounds is null

In [4]:
drugs_csv.isnull().sum()

compound-id            0
chembl-id          10867
chembl-name      2054207
smiles                 0
drugbank-id      2084309
drugbank-name    2084309
dtype: int64

In this case, the null values ​​that are found refer to the id of the databases from which the compounds were acquired.
The type of data and the names of the tables are also verified.


In [5]:
drugs_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2095590 entries, 0 to 2095589
Data columns (total 6 columns):
 #   Column         Dtype 
---  ------         ----- 
 0   compound-id    int64 
 1   chembl-id      object
 2   chembl-name    object
 3   smiles         object
 4   drugbank-id    object
 5   drugbank-name  object
dtypes: int64(1), object(5)
memory usage: 95.9+ MB


## Calculation of the similarity table
This similarity table consists of the comparison of a molecule with all the rest of the molecules
that belong to the database, with the aim of obtaining the top of molecules that contain the most
similarity with that molecule.

To achieve this, first, the value of the molecule is obtained from its "smiles" found in the database,
then it is necessary to calculate the "fingerprint" for that molecule. To finish, it is necessary to
calculate the similarity between molecules using their fingerprints.

In [9]:
for drug in drugs_csv.to_dict('records'):
    print(drug['compound-id'], drug['smiles'])

0 Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccccc1Cl
1 Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(C#N)cc1
2 Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1C(O)c1ccc(Cl)cc1
3 Cc1ccc(C(=O)c2ccc(-n3ncc(=O)[nH]c3=O)cc2)cc1
4 Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(Cl)cc1
5 Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccccc1
6 Cc1cc(Br)ccc1C(=O)c1ccc(-n2ncc(=O)[nH]c2=O)cc1Cl
7 O=C(c1ccc(Cl)cc1Cl)c1ccc(-n2ncc(=O)[nH]c2=O)cc1Cl
8 CS(=O)(=O)c1ccc(C(=O)c2ccc(-n3ncc(=O)[nH]c3=O)cc2Cl)cc1
9 c1cc2cc(c1)-c1cccc(c1)C[n+]1ccc(c3ccccc31)NCCCCCCCCCCNc1cc[n+](c3ccccc13)C2
10 CSc1ccc(C(=O)c2ccc(-n3ncc(=O)[nH]c3=O)cc2Cl)cc1
11 O=C(c1ccc(Cl)cc1)c1ccc(-n2ncc(=O)[nH]c2=O)cc1Cl
12 O=C1O/C(=C/Br)CCC1c1cccc2ccccc12
13 O=C1O/C(=C/I)CCC1c1cccc2ccccc12
14 C#C/C=C1\CCC(c2cccc3ccccc23)C(=O)O1
15 O=C1O/C(=C\I)CCC1c1cccc2ccccc12
16 O=C(c1ccccc1)c1ccc(-n2ncc(=O)[nH]c2=O)cc1
17 O=c1cnn(-c2ccc(C(O)c3ccc(Cl)cc3)c(Cl)c2)c(=O)[nH]1
18 O=C(c1ccccc1)c1cc(-n2ncc(=O)[nH]c2=O)ccc1Cl
19 Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(O)c1ccccc1
20 COc1cc2nc(N3CCN(C(=O)C4CC4c4ccc

KeyboardInterrupt: 

In [6]:
for drug in drugs_csv.iterrows():
    # drug_id (id de la molecula actual)
    drug_id = drug[0]
    # drug_similarity_table (tabla de similaridad para la molecula actual)
    drug_similarity_table = SimilarityTable(drug_id)
    # drug_smiles (smiles de la molecula actual)
    drug_smiles = drug[1]['smiles']
    drug_molecule = Chem.MolFromSmiles(drug_smiles)
    drug_fingerprint = AllChem.GetMorganFingerprint(drug_molecule, 2)

    for drug_to_test in drugs_csv.iterrows():
        # drug_to_test_id (id de la molecula a comparar)
        drug_to_test_id = drug_to_test[0]
        try:
            if drug_to_test_id != drug_id:
                # drug_to_test_smiles (smiles de la molecula a comparar)
                drug_to_test_smiles = drug_to_test[1]['smiles']
                drug_to_test_molecule = Chem.MolFromSmiles(drug_to_test_smiles)
                drug_to_test_fingerprint = AllChem.GetMorganFingerprint(drug_to_test_molecule, 2)
                drugs_similarity = DataStructs.DiceSimilarity(drug_fingerprint, drug_to_test_fingerprint)

                drug_similarity_table.add_item(drug_to_test_id, drugs_similarity)
        except:
            print("Failed Smiles of Drug", drug_to_test_id)
            continue

    drug_similarity_table.print_dictionary()
    break

RDKit ERROR: [11:50:18] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 10 11 15 16 17 19 20 21
[11:50:18] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 10 11 15 16 17 19 20 21

RDKit ERROR: 


Failed Smiles of Drug 1833727


RDKit ERROR: [11:51:53] Explicit valence for atom # 0 N, 4, is greater than permitted
[11:51:53] Explicit valence for atom # 0 N, 4, is greater than permitted
RDKit ERROR: [11:51:53] Explicit valence for atom # 0 N, 4, is greater than permitted
[11:51:53] Explicit valence for atom # 0 N, 4, is greater than permitted
RDKit ERROR: [11:51:53] Explicit valence for atom # 0 N, 4, is greater than permitted
[11:51:53] Explicit valence for atom # 0 N, 4, is greater than permitted


Failed Smiles of Drug 2085105
Failed Smiles of Drug 2085115
Failed Smiles of Drug 2085513


RDKit ERROR: [11:51:53] Explicit valence for atom # 13 Cl, 5, is greater than permitted
[11:51:53] Explicit valence for atom # 13 Cl, 5, is greater than permitted
RDKit ERROR: [11:51:53] SMILES Parse Error: syntax error while parsing: OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]
[11:51:53] SMILES Parse Error: syntax error while parsing: OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]
RDKit ERROR: [11:51:53] SMILES Parse Error: Failed parsing SMILES 'OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]' for input: 'OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S(

Failed Smiles of Drug 2086328
Failed Smiles of Drug 2086395
Failed Smiles of Drug 2086994


RDKit ERROR: [11:51:54] Explicit valence for atom # 6 N, 4, is greater than permitted
[11:51:54] Explicit valence for atom # 6 N, 4, is greater than permitted
RDKit ERROR: [11:51:54] Explicit valence for atom # 0 O, 3, is greater than permitted
[11:51:54] Explicit valence for atom # 0 O, 3, is greater than permitted
RDKit ERROR: [11:51:54] Explicit valence for atom # 3 N, 4, is greater than permitted
[11:51:54] Explicit valence for atom # 3 N, 4, is greater than permitted


Failed Smiles of Drug 2087733
Failed Smiles of Drug 2088277
Failed Smiles of Drug 2088397


RDKit ERROR: [11:51:54] Explicit valence for atom # 4 F, 2, is greater than permitted
[11:51:54] Explicit valence for atom # 4 F, 2, is greater than permitted


Failed Smiles of Drug 2088581


RDKit ERROR: [11:51:55] Explicit valence for atom # 13 Be, 3, is greater than permitted
[11:51:55] Explicit valence for atom # 13 Be, 3, is greater than permitted


Failed Smiles of Drug 2091090


RDKit ERROR: [11:51:55] Explicit valence for atom # 84 N, 4, is greater than permitted
[11:51:55] Explicit valence for atom # 84 N, 4, is greater than permitted
RDKit ERROR: [11:51:55] SMILES Parse Error: syntax error while parsing: OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1
[11:51:55] SMILES Parse Error: syntax error while parsing: OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1
RDKit ERROR: [11:51:55] SMILES Parse Error: Failed parsing SMILES 'OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1' for input: 'OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1'
[11:51:55] SMILES Parse Error: Failed parsing SMILES 'OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2

Failed Smiles of Drug 2092021
Failed Smiles of Drug 2092388


RDKit ERROR: [11:51:55] Explicit valence for atom # 1 Cl, 4, is greater than permitted
[11:51:55] Explicit valence for atom # 1 Cl, 4, is greater than permitted
RDKit ERROR: [11:51:55] Explicit valence for atom # 0 N, 4, is greater than permitted
[11:51:55] Explicit valence for atom # 0 N, 4, is greater than permitted


Failed Smiles of Drug 2093064
Failed Smiles of Drug 2093616


RDKit ERROR: [11:51:56] Explicit valence for atom # 5 K, 2, is greater than permitted
[11:51:56] Explicit valence for atom # 5 K, 2, is greater than permitted


Failed Smiles of Drug 2094466
33 0.8676470588235294
5 0.8636363636363636
6 0.8529411764705882
57 0.8382352941176471
4 0.835820895522388
59 0.8333333333333334
18 0.8181818181818182
1 0.7883211678832117
7 0.7794117647058824
11 0.7761194029850746




In [None]:
for drug in drugs_csv.iterrows():
    # drug_id (id de la molecula actual)
    drug_id = drug[0]
    drug_similarity_table = SimilarityTable(drug_id)
    # drug_smiles (smiles de la molecula actual)
    drug_smiles = drug[1]['smiles']
    drug_molecule = Chem.MolFromSmiles(drug_smiles)
    drug_fingerprint = AllChem.GetMorganFingerprint(drug_molecule, 4)

    for drug_to_test in drugs_csv.iterrows():
        # drug_to_test_id (id de la molecula a comparar)
        drug_to_test_id = drug_to_test[0]
        try:
            if drug_to_test_id != drug_id:
                # drug_to_test_smiles (smiles de la molecula a comparar)
                drug_to_test_smiles = drug_to_test[1]['smiles']
                drug_to_test_molecule = Chem.MolFromSmiles(drug_to_test_smiles)
                drug_to_test_fingerprint = AllChem.GetMorganFingerprint(drug_to_test_molecule, 4)
                drugs_similarity = DataStructs.DiceSimilarity(drug_fingerprint, drug_to_test_fingerprint)

                drug_similarity_table.add_item(drug_to_test_id, drugs_similarity)
        except:
            print("Failed Smiles of Drug", drug_to_test_id)
            continue

    drug_similarity_table.print_dictionary()
    break

In [7]:
#Anterior
for drug in drugs_csv.iterrows():
    drug_similarity_table = SimilarityTable(drug[0])
    drug_molecule = Chem.MolFromSmiles(drug[1]['smiles'])
    drug_fingerprint = AllChem.GetMorganFingerprint(drug_molecule, 4)

    for drug_to_test in drugs_csv.iterrows():
        try:
            if drug_to_test[0] != drug[0]:
                drug_to_test_molecule = Chem.MolFromSmiles(drug_to_test[1]['smiles'])
                drug_to_test_fingerprint = AllChem.GetMorganFingerprint(drug_to_test_molecule, 4)
                drugs_similarity = DataStructs.DiceSimilarity(drug_fingerprint, drug_to_test_fingerprint)

                drug_similarity_table.add_item(drug_to_test[0], drugs_similarity)
        except:
            print("Failed Smiles of Drug", drug_to_test[0])
            continue

    drug_similarity_table.print_dictionary()
    break

RDKit ERROR: [12:37:16] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 10 11 15 16 17 19 20 21
[12:37:16] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 10 11 15 16 17 19 20 21

RDKit ERROR: 


Failed Smiles of Drug 1833727


RDKit ERROR: [12:39:31] Explicit valence for atom # 0 N, 4, is greater than permitted
[12:39:31] Explicit valence for atom # 0 N, 4, is greater than permitted
RDKit ERROR: [12:39:31] Explicit valence for atom # 0 N, 4, is greater than permitted
[12:39:31] Explicit valence for atom # 0 N, 4, is greater than permitted
RDKit ERROR: [12:39:31] Explicit valence for atom # 0 N, 4, is greater than permitted
[12:39:31] Explicit valence for atom # 0 N, 4, is greater than permitted


Failed Smiles of Drug 2085105
Failed Smiles of Drug 2085115
Failed Smiles of Drug 2085513


RDKit ERROR: [12:39:31] Explicit valence for atom # 13 Cl, 5, is greater than permitted
[12:39:31] Explicit valence for atom # 13 Cl, 5, is greater than permitted
RDKit ERROR: [12:39:31] SMILES Parse Error: syntax error while parsing: OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]
[12:39:31] SMILES Parse Error: syntax error while parsing: OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]
RDKit ERROR: [12:39:31] SMILES Parse Error: Failed parsing SMILES 'OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S([O-])([O-])[O-])\C1=CC=C(C=C1)S(O)(O)[O-]' for input: 'OS(O)(O)C1=CC=C(C=C1)C-1=C2\C=CC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC=C(C=C1)S(O)(O)O)C1=CC=C(C=C1)S(

Failed Smiles of Drug 2086328
Failed Smiles of Drug 2086395


RDKit ERROR: [12:39:31] Explicit valence for atom # 19 O, 3, is greater than permitted
[12:39:31] Explicit valence for atom # 19 O, 3, is greater than permitted


Failed Smiles of Drug 2086994


RDKit ERROR: [12:39:32] Explicit valence for atom # 6 N, 4, is greater than permitted
[12:39:32] Explicit valence for atom # 6 N, 4, is greater than permitted
RDKit ERROR: [12:39:32] Explicit valence for atom # 0 O, 3, is greater than permitted
[12:39:32] Explicit valence for atom # 0 O, 3, is greater than permitted
RDKit ERROR: [12:39:32] Explicit valence for atom # 3 N, 4, is greater than permitted
[12:39:32] Explicit valence for atom # 3 N, 4, is greater than permitted


Failed Smiles of Drug 2087733
Failed Smiles of Drug 2088277
Failed Smiles of Drug 2088397
Failed Smiles of Drug 2088581


RDKit ERROR: [12:39:32] Explicit valence for atom # 4 F, 2, is greater than permitted
[12:39:32] Explicit valence for atom # 4 F, 2, is greater than permitted
RDKit ERROR: [12:39:33] Explicit valence for atom # 13 Be, 3, is greater than permitted
[12:39:33] Explicit valence for atom # 13 Be, 3, is greater than permitted


Failed Smiles of Drug 2091090


RDKit ERROR: [12:39:33] Explicit valence for atom # 84 N, 4, is greater than permitted
[12:39:33] Explicit valence for atom # 84 N, 4, is greater than permitted
RDKit ERROR: [12:39:34] SMILES Parse Error: syntax error while parsing: OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1
[12:39:34] SMILES Parse Error: syntax error while parsing: OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1
RDKit ERROR: [12:39:34] SMILES Parse Error: Failed parsing SMILES 'OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1' for input: 'OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1'
[12:39:34] SMILES Parse Error: Failed parsing SMILES 'OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2

Failed Smiles of Drug 2092021
Failed Smiles of Drug 2092388


RDKit ERROR: [12:39:34] Explicit valence for atom # 1 Cl, 4, is greater than permitted
[12:39:34] Explicit valence for atom # 1 Cl, 4, is greater than permitted


Failed Smiles of Drug 2093064


RDKit ERROR: [12:39:34] Explicit valence for atom # 0 N, 4, is greater than permitted
[12:39:34] Explicit valence for atom # 0 N, 4, is greater than permitted


Failed Smiles of Drug 2093616


RDKit ERROR: [12:39:35] Explicit valence for atom # 5 K, 2, is greater than permitted
[12:39:35] Explicit valence for atom # 5 K, 2, is greater than permitted


Failed Smiles of Drug 2094466
33 0.8216216216216217
5 0.7821229050279329
57 0.7783783783783784
4 0.7582417582417582
6 0.7243243243243244
1 0.7204301075268817
59 0.6927374301675978
18 0.6666666666666666
7 0.6486486486486487
11 0.6483516483516484




In [8]:

for row in drugs_csv.itertuples(index=True, name='Pandas'):
    print(row)
    print(getattr(row, "_1"))  # getattr(row, "smiles")

Pandas(Index=0, _1=0, _2='CHEMBL6329', _3=nan, smiles='Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccccc1Cl', _5=nan, _6=nan)
0
Pandas(Index=1, _1=1, _2='CHEMBL6328', _3=nan, smiles='Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(C#N)cc1', _5=nan, _6=nan)
1
Pandas(Index=2, _1=2, _2='CHEMBL265667', _3=nan, smiles='Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1C(O)c1ccc(Cl)cc1', _5=nan, _6=nan)
2
Pandas(Index=3, _1=3, _2='CHEMBL6362', _3=nan, smiles='Cc1ccc(C(=O)c2ccc(-n3ncc(=O)[nH]c3=O)cc2)cc1', _5=nan, _6=nan)
3
Pandas(Index=4, _1=4, _2='CHEMBL267864', _3=nan, smiles='Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(Cl)cc1', _5=nan, _6=nan)
4
Pandas(Index=5, _1=5, _2='CHEMBL6363', _3=nan, smiles='Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccccc1', _5=nan, _6=nan)
5
Pandas(Index=6, _1=6, _2='CHEMBL6352', _3=nan, smiles='Cc1cc(Br)ccc1C(=O)c1ccc(-n2ncc(=O)[nH]c2=O)cc1Cl', _5=nan, _6=nan)
6
Pandas(Index=7, _1=7, _2='CHEMBL268097', _3=nan, smiles='O=C(c1ccc(Cl)cc1Cl)c1ccc(-n2ncc(=O)[nH]c2=O)cc1Cl', _5=nan, _6=nan)
7
Pandas(Index=8, _1

KeyboardInterrupt: 

In [9]:
for drug in drugs_csv.itertuples():
    # drug_id (id de la molecula actual)
    drug_id = getattr(drug, "_1")
    drug_similarity_table = SimilarityTable(drug_id)
    # drug_smiles (smiles de la molecula actual)
    drug_smiles = getattr(drug, "smiles")
    drug_molecule = Chem.MolFromSmiles(drug_smiles)
    drug_fingerprint = AllChem.GetMorganFingerprint(drug_molecule, 4)

    for drug_to_test in drugs_csv.itertuples():
        # drug_to_test_id (id de la molecula a comparar)
        drug_to_test_id = getattr(drug_to_test, "_1")
        try:
            if drug_to_test_id != drug_id:
                # drug_to_test_smiles (smiles de la molecula a comparar)
                drug_to_test_smiles = getattr(drug_to_test, "smiles")
                drug_to_test_molecule = Chem.MolFromSmiles(drug_to_test_smiles)
                drug_to_test_fingerprint = AllChem.GetMorganFingerprint(drug_to_test_molecule, 4)
                drugs_similarity = DataStructs.DiceSimilarity(drug_fingerprint, drug_to_test_fingerprint)

                drug_similarity_table.add_item(drug_to_test_id, drugs_similarity)
        except:
            print("Failed Smiles of Drug", drug_to_test_id)
            continue

    drug_similarity_table.print_dictionary()
    break

Failed Smiles of Drug 2028214
33 0.8216216216216217
5 0.7821229050279329
57 0.7783783783783784
4 0.7582417582417582
6 0.7243243243243244
1 0.7204301075268817
59 0.6927374301675978
18 0.6666666666666666
7 0.6486486486486487
11 0.6483516483516484
