# Reading Spectra

## Setting the path of the MoNA file

In [88]:
import os

# The data folder sits next to the current working directory, i.e. inside its parent.
parent_dir = os.path.dirname(os.getcwd())
path = os.path.join(parent_dir, "data")

# Location of the MoNA GC-MS export (.msp file) inside the data folder.
msp_file = os.path.join(path, "MoNA-export-GC-MS.msp")

## Reading the MoNA file into a list of spectra

In [89]:
from matchms.importing import load_from_msp

# Collect everything the MSP reader iterates over into a list.
spectrums = list(load_from_msp(msp_file))
print("Number of spectra: ", len(spectrums))

Number of spectra:  14847


## Filtering the Spectra

In [90]:
from matchms.filtering import normalize_intensities
from matchms.filtering import select_by_relative_intensity
from matchms.filtering import select_by_mz
from matchms.filtering import require_minimum_number_of_peaks
from matchms.filtering import add_precursor_mz

def apply_my_filters(s):
    """Run one spectrum through the notebook's fixed chain of matchms filters.

    The filters are applied in this order:

    1. ``normalize_intensities``
    2. ``select_by_mz`` with ``mz_from=0`` and ``mz_to=1000``
    3. ``require_minimum_number_of_peaks`` with ``n_required=10``
    4. ``select_by_relative_intensity`` with ``intensity_from=0.01`` and
       ``intensity_to=1.0``

    Parameters
    ----------
    s
        The spectrum to process (presumably a matchms ``Spectrum``; the type
        is not visible in this notebook).

    Returns
    -------
    Whatever the last filter in the chain returns. The calling cell discards
    ``None`` results, so a filter can presumably reject a spectrum by
    returning ``None`` -- confirm against the matchms documentation.
    """
    normalized = normalize_intensities(s)
    within_mz_window = select_by_mz(normalized, mz_from=0, mz_to=1000)
    enough_peaks = require_minimum_number_of_peaks(within_mz_window, n_required=10)
    return select_by_relative_intensity(enough_peaks, intensity_from=0.01, intensity_to=1.0)

# Filter every spectrum and keep only the ones for which the filter chain
# returned something other than None.
spectrums = [kept for kept in map(apply_my_filters, spectrums) if kept is not None]

print("Remaining spectra: ", len(spectrums))

Remaining spectra:  14359


## Adding fingerprints

In [91]:
from matchms.filtering import add_fingerprint

# Disabled experiments that used to live in this cell:
#   * taking small slices of `spectrums` (720:725 or 722:723) as `sample` and
#     printing the metadata of the first spectrum in the slice;
#   * overwriting each spectrum's 'smiles' entry with its 'smiles_2' entry
#     before computing the fingerprints.

# Work on a shallow copy of the list so that `spectrums` itself is not rebound.
spectrums_clone = list(spectrums)

# Ask matchms to add a 2048-bit "daylight" fingerprint to each spectrum.
spectrums_fingerprint = [
    add_fingerprint(spectrum, fingerprint_type="daylight", nbits=2048)
    for spectrum in spectrums_clone
]

RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12
RDKit ERROR: 
RDKit ERROR: [17:52:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [17:5

RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 6 7 8 10 11 12 13 14 15
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 6 7 9 10 11 12 13 14 15
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 4 5 6 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 4 5 6 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 4 5 6 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12 13 14 15 16
RDKit ERROR: 
RDKit ERROR: [17:52:45] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11 12 13 14 16
RDKit

RDKit ERROR: [17:53:02] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10 11 12 13 14
RDKit ERROR: 
RDKit ERROR: [17:53:02] Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19 20 21 22 23
RDKit ERROR: 
RDKit ERROR: [17:53:03] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [17:53:04] Can't kekulize mol.  Unkekulized atoms: 2 3 6
RDKit ERROR: 
RDKit ERROR: [17:53:04] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [17:53:04] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [17:53:04] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [17:53:04] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6
RDKit ERROR: 
RDKit ERROR: [17:53:04] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [17:53:05] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [17:53:06] Can't kekulize mol. 

RDKit ERROR: 
RDKit ERROR: [17:53:14] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19
RDKit ERROR: 
RDKit ERROR: [17:53:14] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19
RDKit ERROR: 


In [95]:
# Report spectra that have no fingerprint at all, or whose fingerprint sums
# to less than 1.
for i, spectrum in enumerate(spectrums_fingerprint):
    fingerprint = spectrum.get("fingerprint")
    if fingerprint is None:
        print(i, "no figerprint")
    elif fingerprint.sum() < 1:
        print(i, "weird")

12941 no figerprint


In [96]:
from matchms.similarity import FingerprintSimilarityParallel

# All-vs-all comparison: the same list of spectra is passed as both arguments,
# scored with the "jaccard" measure on the fingerprints added above.
similarity_measure = FingerprintSimilarityParallel(
    similarity_measure="jaccard",
)
scores_mol_similarity = similarity_measure(
    spectrums_fingerprint,
    spectrums_fingerprint,
)


NameError: name 'np' is not defined

In [99]:
# numpy is not imported by any earlier cell, so import it here; without this
# the cell raises NameError on a fresh kernel (Restart & Run All).
import numpy

# Persist the daylight/Jaccard similarity scores inside the data folder.
filename = os.path.join(path, "gcms_similarities_daylight2048_jaccard.npy")
numpy.save(filename, scores_mol_similarity)

In [100]:
from matchms.utils import mol_converter

# Cross-check the structure annotations: convert both the InChI and the SMILES
# of each spectrum to an InChIKey and print the spectra for which the first
# 14 characters of the two keys differ.
for i, spec in enumerate(spectrums_fingerprint):
    inchi = spec.get("inchi")
    inchikey_inchi = mol_converter(inchi, "inchi", "inchikey") if inchi else None

    smiles = spec.get("smiles")
    inchikey_smiles = mol_converter(smiles, "smiles", "inchikey") if smiles else None

    # Nothing to compare unless both conversions produced a key.
    if not (inchikey_inchi and inchikey_smiles):
        continue

    if inchikey_inchi[:14] != inchikey_smiles[:14]:
        print(i, "\n")
        print("--" * 10, smiles)
        print("--" * 10, inchi)
        print("--" * 5, inchikey_inchi[:14])
        print("--" * 5, inchikey_smiles[:14])
            

RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12
RDKit ERROR: 
RDKit ERROR: [18:18:32] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:1

1768 

-------------------- O=C(C(C)(C)1)C(C)(C)CC(=CC1)C[Sn](C)(C)C
-------------------- InChI=1S/C12H20O.3CH4.Sn.4H/c1-9-6-7-11(2,3)10(13)12(4,5)8-9;;;;;;;;/h1,6-8H2,2-5H3;3*1H4;;;;;
---------- MXZAGCHAAXTYPS
---------- HLYWPYLMMCZNEC




1958 

-------------------- CC[Sn](CC)(CC)CC
-------------------- InChI=1S/4C2H6.Sn.4H/c4*1-2;;;;;/h4*1-2H3;;;;;
---------- GDHITDSZLDSFOV
---------- RWWNQEOPUOCKGR
1959 

-------------------- CCCC[Sn](F)(CCCC)CCCC
-------------------- InChI=1S/3C4H10.FH.Sn/c3*1-3-4-2;;/h3*3-4H2,1-2H3;1H;/q;;;;+1/p-1
---------- ILIBOEFITZJBMH
---------- DFNPRTKVCGZMMC
1960 

-------------------- CCCC[Sn](Cl)(CCCC)CCCC
-------------------- InChI=1S/3C4H10.ClH.Sn/c3*1-3-4-2;;/h3*3-4H2,1-2H3;1H;/q;;;;+1/p-1
---------- MFLILBQFMLCFGI
---------- GCTFWCDSFPMHHS
1961 

-------------------- CCCC[Sn](CCCC)(CCCC)CCCC
-------------------- InChI=1S/4C4H10.Sn.4H/c4*1-3-4-2;;;;;/h4*3-4H2,1-2H3;;;;;
---------- STETXKIDVIWJIC
---------- AFCAKJKUYFLYFK
1962 

-------------------- CCCC[Sn](CCCC)(CCCC)OC(C)=O
-------------------- InChI=1S/3C4H10.C2H4O2.Sn/c3*1-3-4-2;1-2(3)4;/h3*3-4H2,1-2H3;1H3,(H,3,4);/q;;;;+1/p-1
---------- DRRJYHLHDBEXIX
---------- PWBHRVGYSMBMIO
1963 

-------------------- c(c3)ccc(c3)[Sn](Cl)(c(c2)cc

RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10 11 12 13 14
RDKit ERROR: 
RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10 11 12 13 14
RDKit ERROR: 
RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11 12 13 14 15
RDKit ERROR: 


2284 

-------------------- CCCC[Sn](CCCC)(CCCC)CCCC
-------------------- InChI=1S/4C4H10.Sn.4H/c4*1-3-4-2;;;;;/h4*3-4H2,1-2H3;;;;;
---------- STETXKIDVIWJIC
---------- AFCAKJKUYFLYFK


RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 11 16 17 18 19 20 21 22 23
RDKit ERROR: 
RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 


2716 

-------------------- FN=C(F)N(F)F
-------------------- InChI=1S/2CF4N2/c2*2-1(6-3)7(4)5/b2*6-1+
---------- SMRRSJDMBDWJJY
---------- RVDXRQSAHJWHMT
2835 

-------------------- Clc(c2)ccc(c2)C(N(C)1)[S+2]([O-1])([O-1])CCC(=O)1
-------------------- InChI=1S/C11H12ClNO3S/c1-13-10(14)6-7-17(15,16)11(13)8-2-4-9(12)5-3-8/h2-5,11H,6-7H2,1H3/q+2
---------- CUWAMRPNZASHMN
---------- WEQAYVWKMWHEJO
3021 

-------------------- CCCC[Sn](CCCC)(O1)OC(=O)C=CC(=O)1
-------------------- InChI=1S/C4H4O4.2C4H10.Sn/c5-3(6)1-2-4(7)8;2*1-3-4-2;/h1-2H,(H,5,6)(H,7,8);2*3-4H2,1-2H3;/q;;;+2/p-2/b2-1-;;;
---------- SUELYNMTKIOVEL
---------- ZBBLRPRYYSJUCZ
3022 

-------------------- CCCC[Sn](OC(CCCCCCCCCCC)=O)(OC(CCCCCCCCCCC)=O)CCCC
-------------------- InChI=1S/2C12H24O2.2C4H10.Sn/c2*1-2-3-4-5-6-7-8-9-10-11-12(13)14;2*1-3-4-2;/h2*2-11H2,1H3,(H,13,14);2*3-4H2,1-2H3;/q;;;;+2/p-2
---------- QMRYWSXNTKCXBW
---------- UKLDJPRMSDWDSL


RDKit ERROR: [18:18:36] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:18:36] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10 11 12 13 14
RDKit ERROR: 


3237 

-------------------- CCCC[Sn](CCCC)(O1)OC(=O)C=CC(=O)1
-------------------- InChI=1S/C4H4O4.2C4H10.Sn/c5-3(6)1-2-4(7)8;2*1-3-4-2;/h1-2H,(H,5,6)(H,7,8);2*3-4H2,1-2H3;/q;;;+2/p-2/b2-1-;;;
---------- SUELYNMTKIOVEL
---------- ZBBLRPRYYSJUCZ
3238 

-------------------- CCCC[Sn](OC(CCCCCCCCCCC)=O)(OC(CCCCCCCCCCC)=O)CCCC
-------------------- InChI=1S/2C12H24O2.2C4H10.Sn/c2*1-2-3-4-5-6-7-8-9-10-11-12(13)14;2*1-3-4-2;/h2*2-11H2,1H3,(H,13,14);2*3-4H2,1-2H3;/q;;;;+2/p-2
---------- QMRYWSXNTKCXBW
---------- UKLDJPRMSDWDSL


RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 10 11 12 13 14
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 11 16 17 19 20 21 22 23 24
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 9 14 15 16 17 18 19 20 21
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 2 3 35 36 37 38 39 40 43
RDKit ERROR: 


3472 

-------------------- CN(C)CCOC(c(c2)cccc2)c(c1)cccc1
-------------------- InChI=1S/C17H21NO.C7H7ClN4O2/c1-18(2)13-14-19-17(15-9-5-3-6-10-15)16-11-7-4-8-12-16;1-11-4-3(9-6(8)10-4)5(13)12(2)7(11)14/h3-12,17H,13-14H2,1-2H3;1-2H3,(H,9,10)
---------- NFLLKCVHYJRNRH
---------- ZZVUWRFHKOJYTH


RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10 11 12 13 14
RDKit ERROR: 


3746 

-------------------- [Br-1]
-------------------- InChI=1S/C8H20N.BrH/c1-5-9(6-2,7-3)8-4;/h5-8H2,1-4H3;1H/q+1;/p-1
---------- HWCKGOZZJDHMNC
---------- CPELXLSAUQHCOX
3747 

-------------------- [Cl-1]
-------------------- InChI=1S/C16H36N.ClH/c1-5-9-13-17(14-10-6-2,15-11-7-3)16-12-8-4;/h5-16H2,1-4H3;1H/q+1;/p-1
---------- NHGXDBSUJJNIRV
---------- VEXZGXHMUGYJMC
3748 

-------------------- [Br-1]
-------------------- InChI=1S/C16H36N.BrH/c1-5-9-13-17(14-10-6-2,15-11-7-3)16-12-8-4;/h5-16H2,1-4H3;1H/q+1;/p-1
---------- JRMUNVKIHCOMHV
---------- CPELXLSAUQHCOX
3749 

-------------------- [I-1]
-------------------- InChI=1S/C20H44N.HI/c1-5-9-13-17-21(18-14-10-6-2,19-15-11-7-3)20-16-12-8-4;/h5-20H2,1-4H3;1H/q+1;/p-1
---------- FBLZDUAOBOMSNZ
---------- XMBWDFGMSWQBCA
3750 

-------------------- [I-1]
-------------------- InChI=1S/C20H44N.HI/c1-17(2)9-13-21(14-10-18(3)4,15-11-19(5)6)16-12-20(7)8;/h17-20H,9-16H2,1-8H3;1H/q+1;/p-1
---------- BMKCBSPIAGPRBW
---------- XMBWDFGMSWQBCA
3751

RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 10
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 3 4 6 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 1 2 3 11 13
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 3 4 6 7 8 9 10 11 13
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 3 4 6 7 8 9 10 12 13
RDKit ERROR: 
RDKit ERROR: [18:18:37] Can't kekulize mol.  Unkekulized atoms: 3 4 6 7 8 9 10 11 14
RDKit ERROR: 
RDKit ERROR: [18:18:37] C

4067 

-------------------- [Br-1]
-------------------- InChI=1S/C9H14N.BrH/c1-10(2,3)9-7-5-4-6-8-9;/h4-8H,1-3H3;1H/q+1;/p-1
---------- GNMJFQWRASXXMS
---------- CPELXLSAUQHCOX
4068 

-------------------- [I-1]
-------------------- InChI=1S/C8H20N.HI/c1-5-9(6-2,7-3)8-4;/h5-8H2,1-4H3;1H/q+1;/p-1
---------- UQFSVBXCNGCBBW
---------- XMBWDFGMSWQBCA
4069 

-------------------- [I-1]
-------------------- InChI=1S/C4H12N.HI/c1-5(2,3)4;/h1-4H3;1H/q+1;/p-1
---------- RXMRGBVLCSYIBO
---------- XMBWDFGMSWQBCA
4070 

-------------------- [Br-1]
-------------------- InChI=1S/C5H12N.BrH/c1-5-6(2,3)4;/h5H,1H2,2-4H3;1H/q+1;/p-1
---------- IDVSELVVGYIOEX
---------- CPELXLSAUQHCOX
4071 

-------------------- [Br-1]
-------------------- InChI=1S/C4H12N.BrH/c1-5(2,3)4;/h1-4H3;1H/q+1;/p-1
---------- DDFYFBUWEBINLX
---------- CPELXLSAUQHCOX


RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13 14 15 16 17
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13 14 15 16 17
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13 14 15 16 17
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 5 6 7 12 13 14 15 16 17
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 7 8 10 11 12 13 14 15 17
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 7 8 10 11 12 13 14 15 17
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 7 8 10 11 12 13 14 15 17
RDKit E

4453 

-------------------- CCCC=CCC[Sn](C)(C)C
-------------------- InChI=1S/C7H14.3CH4.Sn.4H/c1-3-5-7-6-4-2;;;;;;;;/h5,7H,3-4,6H2,1-2H3;3*1H4;;;;;/b7-5+;;;;;;;;
---------- UAKOCVPFSUQANI
---------- TZNXFWLGOAUSQR
4465 

-------------------- Fc(c2)c(ccc2)[Hg]c(c1)c(F)ccc1
-------------------- InChI=1S/2C6H5F.Hg/c2*7-6-4-2-1-3-5-6;/h2*1-5H;
---------- HVEHUWZOEFTECY
---------- VCIMYPFASJQYDJ
4471 

-------------------- c(c1)ccc(c1)SCCC[Sn](C)(C)C
-------------------- InChI=1S/C9H12S.3CH4.Sn.4H/c1-2-8-10-9-6-4-3-5-7-9;;;;;;;;/h3-7H,2,8H2,1H3;3*1H4;;;;;
---------- VMQIEGDMFYMEHK
---------- HMJQOLXJRJTWPL


RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11 12 13 14 15
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 15
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11 12 13 14 15
RDKit ERROR: 
RDKit ERROR: [18:18:38] Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11 12 13 14 15
RDKit ERROR: 


5068 

-------------------- [H]Cl
-------------------- InChI=1S/C6H13NO.ClH/c8-7-6-4-2-1-3-5-6;/h6-8H,1-5H2;1H
---------- SSVAHXZUFFSFER
---------- VEXZGXHMUGYJMC


RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 7 8 9 14 15 16 17 18 19
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 5 6 7 15 16 17 18 19 20
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 7 8 14 15 16 17 18 19 20
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 8 9 10 15 16 17 18 19 20
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 10 11 12 15 16 17 18 19 20
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 10 11 12 14 15 16 17 19 20
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13 14 16 17 18
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13 14 15 17 22
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12 14 15 17 22
RDKit ERROR: 
RDKit ERROR: [18:18:40] Can't kekulize mol.  Unkekulized atoms: 9 10 

7500 

-------------------- CCCC[Sn](CCCC)(O1)OC(=O)C=CC(=O)1
-------------------- InChI=1S/C4H4O4.2C4H10.Sn/c5-3(6)1-2-4(7)8;2*1-3-4-2;/h1-2H,(H,5,6)(H,7,8);2*3-4H2,1-2H3;/q;;;+2/p-2/b2-1-;;;
---------- SUELYNMTKIOVEL
---------- ZBBLRPRYYSJUCZ
7501 

-------------------- CCCC[Sn](OC(CCCCCCCCCCC)=O)(OC(CCCCCCCCCCC)=O)CCCC
-------------------- InChI=1S/2C12H24O2.2C4H10.Sn/c2*1-2-3-4-5-6-7-8-9-10-11-12(13)14;2*1-3-4-2;/h2*2-11H2,1H3,(H,13,14);2*3-4H2,1-2H3;/q;;;;+2/p-2
---------- QMRYWSXNTKCXBW
---------- UKLDJPRMSDWDSL


RDKit ERROR: [18:18:43] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 14
RDKit ERROR: 
RDKit ERROR: [18:18:43] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4
RDKit ERROR: 
RDKit ERROR: [18:18:43] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4
RDKit ERROR: 
RDKit ERROR: [18:18:43] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [18:18:43] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
RDKit ERROR: 


8621 

-------------------- CN(C)C(=S)S[Zn]SC(=S)N(C)C
-------------------- InChI=1S/2C3H7NS2.Zn.2H/c2*1-4(2)3(5)6;;;/h2*1-2H3,(H,5,6);;;/q;;+2;;/p-2
---------- VHHLBXGMWZJIPX
---------- DUBNHZYBDBBJHD


RDKit ERROR: [18:18:43] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12
RDKit ERROR: 
RDKit ERROR: [18:18:43] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10 11 12 13 14
RDKit ERROR: 


8907 

-------------------- C[Sn](C)(C)C
-------------------- InChI=1S/4CH4.Sn.4H/h4*1H4;;;;;
---------- JDMJRFMRAXUYFT
---------- VXKWYPOMXBVZSJ


RDKit ERROR: [18:18:43] Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19 20 21 22 23
RDKit ERROR: 
RDKit ERROR: [18:18:44] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:18:44] Can't kekulize mol.  Unkekulized atoms: 2 3 6
RDKit ERROR: 


9462 

-------------------- CC(C)N(C(=O)1)[S+2]([O-1])([O-1])Nc(c2)c(ccc2)1
-------------------- InChI=1S/C10H12N2O3S/c1-7(2)12-10(13)8-5-3-4-6-9(8)11-16(12,14)15/h3-7H,1-2H3,(H-,13,14,15)/q+2/p+1
---------- RSDLSDBVWDSGOK
---------- ZOMSMJKLGFBRBS


RDKit ERROR: [18:18:44] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:44] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:44] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:44] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6
RDKit ERROR: 
RDKit ERROR: [18:18:44] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 5 6 7 10 12
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol. 

10310 

-------------------- [H]O[H]
-------------------- InChI=1S/C6H9N3O2.ClH.H2O/c7-5(6(10)11)1-4-2-8-3-9-4;;/h2-3,5H,1,7H2,(H,8,9)(H,10,11);1H;1H2
---------- CMXXUDSWGMGYLZ
---------- XLYOFNOQVPJJNP
10350 

-------------------- CC(=CC[Sn](C)(C)C)C[Sn](C)(C)C
-------------------- InChI=1S/C5H8.6CH4.2Sn.8H/c1-4-5(2)3;;;;;;;;;;;;;;;;/h4H,1-2H2,3H3;6*1H4;;;;;;;;;;/b5-4-;;;;;;;;;;;;;;;;
---------- ODKHJQGKBJZVLD
---------- JUVFDQMUBKKZOB
10352 

-------------------- C[Sn](C)(C)CC=CC[Sn](C)(C)C
-------------------- InChI=1S/C4H6.6CH4.2Sn.8H/c1-3-4-2;;;;;;;;;;;;;;;;/h3-4H,1-2H2;6*1H4;;;;;;;;;;/b4-3+;;;;;;;;;;;;;;;;
---------- RECVJKMOHRWGHD
---------- CFJUYISNWQKCKP
10353 

-------------------- C(C=CC[Sn](CCCC)(CCCC)CCCC)[Sn](CCCC)(CCCC)CCCC
-------------------- InChI=1S/6C4H10.C4H6.2Sn.8H/c7*1-3-4-2;;;;;;;;;;/h6*3-4H2,1-2H3;3-4H,1-2H2;;;;;;;;;;/b;;;;;;4-3+;;;;;;;;;;
---------- IYSKBFTXKCEDBT
---------- VDQCIJNLWBPEDB


RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 5 6 7 10 12
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 5 6 7 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 4 5 6 9 11
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7
RDKit ERROR: 
RDKit ERROR: [18:18:45] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 4 5 6 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 4 5 12 17 18
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8
RDKit ERROR: 


10775 

-------------------- [H]C(C1([H])3)(OC([H])(C2)C(CC3)([H])OC([H])(CC=O)C([H])(C2)OC(=C([H])C[Sn](CCCC)(CCCC)CCCC)[H])CCCO1
-------------------- InChI=1S/C18H28O5.3C4H10.Sn.4H/c1-2-11-20-14-6-8-16-17(23-18(14)9-10-19)7-5-13-15(22-16)4-3-12-21-13;3*1-3-4-2;;;;;/h2,10-11,13-18H,3-9,12H2,1H3;3*3-4H2,1-2H3;;;;;/b11-2-;;;;;;;;/t13-,14+,15+,16-,17+,18-;;;;;;;;/m1......../s1
---------- FYFNCFYJJSSGFU
---------- MSTVYJCSNUCWJT
10851 

-------------------- [H]Cl
-------------------- InChI=1S/C4H9NO2.ClH/c1-2-7-4(6)3-5;/h2-3,5H2,1H3;1H
---------- TXTWXQXDMWILOF
---------- VEXZGXHMUGYJMC


RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 13
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 13
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10 11 12 13 14
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 3 4 5 8 9
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 16
RDKit ERROR: 
RDKit ERROR: [18:18:46] Can't kekulize mol.  Unkek

In [101]:
# Request 2048-bit "morgan3" fingerprints, starting from the spectra that
# already went through the daylight fingerprint step.
spectrums_2 = [
    add_fingerprint(spectrum, fingerprint_type="morgan3", nbits=2048)
    for spectrum in spectrums_fingerprint
]


RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12
RDKit ERROR: 
RDKit ERROR: [18:22:10] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:2

RDKit ERROR: 
RDKit ERROR: [18:22:22] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized atoms: 4 5 6 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized atoms: 4 5 6 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized atoms: 4 5 6 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 12 13 14 15 16
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11 12 13 14 16
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11 12 13 14 16
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized atoms: 5 6 7 10 11 12 13 14 16
RDKit ERROR: 
RDKit ERROR: [18:22:23] Can't kekulize mol.  Unkekulized a

RDKit ERROR: [18:22:39] Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19 20 21 22 23
RDKit ERROR: 
RDKit ERROR: [18:22:40] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:22:40] Can't kekulize mol.  Unkekulized atoms: 2 3 6
RDKit ERROR: 
RDKit ERROR: [18:22:40] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:22:40] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:22:40] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
RDKit ERROR: 
RDKit ERROR: [18:22:41] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6
RDKit ERROR: 
RDKit ERROR: [18:22:41] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [18:22:41] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [18:22:42] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
RDKit ERROR: 
RDKit ERROR: [18:22:42] Can't kekulize mol.  Unke

RDKit ERROR: [18:22:50] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19
RDKit ERROR: 
RDKit ERROR: [18:22:50] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19
RDKit ERROR: 


In [102]:
# numpy is not imported by any earlier cell; import it here so that this cell
# also works on a fresh kernel instead of raising NameError.
import numpy

# The output file is labelled "dice", so score with the Dice measure. This
# cell previously requested "jaccard", which stored Jaccard scores under a
# Dice-labelled file name.
# NOTE(review): if Jaccard scores were actually intended for the morgan3
# fingerprints, revert the measure and rename the output file instead.
similarity_measure = FingerprintSimilarityParallel(similarity_measure="dice")
scores_mol_similarity = similarity_measure(spectrums_2, spectrums_2)

# Persist the morgan3/Dice similarity scores inside the data folder.
filename = os.path.join(path, "gcms_similarities_morgan3_2048_dice.npy")
numpy.save(filename, scores_mol_similarity)