# How-To: build Signature from molecules

A molecule signature is built with the `MoleculeSignature` class, which expects an RDKit `Mol` object as input:

In [2]:
from rdkit import Chem
from signature.Signature import MoleculeSignature

# Parse the SMILES string into an RDKit Mol object.
mol = Chem.MolFromSmiles("COC=O")
# Build the molecule signature, leaving every optional parameter at its default.
mol_sig = MoleculeSignature(mol)
# Bare expression: the notebook displays the repr, a list of AtomSignature objects.
mol_sig

MoleculeSignature(atoms=[AtomSignature(morgans=(650, 1004), root='[O;H0;h0;D2;X2]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1:1]', root_minus='None', neighbors=[]), AtomSignature(morgans=(694, 287, 1276), root='[C;H3;h3;D1;X4]-[O;H0;h0;D2;X2]-[C;H1;h1;D2;X3:1]=[O;H0;h0;D1;X1]', root_minus='None', neighbors=[]), AtomSignature(morgans=(695, 1874), root='[C;H3;h3;D1;X4]-[O;H0;h0;D2;X2:1]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1]', root_minus='None', neighbors=[]), AtomSignature(morgans=(1057, 841), root='[C;H1;h1;D2;X3]-[O;H0;h0;D2;X2]-[C;H3;h3;D1;X4:1]', root_minus='None', neighbors=[])])

Once built, the signature can be exported as a canonical string with the `.to_string()` method:

In [4]:
# Export the whole signature as one string; atom signatures are separated by ' .. '.
mol_sig.to_string()

'650-1004 ## [O;H0;h0;D2;X2]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1:1] .. 694-287-1276 ## [C;H3;h3;D1;X4]-[O;H0;h0;D2;X2]-[C;H1;h1;D2;X3:1]=[O;H0;h0;D1;X1] .. 695-1874 ## [C;H3;h3;D1;X4]-[O;H0;h0;D2;X2:1]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1] .. 1057-841 ## [C;H1;h1;D2;X3]-[O;H0;h0;D2;X2]-[C;H3;h3;D1;X4:1]'

Signatures can also be exported as a list of atomic signatures with `.to_list()`:

In [5]:
# One string per atom signature: the Morgan bits joined by '-', then ' ## ', then the root SMARTS.
mol_sig.to_list()

['650-1004 ## [O;H0;h0;D2;X2]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1:1]',
 '694-287-1276 ## [C;H3;h3;D1;X4]-[O;H0;h0;D2;X2]-[C;H1;h1;D2;X3:1]=[O;H0;h0;D1;X1]',
 '695-1874 ## [C;H3;h3;D1;X4]-[O;H0;h0;D2;X2:1]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1]',
 '1057-841 ## [C;H1;h1;D2;X3]-[O;H0;h0;D2;X2]-[C;H3;h3;D1;X4:1]']

A `MoleculeSignature` object can be rebuilt from a previously exported signature string with `MoleculeSignature.from_string()`:

In [7]:
# String export of the "COC=O" signature, in the format produced by `.to_string()`.
sig_str = '650-1004 ## [O;H0;h0;D2;X2]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1:1] .. 694-287-1276 ## [C;H3;h3;D1;X4]-[O;H0;h0;D2;X2]-[C;H1;h1;D2;X3:1]=[O;H0;h0;D1;X1] .. 695-1874 ## [C;H3;h3;D1;X4]-[O;H0;h0;D2;X2:1]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1] .. 1057-841 ## [C;H1;h1;D2;X3]-[O;H0;h0;D2;X2]-[C;H3;h3;D1;X4:1]'
# Rebuild a MoleculeSignature from the string alone (no RDKit Mol is passed here).
mol_sig2 = MoleculeSignature.from_string(sig_str)
# Displayed list matches the `.to_list()` output of the signature built from the Mol.
mol_sig2.to_list()

['650-1004 ## [O;H0;h0;D2;X2]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1:1]',
 '694-287-1276 ## [C;H3;h3;D1;X4]-[O;H0;h0;D2;X2]-[C;H1;h1;D2;X3:1]=[O;H0;h0;D1;X1]',
 '695-1874 ## [C;H3;h3;D1;X4]-[O;H0;h0;D2;X2:1]-[C;H1;h1;D2;X3]=[O;H0;h0;D1;X1]',
 '1057-841 ## [C;H1;h1;D2;X3]-[O;H0;h0;D2;X2]-[C;H3;h3;D1;X4:1]']

Signature neighbors can be generated with `.post_compute_neighbors()` and exported using `.to_string(neighbors=True)` and `.to_list(neighbors=True)` methods:

In [9]:
# Compute the neighbors in place; the return value is not used.
mol_sig2.post_compute_neighbors()
# `True` is passed positionally for `neighbors`, so each entry gains
# ' && <bond type> <> <neighbor signature>' parts after its own signature.
mol_sig2.to_list(True)

['650-1004 ## [C;H1;h1;D2;X3]=[O;H0;h0;D1;X1:1] && DOUBLE <> [O;H0;h0;D1;X1]=[C;H1;h1;D2;X3:1]-[O;H0;h0;D2;X2]',
 '694-287-1276 ## [O;H0;h0;D1;X1]=[C;H1;h1;D2;X3:1]-[O;H0;h0;D2;X2] && DOUBLE <> [C;H1;h1;D2;X3]=[O;H0;h0;D1;X1:1] && SINGLE <> [C;H1;h1;D2;X3]-[O;H0;h0;D2;X2:1]-[C;H3;h3;D1;X4]',
 '695-1874 ## [C;H1;h1;D2;X3]-[O;H0;h0;D2;X2:1]-[C;H3;h3;D1;X4] && SINGLE <> [O;H0;h0;D1;X1]=[C;H1;h1;D2;X3:1]-[O;H0;h0;D2;X2] && SINGLE <> [O;H0;h0;D2;X2]-[C;H3;h3;D1;X4:1]',
 '1057-841 ## [O;H0;h0;D2;X2]-[C;H3;h3;D1;X4:1] && SINGLE <> [C;H1;h1;D2;X3]-[O;H0;h0;D2;X2:1]-[C;H3;h3;D1;X4]']

Molecule signatures can be compared for equality with the `==` operator:

In [10]:
# Same molecule, but built with an explicit radius of 1 ...
mol_sig = MoleculeSignature(mol, radius=1)
# ... versus the default radius.
other_sig = MoleculeSignature(mol)
# The two signatures compare unequal for this molecule (displays False).
mol_sig == other_sig

False

In [11]:
# Build a signature with the default parameters ...
mol_sig = MoleculeSignature(mol)
# ... and a second one by round-tripping it through its string export.
other_sig = MoleculeSignature.from_string(mol_sig.to_string())
# The round trip preserves equality (displays True).
mol_sig == other_sig

True

## How To: create signatures to feed an alphabet of atomic signatures

Atom signatures as character strings

In [12]:
from signature.utils import mol_from_smiles
from signature.Signature import AtomSignature, MoleculeSignature

alphabet = []

# Build the molecule signature for methanol
smi = "CO"
mol = mol_from_smiles(smi)
ms = MoleculeSignature(
    mol,
    radius=2,
    use_smarts=True,
    nbits=2048,
    boundary_bonds=False,
    map_root=True,
)

# Collect each distinct atom-signature string once, in order of first appearance
for sig_str in ms.to_list():
    if sig_str in alphabet:
        continue
    alphabet.append(sig_str)

# Rebuild an AtomSignature from every string, compute its neighbors and
# print the neighbor-extended string form
for sig_str in alphabet:
    atom_sig = AtomSignature.from_string(sig_str)
    atom_sig.post_compute_neighbors()
    print(atom_sig.to_string(True))

807-1155 ## [C;H3;h3;D1;X4]-[O;H1;h1;D1;X2:1] && SINGLE <> [O;H1;h1;D1;X2]-[C;H3;h3;D1;X4:1]
1057 ## [O;H1;h1;D1;X2]-[C;H3;h3;D1;X4:1] && SINGLE <> [C;H3;h3;D1;X4]-[O;H1;h1;D1;X2:1]


Atom signatures as `AtomSignature` objects

In [14]:
from signature.utils import mol_from_smiles
from signature.Signature import MoleculeSignature

alphabet = []

# Build the molecule signature for methanol
smi = "CO"
mol = mol_from_smiles(smi)
ms = MoleculeSignature(
    mol,
    radius=2,
    use_smarts=True,
    nbits=2048,
    boundary_bonds=False,
    map_root=True,
)

# Collect each distinct AtomSignature object once, in order of first appearance
for atom_sig in ms.atoms:
    if atom_sig in alphabet:
        continue
    alphabet.append(atom_sig)

# Compute the neighbors of every collected signature and print the
# neighbor-extended string form
for atom_sig in alphabet:
    atom_sig.post_compute_neighbors()
    print(atom_sig.to_string(True))

807-1155 ## [C;H3;h3;D1;X4]-[O;H1;h1;D1;X2:1] && SINGLE <> [O;H1;h1;D1;X2]-[C;H3;h3;D1;X4:1]
1057 ## [O;H1;h1;D1;X2]-[C;H3;h3;D1;X4:1] && SINGLE <> [C;H3;h3;D1;X4]-[O;H1;h1;D1;X2:1]
