# TF-ligand search / prediction: Tanimoto scorer.
In this notebook I build the first version of a simple tool to predict possible interactions between a TF and a ligand. The first step is to find the molecule in the database that is chemically closest to the query. For this, we use the Tanimoto similarity function (https://en.wikipedia.org/wiki/Chemical_similarity).

In [100]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import DataStructs

In [101]:
# Tanimoto similarity between two molecules given as SMILES strings (adapted from
# https://medium.com/data-professor/how-to-calculate-molecular-similarity-25d543ea7f40).
def tanimoto_calc(smi1, smi2, radius=3, n_bits=2048):
    """Return the Tanimoto similarity of two molecules, rounded to 3 decimals.

    Both molecules are parsed from SMILES, converted to Morgan fingerprint
    bit vectors, and compared with ``DataStructs.TanimotoSimilarity``.

    Parameters
    ----------
    smi1, smi2 : str
        SMILES strings of the two molecules to compare.
    radius : int, default 3
        Morgan fingerprint radius.
    n_bits : int, default 2048
        Length of the fingerprint bit vector.

    Returns
    -------
    float
        Similarity score rounded to three decimal places.

    Raises
    ------
    ValueError
        If either input is not a string or cannot be parsed as SMILES.
    """
    mols = []
    for smi in (smi1, smi2):
        # MolFromSmiles signals a parse failure by returning None rather than
        # raising; non-string input (e.g. a NaN read from a CSV) is rejected
        # here so the caller sees which value was bad.
        mol = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
        if mol is None:
            raise ValueError(f"Could not parse SMILES: {smi!r}")
        mols.append(mol)

    fp1, fp2 = (
        AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        for mol in mols
    )
    return round(DataStructs.TanimotoSimilarity(fp1, fp2), 3)

In [103]:
def tanimoto_ranker(query, db_path='./TF_DB_clean.csv', smiles_col='Can_SMILES'):
    """Rank every database entry by its Tanimoto similarity to ``query``.

    Parameters
    ----------
    query : str
        SMILES string of the query molecule.
    db_path : str, default './TF_DB_clean.csv'
        Path of the CSV database to search.
    smiles_col : str, default 'Can_SMILES'
        Name of the column holding each entry's SMILES string.

    Returns
    -------
    pandas.DataFrame
        The database with an added ``Tanimoto_score_vs_query`` column, sorted
        from most to least similar, with a fresh 0..n-1 index.
    """
    database = pd.read_csv(db_path)
    # One score per row, in row order, so it lines up with the frame below.
    scores = [float(tanimoto_calc(query, smiles)) for smiles in database[smiles_col]]
    ranked = database.assign(Tanimoto_score_vs_query=scores)
    return (
        ranked
        .sort_values(by=['Tanimoto_score_vs_query'], ascending=False)
        .reset_index(drop=True)
    )

In [104]:
# SMILES strings to search the database with; add more entries to rank several queries.
queries = [
    'C1C(OC2=CC(=CC(=C2C1=O)O)O)C3=CC=CC=C3',
]
for query_smiles in queries:
    ranked_hits = tanimoto_ranker(query_smiles)
    print(ranked_hits)

                Molecule                                              InChI  \
0             Naringenin  InChI=1S/C15H12O5/c16-9-3-1-8(2-4-9)13-7-12(19...   
1             Naringenin  InChI=1S/C15H12O5/c16-9-3-1-8(2-4-9)13-7-12(19...   
2             Naringenin  InChI=1S/C15H12O5/c16-9-3-1-8(2-4-9)13-7-12(19...   
3             Naringenin  InChI=1S/C15H12O5/c16-9-3-1-8(2-4-9)13-7-12(19...   
4             naringenin  InChI=1S/C15H12O5/c16-9-3-1-8(2-4-9)13-7-12(19...   
...                  ...                                                ...   
5432  Copper ion, (Cu2+)                                        InChI=1S/Cu   
5433  Copper ion, (Cu2+)                                        InChI=1S/Cu   
5434  Copper ion, (Cu2+)                                        InChI=1S/Cu   
5435  Copper ion, (Cu2+)                                        InChI=1S/Cu   
5436  Nickel ion, (Ni2+)                                        InChI=1S/Ni   

                               Can_SMILES Organism_