# tidy chemical similarity

Load required packages (ChemmineR needs ChemmineOB to use `openbabel`)

In [None]:
library(purrr)
library(ChemmineR)
library(tidyverse)

Let's define a function that calculates the Tanimoto similarity between two molecules provided as SMILES strings.

In [None]:
# Function to calculate the Tanimoto similarity of two molecules from their
# SMILES strings.
#
# Pipeline: SMILES -> SDF set (smiles2sdf) -> atom-pair descriptors (sdf2ap)
# -> fingerprints (desc2fp, descnames = 512) -> fpSim(method = "Tanimoto").
#
# Arguments:
#   mol1, mol2  each a single, non-missing SMILES string (character scalar).
#
# Returns: whatever fpSim() yields for the two fingerprints, i.e. a single
#   similarity value (usable with purrr::map2_dbl).
#
# Errors: stops early if either argument is not a single non-missing string.
smiles2tanimoto <- function(mol1, mol2) {
  # Guard against vectors / NA / non-character input. Without it, c() below
  # flattens a longer vector and fpset[1] vs. fpset[2] would quietly compare
  # the first two entries of `mol1` instead of `mol1` against `mol2`.
  is_single_string <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x)
  }
  if (!is_single_string(mol1) || !is_single_string(mol2)) {
    stop(
      "`mol1` and `mol2` must each be a single non-missing SMILES string",
      call. = FALSE
    )
  }

  sdfset <- smiles2sdf(c(cmp1 = mol1, cmp2 = mol2))
  apset <- sdf2ap(sdfset)
  fpset <- desc2fp(x = apset, descnames = 512, type = "FPset")
  fpSim(fpset[1], fpset[2], method = "Tanimoto")
}

Let's test it on a tiny data frame

In [None]:
# Small test table with two pairwise comparisons:
#   row 1: glucose vs. glucose-6-P
#   row 2: glucose vs. NADH
# Glucose is the reference molecule in both rows, so its name and SMILES
# are simply repeated.
compare_mol <- tibble(
  mol1 = rep("glucose", 2),
  mol2 = c("glucose-6-P", "NADH"),
  smi1 = rep("C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O", 2),
  smi2 = c(
    "C([C@H]([C@H]([C@@H]([C@H](C=O)O)O)O)O)OP(=O)(O)O",
    "C1C=CN(C=C1C(=O)N)[C@H]2[C@@H]([C@@H]([C@H](O2)COP(=O)(O)OP(=O)(O)OC[C@@H]3[C@H]([C@H]([C@@H](O3)N4C=NC5=C4N=CN=C5N)O)O)O)O"
  )
)

# Compute the similarity row by row and show only the names and the score.
compare_mol %>%
  mutate(tanim_sim = map2_dbl(smi1, smi2, smiles2tanimoto)) %>%
  select(mol1, mol2, tanim_sim)