# tidy chemical similarity

Load required packages (ChemmineR needs ChemmineOB to use `openbabel`)

In [2]:
library(purrr)
library(ChemmineR)
library(tidyverse)
library(tidygraph)

Let's define a function that calculates the Tanimoto similarity between two molecules provided as SMILES strings.

In [3]:
# Calculate the Tanimoto similarity between two molecules given as SMILES.
#
# Arguments:
#   mol1, mol2  Each must be exactly one non-missing, non-empty character
#               string holding a SMILES representation.
# Returns:
#   The value produced by fpSim(..., method = "Tanimoto") for the pair.
# Errors:
#   Stops with an informative message when either argument is not a single
#   usable string. Without this check a longer vector would be expanded into
#   several compounds and the wrong pair would be compared silently.
smiles2tanimoto <- function(mol1, mol2) {
  # Reject anything that is not one plain, non-empty string.
  check_smiles <- function(smi, arg) {
    ok <- is.character(smi) && length(smi) == 1L && !is.na(smi) && nzchar(smi)
    if (!ok) {
      stop(
        sprintf("`%s` must be a single non-empty SMILES string", arg),
        call. = FALSE
      )
    }
    invisible(TRUE)
  }
  check_smiles(mol1, "mol1")
  check_smiles(mol2, "mol2")

  # SMILES -> SDF container holding both compounds (cmp1, cmp2).
  sdfset <- smiles2sdf(c(cmp1 = mol1, cmp2 = mol2))
  # SDF -> atom-pair descriptors.
  apset <- sdf2ap(sdfset)
  # Atom pairs -> fingerprints.
  # NOTE(review): descnames = 512 presumably selects a 512-feature fingerprint;
  # confirm against the desc2fp documentation of the installed ChemmineR.
  fpset <- desc2fp(x = apset, descnames = 512, type = "FPset")
  # Compare the first fingerprint (mol1) against the second (mol2).
  result <- fpSim(fpset[1], fpset[2], method = "Tanimoto")
  result
}

Let's test it on a tiny data frame

In [4]:
# Small test table: glucose is compared once against glucose-6-P and once
# against NADH. Compound names live in mol1/mol2, their SMILES in smi1/smi2.
compare_mol <- tibble(
  mol1 = rep("glucose", 2),
  mol2 = c("glucose-6-P", "NADH"),
  smi1 = rep("C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O", 2),
  smi2 = c(
    "C([C@H]([C@H]([C@@H]([C@H](C=O)O)O)O)O)OP(=O)(O)O",
    "C1C=CN(C=C1C(=O)N)[C@H]2[C@@H]([C@@H]([C@H](O2)COP(=O)(O)OP(=O)(O)OC[C@@H]3[C@H]([C@H]([C@@H](O3)N4C=NC5=C4N=CN=C5N)O)O)O)O"
  )
)

# Score every row's SMILES pair, then keep only the names and the score.
compare_mol %>%
  mutate(tanim_sim = map2_dbl(smi1, smi2, smiles2tanimoto)) %>%
  select(mol1, mol2, tanim_sim)

mol1,mol2,tanim_sim
glucose,glucose-6-P,0.9090909
glucose,NADH,0.4761905


In [5]:
# Restore the saved network object from its RDS file (path is relative to the
# working directory).
human_ma <- readRDS(file = "hma-network-lvalue-20190412.rds")

In [6]:
# Display the loaded object to inspect its node and edge tables.
human_ma

# A tbl_graph: 4624 nodes and 10926 edges
#
# A directed simple graph with 144 components
#
# Node Data: 4,624 x 2 (active)
  name                              lvalues
  <chr>                               <dbl>
1 L-lactate[p]                         3.71
2 sedoheptulose-1,7-bisphosphate[c]  225.  
3 PEP[c]                            1200.  
4 3-phospho-D-glycerate[c]           215.  
5 GAP[c]                            1368.  
6 fructose-1,6-bisphosphate[c]       114.  
# … with 4,618 more rows
#
# Edge Data: 10,926 x 5
   from    to no_HMR num_triangles hmr     
  <int> <int>  <dbl>         <dbl> <chr>   
1     1  1094      1             0 HMR_4281
2     2    20      1             1 HMR_4355
3     2    53      1             2 HMR_4355
# … with 1.092e+04 more rows