Permalink
Browse files

Updated fingerprint to allow non-square similarity matrices. Also sim…

…plified code
  • Loading branch information...
1 parent 6584f99 commit 4baf4090efb65563e282d280cbf16c72b7e44a7b @rajarshi rajarshi committed Jul 26, 2011
Showing with 32 additions and 25 deletions.
  1. +9 −0 fingerprint/ChangeLog
  2. +2 −2 fingerprint/DESCRIPTION
  3. +13 −21 fingerprint/R/matrix.R
  4. +8 −2 fingerprint/man/sim.Rd
View
@@ -1,3 +1,12 @@
+2011-07-26 Rajarshi Guha <guhar@Rajarshi-Guha-MacBook-Pro.local>
+
+ * man/sim.Rd: Updated man page for fp.sim.matrix to indicate the
+ use of two fingerprint lists
+
+ * R/matrix.R (fp.sim.matrix): Updated similarity matrix
+ calculation to support cross-similarity (i.e., a similarity matrix
+ computed from two lists of fingerprints, possibly of different lengths)
+
2011-06-03 Rajarshi Guha <guhar@Rajarshi-Guha-MacBook-Pro.local>
* src/fpdistance.c: Cleaned up unnecessary headers and unused variables
View
@@ -1,6 +1,6 @@
Package: fingerprint
-Version: 3.4.4
-Date: 2011-06-03
+Version: 3.4.5
+Date: 2011-07-26
Title: Functions to operate on binary fingerprint data
Author: Rajarshi Guha <rajarshi.guha@gmail.com>
Maintainer: Rajarshi Guha <rajarshi.guha@gmail.com>
View
@@ -1,25 +1,17 @@
-fp.sim.matrix <- function(fplist, method='tanimoto') {
- fptype <- class(fplist[[1]])
- if ("fingerprint" %in% fptype) {
- size <- fplist[[1]]@nbit
- sim <- matrix(0,nr=length(fplist), nc=length(fplist))
- for (i in 1:(length(fplist)-1)) {
- v <- unlist(lapply( fplist[(i+1):length(fplist)], distance, fp2=fplist[[i]], method=method))
- sim[i,(i+1):length(fplist)] <- v
- sim[(i+1):length(fplist),i] <- v
- }
- diag(sim) <- 1.0
- sim
- } else {
- sim <- matrix(0,nr=length(fplist), nc=length(fplist))
- for (i in 1:(length(fplist)-1)) {
- v <- unlist(lapply( fplist[(i+1):length(fplist)], distance, fp2=fplist[[i]], method=method))
- sim[i,(i+1):length(fplist)] <- v
- sim[(i+1):length(fplist),i] <- v
- }
- diag(sim) <- 1.0
- sim
+## Build a similarity matrix from one or two lists of fingerprint objects.
+##
+## fplist  - list of objects accepted by distance(); these index the rows.
+## fplist2 - optional second list indexing the columns. If NULL, the square
+##           matrix of fplist against itself is computed.
+## method  - name of the metric, forwarded unchanged to distance().
+##
+## Returns a length(fplist) x length(fplist) matrix when fplist2 is NULL,
+## otherwise a length(fplist) x length(fplist2) matrix.
+fp.sim.matrix <- function(fplist, fplist2=NULL, method='tanimoto') {
+
+ if (!is.null(fplist2)) {
+ ## Cross-similarity: one row per element of fplist, one column per
+ ## element of fplist2. 'method' must be forwarded here as well, otherwise
+ ## distance() silently falls back to its own default metric.
+ return(do.call('rbind', lapply(fplist, function(fp) unlist(lapply(fplist2, function(x) distance(x, fp, method=method))))))
+ }
+
+ n <- length(fplist)
+ sim <- matrix(0, nrow=n, ncol=n)
+ ## Only the upper triangle is computed and then mirrored. seq_len() gives an
+ ## empty sequence for lists of length 0 or 1, whereas 1:(n-1) would iterate
+ ## over c(1, 0) and index past the end of fplist.
+ for (i in seq_len(max(n - 1, 0))) {
+ v <- unlist(lapply( fplist[(i+1):n], distance, fp2=fplist[[i]], method=method))
+ sim[i,(i+1):n] <- v
+ sim[(i+1):n,i] <- v
 }
+ ## NOTE(review): the diagonal is set to 1 regardless of 'method'; for a
+ ## distance metric (e.g. Euclidean) the self-distance is presumably 0 -
+ ## confirm against distance() before changing.
+ diag(sim) <- 1.0
+ return(sim)
}
## Takes the fingerprints, P bits, for a set of N molecules supplied as
View
@@ -15,21 +15,27 @@ Note that if the Euclidean distance is specified then the resultant matrix i
distance matrix and not a similarity matrix
}
\usage{
-fp.sim.matrix(fplist, method='tanimoto')
+fp.sim.matrix(fplist, fplist2=NULL, method='tanimoto')
}
\arguments{
\item{fplist}{
A list structure with each element being an object of class
\code{fingerprint} or \code{featvec}. These can be constructed by hand or
read from disk via \code{\link{fp.read}}
}
+\item{fplist2}{A list structure with each element being an object of class
+\code{fingerprint} or \code{featvec}. If \code{NULL}, the traditional pairwise
+similarity is calculated among the members of \code{fplist}; otherwise the
+resultant N x M matrix holds the similarity between each of the N members of
+\code{fplist} and each of the M members of \code{fplist2}}
\item{method}{
The type of distance metric to use. The default is \code{tanimoto}. Partial
matching is supported.
}
}
\value{
-A matrix with dimensions equal to \code{(length(fplist), length(fplist))}
+A matrix with dimensions equal to \code{(length(fplist), length(fplist))} if
+\code{fplist2} is NULL, otherwise \code{(length(fplist), length(fplist2))}
}
\seealso{
\code{\link{distance}}, \code{\link{fp.read}}

0 comments on commit 4baf409

Please sign in to comment.