/
jaccard.R
69 lines (64 loc) · 1.73 KB
/
jaccard.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#' Compute an expected Jaccard/Tanimoto similarity coefficient under independence
#'
#' Under independence of \code{x} and \code{y}, the expected coefficient is
#' computed as \code{(px * py) / (px + py - px * py)}.
#'
#' @param x a binary vector (e.g., fingerprint)
#' @param y a binary vector (e.g., fingerprint) of the same length as \code{x}
#' @param px probability of successes in \code{x} (optional). When \code{NULL},
#'   it is estimated as \code{mean(x)}.
#' @param py probability of successes in \code{y} (optional). When \code{NULL},
#'   it is estimated as \code{mean(y)}.
#'
#' @return \code{jaccard.ev} returns an expected value. The result is
#'   \code{NaN} when both probabilities are zero (0/0).
#'
#' @export jaccard.ev
#'
#' @examples
#' set.seed(1234)
#' x <- rbinom(100, 1, .5)
#' y <- rbinom(100, 1, .5)
#' jaccard.ev(x, y)
#' # Supply one probability and estimate the other from the data
#' jaccard.ev(x, y, px = 0.5)
jaccard.ev <- function(x, y, px = NULL, py = NULL) {
  if (length(x) != length(y)) {
    stop("Two fingerprints (x and y) must be of the same length.")
  }
  # Fill in each probability independently, so that a value supplied for only
  # one of px/py is honoured instead of being overwritten by the sample mean.
  if (is.null(px)) {
    px <- mean(x)
  }
  if (is.null(py)) {
    py <- mean(y)
  }
  (px * py) / (px + py - px * py)
}
#' Compute a Jaccard/Tanimoto similarity coefficient
#'
#' The coefficient is the size of the intersection of \code{x} and \code{y}
#' divided by the size of their union. When the union is empty, the
#' expectation under independence,
#' \code{(px * py) / (px + py - px * py)}, is used in its place.
#'
#' @param x a binary vector (e.g., fingerprint)
#' @param y a binary vector (e.g., fingerprint) of the same length as \code{x}
#' @param center whether to center the Jaccard/Tanimoto coefficient by its expectation
#' @param px probability of successes in \code{x} (optional). When \code{NULL},
#'   it is estimated as \code{mean(x)}.
#' @param py probability of successes in \code{y} (optional). When \code{NULL},
#'   it is estimated as \code{mean(y)}.
#'
#' @return \code{jaccard} returns a Jaccard/Tanimoto coefficient. If
#'   \code{center} is true, the expectation under independence is subtracted.
#'   The result is \code{NaN} when the expectation is needed and both
#'   probabilities are zero (0/0).
#'
#' @export jaccard
#'
#' @examples
#' set.seed(1234)
#' x <- rbinom(100, 1, .5)
#' y <- rbinom(100, 1, .5)
#' jaccard(x, y)
#' jaccard(x, y, center = TRUE)
jaccard <- function(x, y, center = FALSE, px = NULL, py = NULL) {
  if (length(x) != length(y)) {
    stop("Two fingerprints (x and y) must be of the same length.")
  }
  # Fill in each probability independently, so that a value supplied for only
  # one of px/py is honoured instead of being overwritten by the sample mean.
  if (is.null(px)) {
    px <- mean(x)
  }
  if (is.null(py)) {
    py <- mean(y)
  }

  # Expected coefficient under independence; used both as the fallback for an
  # empty union and as the centering constant.
  expected <- (px * py) / (px + py - px * py)

  sumxy <- sum(x & y)
  unionxy <- sum(x) + sum(y) - sumxy
  if (unionxy == 0) {
    # Intersection over union is 0/0 here, so fall back to the expectation.
    j <- expected
  } else {
    j <- sumxy / unionxy
  }

  if (center) {
    j - expected
  } else {
    j
  }
}