Skip to content

Commit

Permalink
Added pairwise_pmi function for pointwise mutual information
Browse files Browse the repository at this point in the history
  • Loading branch information
dgrtwo committed Oct 29, 2017
1 parent 545219c commit e297c54
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 2 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -6,6 +6,8 @@ export(pairwise_count)
export(pairwise_count_)
export(pairwise_dist)
export(pairwise_dist_)
export(pairwise_pmi)
export(pairwise_pmi_)
export(pairwise_similarity)
export(pairwise_similarity_)
export(squarely)
Expand Down
2 changes: 1 addition & 1 deletion R/pairwise_count.R
Expand Up @@ -29,7 +29,7 @@
#' # count the number of times two letters appear together
#' pairwise_count(dat, letter, group)
#' pairwise_count(dat, letter, group, sort = TRUE)
#' pairwise_count(dat, letter, group, sort = TRUE, diag = FALSE)
#' pairwise_count(dat, letter, group, sort = TRUE, diag = TRUE)
#'
#' @export
pairwise_count <- function(tbl, item, feature, wt = NULL, ...) {
Expand Down
65 changes: 65 additions & 0 deletions R/pairwise_pmi.R
@@ -0,0 +1,65 @@
#' Pointwise mutual information of pairs of items
#'
#' Find pointwise mutual information of pairs of items in a column, based on
#' a "feature" column that links them together.
#' This is an example of the spread-operate-retidy pattern.
#'
#' @param tbl Table
#' @param item Item to compare; will end up in \code{item1} and
#' \code{item2} columns
#' @param feature Column describing the feature that links one item to others
#' @param sort Whether to sort in descending order of the pointwise mutual
#' information
#' @param ... Extra arguments passed on to \code{squarely},
#' such as \code{diag} and \code{upper}
#'
#' @name pairwise_pmi
#'
#' @return A tbl_df with three columns, \code{item1}, \code{item2}, and
#' \code{pmi}.
#'
#' @examples
#'
#' library(dplyr)
#'
#' dat <- data_frame(group = rep(1:5, each = 2),
#' letter = c("a", "b",
#' "a", "c",
#' "a", "c",
#' "b", "e",
#' "b", "f"))
#'
#' # how informative is each letter about each other letter
#' pairwise_pmi(dat, letter, group)
#' pairwise_pmi(dat, letter, group, sort = TRUE)
#'
#' @export
pairwise_pmi <- function(tbl, item, feature, sort = FALSE) {
pairwise_pmi_(tbl,
col_name(substitute(item)),
col_name(substitute(feature)),
sort = sort)
}


#' @rdname pairwise_pmi
#' @export
pairwise_pmi_ <- function(tbl, item, feature, sort = FALSE) {
f <- function(m) {
row_sums <- rowSums(m) / sum(m)

ret <- m %*% t(m)
ret <- ret / sum(ret)
ret <- ret / row_sums
ret <- t(t(ret) / (row_sums))
ret
}
pmi_func <- squarely_(f, sparse = TRUE, sort = sort)

tbl %>%
ungroup() %>%
mutate(..value = 1) %>%
pmi_func(item, feature, "..value") %>%
mutate(value = log(value)) %>%
rename(pmi = value)
}
2 changes: 1 addition & 1 deletion man/pairwise_count.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 50 additions & 0 deletions man/pairwise_pmi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e297c54

Please sign in to comment.