Permalink
Browse files

Added pairwise_pmi function for pointwise mutual information

  • Loading branch information...
dgrtwo committed Oct 29, 2017
1 parent 545219c commit e297c547a1599e48bd3afd23fead6bd01310253a
Showing with 119 additions and 2 deletions.
  1. +2 −0 NAMESPACE
  2. +1 −1 R/pairwise_count.R
  3. +65 −0 R/pairwise_pmi.R
  4. +1 −1 man/pairwise_count.Rd
  5. +50 −0 man/pairwise_pmi.Rd
View
@@ -6,6 +6,8 @@ export(pairwise_count)
export(pairwise_count_)
export(pairwise_dist)
export(pairwise_dist_)
export(pairwise_pmi)
export(pairwise_pmi_)
export(pairwise_similarity)
export(pairwise_similarity_)
export(squarely)
View
@@ -29,7 +29,7 @@
#' # count the number of times two letters appear together
#' pairwise_count(dat, letter, group)
#' pairwise_count(dat, letter, group, sort = TRUE)
#' pairwise_count(dat, letter, group, sort = TRUE, diag = FALSE)
#' pairwise_count(dat, letter, group, sort = TRUE, diag = TRUE)
#'
#' @export
pairwise_count <- function(tbl, item, feature, wt = NULL, ...) {
View
@@ -0,0 +1,65 @@
#' Pointwise mutual information of pairs of items
#'
#' Find pointwise mutual information of pairs of items in a column, based on
#' a "feature" column that links them together.
#' This is an example of the spread-operate-retidy pattern.
#'
#' @param tbl Table
#' @param item Item to compare; will end up in \code{item1} and
#' \code{item2} columns
#' @param feature Column describing the feature that links one item to others
#' @param sort Whether to sort in descending order of the pointwise mutual
#' information
#' @param ... Extra arguments passed on to \code{squarely},
#' such as \code{diag} and \code{upper}
#'
#' @name pairwise_pmi
#'
#' @return A tbl_df with three columns, \code{item1}, \code{item2}, and
#' \code{pmi}.
#'
#' @examples
#'
#' library(dplyr)
#'
#' dat <- data_frame(group = rep(1:5, each = 2),
#' letter = c("a", "b",
#' "a", "c",
#' "a", "c",
#' "b", "e",
#' "b", "f"))
#'
#' # how informative is each letter about each other letter
#' pairwise_pmi(dat, letter, group)
#' pairwise_pmi(dat, letter, group, sort = TRUE)
#'
#' @export
pairwise_pmi <- function(tbl, item, feature, sort = FALSE) {
pairwise_pmi_(tbl,
col_name(substitute(item)),
col_name(substitute(feature)),
sort = sort)
}
#' @rdname pairwise_pmi
#' @export
pairwise_pmi_ <- function(tbl, item, feature, sort = FALSE) {
f <- function(m) {
row_sums <- rowSums(m) / sum(m)
ret <- m %*% t(m)
ret <- ret / sum(ret)
ret <- ret / row_sums
ret <- t(t(ret) / (row_sums))
ret
}
pmi_func <- squarely_(f, sparse = TRUE, sort = sort)
tbl %>%
ungroup() %>%
mutate(..value = 1) %>%
pmi_func(item, feature, "..value") %>%
mutate(value = log(value)) %>%
rename(pmi = value)
}
View

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
View

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.

0 comments on commit e297c54

Please sign in to comment.