Skip to content

Commit

Permalink
Merge pull request #9 from hoxo-m/refactoring
Browse files Browse the repository at this point in the history
Refactoring
  • Loading branch information
hoxo-m committed Jun 30, 2019
2 parents 6f30b02 + 2aa1e52 commit cb81edd
Show file tree
Hide file tree
Showing 20 changed files with 497 additions and 319 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method(print,KLIEP)
S3method(print,RuLSIF)
S3method(print,densratio)
S3method(print,uLSIF)
export(KLIEP)
export(RuLSIF)
export(densratio)
Expand Down
40 changes: 20 additions & 20 deletions R/KLIEP.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Estimate Density Ratio p(x)/q(y) by KLIEP (Kullback-Leibler Importance Estimation Procedure)
#' Estimate Density Ratio p(x)/q(x) by KLIEP (Kullback-Leibler Importance Estimation Procedure)
#'
#' @param x numeric vector or matrix. Data from a numerator distribution p(x).
#' @param y numeric vector or matrix. Data from a denominator distribution q(y).
#' @param x1 numeric vector or matrix. Data from a numerator distribution p(x).
#' @param x2 numeric vector or matrix. Data from a denominator distribution q(x).
#' @param sigma positive numeric vector. Search range of Gaussian kernel bandwidth.
#' @param kernel_num positive integer. Number of kernels.
#' @param fold positive integer. Numer of the folds of cross validation.
Expand All @@ -10,44 +10,44 @@
#' @return KLIEP object that contains a function to compute estimated density ratio.
#'
#' @export
KLIEP <- function(x, y, sigma = "auto", kernel_num = 100, fold = 5, verbose = TRUE) {
KLIEP <- function(x1, x2, sigma = "auto", kernel_num = 100, fold = 5, verbose = TRUE) {
if(verbose) message("############ Start KLIEP ############")
if(is.vector(x)) x <- matrix(x)
if(is.vector(y)) y <- matrix(y)
if(ncol(x) != ncol(y)) stop("x and y must be same dimensions.")
if(is.vector(x1)) x1 <- matrix(x1)
if(is.vector(x2)) x2 <- matrix(x2)
if(ncol(x1) != ncol(x2)) stop("x1 and x2 must be same dimensions.")

nx <- nrow(x)
kernel_num <- min(kernel_num, nx)
centers <- x[sample(nx, size = kernel_num), , drop = FALSE]
nx1 <- nrow(x1)
kernel_num <- min(kernel_num, nx1)
centers <- x1[sample(nx1, size = kernel_num), , drop = FALSE]

if(identical(sigma, "auto")) {
if(verbose) message("Searching optimal sigma and lambda...")
sigma <- KLIEP_search_sigma(x, y, centers, fold, verbose)
sigma <- KLIEP_search_sigma(x1, x2, centers, fold, verbose)
if(verbose) message(sprintf("Found optimal sigma = %.5f.", sigma))
} else if(length(sigma) > 1) {
if(verbose) message("Searching optimal sigma and lambda...")
sigma <- KLIEP_search_sigma_list(x, y, centers, sigma, fold, verbose)
sigma <- KLIEP_search_sigma_list(x1, x2, centers, sigma, fold, verbose)
if(verbose) message(sprintf("Found optimal sigma = %.5f.", sigma))
}

if(verbose) message("Optimizing alpha...")
phi_x <- compute_kernel_Gaussian(x, centers, sigma)
phi_y <- compute_kernel_Gaussian(y, centers, sigma)
alpha <- KLIEP_optimize_alpha(phi_x, phi_y)
if(verbose) message("Optimizing kernel weights...")
phi_x1 <- compute_kernel_Gaussian(x1, centers, sigma)
phi_x2 <- compute_kernel_Gaussian(x2, centers, sigma)
kernel_weights <- KLIEP_optimize_alpha(phi_x1, phi_x2)
if(verbose) message("End.")

result <- list(alpha = alpha,
result <- list(kernel_weights = as.vector(kernel_weights),
kernel_info = list(
kernel = "Gaussian RBF",
kernel = "Gaussian",
kernel_num = kernel_num,
sigma = sigma,
centers = centers
),
fold = fold,
compute_density_ratio = function(x) {
if(is.vector(x)) x <- matrix(x)
phi_x <- compute_kernel_Gaussian(x, centers, sigma)
density_ratio <- as.vector(phi_x %*% alpha)
phi_x1 <- compute_kernel_Gaussian(x1, centers, sigma)
density_ratio <- as.vector(phi_x1 %*% kernel_weights)
density_ratio
}
)
Expand Down
8 changes: 4 additions & 4 deletions R/RuLSIF.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ RuLSIF <- function(x1, x2,
lambda = 10 ^ seq(-3, 1, length.out = 9),
alpha = 0.1, kernel_num = 100, verbose = TRUE) {

if(verbose) message("################## Start RuLSIF ##################")
if(is.vector(x1)) x1 <- matrix(x1)
if(is.vector(x2)) x2 <- matrix(x2)
if(ncol(x1) != ncol(x2)) stop("x1 and x2 must be same dimensions.")
if (verbose) message("################## Start RuLSIF ##################")
if (is.vector(x1)) x1 <- matrix(x1)
if (is.vector(x2)) x2 <- matrix(x2)
if (ncol(x1) != ncol(x2)) stop("x1 and x2 must be same dimensions.")

nx1 <- nrow(x1)
nx2 <- nrow(x2)
Expand Down
24 changes: 11 additions & 13 deletions R/densratio.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' Estimate Density Ratio p(x)/q(y)
#' Estimate Density Ratio p(x)/q(x)
#'
#' @param x numeric vector or matrix. Data from a numerator distribution p(x).
#' @param y numeric vector or matrix. Data from a denominator distribution q(y).
#' @param method "uLSIF" (default), "KLIEP", or "RuLSIF".
#' @param x1 numeric vector or matrix. Data from a numerator distribution p(x).
#' @param x2 numeric vector or matrix. Data from a denominator distribution q(x).
#' @param method "uLSIF" (default), "RuLSIF", or "KLIEP".
#' @param sigma positive numeric vector. Search range of Gaussian kernel bandwidth.
#' @param lambda positive numeric vector. Search range of regularization parameter for uLSIF and RuLSIF.
#' @param alpha numeric in [0, 1]. Relative parameter for RuLSIF. Default 0.1.
Expand All @@ -13,25 +13,25 @@
#' @return densratio object that contains a function to compute estimated density ratio.
#'
#' @examples
#' x <- rnorm(200, mean = 1, sd = 1/8)
#' y <- rnorm(200, mean = 1, sd = 1/2)
#' x1 <- rnorm(200, mean = 1, sd = 1/8)
#' x2 <- rnorm(200, mean = 1, sd = 1/2)
#'
#' result <- densratio(x, y)
#' densratio_obj <- densratio(x1, x2)
#'
#' new_x <- seq(0, 2, by = 0.06)
#' estimated_density_ratio <- result$compute_density_ratio(new_x)
#' new_x <- seq(0, 2, by = 0.05)
#' estimated_density_ratio <- densratio_obj$compute_density_ratio(new_x)
#'
#' plot(new_x, estimated_density_ratio, pch=19)
#'
#' @export
densratio <- function(x, y, method = c("uLSIF", "RuLSIF", "KLIEP"),
densratio <- function(x1, x2, method = c("uLSIF", "RuLSIF", "KLIEP"),
sigma = "auto", lambda = "auto", alpha = 0.1,
kernel_num = 100, fold = 5, verbose = TRUE) {
# Prepare Arguments -------------------------------------------------------
method <- match.arg(method)

# To Retain Default Arguments in Functions of Methods ---------------------
params <- alist(x = x, y = y, kernel_num = kernel_num, verbose = verbose)
params <- alist(x1 = x1, x2 = x2, kernel_num = kernel_num, verbose = verbose)
if (!identical(sigma, "auto")) {
params <- c(params, alist(sigma = sigma))
}
Expand All @@ -41,8 +41,6 @@ densratio <- function(x, y, method = c("uLSIF", "RuLSIF", "KLIEP"),
if (!identical(lambda, "auto")) params <- c(params, alist(lambda = lambda))
result <- do.call(uLSIF, params)
} else if (method == "RuLSIF") {
params <- alist(x1 = x, x2 = y, kernel_num = kernel_num, verbose = verbose)
if (!identical(sigma, "auto")) params <- c(params, alist(sigma = sigma))
if (!identical(lambda, "auto")) params <- c(params, alist(lambda = lambda))
params <- c(params, alist(alpha = alpha))
result <- do.call(RuLSIF, params)
Expand Down
44 changes: 22 additions & 22 deletions R/print.R
Original file line number Diff line number Diff line change
@@ -1,44 +1,44 @@
#' @importFrom utils str
#' @export
print.densratio <- function(x, digits = 3L, ...) {
cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
"\n\n", sep = "")
if (!is.null(x$call)) {
cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
"\n\n", sep = "")
}

info <- x$kernel_info
cat("Kernel Information:\n")
cat(" Kernel type: ", info$kernel, "\n")
cat(" Number of kernels: ", info$kernel_num, "\n")
cat(" Bandwidth(sigma): ", format(info$sigma, digits = digits), "\n")
cat(" Bandwidth (sigma): ", format(info$sigma, digits = digits), "\n")
cat(" Centers: ")
str(info$centers, digits.d = digits, give.attr = FALSE)
cat("\n")
cat("Kernel Weights:\n ")
str(x$kernel_weights, digits.d = digits, give.attr = FALSE)
cat("\n")


if("uLSIF" %in% class(x)) {
cat("Kernel Weights:\n ")
str(x$alpha, digits.d = digits, give.attr = FALSE)
cat("\n")
cat("Regularization Parameter(lambda): ", x$lambda, "\n\n")
}

if("RuLSIF" %in% class(x)) {
cat("Kernel Weights:\n ")
str(x$kernel_weights, digits.d = digits, give.attr = FALSE)
cat("\n")
if ("uLSIF" %in% class(x)) {
cat("Regularization Parameter (lambda): ", x$lambda, "\n\n")
} else if ("RuLSIF" %in% class(x)) {
cat("Regularization Parameter (lambda): ", x$lambda, "\n")
cat("Relative Parameter (alpha): ", x$alpha, "\n\n")
}

if("KLIEP" %in% class(x)) {
cat("Kernel Weights:\n ")
str(x$alpha, digits.d = digits, give.attr = FALSE)
cat("\n")
} else if ("KLIEP" %in% class(x)) {
cat("Number of the Folds: ", x$fold, "\n\n")
}

cat("The Function to Estimate Density Ratio:\n")
cat("Function to Estimate Density Ratio:\n")
cat(" compute_density_ratio()\n")

cat("\n")
invisible(x)
}

#' @export
print.KLIEP <- print.densratio

#' @export
print.uLSIF <- print.densratio

#' @export
print.RuLSIF <- print.densratio
56 changes: 8 additions & 48 deletions R/uLSIF.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Estimate Density Ratio p(x)/q(y) by uLSIF (unconstrained Least-Square Importance Fitting)
#' Estimate Density Ratio p(x)/q(x) by uLSIF (unconstrained Least-Square Importance Fitting)
#'
#' @param x numeric vector or matrix. Data from a numerator distribution p(x).
#' @param y numeric vector or matrix. Data from a denominator distribution q(y).
#' @param x1 numeric vector or matrix. Data from a numerator distribution p(x).
#' @param x2 numeric vector or matrix. Data from a denominator distribution q(x).
#' @param sigma positive numeric vector. Search range of Gaussian kernel bandwidth.
#' @param lambda positive numeric vector. Search range of regularization parameter.
#' @param kernel_num positive integer. Number of kernels.
Expand All @@ -10,55 +10,15 @@
#' @return uLSIF object that contains a function to compute estimated density ratio.
#'
#' @export
uLSIF <- function(x, y,
uLSIF <- function(x1, x2,
sigma = 10 ^ seq(-3, 1, length.out = 9),
lambda = 10 ^ seq(-3, 1, length.out = 9),
kernel_num = 100, verbose = TRUE) {

if(verbose) message("################## Start uLSIF ##################")
if(is.vector(x)) x <- matrix(x)
if(is.vector(y)) y <- matrix(y)
if(ncol(x) != ncol(y)) stop("x and y must be same dimensions.")

nx <- nrow(x)
ny <- nrow(y)

kernel_num <- min(kernel_num, nx)
centers <- x[sample(nx, size = kernel_num), , drop = FALSE]

if(length(sigma) != 1 || length(lambda) != 1) {
if(verbose) message("Searching optimal sigma and lambda...")
opt_params <- uLSIF_search_sigma_and_lambda(x, y, centers, sigma, lambda, verbose)
sigma <- opt_params$sigma
lambda <- opt_params$lambda
if(verbose) message(sprintf("Found optimal sigma = %.3f, lambda = %.3f.", sigma, lambda))
}

if(verbose) message("Optimizing alpha...")
phi_x <- compute_kernel_Gaussian(x, centers, sigma)
phi_y <- compute_kernel_Gaussian(y, centers, sigma)
H <- crossprod(phi_y) / ny
h <- colMeans(phi_x)
alpha <- solve(H + diag(lambda, kernel_num, kernel_num)) %*% h
alpha[alpha < 0] <- 0
if(verbose) message("End.")

result <- list(alpha = as.vector(alpha),
lambda = lambda,
kernel_info = list(
kernel = "Gaussian RBF",
kernel_num = kernel_num,
sigma = sigma,
centers = centers
),
compute_density_ratio = function(x) {
if(is.vector(x)) x <- matrix(x)
phi_x <- compute_kernel_Gaussian(x, centers, sigma)
density_ratio <- as.vector(phi_x %*% alpha)
density_ratio
}
)
params <- alist(x1 = x1, x2 = x2,
sigma = sigma, lambda = lambda, alpha = 0,
kernel_num = kernel_num, verbose = verbose)
result <- do.call(RuLSIF, params)
class(result) <- c("uLSIF", class(result))
if(verbose) message("################## Finished uLSIF ###############")
result
}
45 changes: 0 additions & 45 deletions R/uLSIF_search_sigma_and_lambda.R

This file was deleted.

Loading

0 comments on commit cb81edd

Please sign in to comment.