Skip to content

Commit

Permalink
version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
teng-gao authored and cran-robot committed Oct 25, 2023
0 parents commit 466187f
Show file tree
Hide file tree
Showing 103 changed files with 5,226 additions and 0 deletions.
26 changes: 26 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,26 @@
Package: hahmmr
Title: Haplotype-Aware Hidden Markov Model for RNA
Version: 1.0.0
Authors@R:
c(person("Teng", "Gao", email = "tgaoteng@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0196-689X")),
person("Evan", "Biederstedt", email = "evan.biederstedt@gmail.com", role="aut"),
person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut"))
Description: Haplotype-aware Hidden Markov Model for RNA (HaHMMR) is a method for detecting copy number variations (CNVs) from bulk RNA-seq data. Additional examples, documentations, and details on the method are available at <https://github.com/kharchenkolab/hahmmr/>.
Depends: R (>= 4.1.0)
biocViews:
Imports: data.table, dplyr, GenomicRanges, ggplot2, glue, IRanges,
methods, patchwork, Rcpp, stringr, tibble, zoo
Suggests: ggrastr, testthat
LinkingTo: Rcpp, RcppArmadillo, roptim
NeedsCompilation: yes
License: MIT + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.2.3
LazyData: true
Packaged: 2023-10-25 14:22:02 UTC; tenggao
Author: Teng Gao [aut, cre] (<https://orcid.org/0000-0002-0196-689X>),
Evan Biederstedt [aut],
Peter Kharchenko [aut]
Maintainer: Teng Gao <tgaoteng@gmail.com>
Repository: CRAN
Date/Publication: 2023-10-25 18:00:10 UTC
2 changes: 2 additions & 0 deletions LICENSE
@@ -0,0 +1,2 @@
YEAR: 2023
COPYRIGHT HOLDER: hahmmr authors
102 changes: 102 additions & 0 deletions MD5
@@ -0,0 +1,102 @@
db833a049d315033dc4abb49a1d82fef *DESCRIPTION
f0d0766890e21f1bfe3c1810a928af7f *LICENSE
3601cc7134f430ab35f8bc018375fef8 *NAMESPACE
59131dd90379a82b7f46f9a0ea322917 *R/RcppExports.R
2449f7964cdda116fdb295e9a818db97 *R/data.R
10841ff7ead3c766e34aa62bae0989e3 *R/distributions.R
bfff21f0a98aaf05d5df4addf7ef8ca6 *R/hahmmr.R
2d66b97039eb5d1751c23bb4dc2d42b9 *R/vis.R
6232da740e9c4a35f5a4c4aaa22b14a0 *R/workflows.R
889316959aa433447ac2ee173150bfb2 *README.md
2f73655087eb8953ec58c4699793e90e *data/acen_hg19.rda
3c2a0fa281beedd8f86be3a4b8858abd *data/acen_hg38.rda
8efac9c806f2810fedfb6b723fd33ae5 *data/bulk_example.rda
110fec45d03652c73294a2cb6beda054 *data/chrom_sizes_hg19.rda
8e51a671d180d287ff87861d06fa30d0 *data/chrom_sizes_hg38.rda
4f64e061bb377b655d4225d843e4535b *data/df_allele_example.rda
87d7ff149d69d09a0c05294947aba465 *data/gaps_hg19.rda
170a9ca2c3d97d0013750653d3acceab *data/gaps_hg38.rda
fcfdb2a041faf94fdd350147f338bf95 *data/gene_counts_example.rda
ecfb38f6439cdaca65e47fcc8c8cd918 *data/gtf_hg19.rda
b69826d226b2f1fffbc53a4f856210d0 *data/gtf_hg38.rda
d93edd82cb7caa0362aa2d4d1f18edf9 *data/gtf_mm10.rda
785979e12c8c49b6445658bd70658cc0 *data/pre_likelihood_hmm.rda
321cabe1f8208be8ef07f9823360b9c7 *data/ref_hca.rda
6b62eb0bd388965e2948c654b1915ceb *data/ref_hca_counts.rda
ab0d33f7f58ac0e78bc0d84fde8a54ad *data/segs_example.rda
8629d8aa8c7de2379b796e44d4e518d2 *data/vcf_meta.rda
5f3a25c683b2651f5dcd9600577950aa *man/acen_hg19.Rd
e1aaebd2521f61627fdad6d756b16cc0 *man/acen_hg38.Rd
63dd78ac590f8e5a5de022417f2fd1e6 *man/analyze_allele.Rd
ade4d36280ad15035353bebbc44cea08 *man/analyze_joint.Rd
028e14228e8e822c49292ab204beb960 *man/annot_cm.Rd
6fac7dacd5b7577dcf3cbc06ef724d13 *man/annot_segs.Rd
c572521a6a443d6101196eeffce0892e *man/approx_phi_post.Rd
e35fdb6319fe207bd149b95dfca8eb4e *man/approx_theta_post_s2.Rd
078dacf4776280821974e67ce4fe3f0b *man/approx_theta_post_s3.Rd
bf23b76cbdc2a3b80752ce7c8dcde11a *man/bulk_example.Rd
5eca831c9d63ac62b13e695bccf9bed5 *man/calc_allele_LLR.Rd
cd913dfd795f8048371b4ab062e73966 *man/calc_allele_lik_s2.Rd
569cb74a1440d26295b732e444deb628 *man/calc_allele_lik_s3.Rd
4e95084105428fa1ba1302a16cfb0694 *man/calc_exp_LLR.Rd
3b8ae9d0d8210c62bf38418f5b59fce8 *man/calc_trans_mat_s15.Rd
c2a82a664d39ed3fb90c0f37cb266c4a *man/calc_trans_mat_s7.Rd
33f50593b2d8de794f3755769cb306b2 *man/check_cols.Rd
1ab1b6f034416df0bb7f9c209e7b49bc *man/check_matrix.Rd
f58158f8b8f9b55a66d75e179719ecf2 *man/chrom_sizes_hg19.Rd
a3802a643cf910e9f519993c24a946e8 *man/chrom_sizes_hg38.Rd
7c82eb6989fcc946463eaf78d94d2f51 *man/classify_alleles.Rd
56ff33d4e5b47cb702400de5294da898 *man/combine_bulk.Rd
8bddbea37ea6a1861cbeda88eb825e21 *man/dbbinom.Rd
5f69bf0805355adc5b693103b1445702 *man/df_allele_example.Rd
e331fdf2690263bbb253c7c5f218dcdf *man/dpoilog.Rd
11f36c36da636341c755a40aa3139fcd *man/filter_genes.Rd
4aff62efea703cd30169d7f7e7609a25 *man/fit_gamma.Rd
693f220d76ad320b21d5b6f129ba2f8d *man/fit_lnpois_cpp.Rd
46fee729636ceabb1e46e03b7e83a0ad *man/fit_ref_sse.Rd
a5ed43b98552469a90dfd7ef42f3d63d *man/forward_back_allele.Rd
65b6cc6ea6ac371e57b682b9c6f2971b *man/gaps_hg19.Rd
21f5bfa157622ce2a12840e185de296c *man/gaps_hg38.Rd
fbe34319e0023fc535970f8f882f3561 *man/gene_counts_example.Rd
24f9bec990c28e07d08abafea1b541e9 *man/generate_postfix.Rd
a2d8a921d768d6c6b65cf4b421a3c198 *man/get_allele_bulk.Rd
0bd6ef987ab0a84aaa4ae64eab908163 *man/get_allele_hmm_s2.Rd
07eb8c2e2de3377a74ebd05fcd794e4f *man/get_allele_hmm_s3.Rd
e172d982b321f13b5ee67710677bf01d *man/get_bulk.Rd
d28beaa581276b151af8d8e0a0a4921d *man/get_exp_bulk.Rd
16f7516da08ed469d2932546dfee6d29 *man/get_trans_probs_s15.Rd
d120247f04a8cadfff1814e246425fc8 *man/get_trans_probs_s7.Rd
f5dfe96e32940270a7e65b811d900a59 *man/gtf_hg19.Rd
6a9528eed118256a5e42d3480d50fb78 *man/gtf_hg38.Rd
9a04926616cf1511ceedc5740c4ad9b3 *man/gtf_mm10.Rd
f2d49475376b9e8113215304551bdb2b *man/l_bbinom.Rd
4012749a0b8ee767800488129ede8af8 *man/l_lnpois.Rd
a4d3a90966c9c1805a82a315e189d895 *man/likelihood_allele.Rd
9fd2f49a75d2b0fddbc35aee5aeb6253 *man/logSumExp.Rd
576c8f6fc9c5c41aacc1efb80d234121 *man/plot_bulks.Rd
0a95f6bf87c87095cafda42450daf72f *man/plot_psbulk.Rd
9d40b069baca47035ea22845e08e2c58 *man/pre_likelihood_hmm.Rd
4141230e3c1a8b05fed58e59042943f9 *man/ref_hca.Rd
9a6d4eb5bf60efe8dbe0bf956cd3afac *man/ref_hca_counts.Rd
75afb35d7348c09ec4b034f3ac944a7d *man/run_allele_hmm_s3.Rd
86cdbe04fa98aff69e6c373b447d3d6d *man/run_allele_hmm_s5.Rd
16d103c24c1dabdf9bded5e8ead10c7d *man/run_joint_hmm_s15.Rd
a80566b3f0578a425e2fecd1902e38b5 *man/run_joint_hmm_s7.Rd
d13af4aed9049727a81db474c692ad0b *man/segs_example.Rd
3da1358b521a038ef5d0b25688a1f8a0 *man/smooth_segs.Rd
4ebd860080a5166b08940851b862663d *man/switch_prob_cm.Rd
e3885c67d44b99a62da639827593147c *man/theta_hat_seg.Rd
36eae748d6ab493555c6ee7e60c143c1 *man/vcf_meta.Rd
cae195fee8a99bfa29fc980035db1464 *man/viterbi_allele.Rd
2439ac1c03c1ab966a111f70a4e62035 *man/viterbi_joint.Rd
f67bdd046b06597eccb3f548f7570518 *man/viterbi_joint_mat.Rd
019f6c90a9c24178f9ebeaa3b94809f6 *src/Makevars
3241f7543f2203fa0b7ffa4fb5b1b847 *src/Makevars.win
bea6881a83764a1347f8d16965d4c54d *src/RcppExports.cpp
55acc414721e74c9b7b0fc4cdbad452f *src/bbinom.cpp
ba558973dd0fe543c0be8f2e214ef2d9 *src/hmm.cpp
d6b9e9547ce5720fcd90d5fb19b387ee *src/misc.cpp
cbcbc4f2d94c6560dc120a72fc47bf87 *src/mle.cpp
aafdd91c9a912bd003a596e1bc983517 *src/poilog.cpp
bb2d0d6c979f390842487e95b0e7540b *tests/testthat.R
d679f085b9d099db4cb4fb5e5d1e4048 *tests/testthat/test_functions.R
35 changes: 35 additions & 0 deletions NAMESPACE
@@ -0,0 +1,35 @@
# Generated by roxygen2: do not edit by hand

export(analyze_allele)
export(analyze_joint)
export(dbbinom)
export(dpoilog)
export(fit_lnpois_cpp)
export(forward_back_allele)
export(get_allele_bulk)
export(get_bulk)
export(l_bbinom)
export(l_lnpois)
export(likelihood_allele)
export(logSumExp)
export(plot_bulks)
export(plot_psbulk)
export(run_allele_hmm_s5)
export(run_joint_hmm_s15)
import(Rcpp)
import(dplyr)
import(ggplot2)
import(glue)
import(patchwork)
import(stringr)
importFrom(data.table,as.data.table)
importFrom(grDevices,colorRampPalette)
importFrom(methods,as)
importFrom(methods,is)
importFrom(stats,dnbinom)
importFrom(stats,na.omit)
importFrom(stats,optim)
importFrom(stats,pnorm)
importFrom(stats,setNames)
importFrom(stats,start)
useDynLib(hahmmr)
55 changes: 55 additions & 0 deletions R/RcppExports.R
@@ -0,0 +1,55 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

cppdbbinom <- function(x, size, alpha, beta, log_prob = FALSE) {
.Call('_hahmmr_cppdbbinom', PACKAGE = 'hahmmr', x, size, alpha, beta, log_prob)
}

cpp_dgpois <- function(x, alpha, beta, log_prob = FALSE) {
.Call('_hahmmr_cpp_dgpois', PACKAGE = 'hahmmr', x, alpha, beta, log_prob)
}

#' logSumExp function
#'
#' @param x NumericVector
#' @return double logSumExp of x
#' @export
logSumExp <- function(x) {
.Call('_hahmmr_logSumExp', PACKAGE = 'hahmmr', x)
}

likelihood_compute <- function(logphi, logprob, logPi, n, m) {
.Call('_hahmmr_likelihood_compute', PACKAGE = 'hahmmr', logphi, logprob, logPi, n, m)
}

forward_backward_compute <- function(logphi, logprob, logPi, n, m) {
.Call('_hahmmr_forward_backward_compute', PACKAGE = 'hahmmr', logphi, logprob, logPi, n, m)
}

viterbi_compute <- function(log_delta, logprob, logPi, n, m, nu, z) {
.Call('_hahmmr_viterbi_compute', PACKAGE = 'hahmmr', log_delta, logprob, logPi, n, m, nu, z)
}

roman2int_internal <- function(letters, nchar) {
.Call('_hahmmr_roman2int_internal', PACKAGE = 'hahmmr', letters, nchar)
}

#' Fit MLE of log-normal Poisson model
#'
#' @param Y_obs Vector of observed counts
#' @param lambda_ref Vector of reference rates
#' @param d integer Total depth
#' @return NumericVector MLE estimates of mu and sigma
#' @export
fit_lnpois_cpp <- function(Y_obs, lambda_ref, d) {
.Call('_hahmmr_fit_lnpois_cpp', PACKAGE = 'hahmmr', Y_obs, lambda_ref, d)
}

poilog1 <- function(x, my, sig) {
.Call('_hahmmr_poilog1', PACKAGE = 'hahmmr', x, my, sig)
}

l_lnpois_cpp <- function(Y_obs, lambda_ref, d, mu, sig, phi = 1.0) {
.Call('_hahmmr_l_lnpois_cpp', PACKAGE = 'hahmmr', Y_obs, lambda_ref, d, mu, sig, phi)
}

68 changes: 68 additions & 0 deletions R/data.R
@@ -0,0 +1,68 @@
#' gene model (hg38)
#'
"gtf_hg38"

#' gene model (hg19)
#'
"gtf_hg19"

#' gene model (mm10)
#'
"gtf_mm10"

#' chromosome sizes (hg38)
#'
"chrom_sizes_hg38"

#' genome gap regions (hg38)
#'
"gaps_hg38"

#' centromere regions (hg38)
#'
"acen_hg38"

#' chromosome sizes (hg19)
#'
"chrom_sizes_hg19"

#' genome gap regions (hg19)
#'
"gaps_hg19"

#' centromere regions (hg19)
#'
"acen_hg19"

#' HMM object for unit tests
#'
"pre_likelihood_hmm"

#' reference expression magnitudes from HCA
#'
"ref_hca"

#' reference expression counts from HCA
#'
"ref_hca_counts"

#' example VCF header
#'
"vcf_meta"

#' example pseudobulk dataframe
#'
"bulk_example"

#' example allele count dataframe
#'
"df_allele_example"

#' example CNV segments dataframe
#'
"segs_example"

#' example gene expression counts matrix
#'
"gene_counts_example"

89 changes: 89 additions & 0 deletions R/distributions.R
@@ -0,0 +1,89 @@
############ Probability distributions ############

#'@useDynLib hahmmr
#'@import Rcpp
NULL

## refer to https://github.com/evanbiederstedt/poilogcpp

#' Returns the density for the Poisson lognormal distribution with parameters mu and sig
#'
#' @param x vector of integers, the observations
#' @param mu mean of lognormal distribution
#' @param sig standard deviation of lognormal distribution
#' @param log boolean Return the log density if TRUE (default=FALSE)
#' @return numeric Probability density values
#' @examples
#' p = dpoilog(1, 1, 1)
#' @export
dpoilog <- function(x, mu, sig, log=FALSE){
if (!(length(x) == length(mu) & length(x) == length(sig))) stop('dpoilog: All parameters must be same length')
if (any((x[x!=0]/trunc(x[x!=0]))!=1)) stop('dpoilog: all x must be integers')
if (any(x<0)) stop('dpoilog: one or several values of x are negative')
if (!all(is.finite(c(mu,sig)))) {
stop('dpoilog: all parameters should be finite')
}
if (any(is.na(c(x,mu,sig)))) stop('dpoilog: Parameters cannot be NA')
if (any(sig<=0)) {
stop(c('dpoilog: sig is not larger than 0', unique(sig)))
}

p = poilog1(x=as.integer(x), my=as.double(mu), sig=as.double(sig^2))

p[p == 0] = 1e-15

if (log) {
return(log(p))
} else {
return(p)
}
}

#' calculate joint likelihood of a PLN model
#' @param Y_obs numeric vector Gene expression counts
#' @param lambda_ref numeric vector Reference expression levels
#' @param d numeric Total library size
#' @param mu numeric Global mean expression
#' @param sig numeric Global standard deviation of expression
#' @param phi numeric Fold change of expression
#' @return numeric Joint log likelihood
#' @examples
#' l_lnpois(c(1, 2), c(1, 2), 1, 1, 1)
#' @export
l_lnpois = function(Y_obs, lambda_ref, d, mu, sig, phi = 1) {
if (any(sig <= 0)) {stop(glue('negative sigma. value: {sig}'))}
if (length(sig) == 1) {sig = rep(sig, length(Y_obs))}
sum(log(dpoilog(Y_obs, mu + log(phi * d * lambda_ref), sig)))
}

#' Beta-binomial distribution density function
#' A distribution is beta-binomial if p, the probability of success,
#' in a binomial distribution has a beta distribution with shape
#' parameters alpha > 0 and beta > 0
#' For more details, see extraDistr::dbbinom
#'
#' @param x vector of quantiles
#' @param size number of trials (zero or more)
#' @param alpha numeric (default=1)
#' @param beta numeric (default=1)
#' @param log boolean (default=FALSE)
#' @return numeric Probability density values
#' @examples
#' dbbinom(1, 1, 1, 1)
#' @export
dbbinom <- function(x, size, alpha = 1, beta = 1, log = FALSE) {
cppdbbinom(x, size, alpha, beta, log[1L])
}

#' calculate joint likelihood of allele data
#' @param AD numeric vector Variant allele depth
#' @param DP numeric vector Total allele depth
#' @param alpha numeric Alpha parameter of Beta-Binomial distribution
#' @param beta numeric Beta parameter of Beta-Binomial distribution
#' @return numeric Joint log likelihood
#' @examples
#' l_bbinom(c(1, 2), c(1, 2), 1, 1)
#' @export
l_bbinom = function(AD, DP, alpha, beta) {
sum(dbbinom(AD, DP, alpha, beta, log = TRUE))
}

0 comments on commit 466187f

Please sign in to comment.