Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 466187f
Showing
103 changed files
with
5,226 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
Package: hahmmr | ||
Title: Haplotype-Aware Hidden Markov Model for RNA | ||
Version: 1.0.0 | ||
Authors@R: | ||
c(person("Teng", "Gao", email = "tgaoteng@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0196-689X")), | ||
person("Evan", "Biederstedt", email = "evan.biederstedt@gmail.com", role="aut"), | ||
person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut")) | ||
Description: Haplotype-aware Hidden Markov Model for RNA (HaHMMR) is a method for detecting copy number variations (CNVs) from bulk RNA-seq data. Additional examples, documentations, and details on the method are available at <https://github.com/kharchenkolab/hahmmr/>. | ||
Depends: R (>= 4.1.0) | ||
biocViews: | ||
Imports: data.table, dplyr, GenomicRanges, ggplot2, glue, IRanges, | ||
methods, patchwork, Rcpp, stringr, tibble, zoo | ||
Suggests: ggrastr, testthat | ||
LinkingTo: Rcpp, RcppArmadillo, roptim | ||
NeedsCompilation: yes | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
RoxygenNote: 7.2.3 | ||
LazyData: true | ||
Packaged: 2023-10-25 14:22:02 UTC; tenggao | ||
Author: Teng Gao [aut, cre] (<https://orcid.org/0000-0002-0196-689X>), | ||
Evan Biederstedt [aut], | ||
Peter Kharchenko [aut] | ||
Maintainer: Teng Gao <tgaoteng@gmail.com> | ||
Repository: CRAN | ||
Date/Publication: 2023-10-25 18:00:10 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2023 | ||
COPYRIGHT HOLDER: hahmmr authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
db833a049d315033dc4abb49a1d82fef *DESCRIPTION | ||
f0d0766890e21f1bfe3c1810a928af7f *LICENSE | ||
3601cc7134f430ab35f8bc018375fef8 *NAMESPACE | ||
59131dd90379a82b7f46f9a0ea322917 *R/RcppExports.R | ||
2449f7964cdda116fdb295e9a818db97 *R/data.R | ||
10841ff7ead3c766e34aa62bae0989e3 *R/distributions.R | ||
bfff21f0a98aaf05d5df4addf7ef8ca6 *R/hahmmr.R | ||
2d66b97039eb5d1751c23bb4dc2d42b9 *R/vis.R | ||
6232da740e9c4a35f5a4c4aaa22b14a0 *R/workflows.R | ||
889316959aa433447ac2ee173150bfb2 *README.md | ||
2f73655087eb8953ec58c4699793e90e *data/acen_hg19.rda | ||
3c2a0fa281beedd8f86be3a4b8858abd *data/acen_hg38.rda | ||
8efac9c806f2810fedfb6b723fd33ae5 *data/bulk_example.rda | ||
110fec45d03652c73294a2cb6beda054 *data/chrom_sizes_hg19.rda | ||
8e51a671d180d287ff87861d06fa30d0 *data/chrom_sizes_hg38.rda | ||
4f64e061bb377b655d4225d843e4535b *data/df_allele_example.rda | ||
87d7ff149d69d09a0c05294947aba465 *data/gaps_hg19.rda | ||
170a9ca2c3d97d0013750653d3acceab *data/gaps_hg38.rda | ||
fcfdb2a041faf94fdd350147f338bf95 *data/gene_counts_example.rda | ||
ecfb38f6439cdaca65e47fcc8c8cd918 *data/gtf_hg19.rda | ||
b69826d226b2f1fffbc53a4f856210d0 *data/gtf_hg38.rda | ||
d93edd82cb7caa0362aa2d4d1f18edf9 *data/gtf_mm10.rda | ||
785979e12c8c49b6445658bd70658cc0 *data/pre_likelihood_hmm.rda | ||
321cabe1f8208be8ef07f9823360b9c7 *data/ref_hca.rda | ||
6b62eb0bd388965e2948c654b1915ceb *data/ref_hca_counts.rda | ||
ab0d33f7f58ac0e78bc0d84fde8a54ad *data/segs_example.rda | ||
8629d8aa8c7de2379b796e44d4e518d2 *data/vcf_meta.rda | ||
5f3a25c683b2651f5dcd9600577950aa *man/acen_hg19.Rd | ||
e1aaebd2521f61627fdad6d756b16cc0 *man/acen_hg38.Rd | ||
63dd78ac590f8e5a5de022417f2fd1e6 *man/analyze_allele.Rd | ||
ade4d36280ad15035353bebbc44cea08 *man/analyze_joint.Rd | ||
028e14228e8e822c49292ab204beb960 *man/annot_cm.Rd | ||
6fac7dacd5b7577dcf3cbc06ef724d13 *man/annot_segs.Rd | ||
c572521a6a443d6101196eeffce0892e *man/approx_phi_post.Rd | ||
e35fdb6319fe207bd149b95dfca8eb4e *man/approx_theta_post_s2.Rd | ||
078dacf4776280821974e67ce4fe3f0b *man/approx_theta_post_s3.Rd | ||
bf23b76cbdc2a3b80752ce7c8dcde11a *man/bulk_example.Rd | ||
5eca831c9d63ac62b13e695bccf9bed5 *man/calc_allele_LLR.Rd | ||
cd913dfd795f8048371b4ab062e73966 *man/calc_allele_lik_s2.Rd | ||
569cb74a1440d26295b732e444deb628 *man/calc_allele_lik_s3.Rd | ||
4e95084105428fa1ba1302a16cfb0694 *man/calc_exp_LLR.Rd | ||
3b8ae9d0d8210c62bf38418f5b59fce8 *man/calc_trans_mat_s15.Rd | ||
c2a82a664d39ed3fb90c0f37cb266c4a *man/calc_trans_mat_s7.Rd | ||
33f50593b2d8de794f3755769cb306b2 *man/check_cols.Rd | ||
1ab1b6f034416df0bb7f9c209e7b49bc *man/check_matrix.Rd | ||
f58158f8b8f9b55a66d75e179719ecf2 *man/chrom_sizes_hg19.Rd | ||
a3802a643cf910e9f519993c24a946e8 *man/chrom_sizes_hg38.Rd | ||
7c82eb6989fcc946463eaf78d94d2f51 *man/classify_alleles.Rd | ||
56ff33d4e5b47cb702400de5294da898 *man/combine_bulk.Rd | ||
8bddbea37ea6a1861cbeda88eb825e21 *man/dbbinom.Rd | ||
5f69bf0805355adc5b693103b1445702 *man/df_allele_example.Rd | ||
e331fdf2690263bbb253c7c5f218dcdf *man/dpoilog.Rd | ||
11f36c36da636341c755a40aa3139fcd *man/filter_genes.Rd | ||
4aff62efea703cd30169d7f7e7609a25 *man/fit_gamma.Rd | ||
693f220d76ad320b21d5b6f129ba2f8d *man/fit_lnpois_cpp.Rd | ||
46fee729636ceabb1e46e03b7e83a0ad *man/fit_ref_sse.Rd | ||
a5ed43b98552469a90dfd7ef42f3d63d *man/forward_back_allele.Rd | ||
65b6cc6ea6ac371e57b682b9c6f2971b *man/gaps_hg19.Rd | ||
21f5bfa157622ce2a12840e185de296c *man/gaps_hg38.Rd | ||
fbe34319e0023fc535970f8f882f3561 *man/gene_counts_example.Rd | ||
24f9bec990c28e07d08abafea1b541e9 *man/generate_postfix.Rd | ||
a2d8a921d768d6c6b65cf4b421a3c198 *man/get_allele_bulk.Rd | ||
0bd6ef987ab0a84aaa4ae64eab908163 *man/get_allele_hmm_s2.Rd | ||
07eb8c2e2de3377a74ebd05fcd794e4f *man/get_allele_hmm_s3.Rd | ||
e172d982b321f13b5ee67710677bf01d *man/get_bulk.Rd | ||
d28beaa581276b151af8d8e0a0a4921d *man/get_exp_bulk.Rd | ||
16f7516da08ed469d2932546dfee6d29 *man/get_trans_probs_s15.Rd | ||
d120247f04a8cadfff1814e246425fc8 *man/get_trans_probs_s7.Rd | ||
f5dfe96e32940270a7e65b811d900a59 *man/gtf_hg19.Rd | ||
6a9528eed118256a5e42d3480d50fb78 *man/gtf_hg38.Rd | ||
9a04926616cf1511ceedc5740c4ad9b3 *man/gtf_mm10.Rd | ||
f2d49475376b9e8113215304551bdb2b *man/l_bbinom.Rd | ||
4012749a0b8ee767800488129ede8af8 *man/l_lnpois.Rd | ||
a4d3a90966c9c1805a82a315e189d895 *man/likelihood_allele.Rd | ||
9fd2f49a75d2b0fddbc35aee5aeb6253 *man/logSumExp.Rd | ||
576c8f6fc9c5c41aacc1efb80d234121 *man/plot_bulks.Rd | ||
0a95f6bf87c87095cafda42450daf72f *man/plot_psbulk.Rd | ||
9d40b069baca47035ea22845e08e2c58 *man/pre_likelihood_hmm.Rd | ||
4141230e3c1a8b05fed58e59042943f9 *man/ref_hca.Rd | ||
9a6d4eb5bf60efe8dbe0bf956cd3afac *man/ref_hca_counts.Rd | ||
75afb35d7348c09ec4b034f3ac944a7d *man/run_allele_hmm_s3.Rd | ||
86cdbe04fa98aff69e6c373b447d3d6d *man/run_allele_hmm_s5.Rd | ||
16d103c24c1dabdf9bded5e8ead10c7d *man/run_joint_hmm_s15.Rd | ||
a80566b3f0578a425e2fecd1902e38b5 *man/run_joint_hmm_s7.Rd | ||
d13af4aed9049727a81db474c692ad0b *man/segs_example.Rd | ||
3da1358b521a038ef5d0b25688a1f8a0 *man/smooth_segs.Rd | ||
4ebd860080a5166b08940851b862663d *man/switch_prob_cm.Rd | ||
e3885c67d44b99a62da639827593147c *man/theta_hat_seg.Rd | ||
36eae748d6ab493555c6ee7e60c143c1 *man/vcf_meta.Rd | ||
cae195fee8a99bfa29fc980035db1464 *man/viterbi_allele.Rd | ||
2439ac1c03c1ab966a111f70a4e62035 *man/viterbi_joint.Rd | ||
f67bdd046b06597eccb3f548f7570518 *man/viterbi_joint_mat.Rd | ||
019f6c90a9c24178f9ebeaa3b94809f6 *src/Makevars | ||
3241f7543f2203fa0b7ffa4fb5b1b847 *src/Makevars.win | ||
bea6881a83764a1347f8d16965d4c54d *src/RcppExports.cpp | ||
55acc414721e74c9b7b0fc4cdbad452f *src/bbinom.cpp | ||
ba558973dd0fe543c0be8f2e214ef2d9 *src/hmm.cpp | ||
d6b9e9547ce5720fcd90d5fb19b387ee *src/misc.cpp | ||
cbcbc4f2d94c6560dc120a72fc47bf87 *src/mle.cpp | ||
aafdd91c9a912bd003a596e1bc983517 *src/poilog.cpp | ||
bb2d0d6c979f390842487e95b0e7540b *tests/testthat.R | ||
d679f085b9d099db4cb4fb5e5d1e4048 *tests/testthat/test_functions.R |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(analyze_allele) | ||
export(analyze_joint) | ||
export(dbbinom) | ||
export(dpoilog) | ||
export(fit_lnpois_cpp) | ||
export(forward_back_allele) | ||
export(get_allele_bulk) | ||
export(get_bulk) | ||
export(l_bbinom) | ||
export(l_lnpois) | ||
export(likelihood_allele) | ||
export(logSumExp) | ||
export(plot_bulks) | ||
export(plot_psbulk) | ||
export(run_allele_hmm_s5) | ||
export(run_joint_hmm_s15) | ||
import(Rcpp) | ||
import(dplyr) | ||
import(ggplot2) | ||
import(glue) | ||
import(patchwork) | ||
import(stringr) | ||
importFrom(data.table,as.data.table) | ||
importFrom(grDevices,colorRampPalette) | ||
importFrom(methods,as) | ||
importFrom(methods,is) | ||
importFrom(stats,dnbinom) | ||
importFrom(stats,na.omit) | ||
importFrom(stats,optim) | ||
importFrom(stats,pnorm) | ||
importFrom(stats,setNames) | ||
importFrom(stats,start) | ||
useDynLib(hahmmr) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Generated by using Rcpp::compileAttributes() -> do not edit by hand | ||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||
|
||
cppdbbinom <- function(x, size, alpha, beta, log_prob = FALSE) { | ||
.Call('_hahmmr_cppdbbinom', PACKAGE = 'hahmmr', x, size, alpha, beta, log_prob) | ||
} | ||
|
||
cpp_dgpois <- function(x, alpha, beta, log_prob = FALSE) { | ||
.Call('_hahmmr_cpp_dgpois', PACKAGE = 'hahmmr', x, alpha, beta, log_prob) | ||
} | ||
|
||
#' logSumExp function | ||
#' | ||
#' @param x NumericVector | ||
#' @return double logSumExp of x | ||
#' @export | ||
logSumExp <- function(x) { | ||
.Call('_hahmmr_logSumExp', PACKAGE = 'hahmmr', x) | ||
} | ||
|
||
likelihood_compute <- function(logphi, logprob, logPi, n, m) { | ||
.Call('_hahmmr_likelihood_compute', PACKAGE = 'hahmmr', logphi, logprob, logPi, n, m) | ||
} | ||
|
||
forward_backward_compute <- function(logphi, logprob, logPi, n, m) { | ||
.Call('_hahmmr_forward_backward_compute', PACKAGE = 'hahmmr', logphi, logprob, logPi, n, m) | ||
} | ||
|
||
viterbi_compute <- function(log_delta, logprob, logPi, n, m, nu, z) { | ||
.Call('_hahmmr_viterbi_compute', PACKAGE = 'hahmmr', log_delta, logprob, logPi, n, m, nu, z) | ||
} | ||
|
||
roman2int_internal <- function(letters, nchar) { | ||
.Call('_hahmmr_roman2int_internal', PACKAGE = 'hahmmr', letters, nchar) | ||
} | ||
|
||
#' Fit MLE of log-normal Poisson model | ||
#' | ||
#' @param Y_obs Vector of observed counts | ||
#' @param lambda_ref Vector of reference rates | ||
#' @param d integer Total depth | ||
#' @return NumericVector MLE estimates of mu and sigma | ||
#' @export | ||
fit_lnpois_cpp <- function(Y_obs, lambda_ref, d) { | ||
.Call('_hahmmr_fit_lnpois_cpp', PACKAGE = 'hahmmr', Y_obs, lambda_ref, d) | ||
} | ||
|
||
poilog1 <- function(x, my, sig) { | ||
.Call('_hahmmr_poilog1', PACKAGE = 'hahmmr', x, my, sig) | ||
} | ||
|
||
l_lnpois_cpp <- function(Y_obs, lambda_ref, d, mu, sig, phi = 1.0) { | ||
.Call('_hahmmr_l_lnpois_cpp', PACKAGE = 'hahmmr', Y_obs, lambda_ref, d, mu, sig, phi) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#' gene model (hg38) | ||
#' | ||
"gtf_hg38" | ||
|
||
#' gene model (hg19) | ||
#' | ||
"gtf_hg19" | ||
|
||
#' gene model (mm10) | ||
#' | ||
"gtf_mm10" | ||
|
||
#' chromosome sizes (hg38) | ||
#' | ||
"chrom_sizes_hg38" | ||
|
||
#' genome gap regions (hg38) | ||
#' | ||
"gaps_hg38" | ||
|
||
#' centromere regions (hg38) | ||
#' | ||
"acen_hg38" | ||
|
||
#' chromosome sizes (hg19) | ||
#' | ||
"chrom_sizes_hg19" | ||
|
||
#' genome gap regions (hg19) | ||
#' | ||
"gaps_hg19" | ||
|
||
#' centromere regions (hg19) | ||
#' | ||
"acen_hg19" | ||
|
||
#' HMM object for unit tests | ||
#' | ||
"pre_likelihood_hmm" | ||
|
||
#' reference expression magnitudes from HCA | ||
#' | ||
"ref_hca" | ||
|
||
#' reference expression counts from HCA | ||
#' | ||
"ref_hca_counts" | ||
|
||
#' example VCF header | ||
#' | ||
"vcf_meta" | ||
|
||
#' example pseudobulk dataframe | ||
#' | ||
"bulk_example" | ||
|
||
#' example allele count dataframe | ||
#' | ||
"df_allele_example" | ||
|
||
#' example CNV segments dataframe | ||
#' | ||
"segs_example" | ||
|
||
#' example gene expression counts matrix | ||
#' | ||
"gene_counts_example" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
############ Probability distributions ############ | ||
|
||
#'@useDynLib hahmmr | ||
#'@import Rcpp | ||
NULL | ||
|
||
## refer to https://github.com/evanbiederstedt/poilogcpp | ||
|
||
#' Returns the density for the Poisson lognormal distribution with parameters mu and sig | ||
#' | ||
#' @param x vector of integers, the observations | ||
#' @param mu mean of lognormal distribution | ||
#' @param sig standard deviation of lognormal distribution | ||
#' @param log boolean Return the log density if TRUE (default=FALSE) | ||
#' @return numeric Probability density values | ||
#' @examples | ||
#' p = dpoilog(1, 1, 1) | ||
#' @export | ||
dpoilog <- function(x, mu, sig, log=FALSE){ | ||
if (!(length(x) == length(mu) & length(x) == length(sig))) stop('dpoilog: All parameters must be same length') | ||
if (any((x[x!=0]/trunc(x[x!=0]))!=1)) stop('dpoilog: all x must be integers') | ||
if (any(x<0)) stop('dpoilog: one or several values of x are negative') | ||
if (!all(is.finite(c(mu,sig)))) { | ||
stop('dpoilog: all parameters should be finite') | ||
} | ||
if (any(is.na(c(x,mu,sig)))) stop('dpoilog: Parameters cannot be NA') | ||
if (any(sig<=0)) { | ||
stop(c('dpoilog: sig is not larger than 0', unique(sig))) | ||
} | ||
|
||
p = poilog1(x=as.integer(x), my=as.double(mu), sig=as.double(sig^2)) | ||
|
||
p[p == 0] = 1e-15 | ||
|
||
if (log) { | ||
return(log(p)) | ||
} else { | ||
return(p) | ||
} | ||
} | ||
|
||
#' calculate joint likelihood of a PLN model | ||
#' @param Y_obs numeric vector Gene expression counts | ||
#' @param lambda_ref numeric vector Reference expression levels | ||
#' @param d numeric Total library size | ||
#' @param mu numeric Global mean expression | ||
#' @param sig numeric Global standard deviation of expression | ||
#' @param phi numeric Fold change of expression | ||
#' @return numeric Joint log likelihood | ||
#' @examples | ||
#' l_lnpois(c(1, 2), c(1, 2), 1, 1, 1) | ||
#' @export | ||
l_lnpois = function(Y_obs, lambda_ref, d, mu, sig, phi = 1) { | ||
if (any(sig <= 0)) {stop(glue('negative sigma. value: {sig}'))} | ||
if (length(sig) == 1) {sig = rep(sig, length(Y_obs))} | ||
sum(log(dpoilog(Y_obs, mu + log(phi * d * lambda_ref), sig))) | ||
} | ||
|
||
#' Beta-binomial distribution density function | ||
#' A distribution is beta-binomial if p, the probability of success, | ||
#' in a binomial distribution has a beta distribution with shape | ||
#' parameters alpha > 0 and beta > 0 | ||
#' For more details, see extraDistr::dbbinom | ||
#' | ||
#' @param x vector of quantiles | ||
#' @param size number of trials (zero or more) | ||
#' @param alpha numeric (default=1) | ||
#' @param beta numeric (default=1) | ||
#' @param log boolean (default=FALSE) | ||
#' @return numeric Probability density values | ||
#' @examples | ||
#' dbbinom(1, 1, 1, 1) | ||
#' @export | ||
dbbinom <- function(x, size, alpha = 1, beta = 1, log = FALSE) { | ||
cppdbbinom(x, size, alpha, beta, log[1L]) | ||
} | ||
|
||
#' calculate joint likelihood of allele data | ||
#' @param AD numeric vector Variant allele depth | ||
#' @param DP numeric vector Total allele depth | ||
#' @param alpha numeric Alpha parameter of Beta-Binomial distribution | ||
#' @param beta numeric Beta parameter of Beta-Binomial distribution | ||
#' @return numeric Joint log likelihood | ||
#' @examples | ||
#' l_bbinom(c(1, 2), c(1, 2), 1, 1) | ||
#' @export | ||
l_bbinom = function(AD, DP, alpha, beta) { | ||
sum(dbbinom(AD, DP, alpha, beta, log = TRUE)) | ||
} |
Oops, something went wrong.