Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit a09987c
Showing
20 changed files
with
1,151 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
Package: DPBBM | ||
Type: Package | ||
Title: Dirichlet Process Beta-Binomial Mixture | ||
Version: 0.2.5 | ||
Date: 2016-09-21 | ||
Author: Lin Zhang | ||
Maintainer: Lin Zhang <lin.zhang@cumt.edu.cn> | ||
Depends: R (>= 3.1.0) | ||
Imports: tmvtnorm, VGAM, gplots, CEoptim | ||
Description: Beta-binomial Mixture Model is used to infer the pattern from count data. | ||
It can be used for clustering of RNA methylation sequencing data. | ||
License: GPL (>= 2) | ||
NeedsCompilation: no | ||
Packaged: 2016-09-28 15:21:06 UTC; lin.zhang | ||
Repository: CRAN | ||
Date/Publication: 2016-09-29 08:02:30 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
85deb92e49f8e80d11b68243156f69c3 *DESCRIPTION | ||
fcd59f34600633e5b452a9bf4e4cb3d4 *NAMESPACE | ||
788cd67fd78de3aafe4948565b0ff337 *NEWS | ||
78f496dd38ec9c7aabeeb080122867d9 *R/BCubed_metric.r | ||
04011008f379c8feaf9e067a109832d3 *R/DPBBM-internal.R | ||
3a3617deef264facbf8f95f85a994d62 *R/bbm_data_generate.r | ||
f5ff34ab69bca1f5b44a9a867ef977cc *R/choose_cluster_id_by_mode.r | ||
fc08705e0767e837fa027a32304390d3 *R/dpbbm_mc_iterations.r | ||
7957a39ac67fb28b507269aedd6eafaa *R/dpbbm_mc_single_iter.r | ||
357b514d059c7822b73c99c3d4a3ccf0 *R/dpbbm_update_likelihood.r | ||
6e29979330db426d7edd337475475716 *R/dpbbm_update_tau.r | ||
92fe9c86d6e3d8a4ec99e895dcfaf640 *R/generate_u_param.r | ||
1f2d26caa65c2917c620f1ad90244b06 *inst/doc/DPBBM_example.pdf | ||
bee6206963313d57ffe574bc8a3390f7 *inst/extdata/DPBBM_example.Rmd | ||
d5ef12392295c2229a4688452167ac99 *inst/extdata/DPBBM_example.html | ||
bbcc89bad9adb4ebd99a0f5255cd3a30 *man/BCubed_metric.Rd | ||
df26148160ed7c2c26598927518d999b *man/DPBBM-package.Rd | ||
9e0994ac718f4cc6dea60772189ebe95 *man/bbm_data_generate.Rd | ||
20c8fa19eb4a6e3d6cbe1d6b19fc1ff6 *man/dpbbm_mc_iterations.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
export(dpbbm_mc_iterations) | ||
export(bbm_data_generate) | ||
export(BCubed_metric) | ||
import(tmvtnorm) | ||
import(VGAM) | ||
import(gplots) | ||
importFrom(CEoptim,dirichletrnd) | ||
importFrom("graphics", "par") | ||
importFrom("stats", "cor", "rbeta", "rgamma", "rnbinom", "runif") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
|
||
version 0.0.13 (2016-09-12) | ||
- bug fix | ||
|
||
version 0.1.0 (2016-09-18) | ||
- pre-release | ||
|
||
version 0.2.0 (2016-09-21) | ||
- added "Getting started with DPBBM package", including HTML, Rmd and PDF. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
########################################################################### | ||
# calculate BCubed metric for clustering evaluation | ||
# input : L, C, alpha | ||
# L - real label of classes | ||
# C - classification label of samples | ||
# alpha - F metric parameter | ||
# output: F metric score | ||
|
||
BCubed_metric <- function(L, C, alpha){ | ||
snL <- length(L) | ||
snC <- length(C) | ||
|
||
if(snL != snC){ | ||
stop("length of category does not comply with length of cluster") | ||
geterrmessage() | ||
} | ||
|
||
# define the correctness of the relation between i and j | ||
Correctness <- matrix(nrow = snL, ncol = snC) | ||
ncluster <- c() | ||
ncategory <- c() | ||
for( i in 1:snL ){ | ||
for( j in 1:snC ){ | ||
|
||
if((L[i] == L[j]) & (C[i] == C[j])) Correctness[i,j] <- 1 | ||
else Correctness[i,j] <- 0 | ||
} | ||
} | ||
|
||
# define the num of each cluster | ||
for(i in 1:snC){ | ||
ncluster[i] <- length(which(C == C[i])) | ||
} | ||
|
||
# define the num of each category | ||
for(i in 1:snL){ | ||
ncategory[i] <- length(which(L[i] == L)) | ||
} | ||
|
||
Precision <- mean(rowSums(Correctness) / ncluster) | ||
Recall <- mean(colSums(Correctness) / ncategory) | ||
F <- 1/(alpha/Precision+(1-alpha)/Recall) | ||
|
||
return(F) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
.Random.seed <- | ||
c(403L, 1L, -1118861171L, -1550840121L, 440092590L, -1719578104L, | ||
-62925301L, 1292639977L, 1706850744L, 451556966L, 647156481L, | ||
1878381971L, 1004318274L, -236989964L, -681989129L, 1870008301L, | ||
-479856508L, -880779606L, -1583361003L, -1688337233L, 389139126L, | ||
1310181600L, -1989544925L, -1692095679L, -1661618640L, -430521266L, | ||
-307508039L, 1043990731L, 801248634L, 774645884L, -1484948289L, | ||
-2092516971L, -1042804308L, 2099979138L, 1853639005L, 547768407L, | ||
208174910L, 1429956472L, 814500955L, 1507638201L, -1604105720L, | ||
-1016508234L, -361658383L, 1490997795L, -121824654L, -2088656700L, | ||
-778426745L, -1902829987L, -1433124780L, -1186454918L, 793147845L, | ||
1023724319L, 1467213478L, -1731332048L, 1203792595L, 146292977L, | ||
-783700960L, 1891252926L, -1975300407L, -2102899589L, 2002149962L, | ||
-518580564L, 1283117487L, -1172899483L, -747215076L, -2027590798L, | ||
1407485933L, -1599264729L, 1830236238L, -1599294296L, 2061056683L, | ||
1688129481L, 1682940888L, -526630266L, 1933398881L, 1146443507L, | ||
-96859806L, 382662612L, 1011057367L, -99792691L, 1943847012L, | ||
674098762L, 1038154677L, 639256335L, -1321675754L, 890797760L, | ||
1929980035L, 144098465L, -834934128L, 1085741422L, -1105479911L, | ||
-709071957L, 1342273946L, -536175268L, -1520241569L, -1687917515L, | ||
2077620172L, -2042649822L, 1289510269L, -1195610505L, -983312482L, | ||
-2132052392L, -1504387141L, 1650012505L, -1960875288L, 1998271254L, | ||
-1976191855L, -1127028413L, -603036590L, -1582975260L, -1987907289L, | ||
-511254147L, 600860788L, 1000704602L, -135395675L, -598027073L, | ||
-1693227194L, -1879620144L, 446044147L, 768929553L, -350556352L, | ||
-1569876194L, 1552746473L, 806923547L, 2106230570L, -1255647156L, | ||
-917393393L, -1728513211L, 183676156L, 286448466L, -1598866227L, | ||
-1631830905L, 189400942L, 930265416L, -1352483509L, -2075909975L, | ||
617398776L, 1488875814L, 558166593L, -1148339245L, -1309537918L, | ||
1823997364L, -1045821129L, 2126306349L, 2056079684L, 189185642L, | ||
-41797419L, 868324975L, 446645110L, 1905849504L, -137007773L, | ||
-216662527L, 1531166064L, 144809230L, 965173113L, -257532149L, | ||
94478394L, 637153084L, -656503681L, -1230331947L, 1394119148L, | ||
213312322L, -99701603L, 1899484439L, 1508273278L, 1623835704L, | ||
585835291L, 1227245689L, 425529544L, -731793162L, -857159375L, | ||
-1710522013L, -752151758L, -628630396L, -850204473L, -2111941731L, | ||
-780740844L, -215839302L, 764210181L, -754974241L, -609221530L, | ||
1853047792L, 154991891L, 980312753L, -1752052384L, 1782334206L, | ||
-1620869495L, -537548357L, -2139712630L, 963590764L, 921531375L, | ||
-1585578971L, 1005642972L, -1934886222L, 52469165L, 429662567L, | ||
-1381515634L, -816126104L, 525262699L, 755239177L, 201564056L, | ||
-877533498L, -1740393439L, 1749819571L, -580027870L, -796738540L, | ||
-687394665L, -591084915L, 242471844L, 1698688394L, -1984620043L, | ||
1602548815L, 1769550678L, 1057729024L, 624231235L, 1814184673L, | ||
439898192L, -1962118994L, 1810998361L, 1671910507L, -1983309350L, | ||
1712300700L, 2137816223L, 383856373L, -247868532L, 1765040738L, | ||
-642984387L, -1048266569L, 1088472926L, 492476788L, -1130393966L, | ||
-361700768L, -564522868L, 172986720L, 757799138L, -1137857368L, | ||
809215692L, -2125915652L, 693820338L, -288168976L, -935669308L, | ||
868388184L, 1261805946L, 315240880L, -2101786180L, 1735327316L, | ||
-1190700606L, -427678944L, 1959752636L, 1275916368L, -1852417054L, | ||
500760424L, -607455412L, 1125027580L, 1611503346L, -1795080960L, | ||
927140980L, 413765128L, -1821489734L, 1495266768L, 1943910380L, | ||
-1575449708L, 2065647826L, -841882528L, 632130892L, -1787374816L, | ||
333232770L, 975176168L, -105858068L, 800472636L, 109864562L, | ||
235285360L, -776659676L, 2001926712L, -261279334L, 1164881360L, | ||
-714750532L, -352096620L, 1832592002L, -1512874560L, -785467588L, | ||
-1680002416L, 525334306L, 1976038920L, 961151756L, 1924514716L, | ||
-1645634542L, -2115179648L, -1767613676L, -699534040L, -1831063494L, | ||
897762256L, -623773076L, -1279751116L, -1846366062L, -1994589856L, | ||
-1572348596L, -686172704L, 1954372514L, -1804248152L, -1288119156L, | ||
-556338244L, 1978868850L, 195659760L, 1913037892L, -781055912L, | ||
-837353414L, -1117550480L, 653131324L, -974035180L, 1391708866L, | ||
557224672L, 2116338812L, -1512534832L, 66835362L, 280677928L, | ||
1521189132L, 303433660L, -1556188878L, -1792288576L, -205794764L, | ||
1762040520L, -693878086L, -1861432176L, -1470478612L, -272198060L, | ||
-1783421294L, 951936800L, -584075316L, 715909344L, 1388613314L, | ||
582980648L, 1741069228L, 2029423292L, -1947950606L, 98362928L, | ||
-1750173788L, 1911570872L, -1586291878L, -972254064L, 1908284284L, | ||
1635688212L, -63398590L, 1953484544L, -685261892L, -114450608L, | ||
1040965922L, 966502984L, -1767843828L, -1040479140L, 228381138L, | ||
-558557696L, -100830892L, 314122472L, -1750216454L, -707680560L, | ||
895764780L, 1935292660L, -1083691630L, -24770208L, 561793036L, | ||
722578144L, -826367390L, -70354264L, 1016035916L, -1140275332L, | ||
-1878815438L, 2021058160L, 1826421572L, 324798424L, 1986764666L, | ||
-1515803088L, 103471036L, 1334580820L, 460818498L, -199458656L, | ||
1930751676L, 1870498256L, -1257479326L, -827391256L, 2114961612L, | ||
-1565916676L, -479024654L, -1247511936L, 493942260L, 784557704L, | ||
-713072838L, -2013887664L, 920941676L, 536377748L, 1502487634L, | ||
-8033824L, 377240780L, -1112331104L, 1860408706L, -1008328472L, | ||
-1054114068L, 1256820028L, -1959674766L, 938369136L, -287967196L, | ||
1645816888L, -1760266470L, 167738576L, -1359210692L, -1490757740L, | ||
-1158607998L, 1599189440L, 539577916L, -1901582960L, 646358562L, | ||
1919589640L, 945260172L, 2077480732L, -1056300910L, 1287571840L, | ||
458317332L, -1804775640L, -957345222L, -316271152L, -1936848532L, | ||
-443767884L, 961595410L, 1640623584L, -1879097396L, -1892376352L, | ||
1506793506L, -418676440L, -899203572L, 2073878332L, 411728882L, | ||
2027924208L, -1464883644L, 1858839128L, -2011275462L, -1112345104L, | ||
866242236L, 1461475604L, 1503394498L, -1185330080L, 1194161020L, | ||
-174143536L, 1429913890L, -131757400L, -1682050164L, -1777751876L, | ||
1765238578L, 732695104L, 760505524L, -506365368L, 170433978L, | ||
1457519120L, 891865836L, 548686164L, -1365491438L, -1397080672L, | ||
932186400L, 944433417L, -274153925L, 1075786636L, 399430170L, | ||
-1068920177L, -1533854247L, -1503845778L, -1456541188L, -1179737235L, | ||
-213496553L, 2062199008L, 1528139454L, -243540853L, 931535629L, | ||
1029122874L, 380666744L, 1488884001L, -2013857325L, -110925580L, | ||
1546086786L, 185941015L, 1439304417L, 1642976294L, 675311908L, | ||
-2011303467L, -19649153L, -1783098344L, 1389050678L, 1125891523L, | ||
666227717L, -1726156894L, -2146915440L, -627429575L, -647586837L, | ||
-615314212L, -1535060726L, -1935074305L, 663674441L, 1723681758L, | ||
953725836L, -236080291L, 612528999L, 339155088L, 753387182L, | ||
162440027L, 820704061L, 1053981098L, -932211128L, -1410210191L, | ||
-1491429405L, 215720676L, -1638188526L, -2063454745L, 986986929L, | ||
-1000337418L, 1563546132L, 1473043749L, -1559171537L, 442866664L, | ||
-1955992378L, 1324570995L, 890151381L, 1890645426L, -86373504L, | ||
75812713L, -2010528805L, 1561979692L, -906431174L, -457602065L, | ||
-833216903L, 1420552462L, -746850084L, -461476019L, 1465654967L, | ||
-1447736256L, -585710306L, 1333092331L, -676322451L, -1985465894L, | ||
-280024168L, -1154936319L, 1322632115L, -1840241964L, -558611486L, | ||
-1322641801L, 1302597569L, -1598018234L, 1769012356L, 1028112949L, | ||
-475968993L, 603491704L, 1435126870L, 1978077411L, -87591259L, | ||
362756930L, -697609936L, 102181209L, 1772153547L, 116233020L, | ||
860191594L, -19392097L, 1276661801L, -1685902658L, 830073772L, | ||
35821821L, 2045285447L, 484471600L, 534901838L, 941201531L, -112550947L, | ||
-1583840758L, 2059462440L, 1055750545L, 1440923523L, 861123716L, | ||
-341413198L, -1006590457L, 203712081L, 463789526L, -375902028L, | ||
-238392123L, -1636192753L, 1638065800L, 1953614758L, 1619002195L, | ||
-1465545547L, -352392686L, 1921509344L, 1148402249L, -1900384133L, | ||
-1814388404L, 98301146L, 1252422863L, -100261735L, -1850573650L, | ||
-1014412868L, 1957966381L, 1308231127L, -656634848L, 1522266494L, | ||
-1013981365L, -1757338291L, -807441542L, 265645496L, -861925535L, | ||
-1031231725L, -2008629452L, 1861414210L, 8659287L, -1406276447L, | ||
-1875053210L, -414427292L, 2013992597L, -139849793L, -1832955048L, | ||
1590112630L, 1334293379L, -1083756091L, -1967237406L, 2032580432L, | ||
-1087827335L, -1881631317L, -1305912548L, -1030509494L, -2087307201L, | ||
-2080954231L, 726822046L, 1392908620L, 211732715L) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
########################################################################## | ||
# generate data based on beta binomial mixture model | ||
bbm_data_generate<- function(S=3, G=50, K=3, prob=rep(1,times=3), | ||
alpha_band=c(2,6), | ||
beta_band=c(2,6), | ||
nb_mu=100,nb_size=0.2, plotf = FALSE, | ||
max_cor=0.5){ | ||
|
||
prob = prob/sum(prob) | ||
ff <- "repeat" | ||
ct <- 1 # iteration limitation | ||
while (ff=="repeat") { | ||
# generate the starting point | ||
# generate cluster ID | ||
gamma <- sample.int(K, size = G, replace = TRUE, prob = prob) | ||
# generate alpha | ||
alpha <- exp(matrix(runif(K*S, min = alpha_band[1], max = alpha_band[2]),nrow=K)) | ||
# generate_u_param(runif(S, min = alpha_band[1], max = alpha_band[2]), rep(4, S), n = K) | ||
# alpha <- exp(abs(alpha)) | ||
# | ||
|
||
# generate beta | ||
beta <- exp(matrix(runif(K*S, min = beta_band[1], max = beta_band[2]),nrow=K)) | ||
# generate_u_param(runif(S, min = beta_band[1], max = beta_band[2]), rep(4, S), n = K) | ||
# beta <- exp(abs(beta)) | ||
|
||
mu <- alpha/(alpha+beta) # parameter for betabinomial | ||
a <- abs(cor(t(mu))) | ||
ID <- upper.tri(a,diag=F) | ||
m <- a[ID] | ||
# min_c <- min(m) | ||
max_c = max(m) # keep the sampled data relatively correlated, not too high, not too low | ||
if (max_c < max_cor) {ff = "go"; | ||
# if (min_c > max_cor) {ff = "go"; | ||
print(paste("after tried",ct,"times")) | ||
print(paste("sample data generated with correlation smaller than:",max_cor))} | ||
# print(paste("sample data generated with correlation larger than:",max_cor))} | ||
if (ct > 1000) {ff = "go"; | ||
print(paste("Fail to generate data with correlation smaller than:",max_cor)) | ||
print("clusters can be highly correlated ...") | ||
} | ||
ct <- ct + 1 | ||
} | ||
|
||
# generate the number of reads (total number of reads for samples) | ||
n <- matrix(rnbinom(G*S, mu = nb_mu, size=nb_size),nrow=G) # generate the number of reads | ||
alpha_g <- alpha[gamma,] | ||
beta_g <- beta[gamma,] | ||
|
||
# generate the random number (number of reads for IP sample) | ||
k <- matrix(rbetabinom.ab(n=G*S, size=n, shape1=alpha_g, shape2=beta_g, .dontuse.prob = NULL),nrow=G) | ||
|
||
if (plotf == TRUE) { | ||
p <- k/n | ||
par(mfrow= c(K,1)) | ||
for (m in 1:K) { | ||
plot( rep(1:S,sum(gamma==m)),t(p[gamma==m,]), main=m) | ||
} | ||
} | ||
|
||
|
||
# save the data | ||
mat=list(S=S, G=G, K=K, alpha_band=alpha_band, | ||
beta_band=beta_band, nb_mu=nb_mu, nb_size=nb_size, | ||
gamma=gamma, alpha=alpha, beta=beta, | ||
k=k, n=n, c=n-k) | ||
return(mat) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# function used to get the mode for each site | ||
# | ||
.getmode <- function(v) { | ||
uniqv <- unique(v) | ||
uniqv[which.max(tabulate(match(v, uniqv)))] | ||
} | ||
|
||
.choose_cluster_id_by_mode <- function(param){ | ||
|
||
num_iter <- length(param) | ||
num_site <- length(param[[num_iter]]$c) | ||
c_mat <- matrix(NA, nrow= num_iter-1000, ncol = num_site) | ||
for (i in 1:(num_iter-1000)){ | ||
c_mat[i,] <- param[[i+1000]]$c | ||
} | ||
|
||
c_final <- apply(c_mat, 2, .getmode) | ||
return(c_final) | ||
} |
Oops, something went wrong.