Skip to content

Commit

Permalink
version 0.2.5
Browse files Browse the repository at this point in the history
  • Loading branch information
laurenie authored and cran-robot committed Sep 29, 2016
0 parents commit a09987c
Show file tree
Hide file tree
Showing 20 changed files with 1,151 additions and 0 deletions.
16 changes: 16 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,16 @@
Package: DPBBM
Type: Package
Title: Dirichlet Process Beta-Binomial Mixture
Version: 0.2.5
Date: 2016-09-21
Author: Lin Zhang
Maintainer: Lin Zhang <lin.zhang@cumt.edu.cn>
Depends: R (>= 3.1.0)
Imports: tmvtnorm, VGAM, gplots, CEoptim
Description: Beta-binomial Mixture Model is used to infer the pattern from count data.
It can be used for clustering of RNA methylation sequencing data.
License: GPL (>= 2)
NeedsCompilation: no
Packaged: 2016-09-28 15:21:06 UTC; lin.zhang
Repository: CRAN
Date/Publication: 2016-09-29 08:02:30
19 changes: 19 additions & 0 deletions MD5
@@ -0,0 +1,19 @@
85deb92e49f8e80d11b68243156f69c3 *DESCRIPTION
fcd59f34600633e5b452a9bf4e4cb3d4 *NAMESPACE
788cd67fd78de3aafe4948565b0ff337 *NEWS
78f496dd38ec9c7aabeeb080122867d9 *R/BCubed_metric.r
04011008f379c8feaf9e067a109832d3 *R/DPBBM-internal.R
3a3617deef264facbf8f95f85a994d62 *R/bbm_data_generate.r
f5ff34ab69bca1f5b44a9a867ef977cc *R/choose_cluster_id_by_mode.r
fc08705e0767e837fa027a32304390d3 *R/dpbbm_mc_iterations.r
7957a39ac67fb28b507269aedd6eafaa *R/dpbbm_mc_single_iter.r
357b514d059c7822b73c99c3d4a3ccf0 *R/dpbbm_update_likelihood.r
6e29979330db426d7edd337475475716 *R/dpbbm_update_tau.r
92fe9c86d6e3d8a4ec99e895dcfaf640 *R/generate_u_param.r
1f2d26caa65c2917c620f1ad90244b06 *inst/doc/DPBBM_example.pdf
bee6206963313d57ffe574bc8a3390f7 *inst/extdata/DPBBM_example.Rmd
d5ef12392295c2229a4688452167ac99 *inst/extdata/DPBBM_example.html
bbcc89bad9adb4ebd99a0f5255cd3a30 *man/BCubed_metric.Rd
df26148160ed7c2c26598927518d999b *man/DPBBM-package.Rd
9e0994ac718f4cc6dea60772189ebe95 *man/bbm_data_generate.Rd
20c8fa19eb4a6e3d6cbe1d6b19fc1ff6 *man/dpbbm_mc_iterations.Rd
10 changes: 10 additions & 0 deletions NAMESPACE
@@ -0,0 +1,10 @@
exportPattern("^[[:alpha:]]+")
export(dpbbm_mc_iterations)
export(bbm_data_generate)
export(BCubed_metric)
import(tmvtnorm)
import(VGAM)
import(gplots)
importFrom(CEoptim,dirichletrnd)
importFrom("graphics", "par")
importFrom("stats", "cor", "rbeta", "rgamma", "rnbinom", "runif")
10 changes: 10 additions & 0 deletions NEWS
@@ -0,0 +1,10 @@

version 0.0.13 (2016-09-12)
- bug fix

version 0.1.0 (2016-09-18)
- pre-release

version 0.2.0 (2016-09-21)
- added "Getting started with DPBBM package", including HTML, Rmd and PDF.

45 changes: 45 additions & 0 deletions R/BCubed_metric.r
@@ -0,0 +1,45 @@
###########################################################################
# calculate BCubed metric for clustering evaluation
# input : L, C, alpha
# L - real label of classes
# C - classification label of samples
# alpha - F metric parameter
# output: F metric score

BCubed_metric <- function(L, C, alpha){
snL <- length(L)
snC <- length(C)

if(snL != snC){
stop("length of category does not comply with length of cluster")
geterrmessage()
}

# define the correctness of the relation between i and j
Correctness <- matrix(nrow = snL, ncol = snC)
ncluster <- c()
ncategory <- c()
for( i in 1:snL ){
for( j in 1:snC ){

if((L[i] == L[j]) & (C[i] == C[j])) Correctness[i,j] <- 1
else Correctness[i,j] <- 0
}
}

# define the num of each cluster
for(i in 1:snC){
ncluster[i] <- length(which(C == C[i]))
}

# define the num of each category
for(i in 1:snL){
ncategory[i] <- length(which(L[i] == L))
}

Precision <- mean(rowSums(Correctness) / ncluster)
Recall <- mean(colSums(Correctness) / ncategory)
F <- 1/(alpha/Precision+(1-alpha)/Recall)

return(F)
}
126 changes: 126 additions & 0 deletions R/DPBBM-internal.R
@@ -0,0 +1,126 @@
.Random.seed <-
c(403L, 1L, -1118861171L, -1550840121L, 440092590L, -1719578104L,
-62925301L, 1292639977L, 1706850744L, 451556966L, 647156481L,
1878381971L, 1004318274L, -236989964L, -681989129L, 1870008301L,
-479856508L, -880779606L, -1583361003L, -1688337233L, 389139126L,
1310181600L, -1989544925L, -1692095679L, -1661618640L, -430521266L,
-307508039L, 1043990731L, 801248634L, 774645884L, -1484948289L,
-2092516971L, -1042804308L, 2099979138L, 1853639005L, 547768407L,
208174910L, 1429956472L, 814500955L, 1507638201L, -1604105720L,
-1016508234L, -361658383L, 1490997795L, -121824654L, -2088656700L,
-778426745L, -1902829987L, -1433124780L, -1186454918L, 793147845L,
1023724319L, 1467213478L, -1731332048L, 1203792595L, 146292977L,
-783700960L, 1891252926L, -1975300407L, -2102899589L, 2002149962L,
-518580564L, 1283117487L, -1172899483L, -747215076L, -2027590798L,
1407485933L, -1599264729L, 1830236238L, -1599294296L, 2061056683L,
1688129481L, 1682940888L, -526630266L, 1933398881L, 1146443507L,
-96859806L, 382662612L, 1011057367L, -99792691L, 1943847012L,
674098762L, 1038154677L, 639256335L, -1321675754L, 890797760L,
1929980035L, 144098465L, -834934128L, 1085741422L, -1105479911L,
-709071957L, 1342273946L, -536175268L, -1520241569L, -1687917515L,
2077620172L, -2042649822L, 1289510269L, -1195610505L, -983312482L,
-2132052392L, -1504387141L, 1650012505L, -1960875288L, 1998271254L,
-1976191855L, -1127028413L, -603036590L, -1582975260L, -1987907289L,
-511254147L, 600860788L, 1000704602L, -135395675L, -598027073L,
-1693227194L, -1879620144L, 446044147L, 768929553L, -350556352L,
-1569876194L, 1552746473L, 806923547L, 2106230570L, -1255647156L,
-917393393L, -1728513211L, 183676156L, 286448466L, -1598866227L,
-1631830905L, 189400942L, 930265416L, -1352483509L, -2075909975L,
617398776L, 1488875814L, 558166593L, -1148339245L, -1309537918L,
1823997364L, -1045821129L, 2126306349L, 2056079684L, 189185642L,
-41797419L, 868324975L, 446645110L, 1905849504L, -137007773L,
-216662527L, 1531166064L, 144809230L, 965173113L, -257532149L,
94478394L, 637153084L, -656503681L, -1230331947L, 1394119148L,
213312322L, -99701603L, 1899484439L, 1508273278L, 1623835704L,
585835291L, 1227245689L, 425529544L, -731793162L, -857159375L,
-1710522013L, -752151758L, -628630396L, -850204473L, -2111941731L,
-780740844L, -215839302L, 764210181L, -754974241L, -609221530L,
1853047792L, 154991891L, 980312753L, -1752052384L, 1782334206L,
-1620869495L, -537548357L, -2139712630L, 963590764L, 921531375L,
-1585578971L, 1005642972L, -1934886222L, 52469165L, 429662567L,
-1381515634L, -816126104L, 525262699L, 755239177L, 201564056L,
-877533498L, -1740393439L, 1749819571L, -580027870L, -796738540L,
-687394665L, -591084915L, 242471844L, 1698688394L, -1984620043L,
1602548815L, 1769550678L, 1057729024L, 624231235L, 1814184673L,
439898192L, -1962118994L, 1810998361L, 1671910507L, -1983309350L,
1712300700L, 2137816223L, 383856373L, -247868532L, 1765040738L,
-642984387L, -1048266569L, 1088472926L, 492476788L, -1130393966L,
-361700768L, -564522868L, 172986720L, 757799138L, -1137857368L,
809215692L, -2125915652L, 693820338L, -288168976L, -935669308L,
868388184L, 1261805946L, 315240880L, -2101786180L, 1735327316L,
-1190700606L, -427678944L, 1959752636L, 1275916368L, -1852417054L,
500760424L, -607455412L, 1125027580L, 1611503346L, -1795080960L,
927140980L, 413765128L, -1821489734L, 1495266768L, 1943910380L,
-1575449708L, 2065647826L, -841882528L, 632130892L, -1787374816L,
333232770L, 975176168L, -105858068L, 800472636L, 109864562L,
235285360L, -776659676L, 2001926712L, -261279334L, 1164881360L,
-714750532L, -352096620L, 1832592002L, -1512874560L, -785467588L,
-1680002416L, 525334306L, 1976038920L, 961151756L, 1924514716L,
-1645634542L, -2115179648L, -1767613676L, -699534040L, -1831063494L,
897762256L, -623773076L, -1279751116L, -1846366062L, -1994589856L,
-1572348596L, -686172704L, 1954372514L, -1804248152L, -1288119156L,
-556338244L, 1978868850L, 195659760L, 1913037892L, -781055912L,
-837353414L, -1117550480L, 653131324L, -974035180L, 1391708866L,
557224672L, 2116338812L, -1512534832L, 66835362L, 280677928L,
1521189132L, 303433660L, -1556188878L, -1792288576L, -205794764L,
1762040520L, -693878086L, -1861432176L, -1470478612L, -272198060L,
-1783421294L, 951936800L, -584075316L, 715909344L, 1388613314L,
582980648L, 1741069228L, 2029423292L, -1947950606L, 98362928L,
-1750173788L, 1911570872L, -1586291878L, -972254064L, 1908284284L,
1635688212L, -63398590L, 1953484544L, -685261892L, -114450608L,
1040965922L, 966502984L, -1767843828L, -1040479140L, 228381138L,
-558557696L, -100830892L, 314122472L, -1750216454L, -707680560L,
895764780L, 1935292660L, -1083691630L, -24770208L, 561793036L,
722578144L, -826367390L, -70354264L, 1016035916L, -1140275332L,
-1878815438L, 2021058160L, 1826421572L, 324798424L, 1986764666L,
-1515803088L, 103471036L, 1334580820L, 460818498L, -199458656L,
1930751676L, 1870498256L, -1257479326L, -827391256L, 2114961612L,
-1565916676L, -479024654L, -1247511936L, 493942260L, 784557704L,
-713072838L, -2013887664L, 920941676L, 536377748L, 1502487634L,
-8033824L, 377240780L, -1112331104L, 1860408706L, -1008328472L,
-1054114068L, 1256820028L, -1959674766L, 938369136L, -287967196L,
1645816888L, -1760266470L, 167738576L, -1359210692L, -1490757740L,
-1158607998L, 1599189440L, 539577916L, -1901582960L, 646358562L,
1919589640L, 945260172L, 2077480732L, -1056300910L, 1287571840L,
458317332L, -1804775640L, -957345222L, -316271152L, -1936848532L,
-443767884L, 961595410L, 1640623584L, -1879097396L, -1892376352L,
1506793506L, -418676440L, -899203572L, 2073878332L, 411728882L,
2027924208L, -1464883644L, 1858839128L, -2011275462L, -1112345104L,
866242236L, 1461475604L, 1503394498L, -1185330080L, 1194161020L,
-174143536L, 1429913890L, -131757400L, -1682050164L, -1777751876L,
1765238578L, 732695104L, 760505524L, -506365368L, 170433978L,
1457519120L, 891865836L, 548686164L, -1365491438L, -1397080672L,
932186400L, 944433417L, -274153925L, 1075786636L, 399430170L,
-1068920177L, -1533854247L, -1503845778L, -1456541188L, -1179737235L,
-213496553L, 2062199008L, 1528139454L, -243540853L, 931535629L,
1029122874L, 380666744L, 1488884001L, -2013857325L, -110925580L,
1546086786L, 185941015L, 1439304417L, 1642976294L, 675311908L,
-2011303467L, -19649153L, -1783098344L, 1389050678L, 1125891523L,
666227717L, -1726156894L, -2146915440L, -627429575L, -647586837L,
-615314212L, -1535060726L, -1935074305L, 663674441L, 1723681758L,
953725836L, -236080291L, 612528999L, 339155088L, 753387182L,
162440027L, 820704061L, 1053981098L, -932211128L, -1410210191L,
-1491429405L, 215720676L, -1638188526L, -2063454745L, 986986929L,
-1000337418L, 1563546132L, 1473043749L, -1559171537L, 442866664L,
-1955992378L, 1324570995L, 890151381L, 1890645426L, -86373504L,
75812713L, -2010528805L, 1561979692L, -906431174L, -457602065L,
-833216903L, 1420552462L, -746850084L, -461476019L, 1465654967L,
-1447736256L, -585710306L, 1333092331L, -676322451L, -1985465894L,
-280024168L, -1154936319L, 1322632115L, -1840241964L, -558611486L,
-1322641801L, 1302597569L, -1598018234L, 1769012356L, 1028112949L,
-475968993L, 603491704L, 1435126870L, 1978077411L, -87591259L,
362756930L, -697609936L, 102181209L, 1772153547L, 116233020L,
860191594L, -19392097L, 1276661801L, -1685902658L, 830073772L,
35821821L, 2045285447L, 484471600L, 534901838L, 941201531L, -112550947L,
-1583840758L, 2059462440L, 1055750545L, 1440923523L, 861123716L,
-341413198L, -1006590457L, 203712081L, 463789526L, -375902028L,
-238392123L, -1636192753L, 1638065800L, 1953614758L, 1619002195L,
-1465545547L, -352392686L, 1921509344L, 1148402249L, -1900384133L,
-1814388404L, 98301146L, 1252422863L, -100261735L, -1850573650L,
-1014412868L, 1957966381L, 1308231127L, -656634848L, 1522266494L,
-1013981365L, -1757338291L, -807441542L, 265645496L, -861925535L,
-1031231725L, -2008629452L, 1861414210L, 8659287L, -1406276447L,
-1875053210L, -414427292L, 2013992597L, -139849793L, -1832955048L,
1590112630L, 1334293379L, -1083756091L, -1967237406L, 2032580432L,
-1087827335L, -1881631317L, -1305912548L, -1030509494L, -2087307201L,
-2080954231L, 726822046L, 1392908620L, 211732715L)
68 changes: 68 additions & 0 deletions R/bbm_data_generate.r
@@ -0,0 +1,68 @@
##########################################################################
# generate data based on beta binomial mixture model
bbm_data_generate<- function(S=3, G=50, K=3, prob=rep(1,times=3),
alpha_band=c(2,6),
beta_band=c(2,6),
nb_mu=100,nb_size=0.2, plotf = FALSE,
max_cor=0.5){

prob = prob/sum(prob)
ff <- "repeat"
ct <- 1 # iteration limitation
while (ff=="repeat") {
# generate the starting point
# generate cluster ID
gamma <- sample.int(K, size = G, replace = TRUE, prob = prob)
# generate alpha
alpha <- exp(matrix(runif(K*S, min = alpha_band[1], max = alpha_band[2]),nrow=K))
# generate_u_param(runif(S, min = alpha_band[1], max = alpha_band[2]), rep(4, S), n = K)
# alpha <- exp(abs(alpha))
#

# generate beta
beta <- exp(matrix(runif(K*S, min = beta_band[1], max = beta_band[2]),nrow=K))
# generate_u_param(runif(S, min = beta_band[1], max = beta_band[2]), rep(4, S), n = K)
# beta <- exp(abs(beta))

mu <- alpha/(alpha+beta) # parameter for betabinomial
a <- abs(cor(t(mu)))
ID <- upper.tri(a,diag=F)
m <- a[ID]
# min_c <- min(m)
max_c = max(m) # keep the sampled data relatively correlated, not too high, not too low
if (max_c < max_cor) {ff = "go";
# if (min_c > max_cor) {ff = "go";
print(paste("after tried",ct,"times"))
print(paste("sample data generated with correlation smaller than:",max_cor))}
# print(paste("sample data generated with correlation larger than:",max_cor))}
if (ct > 1000) {ff = "go";
print(paste("Fail to generate data with correlation smaller than:",max_cor))
print("clusters can be highly correlated ...")
}
ct <- ct + 1
}

# generate the number of reads (total number of reads for samples)
n <- matrix(rnbinom(G*S, mu = nb_mu, size=nb_size),nrow=G) # generate the number of reads
alpha_g <- alpha[gamma,]
beta_g <- beta[gamma,]

# generate the random number (number of reads for IP sample)
k <- matrix(rbetabinom.ab(n=G*S, size=n, shape1=alpha_g, shape2=beta_g, .dontuse.prob = NULL),nrow=G)

if (plotf == TRUE) {
p <- k/n
par(mfrow= c(K,1))
for (m in 1:K) {
plot( rep(1:S,sum(gamma==m)),t(p[gamma==m,]), main=m)
}
}


# save the data
mat=list(S=S, G=G, K=K, alpha_band=alpha_band,
beta_band=beta_band, nb_mu=nb_mu, nb_size=nb_size,
gamma=gamma, alpha=alpha, beta=beta,
k=k, n=n, c=n-k)
return(mat)
}
19 changes: 19 additions & 0 deletions R/choose_cluster_id_by_mode.r
@@ -0,0 +1,19 @@
# function used to get the mode for each site
#
.getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}

.choose_cluster_id_by_mode <- function(param){

num_iter <- length(param)
num_site <- length(param[[num_iter]]$c)
c_mat <- matrix(NA, nrow= num_iter-1000, ncol = num_site)
for (i in 1:(num_iter-1000)){
c_mat[i,] <- param[[i+1000]]$c
}

c_final <- apply(c_mat, 2, .getmode)
return(c_final)
}

0 comments on commit a09987c

Please sign in to comment.