/
physigArbor.R
214 lines (169 loc) · 6.71 KB
/
physigArbor.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#' Function for measuring phylogenetic signal in comparative data
#'
#' @param td A "\code{treedata}" object
#' @param charType The type of data, one of:
#' \describe{
#' \item{"continuous"}{Continuous}
#' \item{"discrete"}{Discrete (Categorical)}
#' \item{"fromData"}{Try to tell from data: see \code{detectCharacterType}}
#' }
#' @param signalTest The test to carry out. Options are
#' \describe{
#' \item{"pagelLambda"}{Pagel's Lambda test}
#' \item{"Blomberg"}{Blomberg's K}
#' \item{"garbageTest"}{Garbage test (untested)}
#' }
#' @param discreteModelType Describes the discrete model. Options are
#' \describe{
#' \item{"ER"}{Equal rates}
#' \item{"SYM"}{Symmetric}
#' \item{"ARD"}{All rates different}
#' }
#' @param na.rm How to deal with missing data.
#' \describe{
#' \item{"bytrait"}{Drop species out trait-by-trait}
#' \item{"all"}{Drop out species that have NAs for any trait}
#' }
#' @return An object of class physigArbor.
#' @export
physigArbor<-function(td, charType="fromData", signalTest="pagelLambda", discreteModelType="ER", na.rm="bytrait") {
charType = match.arg(charType, c("fromData", "discrete", "continuous"))
if(charType =="fromData") # then try to figure it out
charType <-detectCharacterType(td$dat[[1]])
# check that the data actually make sense - this is a pretty weak test
if(charType=="continuous") {
td <- checkNumeric(td, return.numeric=TRUE)
}
if(charType=="discrete") {
td <- checkFactor(td, return.factor=TRUE)
}
signalTest = match.arg(signalTest, c("pagelLambda", "Blomberg", "garbageTest"))
discreteModelType = match.arg(discreteModelType, c("ER", "SYM", "ARD"))
if(any(is.na(td$dat))){
if(na.rm=="bytrait"){
res <- lapply(1:ncol(td$dat), function(i) {
tdi <- select(td, i);
tdi <- filter(tdi, !is.na(tdi$dat[,1]));
physigArborCalculator(tdi$phy, setNames(tdi$dat[[1]], attributes(tdi)$tip.label), charType, signalTest, discreteModelType)
})
}
if(na.rm=="all"){
td <- filter(td, apply(apply(td$dat, 1, function(x) !is.na(x)), 2, all))
res <- lapply(td$dat, function(x) physigArborCalculator(td$phy, setNames(x, attributes(td)$tip.label), charType, signalTest, discreteModelType))
}
} else {
res <- lapply(td$dat, function(x) physigArborCalculator(td$phy, setNames(x, attributes(td)$tip.label), charType, signalTest, discreteModelType))
}
class(res) <- c("physigArbor", class(res))
attributes(res)$td <- td
attributes(res)$charType <- charType
attributes(res)$signalTest <- signalTest
if(charType=="discrete"){
attributes(res)$discreteModelType = discreteModelType
attributes(res)$charStates = lapply(1:ncol(td$dat), function(x) levels(td$dat[,x]))
}
if(any(is.na(td$dat))){
attributes(res)$na.drop <- lapply(td$dat, function(x) rownames(td$dat)[which(is.na(x))])
}
names(res) <- colnames(td$dat)
return(res)
}
physigArborCalculator<-function(phy, dat, charType="fromData", signalTest="pagelLambda", discreteModelType="ER") {
if(charType =="discrete") {
# this changes the discrete data to 1:n and remembers the original charStates
dat<-as.factor(dat)
charStates<-levels(dat)
k<-nlevels(dat)
ndat<-as.numeric(dat)
names(ndat)<-names(dat)
if(signalTest=="pagelLambda") {
res<-discreteLambdaTest(phy, ndat, k, discreteModelType)
} else if(signalTest=="garbageTest") {
res<-discreteGarbageTest(phy, ndat, k, discreteModelType)
} else if(signalTest=="Blomberg") {
stop("Blomberg K should not be used for discrete characters")
}
}
if(charType =="continuous") {
if(signalTest=="pagelLambda") {
res<-continuousLambdaTest(phy, dat)
} else if(signalTest=="garbageTest") {
res<-continuousGarbageTest(phy, dat)
} else if(signalTest=="Blomberg") {
res<-continuousBlombergTest(phy, dat)
}
}
return(res)
}
discreteLambdaTest<-function(phy, ndat, k, discreteModelType) {
l0tree<-rescale(phy, "lambda", 0)
m1<-fitDiscrete(l0tree, ndat, model=discreteModelType)
m2<-fitDiscrete(phy, ndat, model=discreteModelType, transform="lambda")
chisqTestStat <- 2 * (m2$opt$lnL-m1$opt$lnL)
chisqPVal <- pchisq(chisqTestStat, 1, lower.tail=F)
lnlValues<-c(m1$opt$lnL, m2$opt$lnL)
names(lnlValues)<-c("Lambda fixed at zero", "Lambda estimated")
aiccScores<-c(m1$opt$aicc, m2$opt$aicc)
names(aiccScores)<-c("Lambda fixed at zero", "Lambda estimated")
lambdaValue<-m2$opt$lambda
res<-list(lnlValues= lnlValues, chisqTestStat= chisqTestStat, chisqPVal= chisqPVal, aiccScores= aiccScores, lambdaValue= lambdaValue)
return(res)
}
discreteGarbageTest<-function(phy, ndat, k, discreteModelType) {
m1<-fitDiscrete(phy, ndat, model=discreteModelType)
m2<-fitDiscreteGarbageModel(phy, ndat)
lnlValues<-c(m1$opt$lnL, m2$lnL)
names(lnlValues)<-c("Mk", "Garbage")
aiccScores<-c(m1$opt$aicc, m2$aicc)
names(aiccScores)<-c("Mk", "Garbage")
res<-list(lnlValues= lnlValues, chisqTestStat= NULL, chisqPVal= NULL, aiccScores= aiccScores)
return(res)
}
fitDiscreteGarbageModel<-function(phy, ndat) {
tt <- table(ndat)
prob <- tt/sum(tt)
lnL <- sum(tt * log(prob))
k <- length(tt)-1
n <- length(ndat)
aic <- -2 * lnL + 2 * k
aicc <- aic + 2 * k * (k+1) / (n - k - 1)
res<-list(prob=prob, lnL=lnL, k=k, aic=aic, aicc=aicc)
return(res)
}
continuousLambdaTest<-function(phy, dat) {
l0tree<-rescale(phy, "lambda", 0)
m1<-fitContinuous(l0tree, dat, model="BM")
m2<-fitContinuous(phy, dat, model="lambda")
lnlValues<-c(m1$opt$lnL, m2$opt$lnL)
names(lnlValues)<-c("Lambda fixed at zero", "Lambda estimated")
lambdaValue<-m2$opt$lambda
chisqTestStat <- 2 * (m2$opt$lnL-m1$opt$lnL)
chisqPVal <- pchisq(chisqTestStat, 1, lower.tail=F)
aiccScores<-c(m1$opt$aicc, m2$opt$aicc)
names(aiccScores)<-c("Lambda fixed at zero", "Lambda estimated")
res<-list(lnlValues = lnlValues, chisqTestStat= chisqTestStat, chisqPVal= chisqPVal, aiccScores= aiccScores, lambdaValue= lambdaValue)
return(res)
}
continuousGarbageTest<-function(phy, dat) {
m1<-fitContinuous(phy, dat)
m2<-fitContinuous(phy, dat, model="white")
lnlValues<-c(m1$opt$lnL, m2$opt$lnL)
names(lnlValues)<-c("BM", "white-noise")
aiccScores<-c(m1$opt$aicc, m2$opt$aicc)
names(aiccScores)<-c("Mk", "WhiteNoise")
res<-list(lnlValues= lnlValues, chisqTestStat= NULL, chisqPVal= NULL, aiccScores= aiccScores)
return(res)
}
continuousBlombergTest<-function(phy, dat) {
pstest<-phylosignal(dat, phy)
bres<-list(k= pstest$K, vObs= pstest$PIC.variance.obs, vRnd=pstest$PIC.variance.rnd.mean, pVal=pstest$PIC.variance.P, zScore=pstest$PIC.variance.Z)
res<-list(lnlValues= NULL, chisqTestStat= NULL, chisqPVal= NULL, aiccScores= NULL, bres=bres)
return(res)
}
#' @export
print.physigArbor <- function(x, ...){
names <- attributes(x)$names
attributes(x) <- NULL
attributes(x)$names <- names
print(x)
}