# Specific leaf area and leaf endopolyploidy

James Seery (jseery@mail.uoguelph.ca)

### Setup

Load packages for phylogenetic regression (caper) and function scripts.

In [None]:
# Load the packages needed for phylogenetic regression, installing any that
# are missing. caper depends on mvtnorm and ape, so those are handled first.
# (The original passed the install.packages() call itself as the tryCatch
# `error` handler, so it was evaluated eagerly instead of on failure, and a
# freshly installed package was never loaded.)
cran_mirror <- "http://cran.utstat.utoronto.ca/"
for (pkg in c("mvtnorm", "ape", "caper")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg, repos = cran_mirror)
    }
    library(pkg, character.only = TRUE)
}

# Helper functions used later in the notebook (e.g. dist.check()).
# The file name must be a quoted string; unquoted, R looks for an object
# called `pgls_functions.R` and fails.
source("pgls_functions.R")

Load phylogenies: (1) The Angiosperm Phylogeny Group (APG) phylogeny has only family-level resolution; therefore, each family is a polytomy. (2) The Zanne phylogeny has species-level resolution, but does not include all species.

In [2]:
setwd("../../Raw_data")

# Read each pruned phylogeny and pass it straight through makeLabel(), which
# fills in the blank node names that would otherwise trigger a
# duplicate-node-name error in comparative.data().
apg.tree <- makeLabel(read.tree("APG_phylo/Webb_ages_pruned_unrooted.nwk"))
z.tree <- makeLabel(read.tree("Zanne_phylo/Zanne_pruned.nwk"))

Loading required package: ape
Loading required package: MASS
Loading required package: mvtnorm


Load and compile relevant datasets: (1) specific leaf area (SLA); (2) relative leaf water content (RWC); (3) leaf chlorophyll content (CC); (4) flow cytometry data on genome size and leaf endopolyploidy; and (5) growth form (GF). Endopolyploidy is represented in two ways: (1) leaf endoreduplication index (EI) and (2) mean leaf ploidy (MeanC). TODO: export this into functions held in Raw_data.

In [None]:
# Read the raw trait tables (paths are relative to the current working directory).
SLA_RWC <- read.csv("SLA_RWC.csv")
CC <- read.csv("CC.csv")
Flow_cytometry <- read.csv("Flow_cytometry.csv")
GF <- read.csv("GF.csv")
names(GF) <- c("Species", "Lifespan", "Ann_per", "GH", "Wood_herb")

# Turn raw data into something sensible
SLA_RWC$SLA <- SLA_RWC$LeafArea / (10000 * SLA_RWC$DriedMass) # Standardize SLA to square metres per gram
SLA_RWC$RWC <- (SLA_RWC$FreshMass - SLA_RWC$DriedMass) / SLA_RWC$DriedMass
# Average each endopolyploidy measure across the cytometer's two detectors
# (FL2 and FL3); with na.rm = TRUE a single available detector is used as-is.
Flow_cytometry$EI <- rowMeans(cbind(Flow_cytometry$EI.FL2, Flow_cytometry$EI.FL3), na.rm = TRUE)
Flow_cytometry$MeanC <- rowMeans(cbind(Flow_cytometry$MeanC.FL2, Flow_cytometry$MeanC.FL3), na.rm = TRUE)

# Three-state growth form: annual (A), herbaceous perennial (HP), woody perennial (WP).
GF$A_HP_WP <- character(length(GF$Species))
GF$A_HP_WP[GF$Wood_herb == 'H'] <- "HP"
GF$A_HP_WP[GF$Ann_per == 'A'] <- "A" # Must come after the previous line, which also labels annual herbs as "HP".
GF$A_HP_WP[GF$Wood_herb == 'W'] <- "WP"

# Get species-level mean of each trait
# NOTE(review): the original aggregated SLA_RWC$VcVw under the name RWC, leaving
# the RWC column derived above unused; the derived column is used here. Confirm
# that VcVw was not the intended raw measurement.
mean.SLA_RWC <- aggregate(data.frame(SLA = SLA_RWC$SLA, RWC = SLA_RWC$RWC),
                          by = list(Species = SLA_RWC$Species), mean, na.rm = TRUE)
mean.CC <- aggregate(list(CC = CC$CC),
                     by = list(Species = CC$Species), mean, na.rm = TRUE)
# Fixed: missing commas between the data.frame() arguments, and grouping by the
# undefined object `DNA` instead of Flow_cytometry.
mean.Flow_cytometry <- aggregate(data.frame(Genome.size = Flow_cytometry$Genome.size,
                                            EI = Flow_cytometry$EI,
                                            MeanC = Flow_cytometry$MeanC,
                                            Day = Flow_cytometry$Day),
                                 by = list(Species = Flow_cytometry$Species), mean, na.rm = TRUE)

# Merge into one data frame
# Part A keeps every species with SLA/RWC and attaches growth form where known.
data_partA <- merge(mean.SLA_RWC,
                    list(Species = GF$Species, Wood_herb = GF$Wood_herb, A_HP_WP = GF$A_HP_WP),
                    by = "Species", all.x = TRUE)
# Part B is the union of species with flow cytometry and/or chlorophyll data.
data_partB <- merge(mean.Flow_cytometry,
                    mean.CC,
                    by = "Species", all = TRUE)
# Fixed: the original merged the undefined objects `dataA` and `dataB`.
data <- merge(data_partA,
              data_partB,
              by = "Species", all.x = TRUE)

write.csv(data, file = "SLA+endopolyploidy.csv")

Subset data for (1) only herbaceous species and (2) only endopolyploid species.

In [None]:
# Rows where the condition is TRUE are kept; rows where it is NA are dropped,
# exactly as subset() does.
# Herbaceous species only (Wood_herb coded 'H').
data.herb <- data[which(data$Wood_herb == 'H'), , drop = FALSE]
# Endopolyploid species only (endoreduplication index of at least 0.1).
data.endo <- data[which(data$EI >= 0.1), , drop = FALSE]

Combine the data and phylogenies into an object of type comparative.data.

In [None]:
# Pair each data set (all species, herbaceous only, endopolyploid only) with
# the Zanne tree. vcv=TRUE pre-computes the variance-covariance matrix used by
# the regression.
z.data <- comparative.data(phy = z.tree, data = data, names.col = "Species", vcv = TRUE)
z.data.herb <- comparative.data(phy = z.tree, data = data.herb, names.col = "Species", vcv = TRUE)
z.data.endo <- comparative.data(phy = z.tree, data = data.endo, names.col = "Species", vcv = TRUE)

# Same three data sets paired with the APG tree. Species names are lower-cased
# first -- presumably because the APG tip labels are lower case (TODO confirm).
# Note that this modifies the Species column of the data frames in place.
# Fixed: each tolower() line had a stray closing parenthesis (syntax error).
data$Species <- tolower(data$Species)
apg.data <- comparative.data(phy = apg.tree, data = data, names.col = "Species", vcv = TRUE)
data.herb$Species <- tolower(data.herb$Species)
apg.data.herb <- comparative.data(phy = apg.tree, data = data.herb, names.col = "Species", vcv = TRUE)
data.endo$Species <- tolower(data.endo$Species)
apg.data.endo <- comparative.data(phy = apg.tree, data = data.endo, names.col = "Species", vcv = TRUE)

#### Data transformation

In [None]:
# Check the distribution of species-mean SLA before choosing a transformation.
# dist.check() is not defined in this notebook -- presumably it comes from pgls_functions.R.
dist.check(data$SLA)

In [None]:
# Check the distribution of species-mean leaf ploidy (MeanC).
# dist.check() is not defined in this notebook -- presumably it comes from pgls_functions.R.
dist.check(data$MeanC)

In [None]:
# Check the distribution of the species-mean endoreduplication index (EI).
# dist.check() is not defined in this notebook -- presumably it comes from pgls_functions.R.
dist.check(data$EI)

In [None]:
# Check the distribution of species-mean genome size.
# dist.check() is not defined in this notebook -- presumably it comes from pgls_functions.R.
dist.check(data$Genome.size)

In [None]:
# Check the distribution of species-mean relative leaf water content (RWC).
# dist.check() is not defined in this notebook -- presumably it comes from pgls_functions.R.
dist.check(data$RWC)

In [None]:
# Check the distribution of the species-mean Day value from the flow cytometry data.
# dist.check() is not defined in this notebook -- presumably it comes from pgls_functions.R.
dist.check(data$Day)

In [None]:
# 2 x 2 panel: SLA against each endopolyploidy measure (MeanC, then EI),
# on the raw scale and on the log-log scale.
op <- par(mfrow = c(2, 2))

sla_panels <- list(
    data$SLA ~ data$MeanC,
    log(data$SLA) ~ log(data$MeanC),
    data$SLA ~ data$EI,
    log(data$SLA) ~ log(data$EI)
)
for (panel in sla_panels) {
    plot(panel)
}

# Restore the previous graphics settings.
par(op)

In [None]:
# 2 x 2 panel: RWC against each endopolyploidy measure (MeanC, then EI),
# on the raw scale and on the log-log scale.
op <- par(mfrow = c(2, 2))

rwc_panels <- list(
    data$RWC ~ data$MeanC,
    log(data$RWC) ~ log(data$MeanC),
    data$RWC ~ data$EI,
    log(data$RWC) ~ log(data$EI)
)
for (panel in rwc_panels) {
    plot(panel)
}

# Restore the previous graphics settings.
par(op)

### Regression

Examine the relationship between growth form and endopolyploidy

In [None]:
# PGLS models of endopolyploidy, SLA and RWC, followed by the figures.
#
# NOTE(review): this cell originally referred to `cdatZ`, `cdatW`, `GF3`, `Gh2`
# and `VcVw`, none of which are created anywhere in this notebook. They are
# mapped here onto the objects the notebook does build:
#   cdatZ -> z.data      (comparative data on the Zanne tree)
#   cdatW -> apg.data    (comparative data on the APG tree)
#   GF3   -> A_HP_WP     (growth form: A / HP / WP)
#   Gh2   -> Wood_herb   (H / W)
#   VcVw  -> RWC         (relative leaf water content)
# Confirm this mapping. Model object names are unchanged. Numeric results quoted
# in comments come from earlier runs and must be re-checked after re-fitting.

# Confidence intervals for the coefficients of a fitted pgls model whose name
# contains `term`.
#   model: a fitted pgls object
#   term:  literal text matched against the coefficient names
#   level: confidence level (default 0.95)
# Returns a matrix with columns lower / estimate / upper, one row per coefficient.
# The interval is estimate +/- t * SE on the residual degrees of freedom. The
# original hand-typed intervals divided the coefficient standard error by
# sqrt(n) (it is already a standard error) and used qt(0.025, .), which is
# negative, so the two bounds were printed in reverse order.
coef_ci <- function(model, term, level = 0.95) {
    sm <- summary(model)
    rows <- grep(term, rownames(sm$coefficients), fixed = TRUE)
    cf <- sm$coefficients[rows, , drop = FALSE]
    t_crit <- qt(1 - (1 - level) / 2, df = sm$df[2])
    ci <- cbind(lower = cf[, "Estimate"] - t_crit * cf[, "Std. Error"],
                estimate = cf[, "Estimate"],
                upper = cf[, "Estimate"] + t_crit * cf[, "Std. Error"])
    rownames(ci) <- rownames(cf)
    ci
}

# --- Growth form (three states) and endopolyploidy -------------------------
boxplot(log(EI) ~ A_HP_WP, data = data)
pglsZ.EI.GF3.log <- pgls(log(EI) ~ A_HP_WP + log(Genome.size) + Day, z.data, lambda = 'ML')
summary(pglsZ.EI.GF3.log)
coef_ci(pglsZ.EI.GF3.log, "A_HP_WP") # 95% CIs for the growth-form coefficients
pglsW.EI.GF3.log <- pgls(log(EI) ~ A_HP_WP + log(Genome.size) + Day, apg.data, lambda = 'ML')
summary(pglsW.EI.GF3.log) # very similar (earlier run)
coef_ci(pglsW.EI.GF3.log, "A_HP_WP") # 95% CIs for the growth-form coefficients

# The main differences are herbaceous and woody
boxplot(log(EI) ~ Wood_herb, data = data)
pglsZ.EI.Gh2.log <- pgls(log(EI) ~ Wood_herb + log(Genome.size) + Day, z.data, lambda = 'ML')
summary(pglsZ.EI.Gh2.log)
AIC(pglsZ.EI.Gh2.log) # 326.2549 (earlier run)
pglsW.EI.Gh2.log <- pgls(log(EI) ~ Wood_herb + log(Genome.size) + Day, apg.data, lambda = 'ML')
summary(pglsW.EI.Gh2.log)
AIC(pglsW.EI.Gh2.log) # 366.7467 (earlier run)

# dev.new() opens the platform's default device; windows() exists only on Windows.
dev.new(width = 10, height = 10)
# op=par(mfrow=c(1,2))
plot(log(EI) ~ log(Genome.size), data = data, ylab = "Log leaf endoreduplication index", xlab = expression(" Log genome size" ~ (pg ~ "2C"^{-1})), main = "Zanne (Akaike weight > 99%)")
# Horizontal lines at the fitted level for each group: intercept (herbaceous)
# and intercept + woody offset (coefficient 2).
abline(pglsZ.EI.Gh2.log$model$coef[1], 0)
# text(x=4.4, -4, labels="p = 0.004")
# text(x=4.52, -4.35, labels="AIC = 326.3")
abline(pglsZ.EI.Gh2.log$model$coef[1] + pglsZ.EI.Gh2.log$model$coef[2], 0, lty = 2)
# text(x=4.4, -2.4, labels="p = 0.005")
# text(x=4.52, -2.75, labels="AIC = 366.7")
legend("topright",
       inset = 0.025,
       legend = c("Herbaceous", "Woody"),
       lty = c(1, 2))
# plot(log(EI) ~ log(Genome.size), data=data, ylab="", yaxt='n', xlab=expression(" Log genome size" ~ (pg ~ "2C"^{-1})), main="APG (AIC = 366.7)")
# axis(side=2, labels=FALSE)
# abline(pglsW.EI.Gh2.log$model$coef[1], 0)
# abline(pglsW.EI.Gh2.log$model$coef[1] + pglsW.EI.Gh2.log$model$coef[2], 0, lty=2)
# legend("topright",
#        inset = 0.025,
#        c("Herbaceous", "Woody"),
#        cex=0.9,
#        xjust=1,
#        lty = c(1,2))
# par(op)

# --- Exploratory plots of the covariates -----------------------------------
plot(log(SLA) ~ CC, data)
plot(log(RWC) ~ CC, data)
plot(log(SLA) ~ Day, data)
# NOTE(review): `Dur2` (presumably annual/perennial duration, cf. GF$Ann_per) is
# not merged into `data` in this notebook, so this plot is drawn only when the
# column is available.
if ("Dur2" %in% names(data)) {
    plot(log(RWC) ~ Dur2, data)
}
plot(log(SLA) ~ Wood_herb, data)
plot(log(RWC) ~ Wood_herb, data)


# --- SLA ~ EI ---------------------------------------------------------------
# With the EI x growth-habit interaction
pglsW.SLA.EI.log <- pgls(log(SLA) ~ log(EI)*Wood_herb + CC + log(Genome.size) + Day, apg.data, lambda = 'ML')
summary(pglsW.SLA.EI.log) # No interaction
AIC(pglsW.SLA.EI.log) # 116.5871 (earlier run)
pglsZ.SLA.EI.log <- pgls(log(SLA) ~ log(EI)*Wood_herb + CC + log(Genome.size) + Day, z.data, lambda = 'ML')
anova(pglsZ.SLA.EI.log) # No interaction
AIC(pglsZ.SLA.EI.log) # 116.4714 (earlier run)

# Additive models. These overwrite the interaction fits above and are the ones
# used for the figures; coefficient order is:
# 1 intercept, 2 log(EI), 3 woody offset, 4 CC, 5 log(Genome.size), 6 Day.
pglsW.SLA.EI.log <- pgls(log(SLA) ~ log(EI) + Wood_herb + CC + log(Genome.size) + Day, apg.data, lambda = 'ML')
summary(pglsW.SLA.EI.log) # EI n.s.
AIC(pglsW.SLA.EI.log) # 114.5968 (earlier run)
pglsZ.SLA.EI.log <- pgls(log(SLA) ~ log(EI) + Wood_herb + CC + log(Genome.size) + Day, z.data, lambda = 'ML')
summary(pglsZ.SLA.EI.log) # EI n.s.
AIC(pglsZ.SLA.EI.log) # 114.481 (earlier run)

# --- RWC ~ EI (three-state growth form is better than Wood_herb for RWC, because it lowers AICs)
pglsW.VcVw.EI.log <- pgls(log(RWC) ~ log(EI)*A_HP_WP + CC + log(Genome.size) + Day, apg.data, lambda = 'ML')
summary(pglsW.VcVw.EI.log) # No interaction
AIC(pglsW.VcVw.EI.log) # 144.6174 (earlier run)
pglsZ.VcVw.EI.log <- pgls(log(RWC) ~ log(EI)*A_HP_WP + CC + log(Genome.size) + Day, z.data, lambda = 'ML')
summary(pglsZ.VcVw.EI.log) # No interaction
AIC(pglsZ.VcVw.EI.log) # 137.6905 (earlier run)

# Additive models. Coefficient order:
# 1 intercept, 2 log(EI), 3 HP offset, 4 WP offset, 5 CC, 6 log(Genome.size), 7 Day.
pglsW.VcVw.EI.log <- pgls(log(RWC) ~ log(EI) + A_HP_WP + CC + log(Genome.size) + Day, apg.data, lambda = 'ML')
summary(pglsW.VcVw.EI.log)
AIC(pglsW.VcVw.EI.log) # 140.8617 (earlier run)
coef_ci(pglsW.VcVw.EI.log, "A_HP_WP") # 95% CIs for the growth-form coefficients
# All three GF states are different (earlier run)
pglsZ.VcVw.EI.log <- pgls(log(RWC) ~ log(EI) + A_HP_WP + CC + log(Genome.size) + Day, z.data, lambda = 'ML')
summary(pglsZ.VcVw.EI.log) # EI is significant (when perenniality is removed) -- earlier run
AIC(pglsZ.VcVw.EI.log) # 133.7462 (earlier run)
coef_ci(pglsZ.VcVw.EI.log, "A_HP_WP") # 95% CIs for the growth-form coefficients

# --- Plot significant relationships ----------------------------------------
# Start with RWC because I only have to plot one model
mar.default <- c(5, 4, 4, 2) + 0.1
op <- par(mar = mar.default + c(0, 1, 0, 0)) # widen the left margin for the long y label
plot(log(RWC) ~ CC, type = 'n', data, ylab = expression("Ln relative leaf water content  " ~ (g ~ g^{-1})), xlab = "Chlorophyll content", main = "Zanne (Akaike weight > 99%)", ylim = c(0, 3.5))
points(log(RWC) ~ CC, data = data[data$A_HP_WP == "A", ], pch = 4)
points(log(RWC) ~ CC, data = data[data$A_HP_WP == "HP", ], pch = 6)
points(log(RWC) ~ CC, data = data[data$A_HP_WP == "WP", ], pch = 17)
summary(pglsZ.VcVw.EI.log)
# One line per growth form: shared CC slope (coefficient 5), group-specific intercept.
# Annual
abline(pglsZ.VcVw.EI.log$model$coef[1], pglsZ.VcVw.EI.log$model$coef[5], lty = 3)
# Herbaceous perennial
abline(pglsZ.VcVw.EI.log$model$coef[1] + pglsZ.VcVw.EI.log$model$coef[3], pglsZ.VcVw.EI.log$model$coef[5], lty = 2)
# Woody perennial
abline(pglsZ.VcVw.EI.log$model$coef[1] + pglsZ.VcVw.EI.log$model$coef[4], pglsZ.VcVw.EI.log$model$coef[5], lty = 1)
legend("topright",
       inset = 0.025,
       cex = 0.9,
       legend = c("A", "HP", "WP"),
       lty = c(3, 2, 1),
       pch = c(4, 6, 17))
par(op)

# Three-panel SLA figure: genome size, chlorophyll content and Julian day.
# Dashed lines = herbaceous (intercept only), solid = woody (intercept +
# coefficient 3); colour 2 = APG fit, colour 3 = Zanne fit.
dev.new(width = 7.5, height = 6)
op <- par(mar = mar.default + c(0, 1, 0, 0), mfrow = c(1, 3))
plot(log(SLA) ~ log(Genome.size), type = 'n', data, ylab = expression("Ln specific leaf area" ~ (m^{2} ~ g^{-1})), xlab = expression("Ln genome size" ~ (pg ~ "2C"^{-1})), ylim = c(min(log(data$SLA)), -1.618))
points(log(SLA) ~ log(Genome.size), data = data[data$Wood_herb == "W", ], pch = 17)
points(log(SLA) ~ log(Genome.size), data = data[data$Wood_herb == "H", ], pch = 6)
# APG
summary(pglsW.SLA.EI.log)
abline(pglsW.SLA.EI.log$model$coef[1], pglsW.SLA.EI.log$model$coef[5], lty = 2, col = 2)
abline(pglsW.SLA.EI.log$model$coef[1] + pglsW.SLA.EI.log$model$coef[3], pglsW.SLA.EI.log$model$coef[5], col = 2)
# Zanne
summary(pglsZ.SLA.EI.log)
abline(pglsZ.SLA.EI.log$model$coef[1], pglsZ.SLA.EI.log$model$coef[5], lty = 2, col = 3)
abline(pglsZ.SLA.EI.log$model$coef[1] + pglsZ.SLA.EI.log$model$coef[3], pglsZ.SLA.EI.log$model$coef[5], col = 3)

par(mar = mar.default)
plot(log(SLA) ~ CC, type = 'n', data, ylab = "", xlab = "Chlorophyll content", ylim = c(min(log(data$SLA)), -1.618))
points(log(SLA) ~ CC, data = data[data$Wood_herb == "W", ], pch = 17)
points(log(SLA) ~ CC, data = data[data$Wood_herb == "H", ], pch = 6)
# APG
summary(pglsW.SLA.EI.log)
abline(pglsW.SLA.EI.log$model$coef[1], pglsW.SLA.EI.log$model$coef[4], lty = 2, col = 2)
abline(pglsW.SLA.EI.log$model$coef[1] + pglsW.SLA.EI.log$model$coef[3], pglsW.SLA.EI.log$model$coef[4], col = 2)
# Zanne
summary(pglsZ.SLA.EI.log)
abline(pglsZ.SLA.EI.log$model$coef[1], pglsZ.SLA.EI.log$model$coef[4], lty = 2, col = 3)
abline(pglsZ.SLA.EI.log$model$coef[1] + pglsZ.SLA.EI.log$model$coef[3], pglsZ.SLA.EI.log$model$coef[4], col = 3)

plot(log(SLA) ~ Day, type = 'n', data, ylab = "", xlab = "Julian day", ylim = c(min(log(data$SLA)), -1.618))
points(log(SLA) ~ Day, data = data[data$Wood_herb == "W", ], pch = 17)
points(log(SLA) ~ Day, data = data[data$Wood_herb == "H", ], pch = 6)
# APG
abline(pglsW.SLA.EI.log$model$coef[1], pglsW.SLA.EI.log$model$coef[6], lty = 2, col = 2)
abline(pglsW.SLA.EI.log$model$coef[1] + pglsW.SLA.EI.log$model$coef[3], pglsW.SLA.EI.log$model$coef[6], col = 2)
# Zanne
abline(pglsZ.SLA.EI.log$model$coef[1], pglsZ.SLA.EI.log$model$coef[6], lty = 2, col = 3)
abline(pglsZ.SLA.EI.log$model$coef[1] + pglsZ.SLA.EI.log$model$coef[3], pglsZ.SLA.EI.log$model$coef[6], col = 3)

legend("topright",
       inset = 0.025,
       #cex=0.8,
       legend = c("Herbaceous", "Woody", "APG", "Zanne"),
       lty = c(2, 1, 1, 1),
       pch = c(6, 17, NA, NA),
       col = c(1, 1, 2, 3))
par(op)

# Endoreduplication not-correlations
mar.default <- c(5, 4, 4, 2) + 0.1
op <- par(mar = mar.default + c(0, 1, 0, 0), mfrow = c(2, 1))
# Top panel: x axis drawn without labels because it is shared with the panel below.
plot(log(SLA) ~ log(EI), data = data, xlab = "", xaxt = 'n', ylab = expression("Ln specific leaf area " ~ (m^{2} ~ g^{-1})), main = "")
axis(side = 1, labels = FALSE)
plot(log(RWC) ~ log(EI), data = data, ylab = expression("Ln relative water content " ~ (g ~ g^{-1})), xlab = "Ln endoreduplication index")
par(op)
