# Specific leaf area and leaf endopolyploidy

James Seery (jseery@mail.uoguelph.ca)

## Setup

Load packages for phylogenetic regression (caper) and function scripts.

In [None]:
# Attach the packages needed for phylogenetic regression. caper is attached
# last because the two packages before it are its dependencies.
# A package is installed only when it is not already available; previously
# install.packages() was evaluated eagerly on every run (tryCatch() handlers
# must be functions) and the package was never attached after installation.
for (pkg in c("mvtnorm", "ape", "caper")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "http://cran.utstat.utoronto.ca/")
  }
  library(pkg, character.only = TRUE)
}

# Project helper functions. NOTE(review): presumably this file defines
# dist.check() and line.maker(), which are called further down -- confirm.
source("pgls_functions.R")

Load phylogenies: (1) The Angiosperm Phylogeny Group has family-level resolution; therefore, each family is a polytomy. (2) The Zanne phylogeny has species-level resolution, but does not have all species.

In [2]:
# Every path below is relative to the raw-data directory.
setwd("../../Raw_data")

# makeLabel() is applied to each tree straight after reading it: blank node
# names count as duplicates and make comparative.data() fail.
apg.tree <- makeLabel(read.tree("APG_phylo/Webb_ages_pruned_unrooted.nwk"))
z.tree <- makeLabel(read.tree("Zanne_phylo/Zanne_pruned.nwk"))

Loading required package: ape
Loading required package: MASS
Loading required package: mvtnorm


Load and compile relevant datasets: (1) specific leaf area (SLA); (2) relative leaf water content (RWC); (3) leaf chlorophyll content (CC); (4) flow cytometry data on genome size and leaf endopolyploidy; and (5) growth form (GF). Endopolyploidy is represented in two ways: (1) leaf endoreduplication index (EI) and (2) mean leaf ploidy (MeanC). TODO: export this into functions held in Raw_data.

In [None]:
# Read the raw tables (paths are relative to the current working directory).
SLA_RWC <- read.csv("SLA_RWC.csv")
CC <- read.csv("CC.csv")
Flow_cytometry <- read.csv("Flow_cytometry.csv")
GF <- read.csv("GF.csv")
names(GF) <- c("Species", "Lifespan", "Ann_per", "GH", "Wood_herb")

# Turn raw data into something sensible
# Standardize SLA to square metres per gram
SLA_RWC$SLA <- SLA_RWC$LeafArea / (10000 * SLA_RWC$DriedMass)
# Water mass (fresh minus dried) per unit dried mass
SLA_RWC$RWC <- (SLA_RWC$FreshMass - SLA_RWC$DriedMass) / SLA_RWC$DriedMass
# Get mean EI and mean ploidy across the cytometer's two detectors: FL2 and FL3
Flow_cytometry$EI <- rowMeans(
  cbind(Flow_cytometry$EI.FL2, Flow_cytometry$EI.FL3),
  na.rm = TRUE
)
Flow_cytometry$MeanC <- rowMeans(
  cbind(Flow_cytometry$MeanC.FL2, Flow_cytometry$MeanC.FL3),
  na.rm = TRUE
)

# Three-category growth form: annual (A), herbaceous perennial (HP),
# woody perennial (WP).
GF$A_HP_WP <- character(length(GF$Species))
GF$A_HP_WP[GF$Wood_herb == "H"] <- "HP"
# The annual assignment must come after the herbaceous one, which
# (incorrectly) labels annual species "HP" as well.
GF$A_HP_WP[GF$Ann_per == "A"] <- "A"
GF$A_HP_WP[GF$Wood_herb == "W"] <- "WP"

# Get species-level mean of each trait
# RWC is the column computed above (the previous reference to `VcVw` did not
# match any column created in this script).
mean.SLA_RWC <- aggregate(
  data.frame(SLA = SLA_RWC$SLA, RWC = SLA_RWC$RWC),
  by = list(Species = SLA_RWC$Species),
  FUN = mean, na.rm = TRUE
)
mean.CC <- aggregate(
  list(CC = CC$CC),
  by = list(Species = CC$Species),
  FUN = mean, na.rm = TRUE
)
# Group by the species column of the flow cytometry table itself (the former
# `DNA$Species` referred to an object that is never created).
mean.Flow_cytometry <- aggregate(
  data.frame(
    Genome.size = Flow_cytometry$Genome.size,
    EI = Flow_cytometry$EI,
    MeanC = Flow_cytometry$MeanC,
    Day = Flow_cytometry$Day
  ),
  by = list(Species = Flow_cytometry$Species),
  FUN = mean, na.rm = TRUE
)

# Merge into one data frame
# Part A: SLA/RWC means plus growth form (every species with SLA data is kept).
data_partA <- merge(
  mean.SLA_RWC,
  list(Species = GF$Species, Wood_herb = GF$Wood_herb, A_HP_WP = GF$A_HP_WP),
  by = "Species", all.x = TRUE
)
# Part B: flow cytometry means plus chlorophyll content (full outer join).
data_partB <- merge(
  mean.Flow_cytometry,
  mean.CC,
  by = "Species", all = TRUE
)
# Final table: one row per species in part A.
data <- merge(
  data_partA,
  data_partB,
  by = "Species", all.x = TRUE
)

write.csv(data, file = "SLA+endopolyploidy.csv")

Subset data for (1) only herbaceous species and (2) only endopolyploid species.

In [None]:
# Herbaceous species only (Wood_herb coded "H").
data.herb <- subset(data, Wood_herb == "H")
# Species whose leaf endoreduplication index is at least 0.1.
data.endo <- subset(data, EI >= 0.1)

Combine the data and phylogenies into an object of type comparative.data.

In [None]:
# Zanne phylogeny: build the comparative objects from the species names as
# they appear in the data.
# vcv=TRUE will do some pre-processing for the regression by calculating the
# variance-covariance matrix.
z.data <- comparative.data(phy = z.tree, data = data,
                           names.col = "Species", vcv = TRUE)
z.data.herb <- comparative.data(phy = z.tree, data = data.herb,
                                names.col = "Species", vcv = TRUE)
z.data.endo <- comparative.data(phy = z.tree, data = data.endo,
                                names.col = "Species", vcv = TRUE)

# APG phylogeny: species names are lower-cased before matching.
# NOTE(review): presumably the APG tree's tip labels are lower case -- confirm.
# (The stray closing parentheses that made these lines unparseable are removed.)
data$Species <- tolower(data$Species)
apg.data <- comparative.data(phy = apg.tree, data = data,
                             names.col = "Species", vcv = TRUE)
data.herb$Species <- tolower(data.herb$Species)
apg.data.herb <- comparative.data(phy = apg.tree, data = data.herb,
                                  names.col = "Species", vcv = TRUE)
data.endo$Species <- tolower(data.endo$Species)
apg.data.endo <- comparative.data(phy = apg.tree, data = data.endo,
                                  names.col = "Species", vcv = TRUE)

### Data transformation

In [None]:
# Check the species-mean SLA values with dist.check().
# NOTE(review): dist.check() is not defined in this file; presumably it comes from pgls_functions.R -- confirm what it reports.
dist.check(data$SLA)

In [None]:
# Check the species-mean endoreduplication index (EI) with dist.check().
# NOTE(review): dist.check() is not defined in this file; presumably it comes from pgls_functions.R -- confirm what it reports.
dist.check(data$EI)

In [None]:
# Check the species-mean leaf ploidy (MeanC) with dist.check().
# NOTE(review): dist.check() is not defined in this file; presumably it comes from pgls_functions.R -- confirm what it reports.
dist.check(data$MeanC) # The linearity of this is far worse than log(EI); therefore, don't use MeanC.

In [None]:
# Check the species-mean genome size with dist.check().
# NOTE(review): dist.check() is not defined in this file; presumably it comes from pgls_functions.R -- confirm what it reports.
dist.check(data$Genome.size)

In [None]:
# Check the species-mean leaf relative water content (RWC) with dist.check().
# NOTE(review): dist.check() is not defined in this file; presumably it comes from pgls_functions.R -- confirm what it reports.
dist.check(data$RWC)

In [None]:
# Check the species-mean Day values with dist.check().
# NOTE(review): dist.check() is not defined in this file; presumably it comes from pgls_functions.R -- confirm what it reports.
dist.check(data$Day)

In [None]:
# 2 x 2 grid of SLA against the two endopolyploidy measures.
# Top row: MeanC; bottom row: EI. Left: raw scale; right: log-log scale.
op <- par(mfrow = c(2, 2))
plot(data$SLA ~ data$MeanC)
plot(log(data$SLA) ~ log(data$MeanC))
plot(data$SLA ~ data$EI)
plot(log(data$SLA) ~ log(data$EI))
# Put the previous graphical parameters back.
par(op)

In [None]:
# 2 x 2 grid of RWC against the two endopolyploidy measures.
# Top row: MeanC; bottom row: EI. Left: raw scale; right: log-log scale.
op <- par(mfrow = c(2, 2))
plot(data$RWC ~ data$MeanC)
plot(log(data$RWC) ~ log(data$MeanC))
plot(data$RWC ~ data$EI)
plot(log(data$RWC) ~ log(data$EI))
# Put the previous graphical parameters back.
par(op)

## Regression

#### The relationship between endopolyploidy and growth form

In [None]:
# Confidence intervals for every coefficient of a fitted model, read from the
# model's summary() table instead of numbers copied by hand from an old run.
# The "Std. Error" column already is the standard error of each estimate, so
# it must not be divided by sqrt(n); the interval is estimate +/- t * SE.
# NOTE(review): assumes summary() of a pgls fit exposes `coefficients`
# (columns "Estimate", "Std. Error") and `df` (second element = residual
# degrees of freedom), as summary.lm does -- confirm against the caper docs.
coef_ci <- function(model, level = 0.95) {
  fit <- summary(model)
  est <- fit$coefficients[, "Estimate"]
  se <- fit$coefficients[, "Std. Error"]
  crit <- qt(1 - (1 - level) / 2, df = fit$df[2])
  cbind(estimate = est, lower = est - crit * se, upper = est + crit * se)
}

# Three-category growth form (A / HP / WP), both phylogenies.
boxplot(log(EI) ~ A_HP_WP, data = data)
pglsZ.EI.A_HP_WP <- pgls(log(EI) ~ A_HP_WP + log(Genome.size) + Day,
                         z.data, lambda = "ML")
summary(pglsZ.EI.A_HP_WP)
# 95% CIs, including the "Herbaceous perennial" and "Woody perennial" coefficients
coef_ci(pglsZ.EI.A_HP_WP)
pglsAPG.EI.A_HP_WP <- pgls(log(EI) ~ A_HP_WP + log(Genome.size) + Day,
                           apg.data, lambda = "ML")
summary(pglsAPG.EI.A_HP_WP) # very similar
# 95% CIs, including the "Herbaceous perennial" and "Woody perennial" coefficients
coef_ci(pglsAPG.EI.A_HP_WP)

# There is no difference between HP and A; therefore, the only growth form difference is between herbaceous and woody.
# Replace instances of A_HP_WP (three growth form categories) with Wood_herb (two growth form categories)
boxplot(log(EI) ~ Wood_herb, data = data)
pglsZ.EI.Wood_herb <- pgls(log(EI) ~ Wood_herb + log(Genome.size) + Day,
                           z.data, lambda = "ML")
summary(pglsZ.EI.Wood_herb)
AIC(pglsZ.EI.Wood_herb) # 326.2549 <- Best score
pglsAPG.EI.Wood_herb <- pgls(log(EI) ~ Wood_herb + log(Genome.size) + Day,
                             apg.data, lambda = "ML")
summary(pglsAPG.EI.Wood_herb)
AIC(pglsAPG.EI.Wood_herb) # 366.7467

# For plotting, remove Day.
pglsZ.EI.Wood_herb.simple <- pgls(log(EI) ~ Wood_herb + log(Genome.size),
                                  z.data, lambda = "ML")
summary(pglsZ.EI.Wood_herb.simple)
AIC(pglsZ.EI.Wood_herb.simple)
# dev.new() opens a fresh device on any platform (windows() exists only on
# Windows builds of R).
dev.new(width = 10, height = 10)
plot(log(EI) ~ log(Genome.size), type = "n", data,
     ylab = "Ln leaf endoreduplication index",
     xlab = expression("Ln genome size" ~ (pg ~ "2C"^{-1})),
     main = "Zanne (Akaike weight > 99%)")
points(log(EI) ~ log(Genome.size), data = data[data$Wood_herb == "H", ], pch = 4)
points(log(EI) ~ log(Genome.size), data = data[data$Wood_herb == "W", ], pch = 17)
# Horizontal reference lines: the intercept (herbaceous) and the intercept
# plus the second coefficient (woody).
# NOTE(review): both lines are drawn with slope 0 although the model also
# contains log(Genome.size) -- confirm that ignoring that slope is intended.
abline(pglsZ.EI.Wood_herb.simple$model$coef[1], 0)
# text(x=4.4, -4, labels="p = ???")
# text(x=4.52, -4.35, labels="AIC = ???")
abline(pglsZ.EI.Wood_herb.simple$model$coef[1] +
         pglsZ.EI.Wood_herb.simple$model$coef[2], 0, lty = 2)
# text(x=4.4, -2.4, labels="p = ???")
# text(x=4.52, -2.75, labels="AIC = ???")
legend("topright",
       inset = 0.025,
       c("Herbaceous", "Woody"),
       lty = c(1, 2),
       pch = c(4, 17))

#### The relationship between SLA and endopolyploidy via RWC

Regression. There is no interaction effect of endopolyploidy and growth form on either SLA or RWC.

##### SLA and endopolyploidy for Angiosperms in general

Lack of correlation between endopolyploidy and the response variables

In [None]:
# Two stacked panels sharing the log(EI) x-axis: SLA on top, RWC underneath.
# The left margin is widened by one line so the expression labels fit.
mar.default <- c(5, 4, 4, 2) + 0.1
op <- par(mar = mar.default + c(0, 1, 0, 0), mfrow = c(2, 1))

# Top panel: x-axis title and tick labels are suppressed, tick marks are kept.
plot(log(SLA) ~ log(EI), data = data, main = "", xlab = "", xaxt = "n",
     ylab = expression("Ln specific leaf area " ~ (m^{2} ~ g^{-1})))
axis(side = 1, labels = FALSE)

# Bottom panel carries the shared x-axis label.
plot(log(RWC) ~ log(EI), data = data,
     xlab = "Ln endoreduplication index",
     ylab = expression("Ln relative water content " ~ (g ~ g^{-1})))

par(op)

Significant relationships with SLA

In [None]:
# Full models of SLA on both phylogenies. The growth-form column is named
# `Wood_herb` in `data`; the former `Woody_herb` does not exist, so the models
# could not be fitted and the point subsets below were empty.
pglsAPG.SLA.EI <- pgls(log(SLA) ~ log(EI) + Wood_herb + CC + log(Genome.size) + Day,
                       apg.data, lambda = "ML")
summary(pglsAPG.SLA.EI) # EI n.s.
AIC(pglsAPG.SLA.EI) # 114.5968 !!!!!
pglsZ.SLA.EI <- pgls(log(SLA) ~ log(EI) + Wood_herb + CC + log(Genome.size) + Day,
                     z.data, lambda = "ML")
summary(pglsZ.SLA.EI) # EI n.s.
AIC(pglsZ.SLA.EI) # 114.481 !!!!!

# Close in AIC. Use the APG tree because it has more data

# Reduced models for plotting significant relationships
pglsAPG.SLA.GS <- pgls(log(SLA) ~ Wood_herb + log(Genome.size), apg.data, lambda = "ML")
pglsAPG.SLA.CC <- pgls(log(SLA) ~ Wood_herb + CC, apg.data, lambda = "ML")
pglsAPG.SLA.Day <- pgls(log(SLA) ~ Wood_herb + Day, apg.data, lambda = "ML")

# Shared y-range for the three panels; na.rm guards against species without SLA.
sla_ylim <- c(min(log(data$SLA), na.rm = TRUE), -1.618)
is_woody <- data$Wood_herb == "W"
is_herb <- data$Wood_herb == "H"

# dev.new() opens a fresh device on any platform (windows() exists only on
# Windows builds of R).
dev.new(width = 7.5, height = 6)
op <- par(mar = mar.default + c(0, 1, 0, 0), mfrow = c(1, 3))

# Panel 1: SLA against genome size.
plot(log(SLA) ~ log(Genome.size), type = "n", data,
     ylab = expression("Ln specific leaf area" ~ (m^{2} ~ g^{-1})),
     xlab = expression("Ln genome size" ~ (pg ~ "2C"^{-1})), ylim = sla_ylim)
points(log(SLA) ~ log(Genome.size), data = data[is_woody, ], pch = 17)
points(log(SLA) ~ log(Genome.size), data = data[is_herb, ], pch = 6)
summary(pglsAPG.SLA.GS)
# NOTE(review): line.maker() is not defined in this file; presumably it draws
# the fitted lines of the model on the current panel -- confirm.
line.maker(pglsAPG.SLA.GS)

# Panels 2 and 3 have no y-axis label, so they use the default margins.
par(mar = mar.default)

# Panel 2: SLA against chlorophyll content.
plot(log(SLA) ~ CC, type = "n", data, ylab = "", xlab = "Chlorophyll content",
     ylim = sla_ylim)
points(log(SLA) ~ CC, data = data[is_woody, ], pch = 17)
points(log(SLA) ~ CC, data = data[is_herb, ], pch = 6)
summary(pglsAPG.SLA.CC)
line.maker(pglsAPG.SLA.CC)

# Panel 3: SLA against Julian day.
plot(log(SLA) ~ Day, type = "n", data, ylab = "", xlab = "Julian day",
     ylim = sla_ylim)
points(log(SLA) ~ Day, data = data[is_woody, ], pch = 17)
points(log(SLA) ~ Day, data = data[is_herb, ], pch = 6)
summary(pglsAPG.SLA.Day)
line.maker(pglsAPG.SLA.Day)

legend("topright",
       inset = 0.025,
       #cex=0.8,
       c("Herbaceous", "Woody"),
       lty = c(2, 1),
       pch = c(6, 17))
par(op)

Significant relationships with leaf relative water content (RWC). Note that the three-category growth form (A_HP_WP) has better fit here.

In [None]:
# Confidence intervals for every coefficient of a fitted model, read from the
# model's summary() table instead of numbers copied by hand from an old run.
# The "Std. Error" column already is the standard error of each estimate, so
# it must not be divided by sqrt(n); the interval is estimate +/- t * SE.
# NOTE(review): assumes summary() of a pgls fit exposes `coefficients`
# (columns "Estimate", "Std. Error") and `df` (second element = residual
# degrees of freedom), as summary.lm does -- confirm against the caper docs.
coef_ci <- function(model, level = 0.95) {
  fit <- summary(model)
  est <- fit$coefficients[, "Estimate"]
  se <- fit$coefficients[, "Std. Error"]
  crit <- qt(1 - (1 - level) / 2, df = fit$df[2])
  cbind(estimate = est, lower = est - crit * se, upper = est + crit * se)
}

# Full models of RWC with the three-category growth form, both phylogenies.
pglsAPG.RWC.EI <- pgls(log(RWC) ~ log(EI) + A_HP_WP + CC + log(Genome.size) + Day,
                       apg.data, lambda = "ML")
summary(pglsAPG.RWC.EI)
AIC(pglsAPG.RWC.EI) # 140.8617
# 95% CIs, including the "Herbaceous perennial" and "Woody perennial" regression coefficients
coef_ci(pglsAPG.RWC.EI)
# All three GF states are different!
pglsZ.RWC.EI <- pgls(log(RWC) ~ log(EI) + A_HP_WP + CC + log(Genome.size) + Day,
                     z.data, lambda = "ML")
summary(pglsZ.RWC.EI) # EI is significant (when perenniality is removed) !!!!
AIC(pglsZ.RWC.EI) # 133.7462 !!!!!
# 95% CIs, including the "Herbaceous perennial" and "Woody perennial" regression coefficients
coef_ci(pglsZ.RWC.EI)

# The Zanne phylogeny has a much better AIC score, so it will be used.

# Reduced model for plotting significant relationships
pglsZ.RWC.CC <- pgls(log(RWC) ~ A_HP_WP + CC, z.data, lambda = "ML")
rwc_coef <- pglsZ.RWC.CC$model$coef

mar.default <- c(5, 4, 4, 2) + 0.1
op <- par(mar = mar.default + c(0, 1, 0, 0))
# The plotted response is the RWC column of `data`; the former `VcVw` is not a
# column of the merged data frame.
plot(log(RWC) ~ CC, type = "n", data,
     ylab = expression("Ln leaf relative water content  " ~ (g ~ g^{-1})),
     xlab = "Chlorophyll content", main = "Zanne phylogeny (Akaike weight > 99%)",
     ylim = c(0, 3.5))
points(log(RWC) ~ CC, data = data[data$A_HP_WP == "A", ], pch = 4)
points(log(RWC) ~ CC, data = data[data$A_HP_WP == "HP", ], pch = 6)
points(log(RWC) ~ CC, data = data[data$A_HP_WP == "WP", ], pch = 17)
summary(pglsZ.RWC.CC)
# Fitted lines share the slope in coefficient 4 and differ in intercept.
# NOTE(review): the indices assume the coefficient order (Intercept, HP, WP,
# CC), i.e. that A_HP_WP has exactly the levels A, HP and WP -- confirm.
# Annual
abline(rwc_coef[1], rwc_coef[4], lty = 3)
# Herbaceous perennial
abline(rwc_coef[1] + rwc_coef[2], rwc_coef[4], lty = 2)
# Woody perennial
abline(rwc_coef[1] + rwc_coef[3], rwc_coef[4], lty = 1)
legend("topright",
       inset = 0.025,
       cex = 0.9,
       c("Annual", "Herbaceous Perennial", "Woody Perennial"),
       lty = c(3, 2, 1),
       pch = c(4, 6, 17))
par(op)

##### SLA and endopolyploidy for Angiosperms with endopolyploid (EI >= 0.1) leaves