/
prep_data.R
457 lines (335 loc) · 17.7 KB
/
prep_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
### This script prepares Lichen FD project data tables exported from the SQLite database for analysis.
options(stringsAsFactors=F)
# Load functions
source('C:/Users/jrcoyle/Documents/UNC/Projects/Lichen Functional Diversity/Analysis/GitHub/NC-forest-lichens/lichen_FD_functions.R')
# Define data locations
mydir = 'C://Users/jrcoyle/Documents/UNC/Projects/Lichen Functional Diversity/Analysis/'
sql_dir = './Data/SQLite Tables/'
derive_dir = './Data/Derived Tables/'
setwd(mydir)
# Read in basic data tables from SQLite database
plots = read.csv(paste(sql_dir,'plots.csv', sep=''))
canopy = read.csv(paste(sql_dir,'canopy.csv', sep=''))
trees = read.csv(paste(sql_dir,'trees.csv', sep=''))
bark_traits = read.csv(paste(sql_dir,'bark_traits.csv', sep=''))
samples = read.csv(paste(sql_dir,'samples.csv', sep=''))
lichens = read.csv(paste(sql_dir,'lichens.csv', sep=''))
#######################################################
### Tree-level data
# Assess light bias associated with increasing light levels at dawn
treedata = merge(trees, canopy)
# Make date into date class
treedata$Date_time = as.POSIXlt(treedata$Date_time)
# Plot how light variables are affected by EV difference from reference
yvars = c('Canopy_open','Trans_tot')
syms = expand.grid(pch=c(1,16),col=rainbow(10))
syms = syms[1:19,]
rownames(syms) = plots$PlotID[order(plots$PairID, decreasing=T)]
par(mfrow=c(1,length(yvars)))
for(y in yvars){
plot(treedata[,y]~EVdif, data=treedata, ylab=y,
pch=syms[treedata$PlotID,'pch'], col=syms[treedata$PlotID, 'col'])
}
plot(Canopy_open~Trans_tot, data=treedata, pch=syms[treedata$PlotID,'pch'], col=syms[treedata$PlotID, 'col'])
pdf('./Figures/Dependence of light on camera EVdif.pdf', height=4, width=8)
for(p in rownames(syms)){
use_trees = subset(treedata, PlotID==p)
par(mfrow=c(1,2))
for(y in yvars){
plot(use_trees[,y]~EVdif, data=use_trees, ylab=y, main=p)
}
}
dev.off()
plot(EVdif~I(Date_time$hour+Date_time$min/60), data=treedata)
# Detrend Trans_tot based on EVdif assumming same relationship between EVdif and Trans_tot across sites
trans_mod = lm(Trans_tot~EVdif, data=treedata)
plot(Trans_tot~EVdif, data=treedata); abline(trans_mod)
treedata$Trans_tot_cor = resid(trans_mod)
open_mod = lm(Canopy_open~EVdif, data=treedata)
plot(Canopy_open~EVdif, data=treedata); abline(open_mod)
treedata$Canopy_open_cor = resid(open_mod)
par(mfrow=c(1,2))
plot(Trans_tot~Canopy_open, data=treedata)
plot(Trans_tot_cor~Canopy_open_cor, data=treedata)
# Transmission of light below different tree species
meds = with(treedata, tapply(Trans_tot_cor, TreeTaxonID, median))
plot_order = names(meds[order(meds)])
par(mar=c(10,3,1,1))
plot(Trans_tot_cor~factor(TreeTaxonID, levels=plot_order), data=treedata, las=2, xlab='')
# Light differeces across plots
plot(Trans_tot_cor~factor(PlotID), data=treedata, las=2, xlab='')
keep_cols = c('PlotID','TreeTaxonID','Module','DBH','UncertainID', 'Trans_tot_cor','Canopy_open_cor')
rownames(treedata) = treedata$TreeID
write.csv(treedata[,keep_cols], paste(derive_dir,'treedata.csv', sep=''), row.names=T)
treedata = read.csv(paste(derive_dir,'treedata.csv', sep=''), row.names=1)
###############################################################
### Sample level data
library(stringr)
# NOTE: lines not needed to generate samp_data table have been commented out
## Dependence of pH on pH meter
#boxplot(pH~pH_meter, data=bark_traits)
## Trait differences among re-measured samples
#dups = table(bark_traits$SampID)
#dups_many = names(dups[dups>2])
# Remove multiple measurements that should not be analyzed
#subset(bark_traits, SampID==dups_many[9])
remove_samps = c(724,725,726,793,1318,1319,932,895,896,1815,1818,2002)
barkdata = subset(bark_traits, !(BarkMeasID %in% remove_samps))
dups = table(barkdata$SampID)
dups = names(dups[dups>1])
# Calculate bark trait mean and range across bark measurements from the same sample
btraits = c('pH','WaterCapacity','Density','MassDry')
traitrange = aggregate(barkdata[,btraits], by=list(SampID=barkdata$SampID), FUN=function(x) abs(diff(range(x))))
names(traitrange)[names(traitrange)%in%btraits] = paste(btraits,'rng',sep='_')
traitmean = aggregate(barkdata[,btraits], by=list(SampID=barkdata$SampID), FUN=mean)
names(traitmean)[names(traitmean)%in%btraits] = paste(btraits,'mn',sep='_')
meter = aggregate(barkdata$pH_meter, by=list(SampID=barkdata$SampID), FUN=function(x){
if(length(unique(x))==1){ unique(x) } else {'Both'}
})
names(meter)[2] = 'pH_meter'
samptraits = merge(traitmean, traitrange)
samptraits = merge(samptraits, meter)
#use_data = samptraits[rowSums(samptraits[,paste(btraits, 'rng', sep='_')])>0,]
#plot(pH_rng~pH_mn, data=use_data)
#plot(WaterCapacity_rng~WaterCapacity_mn, data=use_data)
#plot(Density_rng~Density_mn, data=use_data)
#boxplot(pH_rng~pH_meter, data=use_data)
#tapply(samptraits$pH_rng, samptraits$pH_meter, mean)
#pHmod = lm(pH~factor(SampID)+factor(pH_meter), data=subset(barkdata, SampID %in% dups))
#anova(pHmod)
# Check which measurements are bad
use_data = merge(barkdata, samples[,c('SampID','Date')])
use_data$Date = as.Date(use_data$Date, format='%m/%d/%Y')
use_data$year = format(use_data$Date, '%Y')
#aggregate(use_data$pH, by=list(use_data$year, use_data$pH_meter), mean)
#boxplot(pH~year+pH_meter, data=use_data) # only samples from 2013 may be biased
#King2013 = subset(use_data, year=='2013'&pH_meter=='Kingsolver')
#plot(pH~Date, data=King2013)
#nrow(subset(King2013, !(SampID %in%dups))) # Would need to remeasure 773 + 45 samples
# Calculate rank order of samples measured under Kier vs Kingsolver pH meter
#use_samps = subset(samptraits, pH_meter=='Both')$SampID
#kings = subset(barkdata, SampID %in% use_samps & pH_meter=='Kingsolver')
#kiers = subset(barkdata, SampID %in% use_samps & pH_meter=='Kier')
#kings$king_rank = rank(kings$pH)
#kiers$kier_rank = rank(kiers$pH)
#kks = merge(kings[,c('SampID','king_rank','pH')],kiers[,c('SampID','kier_rank','pH')], by='SampID')
#plot(king_rank~kier_rank, data=kks[order(kks$king_rank),])
# Calculate mean difference among samples measured on both pH meters
use_samps = subset(samptraits, pH_meter=='Both')$SampID
#boxplot(pH~pH_meter, data=subset(barkdata, SampID %in% use_samps))
difs = sapply(use_samps, function(x){
use_data = subset(barkdata, SampID == x)
a = mean(use_data[use_data$pH_meter=='Kingsolver','pH'])
b = mean(use_data[use_data$pH_meter=='Kier','pH'])
diff(c(a,b))
})
mean_pH_diff = round(mean(difs),3)
# Remove duplicate samples from Kingsolver pH meter when measurement from Kier meter is available
use_samps = subset(samptraits, pH_meter=='Both')$SampID
remove_samps = subset(barkdata, (SampID %in% use_samps)&(pH_meter=='Kingsolver'))$BarkMeasID
barkdata = subset(barkdata, !(BarkMeasID %in% remove_samps))
# Correct for difference between pH meters in 2013 by adding the mean difference from repeated measures
use_samps = subset(use_data, year=='2013'&pH_meter=='Kingsolver')$BarkMeasID
barkdata[barkdata$BarkMeasID %in% use_samps,'pH'] = barkdata[barkdata$BarkMeasID %in% use_samps,'pH'] + mean_pH_diff
#boxplot(pH~pH_meter, data=barkdata)
## Estimate bark trait differences among observers
#boxplot(pH~Observer, data=barkdata)
#boxplot(WaterCapacity~Observer, data=barkdata)
#boxplot(Density~Observer, data=barkdata)
# No worrisome differences
## Filter for unlikely trait values
# Negative water holding capacity
#subset(barkdata, WaterCapacity < 0 )
#subset(barkdata, WaterCapacity < 0 & Notes!='forgot to measure wet mass' )
# modified data where it looked like WetMass and Drymass values had been swapped with neighboring samples or mis-recorded
# High Water Capacity
#subset(barkdata, WaterCapacity >5)
# modified 2 records with MassWet off by 1 decimal place
# Assign remaining negative WaterCapacity values to NA
barkdata[which(barkdata$WaterCapacity<0),'WaterCapacity'] = NA
# Density high
#plot(Density~MassDry, data=barkdata)
#subset(barkdata, Density > 1)
#subset(barkdata, Density > 3)
# corrected a couple obvious errors, but most of these may are too difficult to figure out the possible error
#subset(barkdata, Density < 0.05)
### Generate sample-level environmental data table
sampdata = samples[,c('SampID','TreeID','SampNum','Height','Angle','Bryophytes','Shedding','FurrowDepth','Aspect','NoLichen','Notes')]
# Convert Bryophytes 'several' and 'some' code to same level
sampdata[sampdata$Bryophytes=='some','Bryophytes'] = 'several'
# Convert FurrowDepth to number
sampdata[sampdata$FurrowDepth=='<0.5','FurrowDepth'] = 0.25
# Average bark traits from samples measured more than once
dups = names(which(table(barkdata$SampID)>1))
singles = names(which(table(barkdata$SampID)==1))
barktraitdata = subset(barkdata, SampID %in% singles)[,c('SampID','pH','Density','WaterCapacity','Notes')]
for(this_samp in dups){
these_bark = subset(barkdata, SampID==this_samp)
masses = these_bark$MassDry
# If one sample is <10mg, then exclude this data
if(sum(masses>0.01)>=1){
these_bark = subset(these_bark, MassDry > 0.01)
}
# Calculate average trait
avgs = apply(these_bark[,c('pH','Density','WaterCapacity')], 2, mean)
notes = str_trim(paste(these_bark$Notes, collapse=' '))
barktraitdata = rbind(barktraitdata, data.frame(SampID=this_samp, t(avgs), Notes=notes))
}
# Merge sample level data
sampdata[which(!(sampdata$SampID %in% barktraitdata$SampID)),] # Several samples missing bark data
barktraitdata[which(!(barktraitdata$SampID %in% sampdata$SampID)),] # Corresponds to a sample not used in analysis
colnames(sampdata)[colnames(sampdata)=='Notes'] = 'Notes_samp'
colnames(barktraitdata)[colnames(barktraitdata)=='Notes'] = 'Notes_bark'
sampdata = merge(sampdata, barktraitdata, all.x=T, all.y=F)
rownames(sampdata) = sampdata$SampID
keep_cols = c('TreeID','SampNum','Height','Angle','Bryophytes','Shedding','FurrowDepth','Aspect','pH','Density','WaterCapacity','NoLichen','Notes_samp','Notes_bark')
write.csv(sampdata[,keep_cols], paste(derive_dir, 'sampdata.csv', sep=''), row.names=T)
###########################################################################
### Plot level data
library(sp)
library(raster)
library(geosphere)
# Select columns in plots table to be in plot environmental data table
plot_data = plots[,c('PlotID','Lon','Lat','Year','Ecoregion','PairID','TopoPos','RH','Soil_pH','Soil_ECEC')]
## Cloud frequency (Wilson et al. unpub)
clouds = read.csv('./Data/cloud_cover_Wilson2015.csv')
clouds = clouds[,c('PlotID','Meanannual_CF','Intraannual_SD')]
names(clouds)[2:3] = c('CloudFreq_mean', 'CloudFreq_sd')
plot_data = merge(plot_data, clouds)
## PRISM data: precip, tmax, tmean, vpdmax
prism_dir = 'C:/Users/jrcoyle/Documents/UNC/Projects/GIS Data/PRISM/Normals81/'
ap = raster(paste(prism_dir, 'PRISM_ppt_30yr_normal_800mM2_annual_bil/PRISM_ppt_30yr_normal_800mM2_annual_bil.bil', sep=''))
tmax = raster(paste(prism_dir, 'PRISM_tmax_30yr_normal_800mM2_annual_bil/PRISM_tmax_30yr_normal_800mM2_annual_bil.bil', sep=''))
tmean = raster(paste(prism_dir, 'PRISM_tmean_30yr_normal_800mM2_annual_bil/PRISM_tmean_30yr_normal_800mM2_annual_bil.bil', sep=''))
vpdmax = raster(paste(prism_dir, 'PRISM_vpdmax_30yr_normal_800mM2_annual_bil/PRISM_vpdmax_30yr_normal_800mM2_annual_bil.bil', sep=''))
prism = stack(ap, tmax, tmean, vpdmax)
names(prism) = c('AP','T_max','T_mean','VPD_max')
prism_data = extract(prism, plot_data[,c('Lon','Lat')])
plot_data = cbind(plot_data, prism_data)
## NED: 1/9 arc-sec DEM
# Go through each location separately because DEM downloaded as tiles
NED_dir = 'C:/Users/jrcoyle/Documents/UNC/Projects/GIS Data/DEM grids/NED/'
filenames = list.files(paste(NED_dir))
elev_data = data.frame()
for(p in plot_data$PlotID){
# Get data for this plot
this_plot = subset(plots, PlotID==p)
# Read in DEM for this location
loc = substr(p, 1, nchar(p)-1)
this_dir = filenames[grep(loc, filenames)]
file_contents = list.files(paste(NED_dir,this_dir, sep=''))
this_file = file_contents[grep('.img$',file_contents)]
this_dem = raster(paste(NED_dir, this_dir, '/',this_file, sep=''))
# Find the center of the plot
start_pt = this_plot[,c('Lon','Lat')]
mid_pt = destPoint(start_pt, this_plot$Azimuth_true, 25)
# Extract elevation
elev = extract(this_dem, mid_pt)
# Calculate topographic openness
# Crop DEM to .01-deg around focal point
use_ext = extent(rep(as.numeric(mid_pt), each=2)+c(-.01,.01,-.01,.01))
this_dem = crop(this_dem, use_ext)
# Calculate openness from this point
topo = calc_openness(mid_pt, this_dem, 250)
# Add to data frame
elev_data = rbind(elev_data, data.frame(PlotID=p,Elevation=elev, topo))
}
plot_data = merge(plot_data, elev_data)
rownames(plot_data) = plot_data$PlotID
plot_data = plot_data[,-1]
# Save plot data
write.csv(plot_data, paste(derive_dir, 'plot_data.csv', sep=''), row.names=T)
# Plot difference in topographic exposure between paired plots
plot_data$TopoPos = factor(plot_data$TopoPos, levels=c('sheltered','exposed'))
pdf('./Figures/topographic openness of paired plots.pdf', height=4, width=7)
par(mfrow=c(1,2))
plot(OpenPos~as.numeric(TopoPos), data=plot_data, type='n', axes=F,
xlab='Topographic Position', ylab='Topographic Openness Index', main='Positive Openness')
use_tab = xtabs(OpenPos~PairID+TopoPos, data=plot_data)
for(i in 1:nrow(use_tab)){
lines(c(1.25,1.75), use_tab[i,], type='o')
text(1.9, use_tab[i,2], rownames(use_tab)[i])
}
axis(1, at=c(1.25,1.75), labels=levels(plot_data$TopoPos))
axis(2, las=1)
plot(OpenNeg~as.numeric(TopoPos), data=plot_data, type='n', axes=F,
xlab='Topographic Position', ylab='Topographic Openness Index', main='Negative Openness')
use_tab = xtabs(OpenNeg~PairID+TopoPos, data=plot_data)
for(i in 1:nrow(use_tab)){
lines(c(1.25,1.75), use_tab[i,], type='o')
text(1.9, use_tab[i,2], rownames(use_tab)[i])
}
axis(1, at=c(1.25,1.75), labels=levels(plot_data$TopoPos))
axis(2, las=1)
dev.off()
apply(use_tab, 1, function(x) x[2]-x[1])
###################################################################
### Lichen-level data
## Traits to be analyzed are:
## Level 1 : Attachment, Form, ColorVis, Photobiont, Pseudocyphellae, Maculae, Asco, Asexual
## Level 2 : Cilia, Rhizines, LobeShape, LobeArea, LobeDissect, CrustForm, AscoForm, AscoCover, AscoAbun, AsexualForm, AsexualAbun
### Recode traits
lichen_data = lichens
# Foliose size and shape
lichen_data$LobeArea = lichens$LobeWidth*lichens$LobeDepth
lichen_data$LobeDissect = lichens$LobeWidth/lichens$LobeDepth
# Sexual reproductive output - note that lirellae will have larger values than apothecia
lichen_data$AscoAbun = ifelse(is.na(lichens$AscoDist)&!is.na(lichens$AscoWidth), 0,
lichens$AscoWidth/(lichens$AscoWidth+lichens$AscoDist))
# Presence of Ascomata
lichen_data$Asco = !(lichen_data$AscoForm=='none')
# AscoCover
lichen_data$AscoCover = NA
lichen_data[lichen_data$AscoForm %in% c('perithecia','recessed apothecia','warts'),'AscoCover'] = 'covered'
lichen_data[lichen_data$AscoForm %in% c('apothecia','lirellae','stalked'),'AscoCover'] = 'exposed'
# Presence of Asexual
lichen_data$Asexual = !(lichen_data$AsexualForm=='none')
## Assign NA to hierarchical trait combinations that are not possible
lichen_data[lichen_data$Form!='foliose',c('Cilia','Rhizines','LobeShape','LobeDissect','LobeArea')] = NA
lichen_data[lichen_data$Form!='crustose', 'CrustForm'] = NA
lichen_data[(lichen_data$Asco==F)|is.na(lichen_data$Asco), c('AscoForm','AscoWidth','AscoHeight','AscoDist','AscoAbun')] = NA
lichen_data[(lichen_data$Asexual==F)|is.na(lichen_data$Asexual),c('AsexualForm', 'AsexualLoc', 'AsexualAbun')] = NA
# Make table for analysis
level1_traits = c('Attachment', 'Form', 'ColorVis', 'Photobiont', 'Pseudocyphellae', 'Maculae', 'Asco', 'Asexual')
level2_traits = c('Cilia', 'Rhizines', 'LobeShape', 'LobeArea', 'LobeDissect', 'CrustForm', 'AscoForm', 'AscoCover', 'AscoAbun', 'AsexualForm', 'AsexualAbun')
traitdf = data.frame(TraitName=c(level1_traits,level2_traits),
Level=c(rep(1, length(level1_traits)), rep(2, length(level2_traits))))
lichen_data = lichen_data[,c('LichenID','SampID','Name','AbunCount',traitdf$TraitName)]
rownames(lichen_data) = lichen_data$LichenID
lichen_data = lichen_data[,-1]
# Write to file- will fill in the other columns manually
#write.csv(traitdf, 'trait_levels.csv', row.names=F)
write.csv(lichen_data, paste(derive_dir, 'lichendata.csv', sep=''), row.names=T)
# Read back in data
lichen_data = read.csv(paste(derive_dir, 'lichendata.csv', sep=''), row.names=1)
# Drop lichen with missing data
lichen_data = subset(lichen_data, Form!='')
## Calculate morphotype X trait matrix
# Drop ColorVis from analysis because trait may not be consistent measure of intrinsic property
use_traits = traitdf$TraitName[traitdf$TraitName!='ColorVis']
# Find unique morphotypes
morphos = unique(lichen_data[,use_traits])
morphos = morphos[order(morphos$Form, morphos$Photobiont, morphos$Asco, morphos$Asexual, morphos$Attachment, morphos$CrustForm),]
morphos$MorphID = paste('M', 1:nrow(morphos), sep='')
rownames(morphos) = morphos$MorphID
# Save morphotypes
write.csv(morphos, paste(derive_dir, 'morphotypes.csv', sep=''), row.names=T)
# Read in morphotypes
morphos = read.csv(paste(derive_dir, 'morphotypes.csv', sep=''), row.names=1)
## Calculate sample X morphotype abundance matrix
lichen_data$MorphID = sapply(1:nrow(lichen_data), function(x){
match_morpho(lichen_data[x,], morphos[,names(morphos)!='MorphID'])
})
# Save lichen data
write.csv(lichen_data, paste(derive_dir, 'lichendata.csv', sep=''), row.names=T)
# Read in lichen data
lichen_data = read.csv(paste(derive_dir, 'lichendata.csv', sep=''), row.names=1)
# Convert Abundance counts to number and assign abundance of 1 to missing values
lichen_data$AbunCount = as.numeric(lichen_data$AbunCount)
lichen_data[is.na(lichen_data$AbunCount),'AbunCount'] = 1
# Distinct species with the same morphotye have their abundance added
sampXmorph = xtabs(AbunCount ~ SampID + MorphID, data=lichen_data)
# Save morphotype abundance matrix
write.csv(sampXmorph, paste(derive_dir, 'sampXmorph.csv', sep=''), row.names=T)
morph_freq = colSums(sampXmorph)
plot(morph_freq[order(morph_freq, decreasing=T)])