In [None]:
library("phangorn")
library("castor")
library("ape")

In [None]:
# N.B. set this to the local path of the GitHub repo before running.
# Paths below are built as rootdir + "/...", so leave off any trailing slash.
rootdir <- ""

# calculate trait depth + homoplasy

Define the trees and the trait table to analyze, then run consenTRAIT (castor `get_trait_depth`) and the consistency index (phangorn `CI`) for every trait. <b>N.B.</b> this takes a few minutes to run.

In [None]:
# tree file(s) to analyze - the analysis loop iterates over every entry of this vector
trees <- c(
  paste0(rootdir, "/trees/bac175_outgroup/rp16_concat.BAC175.final.pruned.treefile")
)

# trait table (CSV) read by the analysis loop
trait_table <- paste0(rootdir, "/metabolism/trait_analysis/trait_table.csv")

In [None]:
# Run consenTRAIT (castor get_trait_depth) and the sitewise consistency index
# (phangorn CI) for every trait column on every tree listed in `trees`.
#
# Produces two character matrices (NULL if nothing was collected):
#   all_results - one row per positive clade:
#                 tree name, trait, min_fraction, P, clade index, mean depth of that clade
#   cresults    - one row per trait:
#                 trait, CI, row sum of the trait across tips, tree name

# the trait table does not depend on the tree, so read it once up front
trait = read.csv(trait_table, header=T)
# every column except "X" and "newname" is treated as a trait; setdiff() keeps
# this a character vector even when only a single trait column is present
traits = setdiff(names(trait), c("X", "newname"))

# min_fraction thresholds passed to get_trait_depth
min_fracs = c(0.75, 0.80, 0.85, 0.90)

# collect per-iteration pieces in lists and bind once at the end
# (avoids re-copying the growing result matrix on every rbind)
depth_chunks = list()
ci_chunks = list()

for (tree in trees){
  
  # tree label = file basename up to the first underscore
  treename = unlist(strsplit(basename(tree), "_"))[1]
  # read in tree
  treefile = read.tree(file = tree)
  # remove problem bac175 taxon
  treefile = drop.tip(treefile, "None_CG1_02_FULL_CPR_43_390_curated")
  # reroot on every tip whose label contains "BAC175"
  outgroup = treefile$tip.label[grepl("BAC175", treefile$tip.label)]
  rooted = root(treefile, outgroup)
  
  # subset and reorder trait rows to match the tip order of the tree;
  # tips that are absent from the trait table end up as all-NA rows
  trait_sub = subset(trait, newname %in% rooted$tip.label)
  trait_ordered = trait_sub[match(rooted$tip.label, trait_sub$newname),]
  
  # now run consentrait for each trait
  # (loop variable deliberately not called `t`, which would shadow base::t used below)
  for (trait_name in traits){
    
    # NOTE(review): tips missing from the trait table are passed on as NA states
    # here - confirm how get_trait_depth treats them
    tip_states = as.numeric(trait_ordered[[trait_name]])
    
    # over a range of min_frac thresholds
    for (min_frac in min_fracs){
      # run consentrait
      results = get_trait_depth(rooted, tip_states, Npermutations = 1000, count_singletons = F, min_fraction = min_frac)
      # extract positive clade depths - one row per positive clade
      clades = results$positive_clades
      if (length(clades) > 0){
        # scalars are recycled across the clade rows; deparse.level = 0 keeps
        # the matrix free of column names
        depth_chunks[[length(depth_chunks) + 1]] = cbind(treename, trait_name, min_frac, results$P, clades,
                                                         results$mean_depth_per_clade[clades], deparse.level = 0)
      }
    }
  }
  
  # run CI for all traits - remove rows containing NAs to avoid error
  trait_ordered = na.omit(trait_ordered)
  rownames(trait_ordered) = trait_ordered$newname
  # drop = FALSE keeps a data frame even with a single trait column
  trait_final = trait_ordered[, traits, drop = FALSE]
  # traits as rows, taxa as columns
  ttrait = t(trait_final)
  
  # read in as phyDat object
  trait_data = phyDat(data.frame(ttrait), type="USER", levels=c("0","1"))
  # remove odd missing taxa that we don't find in the trait table
  # (setdiff, unlike x[-match(...)], is also correct when the match vector is empty)
  rooted.pruned = drop.tip(rooted, setdiff(rooted$tip.label, colnames(ttrait)))
  # change tree tip names - taxon names in the phyDat object have "-" replaced
  # with "." (presumably by data.frame() name checking - TODO confirm) - must match
  rooted.pruned$tip.label = gsub("-", ".", rooted.pruned$tip.label, fixed = TRUE)
  #execute
  cir = CI(rooted.pruned, trait_data, sitewise = T)
  # add metadata: trait name, CI, row sum of the trait across tips, tree name
  cirs = cbind(rownames(ttrait), cir, as.vector(rowSums(ttrait)), rep(treename, length(cir)))
  ci_chunks[[length(ci_chunks) + 1]] = cirs
  
}

# bind everything once; do.call(rbind, list()) yields NULL when nothing was collected
all_results = do.call(rbind, depth_chunks)
cresults = do.call(rbind, ci_chunks)

In [None]:
# consenTRAIT results: matrix -> data frame with sequential row names and column labels
all_df <- data.frame(all_results)
rownames(all_df) <- 1:nrow(all_df)
names(all_df) <- c("tree", "trait", "min_fraction", "pval", "clade", "mean_depth")
# write out
write.csv(
  all_df,
  file = paste0(rootdir, "/metabolism/trait_analysis/bac175_consentrait_results.csv"),
  row.names = FALSE
)

# consistency-index results: same treatment
cdf <- data.frame(cresults)
rownames(cdf) <- 1:nrow(cdf)
names(cdf) <- c("trait", "ci", "family_size", "tree")
write.csv(
  cdf,
  file = paste0(rootdir, "/metabolism/trait_analysis/bac175_ci_results.csv"),
  row.names = FALSE
)