# Summarize reads by sample through pipeline

In [17]:

library(tidyr)
library(dplyr)
library(phyloseq)


In [4]:
# Input locations used by the rest of this notebook.

# Per-sample read-tracking tables written by the DADA2 pipeline, one per
# experiment directory.
# NOTE(review): the Exp10 and Exp11 directories are labelled "Exp2" and "Exp3"
# when these files are read further down -- confirm that mapping is intended.
Track1file <- '~/Hyphosphere/data/Exp1/DADA2Files/Tracking.txt'
Track2file <- '~/Hyphosphere/data/Exp10/DADA2Files/Tracking.txt'
Track3file <- '~/Hyphosphere/data/Exp11/DADA2Files/Tracking.txt'

# Saved phyloseq objects (.rds), read later with readRDS().
# Presumably the full, thresholded and contaminant-removed versions of the
# combined three-experiment data set (inferred from the file names only).
# NOTE(review): the last path ends in ".rds.rds" -- confirm it matches the
# file name on disk.
ps.full.file <- '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_full.rds'
ps.thresh.file <- '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh.rds'
ps.nocontam.file <- '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_woTree_woContam.rds.rds'


In [8]:
# Load the three DADA2 tracking tables and tag each with an experiment label.

# Read one tab-delimited tracking file and add a constant `Exp` column.
#   path      - location of the Tracking.txt file
#   exp_label - string stored in the new `Exp` column for every row
# Returns the data frame produced by read.delim() with `Exp` appended.
read_tracking <- function(path, exp_label) {
  track <- read.delim(path, header = TRUE)
  track[["Exp"]] <- exp_label
  track
}

# The sample IDs end up as row names (see the printed heads); a later cell
# copies them into a `Sample` column via rownames().
Track1 <- read_tracking(Track1file, "Exp1")
head(Track1)

Track2 <- read_tracking(Track2file, "Exp2")
head(Track2)

Track3 <- read_tracking(Track3file, "Exp3")
head(Track3)

Unnamed: 0,input,filtered,seqtable,nonchim,Exp
10xBLS156_S303,76535,71931,69469,59843,Exp1
2xBLS143_S297,46036,42761,41067,35803,Exp1
5xBLS120_S300,58139,52793,50890,44074,Exp1
BartHigh_S258,50441,46587,44278,37880,Exp1
BartLow_S270,59500,56642,54238,45142,Exp1
BaseA_S310,43371,40476,38447,33584,Exp1


Unnamed: 0,input,filtered,seqtable,nonchim,Exp
1_3.1RH,108818,103426,100582,97872,Exp2
10_2.1RH,89188,85913,81276,79861,Exp2
100_5.5BP,82663,78206,49083,47567,Exp2
101_5.5CS,90291,84808,66300,64980,Exp2
102_5.5BS,104470,98937,62751,60648,Exp2
103_6.5BP,130629,125529,89890,87042,Exp2


Unnamed: 0,input,filtered,seqtable,nonchim,Exp
1_GVCLHN1-BH,123120,109349,67683,62437,Exp3
10_GVFLHP1-BS,99381,90430,53209,51141,Exp3
100_GVCLHN4-BS,108427,96811,62179,58520,Exp3
101_GVFLHN3-BH,68799,61320,35037,33873,Exp3
102_GVFLHN3-BS,88288,79397,44701,43169,Exp3
103_GVFLHN4-BH,95202,85979,50043,47699,Exp3


In [53]:
# Build one per-sample read-count table from each saved phyloseq object.
# Each object is loaded, summarised with sample_sums(), and then removed from
# the workspace again with rm().

# Turn the per-sample totals of a phyloseq object into a one-column data frame.
#   ps        - phyloseq object to summarise
#   count_col - name given to the single count column
# Returns a data frame whose row names are the sample names.
named_sample_sums <- function(ps, count_col) {
  counts <- as.data.frame(sample_sums(ps))
  colnames(counts) <- count_col
  counts
}

## Object stored in ps.full.file -> FullReads
ps.full <- readRDS(ps.full.file)
ps.full.counts <- named_sample_sums(ps.full, "FullReads")
# Sample is added with mutate() and moved to the front with select().
ps.full.counts <- select(
  mutate(ps.full.counts, Sample = rownames(ps.full.counts)),
  Sample, FullReads
)

head(ps.full.counts)
rm(ps.full)


## Object stored in ps.thresh.file -> ThreshReads
ps.thresh <- readRDS(ps.thresh.file)
ps.thresh.counts <- named_sample_sums(ps.thresh, "ThreshReads")
# Row names are kept here; a later cell compares them between tables.
ps.thresh.counts[["Sample"]] <- rownames(ps.thresh.counts)
head(ps.thresh.counts)
rm(ps.thresh)

## Object stored in ps.nocontam.file -> NoContamReads
ps.nocontam <- readRDS(ps.nocontam.file)
ps.nocontam.counts <- named_sample_sums(ps.nocontam, "NoContamReads")
ps.nocontam.counts[["Sample"]] <- rownames(ps.nocontam.counts)
head(ps.nocontam.counts)
rm(ps.nocontam)



“The `printer` argument is deprecated as of rlang 0.3.0.
“`lang()` is deprecated as of rlang 0.2.0.
Please use `call2()` instead.
“`new_overscope()` is deprecated as of rlang 0.2.0.
Please use `new_data_mask()` instead.
“`overscope_eval_next()` is deprecated as of rlang 0.2.0.
Please use `eval_tidy()` with a data mask instead.

Sample,FullReads
10xBLS156_S303,59843
2xBLS143_S297,35803
5xBLS120_S300,44074
BaseA_S310,33584
BaseB_S291,26702
BaseC_S294,35261


Unnamed: 0,ThreshReads,Sample
10xBLS156_S303,59828,10xBLS156_S303
2xBLS143_S297,35790,2xBLS143_S297
5xBLS120_S300,44060,5xBLS120_S300
BaseA_S310,33584,BaseA_S310
BaseB_S291,26698,BaseB_S291
BaseC_S294,35257,BaseC_S294


Unnamed: 0,NoContamReads,Sample
10xBLS156_S303,40949,10xBLS156_S303
2xBLS143_S297,19771,2xBLS143_S297
5xBLS120_S300,20986,5xBLS120_S300
BaseA_S310,19392,BaseA_S310
BaseB_S291,14720,BaseB_S291
BaseC_S294,19858,BaseC_S294


In [55]:
# Preview the inputs, then combine the three count tables into one wide table
# keyed on Sample. Left joins keep every row of ps.full.counts.
head(ps.full.counts)
head(ps.thresh.counts)
tmp <- ps.full.counts %>%
  left_join(ps.thresh.counts, by = "Sample") %>%
  left_join(ps.nocontam.counts, by = "Sample")
head(tmp)

Sample,FullReads
10xBLS156_S303,59843
2xBLS143_S297,35803
5xBLS120_S300,44074
BaseA_S310,33584
BaseB_S291,26702
BaseC_S294,35261


Unnamed: 0,ThreshReads,Sample
10xBLS156_S303,59828,10xBLS156_S303
2xBLS143_S297,35790,2xBLS143_S297
5xBLS120_S300,44060,5xBLS120_S300
BaseA_S310,33584,BaseA_S310
BaseB_S291,26698,BaseB_S291
BaseC_S294,35257,BaseC_S294


Sample,FullReads,ThreshReads,NoContamReads
10xBLS156_S303,59843,59828,40949
2xBLS143_S297,35803,35790,19771
5xBLS120_S300,44074,44060,20986
BaseA_S310,33584,33584,19392
BaseB_S291,26702,26698,14720
BaseC_S294,35261,35257,19858


In [56]:
## Sample-name check: list the row names of ps.thresh.counts that do not occur
## among the row names of ps.nocontam.counts.
## Alternative element-wise check, left disabled:
#rownames(ps.full.counts)== rownames(ps.thresh.counts)
setdiff(row.names(ps.thresh.counts), row.names(ps.nocontam.counts))

In [59]:
# Root hyphae duplicates that had poor read counts were dropped rather than
# merged, so remove those samples from the joined count table (tmp) as well.
# dim() is printed before and after to show how many rows were removed.
dim(tmp)
Duplicates <- c('155_1.3RH', '156_2.3RH', '157_3.3RH', '158_1.4RH', '159_2.4RH')

tmp <- tmp %>% filter(!(Sample %in% Duplicates))

dim(tmp)
# Row-name based alternative on ps.full.counts, left disabled:
#ps.full.counts = ps.full.counts[!row.names(ps.full.counts) %in% Duplicates,]


In [68]:
# Stack the three tracking tables and copy the sample IDs (held in the row
# names) into a `Sample` column so the table can be joined by sample name.
FullTrack <- as.data.frame(rbind(Track1, Track2, Track3))
FullTrack <- mutate(FullTrack, Sample = rownames(FullTrack))
# mutate() appends Sample as column 6; move it in front of the five original
# columns. This positional reorder assumes exactly five tracking columns.
FullTrack <- FullTrack[, c(6, 1:5)]
head(FullTrack)


Sample,input,filtered,seqtable,nonchim,Exp
10xBLS156_S303,76535,71931,69469,59843,Exp1
2xBLS143_S297,46036,42761,41067,35803,Exp1
5xBLS120_S300,58139,52793,50890,44074,Exp1
BartHigh_S258,50441,46587,44278,37880,Exp1
BartLow_S270,59500,56642,54238,45142,Exp1
BaseA_S310,43371,40476,38447,33584,Exp1


In [70]:
# Combine the DADA2 tracking counts with the phyloseq-derived counts.
# inner_join() keeps only samples present in both FullTrack and tmp; the
# select() fixes the column order and leaves FullReads out of the result.
FinalTrack <- FullTrack %>%
  inner_join(tmp, by = 'Sample') %>%
  select(
    Sample, Exp,
    input, filtered, seqtable, nonchim,
    ThreshReads, NoContamReads
  )
head(FinalTrack)

Sample,Exp,input,filtered,seqtable,nonchim,ThreshReads,NoContamReads
10xBLS156_S303,Exp1,76535,71931,69469,59843,59828,40949
2xBLS143_S297,Exp1,46036,42761,41067,35803,35790,19771
5xBLS120_S300,Exp1,58139,52793,50890,44074,44060,20986
BaseA_S310,Exp1,43371,40476,38447,33584,33584,19392
BaseB_S291,Exp1,34654,31658,30425,26702,26698,14720
BaseC_S294,Exp1,45284,42044,40208,35261,35257,19858


In [72]:
# Write the merged per-sample read-tracking table as a tab-delimited file.
# row.names = FALSE: FinalTrack already carries the sample ID in its `Sample`
# column. With the default (row.names = TRUE) write.table() prepends an unnamed
# column of row indices, so the header line ends up one field shorter than the
# data lines and the file is misaligned for spreadsheet and non-R readers.
# read.delim() returns the same data frame for either layout.
write.table(FinalTrack, file = '~/Hyphosphere/ReadTracking.txt', sep = '\t',
            row.names = FALSE)