# Create table of rhizosphere composition by sampling date

One table for each time point with the following columns:
* Rhizopshere Family
* Family relative abundance
* Number of OTUs /Num of OTUs responders


# Init

In [1]:
%load_ext rpy2.ipython

In [2]:
%%R
reqpkg = c("ggplot2", "phyloseq", "dplyr", "tidyr", 
           "reshape2")
# Load all required packages and show version
for (i in reqpkg) {
    print(i)
    print(packageVersion(i))
    suppressMessages(library(i, quietly = TRUE, verbose = FALSE, 
                             warn.conflicts = FALSE, character.only = TRUE))
}

[1] "ggplot2"
[1] ‘2.2.1’
[1] "phyloseq"
[1] ‘1.19.1’
[1] "dplyr"
[1] ‘0.5.0’
[1] "tidyr"
[1] ‘0.6.1’
[1] "reshape2"
[1] ‘1.4.2’


## load phyloseq object 

In [3]:
%%R
physeq.snorm = readRDS('/home/bryan/ERA/data/MiSeq/20170417_run1/phyloseq/ERA_snorm.rds')
phy.plant = subset_samples(physeq.snorm, Plant != "Bare" & TimePoint != "T0" & SeedTreatment != "Untreated")
phy.plant.t1 = subset_samples(phy.plant, TimePoint == "T1")
phy.plant.t2 = subset_samples(phy.plant, TimePoint == "T2")
phy.plant.t3 = subset_samples(phy.plant, TimePoint == "T3")

## load log2foldchange table 

In [4]:
%%R
df.l2fc = read.csv("/home/bryan/ERA/data/MiSeq/20170417_run1/DESeq2/BarevsHybrid.csv", header = TRUE)

In [5]:
%%R
head(df.l2fc)

    baseMean log2FoldChange     lfcSE        stat    pvalue padj    Rank1
1  6.8726577      0.1300310 0.6832738  0.00000000 1.0000000    1 Bacteria
2  0.6131857      0.1269938 0.6180803  0.00000000 1.0000000    1 Bacteria
3 23.9158212     -1.0456672 0.5212742 -0.08760687 0.9301891    1 Bacteria
4  6.4121877     -0.3889837 0.7521553  0.00000000 1.0000000    1 Bacteria
5  0.9485324     -0.5321542 0.7581169  0.00000000 1.0000000    1 Bacteria
6  5.7901305     -0.8500811 0.7360680  0.00000000 1.0000000    1 Bacteria
             Rank2                 Rank3          Rank4            Rank5
1 __Proteobacteria __Deltaproteobacteria __Myxococcales __Nannocystineae
2 __Proteobacteria __Deltaproteobacteria __Myxococcales __Nannocystineae
3 __Proteobacteria __Deltaproteobacteria __Myxococcales __Nannocystineae
4 __Proteobacteria __Deltaproteobacteria __Myxococcales __Nannocystineae
5 __Proteobacteria __Deltaproteobacteria __Myxococcales __Nannocystineae
6 __Proteobacteria __Deltaproteobacteria __M

## family designations

* Generate list of phyla that need family level designation shifted 

In [6]:
%%R
taxacorrect = df.l2fc %>%
    filter(TimePoint == "T1") %>%
            as.data.frame() %>%
            select(Rank2, Rank3, Rank4) %>%
            filter(grepl("ceae",Rank4)) %>%
               unique()

# Generate relative abundances by family: T1

In [7]:
%%R
phy.plant.t1

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 4592 taxa and 72 samples ]
sample_data() Sample Data:       [ 72 samples by 46 sample variables ]
tax_table()   Taxonomy Table:    [ 4592 taxa by 8 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 4592 tips and 4591 internal nodes ]


In [8]:
%%R
# identify rhizosphere responders at T1
df.l2fc.t1 = filter(df.l2fc, TimePoint == "T1")
responders.t1 = filter(df.l2fc, TimePoint == "T1" & log2FoldChange > 0 & p.adj < 0.05) %>%
    select(OTU) %>%
    mutate(Responder = 1) %>%
    unique()
head(responders.t1) %>% print
dim(responders.t1)

        OTU Responder
1   OTU.313         1
2   OTU.613         1
3   OTU.844         1
4  OTU.6864         1
5 OTU.12001         1
6   OTU.546         1
[1] 125   2


In [23]:
%%R
#Calculate portion of rhizosphere samples occupied by responders at T1
phy.resp.t1 = prune_taxa(as.character(responders.t1$OTU), phy.plant.t1)
Val = sample_sums(phy.resp.t1) %>% mean
paste("Relative abundance of rhizosphere responders from any time point at T1: ", Val) %>% print

#Calculate portion of rhizosphere samples occupied by responders at any time point
phy.resp.t1 = prune_taxa(responders.any, phy.plant.t1)
Val = sample_sums(phy.resp.t1) %>% mean
paste("Relative abundance of rhizosphere responders from any time point at T1: ", Val) %>% print

[1] "Relative abundance of rhizosphere responders from any time point at T1:  0.0921444006159518"
[1] "Relative abundance of rhizosphere responders from any time point at T1:  0.134224011185206"


In [21]:
%%R
mdf = psmelt(phy.plant.t1)
head(mdf)

           OTU         Sample  Abundance     X.sampleID TubeID ExtractionPlate
212930 OTU.362 ERA-T1_3-5c_85 0.03765148 ERA-T1_3-5c_85    212            ERA3
57546   OTU.14  ERA-T1_2-2c_0 0.03701122  ERA-T1_2-2c_0    159            ERA2
246638 OTU.481 ERA-T1_1-2c_85 0.03670899 ERA-T1_1-2c_85    129            ERA1
80229  OTU.163 ERA-T1_1-4b_85 0.03526324 ERA-T1_1-4b_85    143            ERA1
57562   OTU.14 ERA-T1_4-1a_85 0.03169648 ERA-T1_4-1a_85    214            ERA5
30083   OTU.12 ERA-T1_2-5c_85 0.03058624 ERA-T1_2-5c_85    181            ERA2
       PooledDNAPlate Sample.Well.ID PrimerPlate Primer.Number Primer.Well.ID
212930              C            A11           3            49             A7
57546               A            C10           1            75            C10
246638              A             A4           1            25             A4
80229               A             F1           1             6             F1
57562               B            A12           2         

In [49]:
%%R
mdf.r = left_join(mdf, responders.t1, by = "OTU")
head(mdf.r)

      OTU         Sample  Abundance     X.sampleID TubeID ExtractionPlate
1 OTU.362 ERA-T1_3-5c_85 0.03765148 ERA-T1_3-5c_85    212            ERA3
2  OTU.14  ERA-T1_2-2c_0 0.03701122  ERA-T1_2-2c_0    159            ERA2
3 OTU.481 ERA-T1_1-2c_85 0.03670899 ERA-T1_1-2c_85    129            ERA1
4 OTU.163 ERA-T1_1-4b_85 0.03526324 ERA-T1_1-4b_85    143            ERA1
5  OTU.14 ERA-T1_4-1a_85 0.03169648 ERA-T1_4-1a_85    214            ERA5
6  OTU.12 ERA-T1_2-5c_85 0.03058624 ERA-T1_2-5c_85    181            ERA2
  PooledDNAPlate Sample.Well.ID PrimerPlate Primer.Number Primer.Well.ID  X
1              C            A11           3            49             A7 NA
2              A            C10           1            75            C10 NA
3              A             A4           1            25             A4 NA
4              A             F1           1             6             F1 NA
5              B            A12           2            89            A12 NA
6              A          

In [50]:
%%R
byFam = mdf.r %>%
    mutate(., Family = ifelse(grepl("ceae",Rank4), as.character(Rank4), as.character(Rank5))) %>%
    group_by(X.sampleID, Rank2,  Rank3, Rank4, Family) %>% #first sum otus to get family abundances within a sample
    summarize(fAbund = sum(Abundance),
             nOTU = length(Abundance),
             nOTUresp = sum(Responder, na.rm = TRUE)) %>%
    group_by(Rank2, Rank3, Rank4, Family) %>%
    summarize(mAbund = mean(fAbund),
             nOTU = max(nOTU),
             nResponder = max(nOTUresp))

In [51]:
%%R
Top30.t1 = byFam %>% 
    arrange(desc(mAbund)) %>%
    .[1:30,]

In [54]:
%%R
print(Top30.t1, n = 30)

Source: local data frame [30 x 7]
Groups: Rank2, Rank3, Rank4 [27]

                Rank2                 Rank3
               <fctr>                <fctr>
1     __Acidobacteria               __DA023
2    __Actinobacteria     __Thermoleophilia
3    __Planctomycetes    __Planctomycetacia
4    __Proteobacteria  __Betaproteobacteria
5     __Acidobacteria               __DA023
6    __Actinobacteria     __Thermoleophilia
7       __Chloroflexi              __KD4-96
8    __Actinobacteria    __Streptomycetales
9    __Actinobacteria __Propionibacteriales
10 __Gemmatimonadetes    __Gemmatimonadales
11   __Proteobacteria __Gammaproteobacteria
12   __Proteobacteria  __Betaproteobacteria
13   __Proteobacteria __Alphaproteobacteria
14       __Firmicutes             __Bacilli
15    __Bacteroidetes    __Sphingobacteriia
16    __Acidobacteria               __DA023
17   __Proteobacteria __Alphaproteobacteria
18   __Proteobacteria __Deltaproteobacteria
19  __Verrucomicrobia    __Verrucomicrobiae
20   __A

In [56]:
%%R
write.csv(Top30.t1, file = "/home/bryan/ERA/data/figures/Top30_t1.csv", row.names = FALSE)

# Generate relative abundances by Family: T2

In [14]:
%%R
phy.plant.t2

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 4592 taxa and 48 samples ]
sample_data() Sample Data:       [ 48 samples by 46 sample variables ]
tax_table()   Taxonomy Table:    [ 4592 taxa by 8 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 4592 tips and 4591 internal nodes ]


In [19]:
%%R
# identify rhizosphere responders at T1
responders.t2 = filter(df.l2fc, TimePoint == "T2" & log2FoldChange > 0 & p.adj < 0.05) %>%
    select(OTU) %>%
    mutate(Responder = 1) %>%
    unique()
head(responders.t2) %>% print
dim(responders.t2)

responders.any = filter(df.l2fc, log2FoldChange > 0 & p.adj < 0.05) %>%
    select(OTU) %>%
    unique() %>%
    .$OTU %>%
    as.character()
    
    

      OTU Responder
1  OTU.91         1
2  OTU.78         1
3  OTU.12         1
4 OTU.132         1
5  OTU.82         1
6 OTU.166         1


In [22]:
%%R
#Calculate portion of rhizosphere samples occupied by responders at T2
phy.resp.t2 = prune_taxa(as.character(responders.t2$OTU), phy.plant.t2)
Val = sample_sums(phy.resp.t2) %>% mean
paste("Relative abundance of rhizosphere responders at T2: ", Val) %>% print

#Calculate portion of rhizosphere samples occupied by responders at any time point
phy.resp.t2 = prune_taxa(responders.any, phy.plant.t2)
Val = sample_sums(phy.resp.t2) %>% mean
paste("Relative abundance of rhizosphere responders from any time point at T2: ", Val)


[1] "Relative abundance of rhizosphere responders at T2:  0.139980804986853"
[1] "Relative abundance of rhizosphere responders from any time point at T2:  0.200801116096239"


In [59]:
%%R
mdf = psmelt(phy.plant.t2)
head(mdf)

          OTU         Sample  Abundance     X.sampleID TubeID ExtractionPlate
20035  OTU.12 ERA-T2_1-4c_85 0.07297909 ERA-T2_1-4c_85    380            ERA1
39     OTU.10 ERA-T2_4-2c_85 0.06577100 ERA-T2_4-2c_85    458            ERA5
41583 OTU.145 ERA-T2_3-3d_85 0.06225060 ERA-T2_3-3d_85    439            ERA4
38356  OTU.14 ERA-T2_1-2c_85 0.05279312 ERA-T2_1-2c_85    364            ERA1
19048 OTU.119 ERA-T2_3-3d_85 0.04827036 ERA-T2_3-3d_85    439            ERA4
9872   OTU.11 ERA-T2_4-4a_85 0.04645880 ERA-T2_4-4a_85    469            ERA5
      PooledDNAPlate Sample.Well.ID PrimerPlate Primer.Number Primer.Well.ID  X
20035              A             E6           1            45             E6 NA
39                 B            G12           2            95            G12 NA
41583              B             A4           2            25             A4 NA
38356              A             D2           1            12             D2 NA
19048              B             A4           2       

In [60]:
%%R
mdf.r = left_join(mdf, responders.t2, by = "OTU")
head(mdf.r)

      OTU         Sample  Abundance     X.sampleID TubeID ExtractionPlate
1  OTU.12 ERA-T2_1-4c_85 0.07297909 ERA-T2_1-4c_85    380            ERA1
2  OTU.10 ERA-T2_4-2c_85 0.06577100 ERA-T2_4-2c_85    458            ERA5
3 OTU.145 ERA-T2_3-3d_85 0.06225060 ERA-T2_3-3d_85    439            ERA4
4  OTU.14 ERA-T2_1-2c_85 0.05279312 ERA-T2_1-2c_85    364            ERA1
5 OTU.119 ERA-T2_3-3d_85 0.04827036 ERA-T2_3-3d_85    439            ERA4
6  OTU.11 ERA-T2_4-4a_85 0.04645880 ERA-T2_4-4a_85    469            ERA5
  PooledDNAPlate Sample.Well.ID PrimerPlate Primer.Number Primer.Well.ID  X
1              A             E6           1            45             E6 NA
2              B            G12           2            95            G12 NA
3              B             A4           2            25             A4 NA
4              A             D2           1            12             D2 NA
5              B             A4           2            25             A4 NA
6              B          

In [61]:
%%R
byFam = mdf.r %>%
    mutate(., Family = ifelse(grepl("ceae",Rank4), as.character(Rank4), as.character(Rank5))) %>%
    group_by(X.sampleID, Rank2,  Rank3, Rank4, Family) %>% #first sum otus to get family abundances within a sample
    summarize(fAbund = sum(Abundance),
             nOTU = length(Abundance),
             nOTUresp = sum(Responder, na.rm = TRUE)) %>%
    group_by(Rank2, Rank3, Rank4, Family) %>%
    summarize(mAbund = mean(fAbund),
             nOTU = max(nOTU),
             nResponder = max(nOTUresp))

In [63]:
%%R
# Check that calculations add up 
paste("mean abundance of familes sums to: ", sum(byFam$mAbund), sep = "") %>% print
paste("number of OTUs sums to: ", sum(byFam$nOTU), sep = "") %>% print
paste("number of rhizosphere responders sums to: ", sum(byFam$nResponder), sep = "") %>% print

[1] "mean abundance of familes sums to: 1"
[1] "number of OTUs sums to: 4592"
[1] "number of rhizosphere responders sums to: 78"


In [64]:
%%R
Top30.t2 = byFam %>% 
    arrange(desc(mAbund)) %>%
    .[1:30,]

In [66]:
%%R
print(Top30.t2, n = 30)

Source: local data frame [30 x 7]
Groups: Rank2, Rank3, Rank4 [25]

                Rank2                 Rank3
               <fctr>                <fctr>
1     __Acidobacteria               __DA023
2    __Actinobacteria    __Streptomycetales
3    __Actinobacteria     __Thermoleophilia
4    __Planctomycetes    __Planctomycetacia
5    __Proteobacteria  __Betaproteobacteria
6     __Acidobacteria               __DA023
7    __Actinobacteria __Propionibacteriales
8    __Actinobacteria     __Thermoleophilia
9       __Chloroflexi              __KD4-96
10 __Gemmatimonadetes    __Gemmatimonadales
11   __Proteobacteria  __Betaproteobacteria
12   __Proteobacteria __Alphaproteobacteria
13  __Verrucomicrobia    __Verrucomicrobiae
14   __Proteobacteria __Gammaproteobacteria
15   __Proteobacteria  __Betaproteobacteria
16  __Verrucomicrobia    __OPB35_soil_group
17   __Actinobacteria   __Pseudonocardiales
18    __Bacteroidetes          __Cytophagia
19   __Actinobacteria     __Thermoleophilia
20    __

In [67]:
%%R
write.csv(Top30.t2, file = "/home/bryan/ERA/data/figures/Top30_t2.csv", row.names = FALSE)

# Generate relative abundances by Family: T3

In [24]:
%%R
phy.plant.t3

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 4592 taxa and 72 samples ]
sample_data() Sample Data:       [ 72 samples by 46 sample variables ]
tax_table()   Taxonomy Table:    [ 4592 taxa by 8 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 4592 tips and 4591 internal nodes ]


In [25]:
%%R
# identify rhizosphere responders at T3
responders.t3 = filter(df.l2fc, TimePoint == "T3" & log2FoldChange > 0 & p.adj < 0.05) %>%
    select(OTU) %>%
    mutate(Responder = 1) %>%
    unique()
head(responders.t3) %>% print
dim(responders.t3)

       OTU Responder
1 OTU.1012         1
2 OTU.1495         1
3  OTU.314         1
4 OTU.1098         1
5   OTU.91         1
6  OTU.388         1
[1] 241   2


In [26]:
%%R
#Calculate portion of rhizosphere samples occupied by responders at T1
phy.resp.t3 = prune_taxa(as.character(responders.t1$OTU), phy.plant.t3)
Val = sample_sums(phy.resp.t3) %>% mean
paste("Relative abundance of rhizosphere responders from any time point at T3: ", Val) %>% print

#Calculate portion of rhizosphere samples occupied by responders at any time point
phy.resp.t3 = prune_taxa(responders.any, phy.plant.t3)
Val = sample_sums(phy.resp.t3) %>% mean
paste("Relative abundance of rhizosphere responders from any time point at T3: ", Val) %>% print

[1] "Relative abundance of rhizosphere responders from any time point at T3:  0.18539440050168"
[1] "Relative abundance of rhizosphere responders from any time point at T3:  0.264744740653749"


In [70]:
%%R
mdf = psmelt(phy.plant.t3)
head(mdf)

            OTU          Sample  Abundance      X.sampleID TubeID
226175    OTU.4 ERA-T3_4-2c_170 0.06991515 ERA-T3_4-2c_170    575
226941 OTU.4019 ERA-T3_4-5b_170 0.06578727 ERA-T3_4-5b_170    585
226931 OTU.4019 ERA-T3_4-2c_170 0.06525578 ERA-T3_4-2c_170    575
226185    OTU.4  ERA-T3_2-5a_85 0.06388878  ERA-T3_2-5a_85    555
226157    OTU.4  ERA-T3_1-4a_85 0.05863104  ERA-T3_1-4a_85    540
226154    OTU.4  ERA-T3_3-1c_85 0.05862398  ERA-T3_3-1c_85    559
       ExtractionPlate PooledDNAPlate Sample.Well.ID PrimerPlate Primer.Number
226175            ERA5              B             E8           2            61
226941            ERA5              B            D10           2            76
226931            ERA5              B             E8           2            61
226185            ERA2              A            B11           1            82
226157            ERA2              A             A7           1            49
226154            ERA4              B             E2           2

In [71]:
%%R
mdf.r = left_join(mdf, responders.t3, by = "OTU")
head(mdf.r)

       OTU          Sample  Abundance      X.sampleID TubeID ExtractionPlate
1    OTU.4 ERA-T3_4-2c_170 0.06991515 ERA-T3_4-2c_170    575            ERA5
2 OTU.4019 ERA-T3_4-5b_170 0.06578727 ERA-T3_4-5b_170    585            ERA5
3 OTU.4019 ERA-T3_4-2c_170 0.06525578 ERA-T3_4-2c_170    575            ERA5
4    OTU.4  ERA-T3_2-5a_85 0.06388878  ERA-T3_2-5a_85    555            ERA2
5    OTU.4  ERA-T3_1-4a_85 0.05863104  ERA-T3_1-4a_85    540            ERA2
6    OTU.4  ERA-T3_3-1c_85 0.05862398  ERA-T3_3-1c_85    559            ERA4
  PooledDNAPlate Sample.Well.ID PrimerPlate Primer.Number Primer.Well.ID  X
1              B             E8           2            61             E8 NA
2              B            D10           2            76            D10 NA
3              B             E8           2            61             E8 NA
4              A            B11           1            82            B11 NA
5              A             A7           1            49             A7 NA
6    

In [72]:
%%R
byFam = mdf.r %>%
    mutate(., Family = ifelse(grepl("ceae",Rank4), as.character(Rank4), as.character(Rank5))) %>%
    group_by(X.sampleID, Rank2,  Rank3, Rank4, Family) %>% #first sum otus to get family abundances within a sample
    summarize(fAbund = sum(Abundance),
             nOTU = length(Abundance),
             nOTUresp = sum(Responder, na.rm = TRUE)) %>%
    group_by(Rank2, Rank3, Rank4, Family) %>%
    summarize(mAbund = mean(fAbund),
             nOTU = max(nOTU),
             nResponder = max(nOTUresp))

In [73]:
%%R
# Check that calculations add up 
paste("mean abundance of familes sums to: ", sum(byFam$mAbund), sep = "") %>% print
paste("number of OTUs sums to: ", sum(byFam$nOTU), sep = "") %>% print
paste("number of rhizosphere responders sums to: ", sum(byFam$nResponder), sep = "") %>% print

[1] "mean abundance of familes sums to: 1"
[1] "number of OTUs sums to: 4592"
[1] "number of rhizosphere responders sums to: 241"


In [74]:
%%R
Top30.t3= byFam %>% 
    arrange(desc(mAbund)) %>%
    .[1:30,]

In [75]:
%%R
print(Top30.t3, n = 30)

Source: local data frame [30 x 7]
Groups: Rank2, Rank3, Rank4 [24]

                Rank2                 Rank3
               <fctr>                <fctr>
1    __Actinobacteria    __Streptomycetales
2     __Acidobacteria               __DA023
3    __Proteobacteria  __Betaproteobacteria
4    __Actinobacteria     __Thermoleophilia
5    __Planctomycetes    __Planctomycetacia
6    __Actinobacteria __Propionibacteriales
7    __Actinobacteria   __Pseudonocardiales
8     __Acidobacteria               __DA023
9    __Actinobacteria     __Thermoleophilia
10      __Chloroflexi              __KD4-96
11   __Proteobacteria __Alphaproteobacteria
12 __Gemmatimonadetes    __Gemmatimonadales
13   __Proteobacteria __Gammaproteobacteria
14  __Verrucomicrobia    __OPB35_soil_group
15    __Bacteroidetes          __Cytophagia
16   __Proteobacteria  __Betaproteobacteria
17   __Proteobacteria  __Betaproteobacteria
18  __Verrucomicrobia    __Verrucomicrobiae
19    __Bacteroidetes    __Sphingobacteriia
20   __A

In [76]:
%%R
write.csv(Top30.t3, file = "/home/bryan/ERA/data/figures/Top30_t3.csv", row.names = FALSE)

In [None]:
# Composition of rhizosphere responders

In [80]:
%%R
OTUl2f = filter(df.l2fc,log2FoldChange > 0 & p.adj < 0.05) %>%
    group_by(Rank2, Rank3, Rank4, Rank5) %>%
    summarize(numOTUs = length(OTU),
             maxl2fc_withinFamily = max(log2FoldChange))%>%
    arrange(Rank5)



In [81]:
%%R
write.csv(OTUl2f, file = '/home/bryan/ERA/data/figures/l2f_byFamily_table.csv')

# Composition of rhizosphere samples by OTU

In [83]:
%%R
mdf.1 = psmelt(phy.plant.t1)
mdf.2 = psmelt(phy.plant.t2)
mdf.3 = psmelt(phy.plant.t3)

In [85]:
%%R
mdf.1.r = left_join(mdf.1, responders.t1, by = "OTU")
mdf.2.r = left_join(mdf.2, responders.t2, by = "OTU")
mdf.3.r = left_join(mdf.3, responders.t3, by = "OTU")

In [90]:
%%R
byOTU.1 = mdf.1.r %>% 
    group_by(OTU, Rank2, Rank3, Rank4, Rank5, Rank6, Rank7) %>%
    summarize(relAbund = mean(Abundance)*100,
             responder = max(Responder, na.rm = TRUE)) %>%
    arrange(desc(relAbund)) %>%
    .[1:30,] %>%
    mutate(TimePoint = "T1")

byOTU.2 = mdf.2.r %>% 
     group_by(OTU, Rank2, Rank3, Rank4, Rank5, Rank6, Rank7) %>%
    summarize(relAbund = mean(Abundance)*100,
             responder = max(Responder, na.rm = TRUE)) %>%
    arrange(desc(relAbund))%>%
    .[1:30,]%>%
    mutate(TimePoint = "T2")

byOTU.3 = mdf.3.r %>% 
    group_by(OTU, Rank2, Rank3, Rank4, Rank5, Rank6, Rank7) %>%
    summarize(relAbund = mean(Abundance)*100,
             responder = max(Responder, na.rm = TRUE)) %>%
    arrange(desc(relAbund))%>%
    .[1:30,] %>%
    mutate(TimePoint = "T3")

Top30 = bind_rows(byOTU.1, byOTU.2, byOTU.3)

In [91]:
%%R
head(Top30)

Source: local data frame [6 x 10]
Groups: OTU, Rank2, Rank3, Rank4, Rank5, Rank6 [6]

     OTU            Rank2                Rank3                  Rank4
   <chr>           <fctr>               <fctr>                 <fctr>
1 OTU.16    __Chloroflexi             __KD4-96 __uncultured_bacterium
2 OTU.27 __Actinobacteria    __Thermoleophilia           __Gaiellales
3  OTU.6 __Actinobacteria          __MB-A2-108 __uncultured_bacterium
4 OTU.17 __Actinobacteria      __Micrococcales   __Intrasporangiaceae
5 OTU.13 __Actinobacteria    __Thermoleophilia           __Gaiellales
6 OTU.30 __Proteobacteria __Betaproteobacteria      __Burkholderiales
# ... with 6 more variables: Rank5 <fctr>, Rank6 <fctr>, Rank7 <fctr>,
#   relAbund <dbl>, responder <dbl>, TimePoint <chr>


In [92]:
%%R
write.csv(Top30, file = '/home/bryan/ERA/data/figures/Top30_rhizOTUs.csv')

In [77]:
%%R
sessionInfo()

R version 3.3.2 (2016-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 16.04.2 LTS

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] tools     stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] reshape2_1.4.2  tidyr_0.6.1     dplyr_0.5.0     phyloseq_1.19.1
[5] ggplot2_2.2.1  

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.10        plyr_1.8.4          XVector_0.14.1     
 [4] iterators_1.0.8     zlibbioc_1.20.0     jsonlite_1.4       
 [7] tibble_1.3.0        gtable_0.2.0        nlme_3.1-131       
[10] rhdf5_2.18.0        lattice_0.20-35     mgcv_1.8-17        
[13]