In [1]:
from IPython.display import FileLink

In [2]:
%load_ext rpy2.ipython

In [3]:
%%R
library(DESeq2)
library(phyloseq)
library(plyr); library(dplyr)
library(ggplot2)
library(doParallel)
library(foreach)
library(reshape)
library(RColorBrewer)
library(gridExtra)
library(stats)
library(data.table)
library(compare, lib.loc="/home/ashley/R/x86_64-pc-linux-gnu-library/3.1/")
library(lawstat, lib.loc="/home/ashley/R/x86_64-pc-linux-gnu-library/3.1/")

Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following object is masked from ‘package:stats’:

    xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, as.vector, cbind, colnames,
    do.call, duplicated, eval, evalq, Filter, Find, get, intersect,
    is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax,
    pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rep.int,
    rownames, sapply, setdiff, sort, table, tapply, union, unique,
    unlist, unsplit

Loading required package: IRanges
Loading required package: GenomicRanges
Loading required package:

### The table loaded below is the output file of the [Sparsity Notebook](./Sparsity_make_l2fc_table.ipynb).

In [4]:
%%R
# Read the l2fc table (CSV) into the data frame `df_all` and preview its
# first rows.
df_all <- read.csv(file = "/var/seq_data/priming_exp/data/l2fc_table.csv")
head(df_all)

  sparsity_threshold Day Treatment       OTU log2FoldChange         p    Rank1
1               0.25  14    13C000  OTU.4204    -0.20553170 1.0000000 Bacteria
2               0.25  14    13C000   OTU.467    -0.33054855 1.0000000 Bacteria
3               0.25  14    13C000   OTU.905     0.01698885 0.9827594 Bacteria
4               0.25  14    13C000  OTU.7253     0.02799601 0.9772968 Bacteria
5               0.25  14    13C000    OTU.67    -0.60115954 1.0000000 Bacteria
6               0.25  14    13C000 OTU.10580     0.36193427 0.7248674 Bacteria
          Rank2 Rank3                Rank4 Rank5 Rank6 Rank7 Rank8 padj
1 Acidobacteria DA023                 <NA>  <NA>  <NA>  <NA>  <NA>    1
2 Acidobacteria DA023 uncultured_bacterium  <NA>  <NA>  <NA>  <NA>    1
3 Acidobacteria DA023 uncultured_bacterium  <NA>  <NA>  <NA>  <NA>    1
4 Acidobacteria DA023 uncultured_bacterium  <NA>  <NA>  <NA>  <NA>    1
5 Acidobacteria DA023 uncultured_bacterium  <NA>  <NA>  <NA>  <NA>    1
6 Acidobacteria

In [5]:
%%R
# Total number of rows in the loaded table.
nrow(df_all)

[1] 28471


### Treatment: CC

In [6]:
%%R
# Keep the rows of df_all that belong to treatment "13C000" and whose `padj`
# is at or below the FDR cutoff. which() drops rows where the condition is NA,
# and the original row names are retained.
FDR <- 0.10
df_CC_resp <- df_all[which(df_all$Treatment == "13C000" & df_all$padj <= FDR), ]
head(df_CC_resp)

    sparsity_threshold Day Treatment       OTU log2FoldChange            p
134               0.25  14    13C000  OTU.7337       2.477516 0.0018908765
251               0.25  14    13C000   OTU.950       3.497786 0.0021470637
252               0.25  14    13C000 OTU.11380       5.169283 0.0002911960
263               0.25  14    13C000  OTU.5228       2.654885 0.0002285059
269               0.25  14    13C000   OTU.903       3.807659 0.0015749699
270               0.25  14    13C000   OTU.752       4.813177 0.0002952637
       Rank1           Rank2            Rank3              Rank4
134 Bacteria   Acidobacteria       Holophagae             iii1-8
251 Bacteria Verrucomicrobia Verrucomicrobiae Verrucomicrobiales
252 Bacteria Verrucomicrobia Verrucomicrobiae Verrucomicrobiales
263 Bacteria Verrucomicrobia Verrucomicrobiae Verrucomicrobiales
269 Bacteria Verrucomicrobia Verrucomicrobiae Verrucomicrobiales
270 Bacteria Verrucomicrobia   Spartobacteria Chthoniobacterales
                   R

In [7]:
%%R
# Number of "13C000" responder rows for every Day / Rank2 / Treatment
# combination, stored in the `Counts` column.
# NOTE(review): the output recorded for this cell is ordered by Day and Rank2,
# not by descending Counts -- presumably arrange() honoured the grouping in the
# dplyr version used. Confirm before relying on the row order.
phy.sum.day.CC <- select(df_CC_resp, OTU, Day, Treatment, Rank2, Rank3) %>%
    group_by(Day, Rank2, Treatment) %>%
    summarise(Counts = n()) %>%
    arrange(desc(Counts))

phy.sum.day.CC

Source: local data frame [35 x 4]
Groups: Day, Rank2

   Day                   Rank2 Treatment Counts
1   14           Acidobacteria    13C000      4
2   14          Actinobacteria    13C000      2
3   14         Armatimonadetes    13C000      4
4   14           Bacteroidetes    13C000     39
5   14 Candidate_division_BRC1    13C000      1
6   14                Chlorobi    13C000      2
7   14             Chloroflexi    13C000     11
8   14           Cyanobacteria    13C000      1
9   14             Nitrospirae    13C000      1
10  14          Planctomycetes    13C000     11
.. ...                     ...       ...    ...


In [8]:
%%R
# Keep only the first row encountered for each OTU, then report how many
# distinct OTUs remain.
df_CC_resp_unique <- df_CC_resp[!duplicated(df_CC_resp$OTU), ]
nrow(df_CC_resp_unique)

[1] 369


In [9]:
%%R
# Row counts per Treatment / Rank2 / Day for the "13C000" responders, plus each
# count expressed as a percentage of all responder rows on the same Day.
CC_counts_perday <- group_by(df_CC_resp, Treatment, Rank2, Day) %>%
    summarise(n = n()) %>%
    # Regroup by Day alone so that sum(n) is the per-day total.
    group_by(Day) %>%
    mutate(pct.wi.day = 100 * (n / sum(n))) %>%
    arrange(Day)

print(CC_counts_perday)

Source: local data frame [35 x 5]
Groups: Day

   Treatment                   Rank2 Day  n pct.wi.day
1     13C000           Acidobacteria  14  4  2.2857143
2     13C000          Actinobacteria  14  2  1.1428571
3     13C000         Armatimonadetes  14  4  2.2857143
4     13C000           Bacteroidetes  14 39 22.2857143
5     13C000 Candidate_division_BRC1  14  1  0.5714286
6     13C000                Chlorobi  14  2  1.1428571
7     13C000             Chloroflexi  14 11  6.2857143
8     13C000           Cyanobacteria  14  1  0.5714286
9     13C000             Nitrospirae  14  1  0.5714286
10    13C000          Planctomycetes  14 11  6.2857143
..       ...                     ... ... ..        ...


In [10]:
%%R
# Split the "13C000" responders into one data frame per value of the Day
# column (14, 28 and 45).
df_CC_resp_day14 <- df_CC_resp[which(df_CC_resp$Day == "14"), ]
df_CC_resp_day28 <- df_CC_resp[which(df_CC_resp$Day == "28"), ]
df_CC_resp_day45 <- df_CC_resp[which(df_CC_resp$Day == "45"), ]


In [11]:
%%R
# Day-45 "13C000" responders: number of distinct OTUs within each Rank2 group.
df_CC_resp_counts <- ddply(.data = df_CC_resp_day45,
                           .variables = .(Rank2),
                           .fun = summarize,
                           NumSubs = length(unique(OTU)))
print(df_CC_resp_counts)

                    Rank2 NumSubs
1           Acidobacteria       5
2          Actinobacteria       7
3         Armatimonadetes       2
4           Bacteroidetes      18
5                   BD1-5       2
6  Candidate_division_OD1       1
7                Chlorobi       1
8             Chloroflexi      15
9           Cyanobacteria       2
10          Elusimicrobia       3
11          Fibrobacteres       2
12       Gemmatimonadetes       1
13         Planctomycetes      51
14         Proteobacteria     120
15        Verrucomicrobia      29


### Treatment: 100

In [12]:
%%R
# Keep the rows of df_all that belong to treatment "13C100" and whose `padj`
# is at or below the FDR cutoff (rows where the condition is NA are dropped),
# then report how many rows passed.
FDR <- 0.10
df_100_resp <- df_all[which(df_all$Treatment == "13C100" & df_all$padj <= FDR), ]
nrow(df_100_resp)

[1] 416


In [13]:
%%R
# Number of "13C100" responder rows for every Day / Rank2 / Treatment
# combination, stored in the `Counts` column.
# NOTE(review): the output recorded for this cell is ordered by Day and Rank2,
# not by descending Counts -- presumably arrange() honoured the grouping in the
# dplyr version used. Confirm before relying on the row order.
phy.sum.day.C100 <- select(df_100_resp, OTU, Day, Treatment, Rank2, Rank3) %>%
    group_by(Day, Rank2, Treatment) %>%
    summarise(Counts = n()) %>%
    arrange(desc(Counts))

phy.sum.day.C100

Source: local data frame [32 x 4]
Groups: Day, Rank2

   Day                   Rank2 Treatment Counts
1   14          Actinobacteria    13C100      3
2   14           Bacteroidetes    13C100     10
3   14 Candidate_division_BRC1    13C100      2
4   14                Chlorobi    13C100      1
5   14             Chloroflexi    13C100      7
6   14           Cyanobacteria    13C100      2
7   14          Planctomycetes    13C100     11
8   14          Proteobacteria    13C100     69
9   14         Verrucomicrobia    13C100      3
10  28           Acidobacteria    13C100      4
.. ...                     ...       ...    ...


In [14]:
%%R
# Keep only the first row encountered for each OTU, then report how many
# distinct OTUs remain.
df_100_resp_unique <- df_100_resp[!duplicated(df_100_resp$OTU), ]
nrow(df_100_resp_unique)

[1] 273


In [15]:
%%R
# Row counts per Treatment / Rank2 / Day for the "13C100" responders, plus each
# count expressed as a percentage of all responder rows on the same Day.
C100_counts_perday <- group_by(df_100_resp, Treatment, Rank2, Day) %>%
    summarise(n = n()) %>%
    # Regroup by Day alone so that sum(n) is the per-day total.
    group_by(Day) %>%
    mutate(pct.wi.day = 100 * (n / sum(n))) %>%
    arrange(Day)
print(C100_counts_perday)

Source: local data frame [32 x 5]
Groups: Day

   Treatment                   Rank2 Day  n pct.wi.day
1     13C100          Actinobacteria  14  3  2.7777778
2     13C100           Bacteroidetes  14 10  9.2592593
3     13C100 Candidate_division_BRC1  14  2  1.8518519
4     13C100                Chlorobi  14  1  0.9259259
5     13C100             Chloroflexi  14  7  6.4814815
6     13C100           Cyanobacteria  14  2  1.8518519
7     13C100          Planctomycetes  14 11 10.1851852
8     13C100          Proteobacteria  14 69 63.8888889
9     13C100         Verrucomicrobia  14  3  2.7777778
10    13C100           Acidobacteria  28  4  2.3121387
..       ...                     ... ... ..        ...


In [16]:
%%R
# Split the "13C100" responders into one data frame per value of the Day
# column (14, 28 and 45).
df_C100_resp_day14 <- df_100_resp[which(df_100_resp$Day == "14"), ]
df_C100_resp_day28 <- df_100_resp[which(df_100_resp$Day == "28"), ]
df_C100_resp_day45 <- df_100_resp[which(df_100_resp$Day == "45"), ]

In [17]:
%%R
# Day-45 "13C100" responders: number of distinct OTUs within each Rank2 group.
df_C100_resp_counts <- ddply(.data = df_C100_resp_day45,
                             .variables = .(Rank2),
                             .fun = summarize,
                             NumSubs = length(unique(OTU)))
print(df_C100_resp_counts)

             Rank2 NumSubs
1    Acidobacteria       6
2   Actinobacteria       1
3    Bacteroidetes      19
4            BD1-5       1
5         Chlorobi       1
6      Chloroflexi       7
7    Cyanobacteria       2
8    Fibrobacteres       1
9   Planctomycetes      20
10  Proteobacteria      59
11 Verrucomicrobia      18


### Treatment: 700

In [18]:
%%R
# Keep the rows of df_all that belong to treatment "13C700" and whose `padj`
# is at or below the FDR cutoff (rows where the condition is NA are dropped),
# then report how many rows passed.
FDR <- 0.10
df_700_resp <- df_all[which(df_all$Treatment == "13C700" & df_all$padj <= FDR), ]
nrow(df_700_resp)

[1] 524


In [19]:
%%R
# Number of "13C700" responder rows for every Day / Rank2 / Treatment
# combination, stored in the `Counts` column.
# NOTE(review): the output recorded for this cell is ordered by Day and Rank2,
# not by descending Counts -- presumably arrange() honoured the grouping in the
# dplyr version used. Confirm before relying on the row order.
phy.sum.day.C700 <- select(df_700_resp, OTU, Day, Treatment, Rank2, Rank3) %>%
    group_by(Day, Rank2, Treatment) %>%
    summarise(Counts = n()) %>%
    arrange(desc(Counts))

phy.sum.day.C700

Source: local data frame [36 x 4]
Groups: Day, Rank2

   Day                   Rank2 Treatment Counts
1   14           Acidobacteria    13C700      4
2   14          Actinobacteria    13C700      1
3   14         Armatimonadetes    13C700      3
4   14           Bacteroidetes    13C700     94
5   14 Candidate_division_BRC1    13C700      1
6   14  Candidate_division_WS3    13C700      3
7   14                Chlorobi    13C700      1
8   14             Chloroflexi    13C700     11
9   14          Planctomycetes    13C700     18
10  14          Proteobacteria    13C700    102
.. ...                     ...       ...    ...


In [20]:
%%R
# Keep only the first row encountered for each OTU, then report how many
# distinct OTUs remain.
df_700_resp_unique <- df_700_resp[!duplicated(df_700_resp$OTU), ]
nrow(df_700_resp_unique)

[1] 358


In [21]:
%%R
# Row counts per Treatment / Rank2 / Day for the "13C700" responders, plus each
# count expressed as a percentage of all responder rows on the same Day.
C700_counts_perday <- group_by(df_700_resp, Treatment, Rank2, Day) %>%
    summarise(n = n()) %>%
    # Regroup by Day alone so that sum(n) is the per-day total.
    group_by(Day) %>%
    mutate(pct.wi.day = 100 * (n / sum(n))) %>%
    arrange(Day)

print(C700_counts_perday)

Source: local data frame [36 x 5]
Groups: Day

   Treatment                   Rank2 Day   n pct.wi.day
1     13C700           Acidobacteria  14   4  1.6064257
2     13C700          Actinobacteria  14   1  0.4016064
3     13C700         Armatimonadetes  14   3  1.2048193
4     13C700           Bacteroidetes  14  94 37.7510040
5     13C700 Candidate_division_BRC1  14   1  0.4016064
6     13C700  Candidate_division_WS3  14   3  1.2048193
7     13C700                Chlorobi  14   1  0.4016064
8     13C700             Chloroflexi  14  11  4.4176707
9     13C700          Planctomycetes  14  18  7.2289157
10    13C700          Proteobacteria  14 102 40.9638554
..       ...                     ... ... ...        ...


In [22]:
%%R
# Split the "13C700" responders into one data frame per value of the Day
# column (14, 28 and 45).
df_C700_resp_day14 <- df_700_resp[which(df_700_resp$Day == "14"), ]
df_C700_resp_day28 <- df_700_resp[which(df_700_resp$Day == "28"), ]
df_C700_resp_day45 <- df_700_resp[which(df_700_resp$Day == "45"), ]

In [23]:
%%R
# Day-45 "13C700" responders: number of distinct OTUs within each Rank2 group.
df_C700_resp_counts <- ddply(.data = df_C700_resp_day45,
                             .variables = .(Rank2),
                             .fun = summarize,
                             NumSubs = length(unique(OTU)))
print(df_C700_resp_counts)

                     Rank2 NumSubs
1            Acidobacteria       8
2           Actinobacteria       2
3          Armatimonadetes       3
4            Bacteroidetes      11
5  Candidate_division_OP11       1
6                 Chlorobi       1
7              Chloroflexi      11
8            Cyanobacteria       2
9            Elusimicrobia       1
10           Fibrobacteres       2
11              Firmicutes       1
12          Planctomycetes      20
13          Proteobacteria      64
14         Verrucomicrobia      19
15                    <NA>       1


In [24]:
%%R
# Stack the three per-treatment Day / Rank2 count tables (13C000, 13C100,
# 13C700, in that order) into a single table.
phy.counts <- rbind(phy.sum.day.CC,
                    phy.sum.day.C100,
                    phy.sum.day.C700)
phy.counts

Source: local data frame [103 x 4]
Groups: Day, Rank2

   Day                   Rank2 Treatment Counts
1   14           Acidobacteria    13C000      4
2   14          Actinobacteria    13C000      2
3   14         Armatimonadetes    13C000      4
4   14           Bacteroidetes    13C000     39
5   14 Candidate_division_BRC1    13C000      1
6   14                Chlorobi    13C000      2
7   14             Chloroflexi    13C000     11
8   14           Cyanobacteria    13C000      1
9   14             Nitrospirae    13C000      1
10  14          Planctomycetes    13C000     11
.. ...                     ...       ...    ...


In [25]:
%%R

# Export the combined count table as a comma-separated file without row names.
# The target path is relative to the working directory, so make sure the
# "data" directory exists first; otherwise write.table() fails on a fresh run.
# dir.create() is a silent no-op when the directory is already there.
dir.create("data", showWarnings = FALSE)
write.table(phy.counts,
            file = "data/phy_counts.csv",
            row.names = FALSE,
            sep = ",")

In [26]:
%%R
# Stack the three per-treatment count/percentage tables into a single table.
# The treatments are bound in the order 13C000, 13C100, 13C700 so that the row
# order matches phy.counts and the section order of this notebook (the
# original bound 13C700 before 13C100). Every row carries its own Treatment
# value, so only the row order changes.
phy.counts.pct <- rbind(CC_counts_perday,
                        C100_counts_perday,
                        C700_counts_perday)
phy.counts.pct

Source: local data frame [103 x 5]
Groups: Day

   Treatment                   Rank2 Day  n pct.wi.day
1     13C000           Acidobacteria  14  4  2.2857143
2     13C000          Actinobacteria  14  2  1.1428571
3     13C000         Armatimonadetes  14  4  2.2857143
4     13C000           Bacteroidetes  14 39 22.2857143
5     13C000 Candidate_division_BRC1  14  1  0.5714286
6     13C000                Chlorobi  14  2  1.1428571
7     13C000             Chloroflexi  14 11  6.2857143
8     13C000           Cyanobacteria  14  1  0.5714286
9     13C000             Nitrospirae  14  1  0.5714286
10    13C000          Planctomycetes  14 11  6.2857143
..       ...                     ... ... ..        ...


In [27]:
%%R

# Export the combined count/percentage table as a comma-separated file without
# row names. The target path is relative to the working directory, so make
# sure the "data" directory exists first; otherwise write.table() fails on a
# fresh run. dir.create() is a silent no-op when the directory is already there.
dir.create("data", showWarnings = FALSE)
write.table(phy.counts.pct,
            file = "data/phy_counts_pct.csv",
            row.names = FALSE,
            sep = ",")