## Create Responder Table of BLAST results

In [1]:
from IPython.display import Image, FileLink
import pandas as pd
import pandas.rpy.common as com



In [2]:
# Load the rpy2 IPython extension; it provides the %%R cell magic that the
# R cells in this notebook rely on.
%load_ext rpy2.ipython

In [3]:
%%R
library(data.table)
library(dplyr)
library(magrittr)

data.table 1.9.4  For help type: ?data.table
*** NB: by=.EACHI is now explicit. See README to restore previous behaviour.

Attaching package: ‘dplyr’

The following objects are masked from ‘package:data.table’:

    between, last

The following object is masked from ‘package:stats’:

    filter

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



### This file is from the [make l2fc notebook](./Sparsity_make_l2fc_table.ipynb)

In [4]:
%%R
# Cutoff applied to the adjusted p-values (padj column).
FDR <- 0.10

# Step 1: read the l2fc table and keep only rows that pass the cutoff.
df.l2fc <- read.csv("/var/seq_data/priming_exp/data/l2fc_table.csv") %>%
    filter(padj <= FDR)

# Step 2: within every OTU/Treatment pair keep the single row holding the
# largest log2FoldChange (which.max returns the first maximum on ties).
df.l2fc <- df.l2fc %>%
    group_by(OTU, Treatment) %>%
    slice(which.max(log2FoldChange))

### This file is from the [BLAST notebook](./BLAST.ipynb)

In [5]:
%%R
# Read the tab-separated BLAST output (file has no header row) and wrap it
# as a dplyr tbl for compact printing.
df.blast <- read.table("/home/ashley/priming_exp/data/blast_out_otusn_LTP115.m6",
                       sep = "\t") %>%
    tbl_df()

# Assign the twelve column names; the first column is the query (OTU) and
# the second the subject accession.
names(df.blast) <- c("OTU", "acc", "pid", "alnlen", "mismatches", "gaps",
                     "qstart", "qend", "sstart", "send", "evalue", "bit")

df.blast

Source: local data frame [6,367,968 x 12]

     OTU      acc pid alnlen mismatches gaps qstart qend sstart send evalue bit
1  OTU.1 AF235091 100    375          0    0      1  375    472  846      0 693
2  OTU.1 AF330692 100    375          0    0      1  375    466  840      0 693
3  OTU.1 AB588633 100    375          0    0      1  375    503  877      0 693
4  OTU.1   X80741 100    375          0    0      1  375    496  870      0 693
5  OTU.1 AM176541 100    375          0    0      1  375    505  879      0 693
6  OTU.1   X80740 100    375          0    0      1  375    497  871      0 693
7  OTU.1   X83408 100    375          0    0      1  375    493  867      0 693
8  OTU.1 AB279889 100    375          0    0      1  375    502  876      0 693
9  OTU.1 GQ406811 100    375          0    0      1  375    456  830      0 693
10 OTU.1 AB279890 100    375          0    0      1  375    506  880      0 693
..   ...      ... ...    ...        ...  ...    ...  ...    ...  ...    ... .

In [6]:
%%R
# Lookup table mapping each accession (acc) to its full organism name.
df.tax <- read.csv("/var/seq_data/silva/silva_blastdb/full_names.csv") %>%
    tbl_df()
df.tax

Source: local data frame [1,426,450 x 2]

        acc                           full_name
1  AX003092          Enterococcus casseliflavus
2  AX044029              Neisseria meningitidis
3  EU271959            Myzocytiopsis intermedia
4  AX039535            Dehalococcoides mccartyi
5  EU273602                   Acorus americanus
6  AX175616                Marinomonas communis
7  AX044033              Neisseria meningitidis
8  AB000389         Pseudoalteromonas elyakovii
9  EU271960          Myzocytiopsis sp. venatrix
10 AB001439 Pseudomonas syringae pv. actinidiae
..      ...                                 ...


In [7]:
%%R
# Attach the organism name to every BLAST hit. The join key is chosen
# implicitly from the shared column(s); dplyr reports which one it used.
# NOTE(review): df.blast is overwritten in place, so re-running this cell
# joins the already-joined table again; re-run the load cell first.
df.blast <- df.blast %>%
    left_join(df.tax)

Joining by: "acc"


In [8]:
%%R
# Attach the response statistics and taxonomy ranks from df.l2fc to every
# BLAST hit. df.l2fc has one row per OTU/Treatment pair, so a hit row is
# repeated once for each treatment its OTU appears in.
df.blast <- df.blast %>%
    left_join(select(df.l2fc,
                     OTU, padj, log2FoldChange, Day, Treatment,
                     Rank2, Rank3, Rank4))

Joining by: "OTU"


In [9]:
%%R
# For each OTU keep only the hit(s) with the highest bit score. Ties all
# receive rank 1 (ties.method = "min"), so several rows per OTU can remain.
df.top.hits <- df.blast %>%
    group_by(OTU) %>%
    mutate(bit.rank = rank(desc(bit), ties.method = "min")) %>%
    filter(bit.rank == 1) %>%
    ungroup()  # drop the grouping so later cells start from a plain tbl

In [10]:
%%R
# Print the top-hit table (dplyr shows only the first rows of a tbl).
df.top.hits

Source: local data frame [22,680 x 21]

     OTU      acc pid alnlen mismatches gaps qstart qend sstart send evalue bit
1  OTU.1 AF235091 100    375          0    0      1  375    472  846      0 693
2  OTU.1 AF330692 100    375          0    0      1  375    466  840      0 693
3  OTU.1 AB588633 100    375          0    0      1  375    503  877      0 693
4  OTU.1   X80741 100    375          0    0      1  375    496  870      0 693
5  OTU.1 AM176541 100    375          0    0      1  375    505  879      0 693
6  OTU.1   X80740 100    375          0    0      1  375    497  871      0 693
7  OTU.1   X83408 100    375          0    0      1  375    493  867      0 693
8  OTU.1 AB279889 100    375          0    0      1  375    502  876      0 693
9  OTU.1 GQ406811 100    375          0    0      1  375    456  830      0 693
10 OTU.1 AB279890 100    375          0    0      1  375    506  880      0 693
..   ...      ... ...    ...        ...  ...    ...  ...    ...  ...    ... ...


In [11]:
%%R
# Cutoff applied to the adjusted p-values (padj column).
FDR = 0.10

# Build the responder table: one row per OTU *and* Treatment.
#
# The l2fc statistics joined onto the BLAST hits carry one row per
# OTU/Treatment pair. Grouping by OTU alone and taking first(...) would keep
# a single arbitrary treatment for an OTU that appears in several of them
# (so it could be missing when the table is later subset by Treatment) and
# would repeat every hit name once per treatment in `hits`.
df.table = df.top.hits %>%
    filter(padj <= FDR) %>%                 # also drops hits without l2fc data (padj is NA)
    group_by(OTU, Treatment) %>%
    summarize(hits = paste(full_name, collapse = "|"),
              pid = first(pid),
              log2FoldChange = first(log2FoldChange),
              Day = first(Day),
              Phylum = first(as.character(Rank2)),
              Class = first(as.character(Rank3)),
              Order = first(as.character(Rank4))) %>%
    ungroup() %>%
    # Treatment is a grouping column above, so convert it after ungrouping.
    mutate(Treatment = as.character(Treatment)) %>%
    # Keep the column order that downstream cells have always seen.
    select(OTU, hits, pid, log2FoldChange, Day, Phylum, Class, Order, Treatment) %>%
    arrange(Phylum, Class, Order, desc(log2FoldChange))

In [12]:
# Copy the R object `df.table` into the Python kernel using
# pandas.rpy.common (imported as `com` in the first cell).
# NOTE(review): the pandas.rpy module was deprecated and later removed from
# pandas, so this call needs an old pandas release -- confirm the pinned
# environment or migrate to rpy2's own pandas conversion.
df_table = com.load_data("df.table")

In [13]:
# Preview the first rows of the imported responder table.
df_table.head()

Unnamed: 0,OTU,hits,pid,log2FoldChange,Day,Phylum,Class,Order,Treatment
1,OTU.1341,Acidicapsa ligni,86.02,4.774537,14,Acidobacteria,11-24,uncultured_bacterium,13C700
2,OTU.2288,Sphingopyxis soli,82.7,3.644542,45,Acidobacteria,11-24,uncultured_bacterium,13C700
3,OTU.1197,Edaphobacter aggregans,83.02,2.409639,45,Acidobacteria,11-24,uncultured_bacterium,13C700
4,OTU.10167,Telmatobacter bradus|Telmatobacter bradus|Telm...,80.65,3.860824,45,Acidobacteria,Candidatus_Chloracidobacterium,uncultured_Acidobacteria_bacterium,13C000
5,OTU.1507,Acidicapsa ligni,82.24,3.241574,45,Acidobacteria,Candidatus_Chloracidobacterium,uncultured_Acidobacteria_bacterium,13C700


In [14]:
def _unique_in_order(items):
    """Return the distinct items of ``items`` in first-seen order."""
    seen = set()
    ordered = []
    for item in items:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered


def _italic_box(text):
    r"""Wrap ``text`` as ``\mbox{\textit{text}}`` (italic, unbreakable)."""
    return r"\mbox{\textit{" + text + "}}"


def list_genera(l):
    r"""Format a "|"-separated list of BLAST hit names as LaTeX.

    Parameters
    ----------
    l : str
        Hit names joined with "|", e.g. ``"Genus species|Genus other"``.

    Returns
    -------
    str
        * more than 10 names (duplicates counted): one ``Genus spp.`` entry
          per distinct genus, where the genus is the text before the first
          space (a name without a space is used as the genus unchanged);
        * 2 to 10 names: each distinct name once;
        * a single name: that name.

        Every entry is wrapped in ``\mbox{\textit{...}}`` and entries are
        joined with ", ". Entries keep the order in which they first appear
        in ``l`` so the generated table is reproducible between runs.
    """
    names = l.split("|")

    if len(names) > 10:
        # partition() never raises, unlike unpacking split(" ", 1), when a
        # name consists of a single word.
        genera = _unique_in_order(name.partition(" ")[0] for name in names)
        return ", ".join(_italic_box(genus + " spp.") for genus in genera)

    if len(names) > 1:
        return ", ".join(_italic_box(name) for name in _unique_in_order(names))

    return _italic_box(l)
    
def get_latex(row):
    r"""Render one responder-table record as a LaTeX table row.

    Parameters
    ----------
    row : mapping
        Anything indexable by column name (a pandas Series as passed by
        ``DataFrame.apply(..., axis=1)``, or a plain dict) providing the keys
        "OTU", "pid", "Day", "hits", "log2FoldChange", "Phylum", "Class"
        and "Order".

    Returns
    -------
    str
        Six cells separated by " & " in the order OTU, log2 fold change
        (rounded to 2 decimals), Day, BLAST hits, percent identity and
        taxonomy, terminated by ``\\ \midrule`` and a newline.

    Notes
    -----
    Hit names are only listed (via ``list_genera``) when the percent identity
    is at least 97; otherwise a fixed placeholder text is used. Underscores
    in the taxonomy names are replaced with hyphens.
    """
    pid = row["pid"]

    if pid >= 97:
        genera = list_genera(row["hits"])
    else:
        genera = r"{No hits of at least 97\% identity}"

    # Look the ranks up one at a time so plain dicts work as well as Series.
    ranks = [row[key] for key in ("Phylum", "Class", "Order")]
    tax = " ".join(r"\mbox{\textit{" + rank + "}}" for rank in ranks).replace("_", "-")

    l2fc = round(float(row["log2FoldChange"]), 2)

    cells = [row["OTU"], str(l2fc), str(row["Day"]), genera, str(pid), tax]
    return " & ".join(cells) + r" \\ " + r"\midrule" + "\n"

In [15]:
# Render one LaTeX table row (via get_latex) for every record of the
# "13C100" treatment.
s = df_table[df_table["Treatment"]=="13C100"].apply(get_latex, axis=1)

# Complete LaTeX document around the rows. The template is filled with the
# %-operator: the single "%s" is replaced by the concatenated rows, and the
# literal percent signs in the column headers are therefore written "%%".
table_framework = r"""
\documentclass[10pt]{article}
\usepackage{multirow, array, booktabs, longtable, threeparttablex}
\usepackage{array}
\usepackage{enumitem}
\usepackage{chngcntr}
\counterwithin{table}{section}
\usepackage[margin=1cm]{caption}
\newcolumntype{P}[1]{>{\raggedright\arraybackslash}p{#1}}

\usepackage{geometry} 
\geometry{tmargin=1cm, bmargin=1cm, lmargin=0.25cm, rmargin=0.25cm} 

\begin{document}
\setcounter{table}{2}
\setcounter{section}{4}

\pagestyle{empty}

\begin{ThreePartTable}
\begin{TableNotes}
\item[a] Maximum observed $log_{2}$ of fold change. 
\item[b] Day of maximum fold change.
\item[c] Against Living Tree Project database.
\item[d] Annotation from Silva database assigned during OTU binning (see methods).
\end{TableNotes}

\begin{longtable}{lrrP{5cm}rP{5cm}}

\caption{$^{13}$C-cellulose responders in the repeated root exudate treatment} \\
\toprule 
    \textbf{OTU ID} & 
    \textbf{Fold change} \tnote{a} & 
    \textbf{Day} \tnote{b} & 
    \textbf{Top BLAST hits} \tnote{c}& 
    \textbf{BLAST \%%ID} \tnote{c}& 
    \textbf{Phylum;Class;Order} \tnote{d}\\
\midrule
\endfirsthead

\multicolumn{3}{c}
{{\tablename\ \thetable{} -- continued from previous page}} \\
\midrule
    \textbf{OTU ID} & 
    \textbf{Fold change} & 
    \textbf{Day} & 
    \textbf{Top BLAST hits} & 
    \textbf{BLAST \%%ID} & 
    \textbf{Phylum;Class;Order} \\
\midrule
\endhead
    %s
\bottomrule
\insertTableNotes
\end{longtable}

\end{ThreePartTable}
 
\end{document}"""%"".join(s.values)

# Write the finished document; the following cells compile this .tex file.
with open("/home/ashley/priming_exp/data/table_cellulose_13C100.tex", "w") as out:
    out.write(table_framework)

In [16]:
!latex /home/ashley/priming_exp/data/table_cellulose_13C100.tex >/dev/null
!dvipdf table_cellulose_13C100.dvi figs/LTP_blast_table_cellulose_13C100.pdf

In [17]:
# Rasterise the PDF table to a PNG (600 dpi, trimmed to the content).
!convert -density 600 -trim  +repage figs/LTP_blast_table_cellulose_13C100.pdf -quality 100 figs/LTP_blast_table_cellulose_13C100.png

In [18]:
# Link to the generated PDF table.
FileLink("figs/LTP_blast_table_cellulose_13C100.pdf")

In [19]:
# Link to the generated PNG rendering of the table.
FileLink("figs/LTP_blast_table_cellulose_13C100.png")

In [20]:
# Link to the LaTeX source of the table.
FileLink("/home/ashley/priming_exp/data/table_cellulose_13C100.tex")