In [46]:
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go
import qgrid
import string
from IPython.display import Javascript
from plotly.offline import init_notebook_mode, iplot
from textwrap import dedent

# Initialise plotly's offline notebook mode so figures render inline.
# NOTE(review): connected=True is understood to load plotly.js from a CDN
# rather than embedding it in the notebook — confirm for the installed version.
plotly.offline.init_notebook_mode(connected=True)

# JavaScript snippet used to offer a DataFrame as a CSV download.
# It is a string.Template with two placeholders, $csv and $filename, each of
# which is substituted *inside a single-quoted JavaScript string literal*, so
# callers must pass values that are already escaped for that context.
# When run in the browser the script wraps the CSV text in a Blob and either
# hands it to navigator.msSaveBlob (when that exists) or clicks a temporary,
# hidden <a download=...> link pointing at an object URL for the Blob.
JS = string.Template("""
var csv = '$csv';

var filename = '$filename';
var blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
if (navigator.msSaveBlob) { // IE 10+
    navigator.msSaveBlob(blob, filename);
} else {
    var link = document.createElement("a");
    if (link.download !== undefined) { // feature detection
        // Browsers that support HTML5 download attribute
        var url = URL.createObjectURL(blob);
        link.setAttribute("href", url);
        link.setAttribute("download", filename);
        link.style.visibility = 'hidden';
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
    }
}
""")

# Root

In [2]:
# Load the root table and the single-column gene filter list (both tab-separated;
# the gene list has no header row, so its column is named "Gene" here).
root_df = pd.read_csv("data/root_go.tsv", sep="\t")
gene_list = pd.read_csv(
    "data/gene_list.txt",
    sep="\t",
    header=None,
    names=["Gene"],
)
print("Gene Filter List:", len(gene_list))
print("Rows:", len(root_df))

Gene Filter List: 1164
Rows: 25484


## Apply filters

In [3]:
# Row filters for the root table, applied in order:
#   1. padj < 0.05 at one or more of the four time points
#   2. a description is present ("arabi-defline" is not missing)
#   3. the description is not one of the uninformative placeholders
root_df = (
    root_df
    .loc[lambda d: d[["T1_padj", "T2_padj", "T3_padj", "T4_padj"]].lt(0.05).any(axis=1)]
    .loc[lambda d: d["arabi-defline"].notna()]
    .loc[lambda d: ~d["arabi-defline"].str.contains("unknown|Uncharacteri|Expressed protein")]
)

## filter by overall fold change across time points
def fc_diff_sum(s):
    """Return the summed magnitude of a row's fold-change values.

    Only entries whose key ends in ``"_fc"`` are considered, and of those
    only finite values (NaN and +/-inf are skipped) contribute to the total.

    Parameters
    ----------
    s : mapping-like row (e.g. a pandas Series) supporting ``keys()`` and
        ``s[key]`` lookup.

    Returns
    -------
    The sum of ``abs(value)`` over the selected entries; ``0`` when there
    are none.
    """
    return sum(
        abs(s[name])
        for name in s.keys()
        if name.endswith("_fc") and np.isfinite(s[name])
    )

# Add one |fold change| column per time point plus the per-gene total
# (fc_diff), then order the table from the smallest total to the largest.
root_df = (
    root_df
    .assign(
        T1_fc_abs=lambda d: d["T1_fc"].abs(),
        T2_fc_abs=lambda d: d["T2_fc"].abs(),
        T3_fc_abs=lambda d: d["T3_fc"].abs(),
        T4_fc_abs=lambda d: d["T4_fc"].abs(),
        # fc_diff_sum only reads keys ending in "_fc", so the *_fc_abs
        # columns added above are not counted twice.
        fc_diff=lambda d: d.apply(fc_diff_sum, axis=1),
    )
    .sort_values("fc_diff", ascending=True)
)
print("Genes after initial filters:", len(root_df))

Genes after initial filters: 8538


## Apply gene list filter

This step is optional: run the next cell to keep only the genes in the filter list, or skip it to keep all genes.

In [4]:
# Keep only the rows whose Gene appears in the filter list.
root_df = root_df.loc[root_df["Gene"].isin(gene_list["Gene"])]
print("Remaining Genes:", len(root_df))

Remaining Genes: 1031


## Show the data table

In [5]:
# Show the filtered root table in a qgrid widget with its toolbar enabled.
# Later cells read the table back from the widget via get_changed_df().
root_widget = qgrid.show_grid(root_df, show_toolbar=True)
# Bare name as the last expression so the notebook displays the widget.
root_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

### Current table size

In [28]:
# Number of rows in the table as currently returned by the grid widget.
print("Genes:", len(root_widget.get_changed_df()))

Genes: 5


## Gather the plot data

In [29]:
# Read the table back from the grid widget and derive the heatmap inputs:
# x = time-point labels, y = one gene per row, z = fold change per (gene, time).
root_hm_df = root_widget.get_changed_df()
x = ["T1", "T2", "T3", "T4"]
y = root_hm_df["Gene"].tolist()
z = root_hm_df[[time_point + "_fc" for time_point in x]].values.tolist()

def get_hover(df, x, y, y_label, desc):
    """Build the hover text for every cell of a heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows being plotted. Must contain the ``y_label`` and ``desc``
        columns and a ``<prefix>_padj`` column for every prefix in ``x``.
    x : list of str
        Column prefixes (e.g. ``"T1"``), one per heatmap column.
    y : list
        Row labels, one per heatmap row, looked up in ``df[y_label]``.
    y_label : str
        Name of the column holding the row labels.
    desc : str
        Name of the column holding the description text.

    Returns
    -------
    list of list of str
        ``len(y)`` rows of ``len(x)`` strings, each formatted as
        ``"<description><br>padj: <padj>"``.

    Raises
    ------
    IndexError
        If a label in ``y`` does not occur in ``df[y_label]``.

    Notes
    -----
    When a label occurs in several rows of ``df``, the n-th occurrence of
    that label in ``y`` is paired with the n-th matching row, so hover text
    stays aligned with a ``z`` matrix built row by row from the same frame.
    If ``y`` repeats a label more often than ``df`` does, the last matching
    row is reused.
    """
    # Index the row positions of every label once, instead of re-filtering
    # the whole frame for each entry of y.
    rows_by_label = {}
    for pos, label in enumerate(df[y_label].tolist()):
        rows_by_label.setdefault(label, []).append(pos)

    times_seen = {}
    hover = []
    for label in y:
        matches = rows_by_label.get(label)
        if not matches:
            raise IndexError(
                "no row in df with {}={!r}".format(y_label, label))
        nth = times_seen.get(label, 0)
        times_seen[label] = nth + 1
        pos = matches[min(nth, len(matches) - 1)]

        d = df[desc].iat[pos]
        # .iat yields a scalar, so float() never receives a multi-row Series.
        hover.append([
            "{desc}<br>padj: {padj}".format(
                desc=d, padj=float(df[j + "_padj"].iat[pos]))
            for j in x
        ])
    return hover

# Hover text is built per cell and passed to the heatmap as `text`.
hover = get_hover(df=root_hm_df, x=x, y=y, y_label="Gene", desc="arabi-defline")
trace = go.Heatmap(
    x=x,
    y=y,
    z=z,
    text=hover,
    colorscale='Viridis',
    hoverinfo="text+z+y+x",
)
data = [trace]

## Set the plot layout

Increasing `height` (for example `"height": 1600`) makes the plot taller, which makes the y-axis labels easier to read.

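Setting `autotick` to `False` on the y-axis (`"yaxis": {"title": "Gene", "autotick": False}`) will show labels for all rows. In newer Plotly releases, where `autotick` is no longer accepted, use `"tickmode": "linear"` instead.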

In [30]:
# Figure layout; the title reports how many genes are being plotted.
layout = dict(
    autosize=True,
    height=800,
    margin=dict(b=100, l=120),
    title="Root Condition 1 v Condition 2 (%d genes)" % len(root_hm_df),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Gene"),
)

## Build and show the plot

In [31]:
# Combine the heatmap trace list and the layout into a figure, then render
# it in the notebook with plotly's offline iplot.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Export the data

The data downloads as a CSV file which can be opened in any text editor or Excel.

In [50]:
def prepare_download(df, filename="results.csv"):
    """Return JavaScript source that downloads ``df`` as a CSV file.

    The frame is serialised with ``df.to_csv(index=False)`` and substituted,
    together with ``filename``, into the module-level ``JS`` template. Both
    values land inside single-quoted JavaScript string literals, so they are
    escaped for that context first.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to export.
    filename : str, optional
        Name offered to the browser for the downloaded file.

    Returns
    -------
    str
        JavaScript source, suitable for ``IPython.display.Javascript``.
    """
    def _js_escape(text):
        # Backslashes go first so the escapes added afterwards are not
        # doubled. Quotes and line terminators would otherwise end or break
        # the single-quoted JS literal.
        return (text.replace("\\", "\\\\")
                    .replace("'", "\\'")
                    .replace("\r", "\\r")
                    .replace("\n", "\\n")
                    .replace("\u2028", "\\u2028")
                    .replace("\u2029", "\\u2029"))

    js_download = JS.substitute({
        "csv": _js_escape(df.to_csv(index=False)),
        "filename": _js_escape(filename),
    })
    return js_download

# Wrap the generated script in an IPython Javascript display object; as the
# cell's last expression it is displayed, which runs the download script.
Javascript(prepare_download(root_hm_df, "root_filtered.csv"))

<IPython.core.display.Javascript object>

# Stem Base

## Import and apply filters

In [52]:
# Stem-base table: load, filter, add fold-change summary columns, sort.
sb_df = (
    pd.read_csv("data/stem-base_go.tsv", sep="\t")
    # padj < 0.05 at one or more of the four time points
    .loc[lambda d: d[["T1_padj", "T2_padj", "T3_padj", "T4_padj"]].lt(0.05).any(axis=1)]
    # a description is present
    .loc[lambda d: d["arabi-defline"].notna()]
    # the description is not an uninformative placeholder
    .loc[lambda d: ~d["arabi-defline"].str.contains("unknown|Uncharacteri|Expressed protein")]
    .assign(
        T1_fc_abs=lambda d: d["T1_fc"].abs(),
        T2_fc_abs=lambda d: d["T2_fc"].abs(),
        T3_fc_abs=lambda d: d["T3_fc"].abs(),
        T4_fc_abs=lambda d: d["T4_fc"].abs(),
        # per-gene total of |fold change| over the *_fc columns
        fc_diff=lambda d: d.apply(fc_diff_sum, axis=1),
    )
    .sort_values("fc_diff", ascending=True)
)
# Interactive grid; the plotting cell reads its contents back.
sb_widget = qgrid.show_grid(sb_df, show_toolbar=True)
sb_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

## Read filtered data and plot

In [53]:
# Read the table back from the stem-base grid and derive the heatmap inputs.
sb_hm_df = sb_widget.get_changed_df()
x = ["T1", "T2", "T3", "T4"]
y = sb_hm_df["Gene"].tolist()
z = sb_hm_df[[time_point + "_fc" for time_point in x]].values.tolist()

hover = get_hover(df=sb_hm_df, x=x, y=y, y_label="Gene", desc="arabi-defline")
trace = go.Heatmap(
    x=x,
    y=y,
    z=z,
    text=hover,
    colorscale='Viridis',
    hoverinfo="text+z+y+x",
)
layout = dict(
    autosize=True,
    height=800,
    margin=dict(b=100, l=120),
    title="Stem-Base Condition 1 v Condition 2 (%d genes)" % len(sb_hm_df),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Gene"),
)
fig = go.Figure(data=[trace], layout=layout)
iplot(fig)

## Export the data

In [54]:
# Offer the stem-base rows read from the grid as a CSV download.
Javascript(prepare_download(sb_hm_df, "stem-base_filtered.csv"))

<IPython.core.display.Javascript object>

# Alternate Bdist

In [55]:
# Bdist table (three time points): load, filter, add summary columns, sort.
bdist_df = (
    pd.read_csv("data/bdist_msu_go.tsv", sep="\t")
    # padj < 0.05 at one or more of the three time points
    .loc[lambda d: d[["T1_padj", "T2_padj", "T3_padj"]].lt(0.05).any(axis=1)]
    # a description is present
    .loc[lambda d: d["arabi-defline"].notna()]
    # the description is not an uninformative placeholder
    .loc[lambda d: ~d["arabi-defline"].str.contains("unknown|Uncharacteri|Expressed protein")]
    .assign(
        # per-gene total of |fold change| over the *_fc columns
        fc_diff=lambda d: d.apply(fc_diff_sum, axis=1),
        T1_fc_abs=lambda d: d["T1_fc"].abs(),
        T2_fc_abs=lambda d: d["T2_fc"].abs(),
        T3_fc_abs=lambda d: d["T3_fc"].abs(),
    )
    .sort_values("fc_diff", ascending=True)
)
# Interactive grid; the plotting cell reads its contents back.
bdist_widget = qgrid.show_grid(bdist_df, show_toolbar=True)
bdist_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [56]:
# Read the table back from the Bdist grid and derive the heatmap inputs.
bdist_hm_df = bdist_widget.get_changed_df()
x = ["T1", "T2", "T3"]
y = bdist_hm_df["Gene"].tolist()
z = bdist_hm_df[[time_point + "_fc" for time_point in x]].values.tolist()

hover = get_hover(df=bdist_hm_df, x=x, y=y, y_label="Gene", desc="arabi-defline")
trace = go.Heatmap(
    x=x,
    y=y,
    z=z,
    text=hover,
    colorscale='Viridis',
    hoverinfo="text+z+y+x",
)
layout = dict(
    autosize=True,
    height=800,
    margin=dict(b=100, l=120),
    title="Bdist Condition 1 v Condition 2 (%d genes)" % len(bdist_hm_df),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Gene"),
)
fig = go.Figure(data=[trace], layout=layout)
iplot(fig)

## Export the data

In [57]:
# Offer the Bdist rows read from the grid as a CSV download.
Javascript(prepare_download(bdist_hm_df, "alternate-bdist_filtered.csv"))

<IPython.core.display.Javascript object>

# Setaria

In [59]:
# Setaria table (three time points): load, filter, add summary columns, sort.
set_df = (
    pd.read_csv("data/setaria_msu_go.tsv", sep="\t")
    # padj < 0.05 at one or more of the three time points
    .loc[lambda d: d[["T1_padj", "T2_padj", "T3_padj"]].lt(0.05).any(axis=1)]
    # a description is present
    .loc[lambda d: d["arabi-defline"].notna()]
    # the description is not an uninformative placeholder
    .loc[lambda d: ~d["arabi-defline"].str.contains("unknown|Uncharacteri|Expressed protein")]
    .assign(
        # per-gene total of |fold change| over the *_fc columns
        fc_diff=lambda d: d.apply(fc_diff_sum, axis=1),
        T1_fc_abs=lambda d: d["T1_fc"].abs(),
        T2_fc_abs=lambda d: d["T2_fc"].abs(),
        T3_fc_abs=lambda d: d["T3_fc"].abs(),
    )
    .sort_values("fc_diff", ascending=True)
)
# Interactive grid; the plotting cell reads its contents back.
set_widget = qgrid.show_grid(set_df, show_toolbar=True)
set_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [60]:
# Read the table back from the Setaria grid and derive the heatmap inputs.
set_hm_df = set_widget.get_changed_df()
x = ["T1", "T2", "T3"]
y = set_hm_df["Gene"].tolist()
z = set_hm_df[[time_point + "_fc" for time_point in x]].values.tolist()

hover = get_hover(df=set_hm_df, x=x, y=y, y_label="Gene", desc="arabi-defline")
trace = go.Heatmap(
    x=x,
    y=y,
    z=z,
    text=hover,
    colorscale='Viridis',
    hoverinfo="text+z+y+x",
)
layout = dict(
    autosize=True,
    height=800,
    margin=dict(b=100, l=120),
    title="Setaria Condition 1 v Condition 2 (%d genes)" % len(set_hm_df),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Gene"),
)
fig = go.Figure(data=[trace], layout=layout)
iplot(fig)

## Export the data

In [61]:
# Offer the Setaria rows read from the grid as a CSV download.
Javascript(prepare_download(set_hm_df, "setaria_filtered.csv"))

<IPython.core.display.Javascript object>