In [None]:
import bokeh
import bokeh.plotting as bplot
bokeh.io.output_notebook()
import colorcet

import numpy
import pandas

In [None]:
# Shared Bokeh theme for every figure in this notebook: transparent
# backgrounds, no grid lines, heavy outward-pointing ticks, Arial text.
bplot_style = {
    "attrs": {
        # Keyed on "Plot" rather than "Figure": theme entries are matched
        # against Python class names, and the plotting class is called
        # `figure` in Bokeh 3 (it was `Figure` in Bokeh 2). `Plot` is the
        # base class in both, so these settings apply on either version.
        "Plot": {
            "background_fill_color": None,
            "outline_line_color": None,
            "border_fill_color": None,
        },
        "Grid": {
            "grid_line_color": None
        },
        "Axis": {
            "major_label_text_font": "Arial",
            "major_label_text_font_size": "10pt",
            "major_label_text_color": "#000000",
            "axis_label_text_font": "Arial",
            "axis_label_text_font_size": "12pt",
            "axis_label_text_font_style": "bold",
            "axis_label_text_color": "#000000",
            "axis_line_width": 2,
            "axis_line_cap": "butt",
            # Ticks point outwards only (major_tick_in is 0).
            "major_tick_line_width": 2,
            "major_tick_out": 10,
            "major_tick_in": 0,
            "major_tick_line_cap": "square",
            "minor_tick_line_width": 2,
            "minor_tick_out": 5,
            "minor_tick_line_cap": "square",
        },
        "Legend": {
            "label_text_font": "Arial",
            "label_text_font_size": "8pt",
            "background_fill_color": None,
            "border_line_color": None
        },
    }
}

# Install the theme on the current document so figures created afterwards use it.
bokeh.io.curdoc().theme = bokeh.themes.Theme(json=bplot_style)

In [None]:
# DESeq2 result tables to plot, as (legend label, CSV path) pairs.
# Written as a mapping for readability; insertion order is preserved, so the
# resulting list keeps the same order (the first entry is the one shown by
# default in the plots).
_deseq_result_files = {
    "CrxKO": "../DESeq2_Output/deseq2_KO.csv",
    "D2/+": "../DESeq2_Output/deseq2_Ehet.csv",
    "D2/D2": "../DESeq2_Output/deseq2_Ehom.csv",
    "R90W/+": "../DESeq2_Output/deseq2_Rhet.csv",
    "R90W/R90W": "../DESeq2_Output/deseq2_Rhom.csv",
}
deseq_samples = list(_deseq_result_files.items())

In [None]:
# Two curated gene lists, both reduced to the columns "Name" and "Enrichment".
# rod_cone_groups.csv has a header row; rod_cone_TFs.csv has none, so its
# column names are supplied here.
rod_cone_groups = pandas.read_csv("rod_cone_groups.csv", usecols=["Name", "Enrichment"])
display(rod_cone_groups)
rod_cone_TFs = pandas.read_csv("rod_cone_TFs.csv", header=None, names=["Name", "Enrichment"])
display(rod_cone_TFs)

# Union of both lists. `indicator=True` adds a "_merge" column holding
# "left_only", "right_only" or "both" for every row.
rod_cone_genes = rod_cone_groups.merge(rod_cone_TFs, how="outer", on=["Name", "Enrichment"], indicator=True)
# A gene is a TF whenever it appears in the TF list, including the "both" case
# where it is also present in the groups list. Testing for "right_only" alone
# would mark those shared genes as non-TFs.
# NOTE(review): a gene listed in both files with *different* Enrichment values
# still yields two rows (one per file) because Enrichment is a merge key.
rod_cone_genes["is_tf"] = rod_cone_genes["_merge"] != "left_only"
# Rebind instead of dropping in place so re-running the cell stays predictable.
rod_cone_genes = rod_cone_genes.drop(columns="_merge")
display(rod_cone_genes)

In [None]:
# Builds two linked-by-legend figures from the DESeq2 tables in `deseq_samples`:
#   * a volcano plot (log2 fold change vs. -log10 adjusted p-value), and
#   * an MA plot (log10 mean expression vs. log2 fold change).
# Every sample contributes four renderers (all genes + ring markers for the
# genes in `rod_cone_genes`, on each figure). Only the first sample is visible
# initially; the legend toggles the others.

# Significance cut-offs. They drive both the point colouring and the dashed
# guide lines on the volcano plot, so the two can never disagree.
PADJ_CUTOFF = 0.05
LFC_CUTOFF = 2


def _marker_size(values):
    """Rescale `values` relative to their maximum into marker sizes (value / max * 5 + 4)."""
    return (values / numpy.max(values) * 5) + 4


legend_items = []

# Hover fields. "Enrichment" and "is_tf" only exist in the known-gene data
# sources (they come from the merge with `rod_cone_genes`).
tooltips = [
    ("Gene Name", "@external_gene_name"),
    ("Gene ID", "@ensembl_gene_id"),
    ("Biotype", "@gene_biotype"),
    ("Mean Expression", "@baseMean"),
    ("log2 Fold Change", "@log2FoldChange±@lfcSE"),
    ("padj", "@padj"),
    ("Enrichment", "@Enrichment"),
    ("TF?", "@is_tf")
]

tools = ["pan", "box_zoom", "wheel_zoom", "box_select", "reset", "save", "hover"]

volcano_figure = bplot.figure(width=700, height=700, tools=tools, tooltips=tooltips, x_axis_label="log2(Fold Change)", y_axis_label="-log10(padj)")

ma_figure = bplot.figure(width=700, height=700, tools=tools, tooltips=tooltips, x_axis_label="log10(Mean Expression)", y_axis_label="log2(Fold Change)")

for i, (sample_name, sample_file_path) in enumerate(deseq_samples):
    sample_data = pandas.read_csv(sample_file_path)
    # The tiny offset keeps padj == 0 from producing an infinite -log10 value.
    sample_data["log10padj"] = -numpy.log10(sample_data["padj"] + 1e-310)
    # NOTE(review): rows with baseMean == 0 become -inf here (and in the
    # derived marker size) — confirm such rows are absent or acceptable.
    sample_data["log10baseMean"] = numpy.log10(sample_data["baseMean"])
    # Marker sizes: the volcano plot encodes mean expression, the MA plot
    # encodes significance. The "Ring" variants are 5 larger so the outline
    # drawn around a known gene surrounds its filled marker.
    sample_data["scaledBaseMean"] = _marker_size(sample_data["log10baseMean"])
    sample_data["scaledBaseMeanRing"] = sample_data["scaledBaseMean"] + 5
    sample_data["scaledlog10padj"] = _marker_size(sample_data["log10padj"])
    sample_data["scaledlog10padjRing"] = sample_data["scaledlog10padj"] + 5

    sample_data["significant"] = (sample_data["padj"] < PADJ_CUTOFF) & (numpy.abs(sample_data["log2FoldChange"]) > LFC_CUTOFF)
    # Significant genes get the sample's palette colour with alpha "80";
    # everything else is a faint translucent grey.
    sample_data["color"] = numpy.where(sample_data["significant"], colorcet.glasbey_dark[i] + "80", "#80808030")

    # Subset of rows whose gene name appears in the curated rod/cone list.
    sample_data_known_genes = sample_data.merge(rod_cone_genes, how="inner", left_on="external_gene_name", right_on="Name")

    # Show the available columns once, for the first sample only.
    if i == 0:
        display(sample_data.columns)
        display(sample_data_known_genes.columns)

    data_source = bokeh.models.ColumnDataSource(sample_data)
    data_source_known_genes = bokeh.models.ColumnDataSource(sample_data_known_genes)

    # `scatter(marker="circle", size=...)` replaces `circle(size=...)`, which
    # is deprecated in recent Bokeh releases; the drawn markers are the same.
    renderer = volcano_figure.scatter(source=data_source, x="log2FoldChange", y="log10padj", marker="circle", fill_color="color", size="scaledBaseMean", line_color=None)
    known_renderer = volcano_figure.scatter(source=data_source_known_genes, x="log2FoldChange", y="log10padj", marker="circle", size="scaledBaseMeanRing", fill_color=None, line_color="color")

    ma_renderer = ma_figure.scatter(source=data_source, x="log10baseMean", y="log2FoldChange", marker="circle", fill_color="color", size="scaledlog10padj", line_color=None)
    known_ma_renderer = ma_figure.scatter(source=data_source_known_genes, x="log10baseMean", y="log2FoldChange", marker="circle", size="scaledlog10padjRing", fill_color=None, line_color="color")

    sample_renderers = [renderer, known_renderer, ma_renderer, known_ma_renderer]

    # Only the first sample starts out visible.
    if i != 0:
        for sample_renderer in sample_renderers:
            sample_renderer.visible = False

    # One legend entry per sample controls all four of its renderers.
    legend_items.append((sample_name, sample_renderers))

# Dashed guide lines at the significance cut-offs: vertical lines at
# +/- LFC_CUTOFF and a horizontal line at -log10(PADJ_CUTOFF).
guide_lines = [
    (LFC_CUTOFF, "height"),
    (-LFC_CUTOFF, "height"),
    (float(-numpy.log10(PADJ_CUTOFF)), "width"),
]
for guide_location, guide_dimension in guide_lines:
    volcano_figure.add_layout(bokeh.models.Span(location=guide_location, dimension=guide_dimension, line_color="#000000", line_dash="dashed", line_width=1))

legend = bokeh.models.Legend(items=legend_items)
legend.click_policy="hide"
legend.orientation = "horizontal"
legend.location = "center"
volcano_figure.add_layout(legend, 'below')

# Uncomment output_file to also save the plot to an HTML file
#bplot.output_file(filename="plots.html", title="Retinopathy Plots", mode="inline")
bplot.show(bokeh.layouts.gridplot([[volcano_figure, ma_figure]], toolbar_location="right"))