In [1]:
%load_ext lab_black
%matplotlib inline

# Dependencies    

1. To visualize scRNA-seq expression data, <b>MAGIC</b> (https://www.krishnaswamylab.org/projects/magic) is used to impute the missing data.
2. The web-based interface is backed by <b>Dash</b>.

In [2]:
import pandas as pd
import numpy as np
import magic
import seaborn as sns
import ipywidgets as widgets
import matplotlib.pyplot as plt

In [3]:
import json, random, os
from jupyter_dash import JupyterDash
import dash_html_components as html
import dash_core_components as dcc
import dash_table
from dash.dependencies import Input, Output, State, ALL

# scRNA-seq Dataset

This example inspects the expression profile of a gene of interest across cell types, tissue by tissue. It uses Mouse Cell Atlas data from _Han X, Wang R, Zhou Y, Fei L et al. Mapping the Mouse Cell Atlas by Microwell-Seq. Cell 2018 Feb 22;172(5):1091-1107.e17. PMID: 29474909_.

In [13]:
# Gene-by-cell expression matrices (DGE files), grouped by tissue.
# A tissue may be covered by several files; insertion order is kept because
# it is the order in which tissues are later processed.
tissue_data_path = {}
tissue_data_path["Small-Intestine"] = [
    "data/GSM2906468_SmallIntestine1_dge.txt.gz",
    "data/GSM2906469_SmallIntestine2_dge.txt.gz",
    "data/GSM2906470_SmallIntestine3_dge.txt.gz",
]
tissue_data_path["Spleen"] = ["data/GSM2906471_Spleen_dge.txt.gz"]
tissue_data_path["Kidney"] = [
    "data/GSM2906425_Kidney1_dge.txt.gz",
    "data/GSM2906426_Kidney2_dge.txt.gz",
]

In [9]:
# Per-cell assignment table: links every cell barcode to its tissue
sc_labels = pd.read_csv("data/MCA_CellAssignments.csv")
# Show which tissues are present in the assignment table
sc_labels["Tissue"].unique()

In [10]:
# The "Annotation" column holds the cell type assigned to each cell
sc_labels.head(n=5)

Unnamed: 0.1,Unnamed: 0,Cell.name,ClusterID,Tissue,Batch,Cell.Barcode,Annotation
0,1,Bladder_1.AAAACGAAAACGGGGCGA,Bladder_1,Bladder,Bladder_1,AAAACGAAAACGGGGCGA,Stromal cell_Dpt high(Bladder)
1,2,Bladder_1.AAAACGAAGCGGCCGCTA,Bladder_5,Bladder,Bladder_1,AAAACGAAGCGGCCGCTA,Stromal cell_Car3 high(Bladder)
2,3,Bladder_1.AAAACGAAGTACTAGCAT,Bladder_16,Bladder,Bladder_1,AAAACGAAGTACTAGCAT,Vascular smooth muscle progenitor cell(Bladder)
3,4,Bladder_1.AAAACGACGTTGCTGTGT,Bladder_8,Bladder,Bladder_1,AAAACGACGTTGCTGTGT,Vascular endothelial cell(Bladder)
4,5,Bladder_1.AAAACGAGCGAGCGAGTA,Bladder_4,Bladder,Bladder_1,AAAACGAGCGAGCGAGTA,Urothelium(Bladder)


In [15]:
def tissue_sc_profile(sc_labels, tissue, genes, data_path):
    """Extract MAGIC-imputed single-cell expression for one tissue.

    Parameters
    ----------
    sc_labels : pandas.DataFrame
        Cell assignment table; the columns ``Tissue``, ``Annotation`` and
        ``Cell.name`` are read here.
    tissue : str
        Value of ``sc_labels.Tissue`` selecting the cells to keep.
    genes : list of str
        Genes requested from MAGIC. Only ``genes[0]`` is summarised for now
        (multi-gene support is left for the future).
    data_path : list of str
        Space-separated gene-by-cell matrices; each file is imputed on its own.

    Returns
    -------
    cell_type_mean : pandas.DataFrame
        One row per cell type (also used as the index) with the columns
        ``CellType``, ``CellCount`` and ``MeanExp`` (mean imputed expression
        of ``genes[0]``, rounded to 4 decimals).
    gene_exp_df : pandas.DataFrame
        The labelled cells of the tissue joined with their imputed expression,
        plus a ``log2TPM`` column equal to ``log2(genes[0] + 1)``.
    """
    # Restrict the labels to this tissue and drop the "(Tissue)" suffix that
    # follows the cell type name in the annotation.
    tissue_sc_config = sc_labels.loc[sc_labels.Tissue == tissue].copy()
    tissue_sc_config["Annotation"] = tissue_sc_config.Annotation.apply(
        lambda x: x.split("(")[0]
    )

    # Collect the per-file results and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated frame on every iteration.
    imputed_batches = []
    for f in data_path:
        raw = pd.read_csv(f, sep=" ")
        print("*****", f.split("/")[-1], "has", raw.shape[1], "cells")
        # scale every cell (column) to a total of one million
        upm_mtx = raw * 1e6 / raw.sum()
        # keep the genes that are non-zero in at least 5 cells
        mask_low_gene = np.sum(raw.values != 0, axis=1) >= 5
        # square-root transform before imputation ...
        mask_low_sqrt_mat = np.sqrt(upm_mtx.loc[mask_low_gene, :])
        magic_op = magic.MAGIC()
        magic_imputed = magic_op.fit_transform(mask_low_sqrt_mat.T, genes=genes)
        # ... and square afterwards to return to the original scale
        imputed_batches.append(np.power(magic_imputed, 2))

    if imputed_batches:
        gene_exp_df = pd.concat(imputed_batches)
    else:
        gene_exp_df = pd.DataFrame()

    # only takes the single cells with labels
    gene_exp_df = tissue_sc_config.merge(
        gene_exp_df, left_on="Cell.name", right_index=True, how="inner"
    )

    # support multiple genes in the future
    gene = genes[0]
    gene_exp_df["log2TPM"] = np.log2(gene_exp_df[gene].values + 1)

    # per cell type: name, number of cells, mean imputed expression
    summary = [
        (cell_type, len(cells), round(cells[gene].mean(), 4))
        for cell_type, cells in gene_exp_df.groupby(by="Annotation")
    ]
    cell_type_mean = pd.DataFrame(
        summary,
        columns=["CellType", "CellCount", "MeanExp"],
        index=[row[0] for row in summary],
    )

    return cell_type_mean, gene_exp_df

In [18]:
def create_tissue_profile_dashboard(sc_labels, tissue, genes, data_path):
    """Assemble the Dash components for one tissue.

    Runs ``tissue_sc_profile`` and wraps its result in a layout made of a
    horizontal violin-plot image placed next to the cell-type table, so the
    table rows can serve as labels for the violins.

    Returns a 3-tuple ``(cell_type_mean, layout, gene_exp_df)`` where
    ``layout`` is the ``html.Div`` to embed in the app and the two frames are
    the outputs of ``tissue_sc_profile``.
    """
    cell_type_mean, gene_exp_df = tissue_sc_profile(sc_labels, tissue, genes, data_path)
    cell_types = list(cell_type_mean["CellType"].values)

    # image slot for the violin plot; its ``src`` is set by a callback
    violin_panel = html.Div(
        html.Img(id=f"{tissue}_plot"),
        style={
            "display": "inline-block",
            "margin-top": "4px",
        },
    )

    # client-side stores: current row order and the tissue this tab belongs to
    order_store = dcc.Store(
        id=f"{tissue}_curr_celltype_order", data=json.dumps(cell_types)
    )
    tissue_store = dcc.Store(f"{tissue}_tag", data=json.dumps(tissue))

    # editable table; the CellType column is a dropdown over all cell types
    table_columns = [
        {
            "name": "CellType",
            "id": "CellType",
            "presentation": "dropdown",
        },
        {"name": "CellCount", "id": "CellCount"},
        {"name": "MeanExp", "id": "MeanExp"},
    ]
    dropdown_choices = []
    for cell_type in cell_types:
        dropdown_choices.append({"label": cell_type, "value": cell_type})

    expression_table = dash_table.DataTable(
        id=f"{tissue}_expression_table",
        columns=table_columns,
        data=cell_type_mean.to_dict("records"),
        style_cell={"minHeight": 0, "maxHeight": 95, "height": 36.1},
        style_cell_conditional=[
            {"if": {"column_id": "CellType"}, "textAlign": "left"}
        ],
        editable=True,
        dropdown={
            "CellType": {"options": dropdown_choices},
        },
    )
    table_panel = html.Div(
        expression_table,
        style={
            "vertical-align": "top",
            "display": "inline-block",
        },
    )

    dashboard = html.Div(
        [violin_panel, order_store, tissue_store, table_panel],
        style={
            "display": "block",
            "margin-left": "23%",
            "margin-right": "auto",
        },
    )
    return cell_type_mean, dashboard, gene_exp_df


def make_plot(data, df, tissue):
    """Render the horizontal violin plot for one tissue and save it as a PNG.

    Parameters
    ----------
    data : pandas.DataFrame
        Per-cell table; the ``log2TPM`` and ``Annotation`` columns are plotted.
    df : pandas.DataFrame
        Table whose ``CellType`` column gives the top-to-bottom violin order.
    tissue : str
        Used as the file-name prefix of the saved image.

    Returns
    -------
    str
        Path of the written image, ``assets/<tissue>_<random number>.png``.
    """
    orders = list(df["CellType"].values)
    # Height per violin in inches; presumably chosen so that each violin lines
    # up with one row of the cell-type table -- TODO confirm.
    row_size = 13.05 / 28
    fig, ax = plt.subplots(figsize=(6, row_size * len(orders)), dpi=100)
    try:
        sns.violinplot(
            x="log2TPM",
            y="Annotation",
            data=data,
            order=orders,
            scale="width",
            inner=None,
            linewidth=1,
            cut=0,
            ax=ax,
        )
        ax.tick_params(
            axis="x",  # changes apply to the x-axis
            which="both",  # both major and minor ticks are affected
            bottom=False,  # ticks along the bottom edge are off
            top=True,  # ticks along the top edge are on
            labelbottom=False,  # tick labels are shown at the top instead
            labeltop=True,
        )
        ax.tick_params(
            axis="y",  # changes apply to the y-axis
            which="both",  # both major and minor ticks are affected
            bottom=False,
            top=False,
            right=True,  # ticks along the right edge are on
            left=False,  # ticks along the left edge are off
            labelleft=False,  # no cell type labels on the axis itself
        )
        # expression increases from right to left
        ax.invert_xaxis()
        ax.xaxis.label.set_visible(False)
        ax.yaxis.label.set_visible(False)
        out_file = f"assets/{tissue}_{random.randrange(10000)}.png"
        # savefig does not create missing directories
        os.makedirs("assets", exist_ok=True)
        fig.savefig(out_file, bbox_inches="tight")
    finally:
        # Close this specific figure (not whichever one pyplot considers
        # current), and do so even when plotting or saving fails.
        plt.close(fig)
    return out_file

In [19]:
# Build the dashboard of every tissue up front; each value is the tuple
# (cell_type_mean, layout, gene_exp_df) for the gene "Icosl".
tissue_layout = dict(
    (
        tissue_name,
        create_tissue_profile_dashboard(sc_labels, tissue_name, ["Icosl"], dge_files),
    )
    for tissue_name, dge_files in tissue_data_path.items()
)

***** GSM2906468_SmallIntestine1_dge.txt.gz has 4764 cells
Calculating MAGIC...
  Running MAGIC on 4764 cells and 14570 genes.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 15.55 seconds.
    Calculating KNN search...
    Calculated KNN search in 2.63 seconds.
    Calculating affinities...
    Calculated affinities in 2.80 seconds.
  Calculated graph and diffusion operator in 23.06 seconds.
  Calculating imputation...
Calculated MAGIC in 23.74 seconds.
***** GSM2906469_SmallIntestine2_dge.txt.gz has 2215 cells
Calculating MAGIC...
  Running MAGIC on 2215 cells and 12295 genes.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 7.18 seconds.
    Calculating KNN search...
    Calculated KNN search in 0.51 seconds.
    Calculating affinities...
    Calculated affinities in 0.50 seconds.
  Calculated graph and diffusion operator in 9.01 seconds.
  Calculating imputation...
Calculated MAGIC in 9.20 seconds.

  genes[~np.isin(genes, gene_names)]


***** GSM2906426_Kidney2_dge.txt.gz has 6220 cells
Calculating MAGIC...
  Running MAGIC on 6220 cells and 14298 genes.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 20.23 seconds.
    Calculating KNN search...
    Calculated KNN search in 5.19 seconds.
    Calculating affinities...
    Calculated affinities in 5.17 seconds.
  Calculated graph and diffusion operator in 32.96 seconds.
  Calculating imputation...
Calculated MAGIC in 33.38 seconds.


In [20]:
app = JupyterDash(__name__)


def _tissue_tab(tissue, dashboard):
    """Return the tab of one tissue: instructions, dashboard, horizontal view."""
    download_button = html.Button(
        "Download Figure",
        id=f"{tissue}_btn_figure",
        style={
            "display": "block",
            "margin-left": "auto",
            "margin-right": "auto",
            "margin-bottom": "10px",
        },
    )
    final_plot = html.Img(
        id=f"{tissue}_final_plot",
        style={
            "width": "70%",
            "height": "auto",
            "display": "block",
            "margin-left": "auto",
            "margin-right": "auto",
        },
    )
    horizontal_view = html.Div(
        [
            html.H3(
                "Horizontal View",
                style={"text-align": "center"},
            ),
            download_button,
            dcc.Download(id=f"{tissue}_download_figure"),
            final_plot,
            html.P(id=f"{tissue}_display_cell_type_order"),
        ],
    )
    return dcc.Tab(
        label=f"{tissue}_tab",
        value=tissue,
        children=[
            html.H3(
                'Change the cell type in "CellType" column to reorder',
                style={"text-align": "center"},
            ),
            dashboard,
            horizontal_view,
        ],
    )


# one tab per tissue; entry [1] of each tissue_layout value is its dashboard
app.layout = html.Div(
    [
        html.H1(
            "Rearrange Cell Type Orders in Violin Plots",
            style={"text-align": "center"},
        ),
        dcc.Tabs(
            id="tissue_selector",
            value="Small-Intestine",
            children=[
                _tissue_tab(name, parts[1]) for name, parts in tissue_layout.items()
            ],
            style={"padding": "10px"},
        ),
    ]
)

# arrange row order in the table and generate the reordered violin plots


def _remove_stale_plot(plot_path, tissue_name):
    """Delete a previously generated plot image when it is safe to do so.

    ``plot_path`` is the ``src`` value sent back by the browser and is
    therefore client-controlled. Only ``.png`` files located directly inside
    ``assets/`` whose name starts with ``<tissue_name>_`` are removed; any
    other value is ignored. A file that no longer exists is not an error.
    """
    if not plot_path or not isinstance(plot_path, str):
        return
    assets_dir = os.path.abspath("assets")
    target = os.path.abspath(plot_path)
    file_name = os.path.basename(target)
    is_generated_plot = (
        os.path.dirname(target) == assets_dir
        and file_name.startswith(f"{tissue_name}_")
        and file_name.endswith(".png")
    )
    if not is_generated_plot:
        return
    try:
        os.remove(target)
    except FileNotFoundError:
        pass


def _save_vertical_violin(expression, orders, out_file, dpi):
    """Draw the violin plot with cell types along the x-axis and save it."""
    fig, ax = plt.subplots(figsize=(20, 6), dpi=dpi)
    try:
        sns.violinplot(
            x="Annotation",
            y="log2TPM",
            data=expression,
            order=orders,
            scale="width",
            inner=None,
            linewidth=1,
            cut=0,
            legend=True,
            ax=ax,
        )
        ax.tick_params(axis="x", rotation=90)  # vertical cell type labels
        out_dir = os.path.dirname(out_file)
        if out_dir:
            # savefig does not create missing directories
            os.makedirs(out_dir, exist_ok=True)
        fig.savefig(out_file, bbox_inches="tight")
    finally:
        # close exactly this figure, also when plotting or saving fails
        plt.close(fig)


# Every callback reads its tissue from the "<tissue>_tag" store instead of the
# loop variable: functions defined in a loop would otherwise all see the last
# value of `tissue` when they are eventually called.
for tissue in tissue_layout.keys():

    @app.callback(
        Output(f"{tissue}_expression_table", "data"),
        Output(f"{tissue}_curr_celltype_order", "data"),
        Input(f"{tissue}_expression_table", "data"),
        # only read here, and written by this same callback -> State
        State(f"{tissue}_curr_celltype_order", "data"),
        State(f"{tissue}_tag", "data"),
        prevent_initial_call=True,
    )
    def swap_cell_type_order(df_dict, curr_ctype_order_json, tissue_tag):
        """Turn a dropdown edit of one row into a swap of two rows.

        The first row whose cell type differs from the stored order is the
        edited one; the row that previously held the chosen cell type receives
        the replaced cell type. Returns the table records rebuilt in the new
        order and the new order as JSON. When no cell type changed, the table
        is rebuilt in the current order.
        """
        selected_tissue = json.loads(tissue_tag)
        cell_type_mean = tissue_layout[selected_tissue][0]
        current_ctype_order = json.loads(curr_ctype_order_json)

        df = pd.DataFrame.from_dict(df_dict)
        # work on a plain list so the frame's underlying array is not mutated
        values = list(df["CellType"])

        for ori, modified in zip(current_ctype_order, values):
            if ori != modified:
                values[current_ctype_order.index(modified)] = ori
                break

        return cell_type_mean.loc[values, :].to_dict("records"), json.dumps(values)

    @app.callback(
        Output(f"{tissue}_plot", "src"),
        Input(f"{tissue}_expression_table", "data"),
        # previous image path: only read, so that it can be cleaned up
        State(f"{tissue}_plot", "src"),
        State(f"{tissue}_tag", "data"),
    )
    def update_plot(df_dict, old_plot, tissue_tag):
        """Redraw the horizontal violin plot in the table's current row order."""
        selected_tissue = json.loads(tissue_tag)
        _remove_stale_plot(old_plot, selected_tissue)
        df = pd.DataFrame.from_dict(df_dict)

        return make_plot(tissue_layout[selected_tissue][2], df, selected_tissue)

    @app.callback(
        Output(f"{tissue}_display_cell_type_order", "children"),
        Output(f"{tissue}_final_plot", "src"),
        Input(f"{tissue}_expression_table", "data"),
        # previous image path: only read, so that it can be cleaned up
        State(f"{tissue}_final_plot", "src"),
        State(f"{tissue}_tag", "data"),
    )
    def update_final_plot(df_dict, old_plot, tissue_tag):
        """Redraw the preview of the final figure and list the cell type order."""
        selected_tissue = json.loads(tissue_tag)
        _remove_stale_plot(old_plot, selected_tissue)
        df = pd.DataFrame.from_dict(df_dict)

        orders = list(df["CellType"].values)
        out_file = f"assets/{selected_tissue}_final_{random.randrange(10000)}.png"
        _save_vertical_violin(
            tissue_layout[selected_tissue][2], orders, out_file, dpi=100
        )
        return ", ".join(orders), out_file

    @app.callback(
        Output(f"{tissue}_download_figure", "data"),
        Input(f"{tissue}_btn_figure", "n_clicks"),
        State(f"{tissue}_expression_table", "data"),
        State(f"{tissue}_tag", "data"),
        prevent_initial_call=True,
    )
    def make_figure(n_clicks, df_dict, tissue_tag):
        """Write the 400-dpi figure plus its cell type order and send the figure."""
        selected_tissue = json.loads(tissue_tag)
        df = pd.DataFrame.from_dict(df_dict)
        orders = list(df["CellType"].values)
        out_file = f"assets/{selected_tissue}_final_figure.png"
        _save_vertical_violin(
            tissue_layout[selected_tissue][2], orders, out_file, dpi=400
        )

        # keep the order next to the figure so it can be reproduced later
        with open(f"assets/{selected_tissue}_final_figure_order.txt", "w") as o:
            o.write(", ".join(orders))

        return dcc.send_file(out_file)

In [27]:
# The bind address and port keep their original defaults but can be overridden
# with the DASH_HOST / DASH_PORT environment variables, so the notebook also
# runs on machines that do not own the 192.168.0.3 address.
dash_host = os.environ.get("DASH_HOST", "192.168.0.3")
dash_port = int(os.environ.get("DASH_PORT", "8892"))
app.run_server(host=dash_host, port=dash_port, debug=False)

192.168.0.3 - - [10/Jul/2021 12:37:47] "[37mGET /_shutdown_96511a27-5262-40ab-acdc-495cf3464116 HTTP/1.1[0m" 200 -
 * Running on http://192.168.0.3:8892/ (Press CTRL+C to quit)
192.168.0.3 - - [10/Jul/2021 12:37:47] "[37mGET /_alive_96511a27-5262-40ab-acdc-495cf3464116 HTTP/1.1[0m" 200 -


Dash app running on http://192.168.0.3:8892/


192.168.0.5 - - [10/Jul/2021 12:37:50] "[37mGET / HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:50] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:50] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:50] "[37mGET /_dash-component-suites/dash_table/async-highlight.js HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:50] "[37mGET /_dash-component-suites/dash_table/async-table.js HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:52] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:53] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:53] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:53] "[37mGET /assets/Small-Intestine_final_696.png HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12:37:53] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.0.5 - - [10/Jul/2021 12: