In [5]:
import pandas as pd
import numpy as np


df = pd.read_csv('./demo_deseq.txt', sep='\t')

In [6]:
import plotly.graph_objects as go
from plotly.utils import PlotlyJSONEncoder
import json


def plot_volcano(df, p_threshold, logFC_threshold, up_color, down_color, symbol_num, bubble_size, up_donw_num):

    # 重新设置列名
    df.columns = ['Symbol', 'logFC', 'P.Value']

    # 如果df有空值，删除空值
    df = df.dropna()

    # 提取数据
    gene_names = df['Symbol'].values
    logFC = df['logFC'].values
    pvalue = -np.log10(df['P.Value'].values)

    # 根据阈值判断显著性
    significant = (np.abs(logFC) > logFC_threshold) & (df['P.Value'].values < p_threshold)
    upregulated = significant & (logFC > 0)
    downregulated = significant & (logFC < 0)
    nonsignificant = ~significant


    # Sort to get top 5 upregulated and downregulated genes
    df_upregulated = df[upregulated].nlargest(symbol_num, 'logFC')
    df_downregulated = df[downregulated].nsmallest(symbol_num, 'logFC')

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=logFC[upregulated], y=pvalue[upregulated], mode='markers',
                            marker=dict(color=up_color, size=bubble_size, sizemode='area'), name='Up',
                            text=gene_names[upregulated]))
    fig.add_trace(go.Scatter(x=logFC[downregulated], y=pvalue[downregulated], mode='markers',
                            marker=dict(color=down_color, size=bubble_size, sizemode='area'), name='Down',
                            text=gene_names[downregulated]))
    fig.add_trace(go.Scatter(x=logFC[nonsignificant], y=pvalue[nonsignificant], mode='markers',
                            marker=dict(color='grey', size=bubble_size, sizemode='area'), name='Nonsignificant',
                            text=gene_names[nonsignificant]))


    # Annotate top 5 upregulated genes
    for _, row in df_upregulated.iterrows():
        fig.add_trace(go.Scatter(x=[row['logFC']], y=[-np.log10(row['P.Value'])],
                                 text=[row['Symbol']], mode='text',
                                 textposition="top center",
                                 textfont=dict(size=11),
                                 showlegend=False))

    # Annotate top 5 downregulated genes
    for _, row in df_downregulated.iterrows():
        fig.add_trace(go.Scatter(x=[row['logFC']], y=[-np.log10(row['P.Value'])],
                                 text=[row['Symbol']], mode='text',
                                 textposition="bottom center",
                                 # 定义字体大小
                                 textfont=dict(size=11),
                                 showlegend=False))
        
    # 添加阈值线
    x_min = np.min(logFC)
    x_max = np.max(logFC)
    fig.add_shape(
        type="line",
        x0=x_min,
        x1=x_max,
        y0=-np.log10(p_threshold),
        y1=-np.log10(p_threshold),
        line=dict(color="Black", width=1, dash="dash")
    )
    fig.add_shape(
        type="line",
        x0=logFC_threshold,
        x1=logFC_threshold,
        y0=0,
        y1=max(pvalue),
        line=dict(color="Black", width=1, dash="dash")
    )
    fig.add_shape(
        type="line",
        x0=-logFC_threshold,
        x1=-logFC_threshold,
        y0=0,
        y1=max(pvalue),
        line=dict(color="Black", width=1, dash="dash")
    )

    if up_donw_num == 'yes':
        # 计算上调和下调基因的数量，并在图像中左上角添加注释
        upregulated_num = np.sum(upregulated)
        downregulated_num = np.sum(downregulated)
        fig.add_annotation(xref="paper", yref="paper",
                        x=0, y=1, showarrow=False,
                        text="Up: {} genes".format(upregulated_num))
        fig.add_annotation(xref="paper", yref="paper",
                            x=0, y=0.95, showarrow=False,
                            text="Downregulated: {} genes".format(downregulated_num))
    else:
        pass

    # 设置图像布局
    fig.update_layout(xaxis_title='log2 Fold Change',
                      yaxis_title='-log10(p-value)',
                      title='DEG Analysis Volcano Plot',
                      template="plotly_white",
                      height=600,
                      width=800)

    # 将fig转为json格式
    # fig_json = json.dumps(fig, cls=PlotlyJSONEncoder)
    # return fig_json

    plot_html = plot(fig, output_type='div', include_plotlyjs=False)        
    return plot_html



plot_volcano(df2,0.05,2,'#EF553b','#1F77b4',3,5,'yes')

NameError: name 'df2' is not defined

In [10]:
def plot_volcano(df, p_threshold, logFC_threshold, up_color, down_color, symbol_num, bubble_size, up_donw_num):

    # 重新设置列名
    df.columns = ['Symbol', 'logFC', 'P.Value']

    # 如果df中有空值，删除该行
    df.dropna(inplace=True)

    # 提取数据
    gene_names = df['Symbol'].values
    logFC = df['logFC'].values
    pvalue = -np.log10(df['P.Value'].values)

    # 根据阈值判断显著性
    significant = (np.abs(logFC) > logFC_threshold) & (df['P.Value'].values < p_threshold)
    upregulated = significant & (logFC > 0)
    downregulated = significant & (logFC < 0)
    nonsignificant = ~significant


    # Sort to get top 5 upregulated and downregulated genes
    df_upregulated = df[upregulated].nlargest(symbol_num, 'logFC')
    df_downregulated = df[downregulated].nsmallest(symbol_num, 'logFC')

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=logFC[upregulated], y=pvalue[upregulated], mode='markers',
                            marker=dict(color=up_color, size=bubble_size, sizemode='area'), name='Up',
                            text=gene_names[upregulated]))
    fig.add_trace(go.Scatter(x=logFC[downregulated], y=pvalue[downregulated], mode='markers',
                            marker=dict(color=down_color, size=bubble_size, sizemode='area'), name='Down',
                            text=gene_names[downregulated]))
    fig.add_trace(go.Scatter(x=logFC[nonsignificant], y=pvalue[nonsignificant], mode='markers',
                            marker=dict(color='grey', size=bubble_size, sizemode='area'), name='Nonsignificant',
                            text=gene_names[nonsignificant]))


    # Annotate top 5 upregulated genes
    for _, row in df_upregulated.iterrows():
        fig.add_trace(go.Scatter(x=[row['logFC']], y=[-np.log10(row['P.Value'])],
                                 text=[row['Symbol']], mode='text',
                                 textposition="top center",
                                 textfont=dict(size=11),
                                 showlegend=False))

    # Annotate top 5 downregulated genes
    for _, row in df_downregulated.iterrows():
        fig.add_trace(go.Scatter(x=[row['logFC']], y=[-np.log10(row['P.Value'])],
                                 text=[row['Symbol']], mode='text',
                                 textposition="bottom center",
                                 # 定义字体大小
                                 textfont=dict(size=11),
                                 showlegend=False))

    if up_donw_num == 'yes':
        # 计算上调和下调基因的数量，并在图像中左上角添加注释
        upregulated_num = np.sum(upregulated)
        downregulated_num = np.sum(downregulated)
        fig.add_annotation(xref="paper", yref="paper",
                        x=0, y=1, showarrow=False,
                        text="Up: {} genes".format(upregulated_num))
        fig.add_annotation(xref="paper", yref="paper",
                            x=0, y=0.95, showarrow=False,
                            text="Downregulated: {} genes".format(downregulated_num))
    else:
        pass

    # 设置图像布局
    fig.update_layout(xaxis_title='log2 Fold Change',
                      yaxis_title='-log10(p-value)',
                      title='DEG Analysis Volcano Plot',
                      template="plotly_white",
                      height=600,
                      width=800)
            
    # 添加阈值线
    x_min = np.min(logFC)
    x_max = np.max(logFC)
    fig.update_layout(shapes=[
        dict(type="line", x0=x_min, x1=x_max, y0=-np.log10(p_threshold), y1=-np.log10(p_threshold), line=dict(color="Black", width=1, dash="dash")),
        dict(type="line", x0=logFC_threshold, x1=logFC_threshold, y0=0, y1=max(pvalue)+10, line=dict(color="Black", width=1, dash="dash")),
        dict(type="line", x0=-logFC_threshold, x1=-logFC_threshold, y0=0, y1=max(pvalue)+10, line=dict(color="Black", width=1, dash="dash"))
    ])

    return fig

plot_volcano(df,0.05,2,'#EF553b','#1F77b4',5,10,'yes')