In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.offline import plot

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.offline import plot


def _clipped_neg_log10(p_values, cap):
    """Return ``-log10(p_values)`` limited to ``[0, cap]``.

    A p-value of exactly 0 yields ``+inf`` before clipping and therefore maps
    to ``cap``; negative p-values yield NaN and stay NaN.
    """
    # log10(0) and log10(negative) are expected here, so silence the warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        scores = -np.log10(p_values)
    return np.clip(scores, 0, cap)


def _marker_trace(x, y, names, color, opacity, size, label):
    """Build one scatter trace of black-outlined circular markers."""
    return go.Scatter(
        x=x, y=y, mode='markers', name=label, text=names,
        marker=dict(
            color=color,
            size=size,
            sizemode='area',      # how the marker size value is interpreted
            symbol='circle',
            opacity=opacity,      # marker transparency
            line=dict(color='black', width=0.4),  # marker outline
        ),
    )


def _label_trace(x, y, symbol, position):
    """Build a text-only trace that writes ``symbol`` at the point ``(x, y)``."""
    return go.Scatter(
        x=[x], y=[y], text=[symbol], mode='text',
        textposition=position,
        textfont=dict(size=11),
        showlegend=False,
    )


def plot_volcano(df, p_threshold=0.05, logFC_threshold=1, width=1200, height=900 ,up_color='#f08d1a', down_color='#7fa4ca', symbol_num=3, bubble_size=5, up_donw_num='yes'):
    """Draw a volcano plot (logFC against -log10 p-value) as a plotly figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with exactly three columns, interpreted by position as gene
        symbol, log fold change and p-value. The caller's frame is left
        untouched; rows containing missing values are ignored.
    p_threshold : float
        A gene is significant only if its p-value is below this value.
    logFC_threshold : float
        A gene is significant only if ``abs(logFC)`` exceeds this value.
    width, height : int
        Figure size passed to the layout.
    up_color, down_color : str
        Marker colours for significant genes with positive / negative logFC.
    symbol_num : int
        Number of genes to label on each side (largest logFC among the
        up-regulated, smallest logFC among the down-regulated).
    bubble_size : int or float
        Marker size.
    up_donw_num : str
        When equal to ``'yes'``, the counts of up- and down-regulated genes
        are written in the top-left corner of the plot.

    Returns
    -------
    plotly.graph_objects.Figure

    Raises
    ------
    ValueError
        If ``df`` does not have exactly three columns.
    """
    y_cap = 100  # -log10(p) values above this are drawn at the cap

    if df.shape[1] != 3:
        raise ValueError(
            "plot_volcano expects exactly 3 columns (Symbol, logFC, P.Value); "
            "got {}".format(df.shape[1])
        )

    # Work on a private copy so renaming columns and dropping rows never
    # leaks back into the caller's DataFrame.
    data = df.copy()
    data.columns = ['Symbol', 'logFC', 'P.Value']
    data = data.dropna().reset_index(drop=True)

    # Extract the plotted quantities as plain arrays.
    gene_names = data['Symbol'].to_numpy()
    logFC = data['logFC'].to_numpy(dtype=float)
    p_values = data['P.Value'].to_numpy(dtype=float)

    # p == 0 becomes +inf and is clipped to the cap instead of turning into
    # NaN, so the most significant genes stay visible.
    neg_log_p = _clipped_neg_log10(p_values, y_cap)
    data['neg_log_p'] = neg_log_p

    # Classify genes against both thresholds.
    significant = (np.abs(logFC) > logFC_threshold) & (p_values < p_threshold)
    upregulated = significant & (logFC > 0)
    downregulated = significant & (logFC < 0)
    nonsignificant = ~significant

    # Genes with the most extreme fold changes get a text label.
    top_up = data[upregulated].nlargest(symbol_num, 'logFC')
    top_down = data[downregulated].nsmallest(symbol_num, 'logFC')

    fig = go.Figure()
    groups = (
        (upregulated, up_color, 0.8, 'Up'),
        (downregulated, down_color, 0.8, 'Down'),
        (nonsignificant, '#A9A9A9', 0.4, 'Nonsignificant'),
    )
    for mask, color, opacity, label in groups:
        fig.add_trace(_marker_trace(logFC[mask], neg_log_p[mask], gene_names[mask],
                                    color, opacity, bubble_size, label))

    # Labels use the same clipped y value as the markers so that a label is
    # always drawn next to its point and inside the fixed y-axis range.
    for rows, position in ((top_up, "top center"), (top_down, "bottom center")):
        for symbol, fold, score in zip(rows['Symbol'], rows['logFC'], rows['neg_log_p']):
            fig.add_trace(_label_trace(fold, score, symbol, position))

    if up_donw_num == 'yes':
        # Write the number of up- and down-regulated genes in the top-left
        # corner of the plotting area (paper coordinates).
        counts = (
            ("Up: {} genes", int(np.sum(upregulated)), 1),
            ("Down: {} genes", int(np.sum(downregulated)), 0.95),
        )
        for template, count, y_pos in counts:
            fig.add_annotation(xref="paper", yref="paper",
                               x=0, y=y_pos, showarrow=False,
                               xanchor='left', yanchor='top',
                               text=template.format(count),
                               font=dict(size=14),
                               align="left",
                               bgcolor="white",
                               borderpad=4)

    # Threshold lines. With no rows left there is no data range to span, so
    # fall back to the fold-change thresholds for the horizontal line.
    if logFC.size:
        x_min, x_max = float(np.min(logFC)), float(np.max(logFC))
    else:
        x_min, x_max = -abs(logFC_threshold), abs(logFC_threshold)
    plotted = neg_log_p[~np.isnan(neg_log_p)]
    line_top = (float(plotted.max()) if plotted.size else 0.0) + 10
    p_line = -np.log10(p_threshold)
    dashed = dict(color="Black", width=1, dash="dash")
    fig.update_layout(shapes=[
        dict(type="line", x0=x_min, x1=x_max, y0=p_line, y1=p_line, line=dashed),
        dict(type="line", x0=logFC_threshold, x1=logFC_threshold, y0=0, y1=line_top, line=dashed),
        dict(type="line", x0=-logFC_threshold, x1=-logFC_threshold, y0=0, y1=line_top, line=dashed),
    ])

    # Figure layout; the y range leaves headroom above the cap so points
    # drawn at the cap are fully visible.
    fig.update_layout(
        xaxis_title='log2 Fold Change',
        yaxis_title='-log10(p-value)',
        yaxis=dict(range=[0, 105]),
        title='DE Analysis Volcano Plot',
        template="plotly_white",
        height=height,
        width=width,
    )

    return fig


# Demo run: load the tab-separated demo table and draw its volcano plot.
# Alternative input: pd.read_csv('./N1.6d_vs_Vma1.6d.csv', sep=',').copy()
df = pd.read_csv('./demo_deseq2.txt', sep='\t').copy()
volcano_options = dict(
    logFC_threshold=1,
    width=900,
    height=700,
    up_color='#f08d1a',
    down_color='#7fa4ca',
    symbol_num=0,      # no gene labels in the demo
    bubble_size=9,
    up_donw_num='yes',
)
plot_volcano(df, **volcano_options)