In [7]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
import copy

In [8]:
def Kegg_BubbleChart(df, colors=None, bubble_num=None, bubble_size=None, stanalytype=None, 
                     stanalynum=None, width=None, height=None, sort=True):
    """Draw a KEGG enrichment bubble chart with Plotly Express and return it.

    The rows of ``df`` are optionally filtered by a significance column,
    truncated to the first ``bubble_num`` rows and sorted by ``Count``
    (in that order). Each remaining row becomes one bubble placed at
    (``Count``, ``Description``), sized by ``Count`` and coloured by the
    ``stanalytype`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Enrichment table. Must contain the ``Count`` and ``Description``
        columns, plus the column named by ``stanalytype`` when one is given.
    colors : optional
        Forwarded to ``px.scatter`` as ``color_continuous_scale``.
    bubble_num : int, optional
        Keep only the first ``bubble_num`` rows (after filtering, before
        sorting). A falsy value (``None`` or ``0``) keeps every row.
    bubble_size : number, optional
        Forwarded to ``px.scatter`` as ``size_max``. A falsy value
        (``None`` or ``0``) falls back to 40.
    stanalytype : str, optional
        Column used to colour the bubbles and to title the colorbar.
        Row filtering is only applied for ``'pvalue'``, ``'p.adjust'``
        and ``'qvalue'``.
    stanalynum : float, optional
        Keep only rows whose ``stanalytype`` value is strictly below this
        cutoff. A falsy value (``None`` or ``0``) disables filtering.
    width, height : int, optional
        Figure size, forwarded to ``fig.update_layout``.
    sort : bool, default True
        When truthy, sort the rows by ``Count`` in descending order.

    Returns
    -------
    The figure created by ``px.scatter``. It is also displayed with
    ``fig.show()`` before being returned.
    """
    default_size_max = 40
    filterable_columns = ('pvalue', 'p.adjust', 'qvalue')

    # Keep only rows whose chosen statistic is strictly below the cutoff.
    if stanalytype in filterable_columns and stanalynum:
        df = df[df[stanalytype] < stanalynum]

    # Keep the first bubble_num rows, in the table's current order.
    if bubble_num:
        df = df.iloc[:bubble_num, :]

    # Sort by Count (largest first) unless the caller opted out.
    if sort:
        df = df.sort_values(by='Count', ascending=False)

    # Honour the caller's bubble size; fall back to the default when unset.
    size_max = bubble_size if bubble_size else default_size_max

    fig = px.scatter(df, 
                     x='Count', 
                     y='Description', 
                     size='Count', 
                     color=stanalytype, 
                     color_continuous_scale=colors,  # e.g. 'Tealrose_r'
                     opacity=0.85, 
                     size_max=size_max)

    fig.update_layout(title='KEGG Enrichment Analysis', 
                      xaxis_title='Counts', 
                      yaxis_title='Terms', 
                      yaxis=dict(autorange='reversed'),  # flip the y axis direction
                      font=dict(family='Arial', size=16), 
                      template='plotly_white',
                      width=width,    # figure width
                      height=height,  # figure height
                      )
    fig.update_coloraxes(colorbar_title=stanalytype, 
                         colorbar_tickformat='.3f',  # alternative: colorbar=dict(dtick=0.005)
                        )
    fig.show()
    return fig

In [9]:
# Load the KEGG enrichment results; the file is comma-separated despite its .txt extension.
filepath = './enrich.kegg.txt'
df_kegg = pd.read_csv(filepath)  # ',' is already read_csv's default separator
df_kegg

Unnamed: 0.1,Unnamed: 0,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
0,ko03030,ko03030,DNA replication,15/310,34/2405,6e-06,0.000511,0.000468,ANI_1_52134/ANI_1_338024/ANI_1_2728014/ANI_1_9...,15
1,ko03015,ko03015,mRNA surveillance pathway,18/310,49/2405,1.6e-05,0.000671,0.000615,ANI_1_2466014/ANI_1_1820074/ANI_1_32174/ANI_1_...,18
2,ko04011,ko04011,MAPK signaling pathway - yeast,22/310,69/2405,2.5e-05,0.000699,0.00064,ANI_1_184174/ANI_1_424024/ANI_1_188054/ANI_1_7...,22
3,ko04111,ko04111,Cell cycle - yeast,22/310,81/2405,0.000355,0.007545,0.006915,ANI_1_408024/ANI_1_836064/ANI_1_3096024/ANI_1_...,22
4,ko04113,ko04113,Meiosis - yeast,19/310,71/2405,0.001095,0.018623,0.017066,ANI_1_2466014/ANI_1_824074/ANI_1_212094/ANI_1_...,19


In [12]:
# Render the KEGG bubble chart from the enrichment table and keep the figure handle.
fig_kegg = Kegg_BubbleChart(
    df_kegg,
    colors='Rdbu_r',         # colour scale used for the colorbar
    bubble_num=3,            # keep only the first 3 rows that pass the filter
    bubble_size=35,          # requested bubble size
    stanalytype='p.adjust',  # statistic column used for filtering and colouring
    stanalynum=0.05,         # cutoff applied to that statistic
    width=900,               # figure width
    height=600,              # figure height
    sort=True,               # whether to sort the rows by Count
)