In [1]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Load the tab-separated KEGG enrichment table and show it as the cell output.
df_kegg = pd.read_csv(filepath_or_buffer='./enrich_kegg.tsv', delimiter='\t')
df_kegg

Unnamed: 0,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
0,ang03030,DNA replication,15/328,33/2418,7e-06,0.000679,0.000615,ANI_1_52134/ANI_1_338024/ANI_1_2728014/ANI_1_9...,15
1,ang03015,mRNA surveillance pathway,18/328,49/2418,3.2e-05,0.001511,0.001368,ANI_1_2466014/ANI_1_1820074/ANI_1_32174/ANI_1_...,18
2,ang04011,MAPK signaling pathway - yeast,22/328,69/2418,5.6e-05,0.001733,0.001569,ANI_1_184174/ANI_1_424024/ANI_1_188054/ANI_1_7...,22
3,ang04111,Cell cycle - yeast,22/328,82/2418,0.000886,0.020588,0.018642,ANI_1_408024/ANI_1_836064/ANI_1_3096024/ANI_1_...,22
4,ang04113,Meiosis - yeast,19/328,71/2418,0.002045,0.038037,0.034442,ANI_1_2466014/ANI_1_824074/ANI_1_212094/ANI_1_...,19


In [6]:
def _parse_gene_ratio(value):
    """Turn a GeneRatio cell (e.g. ``"15/328"``) into a float rounded to 3 decimals."""
    if isinstance(value, str):
        numerator, slash, denominator = value.partition('/')
        # A plain number stored as text (no slash) is accepted as-is.
        ratio = float(numerator) / float(denominator) if slash else float(numerator)
    else:
        ratio = float(value)
    return round(ratio, 3)


def Kegg_BubbleChart(df_kegg, pic_type='bubble', color='Geyser', p_adjust=0.05 ,font_size=15 ,bubble_num=30, bubble_size=30, width=900, height=800):
    """Draw a bubble chart or a bar chart from a KEGG enrichment result table.

    Parameters
    ----------
    df_kegg : pandas.DataFrame
        Enrichment table. Columns are looked up by name (``Description``,
        ``GeneRatio``, ``p.adjust``, ``Count``), so extra columns are ignored.
        If those names are absent and the table has exactly nine columns, the
        columns are interpreted positionally as
        ID, Pathway, GeneRatio, BgRatio, P.value, P.adjust, qvalue, geneID, Count.
    pic_type : {'bubble', 'bar'}
        Chart type to draw.
    color : str
        Value passed to ``color_continuous_scale``.
    p_adjust : float
        Only rows whose rounded adjusted p-value is strictly below this are kept.
    font_size : int
        Font size used in the figure layout.
    bubble_num : int
        Maximum number of rows plotted, taken after sorting by ``Count`` descending.
    bubble_size : int
        Value passed to ``size_max`` for the bubble chart.
    width, height : int
        Figure size passed to the layout.

    Returns
    -------
    The figure object produced by ``px.scatter`` or ``px.bar``.

    Raises
    ------
    ValueError
        If ``pic_type`` is not supported, or the required columns cannot be
        located in ``df_kegg``.
    """
    # Fail fast: without this check an unknown pic_type left `fig` unbound.
    supported_types = ('bubble', 'bar')
    if pic_type not in supported_types:
        raise ValueError(
            f"pic_type must be one of {supported_types}, got {pic_type!r}"
        )

    # Data preparation: select columns by name so additional columns do not break the function.
    required = ['Pathway', 'GeneRatio', 'P.adjust', 'Count']
    table = df_kegg.rename(
        columns={'Description': 'Pathway', 'pvalue': 'P.value', 'p.adjust': 'P.adjust'}
    )
    if not set(required).issubset(table.columns):
        # Backward-compatible fallback: positional naming for a nine-column table.
        positional_names = ["ID", "Pathway", "GeneRatio", "BgRatio", "P.value", "P.adjust", 'qvalue', 'geneID', 'Count']
        if df_kegg.shape[1] != len(positional_names):
            missing = [name for name in required if name not in table.columns]
            raise ValueError(
                f"df_kegg is missing required columns {missing} and does not have "
                f"the {len(positional_names)} columns needed for positional naming"
            )
        table = df_kegg.copy()
        table.columns = positional_names

    # Explicit copy so the column assignments below never write into a view.
    plot_df = table[required].copy()
    plot_df['GeneRatio'] = plot_df['GeneRatio'].map(_parse_gene_ratio)  # "a/b" -> float, no eval
    plot_df['P.adjust'] = plot_df['P.adjust'].round(6)  # limit decimals of P.adjust
    plot_df = plot_df[plot_df['P.adjust'] < p_adjust]  # filter by adjusted p-value
    plot_df = plot_df.sort_values(by='Count', ascending=False)  # largest Count first
    plot_df = plot_df.iloc[:bubble_num]  # keep at most bubble_num rows

    # Layout settings shared by both chart types.
    layout_args = {
        'title': "KEGG Enrichment Analysis",
        'yaxis_title': "Pathway",
        'yaxis': dict(autorange="reversed"),
        'font': dict(family="Arial", size=font_size),
        'template': "plotly_white",
        'width': width,
        'height': height
    }
    # Color axis settings.
    color_axis_args = {
        'colorbar_title': "P.adjust",
        'colorbar_tickformat': ".3f",
        'colorbar': dict(dtick=0.005)
    }

    # Draw the requested chart type.
    if pic_type == 'bubble':
        fig = px.scatter(
            plot_df,
            x='GeneRatio',
            y='Pathway',
            size='Count',
            color='P.adjust',
            color_continuous_scale=color,
            opacity=0.85,
            hover_data=['P.adjust', 'Count'],
            size_max=bubble_size
            )
    else:  # pic_type == 'bar' (validated above)
        fig = px.bar(
            plot_df,
            x='Count',
            y='Pathway',
            color='P.adjust',
            color_continuous_scale=color,
            opacity=0.85,
            hover_data=['P.adjust', 'Count']
            )

    # Apply layout and color axis settings.
    fig.update_layout(**layout_args)
    fig.update_coloraxes(**color_axis_args)

    return fig

# Render the enrichment result as a bar chart with the reversed red-blue scale.
Kegg_BubbleChart(
    df_kegg,
    pic_type='bar',
    color='RdBu_r',
)

In [5]:
# Load the DAVID KEGG export (Excel workbook) and show it as the cell output.
df_davie = pd.read_excel(io='./David-kegg.xlsx')
df_davie

Unnamed: 0,Category,Term,Count,%,PValue,Genes,List Total,Pop Hits,Pop Total,Fold Enrichment,Bonferroni,Benjamini,FDR
0,KEGG_PATHWAY,mtm00190:Oxidative phosphorylation,22,6.094183,3.000078e-09,"MYCTH_73070, MYCTH_2315552, MYCTH_2316564, MYC...",131,82,2194,4.49339,2.160056e-07,2.160056e-07,2.160056e-07
1,KEGG_PATHWAY,mtm03040:Spliceosome,12,3.3241,0.007747638,"MYCTH_2298987, MYCTH_2296655, MYCTH_2137175, M...",131,82,2194,2.45094,0.4287921,0.278915,0.278915
2,KEGG_PATHWAY,mtm03050:Proteasome,7,1.939058,0.01312366,"MYCTH_2313806, MYCTH_2133968, MYCTH_2296573, M...",131,34,2194,3.448137,0.6137068,0.3149678,0.3149678
3,KEGG_PATHWAY,mtm03018:RNA degradation,7,1.939058,0.06096503,"MYCTH_2298987, MYCTH_2311727, MYCTH_2115531, M...",131,48,2194,2.44243,0.9892084,1.0,1.0
4,KEGG_PATHWAY,mtm03010:Ribosome,11,3.047091,0.08830933,"MYCTH_2311726, MYCTH_2304441, MYCTH_2086470, M...",131,105,2194,1.754562,0.9987147,1.0,1.0


### TBtools

In [11]:
# Load the TBtools KEGG enrichment demo table (comma-separated, the read_csv default).
df_kegg = pd.read_csv('./KEGG_Enrichment_demo.csv')

# The color scale must be passed by keyword: the second positional parameter
# of Kegg_BubbleChart is pic_type, not color.
fig_kegg = Kegg_BubbleChart(df_kegg, color='RdBu_r')

# Preview the loaded table; kept as the last expression so the cell displays it.
df_kegg.head()

Unnamed: 0,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count,Link
0,hsa03013,Nucleocytoplasmic transport,7/54,140/11319,4e-06,0.000266,0.00024,O00410/O00422/O00505/O00629/Q86V81/Q8IYB3/Q8N1F7,7,http://www.genome.jp/kegg-bin/show_pathway?hsa...
1,hsa03040,Spliceosome,7/54,180/11319,2.2e-05,0.000628,0.000568,O14776/Q6P2Q9/Q7L014/Q7RTV0/Q86V81/Q86XP3/Q8IWX8,7,http://www.genome.jp/kegg-bin/show_pathway?hsa...
2,hsa03015,mRNA surveillance pathway,6/54,127/11319,2.9e-05,0.000628,0.000568,O00422/Q6UN15/Q6UXN9/Q86U42/Q86V81/Q8IYB3,12,http://www.genome.jp/kegg-bin/show_pathway?hsa...
3,hsa03050,Proteasome,4/54,75/11319,0.000438,0.007002,0.006334,O00231/O00232/O00233/O00487,4,http://www.genome.jp/kegg-bin/show_pathway?hsa...
4,hsa05017,Spinocerebellar ataxia,5/54,204/11319,0.002797,0.035801,0.032386,O00231/O00232/O00233/O00487/O00567,5,http://www.genome.jp/kegg-bin/show_pathway?hsa...
