In [13]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

BubbleChart 富集

In [16]:
def BubbleChart_2(df, color, bubble_num, bubble_size, go_type='all', width=1000, height=800):
    """Build a GO-enrichment bubble chart from a six-column enrichment table.

    The columns of ``df`` are renamed *by position* to
    ``Class, ID, Description, Counts, GeneRatio, P.adjust``, so the frame must
    contain exactly six columns in that order. The rename is applied to a copy;
    the caller's frame is left untouched.

    Rows with ``P.adjust`` >= 0.05 are discarded, the table is optionally
    restricted to one ontology, and then the ``bubble_num`` rows with the
    largest ``Counts`` are plotted (``GeneRatio`` on x, ``Description`` on y,
    bubble size from ``Counts``, colour from ``P.adjust``).

    Parameters
    ----------
    df : pandas.DataFrame
        Enrichment table with six columns in the order listed above.
    color : str or sequence
        Forwarded to ``px.scatter`` as ``color_continuous_scale``
        (e.g. ``'Geyser'``; a ``_r`` suffix reverses the scale).
    bubble_num : int
        Maximum number of rows (bubbles) to draw.
    bubble_size : int or float
        Forwarded to ``px.scatter`` as ``size_max``.
    go_type : str, default 'all'
        ``"BP"``, ``"CC"`` or ``"MF"`` keeps only rows whose ``Class`` contains
        that text; any other value keeps every row.
    width, height : int
        Figure size forwarded to ``fig.update_layout``.

    Returns
    -------
    The figure object returned by ``px.scatter`` after the layout and
    colour-axis updates have been applied.
    """
    # Work on a copy so the positional rename never leaks into the caller's frame.
    df = df.copy()
    df.columns = ["Class", "ID", "Description", "Counts", "GeneRatio", "P.adjust"]

    # Keep only significantly enriched terms.
    df = df[df["P.adjust"] < 0.05]

    # Restrict to the requested ontology BEFORE truncating; otherwise the
    # caller receives "the requested class among the overall top N" instead of
    # "the top N of the requested class".
    if go_type in ("BP", "CC", "MF"):
        df = df[df["Class"].str.contains(go_type, na=False)]

    # Largest gene counts first, then keep at most bubble_num rows.
    df = df.sort_values(by="Counts", ascending=False).iloc[:bubble_num]

    fig = px.scatter(
        df,
        x="GeneRatio",
        y="Description",
        size="Counts",
        color="P.adjust",
        color_continuous_scale=color,
        opacity=0.85,
        hover_name="Class",
        hover_data=["ID", "Description", "Counts", "GeneRatio", "P.adjust"],
        size_max=bubble_size,
    )

    fig.update_layout(
        title="GO Enrichment Analysis",
        xaxis_title="GeneRatio",
        yaxis_title="Description",
        yaxis=dict(autorange="reversed"),  # reverse the y axis
        font=dict(family="Arial", size=14),
        template="plotly_white",
        width=width,
        height=height,
    )

    # Colour bar: label it and tick every 0.005 with three decimals.
    fig.update_coloraxes(
        colorbar_title="P.adjust",
        colorbar_tickformat=".3f",
        colorbar_dtick=0.005,
    )

    return fig

In [17]:
def _parse_gene_ratio(value):
    """Convert a ``"k/n"`` ratio string (or a plain number) to a float rounded to 3 decimals.

    Replaces ``eval`` on file contents: only a single division of two numbers
    is understood, so nothing read from the TSV is ever executed as code.
    """
    text = str(value).strip()
    if "/" in text:
        numerator, denominator = text.split("/", 1)
        return round(float(numerator) / float(denominator), 3)
    return round(float(text), 3)


# Load the enrichment table, keep the six columns the chart needs (in the
# order BubbleChart_2 expects) and turn GeneRatio into a number. Building the
# frame in one chain avoids assigning into a slice of the raw table.
df_go_enrich = (
    pd.read_csv("./go_enrich.tsv", sep="\t")
    .loc[:, ["category", "ID", "Description", "Count", "GeneRatio", "p.adjust"]]
    .assign(GeneRatio=lambda frame: frame["GeneRatio"].map(_parse_gene_ratio))
)
df_go_enrich

Unnamed: 0,category,ID,Description,Count,GeneRatio,p.adjust
0,MF,GO:0003924,GTPase activity,24,0.037,1.292722e-05
1,MF,GO:0016887,ATPase activity,31,0.048,1.392562e-05
2,MF,GO:0005525,GTP binding,26,0.04,2.634029e-05
3,MF,GO:0003676,nucleic acid binding,45,0.07,0.0009061235
4,MF,GO:0004842,ubiquitin-protein transferase activity,9,0.014,0.001208201
5,MF,GO:0005085,guanyl-nucleotide exchange factor activity,10,0.015,0.001495814
6,MF,GO:0017056,structural constituent of nuclear pore,6,0.009,0.003082665
7,MF,GO:0004672,protein kinase activity,30,0.046,0.005755555
8,MF,GO:0003723,RNA binding,32,0.05,0.007359525
9,MF,GO:0031267,small GTPase binding,6,0.009,0.02120568


In [20]:
# Plot the molecular-function (MF) terms of the prepared enrichment table.
BubbleChart_2(
    df_go_enrich,
    color='Geyser',
    bubble_num=50,
    bubble_size=50,
    go_type='MF',
    width=1200,
    height=800,
)

### TBtools出来的测试数据

In [2]:
def BubbleChart(df, color, bubble_num, bubble_size, go_type='all', width=1000, height=800):
    """Build a GO-enrichment bubble chart from a six-column table with full class names.

    The columns of a copy of ``df`` are renamed *by position* to
    ``Class, GO_ID, GO_Function, Counts, Enrichment Score, P.adjust``, so the
    frame must contain exactly six columns in that order. The caller's frame
    is not modified.

    Rows with ``P.adjust`` >= 0.05 are discarded, the table is optionally
    restricted to one ontology, and then the ``bubble_num`` rows with the
    largest ``Counts`` are plotted (``Enrichment Score`` on x, ``GO_Function``
    on y, bubble size from ``Counts``, colour from ``P.adjust``).

    Parameters
    ----------
    df : pandas.DataFrame
        Enrichment table with six columns in the order listed above.
    color : str or sequence
        Forwarded to ``px.scatter`` as ``color_continuous_scale``
        (a ``_r`` suffix reverses the scale).
    bubble_num : int
        Maximum number of rows (bubbles) to draw.
    bubble_size : int or float
        Forwarded to ``px.scatter`` as ``size_max``.
    go_type : str, default 'all'
        ``"BP"``, ``"CC"`` or ``"MF"`` keeps only rows whose ``Class`` contains
        "Biological process", "Cellular component" or "Molecular function"
        respectively; any other value keeps every row.
    width, height : int
        Figure size forwarded to ``fig.update_layout``.

    Returns
    -------
    The figure object returned by ``px.scatter`` after the layout and
    colour-axis updates have been applied.
    """
    # Abbreviation -> text searched for in the Class column.
    class_labels = {
        "BP": "Biological process",
        "CC": "Cellular component",
        "MF": "Molecular function",
    }

    # Copy first so the positional rename stays local to this function.
    df = df.copy()
    df.columns = ["Class", "GO_ID", "GO_Function", "Counts", "Enrichment Score", "P.adjust"]

    # Keep only significantly enriched terms.
    df = df[df["P.adjust"] < 0.05]

    # Restrict to the requested ontology BEFORE truncating; otherwise the
    # caller receives "the requested class among the overall top N" instead of
    # "the top N of the requested class".
    class_label = class_labels.get(go_type)
    if class_label is not None:
        df = df[df["Class"].str.contains(class_label, na=False)]

    # Largest gene counts first, then keep at most bubble_num rows.
    df = df.sort_values(by="Counts", ascending=False).iloc[:bubble_num]

    fig = px.scatter(
        df,
        x="Enrichment Score",
        y="GO_Function",
        size="Counts",
        color="P.adjust",
        color_continuous_scale=color,
        opacity=0.85,
        hover_name="Class",
        hover_data=["GO_ID", "GO_Function", "Counts", "Enrichment Score", "P.adjust"],
        size_max=bubble_size,
    )

    fig.update_layout(
        title="GO Enrichment Analysis",
        xaxis_title="Enrichment Score",
        yaxis_title="GO_Function",
        yaxis=dict(autorange="reversed"),  # reverse the y axis
        font=dict(family="Arial", size=14),
        template="plotly_white",
        width=width,
        height=height,
    )

    # Colour bar: label it and tick every 0.005 with three decimals.
    fig.update_coloraxes(
        colorbar_title="P.adjust",
        colorbar_tickformat=".3f",
        colorbar_dtick=0.005,
    )

    return fig

In [6]:
# df_GoEnrich = pd.read_csv("GO_Enrichment_demo.csv")
# df_BubbleChart = df_GoEnrich[['Class', 'GO_ID', 'GO_Name',
#                               'HitsGenesCountsInSelectedSet', 'EnrichmentScore', 'corrected p-value(BH method)']]
# df_BubbleChart

# BubbleChart(df_BubbleChart, 'Rdbu_r', 50, 30, width=1000, height=600)

Unnamed: 0,Class,GO_ID,GO_Name,HitsGenesCountsInSelectedSet,EnrichmentScore,corrected p-value(BH method)
0,Molecular function,GO:0003723,RNA binding,37,3.996168,2.650000e-12
1,Molecular function,GO:0003735,structural constituent of ribosome,28,4.893587,6.750000e-12
2,Molecular function,GO:0005198,structural molecule activity,28,4.412251,1.290000e-10
3,Molecular function,GO:0005488,binding,183,1.348980,2.770000e-08
4,Molecular function,GO:0016875,"ligase activity, forming carbon-oxygen bonds",14,6.408269,9.090000e-08
...,...,...,...,...,...,...
295,Biological process,GO:0006066,alcohol metabolic process,8,1.982942,1.827045e-01
296,Biological process,GO:0051253,negative regulation of RNA metabolic process,18,1.523480,1.837833e-01
297,Biological process,GO:0019222,regulation of metabolic process,45,1.273017,1.866047e-01
298,Biological process,GO:0080090,regulation of primary metabolic process,42,1.285996,1.871500e-01
