In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

BubbleChart: GO enrichment (富集) bubble chart

In [3]:
def BubbleChart(df, color, bubble_num, bubble_size, go_type, width=1000, height=800):
    """Draw a bubble chart of GO enrichment results with Plotly Express.

    The input is copied (the caller's frame is never modified), its six
    columns are renamed by position, rows with ``P.adjust`` >= 0.05 are
    dropped, the table is optionally restricted to one GO class, and the
    ``bubble_num`` remaining rows with the highest ``Counts`` are plotted.

    Args:
        df: GO enrichment result with exactly six columns. They are renamed
            by position to "Class", "GO_ID", "GO_Function", "Counts",
            "Enrichment Score" and "P.adjust", so the original column names
            are ignored and only the column order matters.
        color: Continuous colour scale handed to ``px.scatter`` as
            ``color_continuous_scale`` (appending "_r" to a Plotly scale
            name reverses it).
        bubble_num: Maximum number of rows (bubbles) to draw.
        bubble_size: Handed to ``px.scatter`` as ``size_max``.
        go_type: "BP", "CC" or "MF" keeps only rows whose "Class" contains
            "Biological process", "Cellular component" or
            "Molecular function" respectively. Any other value keeps every
            class.
        width: Figure width passed to ``fig.update_layout``.
        height: Figure height passed to ``fig.update_layout``.

    Returns:
        The Plotly figure produced by ``px.scatter`` after layout and
        colour-axis styling.

    Raises:
        ValueError: If ``df`` does not have exactly six columns.
    """
    column_names = ["Class", "GO_ID", "GO_Function", "Counts", "Enrichment Score", "P.adjust"]
    if df.shape[1] != len(column_names):
        raise ValueError(
            f"BubbleChart expects {len(column_names)} columns "
            f"({', '.join(column_names)}), got {df.shape[1]}"
        )

    # Work on a copy so the positional rename does not leak to the caller.
    df = df.copy()
    df.columns = column_names

    # Largest gene counts first; this fixes both the truncation order and the
    # top-to-bottom order on the reversed y axis.
    df = df.sort_values(by="Counts", ascending=False)

    # Keep only significantly enriched terms.
    df = df[df["P.adjust"] < 0.05]

    # Restrict to the requested GO class BEFORE truncating. Truncating first
    # would keep only the class members that happen to be in the overall top
    # `bubble_num`, which can be far fewer than requested (or none at all).
    class_keywords = {
        "BP": "Biological process",
        "CC": "Cellular component",
        "MF": "Molecular function",
    }
    keyword = class_keywords.get(go_type)
    if keyword is not None:
        # na=False: rows with a missing class are excluded instead of
        # producing an invalid (NaN-containing) boolean mask.
        df = df[df["Class"].str.contains(keyword, regex=False, na=False)]

    # Keep at most `bubble_num` rows.
    df = df.iloc[:bubble_num]

    # Build the scatter: x = enrichment score, y = GO term, bubble size = gene
    # count, bubble colour = adjusted p-value.
    fig = px.scatter(
        df,
        x="Enrichment Score",
        y="GO_Function",
        size="Counts",
        color="P.adjust",
        color_continuous_scale=color,
        opacity=0.85,
        hover_name="Class",
        hover_data=["GO_ID", "GO_Function", "Counts", "Enrichment Score", "P.adjust"],
        size_max=bubble_size,
    )

    # Figure-level styling.
    fig.update_layout(
        title="GO Enrichment Analysis",
        xaxis_title="Enrichment Score",
        yaxis_title="GO_Function",
        yaxis=dict(autorange="reversed"),  # first (largest-count) row at the top
        font=dict(family="Arial", size=14),
        template="plotly_white",
        width=width,
        height=height,
    )

    # Colour bar: three-decimal tick labels, one tick every 0.005.
    fig.update_coloraxes(
        colorbar_title="P.adjust",
        colorbar_tickformat=".3f",
        colorbar=dict(dtick=0.005),
    )

    return fig

In [4]:
# Load the demo GO enrichment results, then preview the first five rows.
df_GoEnrich = pd.read_csv(filepath_or_buffer="GO_Enrichment_demo.csv")
df_GoEnrich.head(n=5)

Unnamed: 0,Class,GO_Name,GO_ID,GO_Level,P_value,EnrichmentScore,HitsGenesCountsInSelectedSet,HitsGenesCountsInBackground,AllGenesCountsInSelectedSet,AllGenesCountsInBackground,GenesOfSelectedSetInGOterm,corrected p-value(BH method)
0,Molecular function,RNA binding,GO:0003723,5,4.33e-15,3.996168,37,89,258,2480,"rplR,rplT,rplN,rplW,rplY,rplB,rplE,srmB,prfA,r...",2.65e-12
1,Molecular function,structural constituent of ribosome,GO:0003735,3,2.21e-14,4.893587,28,55,258,2480,"rpmG,rplR,rplU,rplT,rplN,rplW,rplY,rplC,rplB,r...",6.75e-12
2,Molecular function,structural molecule activity,GO:0005198,2,6.36e-13,4.412251,28,61,258,2480,"rpmG,rplR,rplU,rplT,rplN,rplW,rplY,rplC,rplB,r...",1.29e-10
3,Molecular function,binding,GO:0005488,2,1.82e-10,1.34898,183,1304,258,2480,"minE,arcA,msrB,nusA,sspA,yqjD,hmp,fabI,wrbA,ac...",2.77e-08
4,Molecular function,"ligase activity, forming carbon-oxygen bonds",GO:0016875,4,7.44e-10,6.408269,14,21,258,2480,"gltX,argS,glyS,pheT,ileS,trpS,cysS,hisS,tyrS,a...",9.09e-08


In [5]:
# Select the six columns BubbleChart needs, in the positional order it renames
# them to: Class, GO_ID, GO_Function, Counts, Enrichment Score, P.adjust.
df_BubbleChart = df_GoEnrich.loc[
    :,
    [
        'Class',
        'GO_ID',
        'GO_Name',
        'HitsGenesCountsInSelectedSet',
        'EnrichmentScore',
        'corrected p-value(BH method)',
    ],
]

In [7]:
# Render the bubble chart for the "CC" GO class with the 'Geyser' colour scale.
BubbleChart(
    df=df_BubbleChart,
    color='Geyser',
    bubble_num=50,
    bubble_size=30,
    go_type='CC',
    width=900,
    height=600,
)