In [None]:
%pip install plotly

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

## BubbleChart: GO enrichment bubble chart (GO 富集气泡图)

In [2]:
def read_matrix(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: location of the CSV file; forwarded unchanged to
            ``pd.read_csv``.
    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default options.
    """
    return pd.read_csv(file_path)

In [3]:
def BubbleChart(df: pd.DataFrame, num=None, show=True) -> "go.Figure":
    """Draw a bubble chart of the significant GO enrichment terms.

    The input is relabelled *positionally*, so it must contain exactly six
    columns in this order: class, GO ID, GO term name, hit-gene count,
    enrichment score, adjusted p-value. Rows whose adjusted p-value is not
    below 0.05 are dropped, then the first ``num`` remaining rows (in their
    incoming order) are plotted.

    Args:
        df: GO enrichment analysis result (six columns, see above). The
            caller's frame is not modified; the function works on a copy.
        num: maximum number of rows to show. ``None`` (the default) keeps
            every row that passes the p-value filter.
        show: when True (the default) the figure is displayed with
            ``fig.show()`` before it is returned; pass False to only build it.
    Returns:
        fig: plotly figure
    Raises:
        ValueError: if ``df`` does not have exactly six columns.
    """
    column_names = ["Class", "GO_ID", "GO_Function", "Counts", "Enrichment Score", "P.adjust"]

    # Fail early with an explicit message instead of pandas' generic
    # "Length mismatch" error raised by the positional rename below.
    if df.shape[1] != len(column_names):
        raise ValueError(
            f"BubbleChart expects a DataFrame with exactly {len(column_names)} columns "
            f"(in order: {', '.join(column_names)}), got {df.shape[1]}"
        )

    # Work on a copy so that renaming the columns does not touch the caller's frame
    df = df.copy()
    df.columns = column_names

    # Keep only the rows whose adjusted p-value is below 0.05
    df = df[df["P.adjust"] < 0.05]

    # Keep at most the first `num` rows (a slice end of None keeps them all)
    df = df.iloc[:num]

    # Build the chart: bubble size encodes the gene count, colour the adjusted p-value
    fig = px.scatter(
        df,
        x="Enrichment Score",
        y="GO_Function",
        size="Counts",
        color="P.adjust",
        color_continuous_scale="Tealrose_r",  # the _r suffix reverses the colour scale
        opacity=0.85,
        hover_name="Class",
        hover_data=["GO_ID", "GO_Function", "Counts", "Enrichment Score", "P.adjust"],
        size_max=45,
    )

    # Figure-level layout
    fig.update_layout(
        title="GO Enrichment Analysis",
        xaxis_title="Enrichment Score",
        yaxis_title="GO_Function",
        font=dict(family="Arial", size=14),
        template="plotly_white",
        width=1000,  # figure width
        height=600,  # figure height
    )

    # Colour-bar appearance: three-decimal tick labels, one tick every 0.005
    fig.update_coloraxes(
        colorbar_title="P.adjust",
        colorbar_tickformat=".3f",
        colorbar=dict(dtick=0.005),
    )

    # Optionally display the figure, then hand it back to the caller
    if show:
        fig.show()
    return fig

In [4]:
# Load the GO enrichment table and order it by HitsGenesCountsInSelectedSet, largest first
df_GoEnrich = read_matrix("GO_Enrichment_final.csv").sort_values(
    by='HitsGenesCountsInSelectedSet', ascending=False
)
df_GoEnrich.head()

Unnamed: 0,Class,GO_Name,GO_ID,GO_Level,P_value,EnrichmentScore,HitsGenesCountsInSelectedSet,HitsGenesCountsInBackground,AllGenesCountsInSelectedSet,AllGenesCountsInBackground,GenesOfSelectedSetInGOterm,corrected p-value(BH method)
63,Cellular component,obsolete cell,GO:0005623,0,0.001689669,1.023364,261,2354,261,2409,"minE,arcA,msrB,nusA,sspA,yqjD,hmp,rpmG,fabI,wr...",0.009446784
62,Cellular component,obsolete cell part,GO:0044464,0,0.001689669,1.023364,261,2354,261,2409,"minE,arcA,msrB,nusA,sspA,yqjD,hmp,rpmG,fabI,wr...",0.009446784
275,Biological process,cellular process,GO:0009987,2,0.0194334,1.039395,252,2524,268,2790,"minE,arcA,msrB,nusA,sspA,hmp,rpmG,fabI,wrbA,ac...",0.09645603
77,Biological process,metabolic process,GO:0008152,2,3.94e-12,1.237716,236,1985,268,2790,"arcA,msrB,nusA,sspA,hmp,rpmG,fabI,wrbA,aceF,gm...",3.44e-10
45,Cellular component,intracellular anatomical structure,GO:0005622,3,0.0,1.652447,222,1240,261,2409,"minE,arcA,msrB,nusA,sspA,hmp,rpmG,fabI,wrbA,ac...",0.0


In [5]:
# Select the six columns passed to BubbleChart, in the order it relabels them
df_BubbleChart = df_GoEnrich[[
    'Class',
    'GO_ID',
    'GO_Name',
    'HitsGenesCountsInSelectedSet',
    'EnrichmentScore',
    'corrected p-value(BH method)',
]]

In [6]:
# Plot at most the first 30 rows that pass BubbleChart's p-value filter
fig1 = BubbleChart(df_BubbleChart, num=30)