In [2]:
from fractions import Fraction

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [1]:
# Load the tab-separated GO enrichment table.
# This cell needs `pd` from the import cell, so run that cell first
# (running this one on a fresh kernel raises NameError).
df_go = pd.read_csv("./enrich_go.tsv", sep="\t")
# Preview only the first rows instead of echoing the whole frame.
df_go.head()

NameError: name 'pd' is not defined

In [12]:
def GO_Chart(df_go, pic_type='bubble', color='Geyser', go_type='all', p_value=0.5, bubble_num=30, bubble_size=30, width=1000, height=800):
    """Draw a bubble chart from a GO enrichment result table.

    Parameters
    ----------
    df_go : pandas.DataFrame
        Must contain the columns ``category``, ``ID``, ``Description``,
        ``Count``, ``GeneRatio`` and ``p.adjust``. The caller's frame is
        not modified.
    pic_type : str
        Only ``'bubble'`` is implemented by this function.
    color : str
        Forwarded to plotly as ``color_continuous_scale``.
    go_type : str
        ``'BP'``, ``'CC'`` or ``'MF'`` keeps only rows whose ``category``
        text contains that code; any other value keeps every row.
    p_value : float
        Rows are kept only when the rounded ``p.adjust`` is strictly
        below this threshold.
    bubble_num : int
        Maximum number of rows plotted, taken from the top of the table
        after sorting by ``Count`` in descending order.
    bubble_size : int
        Forwarded to plotly as ``size_max``.
    width, height : int
        Figure size passed to ``update_layout``.

    Returns
    -------
    The figure object produced by ``px.scatter``.

    Raises
    ------
    ValueError
        If ``pic_type`` is not ``'bubble'``, or a ``GeneRatio`` cell
        cannot be parsed as a number or an ``a/b`` fraction.
    """
    # Fail fast: previously any other pic_type fell through to `return fig`
    # with `fig` never assigned.
    if pic_type != "bubble":
        raise ValueError("Invalid pic_type. Expected 'bubble', got {}".format(pic_type))

    # Select and rename the columns; rename() returns a new frame, so the
    # column assignments below never touch the caller's data.
    table = df_go[["category", "ID", "Description", "Count", "GeneRatio", "p.adjust"]].rename(
        columns={"category": "Class", "p.adjust": "P.adjust"}
    )

    # GeneRatio is presumably text such as "12/345" (plain numbers also work).
    # Fraction parses it without executing cell contents the way eval() would.
    table["GeneRatio"] = table["GeneRatio"].map(lambda ratio: round(float(Fraction(ratio)), 3))
    # Limit P.adjust to 6 decimal places.
    table["P.adjust"] = table["P.adjust"].round(6)

    # Largest Count first, then keep only rows below the p-value threshold.
    table = table.sort_values(by="Count", ascending=False)
    table = table[table["P.adjust"] < p_value]

    # Restrict to the requested GO class BEFORE truncating, so that up to
    # `bubble_num` rows of that class are shown. na=False drops rows whose
    # class is missing instead of producing an invalid boolean mask.
    if go_type in ("BP", "CC", "MF"):
        table = table[table["Class"].str.contains(go_type, na=False)]

    # Keep the first `bubble_num` rows.
    table = table.iloc[:bubble_num]

    fig = px.scatter(
        table,
        x="GeneRatio",
        y="Description",
        size="Count",
        color="P.adjust",
        color_continuous_scale=color,
        opacity=0.85,
        hover_name="Class",
        hover_data=["ID", "Description", "Count", "GeneRatio", "P.adjust"],
        size_max=bubble_size,
    )
    fig.update_layout(
        title="GO Enrichment Analysis",
        xaxis_title="GeneRatio",
        yaxis_title="Description",
        yaxis=dict(autorange="reversed"),  # first table row at the top
        font=dict(family="Arial", size=14),
        template="plotly_white",
        width=width,
        height=height,
    )

    return fig

# GO_Chart(df_go)

In [17]:
import plotly.express as px

def GO_Chart(df_go, pic_type='bubble', color='Geyser', go_type='all', p_value=0.5, chart_num=30, chart_size=30, width=1000, height=800):
    """Draw a bubble chart or a bar chart from a GO enrichment result table.

    Parameters
    ----------
    df_go : pandas.DataFrame
        Must contain the columns ``category``, ``ID``, ``Description``,
        ``Count``, ``GeneRatio`` and ``p.adjust``. The caller's frame is
        not modified.
    pic_type : str
        ``'bubble'`` plots GeneRatio against Description with ``px.scatter``;
        ``'bar'`` plots Count against Description with ``px.bar``.
    color : str
        Forwarded to plotly as ``color_continuous_scale``.
    go_type : str
        ``'BP'``, ``'CC'`` or ``'MF'`` keeps only rows whose ``category``
        text contains that code; any other value keeps every row.
    p_value : float
        Rows are kept only when the rounded ``p.adjust`` is strictly
        below this threshold.
    chart_num : int
        Maximum number of rows plotted, taken from the top of the table
        after sorting by ``Count`` in descending order.
    chart_size : int
        Forwarded to plotly as ``size_max``; used by the bubble chart only.
    width, height : int
        Figure size passed to ``update_layout``.

    Returns
    -------
    The figure object produced by ``px.scatter`` or ``px.bar``.

    Raises
    ------
    ValueError
        If ``pic_type`` is neither ``'bubble'`` nor ``'bar'``, or a
        ``GeneRatio`` cell cannot be parsed as a number or ``a/b`` fraction.
    """
    # Reject an unsupported chart type before doing any data work.
    if pic_type not in ("bubble", "bar"):
        raise ValueError("Invalid pic_type. Expected 'bubble' or 'bar', got {}".format(pic_type))

    # Select and rename the columns; rename() returns a new frame, so the
    # column assignments below never touch the caller's data.
    table = df_go[["category", "ID", "Description", "Count", "GeneRatio", "p.adjust"]].rename(
        columns={"category": "Class", "p.adjust": "P.adjust"}
    )

    # GeneRatio is presumably text such as "12/345" (plain numbers also work).
    # Fraction parses it without executing cell contents the way eval() would.
    table["GeneRatio"] = table["GeneRatio"].map(lambda ratio: round(float(Fraction(ratio)), 3))
    table["P.adjust"] = table["P.adjust"].round(6)

    # Largest Count first, then keep only rows below the p-value threshold.
    table = table.sort_values(by="Count", ascending=False)
    table = table[table["P.adjust"] < p_value]

    # Restrict to the requested GO class BEFORE truncating, so that up to
    # `chart_num` rows of that class are shown. na=False drops rows whose
    # class is missing instead of producing an invalid boolean mask.
    if go_type in ("BP", "CC", "MF"):
        table = table[table["Class"].str.contains(go_type, na=False)]

    table = table.iloc[:chart_num]

    if pic_type == "bubble":
        x_column = "GeneRatio"
        fig = px.scatter(
            table,
            x=x_column,
            y="Description",
            size="Count",
            color="P.adjust",
            color_continuous_scale=color,
            opacity=0.85,
            hover_name="Class",
            hover_data=["ID", "Description", "Count", "GeneRatio", "P.adjust"],
            size_max=chart_size,
        )
    else:  # pic_type == "bar", guaranteed by the check at the top
        x_column = "Count"
        fig = px.bar(
            table,
            x=x_column,
            y="Description",
            color="P.adjust",
            color_continuous_scale=color,
            opacity=0.85,
            hover_data=["Class", "ID", "GeneRatio", "P.adjust"],
        )

    # Layout shared by both chart types; only the x-axis title differs.
    fig.update_layout(
        title="GO Enrichment Analysis",
        xaxis_title=x_column,
        yaxis_title="Description",
        yaxis=dict(autorange="reversed"),  # first table row at the top
        font=dict(family="Arial", size=14),
        template="plotly_white",
        width=width,
        height=height,
    )

    # Colour bar: titled "P.adjust", 3-decimal tick labels, one tick every 0.005.
    fig.update_coloraxes(
        colorbar_title="P.adjust",
        colorbar_tickformat=".3f",
        colorbar=dict(dtick=0.005),
    )

    return fig

# Example call: render the loaded enrichment table as a bubble chart
GO_Chart(df_go, pic_type='bubble')


### TBtools出来的测试数据

In [2]:
def BubbleChart(df, color, bubble_num, bubble_size, go_type='all', width=1000, height=800, p_value=0.05):
    """Draw a GO enrichment bubble chart from a six-column table.

    Parameters
    ----------
    df : pandas.DataFrame
        Exactly six columns, taken by position as: class, GO id, GO
        function name, hit count, enrichment score, adjusted p-value.
        The caller's frame is not modified.
    color : str
        Forwarded to plotly as ``color_continuous_scale``.
    bubble_num : int
        Maximum number of rows plotted, taken from the top of the table
        after sorting by hit count in descending order.
    bubble_size : int
        Forwarded to plotly as ``size_max``.
    go_type : str
        ``'BP'``, ``'CC'`` or ``'MF'`` keeps only rows whose class text
        contains "Biological process", "Cellular component" or
        "Molecular function" respectively; any other value keeps all rows.
    width, height : int
        Figure size passed to ``update_layout``.
    p_value : float
        Rows are kept only when the adjusted p-value is strictly below
        this threshold. Defaults to the previously hard-coded 0.05.

    Returns
    -------
    The figure object produced by ``px.scatter``.

    Raises
    ------
    ValueError
        If ``df`` does not have exactly six columns.
    """
    column_names = ["Class", "GO_ID", "GO_Function", "Counts", "Enrichment Score", "P.adjust"]
    if df.shape[1] != len(column_names):
        raise ValueError(
            "BubbleChart expects exactly {} columns ({}), got {}".format(
                len(column_names), ", ".join(column_names), df.shape[1]
            )
        )

    # Work on a copy so renaming the columns does not affect the caller.
    table = df.copy()
    table.columns = column_names

    # Largest hit count first, then keep only rows below the p-value threshold.
    table = table.sort_values(by="Counts", ascending=False)
    table = table[table["P.adjust"] < p_value]

    # Restrict to the requested GO class BEFORE truncating, so that up to
    # `bubble_num` rows of that class are shown. na=False drops rows whose
    # class is missing instead of producing an invalid boolean mask.
    class_labels = {
        "BP": "Biological process",
        "CC": "Cellular component",
        "MF": "Molecular function",
    }
    if go_type in class_labels:
        table = table[table["Class"].str.contains(class_labels[go_type], na=False)]

    # Keep the first `bubble_num` rows.
    table = table.iloc[:bubble_num]

    fig = px.scatter(
        table,
        x="Enrichment Score",
        y="GO_Function",
        size="Counts",
        color="P.adjust",
        color_continuous_scale=color,  # per the original note, an "_r" suffix reverses the scale
        opacity=0.85,
        hover_name="Class",
        hover_data=["GO_ID", "GO_Function", "Counts", "Enrichment Score", "P.adjust"],
        size_max=bubble_size,
    )

    fig.update_layout(
        title="GO Enrichment Analysis",
        xaxis_title="Enrichment Score",
        yaxis_title="GO_Function",
        yaxis=dict(autorange="reversed"),  # first table row at the top
        font=dict(family="Arial", size=14),
        template="plotly_white",
        width=width,
        height=height,
    )

    # Colour bar: titled "P.adjust", 3-decimal tick labels, one tick every 0.005.
    fig.update_coloraxes(
        colorbar_title="P.adjust",
        colorbar_tickformat=".3f",
        colorbar=dict(dtick=0.005),
    )

    return fig

In [6]:
# df_GoEnrich = pd.read_csv("GO_Enrichment_demo.csv")
# df_BubbleChart = df_GoEnrich[['Class', 'GO_ID', 'GO_Name',
#                               'HitsGenesCountsInSelectedSet', 'EnrichmentScore', 'corrected p-value(BH method)']]
# df_BubbleChart

# BubbleChart(df_BubbleChart, 'Rdbu_r', 50, 30, width=1000, height=600)

Unnamed: 0,Class,GO_ID,GO_Name,HitsGenesCountsInSelectedSet,EnrichmentScore,corrected p-value(BH method)
0,Molecular function,GO:0003723,RNA binding,37,3.996168,2.650000e-12
1,Molecular function,GO:0003735,structural constituent of ribosome,28,4.893587,6.750000e-12
2,Molecular function,GO:0005198,structural molecule activity,28,4.412251,1.290000e-10
3,Molecular function,GO:0005488,binding,183,1.348980,2.770000e-08
4,Molecular function,GO:0016875,"ligase activity, forming carbon-oxygen bonds",14,6.408269,9.090000e-08
...,...,...,...,...,...,...
295,Biological process,GO:0006066,alcohol metabolic process,8,1.982942,1.827045e-01
296,Biological process,GO:0051253,negative regulation of RNA metabolic process,18,1.523480,1.837833e-01
297,Biological process,GO:0019222,regulation of metabolic process,45,1.273017,1.866047e-01
298,Biological process,GO:0080090,regulation of primary metabolic process,42,1.285996,1.871500e-01
