# In [5]:
import plotly.express as px
import pandas as pd
import numpy as np


# Load the tab-separated keyword counts file into a DataFrame.
df_keyword_sort = pd.read_csv(
    'Mt_keywords_counts_year.tsv',
    sep='\t',
)

def plot_keywords_scatter(df_keyword_sort, chart_size=20, color_continuous_scale="RdBu"):
    """Build a jittered scatter plot of keyword counts per year.

    Each row of the input becomes one marker whose size and colour both
    encode the ``Count`` value. Small uniform random offsets are added to
    the plotted x/y positions so markers with the same year and count do
    not sit exactly on top of each other.

    The input DataFrame is not modified; all derived columns are added to
    an internal copy.

    Args:
        df_keyword_sort: DataFrame providing the ``Keyword``, ``Year`` and
            ``Count`` columns. ``Count`` must be convertible to float.
        chart_size: Forwarded to ``px.scatter`` as ``size_max``.
        color_continuous_scale: Forwarded to ``px.scatter`` as the
            continuous colour scale.

    Returns:
        The figure object produced by ``px.scatter`` with the layout
        (template, size, axis titles) applied.

    Note:
        The jitter is drawn from NumPy's global random state, so marker
        positions differ between calls unless the caller seeds it with
        ``np.random.seed``.
    """
    # Work on a copy so the caller's DataFrame keeps its original dtypes
    # and does not gain the helper columns created below.
    plot_df = df_keyword_sort.copy()
    n_rows = len(plot_df)

    # Cast Count to float, then derive jittered coordinates so that points
    # do not all pile up at the same position.
    plot_df['Count'] = plot_df['Count'].astype(float)
    plot_df['Counts'] = plot_df['Count'] + np.random.uniform(-0.5, 0.5, size=n_rows)
    plot_df['Years'] = plot_df['Year'] + np.random.uniform(-0.3, 0.3, size=n_rows)

    # Build the scatter plot: jittered columns position the markers, while
    # the un-jittered Year/Count values are what the hover box reports.
    fig = px.scatter(
        plot_df,
        x='Years',
        y='Counts',
        size='Count',
        color='Count',
        hover_name='Keyword',  # use the keyword as the hover title
        hover_data=['Year', 'Count'],
        size_max=chart_size,
        title='Species sorted by year in keywords in publicly available literature on PubMed',
        labels={"Count": "Numbers", "Year": "Years"},
        color_continuous_scale=color_continuous_scale,
    )

    # Apply the figure layout.
    fig.update_layout(
        template="plotly_white",
        width=1280,
        height=720,
        xaxis=dict(title=dict(text='Years', font=dict(size=18))),
        yaxis=dict(title=dict(text='Number', font=dict(size=18))),
    )

    # The figure object itself is returned. Callers that need another
    # format can convert it themselves, e.g. to JSON, to an HTML <div>
    # string, or to a standalone file via fig.write_html("keywords_sort.html").
    return fig

# Example call: draw the chart with the "Geyser" colour scale.
plot_keywords_scatter(
    df_keyword_sort,
    color_continuous_scale="Geyser",
    chart_size=20,
)