In [10]:
import requests
import pymongo
import datetime
import time

# Connect to MongoDB.
# NOTE(review): credentials should not be committed in a notebook. The URI is
# read from the MONGO_URI environment variable when it is set; the literal
# below is kept only as a backward-compatible fallback and should be removed
# (and the password rotated) once the variable is configured.
import os  # imported here so this cell still runs on a fresh kernel

MONGO_URI = os.environ.get("MONGO_URI", "mongodb://root:Zqx167613@192.168.2.3:27017")
client = pymongo.MongoClient(MONGO_URI)

# One database per calendar year, e.g. "2024年微博热搜". Only the "%Y" directive
# goes through strftime; the non-ASCII suffix is appended by the f-string so the
# result does not depend on how the platform's strftime handles such text.
now = datetime.datetime.now()
DATABASE_NAME = f"{now:%Y}年微博热搜"
db = client[DATABASE_NAME]

# API endpoint that returns the hot-search list as JSON.
url = 'https://weibo.com/ajax/statuses/hot_band'

# Request headers that imitate a desktop browser, built from (name, value)
# pairs so the insertion order stays explicit.
headers = dict([
    ("accept", "application/json, text/plain, */*"),
    ("accept-encoding", "gzip, deflate, br"),
    ("accept-language", "zh-CN,zh;q=0.9"),
    ("client-version", "v2.36.4"),
    ("referer", "https://weibo.com/hot/search"),
    ("sec-ch-ua-platform", "Windows"),
    ("sec-fetch-dest", "empty"),
    ("sec-fetch-mode", "cors"),
    ("sec-fetch-site", "same-origin"),
    (
        "user-agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.42",
    ),
    ("x-requested-with", "XMLHttpRequest"),
])

# Read the clock once so the collection date and the stored time of day always
# describe the same instant; two separate reads could straddle midnight and
# file a 00:00:00 sample under the previous day's collection.
_snapshot_time = datetime.datetime.now()
today = _snapshot_time.date()  # formatted as YYYY-MM-DD when used as a name
now = _snapshot_time.strftime('%H:%M:%S')  # time of day written to each record

# One collection per day, named after the date.
dbcol = db[f'{today}']

# Fetch the hot-search list, retrying a bounded number of times.
MAX_ATTEMPTS = 5
RETRY_DELAY_SECONDS = 5

response = None
last_error = None
for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        response = requests.get(url=url, headers=headers, timeout=5)
        # Treat HTTP error statuses as failed attempts instead of trying to
        # parse an error page as JSON further down.
        response.raise_for_status()
        break
    except requests.RequestException as exc:
        # Only network/HTTP failures are retried; KeyboardInterrupt and
        # programming errors are no longer swallowed by a bare except.
        last_error = exc
        response = None
        print("爬取错误，再次尝试")
        if attempt < MAX_ATTEMPTS:
            time.sleep(RETRY_DELAY_SECONDS)

if response is None:
    # Every attempt failed: stop with the real cause rather than a NameError
    # on an undefined `response`.
    raise RuntimeError(
        f"Failed to fetch {url} after {MAX_ATTEMPTS} attempts"
    ) from last_error

# Decode the JSON payload and pull out the list of hot-search entries.
content = response.json()
hot_list = content['data']['band_list']

# Upsert one document per topic into today's collection.
for item in hot_list:
    # Entries without a 'realpos' field are skipped.
    if 'realpos' not in item:
        continue

    realpos = item['realpos']  # rank in the hot-search list
    # NOTE(review): read with .get so an entry lacking 'category' stores None
    # instead of aborting the run midway through the list — confirm whether
    # the API can actually omit this field.
    category = item.get('category')  # topic tag
    word = item['word']  # topic text, used as the document key
    raw_hot = item['raw_hot']  # heat value

    dbcol.update_one(
        {"话题": word},
        {
            "$set": {
                "标签": category,
                "最新出现时间": now
            },
            # `now` is a zero-padded HH:MM:SS string, so string comparison
            # orders times correctly within a single day's collection.
            "$min": {"最早出现时间": now},
            "$max": {"最高热度": raw_hot},
            "$push": {
                "排名历史": {
                    "$each": [{"排名": realpos, "时间": now, "热度": raw_hot}],
                    # Position 0 puts the newest sample at the front of the
                    # list. The previous value of 5 appended while the list
                    # was short and then inserted in the middle, which left
                    # the history in neither chronological nor reverse order.
                    "$position": 0
                }
            }
        },
        upsert=True  # create the document the first time a topic is seen
    )

print("热搜数据已成功更新到数据库！")

热搜数据已成功更新到数据库！


In [3]:
import pandas as pd
import pymongo
import datetime
from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, HBar, LabelSet, Slider, DatePicker, CustomJS
from bokeh.plotting import figure

# Connect to MongoDB.
# NOTE(review): credentials should not be committed in a notebook. The URI is
# read from the MONGO_URI environment variable when it is set; the literal
# below is kept only as a backward-compatible fallback and should be removed
# (and the password rotated) once the variable is configured.
import os  # imported here so this cell still runs on a fresh kernel

MONGO_URI = os.environ.get("MONGO_URI", "mongodb://root:Zqx167613@192.168.2.3:27017")
client = pymongo.MongoClient(MONGO_URI)
db = client['微博']

# Collection names in sorted order; the rest of this cell passes them to the
# date picker and compares them with str(date), so they are presumably ISO
# dates (YYYY-MM-DD) — TODO confirm no other collections live in this database.
dates = sorted(db.list_collection_names())

def get_data(k):
    """Load collection ``k`` and group its documents by their '时间' value.

    Every document of ``db[k]`` is read (without ``_id``), turned into a
    DataFrame indexed by '时间' and grouped on that index.

    Side effects:
        Rebinds the module-level ``data_`` (the GroupBy object) and
        ``timelist`` (the group keys, in the order the GroupBy yields them).

    Args:
        k: Name of the collection to load.

    Returns:
        A ``(data_, timelist)`` tuple holding the same two objects.

    Raises:
        KeyError: If the collection has no documents, or the documents carry
            no '时间' field.
    """
    global data_, timelist
    records = list(db[k].find({}, {"_id": 0}))
    if not records:
        # An empty frame has no '时间' column; fail with a message that names
        # the collection instead of pandas' generic missing-column error.
        raise KeyError(f"collection {k!r} contains no documents")
    frame = pd.DataFrame(records).set_index('时间')
    data_ = frame.groupby(by='时间')
    timelist = [key for key, _ in data_]
    return data_, timelist

def get_daf(i):
    """Build the plotting frame for the ``i``-th snapshot of the loaded day.

    Reads the module-level ``data_`` and ``timelist``, takes the group for
    ``timelist[i]`` and derives the columns the chart uses:

    * '位置'   – half of '热度'.
    * '热度1'  – the text ``"raw_hot=<热度>"``.
    * '真排名' – the original rank as a string.
    * '排名'   – the rank values in reversed row order; the first row therefore
      receives the largest value (presumably so that it is drawn at the top —
      this assumes rows arrive ordered by ascending rank).

    Only rows whose rank is at most 50 are kept.

    Args:
        i: Index into ``timelist``.

    Returns:
        ``(daf, max_, timemes)``: the derived frame, the largest '热度' in it,
        and the snapshot key ``timelist[i]``.
    """
    snapshot = data_.get_group(timelist[i]).copy()
    snapshot["位置"] = snapshot['热度'] / 2
    snapshot['热度1'] = "raw_hot=" + snapshot['热度'].astype('str')

    # Work on an explicit copy so the column writes below do not target a
    # slice of `snapshot`.
    daf = snapshot[snapshot['排名'] <= 50].copy()
    daf['真排名'] = daf['排名'].astype('str')

    # Reverse by position, not by index label. Every row of a snapshot shares
    # the same '时间' label, so label-aligned assignment of a reversed Series
    # only worked while no row was filtered out and raised a duplicate-label
    # reindex error otherwise.
    daf['排名'] = daf['排名'].to_numpy()[::-1].copy()

    max_ = daf["热度"].max()
    timemes = timelist[i]
    return daf, max_, timemes

# Load today's data by default. When no collection exists for today yet, fall
# back to the most recent collection name instead of failing at start-up
# (`dates` is sorted, so its last element is the latest one).
today = datetime.date.today()
today = str(today)
initial_date = today if (today in dates or not dates) else dates[-1]

data_box = get_data(initial_date)
data_, timelist = data_box

# First snapshot of the chosen day.
box = get_daf(0)
daf, max_, timemes = box

# Fixed upper bound of the heat axis, also used as the background bar length.
longsize = 4000000

# Single data source shared by every glyph and label below.
source = ColumnDataSource(data=daf)

# Plot canvas: 50 rank slots on the y axis, heat on the x axis, no toolbar.
_figure_options = dict(
    y_range=(0, 50.5),
    x_range=(-0.02 * longsize, longsize),
    title=None,
    sizing_mode="stretch_width",
    height_policy='fixed',
    height=2000,
    x_axis_location='above',
    tools=['tap', 'reset'],
    toolbar_location=None,
)
p = figure(**_figure_options)

# Two horizontal bars per row: a full-width background and the heat value.
_bar_style = dict(y='排名', height=0.8, line_color=None)
glyph1 = HBar(right=longsize, fill_color='skyblue', **_bar_style)
glyph2 = HBar(right="热度", fill_color='red', fill_alpha=0.5, **_bar_style)

# Three labels per row: topic (centre), heat text (right edge), rank (left edge).
_label_base = dict(y='排名', source=source, y_offset=-10)
labels1 = LabelSet(x=longsize / 2, text="话题", text_align='center', **_label_base)
labels2 = LabelSet(x=longsize, text="热度1", text_align='right', x_offset=-2, **_label_base)
labels3 = LabelSet(x=0, text="真排名", text_align='center', x_offset=-10, **_label_base)

# JavaScript callback: open the Weibo search page for the tapped topic, then
# reset the plot.
callback = CustomJS(args=dict(p=p, source=source), code="""
    var selected_index = source.selected.indices[0];
    var value = source.data['话题'][selected_index];
    if (typeof(value) != "undefined") {
        // Percent-encode the whole "#topic#" query so characters such as
        // '&', '#', '%' or spaces in a topic cannot truncate or alter the URL.
        window.open("https://s.weibo.com/weibo?q=" + encodeURIComponent("#" + value + "#"));
    }
    p.reset.emit();
""")
p.js_on_event('tap', callback)

# Slider callback: show the snapshot selected by the slider.
def slider_update(attrname, old, new):
    """Swap the plotted data for the snapshot at the slider's current index.

    The three arguments follow the ``on_change`` callback signature and are
    not used; the index is read from ``slider.value``.
    """
    frame, _, stamp = get_daf(slider.value)
    source.data = frame
    slider.title = f'时间 {stamp}'

# Start at index 0 so the slider position agrees with the snapshot loaded at
# start-up (get_daf(0)) and with the title. The former initial value of 1 did
# not match the displayed data and lay outside the range when the day had a
# single snapshot.
slider = Slider(start=0, end=len(timelist) - 1, value=0, step=1, title=f'时间 {timemes}', width_policy="max", show_value=False)
slider.on_change('value', slider_update)

# Date-picker callback: load the chosen day and show its first snapshot.
def select_update(attrname, old, new):
    """Reload data for the date in ``picker.value`` and reset the view.

    Rebinds the module-level ``data_`` and ``timelist``, shows snapshot 0 and
    moves the slider back to the start so its position, range and title all
    describe the newly loaded day. When the chosen date has no usable data the
    current view is left untouched.

    The three arguments follow the ``on_change`` callback signature and are
    not used; the date is read from ``picker.value``.
    """
    global data_, timelist
    k = picker.value
    try:
        data_, timelist = get_data(k)
    except KeyError:
        # get_data raises KeyError when the collection is empty or lacks the
        # expected field; report it instead of failing inside the callback.
        print(f"日期 {k} 没有可用数据")
        return

    daf, _, timemes = get_daf(0)
    source.data = daf
    slider.title = f'时间 {timemes}'
    # Reset the position before changing the range so the value never sits
    # outside [start, end]. If the value changes, the slider's own callback
    # runs and simply redraws snapshot 0 again.
    slider.value = 0
    slider.end = len(timelist) - 1

# The picker spans the smallest to the largest collection name and opens on
# the largest one.
end_date = max(dates)
start_date = min(dates)
picker = DatePicker(
    title="日期",
    value=end_date,
    min_date=start_date,
    max_date=end_date,
    width=300,
)
picker.on_change('value', select_update)

# Remove every chart decoration: outline, grid, axis lines, ticks.
p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.ticker = []
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None

# Attach the bars first, then the labels, in their original order.
for _glyph in (glyph1, glyph2):
    p.add_glyph(source, _glyph)
for _label_set in (labels1, labels2, labels3):
    p.add_layout(_label_set)

# Controls in one row above the plot.
sliders = row(slider, picker)
sliders.sizing_mode = "stretch_width"
layout = column(sliders, p)
layout.sizing_mode = "stretch_both"

# Register the layout on the current document. The slider and picker use
# Python callbacks, which Bokeh only runs under a Bokeh server, not in
# standalone HTML output.
_doc = curdoc()
_doc.add_root(layout)
_doc.title = "微博热搜分析"

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html

