In [3]:
# NOTE(review): in the recorded run of this cell the import below failed with
# ImportError — the `announcement_extractor` module did not expose a name called
# `run_latest_announcement_extraction`. Confirm which entry point that module
# actually defines (or add this one to it) before relying on this cell.
from announcement_extractor import run_latest_announcement_extraction

# Invoke the imported entry point with no arguments.
run_latest_announcement_extraction()

ImportError: cannot import name 'run_latest_announcement_extraction' from 'announcement_extractor' (e:\QQbot\AstrBotLauncher-0.1.5.6\AstrBotLauncher-0.1.5.6\AstrBot\data\plugins\astrbot_plugin_shareholderperks\announcement_extractor.py)

In [2]:
import json
from datetime import datetime, timedelta, timezone

import requests

def extract_latest_announcement_from_url(json_url):
    """
    Fetch an announcement-search JSON document and summarise its newest entry.

    The payload must be a JSON object whose "announcements" key holds a list.
    Entries that are not JSON objects, that lack (or have ``None`` for) any of
    the required fields, or whose "announcementTime" is not a number are
    ignored. Among the remaining entries the one with the largest
    "announcementTime" is selected.

    :param json_url: URL that returns the JSON document.
    :return: dict with three keys:
        "股票代码(secCode)" – the entry's ``secCode``;
        "公司名(announcementTitle)" – the entry's ``announcementTitle`` with
        ``<em>``/``</em>`` tags removed;
        "公告PDF链接" – detail-page URL built from ``orgId``,
        ``announcementId`` and the announcement date.
    :raises ConnectionError: the HTTP request failed or returned an error status.
    :raises ValueError: the body is not valid JSON, the structure is not the
        expected one, the list is empty, or no entry is usable.
    """
    # 1. Fetch the document. Only transport / HTTP-status failures are reported
    #    as ConnectionError.
    try:
        response = requests.get(json_url, timeout=10)
        response.raise_for_status()  # non-2xx status -> HTTPError
    except requests.exceptions.RequestException as e:
        raise ConnectionError(f"获取 JSON 数据失败：{e}") from e

    # 2. Decode the body in its own try block. Current requests releases raise
    #    requests.exceptions.JSONDecodeError, which is itself a
    #    RequestException, so decoding inside the block above would misreport
    #    malformed JSON as a connection failure. Every decode-error variant
    #    derives from ValueError, so that is what is caught here.
    try:
        json_data = response.json()
    except ValueError as e:
        raise ValueError("获取的内容不是有效的 JSON 格式") from e

    # 3. Locate the announcement list under the "announcements" key.
    announcement_list = json_data.get("announcements") if isinstance(json_data, dict) else None
    if not isinstance(announcement_list, list):
        raise ValueError(f"JSON 数据格式不符合预期，未找到 'announcements' 列表。实际结构: {json.dumps(json_data, indent=2)[:200]}...")

    if not announcement_list:
        raise ValueError("获取到的公告列表为空")

    # 4. Keep only entries that can actually be processed further down.
    required_fields = ["secCode", "announcementTitle", "orgId", "announcementId", "announcementTime"]

    def _is_usable(item):
        # Non-dict entries (strings, numbers, null) cannot be indexed by field name.
        if not isinstance(item, dict):
            return False
        if any(item.get(field) is None for field in required_fields):
            return False
        # The timestamp is compared with max() and divided by 1000 below, so it
        # has to be a real number (bool is excluded although it subclasses int).
        timestamp = item["announcementTime"]
        return isinstance(timestamp, (int, float)) and not isinstance(timestamp, bool)

    valid_announcements = [item for item in announcement_list if _is_usable(item)]

    if not valid_announcements:
        raise ValueError("无有效公告数据（缺少关键字段）")

    # 5. The newest announcement is the one with the largest timestamp.
    latest_ann = max(valid_announcements, key=lambda x: x["announcementTime"])

    # 6. Strip the <em>...</em> markers from the title.
    clean_title = latest_ann["announcementTitle"].replace("<em>", "").replace("</em>", "")

    # 7. Build the detail-page link. The timestamp is treated as epoch
    #    milliseconds (hence the division by 1000).
    #    NOTE(review): the date is rendered in a fixed UTC+8 zone (China
    #    Standard Time) on the assumption that cninfo timestamps denote
    #    mainland-China calendar dates; the previous naive conversion used the
    #    host's local zone and could yield a different day on hosts outside
    #    UTC+8 — confirm against the API.
    china_standard_time = timezone(timedelta(hours=8))
    announcement_time = datetime.fromtimestamp(
        latest_ann["announcementTime"] / 1000, tz=china_standard_time
    ).strftime("%Y-%m-%d")
    pdf_url = (
        f"https://www.cninfo.com.cn/new/disclosure/detail"
        f"?orgId={latest_ann['orgId']}"
        f"&announcementId={latest_ann['announcementId']}"
        f"&announcementTime={announcement_time}"
    )

    # 8. Assemble the result.
    return {
        "股票代码(secCode)": latest_ann["secCode"],
        "公司名(announcementTitle)": clean_title,
        "公告PDF链接": pdf_url,
    }

# ------------------- Script entry point -------------------
if __name__ == "__main__":
    # Full-text search endpoint to query. The searchkey is the URL-encoded
    # form of "股东回馈"; results are requested sorted by pubdate, descending.
    JSON_URL = "https://www.cninfo.com.cn/new/fulltextSearch/full?searchkey=%E8%82%A1%E4%B8%9C%E5%9B%9E%E9%A6%88&sdate=&edate=&isfulltext=false&sortName=pubdate&sortType=desc&pageNum=1&pageSize=20&type="

    try:
        latest_announcement = extract_latest_announcement_from_url(JSON_URL)
        # Banner: rule, headline, rule — each on its own line.
        print("=" * 80, "最新公告信息提取成功：", "=" * 80, sep="\n")
        # One "key：value" line per extracted field.
        for key, value in latest_announcement.items():
            print(f"{key}：{value}")
        print("=" * 80)
    except Exception as err:
        # \033[91m ... \033[0m are ANSI escapes that colour the message red.
        print(f"\033[91m提取失败：{err}\033[0m")

最新公告信息提取成功：
股票代码(secCode)：688253
公司名(announcementTitle)：英诺特：关于股东回馈活动的自愿性信息披露公告
公告PDF链接：https://www.cninfo.com.cn/new/disclosure/detail?orgId=9900048398&announcementId=1224808756&announcementTime=2025-11-18
