In [2]:
# ========== AKShare工具函数 ==========
import akshare as ak
import pandas as pd
from typing import Optional
import logging

# Configure logging: set the root logger to INFO and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _fetch_macro_table(fetch, label: str, start_message: str,
                       column_map: dict, numeric_columns: tuple) -> Optional[pd.DataFrame]:
    """Run the shared fetch -> rename -> select -> coerce -> dropna pipeline.

    Args:
        fetch: Zero-argument callable returning the raw AKShare DataFrame. It
            is invoked inside the ``try`` so that any failure while resolving
            or calling the AKShare function is logged rather than raised.
        label: Short indicator name interpolated into the log messages.
        start_message: Message logged at INFO level before fetching.
        column_map: Mapping from source column header to output column name.
            The output keeps exactly these columns, in mapping order.
        numeric_columns: Output column names to convert with
            ``pd.to_numeric(errors='coerce')``.

    Returns:
        The cleaned DataFrame with a fresh 0-based index, or ``None`` when the
        source returned nothing/an empty table or when any step raised.
    """
    try:
        logger.info(start_message)
        raw = fetch()

        if raw is None or raw.empty:
            logger.warning(f"获取的{label}数据为空")
            return None

        renamed = raw.rename(columns=column_map)
        # Explicit copy so the column assignments below write to an independent
        # frame instead of depending on how pandas tracks the selection's parent.
        table = renamed[list(column_map.values())].copy()
        for column in numeric_columns:
            # Unparseable values become NaN and are removed by dropna() below.
            table[column] = pd.to_numeric(table[column], errors='coerce')
        table = table.dropna().reset_index(drop=True)

        logger.info(f"成功获取{label}数据，共{len(table)}条记录")
        return table
    except Exception as e:
        # Deliberately broad: these functions are best-effort data sources and
        # signal failure by returning None. logger.exception keeps the traceback
        # so the root cause (network error, renamed upstream column, ...) is visible.
        logger.exception(f"获取{label}数据失败: {e}")
        return None


def get_gdp_quarterly() -> Optional[pd.DataFrame]:
    """Fetch the quarterly GDP table from AKShare (``ak.macro_china_gdp``).

    Returns:
        A DataFrame with columns ``quarter``, ``gdp`` and ``gdp_yoy`` (the
        latter two numeric, rows with missing values removed), or ``None`` if
        the data is empty or could not be retrieved.
    """
    return _fetch_macro_table(
        fetch=lambda: ak.macro_china_gdp(),
        label="GDP",
        start_message="开始获取GDP季度数据...",
        column_map={
            '季度': 'quarter',
            '国内生产总值-绝对值': 'gdp',
            '国内生产总值-同比增长': 'gdp_yoy',
        },
        numeric_columns=('gdp', 'gdp_yoy'),
    )


def get_cpi_monthly() -> Optional[pd.DataFrame]:
    """Fetch the monthly CPI table from AKShare (``ak.macro_china_cpi``).

    Returns:
        A DataFrame with columns ``month``, ``cpi_yoy``, ``cpi_mom`` and
        ``cpi_ytd`` (the latter three numeric, rows with missing values
        removed), or ``None`` if the data is empty or could not be retrieved.
    """
    return _fetch_macro_table(
        fetch=lambda: ak.macro_china_cpi(),
        label="CPI",
        start_message="开始获取CPI月度数据...",
        column_map={
            '月份': 'month',
            '全国-同比增长': 'cpi_yoy',
            '全国-环比增长': 'cpi_mom',
            '全国-累计': 'cpi_ytd',
        },
        numeric_columns=('cpi_yoy', 'cpi_mom', 'cpi_ytd'),
    )


def get_pmi_manufacturing() -> Optional[pd.DataFrame]:
    """Fetch the manufacturing PMI table from AKShare (``ak.macro_china_pmi``).

    Returns:
        A DataFrame with columns ``month``, ``pmi`` and ``pmi_yoy`` (the
        latter two numeric, rows with missing values removed), or ``None`` if
        the data is empty or could not be retrieved.
    """
    return _fetch_macro_table(
        fetch=lambda: ak.macro_china_pmi(),
        label="PMI",
        start_message="开始获取PMI数据...",
        column_map={
            '月份': 'month',
            '制造业-指数': 'pmi',
            '制造业-同比增长': 'pmi_yoy',
        },
        numeric_columns=('pmi', 'pmi_yoy'),
    )


# ========== 知识库检索工具 ==========
import json
from pathlib import Path
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings


class KnowledgeRetriever:
    """Retrieve knowledge-base content via vector search and per-topic JSON files.

    Two sources are used:
      * a persisted Chroma collection (``knowledge_base_col``) queried by
        similarity search, and
      * a directory of JSON files whose names start with a topic number
        followed by an underscore, looked up by that number.
    """

    def __init__(self,
                 vector_db_path: str = "../data/processed/knowledge/vector_db/knowledge_base",
                 json_dir_path: str = "../data/processed/knowledge/structured/knowledge_base",
                 embedding_model: Optional[str] = None):
        """Open the vector store and index the topic JSON files.

        Args:
            vector_db_path: Directory of the persisted Chroma database.
            json_dir_path: Directory containing the per-topic JSON files.
            embedding_model: HuggingFace model name or path. When ``None``,
                the ``EMBEDDING_MODEL_PATH`` environment variable is used,
                falling back to ``Qwen/Qwen3-Embedding-0.6B``.
        """
        self.vector_db_path = Path(vector_db_path)
        self.json_dir_path = Path(json_dir_path)

        if embedding_model is None:
            import os
            embedding_model = os.getenv('EMBEDDING_MODEL_PATH', 'Qwen/Qwen3-Embedding-0.6B')

        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
        self.vector_store = Chroma(
            collection_name="knowledge_base_col",
            persist_directory=str(self.vector_db_path),
            embedding_function=self.embeddings
        )

        self._load_json_index()

    def _load_json_index(self):
        """Build ``self.json_files``: topic number -> path of its JSON file.

        The topic number is the part of the file stem before the first
        underscore. Files whose prefix is not an integer are skipped with a
        warning instead of aborting construction.
        """
        self.json_files = {}
        for json_file in self.json_dir_path.glob("*.json"):
            prefix = json_file.stem.split("_")[0]
            try:
                topic_num = int(prefix)
            except ValueError:
                logging.getLogger(__name__).warning(
                    "跳过无法解析主题编号的JSON文件: %s", json_file.name
                )
                continue
            self.json_files[topic_num] = json_file

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """Return ``text`` cut to ``limit`` characters, adding "..." only if it was cut."""
        return f"{text[:limit]}..." if len(text) > limit else text

    def _valid_topics_hint(self) -> str:
        """Describe the indexed topic numbers for the not-found message."""
        if not self.json_files:
            return "知识库中没有可用主题"
        return f"有效范围{min(self.json_files)}-{max(self.json_files)}"

    def vector_search(self, query: str, k: int = 3) -> str:
        """Run a similarity search and format the hits as plain text.

        Args:
            query: Free-text search query.
            k: Maximum number of results to request from the vector store.

        Returns:
            A text report with up to 200 characters of each hit plus its
            ``seq``/``topic`` metadata when present, or a fixed "not found"
            message when the search returns nothing.
        """
        results = self.vector_store.similarity_search(query, k=k)

        if not results:
            return "未找到相关知识"

        parts = [f"向量检索结果(共{len(results)}条):\n\n"]
        for i, doc in enumerate(results, 1):
            parts.append(f"[结果{i}]\n")
            parts.append(f"内容: {self._truncate(doc.page_content, 200)}\n")
            if doc.metadata:
                parts.append(
                    f"来源: 主题{doc.metadata.get('seq', 'N/A')} - {doc.metadata.get('topic', 'N/A')}\n"
                )
            parts.append("\n")

        return "".join(parts)

    def get_topic_knowledge(self, topic_number: int) -> str:
        """Load one topic's JSON file and format its main fields as text.

        Args:
            topic_number: Numeric topic id, i.e. the filename prefix indexed by
                ``_load_json_index``.

        Returns:
            A text summary containing the topic title, up to five
            ``key_concepts``, up to three ``indicators`` and up to 300
            characters of ``summary`` (each section only if the key exists),
            or an error message when the topic number is not indexed.
        """
        if topic_number not in self.json_files:
            return f"错误: 主题{topic_number}不存在({self._valid_topics_hint()})"

        json_file = self.json_files[topic_number]
        with open(json_file, 'r', encoding='utf-8') as f:
            knowledge = json.load(f)

        parts = [f"主题{topic_number}: {knowledge.get('topic', 'N/A')}\n\n"]

        if 'key_concepts' in knowledge:
            parts.append("关键概念:\n")
            for concept in knowledge['key_concepts'][:5]:
                name = concept.get('name', 'N/A')
                definition = concept.get('definition', 'N/A')
                parts.append(f"  - {name}: {definition}\n")
            parts.append("\n")

        if 'indicators' in knowledge:
            parts.append("关键指标:\n")
            for indicator in knowledge['indicators'][:3]:
                name = indicator.get('name', 'N/A')
                description = indicator.get('description', 'N/A')
                parts.append(f"  - {name}: {description}\n")
            parts.append("\n")

        if 'summary' in knowledge:
            parts.append(f"摘要:\n{self._truncate(knowledge['summary'], 300)}\n")

        return "".join(parts)

# Confirmation printed once this cell's helper definitions have been executed
print("[完成] 工具函数加载完成")


[完成] 工具函数加载完成


In [3]:
# 阶段2：宏观经济SubAgent实现

import os

# Load environment variables (API keys etc.) from the project config file
from dotenv import load_dotenv
load_dotenv('../config/.env')

from deepagents import create_deep_agent
from langchain_openai import ChatOpenAI

# Initialise the DeepSeek chat model through the ChatOpenAI client pointed at
# the DeepSeek base URL
deepseek_model = ChatOpenAI(
    model="deepseek-chat",
    openai_api_key=os.getenv("DEEPSEEK_API_KEY"),
    openai_api_base="https://api.deepseek.com",
    temperature=0.7,
)

# Initialise the knowledge retriever (class defined in the tools cell)
knowledge_retriever = KnowledgeRetriever()

# Definition of the macroeconomic-analysis sub-agent.
# NOTE: the prompt refers to the retrieval tools by their bare function names
# (vector_search / get_topic_knowledge) because the callables are registered
# under those names — presumably derived from __name__; confirm against the
# deepagents/LangChain tool conversion.
macroeconomic_subagent = {
    "name": "macroeconomic_subagent",
    "description": "负责宏观经济分析,包括GDP、CPI、PMI等指标的数据获取、分析和趋势判断。结合实时数据和理论知识提供专业分析。",
    "system_prompt": """你是宏观经济分析专家。

核心能力：
1. 数据获取：使用AKShare工具获取最新宏观经济数据(GDP、CPI、PMI等)
2. 理论支撑：查询知识库获取相关经济理论和分析框架
3. 综合分析：结合实时数据和理论知识，提供专业的经济解读

分析流程：
1. 获取相关数据（使用get_gdp_quarterly/get_cpi_monthly/get_pmi_manufacturing）
2. 查询理论框架（使用vector_search/get_topic_knowledge）
3. 分析数据趋势和周期特征
4. 给出专业判断和预测

输出要求：
- 数据准确，引用来源（如"根据AKShare最新数据..."）
- 分析专业，运用理论（如"根据经济周期理论..."）
- 逻辑清晰，结构完整（数据→理论→分析→结论）
- 结论明确，便于理解""",
    "tools": [
        get_gdp_quarterly,
        get_cpi_monthly,
        get_pmi_manufacturing,
        knowledge_retriever.vector_search,
        knowledge_retriever.get_topic_knowledge,
    ],
}

# Create the main DeepAgent on the DeepSeek model with the sub-agent attached
agent = create_deep_agent(
    model=deepseek_model,
    subagents=[
        macroeconomic_subagent,
    ],
)

# Summary of what was set up (plain strings where nothing is interpolated)
print("[成功] 宏观经济SubAgent已创建")
print("   模型: DeepSeek (deepseek-chat)")
print(f"   Tools数量: {len(macroeconomic_subagent['tools'])}")
print("   - AKShare工具: 3个 (GDP/CPI/PMI)")
print("   - 知识检索: 2个 (向量检索/JSON查询)")

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: Qwen/Qwen3-Embedding-0.6B
INFO:sentence_transformers.SentenceTransformer:1 prompt is loaded, with the key: query
INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


[成功] 宏观经济SubAgent已创建
   模型: DeepSeek (deepseek-chat)
   Tools数量: 5
   - AKShare工具: 3个 (GDP/CPI/PMI)
   - 知识检索: 2个 (向量检索/JSON查询)


## 测试场景

测试宏观经济SubAgent的综合分析能力


In [None]:
# Test 1: GDP analysis (the stream is consumed; only the final answer is printed)
test_query_1 = "最近GDP增长率如何？有什么趋势特征？"
print(f"问题：{test_query_1}")
print("-" * 60)

# Stream in "values" mode and remember only the last chunk received
user_turn = {"role": "user", "content": test_query_1}
final_chunk = None
for chunk in agent.stream({"messages": [user_turn]}, stream_mode="values"):
    final_chunk = chunk

# Report the last message of the final chunk, or an error if nothing usable arrived
if not final_chunk or "messages" not in final_chunk:
    print("[错误] 未获取到回答")
else:
    print("回答：")
    print(final_chunk["messages"][-1].content)


问题：最近GDP增长率如何？有什么趋势特征？
------------------------------------------------------------
回答：

INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:开始获取GDP季度数据...
INFO:__main__:成功获取GDP数据，共79条记录
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:开始获取CPI月度数据...
INFO:__main__:成功获取CPI数据，共215条记录
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:开始获取PMI数据...
INFO:__main__:成功获取PMI数据，共215条记录
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx

In [None]:
# Test 2: inflation analysis (the stream is consumed; only the final answer is printed)
test_query_2 = "当前通胀水平怎么样？根据经济理论应该如何解读？"
print(f"问题：{test_query_2}")
print("-" * 60)

# Stream in "values" mode and remember only the last chunk received
user_turn = {"role": "user", "content": test_query_2}
final_chunk = None
for chunk in agent.stream({"messages": [user_turn]}, stream_mode="values"):
    final_chunk = chunk

# Report the last message of the final chunk, or an error if nothing usable arrived
if not final_chunk or "messages" not in final_chunk:
    print("[错误] 未获取到回答")
else:
    print("回答：")
    print(final_chunk["messages"][-1].content)


问题：当前通胀水平怎么样？根据经济理论应该如何解读？
------------------------------------------------------------
回答：

INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:开始获取CPI月度数据...
INFO:__main__:成功获取CPI数据，共215条记录
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:开始获取GDP季度数据...
INFO:__main__:成功获取GDP数据，共79条记录
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:开始获取PMI数据...
INFO:__main__:成功获取PMI数据，共215条记录
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx

In [None]:
# Test 3: PMI trend analysis (the stream is consumed; only the final answer is printed)
test_query_3 = "PMI数据显示经济趋势如何？结合经济周期理论分析"
print(f"问题：{test_query_3}")
print("-" * 60)

# Stream in "values" mode and remember only the last chunk received
user_turn = {"role": "user", "content": test_query_3}
final_chunk = None
for chunk in agent.stream({"messages": [user_turn]}, stream_mode="values"):
    final_chunk = chunk

# Report the last message of the final chunk, or an error if nothing usable arrived
if not final_chunk or "messages" not in final_chunk:
    print("[错误] 未获取到回答")
else:
    print("回答：")
    print(final_chunk["messages"][-1].content)


## 任务3：完整测试（10个场景）

测试SubAgent的综合分析能力，评估输出质量


In [None]:
# 10个测试场景（按难度递增）
test_questions = [
    # 基础数据查询（简单）
    "2024年GDP增长率是多少？",
    "当前的通胀水平如何？",
    "最新的PMI数据是多少？",

    # 周期判断（中等）
    "当前经济处于什么周期？",
    "经济周期转折的信号是什么？",
    "什么指标变化会预示周期转折？",

    # 投资策略（困难）
    "根据当前经济周期，应该配置什么资产？",
    "投资时钟当前处于哪个阶段？",

    # 综合分析（最难）
    "给出当前宏观经济的整体判断",
    "从宏观角度看，周期性行业投资机会如何？"
]

print(f"共{len(test_questions)}个测试问题，按难度递增")
for i, q in enumerate(test_questions, 1):
    print(f"{i}. {q}")


In [None]:
# Batch test — currently disabled: the code below is wrapped in a triple-quoted
# string, so it is evaluated as a bare string expression and never executed.
# Remove the surrounding triple quotes to run it. When enabled it invokes the
# agent once per entry of test_questions, records each response or error, and
# writes the collected results to ../data/stage2_test_results.json.
"""
results = []

for i, question in enumerate(test_questions, 1):
    print(f"\n{'='*60}")
    print(f"测试 {i}/{len(test_questions)}: {question}")
    print('='*60)

    try:
        result = agent.invoke({"messages": [{"role": "user", "content": question}]})
        response_text = result["messages"][-1].content
        results.append({
            'question': question,
            'response': response_text,
            'status': 'success'
        })
        print(response_text)
    except Exception as e:
        results.append({
            'question': question,
            'error': str(e),
            'status': 'failed'
        })
        print(f"[错误] 错误: {e}")

# 保存测试结果
import json
with open('../data/stage2_test_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

print(f"\n[完成] 测试完成！成功: {sum(1 for r in results if r['status']=='success')}/{len(results)}")
"""

print("测试脚本已准备，取消三引号可运行")


## 输出质量评估标准

**数据准确性**（30分）：
- [必须] 引用正确来源（"根据AKShare最新数据..."）
- [必须] 数据时间明确（"2024年11月..."）
- [必须] 数值准确无误

**分析专业性**（40分）：
- [必须] 运用理论框架（"根据经济周期理论..."）
- [必须] 分析逻辑清晰（数据→趋势→原因→影响）
- [必须] 结合知识库内容

**结论清晰度**（30分）：
- [必须] 给出明确判断（"当前处于XX周期"）
- [必须] 提出可行建议（"建议配置XX资产"）
- [必须] 易于理解（非专业人士能看懂）

**总分≥80分：优秀** | **60-79分：良好** | **<60分：需优化**
