# backend-v2 MVP 走查（Notebook）

本 Notebook 将以可执行的方式，按步骤完成从 Catalog → Selections → Codegen → Validate → Test → Persist → Strategy → Universe → Run 的最小闭环验证。

- 运行前置：确保已在项目根或 `backend-v2/` 下执行，且 Python 环境可用。
- 必须配置真实 LLM（DashScope 的 OpenAI 兼容接口）：请在 `backend-v2/.env` 或环境变量中设置 `DASHSCOPE_API_KEY`；未配置将终止执行。



In [2]:
# 显式仅加载 backend-v2/.env（不尝试项目根 .env），在缺少 python-dotenv 时自动回退
import os
from pathlib import Path


def _load_env_file(path: Path, *, override: bool = False) -> bool:
    """Minimal ``.env`` reader used when python-dotenv is unavailable.

    Every ``KEY=VALUE`` line is copied into ``os.environ``. Blank lines,
    lines starting with ``#`` and lines without ``=`` are skipped. A leading
    shell-style ``export`` keyword is accepted, and a single pair of matching
    surrounding quotes is removed from the value.

    Args:
        path: Location of the ``.env`` file.
        override: When true, replace variables that are already present in
            the environment. The default leaves existing values untouched.

    Returns:
        ``False`` when ``path`` does not exist, ``True`` once the file has
        been processed.
    """
    if not path.exists():
        return False
    # utf-8-sig transparently drops a BOM, which would otherwise become part
    # of the first key.
    for raw_line in path.read_text(encoding='utf-8-sig').splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith('#') or '=' not in entry:
            continue
        key, _, value = entry.partition('=')
        key = key.strip()
        # Accept `export KEY=value`, as commonly written in shell-sourced files.
        key_parts = key.split(None, 1)
        if len(key_parts) == 2 and key_parts[0] == 'export':
            key = key_parts[1].strip()
        value = value.strip()
        # Remove only one *matching* pair of quotes so values such as
        # "it's" keep their inner quote characters.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]
        if key and (override or key not in os.environ):
            os.environ[key] = value
    return True

# Walk upward from the current directory looking for a `backend-v2` folder,
# recognised by its `app` directory or its `.env` file. When no ancestor
# qualifies, the current directory itself is used.
start = Path.cwd()
backend_dir = None
for cand in (start, *start.parents):
    if any((cand / 'backend-v2' / marker).exists() for marker in ('app', '.env')):
        backend_dir = cand / 'backend-v2'
        break
else:
    backend_dir = start

p = backend_dir / '.env'

# Prefer python-dotenv; any failure (including the package being missing)
# falls back to the local parser above.
try:
    from dotenv import load_dotenv  # type: ignore
    loaded = p.exists()
    if loaded:
        load_dotenv(dotenv_path=p, override=True)
except Exception:
    loaded = _load_env_file(p)

print('env loaded from', str(p), ':', loaded)



env loaded from /Users/yikazhu/code/j-traders/backend-v2/.env : True


In [3]:
# 环境准备
import os, sys
from pathlib import Path

# 解析 backend-v2 根目录（兼容从项目根/任意子目录运行）
def find_backend_root(start: Path) -> Path:
    """Locate the backend-v2 root by searching ``start`` and its ancestors.

    For each directory, nearest first, two layouts are checked in order:
    a nested ``backend-v2/app`` directory (the nested ``backend-v2`` is
    returned), or sibling ``app`` and ``core`` entries (the directory itself
    is returned).

    Args:
        start: Directory where the upward search begins.

    Returns:
        The first directory matching either layout.

    Raises:
        RuntimeError: If no directory on the path to the filesystem root matches.
    """
    search_path = [start]
    search_path.extend(start.parents)
    for directory in search_path:
        nested = directory / 'backend-v2'
        if (nested / 'app').exists():
            return nested
        is_backend_itself = (directory / 'app').exists() and (directory / 'core').exists()
        if is_backend_itself:
            return directory
    raise RuntimeError('未找到 backend-v2 根目录，请从项目根或 backend-v2 下运行。')

# Resolve the backend root from the current directory, make it the working
# directory, and put it first on sys.path so the `app` package below imports.
here = Path.cwd()
backend = find_backend_root(here)
os.chdir(backend)
sys.path.insert(0, str(backend))

# An LLM must be configured. Only DASHSCOPE_API_KEY is checked below;
# the model name printed at the end defaults to qwen3-coder-plus.
from app.core.config import load_settings
settings = load_settings()

# Abort the notebook early when the API key is absent from the environment.
has_dashscope = bool(os.getenv('DASHSCOPE_API_KEY'))
if not (has_dashscope):
    raise SystemExit('缺少 LLM 配置：请设置 DASHSCOPE_API_KEY 后重试。')

# Echo the effective configuration. Model precedence: settings.ai_model,
# then the AI_MODEL environment variable, then the hard-coded default.
print('Using backend path:', backend)
print('LLM endpoint:', settings.ai_endpoint)
print('LLM model:', settings.ai_model or os.getenv('AI_MODEL') or 'qwen3-coder-plus')



Using backend path: /Users/yikazhu/code/j-traders/backend-v2
LLM endpoint: https://dashscope.aliyuncs.com/compatible-mode/v1
LLM model: qwen3-coder-plus


In [4]:
# 启动 FastAPI 应用（内嵌）
from fastapi.testclient import TestClient
from app.main import create_app

# Build the FastAPI app in-process and wrap it in a TestClient; the cells
# that follow issue their HTTP requests through `client`.
app = create_app()
client = TestClient(app)
# Smoke check: print the JSON body returned by /health.
print('/health ->', client.get('/health').json())



/health -> {'status': 'ok', 'version': '0.1.0'}


In [5]:
# M1: Catalog & Selections
import json

# List the catalog endpoints by name.
resp = client.get('/catalog/endpoints')
endpoints = resp.json()
print('endpoints:', [endpoint['name'] for endpoint in endpoints])

# Fetch the daily_basic endpoint detail; only the status code is shown.
print('daily_basic:', client.get('/catalog/endpoints/daily_basic').status_code)

# Reuse the selection file when it is already on disk; otherwise create it
# through the API first.
sel_slug = 'val_low_combo'
sel_path = backend.joinpath('catalog', 'selections', sel_slug + '.json')
if sel_path.exists():
    print('selection exists ->', sel_slug)
else:
    body = {
        "factor_slug": sel_slug,
        "title": "低估值复合（PE_TTM + PB）",
        "output_index": ["ts_code", "trade_date"],
        "selection": [
            {
                "endpoint": "daily_basic",
                "fields": ["ts_code", "trade_date", "pe_ttm", "pb"],
                "param_binding": {},
                "join_keys": ["ts_code", "trade_date"],
            }
        ],
        "constraints": {"winsor": [0.01, 0.99], "zscore_axis": "trade_date"},
        "code_contract": {
            "signature": "def compute_factor(data: dict[str, pd.DataFrame], params: dict) -> pd.DataFrame",
            "data_keys": ["daily_basic"],
        },
    }
    create_resp = client.post('/catalog/selections', json=body)
    print('create selection ->', create_resp.json())

# Load the selection from its JSON file; later cells reuse `selection`.
with sel_path.open(encoding='utf-8') as selection_file:
    selection = json.load(selection_file)
first_entry = selection['selection'][0]
print('selection ok, fields:', first_entry['fields'])



endpoints: ['cashflow', 'daily_basic']
daily_basic: 200
selection exists -> val_low_combo
selection ok, fields: ['ts_code', 'trade_date', 'pe_ttm', 'pb']


In [6]:
# M3: Codegen → Validate（打印最终 Prompt 与生成的代码）
import os
from app.models.selection import SelectionSpec
from app.services.context_builder import build_agent_context
from app.services.ai.agent import CodegenAgent

# 1) Natural-language factor request (the val_low_combo selection is the example).
#    The text is sent to the LLM verbatim.
user_factor_spec = (
    "请基于 daily_basic 的字段 pe_ttm 与 pb 构建一个“低估值复合”原始因子：\n"
    "- 因子含义：估值越低越好（仅说明方向，函数中不要做方向一致化）。\n"
    "- 计算：可对 pe_ttm 与 pb 分别取倒数或取负，使其数值与“越低越好”一致；随后等权平均得到 factor。\n"
    "- 输出：保留 selection 的 output_index，并包含列名为 factor 的结果列。\n"
)

# 2) Assemble the final system prompt locally for inspection.
#    NOTE(review): the original note says this mirrors the backend's template
#    substitution flow — confirm; it relies on the private `_build_messages`.
spec_model = SelectionSpec.model_validate(selection)
context = build_agent_context(spec_model)
agent = CodegenAgent(None)
msgs = agent._build_messages(context, user_factor_spec, None)
system_prompt = msgs[0].content
print("=== System Prompt (final) ===\n", system_prompt)

# 3) Ask the backend to generate the factor code. The model name comes from
#    AI_MODEL (default qwen3-coder-plus) and `enable_thinking` is set to False.
coding_prefs = { 'model': os.getenv('AI_MODEL', 'qwen3-coder-plus'), 'extra': { 'enable_thinking': False } }
r1 = client.post('/factors/codegen', json={
    'selection': selection,
    'user_factor_spec': user_factor_spec,
    'coding_prefs': coding_prefs,
})
js1 = r1.json()
print('codegen status:', r1.status_code)
# `code_text` is None when the response has no such key; later cells reuse it.
code_text = js1.get('code_text')
print("=== Codegen (code) ===\n", code_text)

# 4) Validate the generated code against the same selection.
r2 = client.post('/factors/validate', json={'code_text': code_text, 'selection': selection})
print('validate:', r2.status_code, r2.json())



=== System Prompt (final) ===
 你是量化研究编码助手。请只生成 Python 代码，函数签名：
def compute_factor(data: dict[str, pd.DataFrame], params: dict) -> pd.DataFrame
约束：仅可使用 pandas/numpy；禁止 IO/网络/子进程/动态导入/反射；输出需保留 output_index，且包含 'factor' 列。
不要在函数中做 winsor、缺失填充、标准化或方向一致化，这些在系统后续阶段统一处理。

[SELECTION_CONTEXT_JSON]
{"selection": {"factor_slug": "val_low_combo", "title": "低估值复合（PE_TTM + PB）", "output_index": ["ts_code", "trade_date"], "selection": [{"endpoint": "daily_basic", "fields": ["ts_code", "trade_date", "pe_ttm", "pb"], "param_binding": {"start_date": {"type": "request_arg", "name": "start_date"}, "end_date": {"type": "request_arg", "name": "end_date"}}, "join_keys": ["ts_code", "trade_date"]}], "alignment": [], "constraints": {"winsor": [0.01, 0.99], "zscore_axis": "trade_date"}, "code_contract": {"signature": "def compute_factor(data: dict[str, pd.DataFrame], params: dict) -> pd.DataFrame", "data_keys": ["daily_basic"]}}, "endpoints": {"daily_basic": {"name": "daily_basic", "description": "每日估值/规模/流动性指

In [7]:
# M4: Test（含标准化预览）

from datetime import date

# Run the generated factor code through /factors/test on two tickers over a
# short date window, requesting a z-score normalization preview grouped by
# trade_date.
r3 = client.post('/factors/test', json={
    'selection': selection,
    'code_text': code_text,
    'ts_codes': ['000001.SZ','600000.SH'],
    'start_date': '20210101',
    'end_date': '20210108',
    'top_n': 3,
    'normalization': {
        'method': 'zscore',
        'winsor': [0.01, 0.99],
        'fill': 'median',
        'by': ['trade_date']
    }
})
print('test:', r3.status_code)
# Parse the body once. `sample_rows` may be missing (e.g. on an error
# response); slicing None would raise TypeError and hide the real failure,
# so an empty list is shown instead.
r3_body = r3.json()
print('sample_rows:', (r3_body.get('sample_rows') or [])[:2])
print('diagnosis:', r3_body.get('diagnosis'))



test: 200
sample_rows: [{'ts_code': '000001.SZ', 'trade_date': '20210101', 'factor': 0.0}, {'ts_code': '000001.SZ', 'trade_date': '20210108', 'factor': 0.0}]
diagnosis: {'mean': 0.0, 'std': 0.0, 'skew': 0.0, 'kurt': 0.0, 'missing_rate': 0.0}


In [8]:
# M5: Persist 因子与策略

# Save the factor (generated code plus its selection) and keep the returned id.
save_factor = client.post('/factors', json={
    'name': 'notebook_factor',
    'code_text': code_text,
    'fields_used': ['ts_code','trade_date'],
    'selection': selection
})
fid = save_factor.json().get('id')
print('save factor id:', fid)

# Create a strategy, then set its factor weights and normalization.
create_strategy = client.post('/strategies', json={'name': 'nb_strategy'})
sid = create_strategy.json().get('id')
print('strategy id:', sid)

# The saved factor gets weight 1.0. The response of this call is discarded.
client.put(f'/strategies/{sid}/weights', json={'weights':[{'factor_id': fid, 'weight': 1.0}]})
# Left as a bare trailing expression: its Response object is what the
# notebook displays as this cell's output.
client.put(f'/strategies/{sid}/normalization', json={'normalization': {'method':'zscore','winsor':[0.01,0.99],'fill':'median'}})



save factor id: 24
strategy id: 25


<Response [200 OK]>

In [9]:
# M6: Universe（可选，使用 mock）
# Sync the stock universe (per the original note, it is used to filter the
# scope of strategy runs).
sync = client.post('/universe/sync')
print('universe sync:', sync.json())

# Simple query: show the first two entries returned for industry "银行".
print('universe list:', client.get('/universe/stocks?industry=银行').json()[:2])



universe sync: {'ok': True, 'synced': 5422}
universe list: [{'ts_code': '000001.SZ', 'sec_type': 'stock', 'symbol': '000001', 'name': '平安银行', 'area': '深圳', 'industry': '银行', 'market': '主板', 'exchange': 'SZSE', 'list_status': 'L', 'list_date': '19910403', 'delist_date': None, 'is_hs': 'S', 'updated_at': '2025-08-09 09:09:16'}, {'ts_code': '001227.SZ', 'sec_type': 'stock', 'symbol': '001227', 'name': '兰州银行', 'area': '甘肃', 'industry': '银行', 'market': '主板', 'exchange': 'SZSE', 'list_status': 'L', 'list_date': '20220117', 'delist_date': None, 'is_hs': 'S', 'updated_at': '2025-08-09 09:09:16'}]


In [10]:
# M7: 运行策略（含 Universe 过滤）
# Run the saved strategy, passing an industry filter, two explicit tickers,
# the same date window as the factor test, and top_n=2.
run = client.post(f'/strategies/{sid}/run', json={
    'industry': '银行',
    'ts_codes': ['000001.SZ','600000.SH'],
    'start_date': '20210101',
    'end_date': '20210108',
    'top_n': 2
})
print('run status:', run.status_code)
# `results` prints as None when the response body has no such key.
print('results:', run.json().get('results'))



run status: 200
results: [{'ts_code': '000001.SZ', 'trade_date': '20210108', 'factor_24': 1.0204081632653064, 'score': 1.0204081632653064}, {'ts_code': '000001.SZ', 'trade_date': '20210107', 'factor_24': 1.0204081632653061, 'score': 1.0204081632653061}]
