# OFI (Order Flow Imbalance) 分析

本notebook用于计算和分析订单流失衡指标。

## 步骤:
1. 认证聚宽账号
2. 获取tick数据
3. 计算OFI指标

In [17]:
from jqdatasdk import *
import pandas as pd
import numpy as np
from datetime import datetime

# print(auth('YOUR_PHONE', 'YOUR_PASSWORD'))  # 请替换为你的账号和密码

# 需要先进行认证，请替换为你的账号和密码
# auth('username', 'password')
# 如果已经认证，可以注释掉上面这行

# 检查是否已认证
try:
    is_auth()
    print("已认证成功")
except:
    print("需要认证，请运行: auth('your_username', 'your_password')")


None
已认证成功


In [21]:
print(get_query_count())
print(get_price("000001.XSHE", start_date="2024-11-01", end_date="2025-10-01").head())

{'total': 1000000, 'spare': 999998}
             open  close   high    low       volume         money
2024-11-01  10.81  10.85  10.97  10.77  167435405.0  1.821423e+09
2024-11-04  10.85  10.88  10.88  10.69  119168785.0  1.285874e+09
2024-11-05  10.84  11.06  11.07  10.81  175398198.0  1.925261e+09
2024-11-06  11.03  10.97  11.05  10.89  154004779.0  1.689978e+09
2024-11-07  10.92  11.31  11.33  10.90  236211369.0  2.627008e+09


In [None]:
from jqdata import *
import pandas as pd
import numpy as np
from datetime import datetime

# ====== 1) 配置 ======
UNIVERSE = [
    '511360.XSHG','511090.XSHG','511380.XSHG','518880.XSHG','510500.XSHG',
    '159919.XSHE','510300.XSHG','510310.XSHG','159915.XSHE','510050.XSHG',
    '513090.XSHG','588000.XSHG'
]

def last_closed_trade_day():
    days = get_trade_days(end_date=datetime.now().date(), count=2)
    return days[-2]

DATE = last_closed_trade_day()

FIELDS = [
    'time',
    'a1_p','b1_p','a1_v','b1_v',
    'a2_p','b2_p','a2_v','b2_v',
    'a3_p','b3_p','a3_v','b3_v',
    'a4_p','b4_p','a4_v','b4_v',
    'a5_p','b5_p','a5_v','b5_v',
    'volume','money','current'
]

# ====== 2) 拉取 & 合并 tick ======
def fetch_one(code, date, start="09:30:00", end="15:00:00"):
    start_dt = f"{date} {start}"
    end_dt   = f"{date} {end}"
    tk = get_ticks(code, start_dt=start_dt, end_dt=end_dt, fields=FIELDS)
    if tk is None or len(tk) < 50:
        return None
    df = pd.DataFrame.from_records(tk) if not isinstance(tk, pd.DataFrame) else tk.copy()
    df['dt'] = pd.to_datetime(df['time'], errors='coerce')
    df = df.dropna(subset=['dt']).sort_values('dt')
    df['code'] = code
    # 最小过滤：报价合法
    df = df.dropna(subset=['a1_p','b1_p','a1_v','b1_v'])
    df = df[(df['a1_p'] >= df['b1_p']) & (df['a1_p'] > 0) & (df['b1_p'] > 0)]
    return df

dfs = []
for code in UNIVERSE:
    dfi = fetch_one(code, DATE)
    if dfi is not None and len(dfi) >= 200:
        dfs.append(dfi)

ticks = pd.concat(dfs, ignore_index=True)
print("DATE:", DATE, "codes:", ticks['code'].nunique(), "rows:", len(ticks))
print(ticks.groupby('code').size().sort_values(ascending=False).head())

# ====== 3) 构造 mid/spread/depth/imbalance ======
def build_micro_features(df):
    df = df.sort_values('dt').copy()
    df['mid'] = (df['a1_p'] + df['b1_p']) / 2.0
    df['spread'] = df['a1_p'] - df['b1_p']
    df['rel_spread'] = df['spread'] / df['mid'].replace(0, np.nan)

    bid_depth5 = df[['b1_v','b2_v','b3_v','b4_v','b5_v']].sum(axis=1)
    ask_depth5 = df[['a1_v','a2_v','a3_v','a4_v','a5_v']].sum(axis=1)
    df['bid_depth5'] = bid_depth5
    df['ask_depth5'] = ask_depth5
    df['imbalance5'] = (bid_depth5 - ask_depth5) / (bid_depth5 + ask_depth5).replace(0, np.nan)
    return df

ticks_feat = ticks.groupby('code', group_keys=False).apply(build_micro_features)

# ====== 4) OFI(L1) ======
def ofi_l1(df):
    df = df.sort_values('dt').copy()

    b1p, b1v = df['b1_p'].to_numpy(), df['b1_v'].to_numpy()
    a1p, a1v = df['a1_p'].to_numpy(), df['a1_v'].to_numpy()

    b1p0, b1v0 = np.roll(b1p, 1), np.roll(b1v, 1)
    a1p0, a1v0 = np.roll(a1p, 1), np.roll(a1v, 1)
    b1p0[0], b1v0[0] = b1p[0], b1v[0]
    a1p0[0], a1v0[0] = a1p[0], a1v[0]

    bid = np.where(b1p > b1p0,  b1v,
          np.where(b1p < b1p0, -b1v0,
                              (b1v - b1v0)))

    ask = np.where(a1p < a1p0, -a1v,
          np.where(a1p > a1p0,  a1v0,
                              -(a1v - a1v0)))

    df['OFI'] = bid + ask
    return df

ticks_feat = ticks_feat.groupby('code', group_keys=False).apply(ofi_l1)

# ====== 5) 未来收益（用 asof 对齐） ======
def add_future_return(df, horizons=(3, 15, 60), tol=6):
    df = df.sort_values('dt').copy()
    base = df[['dt','mid']].copy()

    for h in horizons:
        future = base.copy()
        future['dt'] = future['dt'] + pd.to_timedelta(h, unit='s')
        aligned = pd.merge_asof(
            future.sort_values('dt'),
            base.sort_values('dt'),
            on='dt',
            direction='forward',
            tolerance=pd.to_timedelta(tol, unit='s')
        )
        # aligned['mid_y'] 是未来mid
        df[f'ret_fwd_{h}s'] = np.log(aligned['mid_y'].to_numpy()) - np.log(base['mid'].to_numpy())

    return df

ticks_feat = ticks_feat.groupby('code', group_keys=False).apply(add_future_return)

print("done:", ticks_feat.columns.tolist()[:10], "...")


ModuleNotFoundError: No module named 'jqdata'