<a href="https://colab.research.google.com/github/david132313/A_shareStock_data_and_model/blob/main/%E6%95%B0%E6%8D%AE%E5%BA%93startup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cell 0：挂载 Drive

In [None]:
# Mount Google Drive at /content/drive (Colab-only API); BASE_DIR below lives under this mount.
from google.colab import drive
drive.mount("/content/drive")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Cell 1：路径

In [None]:
import os
import shutil

BASE_DIR = "/content/drive/MyDrive/AshareDB"
DAILY_DIR = os.path.join(BASE_DIR, "daily_parquet")     # directory holding the daily parquet files
DB_DIR    = os.path.join(BASE_DIR, "db")               # directory holding the SQLite database

os.makedirs(DAILY_DIR, exist_ok=True)
os.makedirs(DB_DIR, exist_ok=True)

DB_DRIVE = os.path.join(DB_DIR, "ashare.sqlite")       # final output on Drive
DB_LOCAL = "/content/ashare.sqlite"                    # local working copy (fast writes)

# True  -> rebuild from scratch: delete the Drive database and the local copy
#          (this is the original behaviour of this cell).
# False -> resume: keep the Drive database and copy it to local disk, so the
#          ingest_manifest rows it contains let the ingest loop skip dates
#          that were already loaded.
FRESH_START = True


def _remove_if_present(paths):
    """Delete every existing file in ``paths`` and report each deletion."""
    for fp in paths:
        if os.path.exists(fp):
            os.remove(fp)
            print("deleted:", fp)


# Remove the old database on Drive (also clears leftover WAL/SHM/tmp side files).
if FRESH_START:
    _remove_if_present([DB_DRIVE, DB_DRIVE+"-wal", DB_DRIVE+"-shm", DB_DRIVE+".tmp"])

# Always remove the stale local copy; it is either rebuilt or restored below.
_remove_if_present([DB_LOCAL, DB_LOCAL+"-wal", DB_LOCAL+"-shm"])

# Resume mode: seed the local working copy from the last checkpoint on Drive.
if not FRESH_START and os.path.exists(DB_DRIVE):
    shutil.copy2(DB_DRIVE, DB_LOCAL)
    print("restored:", DB_DRIVE, "->", DB_LOCAL)

print("DAILY_DIR =", DAILY_DIR)
print("DB_DRIVE  =", DB_DRIVE)
print("DB_LOCAL  =", DB_LOCAL)


deleted: /content/ashare.sqlite
deleted: /content/ashare.sqlite-wal
deleted: /content/ashare.sqlite-shm
DAILY_DIR = /content/drive/MyDrive/AshareDB/daily_parquet
DB_DRIVE  = /content/drive/MyDrive/AshareDB/db/ashare.sqlite
DB_LOCAL  = /content/ashare.sqlite


In [None]:
import os, re

# Daily files are named daily_YYYYMMDD.parquet; the capture group is the date.
DATE_RE = re.compile(r"^daily_(\d{8})\.parquet$")

# Build (date_string, full_path) pairs for every matching file, sorted
# ascending (tuple ordering, i.e. by date string first).
_candidates = ((DATE_RE.match(entry), entry) for entry in os.listdir(DAILY_DIR))
files = sorted(
    (hit.group(1), os.path.join(DAILY_DIR, entry))
    for hit, entry in _candidates
    if hit
)

print("num parquet files:", len(files))
if not files:
    # Nothing to ingest: stop the notebook here.
    raise RuntimeError("No parquet files found in DAILY_DIR.")
print("first:", files[0][0], "last:", files[-1][0])


num parquet files: 6293
first: 20000104 last: 20251219


In [None]:
import sqlite3

def connect_local_sqlite(path: str) -> sqlite3.Connection:
    """Open the SQLite file at ``path`` and configure it for bulk loading.

    The connection is switched to ``isolation_level = None`` so the sqlite3
    module does not open transactions implicitly, and a fixed list of PRAGMAs
    is applied in order (WAL journal first, exclusive locking last).

    Args:
        path: filesystem path of the database file.

    Returns:
        The configured ``sqlite3.Connection``.
    """
    db = sqlite3.connect(path, timeout=60)
    db.isolation_level = None
    # Order is significant: WAL is enabled before exclusive locking is requested.
    tuning_pragmas = (
        "PRAGMA journal_mode=WAL;",
        "PRAGMA synchronous=NORMAL;",
        "PRAGMA temp_store=MEMORY;",
        "PRAGMA cache_size=-200000;",
        "PRAGMA busy_timeout=60000;",
        "PRAGMA locking_mode=EXCLUSIVE;",
    )
    for statement in tuning_pragmas:
        db.execute(statement)
    return db

# DDL for the three tables used by this notebook (no secondary indexes are created here):
#   security_map    - maps each ts_code string to an integer sec_id
#   daily_price     - one row per (trade_date, sec_id) with the daily price/volume fields
#   ingest_manifest - one row per trade_date recording the outcome of loading its parquet file
# All statements use IF NOT EXISTS, so running the script against an existing DB is a no-op.
SCHEMA_NO_INDEX = """
CREATE TABLE IF NOT EXISTS security_map (
  sec_id  INTEGER PRIMARY KEY AUTOINCREMENT,
  ts_code TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS daily_price (
  trade_date INTEGER NOT NULL,   -- YYYYMMDD
  sec_id     INTEGER NOT NULL,
  open       REAL,
  high       REAL,
  low        REAL,
  close      REAL,
  pre_close  REAL,
  change     REAL,
  pct_chg    REAL,
  vol        REAL,
  amount     REAL,
  PRIMARY KEY (trade_date, sec_id)
);

CREATE TABLE IF NOT EXISTS ingest_manifest (
  trade_date   INTEGER PRIMARY KEY,
  parquet_file TEXT NOT NULL,
  rows         INTEGER NOT NULL,
  status       TEXT NOT NULL,     -- ok/fail
  message      TEXT,
  loaded_at    TEXT NOT NULL
);
"""

# If this cell is re-run, the previous connection is still open and (because of
# locking_mode=EXCLUSIVE) still holds the database lock, so a second connection
# would wait for the busy timeout and then fail. Close the old one first.
_stale_conn = globals().get("conn")
if isinstance(_stale_conn, sqlite3.Connection):
    _stale_conn.close()

# Open the local working database and create the tables if they do not exist yet.
conn = connect_local_sqlite(DB_LOCAL)
conn.executescript(SCHEMA_NO_INDEX)

print("local DB created:", DB_LOCAL)
# List the tables now present, as a quick visual check of the schema.
print(conn.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;").fetchall())


local DB created: /content/ashare.sqlite
[('daily_price',), ('ingest_manifest',), ('security_map',), ('sqlite_sequence',)]


In [None]:
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import sqlite3, os

# Upsert for daily_price: inserts a row, or overwrites every value column when a row
# with the same (trade_date, sec_id) key already exists. This makes re-loading a
# date idempotent.
UPSERT_DAILY_SQL = """
INSERT INTO daily_price(
  trade_date, sec_id, open, high, low, close, pre_close, change, pct_chg, vol, amount
) VALUES (?,?,?,?,?,?,?,?,?,?,?)
ON CONFLICT(trade_date, sec_id) DO UPDATE SET
  open=excluded.open,
  high=excluded.high,
  low=excluded.low,
  close=excluded.close,
  pre_close=excluded.pre_close,
  change=excluded.change,
  pct_chg=excluded.pct_chg,
  vol=excluded.vol,
  amount=excluded.amount;
"""

# Columns every parquet file must contain (checked before a file is loaded).
PARQUET_COLS = ['ts_code','trade_date','open','high','low','close','pre_close','change','pct_chg','vol','amount']
# Column order of the tuples passed to UPSERT_DAILY_SQL; it must match the order of
# the column list / placeholders in that statement.
SQL_COLS     = ["trade_date","sec_id","open","high","low","close","pre_close","change","pct_chg","vol","amount"]

def chunked(lst, n):
    """Yield consecutive slices of ``lst``, each holding at most ``n`` items.

    The final slice may be shorter; an empty ``lst`` yields nothing.
    """
    total = len(lst)
    yield from (lst[start:start + n] for start in range(0, total, n))

# In-memory ts_code -> sec_id lookup, preloaded from security_map
# (only a few thousand entries, so the memory cost is negligible).
cache_ts2id = dict(conn.execute("SELECT ts_code, sec_id FROM security_map;"))

def ensure_sec_ids_cached(conn: sqlite3.Connection, codes: list[str]):
    """Make sure every code in ``codes`` has a sec_id in ``cache_ts2id``.

    Codes that are not cached yet are inserted into ``security_map``
    (``INSERT OR IGNORE``), then their sec_ids are read back and stored in the
    module-level ``cache_ts2id`` dict. Nothing is executed when all codes are
    already cached.

    Args:
        conn: open connection to the database holding ``security_map``.
        codes: ts_code values; each is converted with ``str()`` before lookup.
    """
    unseen = [code for code in map(str, codes) if code not in cache_ts2id]
    if unseen:
        distinct = set(unseen)
        conn.executemany("INSERT OR IGNORE INTO security_map(ts_code) VALUES (?);",
                         [(code,) for code in distinct])

        # Read the ids back in batches of 900 bound parameters per query.
        for group in chunked(sorted(distinct), 900):
            marks = ",".join("?" for _ in group)
            lookup = f"SELECT ts_code, sec_id FROM security_map WHERE ts_code IN ({marks});"
            cache_ts2id.update(conn.execute(lookup, group))

def backup_local_to_drive(local_conn: sqlite3.Connection, drive_path: str):
    """Copy the database behind ``local_conn`` to ``drive_path``.

    The copy is written with SQLite's backup API into ``drive_path + ".tmp"``
    and then moved over ``drive_path`` with ``os.replace``, so the destination
    is only replaced after the backup has finished. The intent is to leave a
    single clean database file on Drive rather than -wal/-shm side files.

    Args:
        local_conn: open connection to the source database.
        drive_path: destination path of the final database file.
    """
    staging_path = drive_path + ".tmp"
    staging_db = sqlite3.connect(staging_path)
    try:
        local_conn.backup(staging_db)
        staging_db.commit()
    finally:
        # Always release the staging file handle, even if the backup failed.
        staging_db.close()
    os.replace(staging_path, drive_path)

# Resume point: dates already recorded as 'ok' in ingest_manifest are skipped.
loaded_ok = {
    row[0]
    for row in conn.execute("SELECT trade_date FROM ingest_manifest WHERE status='ok';")
}
todo = [entry for entry in files if int(entry[0]) not in loaded_ok]
print("already ok:", len(loaded_ok), "| remaining:", len(todo), "| next:", (todo[0][0] if todo else None))

# Back up to Drive every N processed files (~1 month of trading days);
# raise it to reduce Drive I/O.
SAVE_EVERY_N = 22
ok_n = 0
fail_n = 0

# Load every pending parquet file, one SQLite transaction per trading day.
# The outcome of each day (ok / fail + message) is recorded in ingest_manifest.
for i, (d, p) in enumerate(tqdm(todo, desc="ingest"), start=1):
    parquet_name = os.path.basename(p)
    trade_date_int = int(d)
    try:
        df = pd.read_parquet(p)

        missing = [c for c in PARQUET_COLS if c not in df.columns]
        if missing:
            raise ValueError(f"Missing columns {missing} in {parquet_name}")
        # An empty file cannot pass the first-row sanity check below; fail it
        # here with an explicit message, before a transaction is opened.
        if df.empty:
            raise ValueError(f"No rows in {parquet_name}")

        df["trade_date"] = df["trade_date"].astype(str).astype(int)

        conn.execute("BEGIN;")

        # Resolve ts_code -> sec_id (new codes are inserted inside this transaction).
        codes = df["ts_code"].astype(str).unique().tolist()
        ensure_sec_ids_cached(conn, codes)
        df["sec_id"] = df["ts_code"].astype(str).map(cache_ts2id).astype(int)

        # Intended to turn missing values into None so they are stored as NULL.
        df2 = df[SQL_COLS].where(pd.notnull(df[SQL_COLS]), None)

        # Sanity check on the first row (guards against trade_date / sec_id being swapped).
        td0 = int(df2.iloc[0,0]); sid0 = int(df2.iloc[0,1])
        if td0 < 19900101 or td0 > 21000101:
            raise ValueError(f"Sanity failed trade_date: {td0}")
        if sid0 <= 0:
            raise ValueError(f"Sanity failed sec_id: {sid0}")

        conn.executemany(UPSERT_DAILY_SQL, df2.itertuples(index=False, name=None))

        conn.execute("""
        INSERT OR REPLACE INTO ingest_manifest(trade_date, parquet_file, rows, status, message, loaded_at)
        VALUES (?,?,?,?,?,?)
        """, (trade_date_int, parquet_name, int(len(df2)), "ok", None,
              datetime.now().isoformat(timespec="seconds")))

        conn.execute("COMMIT;")
        ok_n += 1

    except Exception as e:
        # Roll back only if a transaction is actually open (the error may have
        # happened before BEGIN, or SQLite may already have rolled back).
        if conn.in_transaction:
            conn.execute("ROLLBACK;")

        # The rollback also undoes security_map inserts made for this file, but
        # cache_ts2id would still hold their sec_ids. Reload the cache from the
        # database so later files never reuse ids that no longer exist (or that
        # get reassigned to a different ts_code).
        cache_ts2id.clear()
        cache_ts2id.update(conn.execute("SELECT ts_code, sec_id FROM security_map;"))

        # Record the failure in its own small transaction.
        conn.execute("BEGIN;")
        conn.execute("""
        INSERT OR REPLACE INTO ingest_manifest(trade_date, parquet_file, rows, status, message, loaded_at)
        VALUES (?,?,?,?,?,?)
        """, (trade_date_int, parquet_name, 0, "fail", repr(e),
              datetime.now().isoformat(timespec="seconds")))
        conn.execute("COMMIT;")
        fail_n += 1

    # Periodic checkpoint: copy the local database to Drive.
    if i % SAVE_EVERY_N == 0:
        backup_local_to_drive(conn, DB_DRIVE)
        print(f"\n[checkpoint] saved to Drive at {d} | ok={ok_n} fail={fail_n} | cache={len(cache_ts2id)}")

# Final backup after the loop, so Drive holds the complete database.
backup_local_to_drive(conn, DB_DRIVE)
print("DONE. ok:", ok_n, "fail:", fail_n, "saved to:", DB_DRIVE)
print("integrity_check:", conn.execute("PRAGMA integrity_check;").fetchone()[0])


already ok: 0 | remaining: 6293 | next: 20000104


ingest:   0%|          | 26/6293 [00:03<09:06, 11.46it/s]


[checkpoint] saved to Drive at 20000216 | ok=22 fail=0 | cache=931


ingest:   1%|          | 49/6293 [00:03<03:58, 26.15it/s]


[checkpoint] saved to Drive at 20000317 | ok=44 fail=0 | cache=938


ingest:   1%|          | 71/6293 [02:31<14:40:32,  8.49s/it]


[checkpoint] saved to Drive at 20000418 | ok=66 fail=0 | cache=942


ingest:   1%|▏         | 93/6293 [02:32<1:54:42,  1.11s/it]


[checkpoint] saved to Drive at 20000525 | ok=88 fail=0 | cache=952


ingest:   2%|▏         | 115/6293 [02:33<19:41,  5.23it/s]


[checkpoint] saved to Drive at 20000626 | ok=110 fail=0 | cache=971


ingest:   2%|▏         | 137/6293 [02:34<07:22, 13.91it/s]


[checkpoint] saved to Drive at 20000726 | ok=132 fail=0 | cache=994


ingest:   3%|▎         | 160/6293 [02:35<06:10, 16.56it/s]


[checkpoint] saved to Drive at 20000825 | ok=154 fail=0 | cache=1004


ingest:   3%|▎         | 179/6293 [02:36<07:01, 14.51it/s]


[checkpoint] saved to Drive at 20000926 | ok=176 fail=0 | cache=1016


ingest:   3%|▎         | 204/6293 [02:38<05:54, 17.17it/s]


[checkpoint] saved to Drive at 20001102 | ok=198 fail=0 | cache=1024


ingest:   4%|▎         | 224/6293 [02:39<07:01, 14.39it/s]


[checkpoint] saved to Drive at 20001204 | ok=220 fail=0 | cache=1035


ingest:   4%|▍         | 246/6293 [02:40<08:01, 12.56it/s]


[checkpoint] saved to Drive at 20010104 | ok=242 fail=0 | cache=1062


ingest:   4%|▍         | 271/6293 [02:42<07:21, 13.63it/s]


[checkpoint] saved to Drive at 20010219 | ok=264 fail=0 | cache=1081


ingest:   5%|▍         | 292/6293 [02:43<08:01, 12.47it/s]


[checkpoint] saved to Drive at 20010321 | ok=286 fail=0 | cache=1095


ingest:   5%|▍         | 313/6293 [02:45<08:34, 11.62it/s]


[checkpoint] saved to Drive at 20010420 | ok=308 fail=0 | cache=1101


ingest:   5%|▌         | 334/6293 [02:46<08:39, 11.47it/s]


[checkpoint] saved to Drive at 20010529 | ok=330 fail=0 | cache=1107


ingest:   6%|▌         | 357/6293 [02:48<08:59, 11.00it/s]


[checkpoint] saved to Drive at 20010628 | ok=352 fail=0 | cache=1116


ingest:   6%|▌         | 377/6293 [02:50<09:44, 10.11it/s]


[checkpoint] saved to Drive at 20010730 | ok=374 fail=0 | cache=1119


ingest:   6%|▋         | 403/6293 [02:52<09:28, 10.37it/s]


[checkpoint] saved to Drive at 20010829 | ok=396 fail=0 | cache=1130


ingest:   7%|▋         | 423/6293 [02:54<09:19, 10.49it/s]


[checkpoint] saved to Drive at 20010928 | ok=418 fail=0 | cache=1133


ingest:   7%|▋         | 446/6293 [02:56<10:18,  9.45it/s]


[checkpoint] saved to Drive at 20011106 | ok=440 fail=0 | cache=1133


ingest:   7%|▋         | 465/6293 [02:58<12:22,  7.85it/s]


[checkpoint] saved to Drive at 20011206 | ok=462 fail=0 | cache=1135


ingest:   8%|▊         | 487/6293 [03:00<14:25,  6.71it/s]


[checkpoint] saved to Drive at 20020110 | ok=484 fail=0 | cache=1142


ingest:   8%|▊         | 511/6293 [03:03<18:23,  5.24it/s]


[checkpoint] saved to Drive at 20020225 | ok=506 fail=0 | cache=1147


ingest:   8%|▊         | 532/6293 [03:06<13:32,  7.09it/s]


[checkpoint] saved to Drive at 20020327 | ok=528 fail=0 | cache=1150


ingest:   9%|▉         | 557/6293 [03:08<12:40,  7.54it/s]


[checkpoint] saved to Drive at 20020426 | ok=550 fail=0 | cache=1156


ingest:   9%|▉         | 578/6293 [03:11<16:05,  5.92it/s]


[checkpoint] saved to Drive at 20020604 | ok=572 fail=0 | cache=1163


ingest:  10%|▉         | 599/6293 [03:15<19:05,  4.97it/s]


[checkpoint] saved to Drive at 20020704 | ok=594 fail=0 | cache=1172


ingest:  10%|▉         | 620/6293 [03:17<14:40,  6.44it/s]


[checkpoint] saved to Drive at 20020805 | ok=616 fail=0 | cache=1180


ingest:  10%|█         | 644/6293 [03:20<13:12,  7.13it/s]


[checkpoint] saved to Drive at 20020904 | ok=638 fail=0 | cache=1188


ingest:  11%|█         | 666/6293 [03:23<18:26,  5.08it/s]


[checkpoint] saved to Drive at 20021014 | ok=660 fail=0 | cache=1200


ingest:  11%|█         | 686/6293 [03:27<22:08,  4.22it/s]


[checkpoint] saved to Drive at 20021113 | ok=682 fail=0 | cache=1206


ingest:  11%|█         | 704/6293 [03:30<25:24,  3.67it/s]


[checkpoint] saved to Drive at 20021213 | ok=704 fail=0 | cache=1210


ingest:  12%|█▏        | 730/6293 [03:33<15:53,  5.83it/s]


[checkpoint] saved to Drive at 20030115 | ok=726 fail=0 | cache=1215


ingest:  12%|█▏        | 752/6293 [03:37<22:45,  4.06it/s]


[checkpoint] saved to Drive at 20030225 | ok=748 fail=0 | cache=1219


ingest:  12%|█▏        | 775/6293 [03:41<20:29,  4.49it/s]


[checkpoint] saved to Drive at 20030327 | ok=770 fail=0 | cache=1225


ingest:  13%|█▎        | 797/6293 [03:45<20:30,  4.47it/s]


[checkpoint] saved to Drive at 20030428 | ok=792 fail=0 | cache=1231


ingest:  13%|█▎        | 814/6293 [03:49<32:21,  2.82it/s]


[checkpoint] saved to Drive at 20030606 | ok=814 fail=0 | cache=1234


ingest:  13%|█▎        | 840/6293 [04:02<1:24:56,  1.07it/s]


[checkpoint] saved to Drive at 20030708 | ok=836 fail=0 | cache=1242


ingest:  14%|█▎        | 862/6293 [04:07<30:56,  2.92it/s]


[checkpoint] saved to Drive at 20030807 | ok=858 fail=0 | cache=1251


ingest:  14%|█▍        | 885/6293 [04:12<28:15,  3.19it/s]


[checkpoint] saved to Drive at 20030908 | ok=880 fail=0 | cache=1260


ingest:  14%|█▍        | 905/6293 [04:17<31:40,  2.84it/s]


[checkpoint] saved to Drive at 20031015 | ok=902 fail=0 | cache=1267


ingest:  15%|█▍        | 928/6293 [04:21<22:42,  3.94it/s]


[checkpoint] saved to Drive at 20031114 | ok=924 fail=0 | cache=1272


ingest:  15%|█▌        | 951/6293 [04:27<32:39,  2.73it/s]


[checkpoint] saved to Drive at 20031216 | ok=946 fail=0 | cache=1277


ingest:  15%|█▌        | 971/6293 [04:32<28:44,  3.09it/s]


[checkpoint] saved to Drive at 20040116 | ok=968 fail=0 | cache=1281


ingest:  16%|█▌        | 993/6293 [04:36<25:17,  3.49it/s]


[checkpoint] saved to Drive at 20040227 | ok=990 fail=0 | cache=1286


ingest:  16%|█▌        | 1017/6293 [04:41<25:01,  3.51it/s]


[checkpoint] saved to Drive at 20040330 | ok=1012 fail=0 | cache=1296


ingest:  16%|█▋        | 1036/6293 [04:47<47:32,  1.84it/s]


[checkpoint] saved to Drive at 20040429 | ok=1034 fail=0 | cache=1308


ingest:  17%|█▋        | 1062/6293 [04:51<20:43,  4.21it/s]


[checkpoint] saved to Drive at 20040607 | ok=1056 fail=0 | cache=1321


ingest:  17%|█▋        | 1082/6293 [04:57<28:36,  3.04it/s]


[checkpoint] saved to Drive at 20040707 | ok=1078 fail=0 | cache=1345


ingest:  18%|█▊        | 1105/6293 [05:01<23:09,  3.73it/s]


[checkpoint] saved to Drive at 20040806 | ok=1100 fail=0 | cache=1365


ingest:  18%|█▊        | 1127/6293 [05:06<23:22,  3.68it/s]


[checkpoint] saved to Drive at 20040907 | ok=1122 fail=0 | cache=1377


ingest:  18%|█▊        | 1150/6293 [05:11<24:44,  3.47it/s]


[checkpoint] saved to Drive at 20041014 | ok=1144 fail=0 | cache=1379


ingest:  19%|█▊        | 1171/6293 [05:16<27:33,  3.10it/s]


[checkpoint] saved to Drive at 20041115 | ok=1166 fail=0 | cache=1379


ingest:  19%|█▉        | 1193/6293 [05:20<24:20,  3.49it/s]


[checkpoint] saved to Drive at 20041215 | ok=1188 fail=0 | cache=1379


ingest:  19%|█▉        | 1215/6293 [05:26<28:31,  2.97it/s]


[checkpoint] saved to Drive at 20050117 | ok=1210 fail=0 | cache=1379


ingest:  20%|█▉        | 1234/6293 [05:33<42:19,  1.99it/s]


[checkpoint] saved to Drive at 20050225 | ok=1232 fail=0 | cache=1380


ingest:  20%|█▉        | 1257/6293 [05:39<31:12,  2.69it/s]


[checkpoint] saved to Drive at 20050329 | ok=1254 fail=0 | cache=1382


ingest:  20%|██        | 1276/6293 [05:44<37:08,  2.25it/s]


[checkpoint] saved to Drive at 20050428 | ok=1276 fail=0 | cache=1385


ingest:  21%|██        | 1302/6293 [05:50<27:28,  3.03it/s]


[checkpoint] saved to Drive at 20050606 | ok=1298 fail=0 | cache=1393


ingest:  21%|██        | 1325/6293 [05:55<27:06,  3.05it/s]


[checkpoint] saved to Drive at 20050706 | ok=1320 fail=0 | cache=1394


ingest:  21%|██▏       | 1345/6293 [06:00<31:24,  2.63it/s]


[checkpoint] saved to Drive at 20050805 | ok=1342 fail=0 | cache=1394


ingest:  22%|██▏       | 1370/6293 [06:06<29:29,  2.78it/s]


[checkpoint] saved to Drive at 20050906 | ok=1364 fail=0 | cache=1394


ingest:  22%|██▏       | 1391/6293 [06:14<44:46,  1.82it/s]  


[checkpoint] saved to Drive at 20051013 | ok=1386 fail=0 | cache=1394


ingest:  22%|██▏       | 1414/6293 [06:20<32:47,  2.48it/s]


[checkpoint] saved to Drive at 20051114 | ok=1408 fail=0 | cache=1394


ingest:  23%|██▎       | 1436/6293 [06:26<25:26,  3.18it/s]


[checkpoint] saved to Drive at 20051214 | ok=1430 fail=0 | cache=1394


ingest:  23%|██▎       | 1452/6293 [06:34<46:46,  1.72it/s]


[checkpoint] saved to Drive at 20060117 | ok=1452 fail=0 | cache=1394


ingest:  23%|██▎       | 1478/6293 [06:41<30:40,  2.62it/s]


[checkpoint] saved to Drive at 20060227 | ok=1474 fail=0 | cache=1394


ingest:  24%|██▍       | 1500/6293 [06:46<28:53,  2.77it/s]


[checkpoint] saved to Drive at 20060329 | ok=1496 fail=0 | cache=1394


ingest:  24%|██▍       | 1523/6293 [06:55<37:53,  2.10it/s]


[checkpoint] saved to Drive at 20060428 | ok=1518 fail=0 | cache=1394


ingest:  25%|██▍       | 1543/6293 [07:01<38:14,  2.07it/s]


[checkpoint] saved to Drive at 20060606 | ok=1540 fail=0 | cache=1394


ingest:  25%|██▍       | 1566/6293 [07:09<39:54,  1.97it/s]


[checkpoint] saved to Drive at 20060706 | ok=1562 fail=0 | cache=1399


ingest:  25%|██▌       | 1587/6293 [07:15<29:45,  2.64it/s]


[checkpoint] saved to Drive at 20060807 | ok=1584 fail=0 | cache=1406


ingest:  26%|██▌       | 1610/6293 [07:21<32:59,  2.37it/s]


[checkpoint] saved to Drive at 20060906 | ok=1606 fail=0 | cache=1415


ingest:  26%|██▌       | 1630/6293 [07:28<41:31,  1.87it/s]


[checkpoint] saved to Drive at 20061013 | ok=1628 fail=0 | cache=1421


ingest:  26%|██▋       | 1656/6293 [07:35<31:26,  2.46it/s]


[checkpoint] saved to Drive at 20061114 | ok=1650 fail=0 | cache=1429


ingest:  27%|██▋       | 1674/6293 [07:42<38:55,  1.98it/s]


[checkpoint] saved to Drive at 20061214 | ok=1672 fail=0 | cache=1447


ingest:  27%|██▋       | 1696/6293 [07:56<1:02:34,  1.22it/s]


[checkpoint] saved to Drive at 20070118 | ok=1694 fail=0 | cache=1464


ingest:  27%|██▋       | 1722/6293 [08:05<44:04,  1.73it/s]  


[checkpoint] saved to Drive at 20070226 | ok=1716 fail=0 | cache=1476


ingest:  28%|██▊       | 1743/6293 [08:15<50:46,  1.49it/s]  


[checkpoint] saved to Drive at 20070328 | ok=1738 fail=0 | cache=1486


ingest:  28%|██▊       | 1764/6293 [08:25<52:44,  1.43it/s]  


[checkpoint] saved to Drive at 20070427 | ok=1760 fail=0 | cache=1499


ingest:  28%|██▊       | 1786/6293 [08:35<48:52,  1.54it/s]  


[checkpoint] saved to Drive at 20070605 | ok=1782 fail=0 | cache=1505


ingest:  29%|██▊       | 1804/6293 [08:42<47:39,  1.57it/s]


[checkpoint] saved to Drive at 20070705 | ok=1804 fail=0 | cache=1509


ingest:  29%|██▉       | 1828/6293 [08:56<1:14:52,  1.01s/it]


[checkpoint] saved to Drive at 20070806 | ok=1826 fail=0 | cache=1517


ingest:  29%|██▉       | 1853/6293 [09:06<50:31,  1.46it/s]  


[checkpoint] saved to Drive at 20070905 | ok=1848 fail=0 | cache=1536


ingest:  30%|██▉       | 1875/6293 [09:13<33:05,  2.22it/s]


[checkpoint] saved to Drive at 20071012 | ok=1870 fail=0 | cache=1552


ingest:  30%|███       | 1896/6293 [09:25<1:00:03,  1.22it/s]


[checkpoint] saved to Drive at 20071113 | ok=1892 fail=0 | cache=1559


ingest:  30%|███       | 1918/6293 [09:37<55:14,  1.32it/s]  


[checkpoint] saved to Drive at 20071213 | ok=1914 fail=0 | cache=1575


ingest:  31%|███       | 1941/6293 [09:44<34:24,  2.11it/s]


[checkpoint] saved to Drive at 20080116 | ok=1936 fail=0 | cache=1587


ingest:  31%|███       | 1964/6293 [09:57<54:33,  1.32it/s]  


[checkpoint] saved to Drive at 20080222 | ok=1958 fail=0 | cache=1602


ingest:  32%|███▏      | 1983/6293 [10:08<53:04,  1.35it/s]  


[checkpoint] saved to Drive at 20080325 | ok=1980 fail=0 | cache=1608


ingest:  32%|███▏      | 2007/6293 [10:16<41:07,  1.74it/s]


[checkpoint] saved to Drive at 20080425 | ok=2002 fail=0 | cache=1613


ingest:  32%|███▏      | 2023/6293 [10:16<09:32,  7.46it/s]


[checkpoint] saved to Drive at 20080529 | ok=2024 fail=0 | cache=1632


ingest:  33%|███▎      | 2049/6293 [10:40<1:00:02,  1.18it/s]


[checkpoint] saved to Drive at 20080701 | ok=2046 fail=0 | cache=1644


ingest:  33%|███▎      | 2072/6293 [10:51<50:01,  1.41it/s]  


[checkpoint] saved to Drive at 20080731 | ok=2068 fail=0 | cache=1651


ingest:  33%|███▎      | 2090/6293 [11:02<1:16:11,  1.09s/it]


[checkpoint] saved to Drive at 20080901 | ok=2090 fail=0 | cache=1657


ingest:  34%|███▎      | 2117/6293 [11:09<33:15,  2.09it/s]


[checkpoint] saved to Drive at 20081009 | ok=2112 fail=0 | cache=1662


ingest:  34%|███▍      | 2137/6293 [11:22<59:53,  1.16it/s]  


[checkpoint] saved to Drive at 20081110 | ok=2134 fail=0 | cache=1662


ingest:  34%|███▍      | 2160/6293 [11:37<1:15:52,  1.10s/it]


[checkpoint] saved to Drive at 20081210 | ok=2156 fail=0 | cache=1663


ingest:  35%|███▍      | 2181/6293 [11:47<49:07,  1.39it/s]  


[checkpoint] saved to Drive at 20090113 | ok=2178 fail=0 | cache=1663


ingest:  35%|███▍      | 2199/6293 [11:48<09:02,  7.54it/s]


[checkpoint] saved to Drive at 20090219 | ok=2200 fail=0 | cache=1663


ingest:  35%|███▌      | 2226/6293 [12:11<1:12:42,  1.07s/it]


[checkpoint] saved to Drive at 20090323 | ok=2222 fail=0 | cache=1663


ingest:  36%|███▌      | 2247/6293 [12:24<1:01:35,  1.09it/s]


[checkpoint] saved to Drive at 20090423 | ok=2244 fail=0 | cache=1663


ingest:  36%|███▌      | 2269/6293 [12:38<1:03:48,  1.05it/s]


[checkpoint] saved to Drive at 20090526 | ok=2266 fail=0 | cache=1663


ingest:  36%|███▋      | 2288/6293 [12:47<1:03:54,  1.04it/s]


[checkpoint] saved to Drive at 20090629 | ok=2288 fail=0 | cache=1664


ingest:  37%|███▋      | 2316/6293 [12:57<40:51,  1.62it/s]  


[checkpoint] saved to Drive at 20090729 | ok=2310 fail=0 | cache=1669


ingest:  37%|███▋      | 2337/6293 [13:06<37:15,  1.77it/s]


[checkpoint] saved to Drive at 20090828 | ok=2332 fail=0 | cache=1680


ingest:  37%|███▋      | 2357/6293 [13:18<55:04,  1.19it/s]  


[checkpoint] saved to Drive at 20090929 | ok=2354 fail=0 | cache=1692


ingest:  38%|███▊      | 2379/6293 [13:32<1:15:43,  1.16s/it]


[checkpoint] saved to Drive at 20091106 | ok=2376 fail=0 | cache=1728


ingest:  38%|███▊      | 2400/6293 [13:45<1:25:43,  1.32s/it]


[checkpoint] saved to Drive at 20091208 | ok=2398 fail=0 | cache=1738


ingest:  38%|███▊      | 2418/6293 [13:46<12:29,  5.17it/s]


[checkpoint] saved to Drive at 20100108 | ok=2420 fail=0 | cache=1773


ingest:  39%|███▉      | 2445/6293 [14:19<1:35:55,  1.50s/it]


[checkpoint] saved to Drive at 20100209 | ok=2442 fail=0 | cache=1811


ingest:  39%|███▉      | 2467/6293 [14:36<1:18:26,  1.23s/it]


[checkpoint] saved to Drive at 20100318 | ok=2464 fail=0 | cache=1839


ingest:  40%|███▉      | 2489/6293 [14:52<1:20:58,  1.28s/it]


[checkpoint] saved to Drive at 20100420 | ok=2486 fail=0 | cache=1867


ingest:  40%|███▉      | 2512/6293 [15:08<1:03:09,  1.00s/it]


[checkpoint] saved to Drive at 20100521 | ok=2508 fail=0 | cache=1901


ingest:  40%|████      | 2530/6293 [15:22<1:49:58,  1.75s/it]


[checkpoint] saved to Drive at 20100625 | ok=2530 fail=0 | cache=1940


ingest:  41%|████      | 2555/6293 [15:38<1:08:37,  1.10s/it]


[checkpoint] saved to Drive at 20100727 | ok=2552 fail=0 | cache=1961


ingest:  41%|████      | 2578/6293 [15:52<1:00:43,  1.02it/s]


[checkpoint] saved to Drive at 20100826 | ok=2574 fail=0 | cache=1989


ingest:  41%|████      | 2595/6293 [15:53<11:45,  5.24it/s]


[checkpoint] saved to Drive at 20100930 | ok=2596 fail=0 | cache=2026


ingest:  42%|████▏     | 2622/6293 [16:21<1:06:20,  1.08s/it]


[checkpoint] saved to Drive at 20101108 | ok=2618 fail=0 | cache=2055


ingest:  42%|████▏     | 2643/6293 [16:36<1:06:53,  1.10s/it]


[checkpoint] saved to Drive at 20101208 | ok=2640 fail=0 | cache=2085


ingest:  42%|████▏     | 2665/6293 [16:50<1:03:40,  1.05s/it]


[checkpoint] saved to Drive at 20110110 | ok=2662 fail=0 | cache=2121


ingest:  43%|████▎     | 2687/6293 [17:05<1:05:06,  1.08s/it]


[checkpoint] saved to Drive at 20110216 | ok=2684 fail=0 | cache=2158


ingest:  43%|████▎     | 2709/6293 [17:21<1:20:02,  1.34s/it]


[checkpoint] saved to Drive at 20110318 | ok=2706 fail=0 | cache=2195


ingest:  43%|████▎     | 2733/6293 [17:36<1:04:44,  1.09s/it]


[checkpoint] saved to Drive at 20110421 | ok=2728 fail=0 | cache=2217


ingest:  44%|████▎     | 2752/6293 [17:50<1:16:24,  1.29s/it]


[checkpoint] saved to Drive at 20110524 | ok=2750 fail=0 | cache=2244


ingest:  44%|████▍     | 2776/6293 [18:05<1:03:03,  1.08s/it]


[checkpoint] saved to Drive at 20110624 | ok=2772 fail=0 | cache=2274


ingest:  44%|████▍     | 2797/6293 [18:21<1:07:01,  1.15s/it]


[checkpoint] saved to Drive at 20110726 | ok=2794 fail=0 | cache=2296


ingest:  45%|████▍     | 2821/6293 [18:36<1:04:53,  1.12s/it]


[checkpoint] saved to Drive at 20110825 | ok=2816 fail=0 | cache=2321


ingest:  45%|████▌     | 2838/6293 [18:52<1:45:24,  1.83s/it]


[checkpoint] saved to Drive at 20110927 | ok=2838 fail=0 | cache=2342


ingest:  45%|████▌     | 2862/6293 [19:09<1:23:09,  1.45s/it]


[checkpoint] saved to Drive at 20111103 | ok=2860 fail=0 | cache=2364


ingest:  46%|████▌     | 2886/6293 [19:28<1:28:41,  1.56s/it]


[checkpoint] saved to Drive at 20111205 | ok=2882 fail=0 | cache=2379


ingest:  46%|████▌     | 2907/6293 [19:48<1:27:09,  1.54s/it]


[checkpoint] saved to Drive at 20120106 | ok=2904 fail=0 | cache=2399


ingest:  47%|████▋     | 2928/6293 [20:05<1:29:49,  1.60s/it]


[checkpoint] saved to Drive at 20120214 | ok=2926 fail=0 | cache=2408


ingest:  47%|████▋     | 2950/6293 [20:23<1:50:29,  1.98s/it]


[checkpoint] saved to Drive at 20120315 | ok=2948 fail=0 | cache=2428


ingest:  47%|████▋     | 2972/6293 [20:43<1:35:35,  1.73s/it]


[checkpoint] saved to Drive at 20120419 | ok=2970 fail=0 | cache=2452


ingest:  48%|████▊     | 2993/6293 [21:02<1:45:03,  1.91s/it]


[checkpoint] saved to Drive at 20120523 | ok=2992 fail=0 | cache=2474


ingest:  48%|████▊     | 3016/6293 [21:21<1:53:32,  2.08s/it]


[checkpoint] saved to Drive at 20120625 | ok=3014 fail=0 | cache=2498


ingest:  48%|████▊     | 3038/6293 [21:42<1:51:01,  2.05s/it]


[checkpoint] saved to Drive at 20120725 | ok=3036 fail=0 | cache=2512


ingest:  49%|████▊     | 3060/6293 [22:01<1:32:31,  1.72s/it]


[checkpoint] saved to Drive at 20120824 | ok=3058 fail=0 | cache=2534


ingest:  49%|████▉     | 3083/6293 [22:22<1:46:27,  1.99s/it]


[checkpoint] saved to Drive at 20120925 | ok=3080 fail=0 | cache=2545


ingest:  49%|████▉     | 3104/6293 [22:43<1:39:42,  1.88s/it]


[checkpoint] saved to Drive at 20121101 | ok=3102 fail=0 | cache=2551


ingest:  50%|████▉     | 3124/6293 [23:01<2:19:37,  2.64s/it]


[checkpoint] saved to Drive at 20121203 | ok=3124 fail=0 | cache=2552


ingest:  50%|█████     | 3149/6293 [23:24<1:34:57,  1.81s/it]


[checkpoint] saved to Drive at 20130107 | ok=3146 fail=0 | cache=2553


ingest:  50%|█████     | 3170/6293 [23:44<1:38:50,  1.90s/it]


[checkpoint] saved to Drive at 20130206 | ok=3168 fail=0 | cache=2553


ingest:  51%|█████     | 3192/6293 [24:06<2:03:57,  2.40s/it]


[checkpoint] saved to Drive at 20130315 | ok=3190 fail=0 | cache=2553


ingest:  51%|█████     | 3215/6293 [24:38<2:26:31,  2.86s/it]


[checkpoint] saved to Drive at 20130418 | ok=3212 fail=0 | cache=2553


ingest:  51%|█████▏    | 3236/6293 [25:03<2:01:45,  2.39s/it]


[checkpoint] saved to Drive at 20130523 | ok=3234 fail=0 | cache=2553


ingest:  52%|█████▏    | 3257/6293 [25:24<2:12:37,  2.62s/it]


[checkpoint] saved to Drive at 20130627 | ok=3256 fail=0 | cache=2553


ingest:  52%|█████▏    | 3280/6293 [25:48<1:40:31,  2.00s/it]


[checkpoint] saved to Drive at 20130729 | ok=3278 fail=0 | cache=2553


ingest:  52%|█████▏    | 3302/6293 [26:11<1:55:34,  2.32s/it]


[checkpoint] saved to Drive at 20130828 | ok=3300 fail=0 | cache=2553


ingest:  53%|█████▎    | 3324/6293 [26:33<1:36:49,  1.96s/it]


[checkpoint] saved to Drive at 20131008 | ok=3322 fail=0 | cache=2554


ingest:  53%|█████▎    | 3345/6293 [26:55<2:29:36,  3.04s/it]


[checkpoint] saved to Drive at 20131107 | ok=3344 fail=0 | cache=2554


ingest:  54%|█████▎    | 3368/6293 [27:18<1:32:30,  1.90s/it]


[checkpoint] saved to Drive at 20131209 | ok=3366 fail=0 | cache=2554


ingest:  54%|█████▍    | 3390/6293 [27:44<2:00:01,  2.48s/it]


[checkpoint] saved to Drive at 20140109 | ok=3388 fail=0 | cache=2555


ingest:  54%|█████▍    | 3412/6293 [28:08<2:02:14,  2.55s/it]


[checkpoint] saved to Drive at 20140217 | ok=3410 fail=0 | cache=2604


ingest:  55%|█████▍    | 3434/6293 [28:30<1:27:43,  1.84s/it]


[checkpoint] saved to Drive at 20140319 | ok=3432 fail=0 | cache=2607


ingest:  55%|█████▍    | 3456/6293 [28:56<1:49:23,  2.31s/it]


[checkpoint] saved to Drive at 20140421 | ok=3454 fail=0 | cache=2607


ingest:  55%|█████▌    | 3479/6293 [29:18<1:42:46,  2.19s/it]


[checkpoint] saved to Drive at 20140523 | ok=3476 fail=0 | cache=2608


ingest:  56%|█████▌    | 3500/6293 [29:42<1:47:00,  2.30s/it]


[checkpoint] saved to Drive at 20140625 | ok=3498 fail=0 | cache=2608


ingest:  56%|█████▌    | 3520/6293 [30:08<2:55:35,  3.80s/it]


[checkpoint] saved to Drive at 20140725 | ok=3520 fail=0 | cache=2618


ingest:  56%|█████▋    | 3545/6293 [30:31<1:43:03,  2.25s/it]


[checkpoint] saved to Drive at 20140826 | ok=3542 fail=0 | cache=2631


ingest:  57%|█████▋    | 3567/6293 [30:57<1:40:51,  2.22s/it]


[checkpoint] saved to Drive at 20140926 | ok=3564 fail=0 | cache=2640


ingest:  57%|█████▋    | 3588/6293 [31:23<1:55:09,  2.55s/it]


[checkpoint] saved to Drive at 20141104 | ok=3586 fail=0 | cache=2662


ingest:  57%|█████▋    | 3608/6293 [31:51<3:33:24,  4.77s/it]


[checkpoint] saved to Drive at 20141204 | ok=3608 fail=0 | cache=2676


ingest:  58%|█████▊    | 3632/6293 [32:20<2:11:15,  2.96s/it]


[checkpoint] saved to Drive at 20150107 | ok=3630 fail=0 | cache=2692


ingest:  58%|█████▊    | 3653/6293 [32:44<2:12:15,  3.01s/it]


[checkpoint] saved to Drive at 20150206 | ok=3652 fail=0 | cache=2715


ingest:  58%|█████▊    | 3676/6293 [33:07<1:22:31,  1.89s/it]


[checkpoint] saved to Drive at 20150317 | ok=3674 fail=0 | cache=2744


ingest:  59%|█████▉    | 3698/6293 [33:34<1:40:11,  2.32s/it]


[checkpoint] saved to Drive at 20150417 | ok=3696 fail=0 | cache=2770


ingest:  59%|█████▉    | 3719/6293 [34:07<2:53:45,  4.05s/it]


[checkpoint] saved to Drive at 20150520 | ok=3718 fail=0 | cache=2827


ingest:  59%|█████▉    | 3741/6293 [34:50<3:09:51,  4.46s/it]


[checkpoint] saved to Drive at 20150619 | ok=3740 fail=0 | cache=2879


ingest:  60%|█████▉    | 3763/6293 [35:19<2:12:11,  3.13s/it]


[checkpoint] saved to Drive at 20150722 | ok=3762 fail=0 | cache=2909


ingest:  60%|██████    | 3784/6293 [35:50<2:19:57,  3.35s/it]


[checkpoint] saved to Drive at 20150821 | ok=3784 fail=0 | cache=2915


ingest:  61%|██████    | 3808/6293 [36:19<2:13:14,  3.22s/it]


[checkpoint] saved to Drive at 20150924 | ok=3806 fail=0 | cache=2921


ingest:  61%|██████    | 3831/6293 [36:49<1:45:56,  2.58s/it]


[checkpoint] saved to Drive at 20151102 | ok=3828 fail=0 | cache=2927


ingest:  61%|██████    | 3850/6293 [37:16<2:15:14,  3.32s/it]


[checkpoint] saved to Drive at 20151202 | ok=3850 fail=0 | cache=2931


ingest:  62%|██████▏   | 3872/6293 [37:49<2:31:32,  3.76s/it]


[checkpoint] saved to Drive at 20160104 | ok=3872 fail=0 | cache=2971


ingest:  62%|██████▏   | 3894/6293 [38:17<3:02:41,  4.57s/it]


[checkpoint] saved to Drive at 20160203 | ok=3894 fail=0 | cache=2982


ingest:  62%|██████▏   | 3919/6293 [38:44<1:25:23,  2.16s/it]


[checkpoint] saved to Drive at 20160311 | ok=3916 fail=0 | cache=2998


ingest:  63%|██████▎   | 3938/6293 [39:13<2:15:15,  3.45s/it]


[checkpoint] saved to Drive at 20160413 | ok=3938 fail=0 | cache=3014


ingest:  63%|██████▎   | 3960/6293 [39:42<2:12:47,  3.42s/it]


[checkpoint] saved to Drive at 20160516 | ok=3960 fail=0 | cache=3031


ingest:  63%|██████▎   | 3984/6293 [40:10<1:28:05,  2.29s/it]


[checkpoint] saved to Drive at 20160617 | ok=3982 fail=0 | cache=3052


ingest:  64%|██████▎   | 4004/6293 [40:42<2:24:28,  3.79s/it]


[checkpoint] saved to Drive at 20160719 | ok=4004 fail=0 | cache=3072


ingest:  64%|██████▍   | 4028/6293 [41:10<1:29:18,  2.37s/it]


[checkpoint] saved to Drive at 20160818 | ok=4026 fail=0 | cache=3097


ingest:  64%|██████▍   | 4048/6293 [41:42<2:39:17,  4.26s/it]


[checkpoint] saved to Drive at 20160921 | ok=4048 fail=0 | cache=3125


ingest:  65%|██████▍   | 4073/6293 [42:10<1:21:42,  2.21s/it]


[checkpoint] saved to Drive at 20161028 | ok=4070 fail=0 | cache=3157


ingest:  65%|██████▌   | 4092/6293 [42:38<2:31:57,  4.14s/it]


[checkpoint] saved to Drive at 20161129 | ok=4092 fail=0 | cache=3201


ingest:  65%|██████▌   | 4117/6293 [43:07<1:20:37,  2.22s/it]


[checkpoint] saved to Drive at 20161229 | ok=4114 fail=0 | cache=3249


ingest:  66%|██████▌   | 4139/6293 [43:35<1:26:17,  2.40s/it]


[checkpoint] saved to Drive at 20170207 | ok=4136 fail=0 | cache=3313


ingest:  66%|██████▌   | 4158/6293 [44:08<2:28:26,  4.17s/it]


[checkpoint] saved to Drive at 20170309 | ok=4158 fail=0 | cache=3354


ingest:  66%|██████▋   | 4181/6293 [44:38<2:03:35,  3.51s/it]


[checkpoint] saved to Drive at 20170412 | ok=4180 fail=0 | cache=3409


ingest:  67%|██████▋   | 4202/6293 [45:23<7:20:44, 12.65s/it]


[checkpoint] saved to Drive at 20170515 | ok=4202 fail=0 | cache=3458


ingest:  67%|██████▋   | 4224/6293 [45:59<3:19:20,  5.78s/it]


[checkpoint] saved to Drive at 20170616 | ok=4224 fail=0 | cache=3500


ingest:  68%|██████▊   | 4249/6293 [46:29<1:26:59,  2.55s/it]


[checkpoint] saved to Drive at 20170718 | ok=4246 fail=0 | cache=3537


ingest:  68%|██████▊   | 4268/6293 [47:00<2:03:27,  3.66s/it]


[checkpoint] saved to Drive at 20170817 | ok=4268 fail=0 | cache=3575


ingest:  68%|██████▊   | 4290/6293 [47:30<2:42:34,  4.87s/it]


[checkpoint] saved to Drive at 20170918 | ok=4290 fail=0 | cache=3611


ingest:  69%|██████▊   | 4312/6293 [48:03<3:09:04,  5.73s/it]


[checkpoint] saved to Drive at 20171025 | ok=4312 fail=0 | cache=3650


ingest:  69%|██████▉   | 4334/6293 [48:41<3:26:58,  6.34s/it]


[checkpoint] saved to Drive at 20171124 | ok=4334 fail=0 | cache=3689


ingest:  69%|██████▉   | 4356/6293 [49:13<2:24:23,  4.47s/it]


[checkpoint] saved to Drive at 20171226 | ok=4356 fail=0 | cache=3718


ingest:  70%|██████▉   | 4380/6293 [49:49<2:11:02,  4.11s/it]


[checkpoint] saved to Drive at 20180126 | ok=4378 fail=0 | cache=3743


ingest:  70%|██████▉   | 4400/6293 [50:25<2:30:07,  4.76s/it]


[checkpoint] saved to Drive at 20180306 | ok=4400 fail=0 | cache=3762


ingest:  70%|███████   | 4424/6293 [50:58<1:57:16,  3.76s/it]


[checkpoint] saved to Drive at 20180409 | ok=4422 fail=0 | cache=3773


ingest:  71%|███████   | 4444/6293 [51:33<2:41:46,  5.25s/it]


[checkpoint] saved to Drive at 20180511 | ok=4444 fail=0 | cache=3785


ingest:  71%|███████   | 4466/6293 [52:09<1:59:13,  3.92s/it]


[checkpoint] saved to Drive at 20180612 | ok=4466 fail=0 | cache=3794


ingest:  71%|███████▏  | 4491/6293 [52:47<1:33:57,  3.13s/it]


[checkpoint] saved to Drive at 20180713 | ok=4488 fail=0 | cache=3805


ingest:  72%|███████▏  | 4513/6293 [53:18<1:15:54,  2.56s/it]


[checkpoint] saved to Drive at 20180814 | ok=4510 fail=0 | cache=3810


ingest:  72%|███████▏  | 4534/6293 [53:52<1:53:25,  3.87s/it]


[checkpoint] saved to Drive at 20180913 | ok=4532 fail=0 | cache=3817


ingest:  72%|███████▏  | 4554/6293 [54:30<2:44:17,  5.67s/it]


[checkpoint] saved to Drive at 20181023 | ok=4554 fail=0 | cache=3829


ingest:  73%|███████▎  | 4576/6293 [55:19<3:49:24,  8.02s/it]


[checkpoint] saved to Drive at 20181122 | ok=4576 fail=0 | cache=3835


ingest:  73%|███████▎  | 4598/6293 [56:01<3:28:07,  7.37s/it]


[checkpoint] saved to Drive at 20181224 | ok=4598 fail=0 | cache=3848


ingest:  73%|███████▎  | 4621/6293 [56:41<2:34:24,  5.54s/it]


[checkpoint] saved to Drive at 20190125 | ok=4620 fail=0 | cache=3866


ingest:  74%|███████▍  | 4643/6293 [57:20<2:28:26,  5.40s/it]


[checkpoint] saved to Drive at 20190305 | ok=4642 fail=0 | cache=3876


ingest:  74%|███████▍  | 4664/6293 [58:01<3:15:57,  7.22s/it]


[checkpoint] saved to Drive at 20190404 | ok=4664 fail=0 | cache=3887


ingest:  74%|███████▍  | 4686/6293 [58:42<3:07:56,  7.02s/it]


[checkpoint] saved to Drive at 20190510 | ok=4686 fail=0 | cache=3900


ingest:  75%|███████▍  | 4708/6293 [59:19<2:28:06,  5.61s/it]


[checkpoint] saved to Drive at 20190612 | ok=4708 fail=0 | cache=3912


ingest:  75%|███████▌  | 4730/6293 [59:58<2:12:15,  5.08s/it]


[checkpoint] saved to Drive at 20190712 | ok=4730 fail=0 | cache=3925


ingest:  76%|███████▌  | 4753/6293 [1:00:40<2:26:37,  5.71s/it]


[checkpoint] saved to Drive at 20190813 | ok=4752 fail=0 | cache=3967


ingest:  76%|███████▌  | 4774/6293 [1:01:21<3:00:54,  7.15s/it]


[checkpoint] saved to Drive at 20190912 | ok=4774 fail=0 | cache=3977


ingest:  76%|███████▌  | 4796/6293 [1:02:03<2:44:38,  6.60s/it]


[checkpoint] saved to Drive at 20191022 | ok=4796 fail=0 | cache=3990


ingest:  77%|███████▋  | 4820/6293 [1:02:42<1:33:56,  3.83s/it]


[checkpoint] saved to Drive at 20191121 | ok=4818 fail=0 | cache=4026


ingest:  77%|███████▋  | 4842/6293 [1:03:24<1:44:03,  4.30s/it]


[checkpoint] saved to Drive at 20191223 | ok=4840 fail=0 | cache=4054


ingest:  77%|███████▋  | 4862/6293 [1:04:05<2:28:49,  6.24s/it]


[checkpoint] saved to Drive at 20200123 | ok=4862 fail=0 | cache=4078


ingest:  78%|███████▊  | 4884/6293 [1:04:49<3:12:52,  8.21s/it]


[checkpoint] saved to Drive at 20200303 | ok=4884 fail=0 | cache=4102


ingest:  78%|███████▊  | 4906/6293 [1:05:48<3:21:02,  8.70s/it]


[checkpoint] saved to Drive at 20200402 | ok=4906 fail=0 | cache=4123


ingest:  78%|███████▊  | 4928/6293 [1:06:32<2:57:40,  7.81s/it]


[checkpoint] saved to Drive at 20200508 | ok=4928 fail=0 | cache=4158


ingest:  79%|███████▊  | 4950/6293 [1:07:17<2:52:05,  7.69s/it]


[checkpoint] saved to Drive at 20200609 | ok=4950 fail=0 | cache=4184


ingest:  79%|███████▉  | 4972/6293 [1:08:00<2:46:57,  7.58s/it]


[checkpoint] saved to Drive at 20200713 | ok=4972 fail=0 | cache=4216


ingest:  79%|███████▉  | 4996/6293 [1:08:44<1:51:03,  5.14s/it]


[checkpoint] saved to Drive at 20200812 | ok=4994 fail=0 | cache=4268


ingest:  80%|███████▉  | 5016/6293 [1:09:27<2:16:53,  6.43s/it]


[checkpoint] saved to Drive at 20200911 | ok=5016 fail=0 | cache=4335


ingest:  80%|████████  | 5038/6293 [1:10:15<2:52:26,  8.24s/it]


[checkpoint] saved to Drive at 20201021 | ok=5038 fail=0 | cache=4392


ingest:  80%|████████  | 5062/6293 [1:10:59<1:44:43,  5.10s/it]


[checkpoint] saved to Drive at 20201120 | ok=5060 fail=0 | cache=4423


ingest:  81%|████████  | 5082/6293 [1:11:43<2:12:49,  6.58s/it]


[checkpoint] saved to Drive at 20201222 | ok=5082 fail=0 | cache=4466


ingest:  81%|████████  | 5104/6293 [1:12:29<2:36:18,  7.89s/it]


[checkpoint] saved to Drive at 20210122 | ok=5104 fail=0 | cache=4510


ingest:  81%|████████▏ | 5126/6293 [1:13:16<1:58:24,  6.09s/it]


[checkpoint] saved to Drive at 20210302 | ok=5126 fail=0 | cache=4555


ingest:  82%|████████▏ | 5148/6293 [1:14:04<2:41:09,  8.45s/it]


[checkpoint] saved to Drive at 20210401 | ok=5148 fail=0 | cache=4593


ingest:  82%|████████▏ | 5172/6293 [1:14:54<1:36:22,  5.16s/it]


[checkpoint] saved to Drive at 20210507 | ok=5170 fail=0 | cache=4655


ingest:  83%|████████▎ | 5192/6293 [1:16:01<3:02:33,  9.95s/it]


[checkpoint] saved to Drive at 20210608 | ok=5192 fail=0 | cache=4703


ingest:  83%|████████▎ | 5214/6293 [1:17:00<3:05:55, 10.34s/it]


[checkpoint] saved to Drive at 20210709 | ok=5214 fail=0 | cache=4756


ingest:  83%|████████▎ | 5236/6293 [1:17:49<2:29:39,  8.50s/it]


[checkpoint] saved to Drive at 20210810 | ok=5236 fail=0 | cache=4800


ingest:  84%|████████▎ | 5260/6293 [1:18:42<1:45:40,  6.14s/it]


[checkpoint] saved to Drive at 20210909 | ok=5258 fail=0 | cache=4841


ingest:  84%|████████▍ | 5280/6293 [1:19:31<2:04:47,  7.39s/it]


[checkpoint] saved to Drive at 20211020 | ok=5280 fail=0 | cache=4883


ingest:  84%|████████▍ | 5302/6293 [1:20:19<2:18:54,  8.41s/it]


[checkpoint] saved to Drive at 20211119 | ok=5302 fail=0 | cache=4925


ingest:  85%|████████▍ | 5324/6293 [1:21:08<2:20:58,  8.73s/it]


[checkpoint] saved to Drive at 20211221 | ok=5324 fail=0 | cache=4960


ingest:  85%|████████▍ | 5348/6293 [1:21:57<1:28:22,  5.61s/it]


[checkpoint] saved to Drive at 20220121 | ok=5346 fail=0 | cache=5000


ingest:  85%|████████▌ | 5370/6293 [1:22:48<1:23:08,  5.40s/it]


[checkpoint] saved to Drive at 20220301 | ok=5368 fail=0 | cache=5029


ingest:  86%|████████▌ | 5390/6293 [1:23:38<1:53:03,  7.51s/it]


[checkpoint] saved to Drive at 20220331 | ok=5390 fail=0 | cache=5064


ingest:  86%|████████▌ | 5412/6293 [1:24:39<2:35:42, 10.60s/it]


[checkpoint] saved to Drive at 20220509 | ok=5412 fail=0 | cache=5100


ingest:  86%|████████▋ | 5434/6293 [1:25:42<2:39:38, 11.15s/it]


[checkpoint] saved to Drive at 20220609 | ok=5434 fail=0 | cache=5125


ingest:  87%|████████▋ | 5456/6293 [1:26:48<2:40:24, 11.50s/it]


[checkpoint] saved to Drive at 20220711 | ok=5456 fail=0 | cache=5153


ingest:  87%|████████▋ | 5478/6293 [1:27:48<2:30:00, 11.04s/it]


[checkpoint] saved to Drive at 20220810 | ok=5478 fail=0 | cache=5184


ingest:  87%|████████▋ | 5500/6293 [1:28:51<3:01:47, 13.76s/it]


[checkpoint] saved to Drive at 20220909 | ok=5500 fail=0 | cache=5229


ingest:  88%|████████▊ | 5522/6293 [1:29:51<1:54:50,  8.94s/it]


[checkpoint] saved to Drive at 20221019 | ok=5522 fail=0 | cache=5273


ingest:  88%|████████▊ | 5544/6293 [1:30:52<2:12:53, 10.65s/it]


[checkpoint] saved to Drive at 20221118 | ok=5544 fail=0 | cache=5302


ingest:  88%|████████▊ | 5566/6293 [1:31:48<1:42:20,  8.45s/it]


[checkpoint] saved to Drive at 20221220 | ok=5566 fail=0 | cache=5322


ingest:  89%|████████▉ | 5588/6293 [1:32:46<1:58:18, 10.07s/it]


[checkpoint] saved to Drive at 20230120 | ok=5588 fail=0 | cache=5341


ingest:  89%|████████▉ | 5610/6293 [1:33:45<1:55:48, 10.17s/it]


[checkpoint] saved to Drive at 20230228 | ok=5610 fail=0 | cache=5358


ingest:  89%|████████▉ | 5632/6293 [1:34:52<2:10:39, 11.86s/it]


[checkpoint] saved to Drive at 20230330 | ok=5632 fail=0 | cache=5382


ingest:  90%|████████▉ | 5654/6293 [1:36:26<4:44:06, 26.68s/it]


[checkpoint] saved to Drive at 20230505 | ok=5654 fail=0 | cache=5413


ingest:  90%|█████████ | 5676/6293 [1:37:49<2:12:36, 12.90s/it]


[checkpoint] saved to Drive at 20230606 | ok=5676 fail=0 | cache=5443


ingest:  91%|█████████ | 5698/6293 [1:38:54<1:36:41,  9.75s/it]


[checkpoint] saved to Drive at 20230710 | ok=5698 fail=0 | cache=5476


ingest:  91%|█████████ | 5720/6293 [1:39:53<1:40:46, 10.55s/it]


[checkpoint] saved to Drive at 20230809 | ok=5720 fail=0 | cache=5511


ingest:  91%|█████████ | 5742/6293 [1:40:51<1:32:38, 10.09s/it]


[checkpoint] saved to Drive at 20230908 | ok=5742 fail=0 | cache=5533


ingest:  92%|█████████▏| 5764/6293 [1:41:47<1:26:06,  9.77s/it]


[checkpoint] saved to Drive at 20231018 | ok=5764 fail=0 | cache=5548


ingest:  92%|█████████▏| 5788/6293 [1:42:45<56:10,  6.67s/it]  


[checkpoint] saved to Drive at 20231117 | ok=5786 fail=0 | cache=5562


ingest:  92%|█████████▏| 5808/6293 [1:43:41<1:08:21,  8.46s/it]


[checkpoint] saved to Drive at 20231219 | ok=5808 fail=0 | cache=5573


ingest:  93%|█████████▎| 5832/6293 [1:44:41<47:53,  6.23s/it]  


[checkpoint] saved to Drive at 20240119 | ok=5830 fail=0 | cache=5589


ingest:  93%|█████████▎| 5852/6293 [1:45:40<1:05:12,  8.87s/it]


[checkpoint] saved to Drive at 20240228 | ok=5852 fail=0 | cache=5597


ingest:  93%|█████████▎| 5874/6293 [1:46:59<1:36:07, 13.76s/it]


[checkpoint] saved to Drive at 20240329 | ok=5874 fail=0 | cache=5604


ingest:  94%|█████████▎| 5896/6293 [1:48:10<1:23:00, 12.54s/it]


[checkpoint] saved to Drive at 20240507 | ok=5896 fail=0 | cache=5608


ingest:  94%|█████████▍| 5918/6293 [1:49:32<1:29:29, 14.32s/it]


[checkpoint] saved to Drive at 20240606 | ok=5918 fail=0 | cache=5612


ingest:  94%|█████████▍| 5940/6293 [1:50:39<1:09:23, 11.79s/it]


[checkpoint] saved to Drive at 20240709 | ok=5940 fail=0 | cache=5619


ingest:  95%|█████████▍| 5964/6293 [1:51:42<39:59,  7.29s/it]  


[checkpoint] saved to Drive at 20240808 | ok=5962 fail=0 | cache=5625


ingest:  95%|█████████▌| 5984/6293 [1:52:41<45:57,  8.92s/it]


[checkpoint] saved to Drive at 20240909 | ok=5984 fail=0 | cache=5635


ingest:  95%|█████████▌| 6006/6293 [1:53:40<49:09, 10.28s/it]


[checkpoint] saved to Drive at 20241018 | ok=6006 fail=0 | cache=5646


ingest:  96%|█████████▌| 6029/6293 [1:54:40<36:18,  8.25s/it]


[checkpoint] saved to Drive at 20241119 | ok=6028 fail=0 | cache=5657


ingest:  96%|█████████▌| 6050/6293 [1:55:41<37:34,  9.28s/it]


[checkpoint] saved to Drive at 20241219 | ok=6050 fail=0 | cache=5668


ingest:  96%|█████████▋| 6072/6293 [1:57:03<53:14, 14.46s/it]


[checkpoint] saved to Drive at 20250121 | ok=6072 fail=0 | cache=5680


ingest:  97%|█████████▋| 6094/6293 [1:58:10<39:04, 11.78s/it]


[checkpoint] saved to Drive at 20250228 | ok=6094 fail=0 | cache=5686


ingest:  97%|█████████▋| 6118/6293 [1:59:17<22:38,  7.77s/it]


[checkpoint] saved to Drive at 20250401 | ok=6116 fail=0 | cache=5700


ingest:  98%|█████████▊| 6138/6293 [2:00:17<23:25,  9.07s/it]


[checkpoint] saved to Drive at 20250507 | ok=6138 fail=0 | cache=5710


ingest:  98%|█████████▊| 6160/6293 [2:01:18<23:32, 10.62s/it]


[checkpoint] saved to Drive at 20250609 | ok=6160 fail=0 | cache=5718


ingest:  98%|█████████▊| 6182/6293 [2:02:19<19:53, 10.75s/it]


[checkpoint] saved to Drive at 20250709 | ok=6182 fail=0 | cache=5726


ingest:  99%|█████████▊| 6205/6293 [2:03:21<12:48,  8.74s/it]


[checkpoint] saved to Drive at 20250808 | ok=6204 fail=0 | cache=5735


ingest:  99%|█████████▉| 6226/6293 [2:04:26<12:50, 11.50s/it]


[checkpoint] saved to Drive at 20250909 | ok=6226 fail=0 | cache=5742


ingest:  99%|█████████▉| 6248/6293 [2:05:36<09:06, 12.15s/it]


[checkpoint] saved to Drive at 20251017 | ok=6248 fail=0 | cache=5754


ingest: 100%|█████████▉| 6270/6293 [2:07:00<05:43, 14.94s/it]


[checkpoint] saved to Drive at 20251118 | ok=6270 fail=0 | cache=5768


ingest: 100%|█████████▉| 6292/6293 [2:08:15<00:18, 18.83s/it]


[checkpoint] saved to Drive at 20251218 | ok=6292 fail=0 | cache=5778


ingest: 100%|██████████| 6293/6293 [2:08:16<00:00,  1.22s/it]


DONE. ok: 6293 fail: 0 saved to: /content/drive/MyDrive/AshareDB/db/ashare.sqlite
integrity_check: ok


In [None]:
import os, sqlite3

# Build the two lookup indexes inside one explicit transaction:
#   idx_daily_date     -> filters on trade_date alone
#   idx_daily_sec_date -> filters on sec_id, optionally ordered/filtered by trade_date
# `conn` is the connection left open by the ingest cell earlier in the notebook.
conn.execute("BEGIN;")
try:
    conn.execute("CREATE INDEX IF NOT EXISTS idx_daily_date     ON daily_price(trade_date);")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_daily_sec_date ON daily_price(sec_id, trade_date);")
    conn.execute("COMMIT;")
except Exception:
    # Do not leave the shared kernel connection stuck inside an open
    # transaction; later cells would otherwise fail on their own BEGIN.
    if conn.in_transaction:
        conn.execute("ROLLBACK;")
    raise

# Back up once more to Drive so the saved copy includes the indexes.
# The copy is written to a temporary file first and then swapped in with
# os.replace, so DB_DRIVE is never observed half-written.
tmp = DB_DRIVE + ".tmp"
backup_ok = False
dst = sqlite3.connect(tmp)
try:
    conn.backup(dst)
    dst.commit()
    backup_ok = True
finally:
    # Always release the handle; on failure also drop the partial copy so a
    # broken .tmp file is not left behind on Drive.
    dst.close()
    if not backup_ok and os.path.exists(tmp):
        os.remove(tmp)
os.replace(tmp, DB_DRIVE)

print("indexes built and saved to Drive:", DB_DRIVE)
# NOTE(review): this checks the source connection `conn`, not the file that
# was just copied to DB_DRIVE.
print(conn.execute("PRAGMA integrity_check;").fetchone()[0])


indexes built and saved to Drive: /content/drive/MyDrive/AshareDB/db/ashare.sqlite
ok


In [None]:
import pandas as pd

# Post-ingest sanity report. Each statement is run against the open `conn`
# and its result frame is printed, in this order:
#   1. date range and total row count of daily_price
#   2. number of ingest_manifest rows per status
#   3. up to 20 manifest rows whose status is 'fail', oldest first
report_queries = (
    """
SELECT MIN(trade_date) AS min_date, MAX(trade_date) AS max_date, COUNT(*) AS total_rows
FROM daily_price;
""",
    """
SELECT status, COUNT(*) AS n
FROM ingest_manifest
GROUP BY status;
""",
    """
SELECT trade_date, parquet_file, message
FROM ingest_manifest
WHERE status='fail'
ORDER BY trade_date
LIMIT 20;
""",
)

for sql in report_queries:
    report_frame = pd.read_sql_query(sql, conn)
    print(report_frame)

# The build connection is no longer needed after this report; the test cells
# below open their own read-only connections.
conn.close()


   min_date  max_date  total_rows
0  20000104  20251219    16430270
  status     n
0     ok  6293
Empty DataFrame
Columns: [trade_date, parquet_file, message]
Index: []


# 测试 1：确认每天大概有多少股票（抽样 10 天）

In [None]:
import sqlite3, pandas as pd, numpy as np

# Test 1: count how many securities have a row on each of a handful of
# trading days spread evenly across the whole history.
DB = "/content/drive/MyDrive/AshareDB/db/ashare.sqlite"
N_SAMPLE_DAYS = 10  # number of evenly spaced trading days to inspect

# mode=ro: open read-only so this check can never modify the saved database.
conn = sqlite3.connect(f"file:{DB}?mode=ro", uri=True)
try:
    # All distinct trading days, ascending.
    dates = pd.read_sql_query(
        "SELECT DISTINCT trade_date FROM daily_price ORDER BY trade_date;", conn
    )["trade_date"].values
    if len(dates) == 0:
        raise RuntimeError("daily_price is empty; nothing to sample.")

    # Evenly spaced positions from the first to the last trading day
    # (positions repeat when fewer than N_SAMPLE_DAYS days exist, which is
    # harmless inside an IN (...) list).
    sample = np.linspace(0, len(dates)-1, N_SAMPLE_DAYS, dtype=int)
    sample_dates = dates[sample].tolist()

    # One "?" placeholder per sampled date; the values are bound, not inlined.
    df = pd.read_sql_query(f"""
SELECT trade_date, COUNT(*) AS n_stocks
FROM daily_price
WHERE trade_date IN ({",".join(["?"]*len(sample_dates))})
GROUP BY trade_date
ORDER BY trade_date;
""", conn, params=sample_dates)
finally:
    # Release the handle even when a query fails.
    conn.close()

df


Unnamed: 0,trade_date,n_stocks
0,20000104,913
1,20021209,1176
2,20051101,1233
3,20080911,1524
4,20110729,2125
5,20140623,2308
6,20170504,3017
7,20200317,3851
8,20230203,5068
9,20251219,5448


# 测试 2：随机抽一只股票，拉它完整历史（检查索引是否好用）

In [None]:
import sqlite3, pandas as pd, random, time

# Test 2: pick one random security and time how long it takes to pull its
# full price history (exercises the (sec_id, trade_date) index).
DB = "/content/drive/MyDrive/AshareDB/db/ashare.sqlite"

# mode=ro: open read-only so this check can never modify the saved database.
conn = sqlite3.connect(f"file:{DB}?mode=ro", uri=True)
try:
    # Fetch the id and its ticker from the same randomly chosen row.
    picked = conn.execute(
        "SELECT sec_id, ts_code FROM security_map ORDER BY RANDOM() LIMIT 1;"
    ).fetchone()
    if picked is None:
        raise RuntimeError("security_map is empty; no security to sample.")
    sec_id, ts_code = picked

    # The timed span covers the query plus pandas building the DataFrame.
    # NOTE(review): the file lives under /content/drive, so first-touch reads
    # presumably include Drive I/O latency — confirm before blaming the index.
    t0 = time.perf_counter()
    hist = pd.read_sql_query("""
SELECT trade_date, open, high, low, close, vol, amount
FROM daily_price
WHERE sec_id=?
ORDER BY trade_date;
""", conn, params=(sec_id,))
    t1 = time.perf_counter()
finally:
    # Release the handle even when a query fails.
    conn.close()

print("ts_code:", ts_code, "rows:", len(hist), "query_sec_history_time_sec:", round(t1-t0, 3))
hist.head()


ts_code: 601857.SH rows: 4399 query_sec_history_time_sec: 6.634


Unnamed: 0,trade_date,open,high,low,close,vol,amount
0,20071105,48.6,48.62,41.7,43.96,15474994.87,69991390.0
1,20071106,41.4,42.4,39.8,39.99,3429778.2,14000250.0
2,20071107,39.7,40.73,38.28,40.43,2166188.7,8575267.0
3,20071108,39.2,39.75,38.0,38.19,1400509.61,5447045.0
4,20071109,37.85,38.39,36.66,38.18,1432266.03,5379485.0


# 测试 3：查某一天全市场数据并做一个简单 alpha（不需要历史）

In [None]:
import sqlite3, pandas as pd, time
import numpy as np

# Test 3: load the whole cross-section for the latest trading day and derive
# a simple alpha that needs no history.
DB = "/content/drive/MyDrive/AshareDB/db/ashare.sqlite"

# mode=ro: open read-only so this check can never modify the saved database.
conn = sqlite3.connect(f"file:{DB}?mode=ro", uri=True)
try:
    # Latest trading day present in the table.
    d = pd.read_sql_query("SELECT MAX(trade_date) AS d FROM daily_price;", conn)["d"].iloc[0]
    if pd.isna(d):
        raise RuntimeError("daily_price is empty; no trading day to query.")

    # Time the single-day lookup joined to the ticker map.
    t0 = time.perf_counter()
    day = pd.read_sql_query("""
SELECT dp.sec_id, sm.ts_code, dp.close, dp.pct_chg, dp.vol, dp.amount
FROM daily_price dp
JOIN security_map sm ON sm.sec_id=dp.sec_id
WHERE dp.trade_date=?;
""", conn, params=(int(d),))  # int(): bind a plain Python int rather than the pandas scalar
    t1 = time.perf_counter()
finally:
    # Release the handle even when a query fails.
    conn.close()

print("trade_date:", d, "rows:", len(day), "query_time_sec:", round(t1-t0, 3))

# A simple liquidity alpha: alpha = log(amount).
# clip(lower=1) floors amounts below 1 so the log is never negative or -inf;
# rows whose alpha is still NaN/inf (e.g. missing amount) are dropped.
day["alpha"] = np.log(day["amount"].clip(lower=1))
day = day.replace([np.inf, -np.inf], np.nan).dropna(subset=["alpha"])

day.sort_values("alpha", ascending=False).head(10)[["ts_code","alpha","amount","pct_chg"]]


trade_date: 20251219 rows: 5448 query_time_sec: 0.074


Unnamed: 0,ts_code,alpha,amount,pct_chg
1946,300502.SZ,16.564123,15620940.0,2.0682
1767,300308.SZ,16.378848,12979060.0,0.0875
125,000547.SZ,16.179395,10632170.0,3.9627
3824,601933.SH,16.111256,9931838.0,4.918
3545,600879.SH,16.069845,9528948.0,4.5718
2956,600118.SH,15.913131,8146760.0,4.2799
1841,300394.SZ,15.884153,7914076.0,-2.1845
1737,300274.SZ,15.794011,7231889.0,1.6986
4440,603986.SH,15.766391,7034877.0,-1.9772
2181,300750.SZ,15.698452,6572808.0,0.5376


# 测试 4：带历史窗口的 alpha（处理“历史不够”的股票）

In [None]:
import sqlite3, pandas as pd, numpy as np

# Test 4: an alpha that needs a history window (20-day momentum), showing how
# securities without enough history drop out.
DB = "/content/drive/MyDrive/AshareDB/db/ashare.sqlite"
N_WINDOW_DAYS = 60   # trading days pulled; must be at least MOM_LOOKBACK + 1
MOM_LOOKBACK = 20    # momentum horizon, in trading days

# mode=ro: open read-only so this check can never modify the saved database.
conn = sqlite3.connect(f"file:{DB}?mode=ro", uri=True)
try:
    # Test date: the latest trading day in the table.
    d = pd.read_sql_query("SELECT MAX(trade_date) AS d FROM daily_price;", conn)["d"].iloc[0]
    if pd.isna(d):
        raise RuntimeError("daily_price is empty; no trading day to query.")
    d = int(d)

    # The most recent N_WINDOW_DAYS trading days up to and including d.
    dates = pd.read_sql_query("""
SELECT DISTINCT trade_date
FROM daily_price
WHERE trade_date <= ?
ORDER BY trade_date DESC
LIMIT ?;
""", conn, params=(d, N_WINDOW_DAYS))["trade_date"].tolist()
    dates = sorted(dates)

    # Closing prices for exactly those days (one "?" placeholder per date).
    q = f"""
SELECT trade_date, sec_id, close
FROM daily_price
WHERE trade_date IN ({",".join(["?"]*len(dates))});
"""
    px = pd.read_sql_query(q, conn, params=dates)
finally:
    # Release the handle even when a query fails.
    conn.close()

# pivot: index=trade_date, columns=sec_id. A security with no row on a given
# day gets NaN in that cell.
pivot = px.pivot(index="trade_date", columns="sec_id", values="close").sort_index()

# The ratio below reads the row MOM_LOOKBACK positions before the last one, so
# the window must hold at least MOM_LOOKBACK + 1 trading days.
if len(pivot) < MOM_LOOKBACK + 1:
    raise RuntimeError(
        f"need at least {MOM_LOOKBACK + 1} trading days for "
        f"{MOM_LOOKBACK}-day momentum, got {len(pivot)}"
    )

# 20-day momentum: close(d) / close(d - 20 trading days) - 1.
# Securities missing a close on either endpoint yield NaN and are dropped.
# NOTE(review): uses `close` as stored; if it is not adjusted for splits or
# dividends the ratio is distorted around ex-dates — confirm.
mom20 = pivot.iloc[-1] / pivot.iloc[-(MOM_LOOKBACK + 1)] - 1
mom20 = mom20.dropna()

print("alpha computed date:", d, "| valid stocks:", len(mom20), "/", pivot.shape[1])

mom20.sort_values(ascending=False).head(10)


alpha computed date: 20251219 | valid stocks: 5431 / 5464


Unnamed: 0_level_0,0
sec_id,Unnamed: 1_level_1
4644,2.167894
2195,1.399421
5164,1.340739
2700,1.334831
1520,1.334158
844,1.215962
5450,1.146828
1228,1.145556
271,1.028153
5008,1.010141
