In [3]:
import os
import sys
import cv2
import json
import shutil
from glob import glob
from tqdm import tqdm
from pathlib import Path

from utils.table2label import table2layout, fuse_gt_info, judge_error
from utils.table_helper import correct_table



def table2label(table_dir, label_dir, error_file_path):
    """Convert every table JSON in ``table_dir`` into a ground-truth label file.

    Each ``*.json`` file directly inside ``table_dir`` is loaded, has its
    ``is_wireless`` key set to ``True`` and is passed through
    ``table2layout``, ``fuse_gt_info`` and ``judge_error``. Tables that pass
    all three stages are written to ``{label_dir}/{json_id}-gt.json``. Every
    other table is skipped and the reason is recorded under its id; the
    collected reasons are dumped to ``error_file_path`` after the last file.

    Args:
        table_dir: Directory containing the input table ``*.json`` files.
        label_dir: Directory that receives the ``*-gt.json`` files. It is not
            created here, so it must already exist.
        error_file_path: Path of the JSON file that receives the
            ``{json_id: reason}`` mapping of skipped tables.

    Returns:
        None. Results are written to disk and the number of skipped tables
        is printed.
    """
    table_error = {}
    json_files = sorted(glob(os.path.join(table_dir, '*.json')))
    for json_path in tqdm(json_files, total=len(json_files)):
        json_name = os.path.basename(json_path)
        # The id is everything before the first dot of the file name.
        json_id = json_name.split('.')[0]

        # Context manager so the handle is closed even if parsing fails.
        with open(json_path, 'r', encoding='utf-8') as table_file:
            table = json.load(table_file)

        # Override the flag for every table, regardless of the file content.
        table['is_wireless'] = True

        # NOTE(review): a `correct_table(table)` call was disabled here by the
        # original author -- confirm whether it should be re-enabled.

        # `except Exception` (not a bare `except`) so Ctrl-C / SystemExit
        # still abort the run instead of being logged as a table error.
        try:
            gt_label = table2layout(table)
        except Exception:
            table_error[json_id] = 'table2layout error'
            continue

        # Original note: for wired tables the bbox obtained so far is still
        # the cell box, not the text box.
        try:
            gt_label = fuse_gt_info(gt_label, table)
        except Exception:
            # Original note: wired tables with only one cell are filtered out.
            table_error[json_id] = "fuse_gt_info error"
            continue

        valid, msg = judge_error(table, gt_label)
        if not valid:
            print(json_name, msg)
            table_error[json_id] = msg
            continue

        gt_json_path = os.path.join(label_dir, f'{json_id}-gt.json')
        with open(gt_json_path, 'w', encoding='utf-8') as gt_file:
            json.dump(gt_label, gt_file, indent=4)

    with open(error_file_path, 'w', encoding='utf-8') as error_file:
        json.dump(table_error, error_file, indent=4)

    print('table error: {}'.format(len(table_error)))

## Step 1: Generate ground-truth labels (gen_gt_labels)

In [4]:
'''
Generate the ground-truth label files for one dataset split.

Input
    table directory: {DATASET_ROOT}/{DATASET}
Output
    label directory: {DATASET_ROOT}/{DATASET}_gt_json/
    error report:    {DATASET_ROOT}/{DATASET}_error.json

Counts recorded by the original author for the training set:
    errors before using fix_gt_table: 684
    errors after using fix_gt_table:  665
        + after removing entries with area == 0: 664
'''
DATASET_ROOT = '/media/ubuntu/Date12/TableStruct/new_data'
DATASET = "train_jpg480max"

# All derived locations live directly under DATASET_ROOT.
TABLE_DIR = os.path.join(DATASET_ROOT, DATASET)
LABEL_DIR = os.path.join(DATASET_ROOT, DATASET + '_gt_json')
TABLE_ERROR_PATH = os.path.join(DATASET_ROOT, DATASET + '_error.json')

# Start from an empty label directory: any previous output is deleted first.
if os.path.exists(LABEL_DIR):
    shutil.rmtree(LABEL_DIR)
Path(LABEL_DIR).mkdir(parents=True)

table2label(
    table_dir=TABLE_DIR,
    label_dir=LABEL_DIR,
    error_file_path=TABLE_ERROR_PATH,
)


  0%|          | 36/12104 [00:01<08:36, 23.36it/s]

00029.json line idx:[[6], [12], [113]] not find


  1%|          | 61/12104 [00:02<09:48, 20.45it/s]

00057.json line idx:[[34]] not find


  1%|          | 92/12104 [00:03<05:57, 33.56it/s]

00084.json line idx:[[20], [61], [62], [103], [104]] not find


  1%|▏         | 175/12104 [00:05<03:06, 63.83it/s]

00169.json line idx:[[0], [2], [9], [10], [16], [17]] not find


  2%|▏         | 264/12104 [00:07<03:12, 61.46it/s]

00250.json line idx:[[6]] not find


  2%|▏         | 286/12104 [00:07<03:03, 64.32it/s]

00273.json line idx:[[2], [8]] not find
00280.json line idx:[[35]] not find


  3%|▎         | 317/12104 [00:07<02:49, 69.61it/s]

00301.json line idx:[[17], [18], [20]] not find
00303.json line idx:[[9], [17], [18]] not find
00307.json line idx:[[4], [18], [20]] not find
00314.json line idx:[[32], [35]] not find


  3%|▎         | 332/12104 [00:08<03:31, 55.70it/s]

00325.json line idx:[[19], [20]] not find


  3%|▎         | 338/12104 [00:08<04:23, 44.72it/s]

00334.json line idx:[[0], [1], [2], [3]] not find


  3%|▎         | 357/12104 [00:09<06:58, 28.04it/s]

00353.json line idx:[[13], [23], [47], [48], [49], [50], [72], [73], [74], [75]] not find


  3%|▎         | 364/12104 [00:09<05:37, 34.78it/s]

00358.json line idx:[[0], [13], [18], [23]] not find


  3%|▎         | 395/12104 [00:09<02:42, 72.07it/s]

00374.json line idx:[[18]] not find


  4%|▍         | 493/12104 [00:13<16:51, 11.48it/s] 

00489.json line idx:[[26], [27], [36], [37]] not find


  5%|▍         | 589/12104 [00:14<02:20, 81.93it/s]

00573.json line idx:[[0], [2]] not find


  5%|▍         | 598/12104 [00:15<02:37, 73.25it/s]

00600.json line idx:[[0], [1], [35], [36]] not find
00602.json line idx:[[4], [11], [12]] not find


  5%|▌         | 621/12104 [00:15<03:28, 55.07it/s]

00611.json line idx:[[3]] not find


  5%|▌         | 648/12104 [00:16<04:10, 45.75it/s]

00641.json line idx:[[13], [14], [15], [16]] not find


  6%|▌         | 680/12104 [00:16<03:26, 55.21it/s]

00668.json line idx:[[4], [6]] not find
00674.json line idx:[[34]] not find


  6%|▌         | 738/12104 [00:18<04:20, 43.69it/s]

00731.json line idx:[[0], [1], [2], [3]] not find
00741.json line idx:[[14], [33], [34], [49]] not find


  6%|▌         | 749/12104 [00:18<04:06, 46.08it/s]

00745.json line idx:[[24]] not find


  6%|▋         | 767/12104 [00:19<05:32, 34.09it/s]

00759.json line idx:[[102]] not find


  6%|▋         | 779/12104 [00:19<06:39, 28.34it/s]

00773.json line idx:[[39]] not find


  7%|▋         | 788/12104 [00:19<07:01, 26.85it/s]

00782.json line idx:[[111], [127]] not find


  7%|▋         | 901/12104 [00:21<03:45, 49.68it/s] 

00902.json line idx:[[0], [1], [2], [3], [7], [8], [9], [10], [11], [12], [13], [14]] not find


  9%|▉         | 1128/12104 [00:27<02:55, 62.46it/s]

01119.json line idx:[[5], [40]] not find


  9%|▉         | 1135/12104 [00:27<03:06, 58.82it/s]

01131.json line idx:[[11], [61]] not find
01136.json line idx:[[87]] not find
01138.json line idx:[[16], [19], [40], [41]] not find


 10%|▉         | 1206/12104 [00:28<03:07, 58.08it/s]

01203.json line idx:[[28], [31]] not find


 11%|█▏        | 1365/12104 [00:32<06:50, 26.14it/s]

01361.json line idx:[[0], [4], [5], [6]] not find


 11%|█▏        | 1385/12104 [00:33<06:19, 28.25it/s]

01378.json line idx:[[63], [64]] not find


 12%|█▏        | 1483/12104 [00:35<02:54, 60.95it/s]

01475.json line idx:[[9]] not find


 12%|█▏        | 1507/12104 [00:35<03:26, 51.35it/s]

01498.json line idx:[[0]] not find
01499.json line idx:[[37]] not find


 13%|█▎        | 1596/12104 [00:37<03:02, 57.71it/s]

01586.json line idx:[[6], [7], [8], [9], [10], [14], [15], [16], [17], [18], [21], [22], [23], [24], [25], [26], [31], [32]] not find


 14%|█▍        | 1682/12104 [00:39<03:20, 52.07it/s]

01668.json line idx:[[57], [73], [114]] not find


 15%|█▍        | 1764/12104 [00:40<02:08, 80.41it/s] 

01768.json line idx:[[84], [88]] not find


 15%|█▍        | 1802/12104 [00:42<09:31, 18.02it/s]

01797.json line idx:[[137], [198]] not find


 16%|█▌        | 1961/12104 [00:46<02:28, 68.43it/s]

01947.json line idx:[[83], [84], [85], [86], [87], [88], [89], [90], [91], [92], [93], [94], [95]] not find
01948.json line idx:[[12]] not find


 16%|█▋        | 1992/12104 [00:47<03:04, 54.70it/s]

01990.json line idx:[[0]] not find


 17%|█▋        | 2003/12104 [00:47<05:20, 31.49it/s]

01997.json line idx:[[0], [1], [2], [3]] not find


 17%|█▋        | 2080/12104 [00:49<02:53, 57.63it/s]

02065.json line idx:[[25], [26], [27], [29], [30], [31], [32], [33], [38]] not find
02077.json line idx:[[0]] not find


 18%|█▊        | 2147/12104 [00:51<02:41, 61.72it/s]

02125.json line idx:[[16], [18], [19]] not find


 18%|█▊        | 2217/12104 [00:52<03:44, 44.00it/s] 

02209.json line idx:[[2], [3]] not find


 18%|█▊        | 2230/12104 [00:52<03:59, 41.22it/s]

02223.json line idx:[[1]] not find


 19%|█▉        | 2290/12104 [00:53<02:47, 58.58it/s]

02284.json line idx:[[26]] not find


 19%|█▉        | 2317/12104 [00:54<04:33, 35.82it/s]

02315.json line idx:[[67], [68], [69], [70], [127], [128]] not find


 20%|██        | 2470/12104 [00:56<02:32, 63.18it/s]

02465.json line idx:[[101]] not find


 21%|██        | 2489/12104 [00:57<02:15, 71.18it/s]

02478.json line idx:[[16]] not find
02489.json line idx:[[6], [32]] not find


 21%|██        | 2560/12104 [00:58<02:15, 70.50it/s]

02561.json line idx:[[224], [225], [226], [227], [228]] not find
02565.json line idx:[[1], [2], [3], [5], [7], [9]] not find


 21%|██        | 2569/12104 [00:58<03:58, 39.96it/s]

02569.json line idx:[[0], [1]] not find


 21%|██▏       | 2576/12104 [00:59<06:39, 23.83it/s]

02572.json line idx:[[0], [1], [2], [3], [4], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30]] not find
02575.json line idx:[[6], [8]] not find


 21%|██▏       | 2589/12104 [00:59<05:23, 29.43it/s]

02584.json line idx:[[19]] not find


 22%|██▏       | 2628/12104 [01:00<02:56, 53.68it/s]

02619.json line idx:[[56], [57]] not find


 22%|██▏       | 2661/12104 [01:01<03:35, 43.74it/s]

02655.json line idx:[[41], [42]] not find


 22%|██▏       | 2666/12104 [01:01<04:16, 36.75it/s]

02663.json line idx:[[18], [19], [20], [22], [23], [24], [25]] not find


 25%|██▍       | 2976/12104 [01:05<03:49, 39.83it/s] 

02967.json line idx:[[2], [6]] not find


 25%|██▍       | 3014/12104 [01:06<03:05, 48.96it/s]

03007.json line idx:[[5], [6], [7], [8]] not find


 25%|██▌       | 3049/12104 [01:07<04:15, 35.50it/s]

03051.json line idx:[[56], [62], [68]] not find


 25%|██▌       | 3062/12104 [01:08<05:40, 26.59it/s]

03056.json layout error: cell_idx: 0, row_span: [0, 12], col_span: [0, 9]


 25%|██▌       | 3074/12104 [01:08<05:37, 26.79it/s]

03068.json line idx:[[2], [3], [4]] not find


 26%|██▌       | 3152/12104 [01:09<02:08, 69.81it/s]

03141.json line idx:[[5], [9], [10]] not find


 27%|██▋       | 3298/12104 [01:12<04:40, 31.40it/s]

03296.json line idx:[[87]] not find


 28%|██▊       | 3407/12104 [01:14<02:18, 63.01it/s]

03395.json line idx:[[24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [40], [41]] not find


 31%|███       | 3766/12104 [01:19<02:01, 68.49it/s] 

03766.json line idx:[[32], [33], [34]] not find


 33%|███▎      | 3969/12104 [01:24<02:26, 55.34it/s]

03952.json line idx:[[38], [40], [41], [47], [55], [73], [166], [180], [184]] not find
03956.json line idx:[[67], [68], [69]] not find


 33%|███▎      | 4028/12104 [01:26<03:49, 35.20it/s]

04023.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7]] not find
04024.json line idx:[[37]] not find


 33%|███▎      | 4044/12104 [01:26<03:38, 36.81it/s]

04037.json line idx:[[24], [48], [49]] not find
04043.json line idx:[[30], [32], [39], [40]] not find


 34%|███▎      | 4074/12104 [01:27<02:34, 52.09it/s]

04067.json line idx:[[3]] not find


 34%|███▍      | 4087/12104 [01:27<02:32, 52.50it/s]

04081.json line idx:[[14]] not find


 36%|███▋      | 4405/12104 [01:31<01:56, 66.36it/s] 

04399.json line idx:[[5], [18]] not find
04400.json line idx:[[21], [24], [25], [26], [27], [28], [29], [30], [35]] not find


 37%|███▋      | 4505/12104 [01:33<01:58, 64.13it/s]

04493.json line idx:[[0]] not find
04503.json layout error: cell_idx: 5, row_span: [1, 6], col_span: [0, 5]


 38%|███▊      | 4540/12104 [01:34<03:04, 41.06it/s]

04533.json line idx:[[70], [90]] not find


 39%|███▊      | 4666/12104 [01:36<01:45, 70.28it/s]

04668.json layout error: cell_idx: 2, row_span: [0, 2], col_span: [6, 9]


 39%|███▉      | 4706/12104 [01:37<02:20, 52.60it/s]

04701.json line idx:[[9], [10]] not find
04705.json line idx:[[0], [1]] not find


 39%|███▉      | 4750/12104 [01:38<01:30, 81.29it/s]

04753.json line idx:[[14]] not find
04755.json line idx:[[51], [52], [53], [54], [55], [56], [57], [58], [59]] not find


 40%|███▉      | 4786/12104 [01:39<02:06, 57.74it/s]

04778.json line idx:[[6], [8]] not find


 40%|███▉      | 4799/12104 [01:39<02:26, 49.88it/s]

04789.json layout error: cell_idx: 9, row_span: [0, 1], col_span: [9, 12]


 40%|███▉      | 4830/12104 [01:40<02:55, 41.45it/s]

04822.json line idx:[[0]] not find
04826.json line idx:[[47]] not find
04828.json line idx:[[0], [1]] not find


 40%|███▉      | 4840/12104 [01:40<02:59, 40.42it/s]

04835.json line idx:[[12]] not find


 40%|████      | 4858/12104 [01:41<03:18, 36.44it/s]

04849.json line idx:[[185]] not find


 42%|████▏     | 5046/12104 [01:43<01:36, 73.42it/s] 

05027.json line idx:[[4], [9], [10], [11], [25], [26], [27], [28], [40], [41], [42], [43], [63], [64], [65], [66], [67], [68], [69], [72]] not find


 43%|████▎     | 5251/12104 [01:46<03:23, 33.63it/s]

05250.json line idx:[[5]] not find


 45%|████▌     | 5467/12104 [01:51<01:44, 63.36it/s]

05456.json line idx:[[23]] not find


 45%|████▌     | 5492/12104 [01:51<01:16, 86.82it/s]

05491.json line idx:[[1], [4]] not find
05494.json line idx:[[1], [2], [3], [4], [5], [6], [7], [11], [12], [16], [17], [18], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [39]] not find
05495.json line idx:[[1]] not find


 45%|████▌     | 5502/12104 [01:51<02:04, 53.04it/s]

05500.json line idx:[[1], [4], [5], [6], [7], [8], [9], [10], [44], [46], [47], [48], [49]] not find


 46%|████▌     | 5547/12104 [01:52<01:51, 58.73it/s]

05539.json line idx:[[2], [36], [37], [38], [61], [105], [106], [107], [108], [127], [128], [129]] not find
05540.json line idx:[[2], [133], [134]] not find
05547.json line idx:[[2], [3]] not find


 46%|████▌     | 5572/12104 [01:53<02:27, 44.35it/s]

05568.json line idx:[[24]] not find


 46%|████▋     | 5612/12104 [01:53<02:10, 49.91it/s]

05600.json line idx:[[45]] not find


 48%|████▊     | 5785/12104 [01:56<00:55, 114.54it/s]

05769.json line idx:[[5], [8], [10], [19], [24], [27]] not find


 50%|████▉     | 5999/12104 [01:59<02:22, 42.92it/s] 

05993.json line idx:[[1]] not find


 50%|████▉     | 6026/12104 [02:00<02:19, 43.50it/s]

06020.json line idx:[[0], [1], [6], [9], [10], [104]] not find


 50%|████▉     | 6040/12104 [02:00<02:05, 48.33it/s]

06035.json line idx:[[40], [42], [43], [44], [57]] not find


 50%|█████     | 6057/12104 [02:00<02:19, 43.29it/s]

06048.json line idx:[[0], [1], [4], [8], [13], [96], [97], [111], [112], [113], [114]] not find


 50%|█████     | 6079/12104 [02:01<03:04, 32.58it/s]

06077.json line idx:[[157]] not find
06079.json line idx:[[7], [8], [12], [15], [16], [17], [20], [23], [24], [25], [26], [27], [32], [34], [36], [37], [43], [44], [45], [46], [48], [50], [51], [103]] not find


 50%|█████     | 6111/12104 [02:02<02:12, 45.24it/s]

06098.json line idx:[[1], [7], [8], [11]] not find


 51%|█████     | 6123/12104 [02:02<01:35, 62.71it/s]

06120.json line idx:[[3], [4], [5]] not find


 51%|█████     | 6148/12104 [02:03<02:08, 46.23it/s]

06142.json line idx:[[0], [1], [2], [3], [4], [5]] not find


 51%|█████     | 6183/12104 [02:03<01:52, 52.57it/s]

06175.json line idx:[[2], [9]] not find


 51%|█████     | 6199/12104 [02:04<02:19, 42.39it/s]

06193.json line idx:[[5], [80]] not find


 52%|█████▏    | 6273/12104 [02:06<01:27, 66.99it/s]

06267.json line idx:[[4], [5]] not find


 52%|█████▏    | 6297/12104 [02:06<02:39, 36.44it/s]

06287.json line idx:[[19], [21], [23], [24], [25]] not find


 52%|█████▏    | 6321/12104 [02:07<02:30, 38.36it/s]

06316.json line idx:[[5], [6], [17]] not find


 53%|█████▎    | 6363/12104 [02:08<02:29, 38.39it/s]

06360.json line idx:[[19], [81]] not find


 53%|█████▎    | 6372/12104 [02:08<03:15, 29.38it/s]

06365.json line idx:[[46], [48], [52], [53]] not find


 54%|█████▍    | 6543/12104 [02:13<01:55, 48.07it/s]

06543.json line idx:[[6], [8]] not find


 54%|█████▍    | 6587/12104 [02:14<03:11, 28.87it/s]

06584.json line idx:[[15]] not find


 55%|█████▍    | 6639/12104 [02:15<02:28, 36.74it/s]

06632.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31]] not find


 55%|█████▌    | 6715/12104 [02:18<02:51, 31.42it/s]

06707.json line idx:[[140], [141], [142], [143], [144], [145], [146], [147], [148], [149], [150], [151], [152]] not find
06716.json line idx:[[0], [1], [2]] not find


 56%|█████▌    | 6720/12104 [02:18<02:33, 35.07it/s]

06719.json line idx:[[0], [1], [2], [3], [4], [54]] not find


 56%|█████▌    | 6798/12104 [02:21<02:57, 29.87it/s]

06791.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50]] not find
06794.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46]] not find


 57%|█████▋    | 6910/12104 [02:25<02:29, 34.78it/s]

06907.json line idx:[[0], [5], [13], [14]] not find


 58%|█████▊    | 6965/12104 [02:27<02:39, 32.26it/s]

06960.json line idx:[[0]] not find
06964.json line idx:[[6]] not find


 58%|█████▊    | 6994/12104 [02:28<02:27, 34.65it/s]

06989.json line idx:[[8], [11]] not find


 58%|█████▊    | 7040/12104 [02:29<02:23, 35.19it/s]

07033.json line idx:[[71]] not find


 59%|█████▉    | 7170/12104 [02:34<02:28, 33.16it/s]

07164.json line idx:[[0]] not find


 59%|█████▉    | 7196/12104 [02:34<01:41, 48.15it/s]

07187.json line idx:[[0], [1], [2], [3], [4], [5], [7], [8], [10], [12], [13], [14], [15], [16], [17], [19], [20], [21], [22], [23], [24], [26], [27], [28], [29], [30], [31], [32], [34], [35], [36], [37], [38], [40], [41], [42], [43], [44], [45], [47], [48], [49], [50], [51], [52], [54], [55], [56], [57], [58], [60], [61], [62], [63], [64], [65], [67], [68], [69], [70], [71], [72], [74], [75], [76], [77], [80], [81], [82], [83], [84], [85], [86], [88], [89], [90], [91], [92], [93], [95], [96], [97], [98], [99], [100], [102], [103], [104], [105], [106], [107], [109], [110], [111], [112], [113], [115], [116], [117], [118], [119], [120], [122], [123], [124], [125], [126], [128], [129], [130], [131], [132]] not find


 61%|██████    | 7395/12104 [02:42<02:25, 32.30it/s]

07387.json line idx:[[10], [11], [12], [13], [14], [15]] not find
07391.json line idx:[[0]] not find


 62%|██████▏   | 7499/12104 [02:45<03:02, 25.20it/s]

07494.json line idx:[[54]] not find


 62%|██████▏   | 7508/12104 [02:46<03:28, 22.06it/s]

07503.json line idx:[[0]] not find


 62%|██████▏   | 7535/12104 [02:47<02:02, 37.20it/s]

07529.json line idx:[[103]] not find
07533.json line idx:[[7], [8], [9], [10], [11], [12], [13], [14], [15], [16]] not find


 63%|██████▎   | 7598/12104 [02:48<02:27, 30.47it/s]

07593.json line idx:[[23]] not find


 63%|██████▎   | 7637/12104 [02:50<02:37, 28.42it/s]

07633.json line idx:[[0], [1], [2], [4], [5], [6], [7], [8], [9], [11], [12], [13], [14], [15], [16], [17], [19], [20], [21], [22], [23], [24], [26], [27], [28], [29], [30], [31], [33], [34], [35], [36], [37], [38], [39], [40], [42], [43], [44], [45], [46], [47], [49], [50], [51], [52], [53], [54], [55], [56], [58], [59], [60], [61], [62], [63], [65], [66], [67], [68], [69], [70], [71], [72], [74], [75], [76], [77], [78], [79], [81], [82], [83], [84], [85], [86], [87], [88], [89], [91], [92], [93], [94], [95], [97], [98], [99], [100], [101], [102], [104], [105], [106], [107], [108], [110], [111], [112], [113], [114], [115], [116], [117], [118], [120], [121], [122], [123], [124], [125], [127], [128], [129], [130], [131], [132], [133], [134], [136], [137], [138], [139], [140], [142], [143], [144], [145], [146], [147], [148], [150], [151], [152], [153], [154], [155], [156], [157], [158]] not find


 63%|██████▎   | 7649/12104 [02:50<02:03, 35.93it/s]

07642.json line idx:[[0]] not find


 64%|██████▎   | 7711/12104 [02:52<02:38, 27.79it/s]

07711.json line idx:[[0], [2], [17], [22], [27], [47], [52], [59], [63], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85]] not find


 64%|██████▍   | 7719/12104 [02:53<02:49, 25.80it/s]

07713.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58]] not find
07717.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16]] not find
07719.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73]] not find


 65%|██████▍   | 7841/12104 [02:56<01:53, 37.47it/s]

07835.json line idx:[[36]] not find


 66%|██████▌   | 7934/12104 [02:59<01:49, 38.25it/s]

07930.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79]] not find


 66%|██████▌   | 7950/12104 [02:59<01:39, 41.88it/s]

07941.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [14], [15], [16], [17], [23], [24], [25], [26], [27], [28], [29], [30], [35], [36], [37], [38]] not find


 66%|██████▌   | 7959/12104 [02:59<01:53, 36.38it/s]

07951.json line idx:[[0], [5], [8], [9], [10], [11], [12], [15], [16], [17], [18], [20], [23], [24], [25], [28], [30], [31], [32], [35], [37], [38], [39], [42], [44], [45], [46], [49], [51], [52], [53], [56], [58], [59], [60], [62], [65], [66], [67], [70], [72], [73], [74], [77], [79], [80], [82], [85], [87], [89], [91], [92], [96], [97], [99], [101], [103], [104], [107], [109], [113], [115], [117], [122], [124], [125], [126], [127], [129], [131], [132], [133], [134], [138], [139], [140], [143], [145], [147], [148], [151], [152], [153], [157], [158], [159], [165], [166], [167], [170], [171], [172], [173], [174], [175], [176], [177], [178], [179], [180], [181], [182], [183]] not find


 66%|██████▌   | 7965/12104 [02:59<01:41, 40.84it/s]

07962.json line idx:[[0], [1], [2], [3]] not find


 67%|██████▋   | 8059/12104 [03:03<03:21, 20.08it/s]

08056.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7]] not find
08058.json line idx:[[149]] not find


 67%|██████▋   | 8079/12104 [03:04<02:00, 33.33it/s]

08077.json line idx:[[10]] not find


 67%|██████▋   | 8117/12104 [03:05<01:36, 41.21it/s]

08118.json line idx:[[66], [67], [68], [69], [70], [71], [72]] not find


 67%|██████▋   | 8155/12104 [03:07<02:53, 22.77it/s]

08152.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [87], [88], [89], [90], [91], [92], [93], [94], [95], [96], [97], [98], [99], [100], [101], [102], [103]] not find


 68%|██████▊   | 8225/12104 [03:09<02:09, 30.06it/s]

08227.json line idx:[[0], [1], [2], [12], [53], [54], [55], [56], [57]] not find


 68%|██████▊   | 8243/12104 [03:10<02:29, 25.84it/s]

08234.json line idx:[[41]] not find


 68%|██████▊   | 8253/12104 [03:10<02:11, 29.34it/s]

08246.json line idx:[[60]] not find
08253.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40]] not find


 68%|██████▊   | 8271/12104 [03:11<01:23, 45.76it/s]

08263.json line idx:[[0], [1], [2], [3], [6], [8], [9], [10], [11], [13], [14], [15], [16], [17], [19], [20], [23], [24], [25], [26], [27], [28], [29], [31], [32], [33], [34], [35], [37], [38], [39], [40], [41], [43], [44], [45], [46], [48], [49], [50], [51], [53], [54], [55], [56], [57], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80]] not find


 69%|██████▉   | 8334/12104 [03:13<02:12, 28.41it/s]

08328.json line idx:[[15], [16], [17]] not find
08329.json line idx:[[29]] not find
08332.json line idx:[[0], [1], [2], [3], [5], [6], [7], [8], [9], [11], [12], [13], [14], [15], [17], [18], [19], [20], [21], [22], [23], [24], [25], [28], [29], [30], [31], [32], [33], [34], [35], [36], [39], [40], [41], [42], [43], [44], [45], [46], [47], [50], [51], [52], [53], [54], [55], [56], [57], [58], [61], [62], [63], [64], [65], [66], [67], [68], [69], [72], [73], [74], [75], [76], [77], [78], [79], [80], [83], [84], [85], [86], [87], [88], [89], [90], [91], [94], [95], [96], [97], [98], [99], [100], [101], [102], [105], [106], [107], [108], [109], [110], [111], [112], [113], [116], [117], [118], [119], [120], [121], [122], [123], [124], [127], [128], [129], [130], [131], [132], [133], [134], [135], [138], [139], [140], [141], [142], [143], [144], [145], [146], [149], [150], [151], [152], [153], [154], [155], [156], [157], [160], [161], [162], [163], [164], [165], [166], [167], [169], [171], 

 69%|██████▉   | 8356/12104 [03:13<01:44, 35.80it/s]

08350.json line idx:[[0], [4], [5], [8], [11], [13], [16], [17], [23], [24], [29], [31], [34], [37], [38], [39], [43], [44], [45], [50], [51], [57], [58], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76]] not find
08356.json line idx:[[0], [2], [3], [10], [26], [31], [36], [46], [55], [62], [65], [74], [76], [78], [80], [81], [82], [83], [84]] not find


 70%|██████▉   | 8449/12104 [03:17<01:37, 37.46it/s]

08442.json line idx:[[64], [65], [66], [67], [68]] not find


 70%|███████   | 8507/12104 [03:18<01:13, 48.63it/s]

08502.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]] not find


 70%|███████   | 8518/12104 [03:18<01:35, 37.48it/s]

08513.json line idx:[[12], [15], [21], [24], [31], [34]] not find


 71%|███████▏  | 8645/12104 [03:23<01:34, 36.72it/s]

08644.json line idx:[[0], [19], [20]] not find


 72%|███████▏  | 8661/12104 [03:24<02:18, 24.86it/s]

08656.json line idx:[[105]] not find


 72%|███████▏  | 8771/12104 [03:27<01:50, 30.26it/s]

08764.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [87], [88], [89], [90], [91], [92], [93], [94], [95], [96], [97], [98], [99], [100], [101]] not find


 73%|███████▎  | 8833/12104 [03:29<02:27, 22.22it/s]

08831.json line idx:[[7], [9], [10], [11], [12], [13]] not find


 73%|███████▎  | 8853/12104 [03:30<01:35, 34.12it/s]

08843.json line idx:[[2]] not find


 73%|███████▎  | 8873/12104 [03:30<01:24, 38.32it/s]

08865.json line idx:[[3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [119], [120], [121], [122], [123], [124], [125], [126], [127], [128], [129], [130], [131], [132], [133], [134], [135], [136], [137], [138], [139], [140], [141], [142], [167]] not find


 74%|███████▍  | 9015/12104 [03:34<01:00, 50.98it/s]

08997.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47]] not find


 75%|███████▍  | 9044/12104 [03:35<01:20, 38.11it/s]

09036.json line idx:[[8], [9], [10], [64]] not find


 76%|███████▌  | 9163/12104 [03:39<01:05, 45.05it/s]

09150.json line idx:[[0]] not find


 76%|███████▋  | 9252/12104 [03:42<01:06, 42.71it/s]

09246.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [87], [88], [89], [90], [91], [92], [93], [94], [95], [96], [97], [98], [99], [100], [101]] not find


 77%|███████▋  | 9372/12104 [03:45<01:18, 34.81it/s]

09369.json line idx:[[99], [103]] not find


 78%|███████▊  | 9388/12104 [03:46<02:02, 22.17it/s]

09384.json line idx:[[41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76]] not find


 78%|███████▊  | 9393/12104 [03:46<01:41, 26.82it/s]

09389.json line idx:[[22], [25], [63], [64], [68], [108], [110]] not find
09390.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53]] not find
09398.json line idx:[[1]] not find


 78%|███████▊  | 9414/12104 [03:46<01:06, 40.24it/s]

09405.json line idx:[[0]] not find
09406.json line idx:[[0], [1], [114], [115]] not find


 78%|███████▊  | 9426/12104 [03:46<01:04, 41.50it/s]

09418.json line idx:[[0], [1], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [39], [40], [41], [42], [43], [44], [45], [46], [55]] not find


 78%|███████▊  | 9488/12104 [03:48<01:25, 30.68it/s]

09483.json layout error: cell_idx: 0, row_span: [0, 2], col_span: [0, 11]


 80%|███████▉  | 9631/12104 [03:52<00:32, 76.65it/s]

09610.json line idx:[[0]] not find


 80%|████████  | 9696/12104 [03:53<01:11, 33.91it/s]

09692.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48]] not find
09695.json line idx:[[1]] not find


 80%|████████  | 9724/12104 [03:54<01:02, 38.13it/s]

09717.json line idx:[[147], [148]] not find
09723.json line idx:[[21], [22], [23], [26], [27], [29], [31], [34], [35], [37], [39], [41], [43], [45], [48], [49], [51], [53], [56], [57], [84], [85], [86], [87], [89], [90], [91], [92], [95], [96], [97], [98], [101], [102], [103], [104], [107], [108], [109], [110], [113], [114], [115], [116], [119], [120], [121], [122], [125], [126], [127], [128], [131], [132], [133], [134], [136], [137], [139], [140]] not find


 80%|████████  | 9736/12104 [03:54<00:51, 45.80it/s]

09726.json line idx:[[0], [1], [7], [8], [9], [10], [11], [12], [16], [17], [18], [19], [24], [25], [26], [27], [32], [33], [34], [35], [40], [41], [42], [43], [50], [51], [52], [53], [54], [55], [60], [61], [62], [63], [67], [68], [69], [70], [74], [78], [81], [84], [87], [90], [93], [96], [99], [102], [105], [108], [112], [113], [116], [119], [125], [130], [131]] not find


 81%|████████  | 9833/12104 [03:57<01:10, 32.40it/s]

09829.json line idx:[[56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69]] not find
09830.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [23], [24], [25], [26], [27], [28], [29], [30], [31], [33], [34], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [68], [69], [70], [71], [72]] not find
09832.json line idx:[[15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32]] not find


 81%|████████▏ | 9842/12104 [03:58<01:21, 27.86it/s]

09836.json line idx:[[28], [29], [30], [65], [101]] not find
09843.json line idx:[[22], [23]] not find


 81%|████████▏ | 9860/12104 [03:58<01:03, 35.29it/s]

09853.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70]] not find


 82%|████████▏ | 9870/12104 [03:58<01:00, 36.96it/s]

09866.json layout error: cell_idx: 2, row_span: [0, 3], col_span: [2, 4]
09868.json layout error: cell_idx: 4, row_span: [0, 2], col_span: [11, 17]


 82%|████████▏ | 9915/12104 [03:59<00:38, 56.32it/s]

09895.json line idx:[[1]] not find
09915.json line idx:[[23], [26], [29], [32], [35], [38], [39], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [57], [58], [61], [64], [66], [68], [70]] not find


 82%|████████▏ | 9929/12104 [04:00<00:42, 51.46it/s]

09921.json line idx:[[3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [164], [165], [166]] not find


 82%|████████▏ | 9941/12104 [04:00<00:40, 53.73it/s]

09934.json line idx:[[24], [25]] not find


 83%|████████▎ | 9989/12104 [04:01<00:55, 37.91it/s]

09979.json line idx:[[0], [1]] not find


 83%|████████▎ | 10000/12104 [04:02<00:51, 40.98it/s]

09997.json line idx:[[19], [25], [31], [36], [42], [47]] not find


 83%|████████▎ | 10011/12104 [04:02<00:49, 42.44it/s]

10013.json line idx:[[0]] not find


 83%|████████▎ | 10072/12104 [04:05<01:02, 32.63it/s]

10063.json line idx:[[62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72]] not find


 83%|████████▎ | 10104/12104 [04:05<00:44, 45.10it/s]

10090.json line idx:[[28]] not find


 84%|████████▎ | 10127/12104 [04:06<00:45, 43.00it/s]

10120.json line idx:[[0]] not find


 84%|████████▍ | 10151/12104 [04:07<00:47, 41.29it/s]

10145.json line idx:[[8]] not find


 85%|████████▍ | 10239/12104 [04:09<00:51, 36.52it/s]

10228.json line idx:[[15], [16], [17], [18], [19]] not find


 85%|████████▍ | 10270/12104 [04:10<00:56, 32.32it/s]

10266.json line idx:[[6], [7], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [87], [88], [89], [90]] not find
10267.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [

 85%|████████▍ | 10284/12104 [04:10<00:49, 37.13it/s]

10277.json line idx:[[0], [1]] not find


 85%|████████▌ | 10305/12104 [04:11<00:51, 34.72it/s]

10300.json layout error: cell_idx: 0, row_span: [0, 2], col_span: [0, 8]


 86%|████████▋ | 10451/12104 [04:16<00:45, 36.68it/s]

10444.json line idx:[[0], [1], [2], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [38], [39], [40], [41], [42], [44], [45], [46], [47], [48], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59]] not find


 87%|████████▋ | 10560/12104 [04:19<00:51, 29.96it/s]

10555.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [87], [88], [89], [90], [91], [92]] not find
10562.json line idx:[[0]] not find


 87%|████████▋ | 10573/12104 [04:19<00:40, 37.58it/s]

10566.json line idx:[[0]] not find


 88%|████████▊ | 10606/12104 [04:20<00:42, 35.50it/s]

10603.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12]] not find


 88%|████████▊ | 10707/12104 [04:22<00:32, 42.76it/s]

10702.json line idx:[[0], [11], [47], [48]] not find


 89%|████████▊ | 10725/12104 [04:23<00:33, 41.50it/s]

10723.json line idx:[[28], [29], [30], [31]] not find
10725.json line idx:[[7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19]] not find


 89%|████████▊ | 10742/12104 [04:23<00:39, 34.78it/s]

10741.json line idx:[[13], [15], [17], [20], [21], [23], [25], [27], [29], [31], [33], [35], [37], [39], [41], [43], [44], [46], [48], [51], [52], [55], [58], [59], [60], [62], [64], [66], [68], [70], [72], [74], [76]] not find


 90%|████████▉ | 10845/12104 [04:26<00:36, 34.79it/s]

10841.json line idx:[[25], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84]] not find
10845.json line idx:[[23], [24], [33], [34], [35], [45], [51], [58], [66], [73], [79], [80], [90], [91], [98], [99], [106], [107], [114], [133]] not find


 90%|█████████ | 10952/12104 [04:29<00:27, 42.34it/s]

10951.json line idx:[[13], [14], [15], [16], [17], [18]] not find


 91%|█████████ | 11000/12104 [04:31<00:32, 34.07it/s]

10992.json line idx:[[49], [52], [53], [55], [56], [59], [60], [63], [108], [109], [110], [111]] not find


 91%|█████████ | 11005/12104 [04:31<00:30, 36.41it/s]

11001.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [27], [28], [30], [31], [33], [35], [37], [39], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [87], [88], [89]] not find
11008.json line idx:[[0]] not find


 91%|█████████ | 11023/12104 [04:31<00:25, 41.99it/s]

11016.json line idx:[[3]] not find


 91%|█████████ | 11033/12104 [04:32<00:27, 39.03it/s]

11026.json line idx:[[0], [1]] not find


 92%|█████████▏| 11078/12104 [04:33<00:15, 67.73it/s]

11069.json line idx:[[5]] not find
11077.json line idx:[[0], [9], [21], [36], [37], [62], [63]] not find


 92%|█████████▏| 11106/12104 [04:34<00:25, 39.74it/s]

11098.json line idx:[[13], [14]] not find


 92%|█████████▏| 11161/12104 [04:35<00:25, 37.55it/s]

11148.json line idx:[[25]] not find


 92%|█████████▏| 11183/12104 [04:36<00:23, 38.41it/s]

11177.json line idx:[[10], [11], [38], [39], [44], [45], [56], [57], [82], [83], [108], [109], [120], [121], [143], [144]] not find
11186.json line idx:[[65], [66], [92], [93], [117], [118]] not find


 93%|█████████▎| 11293/12104 [04:39<00:26, 30.68it/s]

11295.json line idx:[[17], [18], [19], [20]] not find


 93%|█████████▎| 11315/12104 [04:40<00:21, 36.53it/s]

11305.json line idx:[[5]] not find


 94%|█████████▎| 11329/12104 [04:40<00:23, 32.71it/s]

11327.json line idx:[[12], [13]] not find


 94%|█████████▍| 11418/12104 [04:43<00:19, 35.09it/s]

11416.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [87], [88], [89], [90], [91], [92], [93], [94], [95], [96], [97], [98], [99], [100], [101], [102], [103], [104], [105], [106], [107], [108], [109], [110], [111], [112], [113], [114], [115], [116], [117], [118], [119], [120], [121], [122], [123], [124], [125], [126], [127], [128], [129], [130], [131], [132], [133], [134], [135], [136], [137], [138], [139], [140], [141], [142], [143], [144], [145], [146], [147], [148], [149], [150], [151], [152], [153], [154]] not 

 94%|█████████▍| 11425/12104 [04:44<00:32, 20.58it/s]

11420.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84], [85], [86], [87], [88], [89], [90], [91], [92], [93], [94], [95], [96], [97], [98], [99], [100], [101], [102], [103], [104]] not find


 95%|█████████▍| 11439/12104 [04:44<00:18, 36.74it/s]

11428.json line idx:[[2], [10], [15], [24]] not find


 95%|█████████▍| 11446/12104 [04:44<00:15, 41.18it/s]

11443.json line idx:[[0], [1], [2], [3], [4], [5]] not find


 95%|█████████▍| 11462/12104 [04:44<00:16, 39.52it/s]

11454.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67]] not find


 95%|█████████▌| 11504/12104 [04:45<00:07, 78.67it/s]

11495.json line idx:[[0]] not find


 96%|█████████▌| 11571/12104 [04:47<00:17, 30.66it/s]

11566.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [84]] not find


 96%|█████████▌| 11587/12104 [04:48<00:18, 27.93it/s]

11575.json layout error: cell_idx: 425, row_span: [32, 33], col_span: [0, 14]


 96%|█████████▌| 11607/12104 [04:49<00:14, 33.69it/s]

11598.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [80], [81], [82], [83], [84], [85], [104], [105], [106], [107], [108], [109], [110], [111], [112], [113], [114], [115], [116], [117], [118], [119], [120], [121], [122], [123], [124], [146]] not find
11606.json line idx:[[0]] not find


 97%|█████████▋| 11729/12104 [04:53<00:09, 38.87it/s]

11720.json line idx:[[25], [26], [52], [53]] not find
11721.json layout error: cell_idx: 0, row_span: [0, 2], col_span: [0, 5]


 97%|█████████▋| 11743/12104 [04:53<00:07, 46.39it/s]

11735.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51]] not find


 97%|█████████▋| 11754/12104 [04:54<00:08, 41.83it/s]

11745.json line idx:[[20], [21], [22], [23], [52], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [75], [76], [77], [78], [79], [80], [81], [82], [83], [88], [89], [90], [91], [92], [93], [94], [95], [96], [97], [98], [99], [100], [101], [102], [103], [104], [105], [106], [107], [108], [109], [110], [111], [112], [113], [114], [115], [116], [117], [118], [119], [120], [121], [122], [123], [124], [125], [126], [127], [128], [129], [130], [131], [132], [133], [134], [135], [136], [137], [138], [139], [140], [141], [142], [143], [144], [145], [146]] not find


 98%|█████████▊| 11888/12104 [04:57<00:08, 25.38it/s]

11882.json line idx:[[0]] not find


 99%|█████████▉| 12009/12104 [05:01<00:03, 26.10it/s]

12004.json line idx:[[57]] not find
12006.json line idx:[[275]] not find
12009.json line idx:[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15]] not find


100%|██████████| 12104/12104 [05:04<00:00, 39.76it/s]

12099.json line idx:[[3], [6], [8]] not find
table error: 280



