# Import

In [15]:
import os
import re
import json
import time
import datetime
import random
import glob
import importlib

import numpy as np
import pandas as pd
import nltk
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import collections

random.seed(123)
np.random.seed(456)

# Hyper-parameters

In [16]:
MAX_LENGTH = 64 # max no. words for each sentence.
OVERLAP = 20 # if a sentence exceeds MAX_LENGTH, we split it to multiple sentences with overlapping

MAX_SAMPLE = None # set a small number for experimentation, set None for production.

# Load data

In [17]:
train_path = './input/train.csv'
paper_train_folder = './input/train'
paper_sample_folder='./input/test'
sample_sub = pd.read_csv('./input/sample_submission.csv')

train = pd.read_csv(train_path)
train = train[:MAX_SAMPLE]
print(f'No. raw training rows: {len(train)}')
train = train.groupby('Id').agg({
    'pub_title': 'first',
    'dataset_title': '|'.join,
    'dataset_label': '|'.join,
    'cleaned_label': '|'.join
}).reset_index()

print(f'No. grouped training rows: {len(train)}')

No. raw training rows: 19661
No. grouped training rows: 14316


In [18]:
train_num=int(len(train)*0.98)
validation_df=train.iloc[train_num:]
train=train.iloc[:train_num]
print("train_num,validation_num:",len(train),len(validation_df))

train_num,validation_num: 14029 287


In [19]:
def read_papers(paper_folder,data_df):
    papers = {}
    for paper_id in data_df['Id'].unique():
        with open(f'{paper_folder}/{paper_id}.json', 'r') as f:
            paper = json.load(f)
            papers[paper_id] = paper
    return papers

# Transform data to NER format

In [20]:

def clean_training_text(txt):
    """
    similar to the default clean_text function but without lowercasing.
    """
    return re.sub('[^A-Za-z0-9]+', ' ', str(txt)).strip()

def shorten_sentences(sentences):
    short_sentences = []
    for sentence in sentences:
        words = sentence.split()
        if len(words) > MAX_LENGTH:
            for p in range(0, len(words), MAX_LENGTH - OVERLAP):
                short_sentences.append(' '.join(words[p:p+MAX_LENGTH]))
        else:
            short_sentences.append(sentence)
    return short_sentences

def find_sublist(big_list, small_list):
    all_positions = []
    for i in range(len(big_list) - len(small_list) + 1):
        if small_list == big_list[i:i+len(small_list)]:
            all_positions.append(i)
    
    return all_positions

def tag_sentence(sentence, labels): # requirement: both sentence and labels are already cleaned
    sentence_words = sentence.split()  
    if labels is not None and any(re.findall(f'\\b{label}\\b', sentence)
                                  for label in labels): # positive sample
        nes = ['O'] * len(sentence_words)
        for label in labels:
            label_words = label.split()

            all_pos = find_sublist(sentence_words, label_words)
            for pos in all_pos:
                nes[pos] = 'B'
                for i in range(pos+1, pos+len(label_words)):
                    nes[i] = 'I'

        return True, list(zip(sentence_words, nes))
        
    else: # negative sample
        nes = ['O'] * len(sentence_words)
        return False, list(zip(sentence_words, nes))

In [21]:
#返回ner_data
cnt_pos, cnt_neg = 0, 0 # number of sentences that contain/not contain labels
ner_data = []
papers_train=read_papers(paper_train_folder,train)
pbar = tqdm(total=len(train))
for i, id, dataset_label in train[['Id', 'dataset_label']].itertuples():
    # paper
    paper = papers_train[id]
    
    # labels
    labels = dataset_label.split('|')
    labels = [clean_training_text(label) for label in labels]
    
    # sentences
    sentences = set([clean_training_text(sentence) for section in paper 
                 for sentence in section['text'].split('.') 
                ])
    sentences = shorten_sentences(sentences) # make sentences short
    sentences = [sentence for sentence in sentences if len(sentence) > 10] # only accept sentences with length > 10 chars
    
    # positive sample
    for sentence in sentences:
        is_positive, tags = tag_sentence(sentence, labels)
        if is_positive:
            cnt_pos += 1
            ner_data.append(tags)
        elif any(word in sentence.lower() for word in ['data', 'study']) and cnt_neg<=cnt_pos: 
            
            ner_data.append(tags)
            cnt_neg += 1
    
    # process bar
    pbar.update(1)
    pbar.set_description(f"Training data size: {cnt_pos} positives + {cnt_neg} negatives")

# shuffling
random.shuffle(ner_data)


  0%|                                                                                        | 0/14029 [00:00<?, ?it/s][A
Training data size: 1 positives + 2 negatives:   0%|                                | 1/14029 [00:00<01:51, 125.32it/s][A
Training data size: 2 positives + 3 negatives:   0%|                                | 2/14029 [00:00<01:37, 143.24it/s][A
Training data size: 4 positives + 5 negatives:   0%|                                | 3/14029 [00:00<01:38, 143.03it/s][A
Training data size: 14 positives + 13 negatives:   0%|                              | 4/14029 [00:00<01:34, 148.39it/s][A
Training data size: 15 positives + 15 negatives:   0%|                              | 5/14029 [00:00<01:23, 167.11it/s][A
Training data size: 18 positives + 19 negatives:   0%|                              | 6/14029 [00:00<01:26, 162.60it/s][A
Training data size: 20 positives + 21 negatives:   0%|                              | 7/14029 [00:00<01:35, 146.23it/s][A
Training data s

Training data size: 305 positives + 306 negatives:   1%|▏                         | 125/14029 [00:00<01:25, 161.97it/s][A
Training data size: 305 positives + 306 negatives:   1%|▏                         | 126/14029 [00:00<01:25, 161.97it/s][A
Training data size: 306 positives + 307 negatives:   1%|▏                         | 127/14029 [00:00<01:25, 161.97it/s][A
Training data size: 307 positives + 308 negatives:   1%|▏                         | 128/14029 [00:00<01:25, 161.97it/s][A
Training data size: 326 positives + 327 negatives:   1%|▏                         | 129/14029 [00:00<01:25, 161.97it/s][A
Training data size: 332 positives + 333 negatives:   1%|▏                         | 130/14029 [00:00<01:25, 161.97it/s][A
Training data size: 332 positives + 333 negatives:   1%|▏                         | 131/14029 [00:00<01:29, 154.96it/s][A
Training data size: 333 positives + 334 negatives:   1%|▏                         | 131/14029 [00:00<01:29, 154.96it/s][A
Training data si

Training data size: 803 positives + 804 negatives:   2%|▍                         | 249/14029 [00:01<01:44, 131.63it/s][A
Training data size: 804 positives + 805 negatives:   2%|▍                         | 250/14029 [00:01<01:44, 131.63it/s][A
Training data size: 804 positives + 805 negatives:   2%|▍                         | 251/14029 [00:01<01:44, 131.63it/s][A
Training data size: 805 positives + 806 negatives:   2%|▍                         | 252/14029 [00:01<01:44, 131.63it/s][A
Training data size: 805 positives + 806 negatives:   2%|▍                         | 253/14029 [00:01<01:44, 131.63it/s][A
Training data size: 807 positives + 808 negatives:   2%|▍                         | 254/14029 [00:01<01:44, 131.63it/s][A
Training data size: 810 positives + 811 negatives:   2%|▍                         | 255/14029 [00:01<01:44, 131.63it/s][A
Training data size: 811 positives + 812 negatives:   2%|▍                         | 256/14029 [00:01<01:44, 131.63it/s][A
Training data si

Training data size: 1139 positives + 1140 negatives:   3%|▋                       | 373/14029 [00:02<01:52, 120.96it/s][A
Training data size: 1140 positives + 1141 negatives:   3%|▋                       | 374/14029 [00:02<01:52, 120.96it/s][A
Training data size: 1152 positives + 1144 negatives:   3%|▋                       | 375/14029 [00:02<01:52, 120.96it/s][A
Training data size: 1153 positives + 1146 negatives:   3%|▋                       | 376/14029 [00:02<01:52, 120.96it/s][A
Training data size: 1154 positives + 1155 negatives:   3%|▋                       | 377/14029 [00:02<01:52, 120.96it/s][A
Training data size: 1159 positives + 1159 negatives:   3%|▋                       | 378/14029 [00:02<01:52, 120.96it/s][A
Training data size: 1177 positives + 1177 negatives:   3%|▋                       | 379/14029 [00:02<01:52, 120.96it/s][A
Training data size: 1207 positives + 1204 negatives:   3%|▋                       | 380/14029 [00:02<01:52, 120.96it/s][A
Training data si

Training data size: 1561 positives + 1561 negatives:   4%|▊                       | 498/14029 [00:03<01:35, 141.41it/s][A
Training data size: 1562 positives + 1563 negatives:   4%|▊                       | 499/14029 [00:03<01:35, 141.41it/s][A
Training data size: 1562 positives + 1563 negatives:   4%|▊                       | 500/14029 [00:03<01:33, 144.98it/s][A
Training data size: 1563 positives + 1564 negatives:   4%|▊                       | 500/14029 [00:03<01:33, 144.98it/s][A
Training data size: 1565 positives + 1566 negatives:   4%|▊                       | 501/14029 [00:03<01:33, 144.98it/s][A
Training data size: 1571 positives + 1572 negatives:   4%|▊                       | 502/14029 [00:03<01:33, 144.98it/s][A
Training data size: 1573 positives + 1573 negatives:   4%|▊                       | 503/14029 [00:03<01:33, 144.98it/s][A
Training data size: 1575 positives + 1576 negatives:   4%|▊                       | 504/14029 [00:03<01:33, 144.98it/s][A
Training data si

Training data size: 1886 positives + 1887 negatives:   4%|█                       | 622/14029 [00:05<02:12, 101.14it/s][A
Training data size: 1886 positives + 1887 negatives:   4%|█                       | 623/14029 [00:05<02:12, 101.14it/s][A
Training data size: 1888 positives + 1889 negatives:   4%|█                       | 624/14029 [00:05<02:12, 101.14it/s][A
Training data size: 1890 positives + 1891 negatives:   4%|█                       | 625/14029 [00:05<02:12, 101.14it/s][A
Training data size: 1890 positives + 1891 negatives:   4%|█                       | 626/14029 [00:05<02:00, 111.40it/s][A
Training data size: 1891 positives + 1892 negatives:   4%|█                       | 626/14029 [00:05<02:00, 111.40it/s][A
Training data size: 1893 positives + 1894 negatives:   4%|█                       | 627/14029 [00:05<02:00, 111.40it/s][A
Training data size: 1905 positives + 1906 negatives:   4%|█                       | 628/14029 [00:05<02:00, 111.40it/s][A
Training data si

Training data size: 2289 positives + 2283 negatives:   5%|█▎                      | 746/14029 [00:06<01:53, 117.09it/s][A
Training data size: 2289 positives + 2283 negatives:   5%|█▎                      | 747/14029 [00:06<01:39, 133.41it/s][A
Training data size: 2307 positives + 2308 negatives:   5%|█▎                      | 747/14029 [00:06<01:39, 133.41it/s][A
Training data size: 2308 positives + 2309 negatives:   5%|█▎                      | 748/14029 [00:06<01:39, 133.41it/s][A
Training data size: 2309 positives + 2310 negatives:   5%|█▎                      | 749/14029 [00:06<01:39, 133.41it/s][A
Training data size: 2310 positives + 2311 negatives:   5%|█▎                      | 750/14029 [00:06<01:39, 133.41it/s][A
Training data size: 2311 positives + 2312 negatives:   5%|█▎                      | 751/14029 [00:06<01:39, 133.41it/s][A
Training data size: 2312 positives + 2313 negatives:   5%|█▎                      | 752/14029 [00:06<01:39, 133.41it/s][A
Training data si

Training data size: 2810 positives + 2811 negatives:   6%|█▌                       | 869/14029 [00:07<02:12, 99.26it/s][A
Training data size: 2812 positives + 2813 negatives:   6%|█▌                       | 870/14029 [00:07<02:12, 99.26it/s][A
Training data size: 2814 positives + 2815 negatives:   6%|█▌                       | 871/14029 [00:07<02:12, 99.26it/s][A
Training data size: 2815 positives + 2816 negatives:   6%|█▌                       | 872/14029 [00:07<02:12, 99.26it/s][A
Training data size: 2816 positives + 2817 negatives:   6%|█▌                       | 873/14029 [00:07<02:12, 99.26it/s][A
Training data size: 2817 positives + 2818 negatives:   6%|█▌                       | 874/14029 [00:07<02:12, 99.26it/s][A
Training data size: 2818 positives + 2818 negatives:   6%|█▌                       | 875/14029 [00:07<02:12, 99.26it/s][A
Training data size: 2819 positives + 2820 negatives:   6%|█▌                       | 876/14029 [00:07<02:12, 99.26it/s][A
Training data si

Training data size: 3248 positives + 3249 negatives:   7%|█▊                       | 993/14029 [00:09<02:18, 94.09it/s][A
Training data size: 3249 positives + 3250 negatives:   7%|█▊                       | 994/14029 [00:09<02:18, 94.09it/s][A
Training data size: 3250 positives + 3251 negatives:   7%|█▊                       | 995/14029 [00:09<02:18, 94.09it/s][A
Training data size: 3251 positives + 3252 negatives:   7%|█▊                       | 996/14029 [00:09<02:18, 94.09it/s][A
Training data size: 3252 positives + 3253 negatives:   7%|█▊                       | 997/14029 [00:09<02:18, 94.09it/s][A
Training data size: 3253 positives + 3254 negatives:   7%|█▊                       | 998/14029 [00:09<02:18, 94.09it/s][A
Training data size: 3254 positives + 3255 negatives:   7%|█▊                       | 999/14029 [00:09<02:18, 94.09it/s][A
Training data size: 3255 positives + 3256 negatives:   7%|█▋                      | 1000/14029 [00:09<02:18, 94.09it/s][A
Training data si

Training data size: 3537 positives + 3538 negatives:   8%|█▊                     | 1117/14029 [00:10<01:28, 146.64it/s][A
Training data size: 3538 positives + 3539 negatives:   8%|█▊                     | 1118/14029 [00:10<01:28, 146.64it/s][A
Training data size: 3541 positives + 3542 negatives:   8%|█▊                     | 1119/14029 [00:10<01:28, 146.64it/s][A
Training data size: 3543 positives + 3544 negatives:   8%|█▊                     | 1120/14029 [00:10<01:28, 146.64it/s][A
Training data size: 3544 positives + 3545 negatives:   8%|█▊                     | 1121/14029 [00:10<01:28, 146.64it/s][A
Training data size: 3545 positives + 3546 negatives:   8%|█▊                     | 1122/14029 [00:10<01:28, 146.64it/s][A
Training data size: 3546 positives + 3547 negatives:   8%|█▊                     | 1123/14029 [00:10<01:28, 146.64it/s][A
Training data size: 3547 positives + 3548 negatives:   8%|█▊                     | 1124/14029 [00:10<01:28, 146.64it/s][A
Training data si

Training data size: 3922 positives + 3923 negatives:   9%|██                     | 1242/14029 [00:10<01:35, 134.31it/s][A
Training data size: 3923 positives + 3924 negatives:   9%|██                     | 1243/14029 [00:10<01:35, 134.31it/s][A
Training data size: 3923 positives + 3924 negatives:   9%|██                     | 1244/14029 [00:10<01:35, 133.28it/s][A
Training data size: 4029 positives + 3970 negatives:   9%|██                     | 1244/14029 [00:10<01:35, 133.28it/s][A
Training data size: 4030 positives + 3998 negatives:   9%|██                     | 1245/14029 [00:10<01:35, 133.28it/s][A
Training data size: 4031 positives + 3999 negatives:   9%|██                     | 1246/14029 [00:10<01:35, 133.28it/s][A
Training data size: 4031 positives + 4002 negatives:   9%|██                     | 1247/14029 [00:11<01:35, 133.28it/s][A
Training data size: 4032 positives + 4012 negatives:   9%|██                     | 1248/14029 [00:11<01:35, 133.28it/s][A
Training data si

Training data size: 4375 positives + 4376 negatives:  10%|██▎                     | 1365/14029 [00:13<03:31, 59.86it/s][A
Training data size: 4409 positives + 4404 negatives:  10%|██▎                     | 1366/14029 [00:13<03:31, 59.86it/s][A
Training data size: 4410 positives + 4411 negatives:  10%|██▎                     | 1367/14029 [00:13<03:31, 59.86it/s][A
Training data size: 4411 positives + 4412 negatives:  10%|██▎                     | 1368/14029 [00:13<03:31, 59.86it/s][A
Training data size: 4428 positives + 4426 negatives:  10%|██▎                     | 1369/14029 [00:13<03:31, 59.86it/s][A
Training data size: 4430 positives + 4431 negatives:  10%|██▎                     | 1370/14029 [00:13<03:31, 59.86it/s][A
Training data size: 4431 positives + 4432 negatives:  10%|██▎                     | 1371/14029 [00:13<03:31, 59.86it/s][A
Training data size: 4432 positives + 4433 negatives:  10%|██▎                     | 1372/14029 [00:13<03:31, 59.86it/s][A
Training data si

Training data size: 4765 positives + 4766 negatives:  11%|██▍                    | 1489/14029 [00:14<01:30, 138.05it/s][A
Training data size: 4772 positives + 4773 negatives:  11%|██▍                    | 1490/14029 [00:14<01:30, 138.05it/s][A
Training data size: 4773 positives + 4774 negatives:  11%|██▍                    | 1491/14029 [00:14<01:30, 138.05it/s][A
Training data size: 4774 positives + 4775 negatives:  11%|██▍                    | 1492/14029 [00:14<01:30, 138.05it/s][A
Training data size: 4779 positives + 4779 negatives:  11%|██▍                    | 1493/14029 [00:14<01:30, 138.05it/s][A
Training data size: 4780 positives + 4781 negatives:  11%|██▍                    | 1494/14029 [00:14<01:30, 138.05it/s][A
Training data size: 4780 positives + 4781 negatives:  11%|██▍                    | 1495/14029 [00:14<01:30, 138.05it/s][A
Training data size: 4782 positives + 4783 negatives:  11%|██▍                    | 1496/14029 [00:14<01:30, 138.05it/s][A
Training data si

Training data size: 5063 positives + 5064 negatives:  12%|██▋                    | 1614/14029 [00:14<01:18, 159.05it/s][A
Training data size: 5065 positives + 5066 negatives:  12%|██▋                    | 1615/14029 [00:14<01:18, 159.05it/s][A
Training data size: 5066 positives + 5067 negatives:  12%|██▋                    | 1616/14029 [00:14<01:18, 159.05it/s][A
Training data size: 5078 positives + 5072 negatives:  12%|██▋                    | 1617/14029 [00:14<01:18, 159.05it/s][A
Training data size: 5079 positives + 5080 negatives:  12%|██▋                    | 1618/14029 [00:14<01:18, 159.05it/s][A
Training data size: 5080 positives + 5081 negatives:  12%|██▋                    | 1619/14029 [00:15<01:18, 159.05it/s][A
Training data size: 5080 positives + 5081 negatives:  12%|██▋                    | 1620/14029 [00:15<01:18, 159.05it/s][A
Training data size: 5081 positives + 5082 negatives:  12%|██▋                    | 1621/14029 [00:15<01:18, 159.05it/s][A
Training data si

Training data size: 5852 positives + 5853 negatives:  12%|██▊                    | 1739/14029 [00:15<01:22, 149.03it/s][A
Training data size: 5853 positives + 5854 negatives:  12%|██▊                    | 1740/14029 [00:15<01:22, 149.03it/s][A
Training data size: 5854 positives + 5855 negatives:  12%|██▊                    | 1741/14029 [00:15<01:22, 149.03it/s][A
Training data size: 5855 positives + 5856 negatives:  12%|██▊                    | 1742/14029 [00:15<01:22, 149.03it/s][A
Training data size: 5855 positives + 5856 negatives:  12%|██▊                    | 1743/14029 [00:15<01:35, 128.13it/s][A
Training data size: 5856 positives + 5857 negatives:  12%|██▊                    | 1743/14029 [00:15<01:35, 128.13it/s][A
Training data size: 5860 positives + 5861 negatives:  12%|██▊                    | 1744/14029 [00:15<01:35, 128.13it/s][A
Training data size: 5861 positives + 5862 negatives:  12%|██▊                    | 1745/14029 [00:16<01:35, 128.13it/s][A
Training data si

Training data size: 6187 positives + 6188 negatives:  13%|███                    | 1864/14029 [00:16<01:24, 144.12it/s][A
Training data size: 6188 positives + 6189 negatives:  13%|███                    | 1864/14029 [00:16<01:24, 144.12it/s][A
Training data size: 6189 positives + 6190 negatives:  13%|███                    | 1865/14029 [00:16<01:24, 144.12it/s][A
Training data size: 6190 positives + 6191 negatives:  13%|███                    | 1866/14029 [00:16<01:24, 144.12it/s][A
Training data size: 6191 positives + 6192 negatives:  13%|███                    | 1867/14029 [00:16<01:24, 144.12it/s][A
Training data size: 6192 positives + 6193 negatives:  13%|███                    | 1868/14029 [00:16<01:24, 144.12it/s][A
Training data size: 6201 positives + 6202 negatives:  13%|███                    | 1869/14029 [00:16<01:24, 144.12it/s][A
Training data size: 6202 positives + 6203 negatives:  13%|███                    | 1870/14029 [00:16<01:24, 144.12it/s][A
Training data si

Training data size: 6527 positives + 6528 negatives:  14%|███▎                   | 1989/14029 [00:17<01:15, 159.93it/s][A
Training data size: 6527 positives + 6528 negatives:  14%|███▎                   | 1990/14029 [00:17<01:14, 161.98it/s][A
Training data size: 6528 positives + 6529 negatives:  14%|███▎                   | 1990/14029 [00:17<01:14, 161.98it/s][A
Training data size: 6529 positives + 6530 negatives:  14%|███▎                   | 1991/14029 [00:17<01:14, 161.98it/s][A
Training data size: 6531 positives + 6531 negatives:  14%|███▎                   | 1992/14029 [00:17<01:14, 161.98it/s][A
Training data size: 6532 positives + 6533 negatives:  14%|███▎                   | 1993/14029 [00:17<01:14, 161.98it/s][A
Training data size: 6533 positives + 6534 negatives:  14%|███▎                   | 1994/14029 [00:17<01:14, 161.98it/s][A
Training data size: 6544 positives + 6542 negatives:  14%|███▎                   | 1995/14029 [00:17<01:14, 161.98it/s][A
Training data si

Training data size: 6899 positives + 6900 negatives:  15%|███▍                   | 2113/14029 [00:18<01:13, 161.96it/s][A
Training data size: 6900 positives + 6901 negatives:  15%|███▍                   | 2114/14029 [00:18<01:13, 161.96it/s][A
Training data size: 6903 positives + 6904 negatives:  15%|███▍                   | 2115/14029 [00:18<01:13, 161.96it/s][A
Training data size: 6904 positives + 6905 negatives:  15%|███▍                   | 2116/14029 [00:18<01:13, 161.96it/s][A
Training data size: 6905 positives + 6906 negatives:  15%|███▍                   | 2117/14029 [00:18<01:13, 161.96it/s][A
Training data size: 6906 positives + 6907 negatives:  15%|███▍                   | 2118/14029 [00:18<01:13, 161.96it/s][A
Training data size: 6950 positives + 6926 negatives:  15%|███▍                   | 2119/14029 [00:18<01:13, 161.96it/s][A
Training data size: 6951 positives + 6945 negatives:  15%|███▍                   | 2120/14029 [00:18<01:13, 161.96it/s][A
Training data si

Training data size: 7436 positives + 7437 negatives:  16%|███▋                   | 2238/14029 [00:19<01:29, 132.43it/s][A
Training data size: 7436 positives + 7437 negatives:  16%|███▋                   | 2239/14029 [00:19<01:28, 133.49it/s][A
Training data size: 7437 positives + 7438 negatives:  16%|███▋                   | 2239/14029 [00:19<01:28, 133.49it/s][A
Training data size: 7437 positives + 7438 negatives:  16%|███▋                   | 2240/14029 [00:19<01:28, 133.49it/s][A
Training data size: 7438 positives + 7439 negatives:  16%|███▋                   | 2241/14029 [00:19<01:28, 133.49it/s][A
Training data size: 7440 positives + 7441 negatives:  16%|███▋                   | 2242/14029 [00:19<01:28, 133.49it/s][A
Training data size: 7443 positives + 7443 negatives:  16%|███▋                   | 2243/14029 [00:19<01:28, 133.49it/s][A
Training data size: 7445 positives + 7446 negatives:  16%|███▋                   | 2244/14029 [00:19<01:28, 133.49it/s][A
Training data si

Training data size: 7896 positives + 7897 negatives:  17%|███▊                   | 2361/14029 [00:20<01:28, 132.59it/s][A
Training data size: 7900 positives + 7901 negatives:  17%|███▊                   | 2362/14029 [00:20<01:27, 132.59it/s][A
Training data size: 7901 positives + 7902 negatives:  17%|███▊                   | 2363/14029 [00:20<01:27, 132.59it/s][A
Training data size: 7912 positives + 7913 negatives:  17%|███▉                   | 2364/14029 [00:20<01:27, 132.59it/s][A
Training data size: 7924 positives + 7920 negatives:  17%|███▉                   | 2365/14029 [00:20<01:27, 132.59it/s][A
Training data size: 7925 positives + 7926 negatives:  17%|███▉                   | 2366/14029 [00:20<01:27, 132.59it/s][A
Training data size: 7926 positives + 7927 negatives:  17%|███▉                   | 2367/14029 [00:20<01:27, 132.59it/s][A
Training data size: 7927 positives + 7927 negatives:  17%|███▉                   | 2368/14029 [00:20<01:27, 132.59it/s][A
Training data si

Training data size: 8256 positives + 8257 negatives:  18%|████                   | 2486/14029 [00:21<01:23, 138.14it/s][A
Training data size: 8260 positives + 8261 negatives:  18%|████                   | 2487/14029 [00:21<01:23, 138.14it/s][A
Training data size: 8260 positives + 8261 negatives:  18%|████                   | 2488/14029 [00:21<01:28, 129.89it/s][A
Training data size: 8263 positives + 8263 negatives:  18%|████                   | 2488/14029 [00:21<01:28, 129.89it/s][A
Training data size: 8264 positives + 8265 negatives:  18%|████                   | 2489/14029 [00:21<01:28, 129.89it/s][A
Training data size: 8267 positives + 8268 negatives:  18%|████                   | 2490/14029 [00:21<01:28, 129.89it/s][A
Training data size: 8271 positives + 8272 negatives:  18%|████                   | 2491/14029 [00:21<01:28, 129.89it/s][A
Training data size: 8279 positives + 8280 negatives:  18%|████                   | 2492/14029 [00:21<01:28, 129.89it/s][A
Training data si

Training data size: 8854 positives + 8855 negatives:  19%|████▎                  | 2609/14029 [00:22<01:28, 128.76it/s][A
Training data size: 8856 positives + 8857 negatives:  19%|████▎                  | 2610/14029 [00:22<01:28, 128.76it/s][A
Training data size: 8857 positives + 8858 negatives:  19%|████▎                  | 2611/14029 [00:22<01:28, 128.76it/s][A
Training data size: 8858 positives + 8859 negatives:  19%|████▎                  | 2612/14029 [00:22<01:28, 128.76it/s][A
Training data size: 8859 positives + 8860 negatives:  19%|████▎                  | 2613/14029 [00:22<01:28, 128.76it/s][A
Training data size: 8859 positives + 8860 negatives:  19%|████▎                  | 2614/14029 [00:22<01:28, 128.76it/s][A
Training data size: 8860 positives + 8861 negatives:  19%|████▎                  | 2615/14029 [00:22<01:28, 128.76it/s][A
Training data size: 8862 positives + 8862 negatives:  19%|████▎                  | 2616/14029 [00:22<01:28, 128.76it/s][A
Training data si

Training data size: 9228 positives + 9229 negatives:  19%|████▍                  | 2733/14029 [00:23<01:24, 132.90it/s][A
Training data size: 9237 positives + 9236 negatives:  19%|████▍                  | 2734/14029 [00:23<01:24, 132.90it/s][A
Training data size: 9238 positives + 9239 negatives:  19%|████▍                  | 2735/14029 [00:23<01:24, 132.90it/s][A
Training data size: 9239 positives + 9240 negatives:  20%|████▍                  | 2736/14029 [00:23<01:24, 132.90it/s][A
Training data size: 9241 positives + 9242 negatives:  20%|████▍                  | 2737/14029 [00:23<01:24, 132.90it/s][A
Training data size: 9244 positives + 9245 negatives:  20%|████▍                  | 2738/14029 [00:23<01:24, 132.90it/s][A
Training data size: 9245 positives + 9245 negatives:  20%|████▍                  | 2739/14029 [00:23<01:24, 132.90it/s][A
Training data size: 9246 positives + 9246 negatives:  20%|████▍                  | 2740/14029 [00:23<01:24, 132.90it/s][A
Training data si

Training data size: 9684 positives + 9685 negatives:  20%|████▋                  | 2857/14029 [00:23<01:15, 147.93it/s][A
Training data size: 9686 positives + 9686 negatives:  20%|████▋                  | 2858/14029 [00:23<01:15, 147.93it/s][A
Training data size: 9704 positives + 9699 negatives:  20%|████▋                  | 2859/14029 [00:23<01:15, 147.93it/s][A
Training data size: 9705 positives + 9706 negatives:  20%|████▋                  | 2860/14029 [00:23<01:15, 147.93it/s][A
Training data size: 9707 positives + 9708 negatives:  20%|████▋                  | 2861/14029 [00:23<01:15, 147.93it/s][A
Training data size: 9715 positives + 9716 negatives:  20%|████▋                  | 2862/14029 [00:24<01:15, 147.93it/s][A
Training data size: 9716 positives + 9717 negatives:  20%|████▋                  | 2863/14029 [00:24<01:15, 147.93it/s][A
Training data size: 9717 positives + 9718 negatives:  20%|████▋                  | 2864/14029 [00:24<01:15, 147.93it/s][A
Training data si

Training data size: 9969 positives + 9969 negatives:  21%|████▉                  | 2982/14029 [00:24<01:22, 134.03it/s][A
Training data size: 9969 positives + 9970 negatives:  21%|████▉                  | 2982/14029 [00:24<01:22, 134.03it/s][A
Training data size: 9976 positives + 9976 negatives:  21%|████▉                  | 2983/14029 [00:24<01:22, 134.03it/s][A
Training data size: 9987 positives + 9987 negatives:  21%|████▉                  | 2984/14029 [00:24<01:22, 134.03it/s][A
Training data size: 9990 positives + 9991 negatives:  21%|████▉                  | 2985/14029 [00:24<01:22, 134.03it/s][A
Training data size: 9993 positives + 9994 negatives:  21%|████▉                  | 2986/14029 [00:24<01:22, 134.03it/s][A
Training data size: 9994 positives + 9995 negatives:  21%|████▉                  | 2987/14029 [00:24<01:22, 134.03it/s][A
Training data size: 9995 positives + 9996 negatives:  21%|████▉                  | 2988/14029 [00:24<01:22, 134.03it/s][A
Training data si

Training data size: 10350 positives + 10351 negatives:  22%|████▋                | 3106/14029 [00:25<01:16, 143.17it/s][A
Training data size: 10397 positives + 10398 negatives:  22%|████▋                | 3107/14029 [00:25<01:16, 143.17it/s][A
Training data size: 10399 positives + 10400 negatives:  22%|████▋                | 3108/14029 [00:25<01:16, 143.17it/s][A
Training data size: 10403 positives + 10404 negatives:  22%|████▋                | 3109/14029 [00:25<01:16, 143.17it/s][A
Training data size: 10407 positives + 10407 negatives:  22%|████▋                | 3110/14029 [00:25<01:16, 143.17it/s][A
Training data size: 10411 positives + 10412 negatives:  22%|████▋                | 3111/14029 [00:25<01:16, 143.17it/s][A
Training data size: 10411 positives + 10412 negatives:  22%|████▋                | 3112/14029 [00:25<01:26, 126.58it/s][A
Training data size: 10422 positives + 10423 negatives:  22%|████▋                | 3112/14029 [00:25<01:26, 126.58it/s][A
Training data si

Training data size: 10845 positives + 10846 negatives:  23%|████▊                | 3230/14029 [00:26<01:19, 135.60it/s][A
Training data size: 10846 positives + 10847 negatives:  23%|████▊                | 3231/14029 [00:26<01:19, 135.60it/s][A
Training data size: 10847 positives + 10848 negatives:  23%|████▊                | 3232/14029 [00:26<01:19, 135.60it/s][A
Training data size: 10848 positives + 10849 negatives:  23%|████▊                | 3233/14029 [00:26<01:19, 135.60it/s][A
Training data size: 10849 positives + 10850 negatives:  23%|████▊                | 3234/14029 [00:26<01:19, 135.60it/s][A
Training data size: 10850 positives + 10851 negatives:  23%|████▊                | 3235/14029 [00:26<01:19, 135.60it/s][A
Training data size: 10851 positives + 10852 negatives:  23%|████▊                | 3236/14029 [00:26<01:19, 135.60it/s][A
Training data size: 10851 positives + 10852 negatives:  23%|████▊                | 3237/14029 [00:26<01:19, 135.60it/s][A
Training data si

Training data size: 11221 positives + 11222 negatives:  24%|█████                | 3355/14029 [00:27<01:23, 128.34it/s][A
Training data size: 11223 positives + 11224 negatives:  24%|█████                | 3356/14029 [00:27<01:23, 128.34it/s][A
Training data size: 11224 positives + 11225 negatives:  24%|█████                | 3357/14029 [00:27<01:23, 128.34it/s][A
Training data size: 11225 positives + 11225 negatives:  24%|█████                | 3358/14029 [00:27<01:23, 128.34it/s][A
Training data size: 11225 positives + 11225 negatives:  24%|█████                | 3359/14029 [00:27<01:17, 136.88it/s][A
Training data size: 11226 positives + 11227 negatives:  24%|█████                | 3359/14029 [00:27<01:17, 136.88it/s][A
Training data size: 11228 positives + 11229 negatives:  24%|█████                | 3360/14029 [00:27<01:17, 136.88it/s][A
Training data size: 11229 positives + 11230 negatives:  24%|█████                | 3361/14029 [00:27<01:17, 136.88it/s][A
Training data si

Training data size: 11633 positives + 11634 negatives:  25%|█████▏               | 3479/14029 [00:28<01:36, 109.00it/s][A
Training data size: 11642 positives + 11643 negatives:  25%|█████▏               | 3480/14029 [00:28<01:36, 109.00it/s][A
Training data size: 11646 positives + 11647 negatives:  25%|█████▏               | 3481/14029 [00:28<01:36, 109.00it/s][A
Training data size: 11649 positives + 11650 negatives:  25%|█████▏               | 3482/14029 [00:28<01:36, 109.00it/s][A
Training data size: 11649 positives + 11650 negatives:  25%|█████▏               | 3483/14029 [00:28<01:32, 114.46it/s][A
Training data size: 11651 positives + 11652 negatives:  25%|█████▏               | 3483/14029 [00:28<01:32, 114.46it/s][A
Training data size: 11652 positives + 11653 negatives:  25%|█████▏               | 3484/14029 [00:28<01:32, 114.46it/s][A
Training data size: 11653 positives + 11654 negatives:  25%|█████▏               | 3485/14029 [00:28<01:32, 114.46it/s][A
Training data si

Training data size: 12059 positives + 12060 negatives:  26%|█████▍               | 3602/14029 [00:29<01:12, 142.84it/s][A
Training data size: 12061 positives + 12062 negatives:  26%|█████▍               | 3603/14029 [00:29<01:12, 142.84it/s][A
Training data size: 12073 positives + 12074 negatives:  26%|█████▍               | 3604/14029 [00:29<01:12, 142.84it/s][A
Training data size: 12085 positives + 12085 negatives:  26%|█████▍               | 3605/14029 [00:29<01:12, 142.84it/s][A
Training data size: 12091 positives + 12091 negatives:  26%|█████▍               | 3606/14029 [00:29<01:12, 142.84it/s][A
Training data size: 12093 positives + 12094 negatives:  26%|█████▍               | 3607/14029 [00:29<01:12, 142.84it/s][A
Training data size: 12094 positives + 12095 negatives:  26%|█████▍               | 3608/14029 [00:29<01:12, 142.84it/s][A
Training data size: 12095 positives + 12096 negatives:  26%|█████▍               | 3609/14029 [00:29<01:12, 142.84it/s][A
Training data si

Training data size: 12522 positives + 12523 negatives:  27%|█████▊                | 3727/14029 [00:31<03:33, 48.28it/s][A
Training data size: 12524 positives + 12525 negatives:  27%|█████▊                | 3728/14029 [00:31<03:33, 48.28it/s][A
Training data size: 12524 positives + 12525 negatives:  27%|█████▊                | 3729/14029 [00:31<02:41, 63.93it/s][A
Training data size: 12525 positives + 12526 negatives:  27%|█████▊                | 3729/14029 [00:31<02:41, 63.93it/s][A
Training data size: 12526 positives + 12526 negatives:  27%|█████▊                | 3730/14029 [00:31<02:41, 63.93it/s][A
Training data size: 12526 positives + 12527 negatives:  27%|█████▊                | 3731/14029 [00:31<02:41, 63.93it/s][A
Training data size: 12527 positives + 12528 negatives:  27%|█████▊                | 3732/14029 [00:31<02:41, 63.93it/s][A
Training data size: 12528 positives + 12529 negatives:  27%|█████▊                | 3733/14029 [00:31<02:41, 63.93it/s][A
Training data si

Training data size: 12927 positives + 12927 negatives:  27%|█████▊               | 3851/14029 [00:32<01:22, 123.31it/s][A
Training data size: 12928 positives + 12928 negatives:  27%|█████▊               | 3852/14029 [00:32<01:22, 123.31it/s][A
Training data size: 12929 positives + 12930 negatives:  27%|█████▊               | 3853/14029 [00:32<01:22, 123.31it/s][A
Training data size: 12931 positives + 12931 negatives:  27%|█████▊               | 3854/14029 [00:32<01:22, 123.31it/s][A
Training data size: 12932 positives + 12932 negatives:  27%|█████▊               | 3855/14029 [00:32<01:22, 123.31it/s][A
Training data size: 12933 positives + 12934 negatives:  27%|█████▊               | 3856/14029 [00:32<01:22, 123.31it/s][A
Training data size: 12936 positives + 12937 negatives:  27%|█████▊               | 3857/14029 [00:32<01:22, 123.31it/s][A
Training data size: 12937 positives + 12938 negatives:  28%|█████▊               | 3858/14029 [00:32<01:22, 123.31it/s][A
Training data si

Training data size: 13312 positives + 13313 negatives:  28%|█████▉               | 3976/14029 [00:33<01:06, 150.14it/s][A
Training data size: 13315 positives + 13316 negatives:  28%|█████▉               | 3977/14029 [00:33<01:06, 150.14it/s][A
Training data size: 13318 positives + 13319 negatives:  28%|█████▉               | 3978/14029 [00:33<01:06, 150.14it/s][A
Training data size: 13319 positives + 13320 negatives:  28%|█████▉               | 3979/14029 [00:33<01:06, 150.14it/s][A
Training data size: 13319 positives + 13320 negatives:  28%|█████▉               | 3980/14029 [00:33<01:11, 140.15it/s][A
Training data size: 13320 positives + 13321 negatives:  28%|█████▉               | 3980/14029 [00:33<01:11, 140.15it/s][A
Training data size: 13320 positives + 13321 negatives:  28%|█████▉               | 3981/14029 [00:33<01:11, 140.15it/s][A
Training data size: 13325 positives + 13326 negatives:  28%|█████▉               | 3982/14029 [00:33<01:11, 140.15it/s][A
Training data si

Training data size: 13707 positives + 13708 negatives:  29%|██████▏              | 4099/14029 [00:34<01:22, 120.78it/s][A
Training data size: 13708 positives + 13709 negatives:  29%|██████▏              | 4100/14029 [00:34<01:22, 120.78it/s][A
Training data size: 13709 positives + 13710 negatives:  29%|██████▏              | 4101/14029 [00:34<01:22, 120.78it/s][A
Training data size: 13710 positives + 13711 negatives:  29%|██████▏              | 4102/14029 [00:34<01:22, 120.78it/s][A
Training data size: 13711 positives + 13712 negatives:  29%|██████▏              | 4103/14029 [00:34<01:22, 120.78it/s][A
Training data size: 13716 positives + 13717 negatives:  29%|██████▏              | 4104/14029 [00:34<01:22, 120.78it/s][A
Training data size: 13717 positives + 13717 negatives:  29%|██████▏              | 4105/14029 [00:34<01:22, 120.78it/s][A
Training data size: 13718 positives + 13719 negatives:  29%|██████▏              | 4106/14029 [00:34<01:22, 120.78it/s][A
Training data si

Training data size: 14124 positives + 14125 negatives:  30%|██████▎              | 4223/14029 [00:35<01:08, 143.13it/s][A
Training data size: 14125 positives + 14126 negatives:  30%|██████▎              | 4224/14029 [00:35<01:08, 143.13it/s][A
Training data size: 14126 positives + 14127 negatives:  30%|██████▎              | 4225/14029 [00:35<01:08, 143.13it/s][A
Training data size: 14127 positives + 14128 negatives:  30%|██████▎              | 4226/14029 [00:35<01:08, 143.13it/s][A
Training data size: 14127 positives + 14128 negatives:  30%|██████▎              | 4227/14029 [00:35<01:08, 143.13it/s][A
Training data size: 14129 positives + 14130 negatives:  30%|██████▎              | 4228/14029 [00:35<01:08, 143.13it/s][A
Training data size: 14130 positives + 14131 negatives:  30%|██████▎              | 4229/14029 [00:35<01:08, 143.13it/s][A
Training data size: 14141 positives + 14141 negatives:  30%|██████▎              | 4230/14029 [00:35<01:08, 143.13it/s][A
Training data si

Training data size: 14464 positives + 14465 negatives:  31%|██████▌              | 4348/14029 [00:35<01:06, 145.69it/s][A
Training data size: 14465 positives + 14466 negatives:  31%|██████▌              | 4349/14029 [00:35<01:06, 145.69it/s][A
Training data size: 14465 positives + 14466 negatives:  31%|██████▌              | 4350/14029 [00:35<01:04, 151.05it/s][A
Training data size: 14466 positives + 14466 negatives:  31%|██████▌              | 4350/14029 [00:35<01:04, 151.05it/s][A
Training data size: 14467 positives + 14467 negatives:  31%|██████▌              | 4351/14029 [00:35<01:04, 151.05it/s][A
Training data size: 14468 positives + 14469 negatives:  31%|██████▌              | 4352/14029 [00:35<01:04, 151.05it/s][A
Training data size: 14469 positives + 14469 negatives:  31%|██████▌              | 4353/14029 [00:35<01:04, 151.05it/s][A
Training data size: 14544 positives + 14488 negatives:  31%|██████▌              | 4354/14029 [00:35<01:04, 151.05it/s][A
Training data si

Training data size: 15033 positives + 15034 negatives:  32%|███████               | 4471/14029 [00:37<01:50, 86.49it/s][A
Training data size: 15037 positives + 15037 negatives:  32%|███████               | 4472/14029 [00:37<01:50, 86.49it/s][A
Training data size: 15062 positives + 15041 negatives:  32%|███████               | 4473/14029 [00:37<01:50, 86.49it/s][A
Training data size: 15069 positives + 15056 negatives:  32%|███████               | 4474/14029 [00:37<01:50, 86.49it/s][A
Training data size: 15070 positives + 15071 negatives:  32%|███████               | 4475/14029 [00:37<01:50, 86.49it/s][A
Training data size: 15072 positives + 15073 negatives:  32%|███████               | 4476/14029 [00:37<01:50, 86.49it/s][A
Training data size: 15073 positives + 15074 negatives:  32%|███████               | 4477/14029 [00:37<01:50, 86.49it/s][A
Training data size: 15087 positives + 15085 negatives:  32%|███████               | 4478/14029 [00:37<01:50, 86.49it/s][A
Training data si

Training data size: 15420 positives + 15420 negatives:  33%|██████▉              | 4595/14029 [00:38<01:15, 125.24it/s][A
Training data size: 15425 positives + 15426 negatives:  33%|██████▉              | 4596/14029 [00:38<01:15, 125.24it/s][A
Training data size: 15427 positives + 15428 negatives:  33%|██████▉              | 4597/14029 [00:38<01:15, 125.24it/s][A
Training data size: 15446 positives + 15437 negatives:  33%|██████▉              | 4598/14029 [00:38<01:15, 125.24it/s][A
Training data size: 15447 positives + 15448 negatives:  33%|██████▉              | 4599/14029 [00:38<01:15, 125.24it/s][A
Training data size: 15447 positives + 15448 negatives:  33%|███████▏              | 4600/14029 [00:38<01:45, 88.98it/s][A
Training data size: 15449 positives + 15450 negatives:  33%|███████▏              | 4600/14029 [00:38<01:45, 88.98it/s][A
Training data size: 15452 positives + 15453 negatives:  33%|███████▏              | 4601/14029 [00:38<01:45, 88.98it/s][A
Training data si

Training data size: 15784 positives + 15785 negatives:  34%|███████              | 4719/14029 [00:39<01:31, 101.76it/s][A
Training data size: 15786 positives + 15787 negatives:  34%|███████              | 4720/14029 [00:39<01:31, 101.76it/s][A
Training data size: 15786 positives + 15787 negatives:  34%|███████              | 4721/14029 [00:39<01:31, 101.76it/s][A
Training data size: 15787 positives + 15788 negatives:  34%|███████              | 4722/14029 [00:39<01:31, 101.76it/s][A
Training data size: 15787 positives + 15788 negatives:  34%|███████              | 4723/14029 [00:39<01:31, 101.76it/s][A
Training data size: 15788 positives + 15789 negatives:  34%|███████              | 4724/14029 [00:39<01:31, 101.76it/s][A
Training data size: 15800 positives + 15801 negatives:  34%|███████              | 4725/14029 [00:39<01:31, 101.76it/s][A
Training data size: 15801 positives + 15802 negatives:  34%|███████              | 4726/14029 [00:39<01:31, 101.76it/s][A
Training data si

Training data size: 16265 positives + 16266 negatives:  35%|███████▎             | 4844/14029 [00:40<01:03, 145.57it/s][A
Training data size: 16266 positives + 16267 negatives:  35%|███████▎             | 4845/14029 [00:40<01:03, 145.57it/s][A
Training data size: 16269 positives + 16270 negatives:  35%|███████▎             | 4846/14029 [00:40<01:03, 145.57it/s][A
Training data size: 16269 positives + 16270 negatives:  35%|███████▎             | 4847/14029 [00:40<01:03, 145.57it/s][A
Training data size: 16270 positives + 16271 negatives:  35%|███████▎             | 4848/14029 [00:40<01:03, 145.57it/s][A
Training data size: 16271 positives + 16272 negatives:  35%|███████▎             | 4849/14029 [00:40<01:03, 145.57it/s][A
Training data size: 16272 positives + 16273 negatives:  35%|███████▎             | 4850/14029 [00:40<01:03, 145.57it/s][A
Training data size: 16272 positives + 16273 negatives:  35%|███████▎             | 4851/14029 [00:40<01:03, 145.57it/s][A
Training data si

Training data size: 16626 positives + 16627 negatives:  35%|███████▍             | 4969/14029 [00:41<01:21, 111.33it/s][A
Training data size: 16629 positives + 16630 negatives:  35%|███████▍             | 4970/14029 [00:41<01:21, 111.33it/s][A
Training data size: 16629 positives + 16630 negatives:  35%|███████▍             | 4971/14029 [00:41<01:13, 123.68it/s][A
Training data size: 16630 positives + 16631 negatives:  35%|███████▍             | 4971/14029 [00:41<01:13, 123.68it/s][A
Training data size: 16631 positives + 16632 negatives:  35%|███████▍             | 4972/14029 [00:41<01:13, 123.68it/s][A
Training data size: 16633 positives + 16634 negatives:  35%|███████▍             | 4973/14029 [00:41<01:13, 123.68it/s][A
Training data size: 16634 positives + 16635 negatives:  35%|███████▍             | 4974/14029 [00:41<01:13, 123.68it/s][A
Training data size: 16634 positives + 16635 negatives:  35%|███████▍             | 4975/14029 [00:41<01:13, 123.68it/s][A
Training data si

Training data size: 16966 positives + 16967 negatives:  36%|███████▌             | 5093/14029 [00:42<01:07, 132.56it/s][A
Training data size: 16967 positives + 16968 negatives:  36%|███████▋             | 5094/14029 [00:42<01:07, 132.56it/s][A
Training data size: 16967 positives + 16968 negatives:  36%|███████▋             | 5095/14029 [00:42<01:03, 140.19it/s][A
Training data size: 16968 positives + 16969 negatives:  36%|███████▋             | 5095/14029 [00:42<01:03, 140.19it/s][A
Training data size: 16969 positives + 16970 negatives:  36%|███████▋             | 5096/14029 [00:42<01:03, 140.19it/s][A
Training data size: 16973 positives + 16974 negatives:  36%|███████▋             | 5097/14029 [00:42<01:03, 140.19it/s][A
Training data size: 16975 positives + 16976 negatives:  36%|███████▋             | 5098/14029 [00:42<01:03, 140.19it/s][A
Training data size: 16976 positives + 16977 negatives:  36%|███████▋             | 5099/14029 [00:42<01:03, 140.19it/s][A
Training data si

Training data size: 17223 positives + 17224 negatives:  37%|███████▊             | 5217/14029 [00:43<01:00, 144.46it/s][A
Training data size: 17224 positives + 17224 negatives:  37%|███████▊             | 5218/14029 [00:43<01:00, 144.46it/s][A
Training data size: 17230 positives + 17230 negatives:  37%|███████▊             | 5219/14029 [00:43<01:00, 144.46it/s][A
Training data size: 17231 positives + 17232 negatives:  37%|███████▊             | 5220/14029 [00:43<01:00, 144.46it/s][A
Training data size: 17232 positives + 17233 negatives:  37%|███████▊             | 5221/14029 [00:43<01:00, 144.46it/s][A
Training data size: 17233 positives + 17234 negatives:  37%|███████▊             | 5222/14029 [00:43<01:00, 144.46it/s][A
Training data size: 17233 positives + 17234 negatives:  37%|███████▊             | 5223/14029 [00:43<01:05, 133.56it/s][A
Training data size: 17234 positives + 17235 negatives:  37%|███████▊             | 5223/14029 [00:43<01:05, 133.56it/s][A
Training data si

Training data size: 17814 positives + 17815 negatives:  38%|███████▉             | 5341/14029 [00:44<01:08, 127.45it/s][A
Training data size: 17814 positives + 17815 negatives:  38%|███████▉             | 5342/14029 [00:44<01:08, 127.15it/s][A
Training data size: 17816 positives + 17817 negatives:  38%|███████▉             | 5342/14029 [00:44<01:08, 127.15it/s][A
Training data size: 17820 positives + 17821 negatives:  38%|███████▉             | 5343/14029 [00:44<01:08, 127.15it/s][A
Training data size: 17821 positives + 17822 negatives:  38%|███████▉             | 5344/14029 [00:44<01:08, 127.15it/s][A
Training data size: 17822 positives + 17823 negatives:  38%|████████             | 5345/14029 [00:44<01:08, 127.15it/s][A
Training data size: 17825 positives + 17826 negatives:  38%|████████             | 5346/14029 [00:44<01:08, 127.15it/s][A
Training data size: 17908 positives + 17830 negatives:  38%|████████             | 5347/14029 [00:44<01:08, 127.15it/s][A
Training data si

Training data size: 18258 positives + 18259 negatives:  39%|████████▌             | 5465/14029 [00:47<05:09, 27.63it/s][A
Training data size: 18260 positives + 18261 negatives:  39%|████████▌             | 5466/14029 [00:47<05:09, 27.63it/s][A
Training data size: 18261 positives + 18262 negatives:  39%|████████▌             | 5467/14029 [00:47<05:09, 27.63it/s][A
Training data size: 18271 positives + 18271 negatives:  39%|████████▌             | 5468/14029 [00:47<05:09, 27.63it/s][A
Training data size: 18272 positives + 18273 negatives:  39%|████████▌             | 5469/14029 [00:47<05:09, 27.63it/s][A
Training data size: 18273 positives + 18274 negatives:  39%|████████▌             | 5470/14029 [00:47<05:09, 27.63it/s][A
Training data size: 18274 positives + 18275 negatives:  39%|████████▌             | 5471/14029 [00:47<05:09, 27.63it/s][A
Training data size: 18276 positives + 18277 negatives:  39%|████████▌             | 5472/14029 [00:47<05:09, 27.63it/s][A
Training data si

Training data size: 18702 positives + 18703 negatives:  40%|████████▎            | 5588/14029 [00:48<01:09, 120.85it/s][A
Training data size: 18704 positives + 18705 negatives:  40%|████████▎            | 5589/14029 [00:48<01:09, 120.85it/s][A
Training data size: 18712 positives + 18712 negatives:  40%|████████▎            | 5590/14029 [00:48<01:09, 120.85it/s][A
Training data size: 18715 positives + 18716 negatives:  40%|████████▎            | 5591/14029 [00:48<01:09, 120.85it/s][A
Training data size: 18716 positives + 18717 negatives:  40%|████████▎            | 5592/14029 [00:48<01:09, 120.85it/s][A
Training data size: 18719 positives + 18720 negatives:  40%|████████▎            | 5593/14029 [00:48<01:09, 120.85it/s][A
Training data size: 18720 positives + 18721 negatives:  40%|████████▎            | 5594/14029 [00:48<01:09, 120.85it/s][A
Training data size: 18725 positives + 18724 negatives:  40%|████████▍            | 5595/14029 [00:48<01:09, 120.85it/s][A
Training data si

Training data size: 19150 positives + 19151 negatives:  41%|████████▉             | 5713/14029 [00:49<02:01, 68.46it/s][A
Training data size: 19151 positives + 19152 negatives:  41%|████████▉             | 5714/14029 [00:49<02:01, 68.46it/s][A
Training data size: 19152 positives + 19153 negatives:  41%|████████▉             | 5715/14029 [00:49<02:01, 68.46it/s][A
Training data size: 19170 positives + 19169 negatives:  41%|████████▉             | 5716/14029 [00:49<02:01, 68.46it/s][A
Training data size: 19172 positives + 19172 negatives:  41%|████████▉             | 5717/14029 [00:49<02:01, 68.46it/s][A
Training data size: 19175 positives + 19176 negatives:  41%|████████▉             | 5718/14029 [00:49<02:01, 68.46it/s][A
Training data size: 19179 positives + 19180 negatives:  41%|████████▉             | 5719/14029 [00:49<02:01, 68.46it/s][A
Training data size: 19179 positives + 19180 negatives:  41%|████████▉             | 5720/14029 [00:49<02:01, 68.46it/s][A
Training data si

Training data size: 19616 positives + 19616 negatives:  42%|████████▋            | 5837/14029 [00:50<01:06, 123.74it/s][A
Training data size: 19617 positives + 19618 negatives:  42%|████████▋            | 5838/14029 [00:50<01:06, 123.74it/s][A
Training data size: 19618 positives + 19619 negatives:  42%|████████▋            | 5839/14029 [00:50<01:06, 123.74it/s][A
Training data size: 19624 positives + 19625 negatives:  42%|████████▋            | 5840/14029 [00:50<01:06, 123.74it/s][A
Training data size: 19625 positives + 19626 negatives:  42%|████████▋            | 5841/14029 [00:50<01:06, 123.74it/s][A
Training data size: 19626 positives + 19627 negatives:  42%|████████▋            | 5842/14029 [00:50<01:06, 123.74it/s][A
Training data size: 19627 positives + 19628 negatives:  42%|████████▋            | 5843/14029 [00:50<01:06, 123.74it/s][A
Training data size: 19627 positives + 19628 negatives:  42%|████████▋            | 5844/14029 [00:50<01:01, 132.19it/s][A
Training data si

Training data size: 20015 positives + 20016 negatives:  42%|████████▉            | 5961/14029 [00:51<01:17, 104.50it/s][A
Training data size: 20017 positives + 20018 negatives:  42%|████████▉            | 5962/14029 [00:51<01:17, 104.50it/s][A
Training data size: 20018 positives + 20019 negatives:  43%|████████▉            | 5963/14029 [00:51<01:17, 104.50it/s][A
Training data size: 20021 positives + 20022 negatives:  43%|████████▉            | 5964/14029 [00:51<01:17, 104.50it/s][A
Training data size: 20021 positives + 20022 negatives:  43%|████████▉            | 5965/14029 [00:51<01:06, 120.66it/s][A
Training data size: 20028 positives + 20029 negatives:  43%|████████▉            | 5965/14029 [00:51<01:06, 120.66it/s][A
Training data size: 20030 positives + 20031 negatives:  43%|████████▉            | 5966/14029 [00:51<01:06, 120.66it/s][A
Training data size: 20031 positives + 20032 negatives:  43%|████████▉            | 5967/14029 [00:51<01:06, 120.66it/s][A
Training data si

Training data size: 20365 positives + 20366 negatives:  43%|█████████            | 6085/14029 [00:52<00:54, 146.78it/s][A
Training data size: 20366 positives + 20367 negatives:  43%|█████████            | 6086/14029 [00:52<00:54, 146.78it/s][A
Training data size: 20367 positives + 20368 negatives:  43%|█████████            | 6087/14029 [00:52<00:54, 146.78it/s][A
Training data size: 20368 positives + 20369 negatives:  43%|█████████            | 6088/14029 [00:52<00:54, 146.78it/s][A
Training data size: 20370 positives + 20371 negatives:  43%|█████████            | 6089/14029 [00:52<00:54, 146.78it/s][A
Training data size: 20374 positives + 20375 negatives:  43%|█████████            | 6090/14029 [00:52<00:54, 146.78it/s][A
Training data size: 20375 positives + 20376 negatives:  43%|█████████            | 6091/14029 [00:52<00:54, 146.78it/s][A
Training data size: 20376 positives + 20377 negatives:  43%|█████████            | 6092/14029 [00:52<00:54, 146.78it/s][A
Training data si

Training data size: 20696 positives + 20697 negatives:  44%|█████████▎           | 6209/14029 [00:53<00:55, 140.77it/s][A
Training data size: 20702 positives + 20702 negatives:  44%|█████████▎           | 6210/14029 [00:53<00:55, 140.77it/s][A
Training data size: 20720 positives + 20719 negatives:  44%|█████████▎           | 6211/14029 [00:53<00:55, 140.77it/s][A
Training data size: 20721 positives + 20722 negatives:  44%|█████████▎           | 6212/14029 [00:53<00:55, 140.77it/s][A
Training data size: 20722 positives + 20723 negatives:  44%|█████████▎           | 6213/14029 [00:53<00:55, 140.77it/s][A
Training data size: 20723 positives + 20724 negatives:  44%|█████████▎           | 6214/14029 [00:53<00:55, 140.77it/s][A
Training data size: 20724 positives + 20725 negatives:  44%|█████████▎           | 6215/14029 [00:53<00:55, 140.77it/s][A
Training data size: 20726 positives + 20727 negatives:  44%|█████████▎           | 6216/14029 [00:53<00:55, 140.77it/s][A
Training data si

Training data size: 21089 positives + 21090 negatives:  45%|█████████▍           | 6334/14029 [00:54<00:58, 131.16it/s][A
Training data size: 21090 positives + 21091 negatives:  45%|█████████▍           | 6335/14029 [00:54<00:58, 131.16it/s][A
Training data size: 21091 positives + 21092 negatives:  45%|█████████▍           | 6336/14029 [00:54<00:58, 131.16it/s][A
Training data size: 21091 positives + 21092 negatives:  45%|█████████▍           | 6337/14029 [00:54<00:59, 130.08it/s][A
Training data size: 21121 positives + 21120 negatives:  45%|█████████▍           | 6337/14029 [00:54<00:59, 130.08it/s][A
Training data size: 21123 positives + 21124 negatives:  45%|█████████▍           | 6338/14029 [00:54<00:59, 130.08it/s][A
Training data size: 21124 positives + 21125 negatives:  45%|█████████▍           | 6339/14029 [00:54<00:59, 130.08it/s][A
Training data size: 21136 positives + 21135 negatives:  45%|█████████▍           | 6340/14029 [00:54<00:59, 130.08it/s][A
Training data si

Training data size: 21545 positives + 21546 negatives:  46%|█████████▋           | 6457/14029 [00:55<01:04, 117.98it/s][A
Training data size: 21575 positives + 21574 negatives:  46%|█████████▋           | 6458/14029 [00:55<01:04, 117.98it/s][A
Training data size: 21576 positives + 21577 negatives:  46%|█████████▋           | 6459/14029 [00:55<01:04, 117.98it/s][A
Training data size: 21580 positives + 21580 negatives:  46%|█████████▋           | 6460/14029 [00:55<01:04, 117.98it/s][A
Training data size: 21587 positives + 21586 negatives:  46%|█████████▋           | 6461/14029 [00:55<01:04, 117.98it/s][A
Training data size: 21588 positives + 21589 negatives:  46%|█████████▋           | 6462/14029 [00:55<01:04, 117.98it/s][A
Training data size: 21590 positives + 21591 negatives:  46%|█████████▋           | 6463/14029 [00:55<01:04, 117.98it/s][A
Training data size: 21591 positives + 21592 negatives:  46%|█████████▋           | 6464/14029 [00:55<01:04, 117.98it/s][A
Training data si

Training data size: 21989 positives + 21990 negatives:  47%|██████████▎           | 6581/14029 [00:56<01:59, 62.46it/s][A
Training data size: 21991 positives + 21992 negatives:  47%|██████████▎           | 6582/14029 [00:56<01:59, 62.46it/s][A
Training data size: 21992 positives + 21993 negatives:  47%|██████████▎           | 6583/14029 [00:56<01:59, 62.46it/s][A
Training data size: 21994 positives + 21995 negatives:  47%|██████████▎           | 6584/14029 [00:56<01:59, 62.46it/s][A
Training data size: 21995 positives + 21996 negatives:  47%|██████████▎           | 6585/14029 [00:56<01:59, 62.46it/s][A
Training data size: 21996 positives + 21997 negatives:  47%|██████████▎           | 6586/14029 [00:56<01:59, 62.46it/s][A
Training data size: 22000 positives + 22001 negatives:  47%|██████████▎           | 6587/14029 [00:56<01:59, 62.46it/s][A
Training data size: 22001 positives + 22002 negatives:  47%|██████████▎           | 6588/14029 [00:56<01:59, 62.46it/s][A
Training data si

Training data size: 22397 positives + 22398 negatives:  48%|██████████           | 6706/14029 [00:57<01:10, 104.27it/s][A
Training data size: 22398 positives + 22399 negatives:  48%|██████████           | 6707/14029 [00:57<01:10, 104.27it/s][A
Training data size: 22398 positives + 22399 negatives:  48%|██████████           | 6708/14029 [00:57<01:01, 119.81it/s][A
Training data size: 22405 positives + 22406 negatives:  48%|██████████           | 6708/14029 [00:57<01:01, 119.81it/s][A
Training data size: 22406 positives + 22407 negatives:  48%|██████████           | 6709/14029 [00:57<01:01, 119.81it/s][A
Training data size: 22416 positives + 22414 negatives:  48%|██████████           | 6710/14029 [00:57<01:01, 119.81it/s][A
Training data size: 22417 positives + 22418 negatives:  48%|██████████           | 6711/14029 [00:57<01:01, 119.81it/s][A
Training data size: 22418 positives + 22419 negatives:  48%|██████████           | 6712/14029 [00:57<01:01, 119.81it/s][A
Training data si

Training data size: 22805 positives + 22806 negatives:  49%|██████████▏          | 6830/14029 [00:58<00:55, 129.47it/s][A
Training data size: 22805 positives + 22806 negatives:  49%|██████████▏          | 6831/14029 [00:58<01:02, 115.04it/s][A
Training data size: 22837 positives + 22817 negatives:  49%|██████████▏          | 6831/14029 [00:58<01:02, 115.04it/s][A
Training data size: 22839 positives + 22822 negatives:  49%|██████████▏          | 6832/14029 [00:58<01:02, 115.04it/s][A
Training data size: 22840 positives + 22841 negatives:  49%|██████████▏          | 6833/14029 [00:58<01:02, 115.04it/s][A
Training data size: 22879 positives + 22855 negatives:  49%|██████████▏          | 6834/14029 [00:58<01:02, 115.04it/s][A
Training data size: 22880 positives + 22881 negatives:  49%|██████████▏          | 6835/14029 [00:58<01:02, 115.04it/s][A
Training data size: 22881 positives + 22882 negatives:  49%|██████████▏          | 6836/14029 [00:58<01:02, 115.04it/s][A
Training data si

Training data size: 23226 positives + 23226 negatives:  50%|██████████▍          | 6954/14029 [00:59<00:52, 133.97it/s][A
Training data size: 23227 positives + 23228 negatives:  50%|██████████▍          | 6955/14029 [00:59<00:52, 133.97it/s][A
Training data size: 23228 positives + 23229 negatives:  50%|██████████▍          | 6956/14029 [00:59<00:52, 133.97it/s][A
Training data size: 23229 positives + 23230 negatives:  50%|██████████▍          | 6957/14029 [00:59<00:52, 133.97it/s][A
Training data size: 23236 positives + 23235 negatives:  50%|██████████▍          | 6958/14029 [00:59<00:52, 133.97it/s][A
Training data size: 23237 positives + 23238 negatives:  50%|██████████▍          | 6959/14029 [00:59<00:52, 133.97it/s][A
Training data size: 23238 positives + 23239 negatives:  50%|██████████▍          | 6960/14029 [00:59<00:52, 133.97it/s][A
Training data size: 23238 positives + 23239 negatives:  50%|██████████▍          | 6961/14029 [00:59<00:52, 133.97it/s][A
Training data si

Training data size: 23614 positives + 23615 negatives:  50%|██████████▌          | 7079/14029 [01:00<00:50, 136.30it/s][A
Training data size: 23617 positives + 23618 negatives:  50%|██████████▌          | 7080/14029 [01:00<00:50, 136.30it/s][A
Training data size: 23619 positives + 23620 negatives:  50%|██████████▌          | 7081/14029 [01:00<00:50, 136.30it/s][A
Training data size: 23619 positives + 23620 negatives:  50%|██████████▌          | 7082/14029 [01:00<00:49, 139.22it/s][A
Training data size: 23629 positives + 23630 negatives:  50%|██████████▌          | 7082/14029 [01:00<00:49, 139.22it/s][A
Training data size: 23630 positives + 23631 negatives:  50%|██████████▌          | 7083/14029 [01:00<00:49, 139.22it/s][A
Training data size: 23631 positives + 23632 negatives:  50%|██████████▌          | 7084/14029 [01:00<00:49, 139.22it/s][A
Training data size: 23632 positives + 23633 negatives:  51%|██████████▌          | 7085/14029 [01:00<00:49, 139.22it/s][A
Training data si

Training data size: 23981 positives + 23977 negatives:  51%|██████████▊          | 7203/14029 [01:01<00:49, 136.89it/s][A
Training data size: 23983 positives + 23984 negatives:  51%|██████████▊          | 7204/14029 [01:01<00:49, 136.89it/s][A
Training data size: 23983 positives + 23984 negatives:  51%|██████████▊          | 7205/14029 [01:01<00:49, 136.89it/s][A
Training data size: 23983 positives + 23984 negatives:  51%|██████████▊          | 7206/14029 [01:01<00:49, 136.89it/s][A
Training data size: 23983 positives + 23984 negatives:  51%|██████████▊          | 7207/14029 [01:01<00:49, 136.89it/s][A
Training data size: 23984 positives + 23985 negatives:  51%|██████████▊          | 7208/14029 [01:01<00:49, 136.89it/s][A
Training data size: 23984 positives + 23985 negatives:  51%|██████████▊          | 7209/14029 [01:01<01:00, 113.38it/s][A
Training data size: 23985 positives + 23986 negatives:  51%|██████████▊          | 7209/14029 [01:01<01:00, 113.38it/s][A
Training data si

Training data size: 24311 positives + 24312 negatives:  52%|██████████▉          | 7327/14029 [01:02<00:51, 131.13it/s][A
Training data size: 24312 positives + 24313 negatives:  52%|██████████▉          | 7328/14029 [01:02<00:51, 131.13it/s][A
Training data size: 24313 positives + 24314 negatives:  52%|██████████▉          | 7329/14029 [01:02<00:51, 131.13it/s][A
Training data size: 24314 positives + 24315 negatives:  52%|██████████▉          | 7330/14029 [01:02<00:51, 131.13it/s][A
Training data size: 24318 positives + 24319 negatives:  52%|██████████▉          | 7331/14029 [01:02<00:51, 131.13it/s][A
Training data size: 24320 positives + 24321 negatives:  52%|██████████▉          | 7332/14029 [01:02<00:51, 131.13it/s][A
Training data size: 24323 positives + 24324 negatives:  52%|██████████▉          | 7333/14029 [01:02<00:51, 131.13it/s][A
Training data size: 24324 positives + 24325 negatives:  52%|██████████▉          | 7334/14029 [01:02<00:51, 131.13it/s][A
Training data si

Training data size: 24754 positives + 24755 negatives:  53%|███████████▏         | 7452/14029 [01:03<00:43, 150.11it/s][A
Training data size: 24755 positives + 24756 negatives:  53%|███████████▏         | 7453/14029 [01:03<00:43, 150.11it/s][A
Training data size: 24756 positives + 24757 negatives:  53%|███████████▏         | 7454/14029 [01:03<00:43, 150.11it/s][A
Training data size: 24761 positives + 24762 negatives:  53%|███████████▏         | 7455/14029 [01:03<00:43, 150.11it/s][A
Training data size: 24783 positives + 24773 negatives:  53%|███████████▏         | 7456/14029 [01:03<00:43, 150.11it/s][A
Training data size: 24784 positives + 24785 negatives:  53%|███████████▏         | 7457/14029 [01:03<00:43, 150.11it/s][A
Training data size: 24784 positives + 24785 negatives:  53%|███████████▏         | 7458/14029 [01:03<00:53, 123.33it/s][A
Training data size: 24794 positives + 24795 negatives:  53%|███████████▏         | 7458/14029 [01:03<00:53, 123.33it/s][A
Training data si

Training data size: 25099 positives + 25100 negatives:  54%|███████████▎         | 7576/14029 [01:04<00:53, 119.54it/s][A
Training data size: 25100 positives + 25101 negatives:  54%|███████████▎         | 7576/14029 [01:04<00:53, 119.54it/s][A
Training data size: 25101 positives + 25102 negatives:  54%|███████████▎         | 7577/14029 [01:04<00:53, 119.54it/s][A
Training data size: 25102 positives + 25103 negatives:  54%|███████████▎         | 7578/14029 [01:04<00:53, 119.54it/s][A
Training data size: 25102 positives + 25103 negatives:  54%|███████████▎         | 7579/14029 [01:04<00:53, 119.54it/s][A
Training data size: 25103 positives + 25104 negatives:  54%|███████████▎         | 7580/14029 [01:04<00:53, 119.54it/s][A
Training data size: 25105 positives + 25106 negatives:  54%|███████████▎         | 7581/14029 [01:04<00:53, 119.54it/s][A
Training data size: 25106 positives + 25107 negatives:  54%|███████████▎         | 7582/14029 [01:04<00:53, 119.54it/s][A
Training data si

Training data size: 25475 positives + 25476 negatives:  55%|███████████▌         | 7699/14029 [01:05<00:54, 115.91it/s][A
Training data size: 25477 positives + 25478 negatives:  55%|███████████▌         | 7700/14029 [01:05<00:54, 115.91it/s][A
Training data size: 25480 positives + 25480 negatives:  55%|███████████▌         | 7701/14029 [01:05<00:54, 115.91it/s][A
Training data size: 25481 positives + 25482 negatives:  55%|███████████▌         | 7702/14029 [01:05<00:54, 115.91it/s][A
Training data size: 25482 positives + 25483 negatives:  55%|███████████▌         | 7703/14029 [01:05<00:54, 115.91it/s][A
Training data size: 25483 positives + 25484 negatives:  55%|███████████▌         | 7704/14029 [01:05<00:54, 115.91it/s][A
Training data size: 25497 positives + 25498 negatives:  55%|███████████▌         | 7705/14029 [01:05<00:54, 115.91it/s][A
Training data size: 25499 positives + 25500 negatives:  55%|███████████▌         | 7706/14029 [01:05<00:54, 115.91it/s][A
Training data si

Training data size: 25910 positives + 25911 negatives:  56%|███████████▋         | 7823/14029 [01:06<00:49, 124.57it/s][A
Training data size: 25911 positives + 25912 negatives:  56%|███████████▋         | 7824/14029 [01:06<00:49, 124.57it/s][A
Training data size: 25913 positives + 25914 negatives:  56%|███████████▋         | 7825/14029 [01:06<00:49, 124.57it/s][A
Training data size: 25916 positives + 25917 negatives:  56%|███████████▋         | 7826/14029 [01:06<00:49, 124.57it/s][A
Training data size: 25917 positives + 25918 negatives:  56%|███████████▋         | 7827/14029 [01:06<00:49, 124.57it/s][A
Training data size: 25918 positives + 25919 negatives:  56%|███████████▋         | 7828/14029 [01:06<00:49, 124.57it/s][A
Training data size: 25918 positives + 25919 negatives:  56%|███████████▋         | 7829/14029 [01:06<00:49, 124.57it/s][A
Training data size: 25919 positives + 25920 negatives:  56%|███████████▋         | 7830/14029 [01:06<00:49, 124.57it/s][A
Training data si

Training data size: 26199 positives + 26200 negatives:  57%|███████████▉         | 7947/14029 [01:07<00:47, 127.65it/s][A
Training data size: 26200 positives + 26201 negatives:  57%|███████████▉         | 7948/14029 [01:07<00:47, 127.65it/s][A
Training data size: 26201 positives + 26202 negatives:  57%|███████████▉         | 7949/14029 [01:07<00:47, 127.65it/s][A
Training data size: 26210 positives + 26211 negatives:  57%|███████████▉         | 7950/14029 [01:07<00:47, 127.65it/s][A
Training data size: 26214 positives + 26215 negatives:  57%|███████████▉         | 7951/14029 [01:07<00:47, 127.65it/s][A
Training data size: 26219 positives + 26220 negatives:  57%|███████████▉         | 7952/14029 [01:07<00:47, 127.65it/s][A
Training data size: 26222 positives + 26223 negatives:  57%|███████████▉         | 7953/14029 [01:07<00:47, 127.65it/s][A
Training data size: 26224 positives + 26225 negatives:  57%|███████████▉         | 7954/14029 [01:07<00:47, 127.65it/s][A
Training data si

Training data size: 26696 positives + 26697 negatives:  58%|████████████         | 8070/14029 [01:08<00:50, 117.73it/s][A
Training data size: 26701 positives + 26701 negatives:  58%|████████████         | 8071/14029 [01:08<00:50, 117.73it/s][A
Training data size: 26703 positives + 26704 negatives:  58%|████████████         | 8072/14029 [01:08<00:50, 117.73it/s][A
Training data size: 26705 positives + 26706 negatives:  58%|████████████         | 8073/14029 [01:08<00:50, 117.73it/s][A
Training data size: 26706 positives + 26707 negatives:  58%|████████████         | 8074/14029 [01:08<00:50, 117.73it/s][A
Training data size: 26707 positives + 26708 negatives:  58%|████████████         | 8075/14029 [01:08<00:50, 117.73it/s][A
Training data size: 26711 positives + 26712 negatives:  58%|████████████         | 8076/14029 [01:08<00:50, 117.73it/s][A
Training data size: 26712 positives + 26713 negatives:  58%|████████████         | 8077/14029 [01:08<00:50, 117.73it/s][A
Training data si

Training data size: 27216 positives + 27217 negatives:  58%|████████████▎        | 8194/14029 [01:09<00:42, 137.57it/s][A
Training data size: 27221 positives + 27222 negatives:  58%|████████████▎        | 8195/14029 [01:09<00:42, 137.57it/s][A
Training data size: 27224 positives + 27225 negatives:  58%|████████████▎        | 8196/14029 [01:09<00:42, 137.57it/s][A
Training data size: 27231 positives + 27232 negatives:  58%|████████████▎        | 8197/14029 [01:09<00:42, 137.57it/s][A
Training data size: 27232 positives + 27233 negatives:  58%|████████████▎        | 8198/14029 [01:09<00:42, 137.57it/s][A
Training data size: 27233 positives + 27234 negatives:  58%|████████████▎        | 8199/14029 [01:09<00:42, 137.57it/s][A
Training data size: 27234 positives + 27235 negatives:  58%|████████████▎        | 8200/14029 [01:09<00:42, 137.57it/s][A
Training data size: 27234 positives + 27235 negatives:  58%|████████████▎        | 8201/14029 [01:09<00:45, 128.81it/s][A
Training data si

Training data size: 27621 positives + 27622 negatives:  59%|████████████▍        | 8317/14029 [01:10<00:43, 132.72it/s][A
Training data size: 27623 positives + 27624 negatives:  59%|████████████▍        | 8318/14029 [01:10<00:43, 132.72it/s][A
Training data size: 27643 positives + 27643 negatives:  59%|████████████▍        | 8319/14029 [01:10<00:43, 132.72it/s][A
Training data size: 27646 positives + 27646 negatives:  59%|████████████▍        | 8320/14029 [01:10<00:43, 132.72it/s][A
Training data size: 27646 positives + 27647 negatives:  59%|████████████▍        | 8321/14029 [01:10<00:43, 132.72it/s][A
Training data size: 27648 positives + 27649 negatives:  59%|████████████▍        | 8322/14029 [01:10<00:42, 132.72it/s][A
Training data size: 27649 positives + 27650 negatives:  59%|████████████▍        | 8323/14029 [01:10<00:42, 132.72it/s][A
Training data size: 27655 positives + 27656 negatives:  59%|████████████▍        | 8324/14029 [01:10<00:42, 132.72it/s][A
Training data si

Training data size: 28034 positives + 28034 negatives:  60%|████████████▋        | 8441/14029 [01:11<00:42, 130.46it/s][A
Training data size: 28041 positives + 28042 negatives:  60%|████████████▋        | 8442/14029 [01:11<00:42, 130.46it/s][A
Training data size: 28042 positives + 28043 negatives:  60%|████████████▋        | 8443/14029 [01:11<00:42, 130.46it/s][A
Training data size: 28044 positives + 28045 negatives:  60%|████████████▋        | 8444/14029 [01:11<00:42, 130.46it/s][A
Training data size: 28045 positives + 28046 negatives:  60%|████████████▋        | 8445/14029 [01:11<00:42, 130.46it/s][A
Training data size: 28045 positives + 28046 negatives:  60%|████████████▋        | 8446/14029 [01:11<00:52, 107.21it/s][A
Training data size: 28061 positives + 28058 negatives:  60%|████████████▋        | 8446/14029 [01:11<00:52, 107.21it/s][A
Training data size: 28062 positives + 28063 negatives:  60%|████████████▋        | 8447/14029 [01:11<00:52, 107.21it/s][A
Training data si

Training data size: 28415 positives + 28416 negatives:  61%|████████████▊        | 8565/14029 [01:12<00:41, 132.71it/s][A
Training data size: 28416 positives + 28417 negatives:  61%|████████████▊        | 8566/14029 [01:12<00:41, 132.71it/s][A
Training data size: 28416 positives + 28417 negatives:  61%|████████████▊        | 8567/14029 [01:12<00:42, 129.54it/s][A
Training data size: 28417 positives + 28418 negatives:  61%|████████████▊        | 8567/14029 [01:12<00:42, 129.54it/s][A
Training data size: 28419 positives + 28419 negatives:  61%|████████████▊        | 8568/14029 [01:12<00:42, 129.54it/s][A
Training data size: 28420 positives + 28421 negatives:  61%|████████████▊        | 8569/14029 [01:12<00:42, 129.54it/s][A
Training data size: 28420 positives + 28421 negatives:  61%|████████████▊        | 8570/14029 [01:12<00:42, 129.54it/s][A
Training data size: 28421 positives + 28422 negatives:  61%|████████████▊        | 8571/14029 [01:12<00:42, 129.54it/s][A
Training data si

Training data size: 28866 positives + 28867 negatives:  62%|█████████████        | 8689/14029 [01:13<00:40, 131.54it/s][A
Training data size: 28878 positives + 28879 negatives:  62%|█████████████        | 8690/14029 [01:13<00:40, 131.54it/s][A
Training data size: 28878 positives + 28879 negatives:  62%|█████████████        | 8691/14029 [01:13<00:40, 131.43it/s][A
Training data size: 28880 positives + 28880 negatives:  62%|█████████████        | 8691/14029 [01:13<00:40, 131.43it/s][A
Training data size: 28889 positives + 28890 negatives:  62%|█████████████        | 8692/14029 [01:13<00:40, 131.43it/s][A
Training data size: 28889 positives + 28890 negatives:  62%|█████████████        | 8693/14029 [01:13<00:40, 131.43it/s][A
Training data size: 28898 positives + 28899 negatives:  62%|█████████████        | 8694/14029 [01:13<00:40, 131.43it/s][A
Training data size: 28899 positives + 28900 negatives:  62%|█████████████        | 8695/14029 [01:13<00:40, 131.43it/s][A
Training data si

Training data size: 29256 positives + 29257 negatives:  63%|█████████████▏       | 8813/14029 [01:13<00:38, 135.55it/s][A
Training data size: 29261 positives + 29261 negatives:  63%|█████████████▏       | 8814/14029 [01:13<00:38, 135.55it/s][A
Training data size: 29262 positives + 29263 negatives:  63%|█████████████▏       | 8815/14029 [01:13<00:38, 135.55it/s][A
Training data size: 29263 positives + 29263 negatives:  63%|█████████████▏       | 8816/14029 [01:13<00:38, 135.55it/s][A
Training data size: 29263 positives + 29263 negatives:  63%|█████████████▏       | 8817/14029 [01:13<00:39, 131.51it/s][A
Training data size: 29267 positives + 29268 negatives:  63%|█████████████▏       | 8817/14029 [01:13<00:39, 131.51it/s][A
Training data size: 29269 positives + 29270 negatives:  63%|█████████████▏       | 8818/14029 [01:14<00:39, 131.51it/s][A
Training data size: 29271 positives + 29272 negatives:  63%|█████████████▏       | 8819/14029 [01:14<00:39, 131.51it/s][A
Training data si

Training data size: 29734 positives + 29735 negatives:  64%|█████████████▍       | 8937/14029 [01:14<00:37, 135.65it/s][A
Training data size: 29736 positives + 29737 negatives:  64%|█████████████▍       | 8938/14029 [01:14<00:37, 135.65it/s][A
Training data size: 29738 positives + 29739 negatives:  64%|█████████████▍       | 8939/14029 [01:14<00:37, 135.65it/s][A
Training data size: 29739 positives + 29740 negatives:  64%|█████████████▍       | 8940/14029 [01:14<00:37, 135.65it/s][A
Training data size: 29741 positives + 29742 negatives:  64%|█████████████▍       | 8941/14029 [01:14<00:37, 135.65it/s][A
Training data size: 29751 positives + 29751 negatives:  64%|█████████████▍       | 8942/14029 [01:14<00:37, 135.65it/s][A
Training data size: 29751 positives + 29751 negatives:  64%|█████████████▍       | 8943/14029 [01:14<00:38, 131.63it/s][A
Training data size: 29751 positives + 29752 negatives:  64%|█████████████▍       | 8943/14029 [01:14<00:38, 131.63it/s][A
Training data si

Training data size: 30262 positives + 30263 negatives:  65%|█████████████▌       | 9061/14029 [01:15<00:34, 142.10it/s][A
Training data size: 30264 positives + 30265 negatives:  65%|█████████████▌       | 9062/14029 [01:15<00:34, 142.10it/s][A
Training data size: 30266 positives + 30267 negatives:  65%|█████████████▌       | 9063/14029 [01:15<00:34, 142.10it/s][A
Training data size: 30267 positives + 30268 negatives:  65%|█████████████▌       | 9064/14029 [01:15<00:34, 142.10it/s][A
Training data size: 30269 positives + 30270 negatives:  65%|█████████████▌       | 9065/14029 [01:15<00:34, 142.10it/s][A
Training data size: 30270 positives + 30271 negatives:  65%|█████████████▌       | 9066/14029 [01:15<00:34, 142.10it/s][A
Training data size: 30271 positives + 30272 negatives:  65%|█████████████▌       | 9067/14029 [01:15<00:34, 142.10it/s][A
Training data size: 30274 positives + 30275 negatives:  65%|█████████████▌       | 9068/14029 [01:15<00:34, 142.10it/s][A
Training data si

Training data size: 30710 positives + 30711 negatives:  65%|█████████████▋       | 9185/14029 [01:16<00:39, 124.07it/s][A
Training data size: 30718 positives + 30719 negatives:  65%|█████████████▊       | 9186/14029 [01:16<00:39, 124.07it/s][A
Training data size: 30739 positives + 30725 negatives:  65%|█████████████▊       | 9187/14029 [01:16<00:39, 124.07it/s][A
Training data size: 30740 positives + 30733 negatives:  65%|█████████████▊       | 9188/14029 [01:16<00:39, 124.07it/s][A
Training data size: 30745 positives + 30746 negatives:  66%|█████████████▊       | 9189/14029 [01:16<00:39, 124.07it/s][A
Training data size: 30747 positives + 30748 negatives:  66%|█████████████▊       | 9190/14029 [01:16<00:39, 124.07it/s][A
Training data size: 30748 positives + 30749 negatives:  66%|█████████████▊       | 9191/14029 [01:16<00:38, 124.07it/s][A
Training data size: 30749 positives + 30750 negatives:  66%|█████████████▊       | 9192/14029 [01:16<00:38, 124.07it/s][A
Training data si

Training data size: 31095 positives + 31096 negatives:  66%|█████████████▉       | 9309/14029 [01:17<00:35, 131.96it/s][A
Training data size: 31095 positives + 31096 negatives:  66%|█████████████▉       | 9310/14029 [01:17<00:35, 131.96it/s][A
Training data size: 31096 positives + 31097 negatives:  66%|█████████████▉       | 9311/14029 [01:17<00:35, 131.96it/s][A
Training data size: 31097 positives + 31097 negatives:  66%|█████████████▉       | 9312/14029 [01:17<00:35, 131.96it/s][A
Training data size: 31099 positives + 31100 negatives:  66%|█████████████▉       | 9313/14029 [01:17<00:35, 131.96it/s][A
Training data size: 31101 positives + 31101 negatives:  66%|█████████████▉       | 9314/14029 [01:17<00:35, 131.96it/s][A
Training data size: 31113 positives + 31114 negatives:  66%|█████████████▉       | 9315/14029 [01:17<00:35, 131.96it/s][A
Training data size: 31118 positives + 31119 negatives:  66%|█████████████▉       | 9316/14029 [01:17<00:35, 131.96it/s][A
Training data si

Training data size: 31535 positives + 31536 negatives:  67%|██████████████       | 9434/14029 [01:18<00:32, 141.69it/s][A
Training data size: 31536 positives + 31537 negatives:  67%|██████████████       | 9434/14029 [01:18<00:32, 141.69it/s][A
Training data size: 31537 positives + 31538 negatives:  67%|██████████████       | 9435/14029 [01:18<00:32, 141.69it/s][A
Training data size: 31539 positives + 31540 negatives:  67%|██████████████       | 9436/14029 [01:18<00:32, 141.69it/s][A
Training data size: 31540 positives + 31541 negatives:  67%|██████████████▏      | 9437/14029 [01:18<00:32, 141.69it/s][A
Training data size: 31541 positives + 31541 negatives:  67%|██████████████▏      | 9438/14029 [01:18<00:32, 141.69it/s][A
Training data size: 31542 positives + 31542 negatives:  67%|██████████████▏      | 9439/14029 [01:18<00:32, 141.69it/s][A
Training data size: 31550 positives + 31551 negatives:  67%|██████████████▏      | 9440/14029 [01:18<00:32, 141.69it/s][A
Training data si

Training data size: 32041 positives + 32042 negatives:  68%|██████████████▎      | 9558/14029 [01:19<00:31, 140.64it/s][A
Training data size: 32043 positives + 32044 negatives:  68%|██████████████▎      | 9559/14029 [01:19<00:31, 140.64it/s][A
Training data size: 32043 positives + 32044 negatives:  68%|██████████████▎      | 9560/14029 [01:19<00:33, 133.47it/s][A
Training data size: 32048 positives + 32049 negatives:  68%|██████████████▎      | 9560/14029 [01:19<00:33, 133.47it/s][A
Training data size: 32049 positives + 32050 negatives:  68%|██████████████▎      | 9561/14029 [01:19<00:33, 133.47it/s][A
Training data size: 32051 positives + 32052 negatives:  68%|██████████████▎      | 9562/14029 [01:19<00:33, 133.47it/s][A
Training data size: 32052 positives + 32053 negatives:  68%|██████████████▎      | 9563/14029 [01:19<00:33, 133.47it/s][A
Training data size: 32053 positives + 32054 negatives:  68%|██████████████▎      | 9564/14029 [01:19<00:33, 133.47it/s][A
Training data si

Training data size: 32370 positives + 32371 negatives:  69%|██████████████▍      | 9682/14029 [01:20<00:28, 150.56it/s][A
Training data size: 32371 positives + 32372 negatives:  69%|██████████████▍      | 9683/14029 [01:20<00:28, 150.56it/s][A
Training data size: 32381 positives + 32382 negatives:  69%|██████████████▍      | 9684/14029 [01:20<00:28, 150.56it/s][A
Training data size: 32384 positives + 32385 negatives:  69%|██████████████▍      | 9685/14029 [01:20<00:28, 150.56it/s][A
Training data size: 32387 positives + 32388 negatives:  69%|██████████████▍      | 9686/14029 [01:20<00:28, 150.56it/s][A
Training data size: 32388 positives + 32389 negatives:  69%|██████████████▌      | 9687/14029 [01:20<00:28, 150.56it/s][A
Training data size: 32392 positives + 32393 negatives:  69%|██████████████▌      | 9688/14029 [01:20<00:28, 150.56it/s][A
Training data size: 32393 positives + 32394 negatives:  69%|██████████████▌      | 9689/14029 [01:20<00:28, 150.56it/s][A
Training data si

Training data size: 32694 positives + 32695 negatives:  70%|██████████████▋      | 9806/14029 [01:21<00:29, 141.39it/s][A
Training data size: 32696 positives + 32697 negatives:  70%|██████████████▋      | 9807/14029 [01:21<00:29, 141.39it/s][A
Training data size: 32697 positives + 32698 negatives:  70%|██████████████▋      | 9808/14029 [01:21<00:29, 141.39it/s][A
Training data size: 32698 positives + 32698 negatives:  70%|██████████████▋      | 9809/14029 [01:21<00:29, 141.39it/s][A
Training data size: 32699 positives + 32699 negatives:  70%|██████████████▋      | 9810/14029 [01:21<00:29, 141.39it/s][A
Training data size: 32700 positives + 32701 negatives:  70%|██████████████▋      | 9811/14029 [01:21<00:29, 141.39it/s][A
Training data size: 32703 positives + 32704 negatives:  70%|██████████████▋      | 9812/14029 [01:21<00:29, 141.39it/s][A
Training data size: 32704 positives + 32705 negatives:  70%|██████████████▋      | 9813/14029 [01:21<00:29, 141.39it/s][A
Training data si

Training data size: 33181 positives + 33182 negatives:  71%|██████████████▊      | 9930/14029 [01:22<00:29, 140.92it/s][A
Training data size: 33183 positives + 33184 negatives:  71%|██████████████▊      | 9931/14029 [01:22<00:29, 140.92it/s][A
Training data size: 33189 positives + 33190 negatives:  71%|██████████████▊      | 9932/14029 [01:22<00:29, 140.92it/s][A
Training data size: 33191 positives + 33192 negatives:  71%|██████████████▊      | 9933/14029 [01:22<00:29, 140.92it/s][A
Training data size: 33191 positives + 33192 negatives:  71%|██████████████▊      | 9934/14029 [01:22<00:29, 140.92it/s][A
Training data size: 33192 positives + 33193 negatives:  71%|██████████████▊      | 9935/14029 [01:22<00:29, 140.92it/s][A
Training data size: 33194 positives + 33195 negatives:  71%|██████████████▊      | 9936/14029 [01:22<00:29, 140.92it/s][A
Training data size: 33195 positives + 33196 negatives:  71%|██████████████▊      | 9937/14029 [01:22<00:29, 140.92it/s][A
Training data si

Training data size: 33515 positives + 33516 negatives:  72%|██████████████▎     | 10054/14029 [01:23<00:26, 149.01it/s][A
Training data size: 33517 positives + 33518 negatives:  72%|██████████████▎     | 10055/14029 [01:23<00:26, 149.01it/s][A
Training data size: 33518 positives + 33519 negatives:  72%|██████████████▎     | 10056/14029 [01:23<00:26, 149.01it/s][A
Training data size: 33519 positives + 33520 negatives:  72%|██████████████▎     | 10057/14029 [01:23<00:26, 149.01it/s][A
Training data size: 33520 positives + 33521 negatives:  72%|██████████████▎     | 10058/14029 [01:23<00:26, 149.01it/s][A
Training data size: 33521 positives + 33522 negatives:  72%|██████████████▎     | 10059/14029 [01:23<00:26, 149.01it/s][A
Training data size: 33525 positives + 33526 negatives:  72%|██████████████▎     | 10060/14029 [01:23<00:26, 149.01it/s][A
Training data size: 33528 positives + 33529 negatives:  72%|██████████████▎     | 10061/14029 [01:23<00:26, 149.01it/s][A
Training data si

Training data size: 33811 positives + 33812 negatives:  73%|██████████████▌     | 10179/14029 [01:23<00:28, 133.31it/s][A
Training data size: 33812 positives + 33813 negatives:  73%|██████████████▌     | 10180/14029 [01:23<00:28, 133.31it/s][A
Training data size: 33819 positives + 33820 negatives:  73%|██████████████▌     | 10181/14029 [01:23<00:28, 133.31it/s][A
Training data size: 33819 positives + 33820 negatives:  73%|██████████████▌     | 10182/14029 [01:23<00:29, 130.06it/s][A
Training data size: 33820 positives + 33821 negatives:  73%|██████████████▌     | 10182/14029 [01:23<00:29, 130.06it/s][A
Training data size: 33821 positives + 33822 negatives:  73%|██████████████▌     | 10183/14029 [01:23<00:29, 130.06it/s][A
Training data size: 33821 positives + 33822 negatives:  73%|██████████████▌     | 10184/14029 [01:23<00:29, 130.06it/s][A
Training data size: 33822 positives + 33823 negatives:  73%|██████████████▌     | 10185/14029 [01:23<00:29, 130.06it/s][A
Training data si

Training data size: 34159 positives + 34160 negatives:  73%|██████████████▋     | 10303/14029 [01:24<00:27, 134.77it/s][A
Training data size: 34161 positives + 34162 negatives:  73%|██████████████▋     | 10304/14029 [01:24<00:27, 134.77it/s][A
Training data size: 34163 positives + 34163 negatives:  73%|██████████████▋     | 10305/14029 [01:24<00:27, 134.77it/s][A
Training data size: 34164 positives + 34165 negatives:  73%|██████████████▋     | 10306/14029 [01:24<00:27, 134.77it/s][A
Training data size: 34165 positives + 34166 negatives:  73%|██████████████▋     | 10307/14029 [01:24<00:27, 134.77it/s][A
Training data size: 34167 positives + 34168 negatives:  73%|██████████████▋     | 10308/14029 [01:24<00:27, 134.77it/s][A
Training data size: 34168 positives + 34168 negatives:  73%|██████████████▋     | 10309/14029 [01:24<00:27, 134.77it/s][A
Training data size: 34170 positives + 34171 negatives:  73%|██████████████▋     | 10310/14029 [01:24<00:27, 134.77it/s][A
Training data si

Training data size: 34457 positives + 34458 negatives:  74%|██████████████▊     | 10427/14029 [01:25<00:24, 147.27it/s][A
Training data size: 34458 positives + 34459 negatives:  74%|██████████████▊     | 10428/14029 [01:25<00:24, 147.27it/s][A
Training data size: 34459 positives + 34460 negatives:  74%|██████████████▊     | 10429/14029 [01:25<00:24, 147.27it/s][A
Training data size: 34461 positives + 34462 negatives:  74%|██████████████▊     | 10430/14029 [01:25<00:24, 147.27it/s][A
Training data size: 34463 positives + 34464 negatives:  74%|██████████████▊     | 10431/14029 [01:25<00:24, 147.27it/s][A
Training data size: 34464 positives + 34465 negatives:  74%|██████████████▊     | 10432/14029 [01:25<00:24, 147.27it/s][A
Training data size: 34465 positives + 34466 negatives:  74%|██████████████▊     | 10433/14029 [01:25<00:24, 147.27it/s][A
Training data size: 34466 positives + 34467 negatives:  74%|██████████████▊     | 10434/14029 [01:25<00:24, 147.27it/s][A
Training data si

Training data size: 34909 positives + 34910 negatives:  75%|███████████████     | 10552/14029 [01:26<00:33, 104.93it/s][A
Training data size: 34910 positives + 34911 negatives:  75%|███████████████     | 10553/14029 [01:26<00:33, 104.93it/s][A
Training data size: 34912 positives + 34913 negatives:  75%|███████████████     | 10554/14029 [01:26<00:33, 104.93it/s][A
Training data size: 34913 positives + 34913 negatives:  75%|███████████████     | 10555/14029 [01:26<00:33, 104.93it/s][A
Training data size: 34916 positives + 34917 negatives:  75%|███████████████     | 10556/14029 [01:26<00:33, 104.93it/s][A
Training data size: 34917 positives + 34918 negatives:  75%|███████████████     | 10557/14029 [01:26<00:33, 104.93it/s][A
Training data size: 34917 positives + 34918 negatives:  75%|███████████████     | 10558/14029 [01:26<00:33, 104.93it/s][A
Training data size: 34917 positives + 34918 negatives:  75%|███████████████     | 10559/14029 [01:26<00:30, 115.62it/s][A
Training data si

Training data size: 35190 positives + 35191 negatives:  76%|███████████████▏    | 10676/14029 [01:27<00:24, 135.45it/s][A
Training data size: 35192 positives + 35193 negatives:  76%|███████████████▏    | 10677/14029 [01:27<00:24, 135.45it/s][A
Training data size: 35195 positives + 35195 negatives:  76%|███████████████▏    | 10678/14029 [01:27<00:24, 135.45it/s][A
Training data size: 35203 positives + 35204 negatives:  76%|███████████████▏    | 10679/14029 [01:27<00:24, 135.45it/s][A
Training data size: 35209 positives + 35210 negatives:  76%|███████████████▏    | 10680/14029 [01:27<00:24, 135.45it/s][A
Training data size: 35212 positives + 35213 negatives:  76%|███████████████▏    | 10681/14029 [01:27<00:24, 135.45it/s][A
Training data size: 35215 positives + 35215 negatives:  76%|███████████████▏    | 10682/14029 [01:27<00:24, 135.45it/s][A
Training data size: 35216 positives + 35217 negatives:  76%|███████████████▏    | 10683/14029 [01:27<00:24, 135.45it/s][A
Training data si

Training data size: 35467 positives + 35468 negatives:  77%|████████████████▏    | 10800/14029 [01:29<00:58, 55.31it/s][A
Training data size: 35468 positives + 35469 negatives:  77%|████████████████▏    | 10801/14029 [01:29<00:58, 55.31it/s][A
Training data size: 35469 positives + 35470 negatives:  77%|████████████████▏    | 10802/14029 [01:29<00:58, 55.31it/s][A
Training data size: 35470 positives + 35471 negatives:  77%|████████████████▏    | 10803/14029 [01:29<00:58, 55.31it/s][A
Training data size: 35471 positives + 35472 negatives:  77%|████████████████▏    | 10804/14029 [01:29<00:58, 55.31it/s][A
Training data size: 35472 positives + 35473 negatives:  77%|████████████████▏    | 10805/14029 [01:29<00:58, 55.31it/s][A
Training data size: 35472 positives + 35473 negatives:  77%|████████████████▏    | 10806/14029 [01:29<00:43, 74.45it/s][A
Training data size: 35473 positives + 35474 negatives:  77%|████████████████▏    | 10806/14029 [01:29<00:43, 74.45it/s][A
Training data si

Training data size: 35834 positives + 35835 negatives:  78%|███████████████▌    | 10924/14029 [01:30<00:23, 133.29it/s][A
Training data size: 35835 positives + 35836 negatives:  78%|███████████████▌    | 10925/14029 [01:30<00:23, 133.29it/s][A
Training data size: 35836 positives + 35837 negatives:  78%|███████████████▌    | 10926/14029 [01:30<00:23, 133.29it/s][A
Training data size: 35837 positives + 35838 negatives:  78%|███████████████▌    | 10927/14029 [01:30<00:23, 133.29it/s][A
Training data size: 35854 positives + 35855 negatives:  78%|███████████████▌    | 10928/14029 [01:30<00:23, 133.29it/s][A
Training data size: 35854 positives + 35855 negatives:  78%|███████████████▌    | 10929/14029 [01:30<00:22, 136.79it/s][A
Training data size: 35856 positives + 35857 negatives:  78%|███████████████▌    | 10929/14029 [01:30<00:22, 136.79it/s][A
Training data size: 35857 positives + 35858 negatives:  78%|███████████████▌    | 10930/14029 [01:30<00:22, 136.79it/s][A
Training data si

Training data size: 36401 positives + 36401 negatives:  79%|███████████████▊    | 11048/14029 [01:31<00:21, 137.33it/s][A
Training data size: 36402 positives + 36403 negatives:  79%|███████████████▊    | 11049/14029 [01:31<00:21, 137.33it/s][A
Training data size: 36404 positives + 36405 negatives:  79%|███████████████▊    | 11050/14029 [01:31<00:21, 137.33it/s][A
Training data size: 36406 positives + 36407 negatives:  79%|███████████████▊    | 11051/14029 [01:31<00:21, 137.33it/s][A
Training data size: 36407 positives + 36408 negatives:  79%|███████████████▊    | 11052/14029 [01:31<00:21, 137.33it/s][A
Training data size: 36407 positives + 36408 negatives:  79%|███████████████▊    | 11053/14029 [01:31<00:21, 141.67it/s][A
Training data size: 36408 positives + 36409 negatives:  79%|███████████████▊    | 11053/14029 [01:31<00:21, 141.67it/s][A
Training data size: 36408 positives + 36409 negatives:  79%|███████████████▊    | 11054/14029 [01:31<00:21, 141.67it/s][A
Training data si

Training data size: 36923 positives + 36924 negatives:  80%|███████████████▉    | 11172/14029 [01:32<00:23, 119.72it/s][A
Training data size: 36925 positives + 36926 negatives:  80%|███████████████▉    | 11172/14029 [01:32<00:23, 119.72it/s][A
Training data size: 36926 positives + 36927 negatives:  80%|███████████████▉    | 11173/14029 [01:32<00:23, 119.72it/s][A
Training data size: 36927 positives + 36928 negatives:  80%|███████████████▉    | 11174/14029 [01:32<00:23, 119.72it/s][A
Training data size: 36929 positives + 36930 negatives:  80%|███████████████▉    | 11175/14029 [01:32<00:23, 119.72it/s][A
Training data size: 36931 positives + 36932 negatives:  80%|███████████████▉    | 11176/14029 [01:32<00:23, 119.72it/s][A
Training data size: 36933 positives + 36934 negatives:  80%|███████████████▉    | 11177/14029 [01:32<00:23, 119.72it/s][A
Training data size: 36934 positives + 36934 negatives:  80%|███████████████▉    | 11178/14029 [01:32<00:23, 119.72it/s][A
Training data si

Training data size: 37365 positives + 37351 negatives:  81%|████████████████    | 11295/14029 [01:33<00:19, 141.89it/s][A
Training data size: 37366 positives + 37367 negatives:  81%|████████████████    | 11296/14029 [01:33<00:19, 141.89it/s][A
Training data size: 37367 positives + 37368 negatives:  81%|████████████████    | 11297/14029 [01:33<00:19, 141.89it/s][A
Training data size: 37368 positives + 37369 negatives:  81%|████████████████    | 11298/14029 [01:33<00:19, 141.89it/s][A
Training data size: 37369 positives + 37370 negatives:  81%|████████████████    | 11299/14029 [01:33<00:19, 141.89it/s][A
Training data size: 37370 positives + 37371 negatives:  81%|████████████████    | 11300/14029 [01:33<00:19, 141.89it/s][A
Training data size: 37371 positives + 37372 negatives:  81%|████████████████    | 11301/14029 [01:33<00:19, 141.89it/s][A
Training data size: 37372 positives + 37373 negatives:  81%|████████████████    | 11302/14029 [01:33<00:19, 141.89it/s][A
Training data si

Training data size: 37746 positives + 37746 negatives:  81%|████████████████▎   | 11420/14029 [01:34<00:15, 163.72it/s][A
Training data size: 37765 positives + 37766 negatives:  81%|████████████████▎   | 11421/14029 [01:34<00:15, 163.72it/s][A
Training data size: 37767 positives + 37768 negatives:  81%|████████████████▎   | 11422/14029 [01:34<00:15, 163.72it/s][A
Training data size: 37768 positives + 37769 negatives:  81%|████████████████▎   | 11423/14029 [01:34<00:15, 163.72it/s][A
Training data size: 37770 positives + 37770 negatives:  81%|████████████████▎   | 11424/14029 [01:34<00:15, 163.72it/s][A
Training data size: 37771 positives + 37772 negatives:  81%|████████████████▎   | 11425/14029 [01:34<00:15, 163.72it/s][A
Training data size: 37774 positives + 37775 negatives:  81%|████████████████▎   | 11426/14029 [01:34<00:15, 163.72it/s][A
Training data size: 37774 positives + 37775 negatives:  81%|████████████████▎   | 11427/14029 [01:34<00:15, 163.72it/s][A
Training data si

Training data size: 38181 positives + 38180 negatives:  82%|█████████████████▎   | 11545/14029 [01:35<00:25, 96.49it/s][A
Training data size: 38184 positives + 38185 negatives:  82%|█████████████████▎   | 11546/14029 [01:35<00:25, 96.49it/s][A
Training data size: 38185 positives + 38186 negatives:  82%|█████████████████▎   | 11547/14029 [01:35<00:25, 96.49it/s][A
Training data size: 38185 positives + 38186 negatives:  82%|████████████████▍   | 11548/14029 [01:35<00:23, 107.48it/s][A
Training data size: 38187 positives + 38188 negatives:  82%|████████████████▍   | 11548/14029 [01:35<00:23, 107.48it/s][A
Training data size: 38188 positives + 38189 negatives:  82%|████████████████▍   | 11549/14029 [01:35<00:23, 107.48it/s][A
Training data size: 38189 positives + 38190 negatives:  82%|████████████████▍   | 11550/14029 [01:35<00:23, 107.48it/s][A
Training data size: 38191 positives + 38192 negatives:  82%|████████████████▍   | 11551/14029 [01:35<00:23, 107.48it/s][A
Training data si

Training data size: 38458 positives + 38459 negatives:  83%|████████████████▋   | 11669/14029 [01:36<00:15, 155.21it/s][A
Training data size: 38478 positives + 38478 negatives:  83%|████████████████▋   | 11670/14029 [01:36<00:15, 155.21it/s][A
Training data size: 38480 positives + 38481 negatives:  83%|████████████████▋   | 11671/14029 [01:36<00:15, 155.21it/s][A
Training data size: 38481 positives + 38482 negatives:  83%|████████████████▋   | 11672/14029 [01:36<00:15, 155.21it/s][A
Training data size: 38484 positives + 38485 negatives:  83%|████████████████▋   | 11673/14029 [01:36<00:15, 155.21it/s][A
Training data size: 38486 positives + 38486 negatives:  83%|████████████████▋   | 11674/14029 [01:36<00:15, 155.21it/s][A
Training data size: 38487 positives + 38488 negatives:  83%|████████████████▋   | 11675/14029 [01:36<00:15, 155.21it/s][A
Training data size: 38491 positives + 38492 negatives:  83%|████████████████▋   | 11676/14029 [01:36<00:15, 155.21it/s][A
Training data si

Training data size: 38785 positives + 38786 negatives:  84%|████████████████▊   | 11793/14029 [01:37<00:21, 104.81it/s][A
Training data size: 38786 positives + 38787 negatives:  84%|████████████████▊   | 11794/14029 [01:37<00:21, 104.81it/s][A
Training data size: 38788 positives + 38789 negatives:  84%|████████████████▊   | 11795/14029 [01:37<00:21, 104.81it/s][A
Training data size: 38791 positives + 38792 negatives:  84%|████████████████▊   | 11796/14029 [01:37<00:21, 104.81it/s][A
Training data size: 38792 positives + 38793 negatives:  84%|████████████████▊   | 11797/14029 [01:37<00:21, 104.81it/s][A
Training data size: 38805 positives + 38806 negatives:  84%|████████████████▊   | 11798/14029 [01:37<00:21, 104.81it/s][A
Training data size: 38806 positives + 38807 negatives:  84%|████████████████▊   | 11799/14029 [01:37<00:21, 104.81it/s][A
Training data size: 38806 positives + 38807 negatives:  84%|████████████████▊   | 11800/14029 [01:37<00:21, 104.81it/s][A
Training data si

Training data size: 39193 positives + 39194 negatives:  85%|█████████████████▊   | 11917/14029 [01:38<00:29, 70.83it/s][A
Training data size: 39197 positives + 39198 negatives:  85%|█████████████████▊   | 11918/14029 [01:38<00:29, 70.83it/s][A
Training data size: 39206 positives + 39206 negatives:  85%|█████████████████▊   | 11919/14029 [01:38<00:29, 70.83it/s][A
Training data size: 39208 positives + 39209 negatives:  85%|█████████████████▊   | 11920/14029 [01:38<00:29, 70.83it/s][A
Training data size: 39209 positives + 39210 negatives:  85%|█████████████████▊   | 11921/14029 [01:38<00:29, 70.83it/s][A
Training data size: 39209 positives + 39210 negatives:  85%|█████████████████▊   | 11922/14029 [01:38<00:29, 70.83it/s][A
Training data size: 39210 positives + 39211 negatives:  85%|█████████████████▊   | 11923/14029 [01:38<00:29, 70.83it/s][A
Training data size: 39210 positives + 39211 negatives:  85%|█████████████████▊   | 11924/14029 [01:38<00:22, 91.75it/s][A
Training data si

Training data size: 39595 positives + 39596 negatives:  86%|█████████████████▏  | 12040/14029 [01:39<00:16, 118.19it/s][A
Training data size: 39596 positives + 39597 negatives:  86%|█████████████████▏  | 12041/14029 [01:39<00:16, 118.19it/s][A
Training data size: 39597 positives + 39598 negatives:  86%|█████████████████▏  | 12042/14029 [01:39<00:16, 118.19it/s][A
Training data size: 39598 positives + 39599 negatives:  86%|█████████████████▏  | 12043/14029 [01:39<00:16, 118.19it/s][A
Training data size: 39601 positives + 39601 negatives:  86%|█████████████████▏  | 12044/14029 [01:39<00:16, 118.19it/s][A
Training data size: 39602 positives + 39603 negatives:  86%|█████████████████▏  | 12045/14029 [01:39<00:16, 118.19it/s][A
Training data size: 39603 positives + 39604 negatives:  86%|█████████████████▏  | 12046/14029 [01:39<00:16, 118.19it/s][A
Training data size: 39603 positives + 39604 negatives:  86%|█████████████████▏  | 12047/14029 [01:39<00:16, 118.19it/s][A
Training data si

Training data size: 40101 positives + 40102 negatives:  87%|█████████████████▎  | 12165/14029 [01:40<00:12, 146.77it/s][A
Training data size: 40102 positives + 40102 negatives:  87%|█████████████████▎  | 12166/14029 [01:40<00:12, 146.77it/s][A
Training data size: 40103 positives + 40104 negatives:  87%|█████████████████▎  | 12167/14029 [01:40<00:12, 146.77it/s][A
Training data size: 40103 positives + 40104 negatives:  87%|█████████████████▎  | 12168/14029 [01:40<00:12, 150.40it/s][A
Training data size: 40104 positives + 40105 negatives:  87%|█████████████████▎  | 12168/14029 [01:40<00:12, 150.40it/s][A
Training data size: 40104 positives + 40105 negatives:  87%|█████████████████▎  | 12169/14029 [01:40<00:12, 150.40it/s][A
Training data size: 40105 positives + 40106 negatives:  87%|█████████████████▎  | 12170/14029 [01:40<00:12, 150.40it/s][A
Training data size: 40106 positives + 40107 negatives:  87%|█████████████████▎  | 12171/14029 [01:40<00:12, 150.40it/s][A
Training data si

Training data size: 40540 positives + 40541 negatives:  88%|█████████████████▌  | 12289/14029 [01:41<00:14, 116.36it/s][A
Training data size: 40540 positives + 40541 negatives:  88%|█████████████████▌  | 12290/14029 [01:41<00:13, 126.72it/s][A
Training data size: 40541 positives + 40542 negatives:  88%|█████████████████▌  | 12290/14029 [01:41<00:13, 126.72it/s][A
Training data size: 40542 positives + 40543 negatives:  88%|█████████████████▌  | 12291/14029 [01:41<00:13, 126.72it/s][A
Training data size: 40547 positives + 40548 negatives:  88%|█████████████████▌  | 12292/14029 [01:41<00:13, 126.72it/s][A
Training data size: 40548 positives + 40549 negatives:  88%|█████████████████▌  | 12293/14029 [01:41<00:13, 126.72it/s][A
Training data size: 40549 positives + 40550 negatives:  88%|█████████████████▌  | 12294/14029 [01:41<00:13, 126.72it/s][A
Training data size: 40549 positives + 40550 negatives:  88%|█████████████████▌  | 12295/14029 [01:41<00:13, 126.72it/s][A
Training data si

Training data size: 40909 positives + 40910 negatives:  88%|█████████████████▋  | 12413/14029 [01:42<00:11, 135.23it/s][A
Training data size: 40913 positives + 40914 negatives:  88%|█████████████████▋  | 12413/14029 [01:42<00:11, 135.23it/s][A
Training data size: 40915 positives + 40916 negatives:  88%|█████████████████▋  | 12414/14029 [01:42<00:11, 135.23it/s][A
Training data size: 40922 positives + 40923 negatives:  88%|█████████████████▋  | 12415/14029 [01:42<00:11, 135.23it/s][A
Training data size: 40924 positives + 40925 negatives:  89%|█████████████████▋  | 12416/14029 [01:42<00:11, 135.23it/s][A
Training data size: 40926 positives + 40927 negatives:  89%|█████████████████▋  | 12417/14029 [01:42<00:11, 135.23it/s][A
Training data size: 40927 positives + 40928 negatives:  89%|█████████████████▋  | 12418/14029 [01:42<00:11, 135.23it/s][A
Training data size: 40928 positives + 40929 negatives:  89%|█████████████████▋  | 12419/14029 [01:42<00:11, 135.23it/s][A
Training data si

Training data size: 41333 positives + 41333 negatives:  89%|█████████████████▊  | 12537/14029 [01:43<00:10, 147.71it/s][A
Training data size: 41334 positives + 41335 negatives:  89%|█████████████████▊  | 12538/14029 [01:43<00:10, 147.71it/s][A
Training data size: 41335 positives + 41335 negatives:  89%|█████████████████▉  | 12539/14029 [01:43<00:10, 147.71it/s][A
Training data size: 41336 positives + 41337 negatives:  89%|█████████████████▉  | 12540/14029 [01:43<00:10, 147.71it/s][A
Training data size: 41337 positives + 41338 negatives:  89%|█████████████████▉  | 12541/14029 [01:43<00:10, 147.71it/s][A
Training data size: 41339 positives + 41340 negatives:  89%|█████████████████▉  | 12542/14029 [01:43<00:10, 147.71it/s][A
Training data size: 41340 positives + 41341 negatives:  89%|█████████████████▉  | 12543/14029 [01:43<00:10, 147.71it/s][A
Training data size: 41341 positives + 41342 negatives:  89%|█████████████████▉  | 12544/14029 [01:43<00:10, 147.71it/s][A
Training data si

Training data size: 41731 positives + 41732 negatives:  90%|██████████████████  | 12661/14029 [01:44<00:10, 132.35it/s][A
Training data size: 41732 positives + 41733 negatives:  90%|██████████████████  | 12662/14029 [01:44<00:10, 132.35it/s][A
Training data size: 41748 positives + 41748 negatives:  90%|██████████████████  | 12663/14029 [01:44<00:10, 132.35it/s][A
Training data size: 41749 positives + 41749 negatives:  90%|██████████████████  | 12664/14029 [01:44<00:10, 132.35it/s][A
Training data size: 41750 positives + 41751 negatives:  90%|██████████████████  | 12665/14029 [01:44<00:10, 132.35it/s][A
Training data size: 41751 positives + 41752 negatives:  90%|██████████████████  | 12666/14029 [01:44<00:10, 132.35it/s][A
Training data size: 41752 positives + 41753 negatives:  90%|██████████████████  | 12667/14029 [01:44<00:10, 132.35it/s][A
Training data size: 41756 positives + 41757 negatives:  90%|██████████████████  | 12668/14029 [01:44<00:10, 132.35it/s][A
Training data si

Training data size: 42134 positives + 42135 negatives:  91%|██████████████████▏ | 12785/14029 [01:45<00:08, 141.98it/s][A
Training data size: 42150 positives + 42151 negatives:  91%|██████████████████▏ | 12786/14029 [01:45<00:08, 141.98it/s][A
Training data size: 42150 positives + 42151 negatives:  91%|██████████████████▏ | 12787/14029 [01:45<00:08, 141.98it/s][A
Training data size: 42151 positives + 42152 negatives:  91%|██████████████████▏ | 12788/14029 [01:45<00:08, 141.98it/s][A
Training data size: 42152 positives + 42152 negatives:  91%|██████████████████▏ | 12789/14029 [01:45<00:08, 141.98it/s][A
Training data size: 42153 positives + 42154 negatives:  91%|██████████████████▏ | 12790/14029 [01:45<00:08, 141.98it/s][A
Training data size: 42158 positives + 42159 negatives:  91%|██████████████████▏ | 12791/14029 [01:45<00:08, 141.98it/s][A
Training data size: 42166 positives + 42167 negatives:  91%|██████████████████▏ | 12792/14029 [01:45<00:08, 141.98it/s][A
Training data si

Training data size: 42609 positives + 42609 negatives:  92%|██████████████████▍ | 12909/14029 [01:46<00:09, 121.85it/s][A
Training data size: 42611 positives + 42612 negatives:  92%|██████████████████▍ | 12910/14029 [01:46<00:09, 121.85it/s][A
Training data size: 42612 positives + 42613 negatives:  92%|██████████████████▍ | 12911/14029 [01:46<00:09, 121.85it/s][A
Training data size: 42616 positives + 42617 negatives:  92%|██████████████████▍ | 12912/14029 [01:46<00:09, 121.85it/s][A
Training data size: 42617 positives + 42618 negatives:  92%|██████████████████▍ | 12913/14029 [01:46<00:09, 121.85it/s][A
Training data size: 42618 positives + 42619 negatives:  92%|██████████████████▍ | 12914/14029 [01:46<00:09, 121.85it/s][A
Training data size: 42618 positives + 42619 negatives:  92%|██████████████████▍ | 12915/14029 [01:46<00:09, 121.85it/s][A
Training data size: 42619 positives + 42620 negatives:  92%|██████████████████▍ | 12916/14029 [01:46<00:09, 121.85it/s][A
Training data si

Training data size: 42960 positives + 42961 negatives:  93%|██████████████████▌ | 13033/14029 [01:47<00:07, 127.32it/s][A
Training data size: 42961 positives + 42961 negatives:  93%|██████████████████▌ | 13034/14029 [01:47<00:07, 127.32it/s][A
Training data size: 42961 positives + 42962 negatives:  93%|██████████████████▌ | 13035/14029 [01:47<00:07, 127.32it/s][A
Training data size: 42962 positives + 42963 negatives:  93%|██████████████████▌ | 13036/14029 [01:47<00:07, 127.32it/s][A
Training data size: 42963 positives + 42964 negatives:  93%|██████████████████▌ | 13037/14029 [01:47<00:07, 127.32it/s][A
Training data size: 42974 positives + 42975 negatives:  93%|██████████████████▌ | 13038/14029 [01:47<00:07, 127.32it/s][A
Training data size: 42975 positives + 42976 negatives:  93%|██████████████████▌ | 13039/14029 [01:47<00:07, 127.32it/s][A
Training data size: 42977 positives + 42977 negatives:  93%|██████████████████▌ | 13040/14029 [01:47<00:07, 127.32it/s][A
Training data si

Training data size: 43400 positives + 43401 negatives:  94%|██████████████████▊ | 13157/14029 [01:48<00:06, 126.35it/s][A
Training data size: 43401 positives + 43401 negatives:  94%|██████████████████▊ | 13158/14029 [01:48<00:06, 126.35it/s][A
Training data size: 43402 positives + 43403 negatives:  94%|██████████████████▊ | 13159/14029 [01:48<00:06, 126.35it/s][A
Training data size: 43413 positives + 43414 negatives:  94%|██████████████████▊ | 13160/14029 [01:48<00:06, 126.35it/s][A
Training data size: 43414 positives + 43415 negatives:  94%|██████████████████▊ | 13161/14029 [01:48<00:06, 126.35it/s][A
Training data size: 43416 positives + 43417 negatives:  94%|██████████████████▊ | 13162/14029 [01:48<00:06, 126.35it/s][A
Training data size: 43417 positives + 43418 negatives:  94%|██████████████████▊ | 13163/14029 [01:48<00:06, 126.35it/s][A
Training data size: 43417 positives + 43418 negatives:  94%|██████████████████▊ | 13164/14029 [01:48<00:06, 129.81it/s][A
Training data si

Training data size: 43782 positives + 43783 negatives:  95%|██████████████████▉ | 13281/14029 [01:48<00:05, 143.92it/s][A
Training data size: 43792 positives + 43791 negatives:  95%|██████████████████▉ | 13282/14029 [01:48<00:05, 143.92it/s][A
Training data size: 43802 positives + 43803 negatives:  95%|██████████████████▉ | 13283/14029 [01:48<00:05, 143.92it/s][A
Training data size: 43804 positives + 43805 negatives:  95%|██████████████████▉ | 13284/14029 [01:49<00:05, 143.92it/s][A
Training data size: 43821 positives + 43821 negatives:  95%|██████████████████▉ | 13285/14029 [01:49<00:05, 143.92it/s][A
Training data size: 43822 positives + 43823 negatives:  95%|██████████████████▉ | 13286/14029 [01:49<00:05, 143.92it/s][A
Training data size: 43823 positives + 43824 negatives:  95%|██████████████████▉ | 13287/14029 [01:49<00:05, 143.92it/s][A
Training data size: 43823 positives + 43824 negatives:  95%|██████████████████▉ | 13288/14029 [01:49<00:05, 136.51it/s][A
Training data si

Training data size: 44223 positives + 44224 negatives:  96%|███████████████████ | 13405/14029 [01:49<00:04, 141.62it/s][A
Training data size: 44229 positives + 44230 negatives:  96%|███████████████████ | 13406/14029 [01:49<00:04, 141.62it/s][A
Training data size: 44229 positives + 44230 negatives:  96%|███████████████████ | 13407/14029 [01:49<00:04, 143.69it/s][A
Training data size: 44230 positives + 44230 negatives:  96%|███████████████████ | 13407/14029 [01:49<00:04, 143.69it/s][A
Training data size: 44231 positives + 44231 negatives:  96%|███████████████████ | 13408/14029 [01:50<00:04, 143.69it/s][A
Training data size: 44232 positives + 44233 negatives:  96%|███████████████████ | 13409/14029 [01:50<00:04, 143.69it/s][A
Training data size: 44239 positives + 44240 negatives:  96%|███████████████████ | 13410/14029 [01:50<00:04, 143.69it/s][A
Training data size: 44240 positives + 44241 negatives:  96%|███████████████████ | 13411/14029 [01:50<00:04, 143.69it/s][A
Training data si

Training data size: 44742 positives + 44743 negatives:  96%|███████████████████▎| 13529/14029 [01:51<00:04, 116.04it/s][A
Training data size: 44743 positives + 44744 negatives:  96%|███████████████████▎| 13530/14029 [01:51<00:04, 116.04it/s][A
Training data size: 44743 positives + 44744 negatives:  96%|███████████████████▎| 13531/14029 [01:51<00:04, 110.55it/s][A
Training data size: 44743 positives + 44744 negatives:  96%|███████████████████▎| 13531/14029 [01:51<00:04, 110.55it/s][A
Training data size: 44744 positives + 44745 negatives:  96%|███████████████████▎| 13532/14029 [01:51<00:04, 110.55it/s][A
Training data size: 44745 positives + 44746 negatives:  96%|███████████████████▎| 13533/14029 [01:51<00:04, 110.55it/s][A
Training data size: 44749 positives + 44750 negatives:  96%|███████████████████▎| 13534/14029 [01:51<00:04, 110.55it/s][A
Training data size: 44752 positives + 44753 negatives:  96%|███████████████████▎| 13535/14029 [01:51<00:04, 110.55it/s][A
Training data si

Training data size: 45049 positives + 45050 negatives:  97%|███████████████████▍| 13652/14029 [01:51<00:02, 135.81it/s][A
Training data size: 45051 positives + 45052 negatives:  97%|███████████████████▍| 13653/14029 [01:51<00:02, 135.81it/s][A
Training data size: 45052 positives + 45053 negatives:  97%|███████████████████▍| 13654/14029 [01:51<00:02, 135.81it/s][A
Training data size: 45054 positives + 45055 negatives:  97%|███████████████████▍| 13655/14029 [01:51<00:02, 135.81it/s][A
Training data size: 45055 positives + 45056 negatives:  97%|███████████████████▍| 13656/14029 [01:51<00:02, 135.81it/s][A
Training data size: 45056 positives + 45057 negatives:  97%|███████████████████▍| 13657/14029 [01:51<00:02, 135.81it/s][A
Training data size: 45057 positives + 45058 negatives:  97%|███████████████████▍| 13658/14029 [01:51<00:02, 135.81it/s][A
Training data size: 45058 positives + 45059 negatives:  97%|███████████████████▍| 13659/14029 [01:51<00:02, 135.81it/s][A
Training data si

Training data size: 45459 positives + 45460 negatives:  98%|████████████████████▌| 13775/14029 [01:53<00:02, 95.09it/s][A
Training data size: 45460 positives + 45461 negatives:  98%|████████████████████▌| 13776/14029 [01:53<00:02, 95.09it/s][A
Training data size: 45462 positives + 45463 negatives:  98%|████████████████████▌| 13777/14029 [01:53<00:02, 95.09it/s][A
Training data size: 45463 positives + 45464 negatives:  98%|████████████████████▌| 13778/14029 [01:53<00:02, 95.09it/s][A
Training data size: 45464 positives + 45465 negatives:  98%|████████████████████▋| 13779/14029 [01:53<00:02, 95.09it/s][A
Training data size: 45465 positives + 45466 negatives:  98%|████████████████████▋| 13780/14029 [01:53<00:02, 95.09it/s][A
Training data size: 45466 positives + 45467 negatives:  98%|████████████████████▋| 13781/14029 [01:53<00:02, 95.09it/s][A
Training data size: 45467 positives + 45468 negatives:  98%|████████████████████▋| 13782/14029 [01:53<00:02, 95.09it/s][A
Training data si

Training data size: 45888 positives + 45889 negatives:  99%|███████████████████▊| 13900/14029 [01:54<00:00, 147.91it/s][A
Training data size: 45890 positives + 45891 negatives:  99%|███████████████████▊| 13900/14029 [01:54<00:00, 147.91it/s][A
Training data size: 45891 positives + 45892 negatives:  99%|███████████████████▊| 13901/14029 [01:54<00:00, 147.91it/s][A
Training data size: 45892 positives + 45893 negatives:  99%|███████████████████▊| 13902/14029 [01:54<00:00, 147.91it/s][A
Training data size: 45893 positives + 45894 negatives:  99%|███████████████████▊| 13903/14029 [01:54<00:00, 147.91it/s][A
Training data size: 45894 positives + 45895 negatives:  99%|███████████████████▊| 13904/14029 [01:54<00:00, 147.91it/s][A
Training data size: 45915 positives + 45913 negatives:  99%|███████████████████▊| 13905/14029 [01:54<00:00, 147.91it/s][A
Training data size: 45916 positives + 45917 negatives:  99%|███████████████████▊| 13906/14029 [01:54<00:00, 147.91it/s][A
Training data si

Training data size: 46370 positives + 46370 negatives: 100%|███████████████████▉| 14024/14029 [01:55<00:00, 115.27it/s][A
Training data size: 46371 positives + 46372 negatives: 100%|███████████████████▉| 14025/14029 [01:55<00:00, 115.27it/s][A
Training data size: 46372 positives + 46373 negatives: 100%|███████████████████▉| 14026/14029 [01:55<00:00, 115.27it/s][A
Training data size: 46389 positives + 46390 negatives: 100%|███████████████████▉| 14027/14029 [01:55<00:00, 115.27it/s][A
Training data size: 46390 positives + 46391 negatives: 100%|███████████████████▉| 14028/14029 [01:55<00:00, 115.27it/s][A
Training data size: 46390 positives + 46391 negatives: 100%|████████████████████| 14029/14029 [01:55<00:00, 135.28it/s][A
Training data size: 46391 positives + 46392 negatives: 100%|████████████████████| 14029/14029 [01:55<00:00, 135.28it/s][A

In [22]:
words_list,nes_list=[],[]
for row in ner_data:
    words, nes = list(zip(*row))
    words_list.append(list(words))
    nes_list.append(list(nes))


In [23]:
#crf标注
import sklearn_crfsuite
from sklearn_crfsuite import CRF   # CRF的具体实现太过复杂，这里我们借助一个外部的库


def word2features(sent, i,pos_sent,compress_list):
    """抽取单个字的特征"""
    word = sent[i]
    postag = pos_sent[i][1]
    compress_tag=compress_list[i]
    features = {
        'bias': 1.0,
        'word.lower()': word.lower(),
        #提取单词后缀，比如ly结尾可能代表副词
        "word[-3:]": word[-3:],
        'word[-2:]': word[-2:],
        #isupper 全为大写
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'postag': postag,
        'postag[:2]': postag[:2],
        
        "compress":compress_tag
    }
    if i > 0:
        #提取前一个单词特征
        word1 = sent[i-1]
        postag1 = pos_sent[i-1][1]
        compress_tag1=compress_list[i-1]
        
        features.update({
            '-1:word.lower()': word1.lower(),
            '-1:word.istitle()': word1.istitle(),
            '-1:word.isupper()': word1.isupper(),
            '-1:postag': postag1,
            '-1:postag[:2]': postag1[:2],
            
            "-1:compress":compress_tag1
        })
    else:
        features['BOS'] = True

    if i < len(sent)-1:
        #提取后一个单词特征
        word1 = sent[i+1]
        postag1 = pos_sent[i+1][1]
        compress_tag1=compress_list[i+1]
        
        features.update({
            '+1:word.lower()': word1.lower(),
            
            '+1:word.istitle()': word1.istitle(),
            '+1:word.isupper()': word1.isupper(),
            '+1:postag': postag1,
            '+1:postag[:2]': postag1[:2],
            "+1:compress":compress_tag1
        })
    else:
        features['EOS'] = True
        
        
    return features
def get_iscompress(sentence):
    #sentence 是词列表
    stop_list=["of","and","for","in","s"]
    is_compress=[0]*len(sentence)

    for i in range(len(sentence)):
        word=sentence[i]
        word_len=len(word)
        if word.isupper() ==True and word_len>=3 and i>2 :
            flag=False
            m,n=word_len-1,1
            while(i-n>=0):
                pre_word=sentence[i-n]
                if pre_word in stop_list:
                    n=n+1
                    continue
                if pre_word[0]!=word[m]:break
                if m==0:
                    flag=True
                    break
                m,n=m-1,n+1

            if flag==True:
                for k in range(i-n,i+1):
                    is_compress[k]=1
    return is_compress
def sent2features(sent):
    """抽取序列特征"""
    compress_list=get_iscompress(sent)
    pos_sent=nltk.tag.pos_tag(sent)
    return [word2features(sent, i,pos_sent,compress_list) for i in range(len(sent))]



In [24]:
%%time
X_train = [sent2features(s) for s in words_list]
y_train = nes_list



Training data size: 46391 positives + 46392 negatives: 100%|████████████████████| 14029/14029 [02:14<00:00, 135.28it/s][A

Wall time: 3min 50s


In [25]:
%%time
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True
)
crf.fit(X_train, y_train)

Wall time: 1min 54s


# 验证集和F1score

In [26]:
def compute_fbeta(y_true,
                  y_pred,
                  beta=0.5) :
    """Compute the Jaccard-based micro FBeta score.

    References
    ----------
    - https://www.kaggle.com/c/coleridgeinitiative-show-us-the-data/overview/evaluation
    """

    def _jaccard_similarity(str1: str, str2: str) -> float:
        a = set(str1.split()) 
        b = set(str2.split())
        c = a.intersection(b)
        return float(len(c)) / (len(a) + len(b) - len(c))
    y_true=[ string.split("|") for string in y_true]
    y_pred=[ string.split("|") for string in y_pred]
    tp = 0  # true positive
    fp = 0  # false positive
    fn = 0  # false negative
    for ground_truth_list, predicted_string_list in zip(y_true, y_pred):
        predicted_string_list_sorted = sorted(predicted_string_list)
        for ground_truth in sorted(ground_truth_list):
            if len(predicted_string_list_sorted) == 0:
                fn += 1
            else:
                similarity_scores = [
                    _jaccard_similarity(ground_truth, predicted_string)
                    for predicted_string in predicted_string_list_sorted
                ]
                matched_idx = np.argmax(similarity_scores)
                if similarity_scores[matched_idx] >= 0.5:
                    predicted_string_list_sorted.pop(matched_idx)
                    tp += 1
                else:
                    fn += 1
        fp += len(predicted_string_list_sorted)

    tp *= (1 + beta ** 2)
    fn *= beta ** 2
    fbeta_score = tp / (tp + fp + fn)
    return fbeta_score

In [27]:
def predict_extract(paper_sample_folder,sample_sub):
    papers_sample=read_papers(paper_sample_folder,sample_sub)
    sentences_sample={}
    datasets_all=[]
    def clean_text(txt):
        return re.sub('[^A-Za-z0-9]+', ' ', str(txt).lower()).strip()
    for id in sample_sub['Id'].unique():
        # paper
        paper = papers_sample[id]

        # sentences
        sentences_sample = set([clean_training_text(sentence) for section in paper 
                     for sentence in section['text'].split('.') ])

        sentences_sample = shorten_sentences(sentences_sample) # make sentences short
        sentences_sample = [sentence for sentence in sentences_sample if len(sentence) > 10] 
        sentences_sample=[sentence.split(" ") for sentence in sentences_sample]
        features_sample=[sent2features(s) for s in sentences_sample]

        #预测并提取预测结果
        y_pred = crf.predict(features_sample)
        datasets=[]
        for i  in range(len(y_pred)):
            if "B" in y_pred[i]:
                dataset=""
                for j in range(len(y_pred[i])):
                    if y_pred[i][j]=="B":
                        if dataset!="":
                            dataset=dataset.rstrip()
                            dataset=clean_text(dataset)
                            if dataset  not in datasets:
                                datasets.append(dataset)
                            dataset=""
                        dataset+=sentences_sample[i][j]+" "
                    elif y_pred[i][j]=="I":
                        dataset+=sentences_sample[i][j]+" "
                if dataset!="":
                    dataset=dataset.rstrip()
                    dataset=clean_text(dataset)
                    if dataset  not in datasets:
                        datasets.append(dataset)

        datasets="|".join(datasets)
        datasets_all.append(datasets)
    return datasets_all


In [28]:
validation_pred=predict_extract(paper_train_folder,validation_df)
validation_true=validation_df["cleaned_label"]
f1_validation=compute_fbeta(validation_true,validation_pred)
print(f1_validation)

0.6091277143908723


# 测试集和字符串匹配

In [None]:
#string matchinf  return literal_preds 
all_labels= set()

for label_1, label_2, label_3 in train[['dataset_title', 'dataset_label', 'cleaned_label']].itertuples(index=False):
    label_1=label_1.split("|")
    label_2=label_2.split("|")
    label_3=label_3.split("|")


    for label in label_1:

        all_labels.add(str(label).lower())
    for label in label_2:
        all_labels.add(str(label).lower())    
    for label in label_3:
        all_labels.add(str(label).lower())


print(f'No. different labels: {len(all_labels)}')


def totally_clean_text(txt):
    txt = clean_text(txt)
    txt = re.sub(' +', ' ', txt)
    return txt
literal_preds = []

for paper_id in sample_sub['Id']:
    paper = papers_sample[paper_id]
    text_1 = '. '.join(section['text'] for section in paper).lower()
    text_2 = totally_clean_text(text_1)
    
    labels = set()
    for label in all_labels:
        if label in text_1 or label in text_2:
            labels.add(clean_text(label))
    
    literal_preds.append('|'.join(labels))
    

In [None]:
datasets_all=predict_extract(paper_sample_folder,sample_sub)
print("datasets in test data:",datasets_all)
for i in range(len(datasets_all)):
    if literal_preds[i]!="":
        if datasets_all[i]=="":
            datasets_all[i]=literal_preds[i]
        else:
            datasets_all[i]+="|"+literal_preds[i]
    datasets_all[i]=datasets_all[i].split("|")
    datasets_all[i]=list(set(datasets_all[i]))
    datasets_all[i]="|".join(datasets_all[i])
my_submission=pd.DataFrame({"Id":sample_sub['Id'],"PredictionString":datasets_all})
my_submission.to_csv('submission.csv', index=False)

In [None]:
def print_state_features(state_features):
    for (attr, label), weight in state_features:
        print("%0.6f %-8s %s" % (weight, label, attr))

print("Top positive:")
print_state_features(collections.Counter(crf.state_features_).most_common(30))

print("\nTop negative:")
print_state_features(collections.Counter(crf.state_features_).most_common()[-30:])

[0, 0, 0, 0, 0, 1, 1, 1, 1]


1
2
3
4
