In [1]:
import datasets
import torch
import pandas as pd
import json
import copy
import os
import logging

In [2]:
# Configure the root logger at INFO level so the logging.info(...) progress messages
# emitted by the extraction loop further down are shown.
logging.basicConfig(level=logging.INFO)

In [3]:
# Open the original FiNER-139 splits in streaming mode.
def _stream_split(split_name):
    """Return the streaming handle for one split of 'nlpaueb/finer-139'."""
    return datasets.load_dataset(path='nlpaueb/finer-139', split=split_name, streaming=True)


train_dataset = _stream_split('train')
test_dataset = _stream_split('test')
validation_dataset = _stream_split('validation')

In [4]:
# Iterate over the dataset; each example is a dict with two keys, `tokens` and `ner_tags`.
# Extract the tokens and ner_tags of every example and collect them in two parallel lists.
def get_fulldata(dataset):
    """Collect the 'tokens' and 'ner_tags' fields of every example in ``dataset``.

    The dataset is walked exactly once, so one-shot iterables are fine.

    Args:
        dataset: iterable of mappings that each provide 'tokens' and 'ner_tags'.

    Returns:
        A ``(tokens, ner_tags)`` tuple of two lists, aligned by example position.
    """
    all_tokens, all_tags = [], []
    for example in dataset:
        all_tokens.append(example['tokens'])
        all_tags.append(example['ner_tags'])
    return all_tokens, all_tags

In [None]:
# Read each streamed split once and keep its token sequences and tag sequences
# as in-memory lists; the extraction code further down indexes into these lists.
train_tokens,train_ner_tags = get_fulldata(train_dataset)
test_tokens,test_ner_tags = get_fulldata(test_dataset)
validation_tokens,validation_ner_tags = get_fulldata(validation_dataset)

INFO:datasets_modules.datasets.nlpaueb--finer-139.5f5a8eb2a38e8b142bb8ca63f3f9600634cc6c8963e4c982926cf2b48e4e55ff.finer-139:⏳ Generating examples from = zip://train.jsonl::https://huggingface.co/datasets/nlpaueb/finer-139/resolve/main/finer139.zip


In [None]:
def load_dataset_tags():
    """Build the tag-name <-> integer-id lookups from the FiNER-139 train split.

    The tag names are read from the ``ner_tags`` feature of the streamed train
    split; a tag's id is its position in that list of names.

    Returns:
        ``(tag2idx, idx2tag)``: name -> id, and the inverse mapping derived from it.
    """
    train_stream = datasets.load_dataset('nlpaueb/finer-139', split='train', streaming=True)
    tag_names = train_stream.features['ner_tags'].feature.names

    tag2idx = {}
    for position, name in enumerate(tag_names):
        tag2idx[name] = int(position)

    # Invert the finished mapping so both directions always agree.
    idx2tag = dict(zip(tag2idx.values(), tag2idx.keys()))
    return tag2idx, idx2tag

In [None]:
# Module-level tag lookups: produce_multitask_data and sampling_data read
# tag2idx / idx2tag as globals rather than taking them as parameters.
tag2idx, idx2tag = load_dataset_tags()

In [None]:
# Names of the label groups to extract. Each name is matched as a substring of
# the tag names, and is also used as the output directory / file-name prefix.
multi_task_dataset = [
    'ShareBasedCompensation',
    'Amortization',
    'BusinessCombination',
    'Stock',
    'DebtInstrument',
    'EmployeeServiceShareBasedCompensation',
    'EquityMethodInvestment',
    'LineOfCredit',
    'LossContingency',
    'OperatingLease',
    'Revenue',
    'Tax',
    'Equity',
    'BusinessAcquisition',
    'Transaction',
    'Loss',
]

In [None]:
def produce_new_label(tag2idx,new_dataset_name:str):
    """Select the tags whose name contains ``new_dataset_name`` and renumber them densely.

    Args:
        tag2idx: mapping of tag name -> original integer id.
        new_dataset_name: substring a tag name must contain to be selected.

    Returns:
        A 4-tuple ``(new_label_list, new_label_dict, label_dict, converter)``:
        - new_label_list: ``[0]`` followed by the original ids of the selected tags,
          in ``tag2idx`` iteration order;
        - new_label_dict: ``{'O': 0}`` plus selected tag name -> original id;
        - label_dict: the same tag names mapped to their rank when ordered by original id;
        - converter: original id -> rank.
    """
    matching = [(tag, idx) for tag, idx in tag2idx.items() if new_dataset_name in tag]

    new_label_list = [0] + [idx for _, idx in matching]
    new_label_dict = {'O': 0}
    new_label_dict.update(matching)

    # Rank the selected tags by their original id; the rank becomes the new id.
    label_dict = dict(new_label_dict)
    ranked_tags = sorted(new_label_dict.items(), key=lambda pair: pair[1])
    for dense_id, (tag, _) in enumerate(ranked_tags):
        label_dict[tag] = dense_id

    converter = {original_id: label_dict[tag] for tag, original_id in new_label_dict.items()}

    return new_label_list,new_label_dict,label_dict,converter

In [None]:
def sampling_data(label_list,sample_num ,ner_tags,tokens):
    """Pick up to ``sample_num`` example sentences per label and highlight their tagged tokens.

    For every non-zero label in ``label_list`` the sentences are scanned in order
    and the first ``sample_num`` sentences that contain the label, and were not
    already picked for an earlier label, are selected. The tags of the picked
    sentences are converted to tag names through the module-level ``idx2tag``,
    and every token whose tag name is not 'O' is wrapped in '******' markers.

    The picked token lists are copied before they are marked, so the caller's
    ``tokens`` are never modified. Without the copy the markers would be written
    into the caller's data and into anything built from it afterwards.

    Args:
        label_list: integer label ids to sample for; id 0 is skipped.
        sample_num: maximum number of sentences picked per label.
        ner_tags: list of per-sentence integer tag lists.
        tokens: list of per-sentence token lists, aligned with ``ner_tags``.

    Returns:
        ``{'tokens': ..., 'ner_tags': ...}`` holding the marked token lists and
        the tag-name lists of the picked sentences.
    """
    picked_tokens = []
    picked_tags = []
    picked_indices = set()  # sentences already chosen for an earlier label

    for label in label_list:
        if label == 0:
            continue
        taken = 0
        for index, sentence_tags in enumerate(ner_tags):
            if taken == sample_num:
                break
            if label in sentence_tags and index not in picked_indices:
                # Copy: the markers added below must not leak into the caller's list.
                picked_tokens.append(list(tokens[index]))
                picked_tags.append(ner_tags[index])
                picked_indices.add(index)
                taken += 1

    ner_tags_sampl = convert_labels(picked_tags, idx2tag)
    for sentence_tokens, sentence_labels in zip(picked_tokens, ner_tags_sampl):
        for position, label_name in enumerate(sentence_labels):
            if label_name != 'O':
                sentence_tokens[position] = '******' + sentence_tokens[position] + '******'

    return {'tokens': picked_tokens, 'ner_tags': ner_tags_sampl}
                
                
    

In [None]:
def produce_multitask_data(ner_tags,tokens,dataset_name:str,learning_process:str):
    """Extract the sentences that carry at least one tag of the ``dataset_name`` label group.

    Reads the module-level ``tag2idx`` / ``idx2tag`` lookups and prints a summary
    of the label group along the way.

    Args:
        ner_tags: list of per-sentence integer tag lists (not modified).
        tokens: list of per-sentence token lists, aligned with ``ner_tags``.
        dataset_name: substring that selects the label group (see produce_new_label).
        learning_process: split name; only used in the first printed line.

    Returns:
        A 7-tuple:
        - new_ner_tags: tag lists of the kept sentences, still using original ids,
          with every id outside the group replaced by 0;
        - new_tokens: the matching token lists (the same list objects as in ``tokens``);
        - new_label_dict: tag name -> dense id for the group;
        - converter: original id -> dense id, as returned by produce_new_label;
        - index_save: positions of the kept sentences in the input, in pick order;
        - final_label_list: original ids that actually occur in the kept sentences;
        - relative_label: tag name -> float count of out-of-group tags co-occurring
          with the group's labels.
    """
    print("Get " + learning_process +" "+dataset_name+" data")

    new_label_list,new_label_dict,_dense_label_dict,converter = produce_new_label(tag2idx,dataset_name)
    print("Label for dataset "+ dataset_name)
    print(new_label_list)
    print("Total number of labels "+ str(len(new_label_list)))
    print('Original database label and idx')

    for key in new_label_dict.keys():
        print(key+": "+ str(new_label_dict[key]))

    # Set view of the group's ids for constant-time membership checks.
    selected_ids = set(new_label_list)

    # Count tags from outside the group in sentences that contain a listed label.
    # NOTE(review): new_label_list starts with 0 ('O'), so any sentence containing
    # an untagged token is counted as well — confirm this is intended.
    relative_label = {}
    for label_id in new_label_list:
        for sentence_tags in ner_tags:
            if label_id in sentence_tags:
                for value in sentence_tags:
                    if value != 0 and value not in selected_ids:
                        tag_name = idx2tag[value]
                        relative_label[tag_name] = relative_label.get(tag_name, 0.0) + 1.0

    # Fresh per-sentence copies in which every id outside the group becomes 0.
    filtered_tags = [
        [value if value in selected_ids else 0 for value in sentence_tags]
        for sentence_tags in ner_tags
    ]

    # Keep each sentence once, in label-major order: a sentence is picked while
    # scanning for the first non-zero group label it contains.
    new_ner_tags = []
    new_tokens = []
    index_save = []
    seen_indices = set()
    for label_id in new_label_list:
        if label_id == 0:
            continue
        for index, sentence_tags in enumerate(filtered_tags):
            if label_id in sentence_tags and index not in seen_indices:
                new_ner_tags.append(sentence_tags)
                new_tokens.append(tokens[index])
                index_save.append(index)
                seen_indices.add(index)

    flat_tags = [tag for sentence_tags in new_ner_tags for tag in sentence_tags]
    present_ids = set(flat_tags)
    print('New label contain in this dataset')
    print(present_ids)
    final_label_list = list(present_ids)

    # Re-express the group's ids as positions in new_label_list, then as ranks.
    for key in new_label_dict.keys():
        new_label_dict[key] = new_label_list.index(new_label_dict[key])

    ranked_items = sorted(new_label_dict.items(), key=lambda item: item[1])
    for rank, (key, _position) in enumerate(ranked_items):
        new_label_dict[key] = rank

    print("New label for dataset "+ dataset_name)
    print('New database label and idx')
    for key in new_label_dict.keys():
        print(key+": "+ str(new_label_dict[key]))
    print('Total number of this database '+ str(len(index_save)))
    print('=============================================================')

    return new_ner_tags,new_tokens,new_label_dict,converter,index_save,final_label_list,relative_label

In [None]:
def convert_labels(ner_tags,converter):
    """Map every tag in ``ner_tags`` through ``converter`` and return new lists.

    The input is left untouched; the result is built directly instead of deep
    copying the input and then overwriting every element.

    Args:
        ner_tags: iterable of per-sentence tag sequences.
        converter: lookup indexed by tag (``converter[tag]``), e.g. a dict.

    Returns:
        A list of lists with the same shape as ``ner_tags`` holding the converted tags.

    Raises:
        KeyError (or IndexError for sequence converters) when a tag has no entry.
    """
    return [[converter[value] for value in sentence_tags] for sentence_tags in ner_tags]

In [13]:
logging.info("********** Scheme: Starting extract database **********")

process = ['train','validation','test']
sample_num = 5

# Raw string so the backslashes of the Windows path are never parsed as escape sequences.
output_root = r'D:\学习相关\新建文件夹'

# (ner_tags, tokens) source lists for every split name in `process`.
split_inputs = {
    'train': (train_ner_tags, train_tokens),
    'validation': (validation_ner_tags, validation_tokens),
    'test': (test_ner_tags, test_tokens),
}

for dataset_name in multi_task_dataset:
    print('======================================================================================')
    # Each label group is written into its own sub-directory of output_root.
    # exist_ok lets the cell be re-run without failing on an existing directory.
    os.chdir(output_root)
    os.makedirs(dataset_name, exist_ok=True)
    os.chdir(dataset_name)

    for split_name in process:

        logging.info("********** Scheme: Starting extract database for "+ dataset_name+" "+ split_name+" **********")

        # NOTE(review): an unfinished `appeared_labels = ` assignment in the train
        # branch was a syntax error and the name was never read, so it was dropped.
        split_ner_tags, split_tokens = split_inputs[split_name]
        new_ner_tags,new_tokens,new_label_dict,converter,index_save,label_list,relative_label = produce_multitask_data(split_ner_tags,split_tokens,dataset_name,split_name)

        # Two views of the same sentences: tag names and dense per-group ids.
        ner_tags_strlabel = convert_labels(new_ner_tags,idx2tag)
        ner_tags_numlabel = convert_labels(new_ner_tags,converter)

        num = {'tokens':new_tokens,'ner_tags':ner_tags_numlabel}
        sttr = {'tokens':new_tokens,'ner_tags':ner_tags_strlabel}

        file_prefix = dataset_name+'_'+split_name

        logging.info("********** Producing relevant tags **********")
        # Explicit column labels keep the sort working when relative_label is empty.
        r_label_df = (
            pd.DataFrame(list(relative_label.items()), columns=[0, 1])
            .sort_values(by = 1,ascending = False)
            .reset_index(drop=True)
        )
        r_label_df.to_csv(file_prefix+ '_relevant_tags'+'.csv')

        logging.info("********** Sampling....... **********")
        sampled_data = sampling_data(label_list,sample_num ,new_ner_tags,new_tokens)
        sd = pd.DataFrame.from_dict(sampled_data)
        sd.to_csv(file_prefix+ '_sample'+'.csv')

        logging.info("********** Producing " + split_name+ " number database **********")
        df = pd.DataFrame.from_dict(num)
        num_database = datasets.Dataset.from_pandas(df)
        num_database = num_database.shuffle(seed=42)

        df.to_csv(file_prefix+ '_num'+'.csv')
        num_database.save_to_disk(file_prefix+'_number')

        logging.info("********** Producing " + split_name+ " str database **********")

        df = pd.DataFrame.from_dict(sttr)
        str_database = datasets.Dataset.from_pandas(df)
        str_database = str_database.shuffle(seed=42)

        # NOTE(review): the file name has a space before '.csv' ('..._str .csv');
        # kept as is so existing readers of these files still find them.
        df.to_csv(file_prefix+ '_str '+'.csv')
        str_database.save_to_disk(file_prefix+'_string')

        logging.info("********** Finishing save the database **********")

        # Separate name: new_label_dict stays a dict instead of being rebound to a DataFrame.
        label_map_df = pd.DataFrame([(k, v) for k, v in new_label_dict.items()])
        label_map_df.to_csv(file_prefix+'.csv')

        logging.info("********** Finishing extracting database for " + dataset_name+ " "+ split_name+" **********")
        print('================================================================================================')
    logging.info("********** Finishing extracting all database for " + dataset_name+ " **********")
        

    
    

INFO:root:********** Scheme: Starting extract database **********
INFO:root:********** Scheme: Starting extract database for ShareBasedCompensation train **********


Get train ShareBasedCompensation data
Label for dataset ShareBasedCompensation
[0, 4, 59, 60, 61, 62, 63, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 177, 183, 188, 189, 193, 206, 211, 216, 223, 234, 257, 267]
Total number of labels 33
Original database label and idx
O: 0
B-AllocatedShareBasedCompensationExpense: 4
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 59
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 60
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 61
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 62
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
B-ShareBasedCompensation: 137
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardAwardVestingPeriod1: 138
I-ShareB

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/23 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/23 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for ShareBasedCompensation train **********
INFO:root:********** Scheme: Starting extract database for ShareBasedCompensation validation **********


Get validation ShareBasedCompensation data
Label for dataset ShareBasedCompensation
[0, 4, 59, 60, 61, 62, 63, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 177, 183, 188, 189, 193, 206, 211, 216, 223, 234, 257, 267]
Total number of labels 33
Original database label and idx
O: 0
B-AllocatedShareBasedCompensationExpense: 4
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 59
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 60
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 61
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 62
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
B-ShareBasedCompensation: 137
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardAwardVestingPeriod1: 138
I-S

INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for ShareBasedCompensation validation **********
INFO:root:********** Scheme: Starting extract database for ShareBasedCompensation test **********


Get test ShareBasedCompensation data
Label for dataset ShareBasedCompensation
[0, 4, 59, 60, 61, 62, 63, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 177, 183, 188, 189, 193, 206, 211, 216, 223, 234, 257, 267]
Total number of labels 33
Original database label and idx
O: 0
B-AllocatedShareBasedCompensationExpense: 4
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 59
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 60
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 61
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 62
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
B-ShareBasedCompensation: 137
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardAwardVestingPeriod1: 138
I-ShareBa

INFO:root:********** Sampling....... **********


New label contain in this dataset
{0, 4, 137, 138, 139, 140, 142, 143, 144, 145, 147, 148, 149, 150, 59, 60, 61, 62, 63}
New label for dataset ShareBasedCompensation
New database label and idx
O: 0
B-AllocatedShareBasedCompensationExpense: 1
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 2
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 3
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 4
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 5
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 6
B-ShareBasedCompensation: 7
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardAwardVestingPeriod1: 8
I-ShareBasedCompensationArrangementByShareBasedPaymentAwardAwardVestingPeriod1: 9
B-ShareBasedCompensationArrangementB

INFO:root:********** Producing test number database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for ShareBasedCompensation test **********
INFO:root:********** Finishing extracting all database for ShareBasedCompensation **********
INFO:root:********** Scheme: Starting extract database for Amortization train **********


Get train Amortization data
Label for dataset Amortization
[0, 5, 6, 7, 21, 208, 214]
Total number of labels 7
Original database label and idx
O: 0
B-AmortizationOfFinancingCosts: 5
B-AmortizationOfIntangibleAssets: 6
I-AmortizationOfIntangibleAssets: 7
B-CapitalizedContractCostAmortization: 21
I-AmortizationOfFinancingCosts: 208
I-CapitalizedContractCostAmortization: 214
New label contain in this dataset
{0, 21, 5, 6}
New label for dataset Amortization
New database label and idx
O: 0
B-AmortizationOfFinancingCosts: 1
B-AmortizationOfIntangibleAssets: 2
I-AmortizationOfIntangibleAssets: 3
B-CapitalizedContractCostAmortization: 4
I-AmortizationOfFinancingCosts: 5
I-CapitalizedContractCostAmortization: 6
Total number of this database 3518


INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/4 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/4 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Amortization train **********
INFO:root:********** Scheme: Starting extract database for Amortization validation **********


Get validation Amortization data
Label for dataset Amortization
[0, 5, 6, 7, 21, 208, 214]
Total number of labels 7
Original database label and idx
O: 0
B-AmortizationOfFinancingCosts: 5
B-AmortizationOfIntangibleAssets: 6
I-AmortizationOfIntangibleAssets: 7
B-CapitalizedContractCostAmortization: 21
I-AmortizationOfFinancingCosts: 208
I-CapitalizedContractCostAmortization: 214


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 5, 6, 7, 21}
New label for dataset Amortization
New database label and idx
O: 0
B-AmortizationOfFinancingCosts: 1
B-AmortizationOfIntangibleAssets: 2
I-AmortizationOfIntangibleAssets: 3
B-CapitalizedContractCostAmortization: 4
I-AmortizationOfFinancingCosts: 5
I-CapitalizedContractCostAmortization: 6
Total number of this database 386


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Amortization validation **********
INFO:root:********** Scheme: Starting extract database for Amortization test **********


Get test Amortization data
Label for dataset Amortization
[0, 5, 6, 7, 21, 208, 214]
Total number of labels 7
Original database label and idx
O: 0
B-AmortizationOfFinancingCosts: 5
B-AmortizationOfIntangibleAssets: 6
I-AmortizationOfIntangibleAssets: 7
B-CapitalizedContractCostAmortization: 21
I-AmortizationOfFinancingCosts: 208
I-CapitalizedContractCostAmortization: 214


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 5, 6, 7, 21}
New label for dataset Amortization
New database label and idx
O: 0
B-AmortizationOfFinancingCosts: 1
B-AmortizationOfIntangibleAssets: 2
I-AmortizationOfIntangibleAssets: 3
B-CapitalizedContractCostAmortization: 4
I-AmortizationOfFinancingCosts: 5
I-CapitalizedContractCostAmortization: 6
Total number of this database 327


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Amortization test **********
INFO:root:********** Finishing extracting all database for Amortization **********
INFO:root:********** Scheme: Starting extract database for BusinessCombination train **********


Get train BusinessCombination data
Label for dataset BusinessCombination
[0, 16, 17, 18, 19, 20, 194, 202, 221, 241, 246]
Total number of labels 11
Original database label and idx
O: 0
B-BusinessCombinationAcquisitionRelatedCosts: 16
B-BusinessCombinationConsiderationTransferred1: 17
B-BusinessCombinationContingentConsiderationLiability: 18
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 19
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 20
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 194
I-BusinessCombinationConsiderationTransferred1: 202
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 221
I-BusinessCombinationContingentConsiderationLiability: 241
I-BusinessCombinationAcquisitionRelatedCosts: 246
New label contain in this dataset
{0, 16, 17, 18, 19, 20}
New label for da

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/7 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/7 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for BusinessCombination train **********
INFO:root:********** Scheme: Starting extract database for BusinessCombination validation **********


Get validation BusinessCombination data
Label for dataset BusinessCombination
[0, 16, 17, 18, 19, 20, 194, 202, 221, 241, 246]
Total number of labels 11
Original database label and idx
O: 0
B-BusinessCombinationAcquisitionRelatedCosts: 16
B-BusinessCombinationConsiderationTransferred1: 17
B-BusinessCombinationContingentConsiderationLiability: 18
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 19
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 20
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 194
I-BusinessCombinationConsiderationTransferred1: 202
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 221
I-BusinessCombinationContingentConsiderationLiability: 241
I-BusinessCombinationAcquisitionRelatedCosts: 246


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 16, 17, 18, 19, 20}
New label for dataset BusinessCombination
New database label and idx
O: 0
B-BusinessCombinationAcquisitionRelatedCosts: 1
B-BusinessCombinationConsiderationTransferred1: 2
B-BusinessCombinationContingentConsiderationLiability: 3
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 4
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 5
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 6
I-BusinessCombinationConsiderationTransferred1: 7
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 8
I-BusinessCombinationContingentConsiderationLiability: 9
I-BusinessCombinationAcquisitionRelatedCosts: 10
Total number of this database 570


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for BusinessCombination validation **********
INFO:root:********** Scheme: Starting extract database for BusinessCombination test **********


Get test BusinessCombination data
Label for dataset BusinessCombination
[0, 16, 17, 18, 19, 20, 194, 202, 221, 241, 246]
Total number of labels 11
Original database label and idx
O: 0
B-BusinessCombinationAcquisitionRelatedCosts: 16
B-BusinessCombinationConsiderationTransferred1: 17
B-BusinessCombinationContingentConsiderationLiability: 18
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 19
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 20
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 194
I-BusinessCombinationConsiderationTransferred1: 202
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 221
I-BusinessCombinationContingentConsiderationLiability: 241
I-BusinessCombinationAcquisitionRelatedCosts: 246


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 16, 17, 18, 19, 20}
New label for dataset BusinessCombination
New database label and idx
O: 0
B-BusinessCombinationAcquisitionRelatedCosts: 1
B-BusinessCombinationConsiderationTransferred1: 2
B-BusinessCombinationContingentConsiderationLiability: 3
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 4
B-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 5
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibleAssetsOtherThanGoodwill: 6
I-BusinessCombinationConsiderationTransferred1: 7
I-BusinessCombinationRecognizedIdentifiableAssetsAcquiredAndLiabilitiesAssumedIntangibles: 8
I-BusinessCombinationContingentConsiderationLiability: 9
I-BusinessCombinationAcquisitionRelatedCosts: 10
Total number of this database 577


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for BusinessCombination test **********
INFO:root:********** Finishing extracting all database for BusinessCombination **********
INFO:root:********** Scheme: Starting extract database for Stock train **********


Get train Stock data
Label for dataset Stock
[0, 24, 25, 26, 27, 28, 29, 115, 116, 117, 118, 134, 135, 136, 156, 157, 158, 159, 160, 161, 162, 164, 165, 166, 167, 171, 195, 203, 207, 210, 213, 219, 227, 229, 255, 256, 270]
Total number of labels 37
Original database label and idx
O: 0
B-CommonStockCapitalSharesReservedForFutureIssuance: 24
B-CommonStockDividendsPerShareDeclared: 25
B-CommonStockParOrStatedValuePerShare: 26
B-CommonStockSharesAuthorized: 27
I-CommonStockSharesAuthorized: 28
B-CommonStockSharesOutstanding: 29
B-PreferredStockDividendRatePercentage: 115
B-PreferredStockSharesAuthorized: 116
I-PreferredStockSharesAuthorized: 117
B-ProceedsFromIssuanceOfCommonStock: 118
B-SaleOfStockNumberOfSharesIssuedInTransaction: 134
I-SaleOfStockNumberOfSharesIssuedInTransaction: 135
B-SaleOfStockPricePerShare: 136
B-StockIssuedDuringPeriodSharesNewIssues: 156
I-StockIssuedDuringPeriodSharesNewIssues: 157
B-StockRepurchaseProgramAuthorizedAmount1: 158
B-StockRepurchaseProgramRemainingA

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/16 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/16 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Stock train **********
INFO:root:********** Scheme: Starting extract database for Stock validation **********


Get validation Stock data
Label for dataset Stock
[0, 24, 25, 26, 27, 28, 29, 115, 116, 117, 118, 134, 135, 136, 156, 157, 158, 159, 160, 161, 162, 164, 165, 166, 167, 171, 195, 203, 207, 210, 213, 219, 227, 229, 255, 256, 270]
Total number of labels 37
Original database label and idx
O: 0
B-CommonStockCapitalSharesReservedForFutureIssuance: 24
B-CommonStockDividendsPerShareDeclared: 25
B-CommonStockParOrStatedValuePerShare: 26
B-CommonStockSharesAuthorized: 27
I-CommonStockSharesAuthorized: 28
B-CommonStockSharesOutstanding: 29
B-PreferredStockDividendRatePercentage: 115
B-PreferredStockSharesAuthorized: 116
I-PreferredStockSharesAuthorized: 117
B-ProceedsFromIssuanceOfCommonStock: 118
B-SaleOfStockNumberOfSharesIssuedInTransaction: 134
I-SaleOfStockNumberOfSharesIssuedInTransaction: 135
B-SaleOfStockPricePerShare: 136
B-StockIssuedDuringPeriodSharesNewIssues: 156
I-StockIssuedDuringPeriodSharesNewIssues: 157
B-StockRepurchaseProgramAuthorizedAmount1: 158
B-StockRepurchaseProgramRemai

INFO:root:********** Sampling....... **********


New label contain in this dataset
{0, 134, 136, 24, 25, 26, 27, 28, 157, 156, 29, 158, 159, 160, 161, 164, 165, 167, 115, 116, 117, 118}
New label for dataset Stock
New database label and idx
O: 0
B-CommonStockCapitalSharesReservedForFutureIssuance: 1
B-CommonStockDividendsPerShareDeclared: 2
B-CommonStockParOrStatedValuePerShare: 3
B-CommonStockSharesAuthorized: 4
I-CommonStockSharesAuthorized: 5
B-CommonStockSharesOutstanding: 6
B-PreferredStockDividendRatePercentage: 7
B-PreferredStockSharesAuthorized: 8
I-PreferredStockSharesAuthorized: 9
B-ProceedsFromIssuanceOfCommonStock: 10
B-SaleOfStockNumberOfSharesIssuedInTransaction: 11
I-SaleOfStockNumberOfSharesIssuedInTransaction: 12
B-SaleOfStockPricePerShare: 13
B-StockIssuedDuringPeriodSharesNewIssues: 14
I-StockIssuedDuringPeriodSharesNewIssues: 15
B-StockRepurchaseProgramAuthorizedAmount1: 16
B-StockRepurchaseProgramRemainingAuthorizedRepurchaseAmount1: 17
B-StockRepurchasedAndRetiredDuringPeriodShares: 18
B-StockRepurchasedDuringPe

INFO:root:********** Producing validation number database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Stock validation **********
INFO:root:********** Scheme: Starting extract database for Stock test **********


Get test Stock data
Label for dataset Stock
[0, 24, 25, 26, 27, 28, 29, 115, 116, 117, 118, 134, 135, 136, 156, 157, 158, 159, 160, 161, 162, 164, 165, 166, 167, 171, 195, 203, 207, 210, 213, 219, 227, 229, 255, 256, 270]
Total number of labels 37
Original database label and idx
O: 0
B-CommonStockCapitalSharesReservedForFutureIssuance: 24
B-CommonStockDividendsPerShareDeclared: 25
B-CommonStockParOrStatedValuePerShare: 26
B-CommonStockSharesAuthorized: 27
I-CommonStockSharesAuthorized: 28
B-CommonStockSharesOutstanding: 29
B-PreferredStockDividendRatePercentage: 115
B-PreferredStockSharesAuthorized: 116
I-PreferredStockSharesAuthorized: 117
B-ProceedsFromIssuanceOfCommonStock: 118
B-SaleOfStockNumberOfSharesIssuedInTransaction: 134
I-SaleOfStockNumberOfSharesIssuedInTransaction: 135
B-SaleOfStockPricePerShare: 136
B-StockIssuedDuringPeriodSharesNewIssues: 156
I-StockIssuedDuringPeriodSharesNewIssues: 157
B-StockRepurchaseProgramAuthorizedAmount1: 158
B-StockRepurchaseProgramRemainingAu

INFO:root:********** Sampling....... **********


New label contain in this dataset
{0, 134, 136, 24, 25, 26, 27, 156, 28, 29, 158, 160, 161, 159, 164, 165, 167, 115, 116, 118}
New label for dataset Stock
New database label and idx
O: 0
B-CommonStockCapitalSharesReservedForFutureIssuance: 1
B-CommonStockDividendsPerShareDeclared: 2
B-CommonStockParOrStatedValuePerShare: 3
B-CommonStockSharesAuthorized: 4
I-CommonStockSharesAuthorized: 5
B-CommonStockSharesOutstanding: 6
B-PreferredStockDividendRatePercentage: 7
B-PreferredStockSharesAuthorized: 8
I-PreferredStockSharesAuthorized: 9
B-ProceedsFromIssuanceOfCommonStock: 10
B-SaleOfStockNumberOfSharesIssuedInTransaction: 11
I-SaleOfStockNumberOfSharesIssuedInTransaction: 12
B-SaleOfStockPricePerShare: 13
B-StockIssuedDuringPeriodSharesNewIssues: 14
I-StockIssuedDuringPeriodSharesNewIssues: 15
B-StockRepurchaseProgramAuthorizedAmount1: 16
B-StockRepurchaseProgramRemainingAuthorizedRepurchaseAmount1: 17
B-StockRepurchasedAndRetiredDuringPeriodShares: 18
B-StockRepurchasedDuringPeriodShares

INFO:root:********** Producing test number database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Stock test **********
INFO:root:********** Finishing extracting all database for Stock **********
INFO:root:********** Scheme: Starting extract database for DebtInstrument train **********


Get train DebtInstrument data
Label for dataset DebtInstrument
[0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 174, 178, 185, 186, 191, 204, 212, 263]
Total number of labels 23
Original database label and idx
O: 0
B-DebtInstrumentBasisSpreadOnVariableRate1: 34
B-DebtInstrumentCarryingAmount: 35
B-DebtInstrumentConvertibleConversionPrice1: 36
B-DebtInstrumentFaceAmount: 37
I-DebtInstrumentFaceAmount: 38
B-DebtInstrumentFairValue: 39
B-DebtInstrumentInterestRateEffectivePercentage: 40
B-DebtInstrumentInterestRateStatedPercentage: 41
B-DebtInstrumentMaturityDate: 42
I-DebtInstrumentMaturityDate: 43
B-DebtInstrumentRedemptionPricePercentage: 44
B-DebtInstrumentTerm: 45
I-DebtInstrumentTerm: 46
B-DebtInstrumentUnamortizedDiscount: 47
I-DebtInstrumentFairValue: 174
I-DebtInstrumentInterestRateStatedPercentage: 178
I-DebtInstrumentCarryingAmount: 185
I-DebtInstrumentConvertibleConversionPrice1: 186
I-DebtInstrumentUnamortizedDiscount: 191
I-DebtInstrumentBasisSpreadOnVariableRate1

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/29 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/29 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for DebtInstrument train **********
INFO:root:********** Scheme: Starting extract database for DebtInstrument validation **********


Get validation DebtInstrument data
Label for dataset DebtInstrument
[0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 174, 178, 185, 186, 191, 204, 212, 263]
Total number of labels 23
Original database label and idx
O: 0
B-DebtInstrumentBasisSpreadOnVariableRate1: 34
B-DebtInstrumentCarryingAmount: 35
B-DebtInstrumentConvertibleConversionPrice1: 36
B-DebtInstrumentFaceAmount: 37
I-DebtInstrumentFaceAmount: 38
B-DebtInstrumentFairValue: 39
B-DebtInstrumentInterestRateEffectivePercentage: 40
B-DebtInstrumentInterestRateStatedPercentage: 41
B-DebtInstrumentMaturityDate: 42
I-DebtInstrumentMaturityDate: 43
B-DebtInstrumentRedemptionPricePercentage: 44
B-DebtInstrumentTerm: 45
I-DebtInstrumentTerm: 46
B-DebtInstrumentUnamortizedDiscount: 47
I-DebtInstrumentFairValue: 174
I-DebtInstrumentInterestRateStatedPercentage: 178
I-DebtInstrumentCarryingAmount: 185
I-DebtInstrumentConvertibleConversionPrice1: 186
I-DebtInstrumentUnamortizedDiscount: 191
I-DebtInstrumentBasisSpreadOnVariable

INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


Flattening the indices:   0%|          | 0/4 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/4 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for DebtInstrument validation **********
INFO:root:********** Scheme: Starting extract database for DebtInstrument test **********


Get test DebtInstrument data
Label for dataset DebtInstrument
[0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 174, 178, 185, 186, 191, 204, 212, 263]
Total number of labels 23
Original database label and idx
O: 0
B-DebtInstrumentBasisSpreadOnVariableRate1: 34
B-DebtInstrumentCarryingAmount: 35
B-DebtInstrumentConvertibleConversionPrice1: 36
B-DebtInstrumentFaceAmount: 37
I-DebtInstrumentFaceAmount: 38
B-DebtInstrumentFairValue: 39
B-DebtInstrumentInterestRateEffectivePercentage: 40
B-DebtInstrumentInterestRateStatedPercentage: 41
B-DebtInstrumentMaturityDate: 42
I-DebtInstrumentMaturityDate: 43
B-DebtInstrumentRedemptionPricePercentage: 44
B-DebtInstrumentTerm: 45
I-DebtInstrumentTerm: 46
B-DebtInstrumentUnamortizedDiscount: 47
I-DebtInstrumentFairValue: 174
I-DebtInstrumentInterestRateStatedPercentage: 178
I-DebtInstrumentCarryingAmount: 185
I-DebtInstrumentConvertibleConversionPrice1: 186
I-DebtInstrumentUnamortizedDiscount: 191
I-DebtInstrumentBasisSpreadOnVariableRate1:

INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


Flattening the indices:   0%|          | 0/4 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/4 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for DebtInstrument test **********
INFO:root:********** Finishing extracting all database for DebtInstrument **********
INFO:root:********** Scheme: Starting extract database for EmployeeServiceShareBasedCompensation train **********


Get train EmployeeServiceShareBasedCompensation data
Label for dataset EmployeeServiceShareBasedCompensation
[0, 59, 60, 61, 62, 63, 177, 189, 257]
Total number of labels 9
Original database label and idx
O: 0
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 59
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 60
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 61
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 62
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 177
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 189
I-EmployeeServiceShareBasedCo

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/6 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/6 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for EmployeeServiceShareBasedCompensation train **********
INFO:root:********** Scheme: Starting extract database for EmployeeServiceShareBasedCompensation validation **********


Get validation EmployeeServiceShareBasedCompensation data
Label for dataset EmployeeServiceShareBasedCompensation
[0, 59, 60, 61, 62, 63, 177, 189, 257]
Total number of labels 9
Original database label and idx
O: 0
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 59
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 60
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 61
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 62
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 177
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 189
I-EmployeeServiceShareBa

INFO:root:********** Sampling....... **********


New label contain in this dataset
{0, 59, 60, 61, 62, 63}
New label for dataset EmployeeServiceShareBasedCompensation
New database label and idx
O: 0
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 1
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 2
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 3
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 4
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 5
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 6
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 7
I-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 8
Total number of this d

INFO:root:********** Producing validation number database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for EmployeeServiceShareBasedCompensation validation **********
INFO:root:********** Scheme: Starting extract database for EmployeeServiceShareBasedCompensation test **********


Get test EmployeeServiceShareBasedCompensation data
Label for dataset EmployeeServiceShareBasedCompensation
[0, 59, 60, 61, 62, 63, 177, 189, 257]
Total number of labels 9
Original database label and idx
O: 0
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 59
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 60
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 61
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 62
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 177
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 189
I-EmployeeServiceShareBasedCom

INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 59, 60, 61, 62, 63}
New label for dataset EmployeeServiceShareBasedCompensation
New database label and idx
O: 0
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 1
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 2
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedPeriodForRecognition1: 3
B-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 4
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 5
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognized: 6
I-EmployeeServiceShareBasedCompensationNonvestedAwardsTotalCompensationCostNotYetRecognizedShareBasedAwardsOtherThanOptions: 7
I-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 8
Total number of this d

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for EmployeeServiceShareBasedCompensation test **********
INFO:root:********** Finishing extracting all database for EmployeeServiceShareBasedCompensation **********
INFO:root:********** Scheme: Starting extract database for EquityMethodInvestment train **********


Get train EquityMethodInvestment data
Label for dataset EquityMethodInvestment
[0, 64, 65, 66, 73, 190, 258]
Total number of labels 7
Original database label and idx
O: 0
B-EquityMethodInvestmentOwnershipPercentage: 64
I-EquityMethodInvestmentOwnershipPercentage: 65
B-EquityMethodInvestments: 66
B-IncomeLossFromEquityMethodInvestments: 73
I-EquityMethodInvestments: 190
I-IncomeLossFromEquityMethodInvestments: 258
New label contain in this dataset
{0, 64, 66, 65, 73}
New label for dataset EquityMethodInvestment
New database label and idx
O: 0
B-EquityMethodInvestmentOwnershipPercentage: 1
I-EquityMethodInvestmentOwnershipPercentage: 2
B-EquityMethodInvestments: 3
B-IncomeLossFromEquityMethodInvestments: 4
I-EquityMethodInvestments: 5
I-IncomeLossFromEquityMethodInvestments: 6
Total number of this database 5351


INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/6 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/6 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for EquityMethodInvestment train **********
INFO:root:********** Scheme: Starting extract database for EquityMethodInvestment validation **********


Get validation EquityMethodInvestment data
Label for dataset EquityMethodInvestment
[0, 64, 65, 66, 73, 190, 258]
Total number of labels 7
Original database label and idx
O: 0
B-EquityMethodInvestmentOwnershipPercentage: 64
I-EquityMethodInvestmentOwnershipPercentage: 65
B-EquityMethodInvestments: 66
B-IncomeLossFromEquityMethodInvestments: 73
I-EquityMethodInvestments: 190
I-IncomeLossFromEquityMethodInvestments: 258


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 73, 66, 64}
New label for dataset EquityMethodInvestment
New database label and idx
O: 0
B-EquityMethodInvestmentOwnershipPercentage: 1
I-EquityMethodInvestmentOwnershipPercentage: 2
B-EquityMethodInvestments: 3
B-IncomeLossFromEquityMethodInvestments: 4
I-EquityMethodInvestments: 5
I-IncomeLossFromEquityMethodInvestments: 6
Total number of this database 562


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for EquityMethodInvestment validation **********
INFO:root:********** Scheme: Starting extract database for EquityMethodInvestment test **********


Get test EquityMethodInvestment data
Label for dataset EquityMethodInvestment
[0, 64, 65, 66, 73, 190, 258]
Total number of labels 7
Original database label and idx
O: 0
B-EquityMethodInvestmentOwnershipPercentage: 64
I-EquityMethodInvestmentOwnershipPercentage: 65
B-EquityMethodInvestments: 66
B-IncomeLossFromEquityMethodInvestments: 73
I-EquityMethodInvestments: 190
I-IncomeLossFromEquityMethodInvestments: 258


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 73, 66, 64}
New label for dataset EquityMethodInvestment
New database label and idx
O: 0
B-EquityMethodInvestmentOwnershipPercentage: 1
I-EquityMethodInvestmentOwnershipPercentage: 2
B-EquityMethodInvestments: 3
B-IncomeLossFromEquityMethodInvestments: 4
I-EquityMethodInvestments: 5
I-IncomeLossFromEquityMethodInvestments: 6
Total number of this database 519


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for EquityMethodInvestment test **********
INFO:root:********** Finishing extracting all database for EquityMethodInvestment **********
INFO:root:********** Scheme: Starting extract database for LineOfCredit train **********


Get train LineOfCredit data
Label for dataset LineOfCredit
[0, 83, 84, 85, 86, 87, 88, 89, 198, 199, 201, 209, 225, 236, 271]
Total number of labels 15
Original database label and idx
O: 0
B-LineOfCredit: 83
B-LineOfCreditFacilityCommitmentFeePercentage: 84
B-LineOfCreditFacilityCurrentBorrowingCapacity: 85
B-LineOfCreditFacilityInterestRateAtPeriodEnd: 86
B-LineOfCreditFacilityMaximumBorrowingCapacity: 87
B-LineOfCreditFacilityRemainingBorrowingCapacity: 88
B-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage: 89
I-LineOfCredit: 198
I-LineOfCreditFacilityMaximumBorrowingCapacity: 199
I-LineOfCreditFacilityCommitmentFeePercentage: 201
I-LineOfCreditFacilityCurrentBorrowingCapacity: 209
I-LineOfCreditFacilityRemainingBorrowingCapacity: 225
I-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage: 236
I-LineOfCreditFacilityInterestRateAtPeriodEnd: 271
New label contain in this dataset
{0, 83, 84, 85, 86, 87, 88, 89}
New label for dataset LineOfCredit
New database label and idx


INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/16 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/16 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for LineOfCredit train **********
INFO:root:********** Scheme: Starting extract database for LineOfCredit validation **********


Get validation LineOfCredit data
Label for dataset LineOfCredit
[0, 83, 84, 85, 86, 87, 88, 89, 198, 199, 201, 209, 225, 236, 271]
Total number of labels 15
Original database label and idx
O: 0
B-LineOfCredit: 83
B-LineOfCreditFacilityCommitmentFeePercentage: 84
B-LineOfCreditFacilityCurrentBorrowingCapacity: 85
B-LineOfCreditFacilityInterestRateAtPeriodEnd: 86
B-LineOfCreditFacilityMaximumBorrowingCapacity: 87
B-LineOfCreditFacilityRemainingBorrowingCapacity: 88
B-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage: 89
I-LineOfCredit: 198
I-LineOfCreditFacilityMaximumBorrowingCapacity: 199
I-LineOfCreditFacilityCommitmentFeePercentage: 201
I-LineOfCreditFacilityCurrentBorrowingCapacity: 209
I-LineOfCreditFacilityRemainingBorrowingCapacity: 225
I-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage: 236
I-LineOfCreditFacilityInterestRateAtPeriodEnd: 271
New label contain in this dataset
{0, 83, 84, 85, 86, 87, 88, 89}
New label for dataset LineOfCredit
New database label and

INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for LineOfCredit validation **********
INFO:root:********** Scheme: Starting extract database for LineOfCredit test **********


Get test LineOfCredit data
Label for dataset LineOfCredit
[0, 83, 84, 85, 86, 87, 88, 89, 198, 199, 201, 209, 225, 236, 271]
Total number of labels 15
Original database label and idx
O: 0
B-LineOfCredit: 83
B-LineOfCreditFacilityCommitmentFeePercentage: 84
B-LineOfCreditFacilityCurrentBorrowingCapacity: 85
B-LineOfCreditFacilityInterestRateAtPeriodEnd: 86
B-LineOfCreditFacilityMaximumBorrowingCapacity: 87
B-LineOfCreditFacilityRemainingBorrowingCapacity: 88
B-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage: 89
I-LineOfCredit: 198
I-LineOfCreditFacilityMaximumBorrowingCapacity: 199
I-LineOfCreditFacilityCommitmentFeePercentage: 201
I-LineOfCreditFacilityCurrentBorrowingCapacity: 209
I-LineOfCreditFacilityRemainingBorrowingCapacity: 225
I-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage: 236
I-LineOfCreditFacilityInterestRateAtPeriodEnd: 271


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 83, 84, 85, 86, 87, 88, 89}
New label for dataset LineOfCredit
New database label and idx
O: 0
B-LineOfCredit: 1
B-LineOfCreditFacilityCommitmentFeePercentage: 2
B-LineOfCreditFacilityCurrentBorrowingCapacity: 3
B-LineOfCreditFacilityInterestRateAtPeriodEnd: 4
B-LineOfCreditFacilityMaximumBorrowingCapacity: 5
B-LineOfCreditFacilityRemainingBorrowingCapacity: 6
B-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage: 7
I-LineOfCredit: 8
I-LineOfCreditFacilityMaximumBorrowingCapacity: 9
I-LineOfCreditFacilityCommitmentFeePercentage: 10
I-LineOfCreditFacilityCurrentBorrowingCapacity: 11
I-LineOfCreditFacilityRemainingBorrowingCapacity: 12
I-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage: 13
I-LineOfCreditFacilityInterestRateAtPeriodEnd: 14
Total number of this database 1826


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for LineOfCredit test **********
INFO:root:********** Finishing extracting all database for LineOfCredit **********
INFO:root:********** Scheme: Starting extract database for LossContingency train **********


Get train LossContingency data
Label for dataset LossContingency
[0, 92, 93, 94, 95, 96, 172, 235, 242]
Total number of labels 9
Original database label and idx
O: 0
B-LossContingencyAccrualAtCarryingValue: 92
B-LossContingencyDamagesSoughtValue: 93
B-LossContingencyEstimateOfPossibleLoss: 94
B-LossContingencyPendingClaimsNumber: 95
I-LossContingencyPendingClaimsNumber: 96
I-LossContingencyEstimateOfPossibleLoss: 172
I-LossContingencyAccrualAtCarryingValue: 235
I-LossContingencyDamagesSoughtValue: 242
New label contain in this dataset
{0, 96, 92, 93, 94, 95}
New label for dataset LossContingency
New database label and idx
O: 0
B-LossContingencyAccrualAtCarryingValue: 1
B-LossContingencyDamagesSoughtValue: 2
B-LossContingencyEstimateOfPossibleLoss: 3
B-LossContingencyPendingClaimsNumber: 4
I-LossContingencyPendingClaimsNumber: 5
I-LossContingencyEstimateOfPossibleLoss: 6
I-LossContingencyAccrualAtCarryingValue: 7
I-LossContingencyDamagesSoughtValue: 8
Total number of this database 3675


INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/4 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/4 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for LossContingency train **********
INFO:root:********** Scheme: Starting extract database for LossContingency validation **********


Get validation LossContingency data
Label for dataset LossContingency
[0, 92, 93, 94, 95, 96, 172, 235, 242]
Total number of labels 9
Original database label and idx
O: 0
B-LossContingencyAccrualAtCarryingValue: 92
B-LossContingencyDamagesSoughtValue: 93
B-LossContingencyEstimateOfPossibleLoss: 94
B-LossContingencyPendingClaimsNumber: 95
I-LossContingencyPendingClaimsNumber: 96
I-LossContingencyEstimateOfPossibleLoss: 172
I-LossContingencyAccrualAtCarryingValue: 235
I-LossContingencyDamagesSoughtValue: 242


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 96, 92, 93, 94, 95}
New label for dataset LossContingency
New database label and idx
O: 0
B-LossContingencyAccrualAtCarryingValue: 1
B-LossContingencyDamagesSoughtValue: 2
B-LossContingencyEstimateOfPossibleLoss: 3
B-LossContingencyPendingClaimsNumber: 4
I-LossContingencyPendingClaimsNumber: 5
I-LossContingencyEstimateOfPossibleLoss: 6
I-LossContingencyAccrualAtCarryingValue: 7
I-LossContingencyDamagesSoughtValue: 8
Total number of this database 501


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for LossContingency validation **********
INFO:root:********** Scheme: Starting extract database for LossContingency test **********


Get test LossContingency data
Label for dataset LossContingency
[0, 92, 93, 94, 95, 96, 172, 235, 242]
Total number of labels 9
Original database label and idx
O: 0
B-LossContingencyAccrualAtCarryingValue: 92
B-LossContingencyDamagesSoughtValue: 93
B-LossContingencyEstimateOfPossibleLoss: 94
B-LossContingencyPendingClaimsNumber: 95
I-LossContingencyPendingClaimsNumber: 96
I-LossContingencyEstimateOfPossibleLoss: 172
I-LossContingencyAccrualAtCarryingValue: 235
I-LossContingencyDamagesSoughtValue: 242


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 92, 93, 94, 95}
New label for dataset LossContingency
New database label and idx
O: 0
B-LossContingencyAccrualAtCarryingValue: 1
B-LossContingencyDamagesSoughtValue: 2
B-LossContingencyEstimateOfPossibleLoss: 3
B-LossContingencyPendingClaimsNumber: 4
I-LossContingencyPendingClaimsNumber: 5
I-LossContingencyEstimateOfPossibleLoss: 6
I-LossContingencyAccrualAtCarryingValue: 7
I-LossContingencyDamagesSoughtValue: 8
Total number of this database 321


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for LossContingency test **********
INFO:root:********** Finishing extracting all database for LossContingency **********
INFO:root:********** Scheme: Starting extract database for OperatingLease train **********


Get train OperatingLease data
Label for dataset OperatingLease
[0, 78, 79, 80, 81, 103, 104, 105, 106, 107, 108, 109, 110, 111, 197, 224, 232, 238, 245, 251, 273]
Total number of labels 21
Original database label and idx
O: 0
B-LesseeOperatingLeaseRenewalTerm: 78
I-LesseeOperatingLeaseRenewalTerm: 79
B-LesseeOperatingLeaseTermOfContract: 80
I-LesseeOperatingLeaseTermOfContract: 81
B-OperatingLeaseCost: 103
B-OperatingLeaseExpense: 104
B-OperatingLeaseLiability: 105
B-OperatingLeasePayments: 106
B-OperatingLeaseRightOfUseAsset: 107
B-OperatingLeaseWeightedAverageDiscountRatePercent: 108
B-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 109
I-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 110
B-OperatingLeasesRentExpenseNet: 111
I-OperatingLeaseWeightedAverageDiscountRatePercent: 197
I-OperatingLeasePayments: 224
I-OperatingLeaseExpense: 232
I-OperatingLeaseLiability: 238
I-OperatingLeaseRightOfUseAsset: 245
I-OperatingLeaseCost: 251
I-OperatingLeasesRentExpenseNet: 273
New label co

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/11 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/11 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for OperatingLease train **********
INFO:root:********** Scheme: Starting extract database for OperatingLease validation **********


Get validation OperatingLease data
Label for dataset OperatingLease
[0, 78, 79, 80, 81, 103, 104, 105, 106, 107, 108, 109, 110, 111, 197, 224, 232, 238, 245, 251, 273]
Total number of labels 21
Original database label and idx
O: 0
B-LesseeOperatingLeaseRenewalTerm: 78
I-LesseeOperatingLeaseRenewalTerm: 79
B-LesseeOperatingLeaseTermOfContract: 80
I-LesseeOperatingLeaseTermOfContract: 81
B-OperatingLeaseCost: 103
B-OperatingLeaseExpense: 104
B-OperatingLeaseLiability: 105
B-OperatingLeasePayments: 106
B-OperatingLeaseRightOfUseAsset: 107
B-OperatingLeaseWeightedAverageDiscountRatePercent: 108
B-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 109
I-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 110
B-OperatingLeasesRentExpenseNet: 111
I-OperatingLeaseWeightedAverageDiscountRatePercent: 197
I-OperatingLeasePayments: 224
I-OperatingLeaseExpense: 232
I-OperatingLeaseLiability: 238
I-OperatingLeaseRightOfUseAsset: 245
I-OperatingLeaseCost: 251
I-OperatingLeasesRentExpenseNet: 273


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 103, 104, 105, 106, 107, 108, 109, 78, 79, 80, 81, 110, 111}
New label for dataset OperatingLease
New database label and idx
O: 0
B-LesseeOperatingLeaseRenewalTerm: 1
I-LesseeOperatingLeaseRenewalTerm: 2
B-LesseeOperatingLeaseTermOfContract: 3
I-LesseeOperatingLeaseTermOfContract: 4
B-OperatingLeaseCost: 5
B-OperatingLeaseExpense: 6
B-OperatingLeaseLiability: 7
B-OperatingLeasePayments: 8
B-OperatingLeaseRightOfUseAsset: 9
B-OperatingLeaseWeightedAverageDiscountRatePercent: 10
B-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 11
I-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 12
B-OperatingLeasesRentExpenseNet: 13
I-OperatingLeaseWeightedAverageDiscountRatePercent: 14
I-OperatingLeasePayments: 15
I-OperatingLeaseExpense: 16
I-OperatingLeaseLiability: 17
I-OperatingLeaseRightOfUseAsset: 18
I-OperatingLeaseCost: 19
I-OperatingLeasesRentExpenseNet: 20
Total number of this database 791


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for OperatingLease validation **********
INFO:root:********** Scheme: Starting extract database for OperatingLease test **********


Get test OperatingLease data
Label for dataset OperatingLease
[0, 78, 79, 80, 81, 103, 104, 105, 106, 107, 108, 109, 110, 111, 197, 224, 232, 238, 245, 251, 273]
Total number of labels 21
Original database label and idx
O: 0
B-LesseeOperatingLeaseRenewalTerm: 78
I-LesseeOperatingLeaseRenewalTerm: 79
B-LesseeOperatingLeaseTermOfContract: 80
I-LesseeOperatingLeaseTermOfContract: 81
B-OperatingLeaseCost: 103
B-OperatingLeaseExpense: 104
B-OperatingLeaseLiability: 105
B-OperatingLeasePayments: 106
B-OperatingLeaseRightOfUseAsset: 107
B-OperatingLeaseWeightedAverageDiscountRatePercent: 108
B-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 109
I-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 110
B-OperatingLeasesRentExpenseNet: 111
I-OperatingLeaseWeightedAverageDiscountRatePercent: 197
I-OperatingLeasePayments: 224
I-OperatingLeaseExpense: 232
I-OperatingLeaseLiability: 238
I-OperatingLeaseRightOfUseAsset: 245
I-OperatingLeaseCost: 251
I-OperatingLeasesRentExpenseNet: 273


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 103, 104, 105, 106, 107, 108, 109, 78, 79, 80, 81, 111}
New label for dataset OperatingLease
New database label and idx
O: 0
B-LesseeOperatingLeaseRenewalTerm: 1
I-LesseeOperatingLeaseRenewalTerm: 2
B-LesseeOperatingLeaseTermOfContract: 3
I-LesseeOperatingLeaseTermOfContract: 4
B-OperatingLeaseCost: 5
B-OperatingLeaseExpense: 6
B-OperatingLeaseLiability: 7
B-OperatingLeasePayments: 8
B-OperatingLeaseRightOfUseAsset: 9
B-OperatingLeaseWeightedAverageDiscountRatePercent: 10
B-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 11
I-OperatingLeaseWeightedAverageRemainingLeaseTerm1: 12
B-OperatingLeasesRentExpenseNet: 13
I-OperatingLeaseWeightedAverageDiscountRatePercent: 14
I-OperatingLeasePayments: 15
I-OperatingLeaseExpense: 16
I-OperatingLeaseLiability: 17
I-OperatingLeaseRightOfUseAsset: 18
I-OperatingLeaseCost: 19
I-OperatingLeasesRentExpenseNet: 20
Total number of this database 763


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for OperatingLease test **********
INFO:root:********** Finishing extracting all database for OperatingLease **********
INFO:root:********** Scheme: Starting extract database for Revenue train **********


Get train Revenue data
Label for dataset Revenue
[0, 32, 129, 130, 131, 132, 133, 175, 176, 196, 220, 239, 269]
Total number of labels 13
Original database label and idx
O: 0
B-ContractWithCustomerLiabilityRevenueRecognized: 32
B-RevenueFromContractWithCustomerExcludingAssessedTax: 129
B-RevenueFromContractWithCustomerIncludingAssessedTax: 130
B-RevenueFromRelatedParties: 131
B-RevenueRemainingPerformanceObligation: 132
B-Revenues: 133
I-ContractWithCustomerLiabilityRevenueRecognized: 175
I-RevenueRemainingPerformanceObligation: 176
I-RevenueFromContractWithCustomerIncludingAssessedTax: 196
I-Revenues: 220
I-RevenueFromRelatedParties: 239
I-RevenueFromContractWithCustomerExcludingAssessedTax: 269
New label contain in this dataset
{0, 129, 32, 130, 132, 133, 131}
New label for dataset Revenue
New database label and idx
O: 0
B-ContractWithCustomerLiabilityRevenueRecognized: 1
B-RevenueFromContractWithCustomerExcludingAssessedTax: 2
B-RevenueFromContractWithCustomerIncludingAssessedTax: 3

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/8 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/8 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Revenue train **********
INFO:root:********** Scheme: Starting extract database for Revenue validation **********


Get validation Revenue data
Label for dataset Revenue
[0, 32, 129, 130, 131, 132, 133, 175, 176, 196, 220, 239, 269]
Total number of labels 13
Original database label and idx
O: 0
B-ContractWithCustomerLiabilityRevenueRecognized: 32
B-RevenueFromContractWithCustomerExcludingAssessedTax: 129
B-RevenueFromContractWithCustomerIncludingAssessedTax: 130
B-RevenueFromRelatedParties: 131
B-RevenueRemainingPerformanceObligation: 132
B-Revenues: 133
I-ContractWithCustomerLiabilityRevenueRecognized: 175
I-RevenueRemainingPerformanceObligation: 176
I-RevenueFromContractWithCustomerIncludingAssessedTax: 196
I-Revenues: 220
I-RevenueFromRelatedParties: 239
I-RevenueFromContractWithCustomerExcludingAssessedTax: 269


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 129, 32, 131, 132, 133, 130}
New label for dataset Revenue
New database label and idx
O: 0
B-ContractWithCustomerLiabilityRevenueRecognized: 1
B-RevenueFromContractWithCustomerExcludingAssessedTax: 2
B-RevenueFromContractWithCustomerIncludingAssessedTax: 3
B-RevenueFromRelatedParties: 4
B-RevenueRemainingPerformanceObligation: 5
B-Revenues: 6
I-ContractWithCustomerLiabilityRevenueRecognized: 7
I-RevenueRemainingPerformanceObligation: 8
I-RevenueFromContractWithCustomerIncludingAssessedTax: 9
I-Revenues: 10
I-RevenueFromRelatedParties: 11
I-RevenueFromContractWithCustomerExcludingAssessedTax: 12
Total number of this database 976


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Revenue validation **********
INFO:root:********** Scheme: Starting extract database for Revenue test **********


Get test Revenue data
Label for dataset Revenue
[0, 32, 129, 130, 131, 132, 133, 175, 176, 196, 220, 239, 269]
Total number of labels 13
Original database label and idx
O: 0
B-ContractWithCustomerLiabilityRevenueRecognized: 32
B-RevenueFromContractWithCustomerExcludingAssessedTax: 129
B-RevenueFromContractWithCustomerIncludingAssessedTax: 130
B-RevenueFromRelatedParties: 131
B-RevenueRemainingPerformanceObligation: 132
B-Revenues: 133
I-ContractWithCustomerLiabilityRevenueRecognized: 175
I-RevenueRemainingPerformanceObligation: 176
I-RevenueFromContractWithCustomerIncludingAssessedTax: 196
I-Revenues: 220
I-RevenueFromRelatedParties: 239
I-RevenueFromContractWithCustomerExcludingAssessedTax: 269


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 129, 32, 130, 132, 131, 133}
New label for dataset Revenue
New database label and idx
O: 0
B-ContractWithCustomerLiabilityRevenueRecognized: 1
B-RevenueFromContractWithCustomerExcludingAssessedTax: 2
B-RevenueFromContractWithCustomerIncludingAssessedTax: 3
B-RevenueFromRelatedParties: 4
B-RevenueRemainingPerformanceObligation: 5
B-Revenues: 6
I-ContractWithCustomerLiabilityRevenueRecognized: 7
I-RevenueRemainingPerformanceObligation: 8
I-RevenueFromContractWithCustomerIncludingAssessedTax: 9
I-Revenues: 10
I-RevenueFromRelatedParties: 11
I-RevenueFromContractWithCustomerExcludingAssessedTax: 12
Total number of this database 669


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Revenue test **********
INFO:root:********** Finishing extracting all database for Revenue **********
INFO:root:********** Scheme: Starting extract database for Tax train **********


Get train Tax data
Label for dataset Tax
[0, 57, 58, 63, 74, 129, 130, 168, 169, 187, 196, 200, 247, 257, 266, 268, 269]
Total number of labels 17
Original database label and idx
O: 0
B-EffectiveIncomeTaxRateContinuingOperations: 57
B-EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate: 58
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
B-IncomeTaxExpenseBenefit: 74
B-RevenueFromContractWithCustomerExcludingAssessedTax: 129
B-RevenueFromContractWithCustomerIncludingAssessedTax: 130
B-UnrecognizedTaxBenefits: 168
B-UnrecognizedTaxBenefitsThatWouldImpactEffectiveTaxRate: 169
I-IncomeTaxExpenseBenefit: 187
I-RevenueFromContractWithCustomerIncludingAssessedTax: 196
I-EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate: 200
I-UnrecognizedTaxBenefits: 247
I-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 257
I-UnrecognizedTaxBenefitsThatWouldImpactEffectiveTaxRate: 266
I-EffectiveIncomeTaxRateContinuingOpera

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/12 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/12 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Tax train **********
INFO:root:********** Scheme: Starting extract database for Tax validation **********


Get validation Tax data
Label for dataset Tax
[0, 57, 58, 63, 74, 129, 130, 168, 169, 187, 196, 200, 247, 257, 266, 268, 269]
Total number of labels 17
Original database label and idx
O: 0
B-EffectiveIncomeTaxRateContinuingOperations: 57
B-EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate: 58
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
B-IncomeTaxExpenseBenefit: 74
B-RevenueFromContractWithCustomerExcludingAssessedTax: 129
B-RevenueFromContractWithCustomerIncludingAssessedTax: 130
B-UnrecognizedTaxBenefits: 168
B-UnrecognizedTaxBenefitsThatWouldImpactEffectiveTaxRate: 169
I-IncomeTaxExpenseBenefit: 187
I-RevenueFromContractWithCustomerIncludingAssessedTax: 196
I-EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate: 200
I-UnrecognizedTaxBenefits: 247
I-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 257
I-UnrecognizedTaxBenefitsThatWouldImpactEffectiveTaxRate: 266
I-EffectiveIncomeTaxRateContinuing

INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Tax validation **********
INFO:root:********** Scheme: Starting extract database for Tax test **********


Get test Tax data
Label for dataset Tax
[0, 57, 58, 63, 74, 129, 130, 168, 169, 187, 196, 200, 247, 257, 266, 268, 269]
Total number of labels 17
Original database label and idx
O: 0
B-EffectiveIncomeTaxRateContinuingOperations: 57
B-EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate: 58
B-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 63
B-IncomeTaxExpenseBenefit: 74
B-RevenueFromContractWithCustomerExcludingAssessedTax: 129
B-RevenueFromContractWithCustomerIncludingAssessedTax: 130
B-UnrecognizedTaxBenefits: 168
B-UnrecognizedTaxBenefitsThatWouldImpactEffectiveTaxRate: 169
I-IncomeTaxExpenseBenefit: 187
I-RevenueFromContractWithCustomerIncludingAssessedTax: 196
I-EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate: 200
I-UnrecognizedTaxBenefits: 247
I-EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense: 257
I-UnrecognizedTaxBenefitsThatWouldImpactEffectiveTaxRate: 266
I-EffectiveIncomeTaxRateContinuingOperat

INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Tax test **********
INFO:root:********** Finishing extracting all database for Tax **********
INFO:root:********** Scheme: Starting extract database for Equity train **********


Get train Equity data
Label for dataset Equity
[0, 13, 64, 65, 66, 73, 140, 141, 142, 143, 144, 190, 211, 216, 223, 244, 258]
Total number of labels 17
Original database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 13
B-EquityMethodInvestmentOwnershipPercentage: 64
I-EquityMethodInvestmentOwnershipPercentage: 65
B-EquityMethodInvestments: 66
B-IncomeLossFromEquityMethodInvestments: 73
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 140
I-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 141
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriodWeightedAverageGrantDateFairValue: 142
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsNonvestedNumber: 143
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherT

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/12 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/12 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Equity train **********
INFO:root:********** Scheme: Starting extract database for Equity validation **********


Get validation Equity data
Label for dataset Equity
[0, 13, 64, 65, 66, 73, 140, 141, 142, 143, 144, 190, 211, 216, 223, 244, 258]
Total number of labels 17
Original database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 13
B-EquityMethodInvestmentOwnershipPercentage: 64
I-EquityMethodInvestmentOwnershipPercentage: 65
B-EquityMethodInvestments: 66
B-IncomeLossFromEquityMethodInvestments: 73
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 140
I-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 141
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriodWeightedAverageGrantDateFairValue: 142
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsNonvestedNumber: 143
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsO

INFO:root:********** Sampling....... **********


New label contain in this dataset
{0, 64, 66, 73, 140, 13, 142, 143, 144}
New label for dataset Equity
New database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 1
B-EquityMethodInvestmentOwnershipPercentage: 2
I-EquityMethodInvestmentOwnershipPercentage: 3
B-EquityMethodInvestments: 4
B-IncomeLossFromEquityMethodInvestments: 5
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 6
I-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 7
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriodWeightedAverageGrantDateFairValue: 8
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsNonvestedNumber: 9
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsVestedInPeriodTotalFairValue: 10
I-EquityMethodInvestment

INFO:root:********** Producing validation number database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Equity validation **********
INFO:root:********** Scheme: Starting extract database for Equity test **********


Get test Equity data
Label for dataset Equity
[0, 13, 64, 65, 66, 73, 140, 141, 142, 143, 144, 190, 211, 216, 223, 244, 258]
Total number of labels 17
Original database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 13
B-EquityMethodInvestmentOwnershipPercentage: 64
I-EquityMethodInvestmentOwnershipPercentage: 65
B-EquityMethodInvestments: 66
B-IncomeLossFromEquityMethodInvestments: 73
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 140
I-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 141
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriodWeightedAverageGrantDateFairValue: 142
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsNonvestedNumber: 143
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherTh

INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 64, 66, 73, 140, 13, 142, 143, 144}
New label for dataset Equity
New database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 1
B-EquityMethodInvestmentOwnershipPercentage: 2
I-EquityMethodInvestmentOwnershipPercentage: 3
B-EquityMethodInvestments: 4
B-IncomeLossFromEquityMethodInvestments: 5
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 6
I-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod: 7
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriodWeightedAverageGrantDateFairValue: 8
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsNonvestedNumber: 9
B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsVestedInPeriodTotalFairValue: 10
I-EquityMethodInvestment

Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Equity test **********
INFO:root:********** Finishing extracting all database for Equity **********
INFO:root:********** Scheme: Starting extract database for BusinessAcquisition train **********


Get train BusinessAcquisition data
Label for dataset BusinessAcquisition
[0, 13, 14, 15, 244]
Total number of labels 5
Original database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 13
B-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 14
I-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 15
I-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 244
New label contain in this dataset
{0, 13, 14, 15}
New label for dataset BusinessAcquisition
New database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 1
B-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 2
I-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 3
I-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 4
Total number of this database 2408


INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for BusinessAcquisition train **********
INFO:root:********** Scheme: Starting extract database for BusinessAcquisition validation **********


Get validation BusinessAcquisition data
Label for dataset BusinessAcquisition
[0, 13, 14, 15, 244]
Total number of labels 5
Original database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 13
B-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 14
I-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 15
I-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 244


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 13, 14}
New label for dataset BusinessAcquisition
New database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 1
B-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 2
I-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 3
I-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 4
Total number of this database 226


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for BusinessAcquisition validation **********
INFO:root:********** Scheme: Starting extract database for BusinessAcquisition test **********


Get test BusinessAcquisition data
Label for dataset BusinessAcquisition
[0, 13, 14, 15, 244]
Total number of labels 5
Original database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 13
B-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 14
I-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 15
I-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 244


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 13, 14}
New label for dataset BusinessAcquisition
New database label and idx
O: 0
B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 1
B-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 2
I-BusinessAcquisitionPercentageOfVotingInterestsAcquired: 3
I-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued: 4
Total number of this database 255


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for BusinessAcquisition test **********
INFO:root:********** Finishing extracting all database for BusinessAcquisition **********
INFO:root:********** Scheme: Starting extract database for Transaction train **********


Get train Transaction data
Label for dataset Transaction
[0, 122, 123, 124, 125, 134, 135]
Total number of labels 7
Original database label and idx
O: 0
B-RelatedPartyTransactionAmountsOfTransaction: 122
I-RelatedPartyTransactionAmountsOfTransaction: 123
B-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 124
I-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 125
B-SaleOfStockNumberOfSharesIssuedInTransaction: 134
I-SaleOfStockNumberOfSharesIssuedInTransaction: 135
New label contain in this dataset
{0, 134, 135, 122, 123, 124}
New label for dataset Transaction
New database label and idx
O: 0
B-RelatedPartyTransactionAmountsOfTransaction: 1
I-RelatedPartyTransactionAmountsOfTransaction: 2
B-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 3
I-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 4
B-SaleOfStockNumberOfSharesIssuedInTransaction: 5
I-SaleOfStockNumberOfSharesIssuedInTransaction: 6
Total number of this database 2342

INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/3 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Transaction train **********
INFO:root:********** Scheme: Starting extract database for Transaction validation **********


Get validation Transaction data
Label for dataset Transaction
[0, 122, 123, 124, 125, 134, 135]
Total number of labels 7
Original database label and idx
O: 0
B-RelatedPartyTransactionAmountsOfTransaction: 122
I-RelatedPartyTransactionAmountsOfTransaction: 123
B-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 124
I-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 125
B-SaleOfStockNumberOfSharesIssuedInTransaction: 134
I-SaleOfStockNumberOfSharesIssuedInTransaction: 135


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 122, 124, 134}
New label for dataset Transaction
New database label and idx
O: 0
B-RelatedPartyTransactionAmountsOfTransaction: 1
I-RelatedPartyTransactionAmountsOfTransaction: 2
B-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 3
I-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 4
B-SaleOfStockNumberOfSharesIssuedInTransaction: 5
I-SaleOfStockNumberOfSharesIssuedInTransaction: 6
Total number of this database 271


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Transaction validation **********
INFO:root:********** Scheme: Starting extract database for Transaction test **********


Get test Transaction data
Label for dataset Transaction
[0, 122, 123, 124, 125, 134, 135]
Total number of labels 7
Original database label and idx
O: 0
B-RelatedPartyTransactionAmountsOfTransaction: 122
I-RelatedPartyTransactionAmountsOfTransaction: 123
B-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 124
I-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 125
B-SaleOfStockNumberOfSharesIssuedInTransaction: 134
I-SaleOfStockNumberOfSharesIssuedInTransaction: 135


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 134, 122, 124, 125}
New label for dataset Transaction
New database label and idx
O: 0
B-RelatedPartyTransactionAmountsOfTransaction: 1
I-RelatedPartyTransactionAmountsOfTransaction: 2
B-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 3
I-RelatedPartyTransactionExpensesFromTransactionsWithRelatedParty: 4
B-SaleOfStockNumberOfSharesIssuedInTransaction: 5
I-SaleOfStockNumberOfSharesIssuedInTransaction: 6
Total number of this database 297


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Transaction test **********
INFO:root:********** Finishing extracting all database for Transaction **********
INFO:root:********** Scheme: Starting extract database for Loss train **********


Get train Loss data
Label for dataset Loss
[0, 1, 69, 71, 73, 92, 93, 94, 95, 96, 112, 172, 179, 192, 217, 235, 242, 254, 258]
Total number of labels 19
Original database label and idx
O: 0
B-AccrualForEnvironmentalLossContingencies: 1
B-GainsLossesOnExtinguishmentOfDebt: 69
B-GoodwillImpairmentLoss: 71
B-IncomeLossFromEquityMethodInvestments: 73
B-LossContingencyAccrualAtCarryingValue: 92
B-LossContingencyDamagesSoughtValue: 93
B-LossContingencyEstimateOfPossibleLoss: 94
B-LossContingencyPendingClaimsNumber: 95
I-LossContingencyPendingClaimsNumber: 96
B-OperatingLossCarryforwards: 112
I-LossContingencyEstimateOfPossibleLoss: 172
I-OperatingLossCarryforwards: 179
I-GainsLossesOnExtinguishmentOfDebt: 192
I-AccrualForEnvironmentalLossContingencies: 217
I-LossContingencyAccrualAtCarryingValue: 235
I-LossContingencyDamagesSoughtValue: 242
I-GoodwillImpairmentLoss: 254
I-IncomeLossFromEquityMethodInvestments: 258
New label contain in this dataset
{0, 1, 96, 69, 71, 73, 112, 92, 93, 94, 95}


INFO:root:********** Sampling....... **********
INFO:root:********** Producing train number database **********


Flattening the indices:   0%|          | 0/9 [00:00<?, ?ba/s]

INFO:root:********** Producing train str database **********


Flattening the indices:   0%|          | 0/9 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Loss train **********
INFO:root:********** Scheme: Starting extract database for Loss validation **********


Get validation Loss data
Label for dataset Loss
[0, 1, 69, 71, 73, 92, 93, 94, 95, 96, 112, 172, 179, 192, 217, 235, 242, 254, 258]
Total number of labels 19
Original database label and idx
O: 0
B-AccrualForEnvironmentalLossContingencies: 1
B-GainsLossesOnExtinguishmentOfDebt: 69
B-GoodwillImpairmentLoss: 71
B-IncomeLossFromEquityMethodInvestments: 73
B-LossContingencyAccrualAtCarryingValue: 92
B-LossContingencyDamagesSoughtValue: 93
B-LossContingencyEstimateOfPossibleLoss: 94
B-LossContingencyPendingClaimsNumber: 95
I-LossContingencyPendingClaimsNumber: 96
B-OperatingLossCarryforwards: 112
I-LossContingencyEstimateOfPossibleLoss: 172
I-OperatingLossCarryforwards: 179
I-GainsLossesOnExtinguishmentOfDebt: 192
I-AccrualForEnvironmentalLossContingencies: 217
I-LossContingencyAccrualAtCarryingValue: 235
I-LossContingencyDamagesSoughtValue: 242
I-GoodwillImpairmentLoss: 254
I-IncomeLossFromEquityMethodInvestments: 258


INFO:root:********** Sampling....... **********
INFO:root:********** Producing validation number database **********


New label contain in this dataset
{0, 1, 96, 69, 71, 73, 112, 92, 93, 94, 95}
New label for dataset Loss
New database label and idx
O: 0
B-AccrualForEnvironmentalLossContingencies: 1
B-GainsLossesOnExtinguishmentOfDebt: 2
B-GoodwillImpairmentLoss: 3
B-IncomeLossFromEquityMethodInvestments: 4
B-LossContingencyAccrualAtCarryingValue: 5
B-LossContingencyDamagesSoughtValue: 6
B-LossContingencyEstimateOfPossibleLoss: 7
B-LossContingencyPendingClaimsNumber: 8
I-LossContingencyPendingClaimsNumber: 9
B-OperatingLossCarryforwards: 10
I-LossContingencyEstimateOfPossibleLoss: 11
I-OperatingLossCarryforwards: 12
I-GainsLossesOnExtinguishmentOfDebt: 13
I-AccrualForEnvironmentalLossContingencies: 14
I-LossContingencyAccrualAtCarryingValue: 15
I-LossContingencyDamagesSoughtValue: 16
I-GoodwillImpairmentLoss: 17
I-IncomeLossFromEquityMethodInvestments: 18
Total number of this database 1074


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Producing validation str database **********


Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Loss validation **********
INFO:root:********** Scheme: Starting extract database for Loss test **********


Get test Loss data
Label for dataset Loss
[0, 1, 69, 71, 73, 92, 93, 94, 95, 96, 112, 172, 179, 192, 217, 235, 242, 254, 258]
Total number of labels 19
Original database label and idx
O: 0
B-AccrualForEnvironmentalLossContingencies: 1
B-GainsLossesOnExtinguishmentOfDebt: 69
B-GoodwillImpairmentLoss: 71
B-IncomeLossFromEquityMethodInvestments: 73
B-LossContingencyAccrualAtCarryingValue: 92
B-LossContingencyDamagesSoughtValue: 93
B-LossContingencyEstimateOfPossibleLoss: 94
B-LossContingencyPendingClaimsNumber: 95
I-LossContingencyPendingClaimsNumber: 96
B-OperatingLossCarryforwards: 112
I-LossContingencyEstimateOfPossibleLoss: 172
I-OperatingLossCarryforwards: 179
I-GainsLossesOnExtinguishmentOfDebt: 192
I-AccrualForEnvironmentalLossContingencies: 217
I-LossContingencyAccrualAtCarryingValue: 235
I-LossContingencyDamagesSoughtValue: 242
I-GoodwillImpairmentLoss: 254
I-IncomeLossFromEquityMethodInvestments: 258


INFO:root:********** Sampling....... **********
INFO:root:********** Producing test number database **********


New label contain in this dataset
{0, 1, 69, 71, 73, 112, 92, 93, 94, 95}
New label for dataset Loss
New database label and idx
O: 0
B-AccrualForEnvironmentalLossContingencies: 1
B-GainsLossesOnExtinguishmentOfDebt: 2
B-GoodwillImpairmentLoss: 3
B-IncomeLossFromEquityMethodInvestments: 4
B-LossContingencyAccrualAtCarryingValue: 5
B-LossContingencyDamagesSoughtValue: 6
B-LossContingencyEstimateOfPossibleLoss: 7
B-LossContingencyPendingClaimsNumber: 8
I-LossContingencyPendingClaimsNumber: 9
B-OperatingLossCarryforwards: 10
I-LossContingencyEstimateOfPossibleLoss: 11
I-OperatingLossCarryforwards: 12
I-GainsLossesOnExtinguishmentOfDebt: 13
I-AccrualForEnvironmentalLossContingencies: 14
I-LossContingencyAccrualAtCarryingValue: 15
I-LossContingencyDamagesSoughtValue: 16
I-GoodwillImpairmentLoss: 17
I-IncomeLossFromEquityMethodInvestments: 18
Total number of this database 806


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Producing test str database **********


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:root:********** Finishing save the database **********
INFO:root:********** Finishing extracting database for Loss test **********
INFO:root:********** Finishing extracting all database for Loss **********


