In [1]:
import findspark
findspark.init() # this must be executed before the below import

In [2]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession
from pyspark import SparkFiles

In [3]:
import ray
import time
import rtree
from rtree import index
import pandas as pd
import numpy as np
from numpy import genfromtxt
import threading
import pyarrow as pa
import pyarrow.parquet as pq

In [4]:
# Spark memory settings: 8g for the executor and driver JVMs, plus 8g of
# off-heap memory. SparkConf.set() returns the conf itself, so calls chain.
conf = (
    SparkConf()
    .set("spark.executor.memory", "8g")
    .set("spark.driver.memory", "8g")
    .set("spark.memory.offHeap.enabled", True)
    .set("spark.memory.offHeap.size", "8g")
)

# One SparkContext per kernel; the SQLContext wraps it for DataFrame access.
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

In [5]:
def merge_dict(base_dict, new_dict):
    """Fold every entry of ``new_dict`` into ``base_dict`` and empty ``new_dict``.

    Keys missing from ``base_dict`` adopt the value object from ``new_dict``
    as-is (no copy). Keys already present are combined with ``+=``, so list
    values are extended in place. ``new_dict`` is cleared afterwards.
    """
    for key in new_dict:
        incoming = new_dict[key]
        if key not in base_dict:
            base_dict[key] = incoming
            continue
        base_dict[key] += incoming
    new_dict.clear()

class DumpThread(threading.Thread):
    """Thread that appends a slice of buffered partitions to Parquet files.

    ``parameters`` is a sequence of
    ``(start_index, end_index, pids, merged_dict, pq_writers, fs, hdfs_path,
    column_names)``. The thread handles ``pids[start_index:end_index]``: each
    partition's buffered rows are written through its ParquetWriter (opened
    and registered in ``pq_writers`` on first use) and the buffer in
    ``merged_dict`` is then reset to an empty list.
    """

    def __init__(self, thread_id, parameters):
        super().__init__()
        self.thread_id = thread_id
        self.parameters = parameters

    def run(self):
        print('= = = start dumping thread = = =')
        (start_index, end_index, pids, merged_dict,
         pq_writers, fs, hdfs_path, column_names) = self.parameters
        for pid in pids[start_index:end_index]:
            path = hdfs_path + 'partition_' + str(pid) + '.parquet'
            table = pa.Table.from_pandas(
                pd.DataFrame(merged_dict[pid], columns=column_names))
            if pid not in pq_writers:
                # First write for this partition: open its writer and register it.
                pq_writers[pid] = pq.ParquetWriter(path, table.schema, fs)
            pq_writers[pid].write_table(table=table)
            # Drop the rows that were just persisted.
            merged_dict[pid] = []
        print('= = = exit dumping thread = = =')

@ray.remote
def dump_dict_2_hdfs(merged_dict, pq_writers, fs, column_names, hdfs_path, num_threads):
    """Append every buffered partition in ``merged_dict`` to its Parquet file.

    Args:
        merged_dict: maps partition id -> list of row lists; each buffer is
            reset to ``[]`` once written.
        pq_writers: maps partition id -> open ParquetWriter; missing writers
            are created at ``hdfs_path + 'partition_<pid>.parquet'`` and
            registered here.
        fs: filesystem object handed to ``pq.ParquetWriter``.
        column_names: column labels used to build each DataFrame.
        hdfs_path: directory prefix (including trailing separator) for the
            partition files.
        num_threads: 1 writes inline; a larger value splits the partition ids
            across that many ``DumpThread`` workers.

    Raises:
        ValueError: if ``num_threads`` is smaller than 1. Previously 0 raised
            ZeroDivisionError and negative values silently wrote nothing.
    """
    if num_threads < 1:
        raise ValueError('num_threads must be >= 1, got ' + str(num_threads))

    if num_threads == 1:
        print('start dumping with single thread in main process..')
        pids = list(merged_dict.keys())
        for pid in pids:
            path = hdfs_path + 'partition_' + str(pid)+'.parquet'
            pdf = pd.DataFrame(merged_dict[pid], columns=column_names)
            adf = pa.Table.from_pandas(pdf)
            if pid in pq_writers:
                pq_writers[pid].write_table(table=adf)
            else:
                writer = pq.ParquetWriter(path, adf.schema, fs)
                print("adf.schema:",adf.schema)
                pq_writers[pid] = writer
                writer.write_table(table=adf)
            merged_dict[pid] = []
    else:
        print('start dumping with', num_threads, 'threads in main process..')
        pids = list(merged_dict.keys())
        # Slice width; the +1 means the last thread(s) may get a shorter or
        # empty slice.
        step = len(pids) // num_threads + 1
        threads = []
        for i in range(num_threads):
            start_index = i * step
            # Stop before spawning a thread whose slice would be empty.
            if start_index >= len(pids):
                break
            end_index = (i+1) * step
            parameters = [start_index, end_index, pids, merged_dict, pq_writers, fs, hdfs_path, column_names]
            thread = DumpThread(i, parameters)
            thread.start()
            threads.append(thread)
        for t in threads:
            t.join()
    print('finish dumping.')


def dump_dict_2_hdfs_simple(merged_dict, pq_writers, column_names, hdfs_path):
    """Append each partition's rows to its already-open ParquetWriter.

    Args:
        merged_dict: maps partition id -> list of row lists. Not modified.
        pq_writers: maps partition id -> open ParquetWriter. Every id in
            ``merged_dict`` must have a writer, otherwise KeyError is raised.
        column_names: column labels used to build each DataFrame.
        hdfs_path: unused; the writers already know their destination. Kept
            so existing call sites stay valid.
    """
    print('= = = start dumping in main thread = = =')
    for pid, rows in merged_dict.items():
        pdf = pd.DataFrame(rows, columns=column_names)
        pq_writers[pid].write_table(table=pa.Table.from_pandas(pdf))
    print('= = = exit dumping = = =')

In [6]:
def kdnode_2_border(kdnode):
    """Flatten a node's per-dimension ``[low, high]`` domains into one tuple.

    ``kdnode[0]`` holds the domains. The result is non-interleaved: all lower
    bounds first, then all upper bounds.
    """
    domains = kdnode[0]
    lows = tuple(bounds[0] for bounds in domains)
    highs = tuple(bounds[1] for bounds in domains)
    return lows + highs

def load_partitions_from_file(path):
    """Load partition descriptions from a comma-separated layout file.

    Each line is read as
    ``num_dims, low_0..low_{d-1}, high_0..high_{d-1}, value[, a, b, c, d]``.
    ``num_dims`` is taken from the first line.

    Returns:
        A list with one entry per line: ``[domains, value]`` where ``domains``
        is ``[[low_k, high_k], ...]``. When a line has more than
        ``2*num_dims + 2`` columns, its last four columns are appended too
        (qd-tree layouts do not carry them). Returns ``[]`` when the file
        yields no data.
    """
    stretched_kdnodes = genfromtxt(path, delimiter=',')
    if stretched_kdnodes.size == 0:
        return []
    # genfromtxt collapses a one-line file into a 1-D array; restore the
    # (rows, columns) shape so a single-partition layout loads correctly.
    stretched_kdnodes = np.atleast_2d(stretched_kdnodes)

    num_dims = int(stretched_kdnodes[0, 0])
    # All rows of a 2-D array share one width, so decide this once.
    has_extra_attributes = stretched_kdnodes.shape[1] > 2 * num_dims + 2

    kdnodes = []
    for node in stretched_kdnodes:
        domains = [[node[k + 1], node[1 + num_dims + k]] for k in range(num_dims)]
        row = [domains, node[2 * num_dims + 1]]
        if has_extra_attributes:
            row.extend(node[-4:])
        kdnodes.append(row)
    return kdnodes

def process_chunk_row(row, used_dims, pidx, pid_data_dict, count, k):
    """Route one row to the partition whose bounding box contains it.

    Args:
        row: object exposing ``to_numpy()`` (a pandas Series when called via
            ``DataFrame.apply(axis=1)``).
        used_dims: positions of the row values used as the lookup point.
        pidx: spatial index exposing ``intersection(border)`` that yields
            partition ids.
        pid_data_dict: maps partition id -> list of row lists; the row is
            appended under the first matching id.
        count: one-element list used as a mutable row counter.
        k: worker number, used only in the progress message.

    Raises:
        ValueError: if no partition contains the row. The offending border is
            printed first. Previously this surfaced as an UnboundLocalError
            on ``pid``.
    """
    if count[0] % 100000 == 0:
        print('proces',k,'has routed',count[0],'rows')
    count[0] += 1

    row_numpy = row.to_numpy()
    point = row_numpy[used_dims].tolist()
    # A point query is a degenerate box: lower bounds == upper bounds.
    row_border = tuple(point + point)

    matches = list(pidx.intersection(row_border))
    if not matches:
        print(row_border)
        raise ValueError('no partition contains row with border ' + str(row_border))

    # When several partitions match (e.g. a point on a shared edge) the first
    # id returned by the index is used.
    pid = matches[0]
    pid_data_dict.setdefault(pid, []).append(row_numpy.tolist())

@ray.remote
def process_chunk(chunk, used_dims, partition_path, k):
    """Ray task: assign every row of ``chunk`` to a partition.

    Loads the partition layout from ``partition_path``, builds an in-memory
    R-tree over the partition bounding boxes (the partition's position in the
    file is its id) and routes each row through ``process_chunk_row``.

    Args:
        chunk: DataFrame of rows to route.
        used_dims: positions of the columns used as the lookup point.
        partition_path: layout file read by ``load_partitions_from_file``.
        k: worker number, used only in log messages.

    Returns:
        An object ref (from ``ray.put``) to a dict mapping partition id ->
        list of row lists. The caller must ``ray.get`` the task result and
        then ``ray.get`` this ref.
    """
    print("enter data routing process", k, '..')
    pid_data_dict = {}
    partitions = load_partitions_from_file(partition_path)

    p = index.Property()
    p.leaf_capacity = 32
    p.index_capacity = 32
    # Was `p.NearMinimumOverlaoFactor`, a misspelt name that only created an
    # unused Python attribute, so the setting never reached the index.
    p.near_minimum_overlap_factor = 16
    p.fill_factor = 0.8
    p.overwrite = True
    pidx = index.Index(properties = p)
    for i, partition in enumerate(partitions):
        pidx.insert(i, kdnode_2_border(partition))

    # One-element list so the per-row callback can update the counter.
    count = [0]
    chunk.apply(lambda row: process_chunk_row(row, used_dims, pidx, pid_data_dict, count, k), axis=1)
    dict_id = ray.put(pid_data_dict)
    print("exit data routing process", k, ".")
    return dict_id

In [7]:
def prepare_writers(table_path, cols, col_names, partition_path, hdfs_path, fs):
    """Open one ParquetWriter per partition listed in the layout file.

    The Arrow schema is inferred from the first 10 rows of the '|'-delimited
    table. The number of partitions is the number of lines in
    ``partition_path``.

    Args:
        table_path: path of the '|'-delimited input table.
        cols: column positions to read.
        col_names: names assigned to those columns.
        partition_path: comma-separated partition layout file.
        hdfs_path: directory prefix (including trailing separator) for the
            output files ``partition_<i>.parquet``.
        fs: filesystem object handed to ``pq.ParquetWriter``.

    Returns:
        Dict mapping partition index -> open ParquetWriter. If opening a
        writer fails, the writers opened so far are closed before the error
        propagates.
    """
    # nrows reads the same first 10 rows the chunked reader would, without
    # leaving an open reader behind.
    sample = pd.read_table(table_path, delimiter='|', usecols=cols, names=col_names, nrows=10)
    schema = pa.Schema.from_pandas(sample)

    stretched_kdnodes = genfromtxt(partition_path, delimiter=',')
    if stretched_kdnodes.size == 0:
        num_nodes = 0
    else:
        # A one-line layout comes back 1-D; len() would then count columns
        # instead of partitions.
        num_nodes = len(np.atleast_2d(stretched_kdnodes))

    pq_writers = {}
    try:
        for i in range(num_nodes):
            path = hdfs_path + 'partition_' + str(i)+'.parquet'
            pq_writers[i] = pq.ParquetWriter(path, schema, fs)
    except Exception:
        for writer in pq_writers.values():
            writer.close()
        raise

    return pq_writers

In [8]:
def _dump_finished_results(result_ids, pq_writers, col_names, hdfs_path):
    """Wait for every routing task in ``result_ids`` and persist its rows as it finishes."""
    pending = list(result_ids)
    while pending:
        done_ids, pending = ray.wait(pending)
        # The task returns an object ref to its routing dict, hence two gets.
        dict_id = ray.get(done_ids[0])
        result_dict = ray.get(dict_id)
        dump_dict_2_hdfs_simple(result_dict, pq_writers, col_names, hdfs_path)


def batch_data_parallel(table_path, partition_path, chunk_size, used_dims, hdfs_path, num_dims, num_process, hdfs_private_ip):
    """Route a '|'-delimited table into per-partition Parquet files on HDFS.

    The table is read in chunks of ``chunk_size`` rows. Each chunk is handed
    to a ``process_chunk`` Ray task. After ``num_process`` chunks have been
    submitted (and once more for a trailing partial batch), the driver waits
    for the tasks and appends each result to the partition writers as it
    completes.

    Args:
        table_path: path of the input table.
        partition_path: partition layout file.
        chunk_size: rows per chunk.
        used_dims: positions of the columns used for routing.
        hdfs_path: directory prefix for the output Parquet files.
        num_dims: number of leading table columns to read; they are named
            ``_c0`` .. ``_c<num_dims-1>``.
        num_process: number of Ray CPUs and chunks per batch.
        hdfs_private_ip: HDFS namenode host; port 9000 and user 'hdfs' are
            fixed.

    Ray is shut down and every opened writer is closed even if routing fails,
    so a failed run can be retried from the same kernel.
    """
    begin_time = time.time()

    ray.init(num_cpus=num_process)
    pq_writers = {}
    try:
        # column positions and names for the pandas dataframe
        cols = list(range(num_dims))
        col_names = ['_c'+str(i) for i in range(num_dims)]

        # pyarrow parquet writers, one per partition, kept open for appending
        fs = pa.fs.HadoopFileSystem(hdfs_private_ip, port=9000, user='hdfs', replication=1)
        pq_writers = prepare_writers(table_path, cols, col_names, partition_path, hdfs_path, fs)

        # object refs of the current batch; held until its barrier
        chunk_ids = []
        result_ids = []

        reader = pd.read_table(table_path, delimiter='|', usecols=cols, names=col_names, chunksize=chunk_size)
        for chunk_count, chunk in enumerate(reader):
            print('reading chunk: ', chunk_count)

            worker_slot = chunk_count % num_process
            chunk_ids.append(ray.put(chunk))
            result_ids.append(process_chunk.remote(chunk_ids[-1], used_dims, partition_path, worker_slot))

            # once every worker slot has a chunk, wait for the batch and dump it
            if worker_slot == num_process - 1:
                print("= = = = = = Synchronization Barrier = = = = = =")
                _dump_finished_results(result_ids, pq_writers, col_names, hdfs_path)
                print("= = finish merged = =")

                chunk_ids.clear() # clear up the references
                result_ids.clear() # clear up the references

                time_elapsed = time.time() - begin_time
                print("= = = TOTAL PROCESSED SO FAR:", (chunk_count+1) * chunk_size,"ROWS. TIME SPENT:", time_elapsed, "SECONDS = = =")

        print('after exit, chunks size: ', len(chunk_ids))
        # process the last, partial batch
        if len(chunk_ids) != 0:
            print("= = = = = = Synchronization Barrier = = = = = =")
            _dump_finished_results(result_ids, pq_writers, col_names, hdfs_path)
            print("= = finish merged = =")
            chunk_ids.clear() # clear up the references
            result_ids.clear() # clear up the references
    finally:
        # Closing writes the Parquet footers. Always release Ray afterwards
        # so a later ray.init() in the same kernel does not fail.
        try:
            for writer in pq_writers.values():
                writer.close()
        finally:
            ray.shutdown()

    finish_time = time.time()
    print('total data routing and persisting time: ', finish_time - begin_time)

In [9]:
# = = = Configuration (UBDA Cloud Centos) = = =
scale_factor = 100

# raw TPC-H lineitem table
table_base_path = '/media/datadrive1/TPCH/dbgen/'
table_path = '{}lineitem_{}.tbl'.format(table_base_path, scale_factor)

# parallelism and rows per chunk
num_process = 8
chunk_size = 3000000
# 6M rows = about 1GB raw data

# columns to read, and the column positions used for routing
num_dims = 16
used_dims = [1,2]

# base path of HDFS
hdfs_private_ip = '192.168.6.62'
hdfs_base_path = 'hdfs://192.168.6.62:9000/user/cloudray/'

# one output directory per partitioning scheme
nora_hdfs = '{}NORA/scale{}/'.format(hdfs_base_path, scale_factor)
qdtree_hdfs = '{}QdTree/scale{}/'.format(hdfs_base_path, scale_factor)
kdtree_hdfs = '{}KDTree/scale{}/'.format(hdfs_base_path, scale_factor)

# base path of Partition
partition_base_path = '/home/centos/PartitionLayout/'

# one layout file per partitioning scheme
nora_partition = '{}nora_partitions_{}'.format(partition_base_path, scale_factor)
qdtree_partition = '{}qdtree_partitions_{}'.format(partition_base_path, scale_factor)
kdtree_partition = '{}kdtree_partitions_{}'.format(partition_base_path, scale_factor)

In [None]:
# = = = Execution = = =
# Only the KD-tree layout is routed here; the NORA and Qd-tree runs are kept
# commented out so they can be re-enabled one at a time.
if __name__ == '__main__':
    # batch_data_parallel(table_path, nora_partition, chunk_size, used_dims, nora_hdfs, num_dims, num_process, hdfs_private_ip)
    # print('finish nora data routing..')
    # batch_data_parallel(table_path, qdtree_partition, chunk_size, used_dims, qdtree_hdfs, num_dims, num_process, hdfs_private_ip)
    # print('finish qdtree data routing..')
    batch_data_parallel(
        table_path=table_path,
        partition_path=kdtree_partition,
        chunk_size=chunk_size,
        used_dims=used_dims,
        hdfs_path=kdtree_hdfs,
        num_dims=num_dims,
        num_process=num_process,
        hdfs_private_ip=hdfs_private_ip,
    )
    print('finish kdtree data routing..')

2020-10-28 15:14:21,086	INFO services.py:1164 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


reading chunk:  0
[2m[36m(pid=2016)[0m enter data routing process 0 ..
[2m[36m(pid=2016)[0m proces 0 has routed 0 rows
reading chunk:  1
[2m[36m(pid=2016)[0m proces 0 has routed 100000 rows
[2m[36m(pid=2017)[0m enter data routing process 1 ..
[2m[36m(pid=2016)[0m proces 0 has routed 200000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 0 rows
[2m[36m(pid=2016)[0m proces 0 has routed 300000 rows
reading chunk:  2
[2m[36m(pid=2017)[0m proces 1 has routed 100000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 400000 rows
[2m[36m(pid=2021)[0m enter data routing process 2 ..
[2m[36m(pid=2017)[0m proces 1 has routed 200000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 500000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 0 rows
[2m[36m(pid=2017)[0m proces 1 has routed 300000 rows
reading chunk:  3
[2m[36m(pid=2016)[0m proces 0 has routed 600000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 100000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 400

[2m[36m(pid=2017)[0m proces 1 has routed 2300000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 1800000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 800000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1300000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1100000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2100000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2600000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1600000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2400000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 1900000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1400000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 900000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2200000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2700000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1200000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1700000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2500000 rows
[2m[36m(pid=2023)[0m proces 3 

[2m[36m(pid=2019)[0m proces 0 has routed 800000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 0 rows
[2m[36m(pid=2018)[0m proces 2 has routed 300000 rows
reading chunk:  12
[2m[36m(pid=2022)[0m proces 1 has routed 600000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 900000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 100000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 400000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 700000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1000000 rows
[2m[36m(pid=2023)[0m enter data routing process 4 ..
[2m[36m(pid=2020)[0m proces 3 has routed 200000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 500000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 0 rows
[2m[36m(pid=2022)[0m proces 1 has routed 800000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 300000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1100000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 600000 rows
reading chunk:  13
[2m[36m(pid=2023

[2m[36m(pid=2020)[0m proces 3 has routed 2100000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2300000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1800000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1300000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2600000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2900000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 1600000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 1100000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2400000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1900000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1400000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2200000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2700000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 1200000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 1700000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 2000000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2500000 rows
[2m[36m(pid=2017)[0m proces 

[2m[36m(pid=2018)[0m enter data routing process 5 ..
[2m[36m(pid=2020)[0m proces 4 has routed 200000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 1000000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 500000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 0 rows
[2m[36m(pid=2021)[0m proces 2 has routed 800000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1300000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 300000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 1100000 rows
reading chunk:  22
[2m[36m(pid=2023)[0m proces 3 has routed 600000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 100000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 900000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1400000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 400000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 1200000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 200000 rows
[2m[36m(pid=2022)[0m enter data routing process 6 ..
[2m[36m(pid=2023)[0m proce

[2m[36m(pid=2021)[0m proces 2 has routed 2600000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2900000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2100000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2400000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1600000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1400000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1900000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2700000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2500000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2200000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1700000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2000000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1500000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2800000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2300000 rows
[2m[36m(pid=2016)[0m exit data routing process 0 .
[2m[36m(pid=2022)[0m proces 6 has routed 1800000 rows
[2m[36m(pid=2019)[0m proces 7 h

[2m[36m(pid=2017)[0m proces 6 has routed 0 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1300000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 300000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 800000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1600000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 600000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 1100000 rows
reading chunk:  31
[2m[36m(pid=2017)[0m proces 6 has routed 100000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1400000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 900000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 400000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1700000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 700000 rows
= = = = = = Synchronization Barrier = = = = = =
[2m[36m(pid=2017)[0m proces 6 has routed 200000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 1200000 rows
[2m[36m(pid=2016)[0m enter data routing process 7 ..
[2m[36m(pid=2022)[0m proces 1 has

[2m[36m(pid=2023)[0m proces 4 has routed 2400000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 2200000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2700000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 1700000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 2500000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 2000000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 2300000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2800000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 1800000 rows
[2m[36m(pid=2022)[0m exit data routing process 1 .
[2m[36m(pid=2023)[0m proces 4 has routed 2600000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 2100000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 2400000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2900000 rows
= = = start dumping in main thread = = =
[2m[36m(pid=2016)[0m proces 7 has routed 1900000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 2700000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 220000

[2m[36m(pid=2021)[0m proces 2 has routed 1300000 rows
[2m[36m(pid=2019)[0m enter data routing process 7 ..
[2m[36m(pid=2016)[0m proces 0 has routed 1900000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 1100000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 300000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 600000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 1700000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 0 rows
[2m[36m(pid=2021)[0m proces 2 has routed 1400000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 900000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2000000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 1200000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 400000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 100000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 700000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1000000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 1800000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 

[2m[36m(pid=2021)[0m exit data routing process 2 .
[2m[36m(pid=2022)[0m proces 6 has routed 2300000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2600000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2900000 rows
= = = exit dumping = = =
[2m[36m(pid=2019)[0m proces 7 has routed 2100000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 2400000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2700000 rows
= = = start dumping in main thread = = =
[2m[36m(pid=2023)[0m exit data routing process 3 .
[2m[36m(pid=2019)[0m proces 7 has routed 2200000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 2500000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2800000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 2300000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 2600000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2900000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 2400000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 2700000 rows
[2m[36m(pid=2020)[0m exit

[2m[36m(pid=2018)[0m proces 2 has routed 1500000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2100000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1100000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 300000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 600000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 900000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 1400000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2200000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 1600000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1900000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 400000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 700000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1200000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 1000000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 1500000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2300000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 1700000 rows
[2m[36m(pid=2022)[0m proces 1 has

[2m[36m(pid=2017)[0m proces 6 has routed 2900000 rows
= = = exit dumping = = =
[2m[36m(pid=2016)[0m proces 7 has routed 2700000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 2800000 rows
[2m[36m(pid=2021)[0m exit data routing process 5 .
= = = start dumping in main thread = = =
[2m[36m(pid=2016)[0m proces 7 has routed 2900000 rows
[2m[36m(pid=2017)[0m exit data routing process 6 .
= = = exit dumping = = =
= = = start dumping in main thread = = =
[2m[36m(pid=2016)[0m exit data routing process 7 .
= = = exit dumping = = =
= = = start dumping in main thread = = =
= = = exit dumping = = =
= = = start dumping in main thread = = =
= = = exit dumping = = =
= = = start dumping in main thread = = =
= = = exit dumping = = =
= = = start dumping in main thread = = =
= = = exit dumping = = =
= = finish merged = =
= = = TOTAL PROCESSED SO FAR: 144000000 ROWS. TIME SPENT: 1886.5624725818634 SECONDS = = =
reading chunk:  48
[2m[36m(pid=2016)[0m enter data routing process 0 ..


[2m[36m(pid=2022)[0m proces 6 has routed 800000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2400000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2100000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1100000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 600000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1400000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 1900000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 1700000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 900000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2500000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2200000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1200000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 700000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2000000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 1800000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1500000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2600000 rows
[2m[36m(pid=2022)[0m proces 6 ha

[2m[36m(pid=2022)[0m proces 1 has routed 0 rows
[2m[36m(pid=2019)[0m proces 0 has routed 300000 rows
reading chunk:  58
[2m[36m(pid=2022)[0m proces 1 has routed 100000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 400000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 200000 rows
[2m[36m(pid=2018)[0m enter data routing process 2 ..
[2m[36m(pid=2019)[0m proces 0 has routed 500000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 0 rows
[2m[36m(pid=2022)[0m proces 1 has routed 300000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 600000 rows
reading chunk:  59
[2m[36m(pid=2018)[0m proces 2 has routed 100000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 400000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 700000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 200000 rows
[2m[36m(pid=2020)[0m enter data routing process 3 ..
[2m[36m(pid=2022)[0m proces 1 has routed 500000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 800000 rows
[2m[36m(pid=2020)

[2m[36m(pid=2021)[0m proces 5 has routed 1300000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2100000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2400000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1100000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 1900000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2700000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 800000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1700000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 1400000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2200000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2500000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1200000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2000000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2800000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1800000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 900000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 1500000 rows
[2m[36m(pid=2022)[0m proces 1 

reading chunk:  68
[2m[36m(pid=2016)[0m proces 0 has routed 900000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 100000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 400000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 700000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1000000 rows
[2m[36m(pid=2020)[0m enter data routing process 4 ..
[2m[36m(pid=2023)[0m proces 3 has routed 200000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 500000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 0 rows
[2m[36m(pid=2017)[0m proces 1 has routed 800000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1100000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 300000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 600000 rows
reading chunk:  69
[2m[36m(pid=2017)[0m proces 1 has routed 900000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 100000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 400000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1200000 rows
[2m[36m(pi

[2m[36m(pid=2019)[0m proces 7 has routed 1000000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2400000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2900000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1400000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1600000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1900000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2700000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2200000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1100000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2500000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1700000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1500000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2000000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2800000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2300000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1200000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2600000 rows
[2m[36m(pid=2018)[0m proces 

[2m[36m(pid=2020)[0m proces 3 has routed 500000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 0 rows
[2m[36m(pid=2018)[0m proces 2 has routed 800000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 300000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1300000 rows
reading chunk:  78
[2m[36m(pid=2022)[0m proces 1 has routed 1100000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 600000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 100000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 900000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 400000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1400000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1200000 rows
[2m[36m(pid=2017)[0m enter data routing process 6 ..
[2m[36m(pid=2020)[0m proces 3 has routed 700000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 200000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 500000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1500000 rows
[2m[36m(pid=2017)[0m proce

[2m[36m(pid=2021)[0m proces 5 has routed 1900000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2400000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2900000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2600000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1700000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 2200000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 2000000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 1500000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2500000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2700000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1800000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 2300000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 2100000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 1600000 rows
[2m[36m(pid=2019)[0m exit data routing process 0 .
[2m[36m(pid=2018)[0m proces 2 has routed 2800000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1900000 rows
[2m[36m(pid=2020)[0m proces 3 h

[2m[36m(pid=2016)[0m proces 0 has routed 1600000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 1100000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 300000 rows
reading chunk:  87
[2m[36m(pid=2020)[0m proces 4 has routed 600000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 100000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 1400000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1700000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 900000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 1200000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 400000 rows
= = = = = = Synchronization Barrier = = = = = =
[2m[36m(pid=2020)[0m proces 4 has routed 700000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 200000 rows
[2m[36m(pid=2019)[0m enter data routing process 7 ..
[2m[36m(pid=2017)[0m proces 1 has routed 1500000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 1000000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1800000 rows
[2m[36m(pid=2021)[0m proce

[2m[36m(pid=2020)[0m proces 4 has routed 2600000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2300000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2900000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 2100000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1900000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2700000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2400000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 2200000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2800000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2500000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 2000000 rows
[2m[36m(pid=2021)[0m exit data routing process 2 .
[2m[36m(pid=2022)[0m proces 6 has routed 2300000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2900000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 2100000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 2400000 rows
= = = exit dumping = = =
[2m[36m(pid=2018)[0m proces 5 has routed 2600000 rows
[2m[36m

[2m[36m(pid=2018)[0m proces 2 has routed 1400000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1700000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 1200000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2000000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 700000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 200000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 500000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1000000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 1500000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 1300000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2100000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1800000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 800000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 300000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 600000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1100000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 1600000 rows
[2m[36m(pid=2020)[0m proces 3 has 

= = = start dumping in main thread = = =
[2m[36m(pid=2017)[0m proces 6 has routed 2700000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 2400000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 2900000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 2800000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 2500000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 2900000 rows
[2m[36m(pid=2023)[0m exit data routing process 4 .
[2m[36m(pid=2016)[0m proces 7 has routed 2600000 rows
= = = exit dumping = = =
[2m[36m(pid=2016)[0m proces 7 has routed 2700000 rows
= = = start dumping in main thread = = =
[2m[36m(pid=2021)[0m exit data routing process 5 .
[2m[36m(pid=2016)[0m proces 7 has routed 2800000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 2900000 rows
[2m[36m(pid=2017)[0m exit data routing process 6 .
= = = exit dumping = = =
= = = start dumping in main thread = = =[2m[36m(pid=2016)[0m exit data routing process 7 .

= = = exit dumping = = =
= = = start dump

[2m[36m(pid=2016)[0m proces 0 has routed 2200000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1200000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 700000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1000000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 500000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2100000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 1800000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 1600000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1300000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 800000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2300000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1100000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 600000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 1900000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2200000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1400000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 900000 rows
[2m[36m(pid=2023)[0m proces 3 has

= = = start dumping in main thread = = =
= = = exit dumping = = =
= = finish merged = =
= = = TOTAL PROCESSED SO FAR: 312000000 ROWS. TIME SPENT: 4090.8993978500366 SECONDS = = =
reading chunk:  104
[2m[36m(pid=2019)[0m enter data routing process 0 ..
[2m[36m(pid=2019)[0m proces 0 has routed 0 rows
reading chunk: [2m[36m(pid=2019)[0m proces 0 has routed 100000 rows 105

[2m[36m(pid=2019)[0m proces 0 has routed 200000 rows
[2m[36m(pid=2022)[0m enter data routing process 1 ..
[2m[36m(pid=2022)[0m proces 1 has routed 0 rows
[2m[36m(pid=2019)[0m proces 0 has routed 300000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 100000 rows
reading chunk:  106
[2m[36m(pid=2019)[0m proces 0 has routed 400000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 200000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 500000 rows
[2m[36m(pid=2018)[0m enter data routing process 2 ..
[2m[36m(pid=2022)[0m proces 1 has routed 300000 rows
[2m[36m(pid=2018)[0m proces 2 has routed

[2m[36m(pid=2023)[0m proces 4 has routed 1500000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1000000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2500000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2000000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 1800000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 1300000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2300000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 800000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 1600000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1100000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2600000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 1900000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2100000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2400000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 1400000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 900000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 2700000 rows
[2m[36m(pid=2017)[0m proces 6 

[2m[36m(pid=2016)[0m proces 0 has routed 700000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 200000 rows
[2m[36m(pid=2023)[0m enter data routing process 3 ..
[2m[36m(pid=2017)[0m proces 1 has routed 500000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 800000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 0 rows
[2m[36m(pid=2021)[0m proces 2 has routed 300000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 600000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 900000 rows
reading chunk:  116
[2m[36m(pid=2023)[0m proces 3 has routed 100000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 400000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 700000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1000000 rows
[2m[36m(pid=2020)[0m enter data routing process 4 ..
[2m[36m(pid=2023)[0m proces 3 has routed 200000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 500000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 800000 rows
[2m[36m(pid=2020)[0m proces 4

[2m[36m(pid=2023)[0m proces 3 has routed 2000000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1500000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1000000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2300000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2600000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1800000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 2900000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1300000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2100000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1600000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2400000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1100000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 2700000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 1900000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2200000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1400000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 1700000 rows
[2m[36m(pid=2021)[0m proces 

[2m[36m(pid=2023)[0m proces 4 has routed 200000 rows
[2m[36m(pid=2021)[0m enter data routing process 5 ..
[2m[36m(pid=2019)[0m proces 0 has routed 1200000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 700000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1000000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 500000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 0 rows
[2m[36m(pid=2023)[0m proces 4 has routed 300000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1300000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 800000 rows
reading chunk:  126
[2m[36m(pid=2020)[0m proces 3 has routed 600000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1100000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 100000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 400000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 900000 rows
[2m[36m(pid=2019)[0m proces 0 has routed 1400000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 1200000 rows
[2m[36m(pid=2017)[0m ent

[2m[36m(pid=2016)[0m proces 7 has routed 1300000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2800000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1600000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2600000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 2200000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 1900000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2400000 rows
[2m[36m(pid=2022)[0m proces 1 has routed 2900000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1700000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 1400000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2700000 rows
[2m[36m(pid=2021)[0m proces 5 has routed 2000000 rows
[2m[36m(pid=2023)[0m proces 4 has routed 2300000 rows
[2m[36m(pid=2020)[0m proces 3 has routed 2500000 rows
[2m[36m(pid=2017)[0m proces 6 has routed 1800000 rows
[2m[36m(pid=2016)[0m proces 7 has routed 1500000 rows
[2m[36m(pid=2018)[0m proces 2 has routed 2800000 rows
[2m[36m(pid=2020)[0m proces 

[2m[36m(pid=2021)[0m proces 2 has routed 1000000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 500000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 0 rows
[2m[36m(pid=2023)[0m proces 3 has routed 800000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 1300000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 300000 rows
[2m[36m(pid=2016)[0m proces 0 has routed 1600000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 1100000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 600000 rows
reading chunk:  135
[2m[36m(pid=2022)[0m proces 6 has routed 100000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 900000 rows
[2m[36m(pid=2017)[0m proces 1 has routed 1400000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 400000 rows
= = = = = = Synchronization Barrier = = = = = =
[2m[36m(pid=2016)[0m proces 0 has routed 1700000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 1200000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 200000 rows
[2m[36m(pid=2020)[0m proces 4 h

[2m[36m(pid=2018)[0m proces 5 has routed 2100000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1600000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 1900000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2700000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2400000 rows
[2m[36m(pid=2021)[0m proces 2 has routed 2900000 rows
[2m[36m(pid=2018)[0m proces 5 has routed 2200000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1700000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2800000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 2500000 rows
[2m[36m(pid=2022)[0m proces 6 has routed 2000000 rows
[2m[36m(pid=2017)[0m exit data routing process 1 .
[2m[36m(pid=2018)[0m proces 5 has routed 2300000 rows
[2m[36m(pid=2023)[0m proces 3 has routed 2900000 rows
[2m[36m(pid=2019)[0m proces 7 has routed 1800000 rows
= = = start dumping in main thread = = =
[2m[36m(pid=2022)[0m proces 6 has routed 2100000 rows
[2m[36m(pid=2020)[0m proces 4 has routed 260000

In [None]:
# ray.shutdown()