In [3]:
import pandas as pd
import hashlib

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

N_PARTITIONS = 1000

In [4]:
def id_to_hash(customer_id):
    return int(hashlib.md5(customer_id.encode('utf-8')).hexdigest(), 16)

In [5]:
members = pd.read_csv('/data/churn/members_v3.csv', nrows = 1)
members

transactions = pd.read_csv('/data/churn/all_trans.csv', nrows = 1)
transactions

logs = pd.read_csv('/data/churn/user_logs.csv', nrows = 1)
logs

train = pd.read_csv('/data/churn/all_train.csv', nrows = 1)
train

test = pd.read_csv('/data/churn/sample_submission_v2.csv', nrows = 1)
test

Unnamed: 0,msno,city,bd,gender,registered_via,registration_init_time
0,Rb9UwLQTrxzBVwCB6+bCcSQWZ9JiNLC9dXtM1oEsZA8=,1,0,,11,20110911


Unnamed: 0,msno,payment_method_id,payment_plan_days,plan_list_price,actual_amount_paid,is_auto_renew,transaction_date,membership_expire_date,is_cancel
0,YyO+tlZtAXYXoZhNr3Vg3+dfVQvrBVGO8j1mfqe4ZHc=,41,30,129,129,1,20150930,20151101,0


Unnamed: 0,msno,date,num_25,num_50,num_75,num_985,num_100,num_unq,total_secs
0,rxIP2f2aN0rYNp+toI0Obt/N/FYQX8hcO1fTmmy2h34=,20150513,0,0,0,0,1,1,280.335


Unnamed: 0,msno,is_churn
0,waLDQMmcOu2jLDaV1ddDkgCrB/jl6sD66Xzs0Vqax1Y=,1


Unnamed: 0,msno,is_churn
0,4n+fXlyJvfQnTeKXTWT507Ll4JVYGrOC8LHCfwBmPE4=,0


In [6]:
id_to_hash(members.loc[0, 'msno'])

209512247756457468966515739358104959027

In [7]:
import os

base_dir = '/data/churn/parts/'

# Create 1000 directories for each partition
for i in range(1000):
    os.makedirs(base_dir + f'p{i}', exist_ok=True)
    
len(os.listdir(base_dir))

1000

In [8]:
','.join(list(transactions.columns))

'msno,payment_method_id,payment_plan_days,plan_list_price,actual_amount_paid,is_auto_renew,transaction_date,membership_expire_date,is_cancel'

## Create Files

In each partition there are 5 csv files. The following code writes the header for each of the five files in each of the `N_PARTITION` partitions.

In [11]:
InteractiveShell.ast_node_interactivity = 'last_expr'

In [59]:
def create_blank_partitions():
    # For each partition create the files with headers
    for i in range(N_PARTITIONS):
        directory = base_dir + f'p{i}/'
        # Create five files
        for file in ['transactions.csv', 'train.csv', 'test.csv', 'members.csv', 'logs.csv']:
            # Write file header as first line
            with open(directory + file, 'w') as f:
                if file == 'transactions.csv':
                    f.write(','.join(list(transactions.columns)))
                elif file == 'train.csv':
                    f.write(','.join(list(train.columns)))
                elif file == 'test.csv':
                    f.write(','.join(list(train.columns)))
                elif file == 'members.csv':
                    f.write(','.join(list(members.columns)))
                elif file == 'logs.csv':
                    f.write(','.join(list(logs.columns)))
                    
    return directory

directory = create_blank_partitions()

In [60]:
os.listdir(directory)

['transactions.csv', 'members.csv', 'test.csv', 'train.csv', 'logs.csv']

### Example of Writing a Row

For each file, we'll iterate over the rows one at a time. The process for handling a row is:

1. Convert the customer id to an integer by hashing
2. Convert the integer to a partition number by modulus dividing by the number of partitions
3. Append the row to the correct partition file

Let's see how this works with a single row.

In [38]:
# Iterate through the dataframe one row at a time
for i, row in members.iterrows():
    # Find the partition by hashing the id
    partition = id_to_hash(row['msno']) % N_PARTITIONS
    # Open the file for appending
    with open(base_dir + f'p{partition}/members.csv', 'a') as f:
        # Write a newline and then the information
        f.write('\n')
        f.write(','.join([str(x) for x in row.values]))
    if i > 1:
        break

In [39]:
pd.read_csv(base_dir + f'p{partition}/members.csv')

Unnamed: 0,msno,city,bd,gender,registered_via,registration_init_time
0,cV358ssn7a0f7jZOwGNWS07wCKVqxyiImJUX6xcIwKw=,1,0,,11,20110915


Everything looks like it went well with the first attempt. Now we'll run this on a complete dataset.

## For Real

In [40]:
from timeit import default_timer as timer

### Member Information

We'll start off with the members. 

In [41]:
members = pd.read_csv('/data/churn/members_v3.csv')
members.shape

(6769473, 6)

## Iterrows

The first implementation will try using `iterrows`. This is fairly slow because Pandas packages the row as a Pandas series before iteration.

In [19]:
start = timer()

for i, row in members.iterrows():
    # Find the partition number by hashing the id
    partition = id_to_hash(row['msno']) % N_PARTITIONS
    
    # Open file for appending
    with open(base_dir + f'p{partition}/members.csv', 'a') as f:
        # Write a new line and then data
        f.write('\n')
        f.write(','.join([str(x) for x in row.values]))
        
    if i % 10000 == 0:
        print(f'{100 * round(i / members.shape[0], 2)}% complete. {round(timer() - start)} seconds elapsed.', end = '\r')

end = timer()
print(f'Reading and Writing {i} lines took {round(end - start)} seconds using iterrows.')

Reading and Writing 6769472 lines took 1456 seconds.


In [20]:
pd.read_csv(base_dir + f'p{partition}/members.csv').head()

Unnamed: 0,msno,city,bd,gender,registered_via,registration_init_time
0,+zMKqjvsTvD7O0Fvntk3VXe4ovwvD4KYk6PJZ92Ky60=,1,0,,9,20161227
1,3p2AY1tZAYa4LFcs0/plkuPv2hY9smh/xgcbKjtU9Dc=,5,26,male,3,20141109
2,BO0XUBzHeItkHI3N5g4uL08Ld1T/ZW/8GrbjBmT4s3w=,1,0,,7,20161228
3,qy7PNK2EE4+x6xeIdqjFVw5FlmxnFKylv6LKqqGbSo4=,1,23,female,4,20170113
4,c36721uHBQyhoVko21J9rR44Fex2ul72a74k0M7IkiQ=,1,0,,4,20170121


## Itertuples

Itertuples should be faster than iterrows because Pandas packages the row as a tuple instead of as a series. 

In [61]:
_ = create_blank_partitions()

In [55]:
start = timer()

for i, row in enumerate(members.itertuples()):
    
    # Find the partition number by hashing the id
    partition = id_to_hash(row[1]) % N_PARTITIONS
    
    # Open file for appending
    with open(base_dir + f'p{partition}/members.csv', 'a') as f:
        # Write a new line and then data
        f.write('\n')
        f.write(','.join([str(x) for x in row[1:]]))
        
    if i % 10000 == 0:
        print(f'{100 * round(i / members.shape[0], 2)}% complete. {round(timer() - start)} seconds elapsed.', end = '\r')

end = timer()
print(f'Reading and Writing {i} lines took {round(end - start)} seconds using itertuples.')

Reading and Writing 6769472 lines took 366 seconds using iterrows.


In [62]:
pd.read_csv(base_dir + f'p{partition}/members.csv').head()

Unnamed: 0,msno,city,bd,gender,registered_via,registration_init_time


## Apply

The other operation that could be used is apply. To use `apply`, we write a small function that saves the row and then call apply to the dataframe using `axis = 1` which sends each row to the function.

In [63]:
def save_row(row, name):
    # Find the partition number by hashing the id
    partition = id_to_hash(row['msno']) % N_PARTITIONS
    
    # Open file for appending
    with open(base_dir + f'p{partition}/{name}.csv', 'a') as f:
        # Write a new line and then data
        f.write('\n')
        f.write(','.join([str(x) for x in row.values]))

In [None]:
from tqdm import tqdm_notebook
from tqdm import tqdm
tqdm.pandas()

start = timer()
members.progress_apply(save_row, axis = 1, name = 'members')
end = timer()

print(f'Writing {members.shape[0]} rows took {round(end - start)} seconds using apply.')



  0%|          | 0/6769473 [00:00<?, ?it/s][A[A

  0%|          | 1/6769473 [00:02<4079:47:15,  2.17s/it][A[A

  0%|          | 518/6769473 [00:02<2855:44:32,  1.52s/it][A[A

  0%|          | 1159/6769473 [00:02<1998:55:06,  1.06s/it][A[A

  0%|          | 1739/6769473 [00:02<1399:13:12,  1.34it/s][A[A

  0%|          | 2635/6769473 [00:02<979:23:15,  1.92it/s] [A[A

  0%|          | 3480/6769473 [00:02<685:33:08,  2.74it/s][A[A

  0%|          | 4285/6769473 [00:02<479:53:58,  3.92it/s][A[A

  0%|          | 5264/6769473 [00:02<335:56:19,  5.59it/s][A[A

  0%|          | 6453/6769473 [00:02<235:09:47,  7.99it/s][A[A

  0%|          | 7350/6769473 [00:03<164:39:35, 11.41it/s][A[A

  0%|          | 8229/6769473 [00:03<115:19:04, 16.29it/s][A[A

  0%|          | 9083/6769473 [00:03<80:46:48, 23.25it/s] [A[A

  0%|          | 10027/6769473 [00:03<56:35:52, 33.17it/s][A[A

  0%|          | 10903/6769473 [00:03<39:41:25, 47.30it/s][A[A

  0%|          | 11851

  2%|▏         | 112282/6769473 [00:15<12:30, 8873.76it/s][A[A

  2%|▏         | 113187/6769473 [00:15<13:08, 8436.64it/s][A[A

  2%|▏         | 114048/6769473 [00:15<13:35, 8159.39it/s][A[A

  2%|▏         | 114878/6769473 [00:15<14:09, 7833.11it/s][A[A

  2%|▏         | 115674/6769473 [00:15<14:39, 7565.41it/s][A[A

  2%|▏         | 116442/6769473 [00:15<15:40, 7070.73it/s][A[A

  2%|▏         | 117163/6769473 [00:16<16:44, 6625.51it/s][A[A

  2%|▏         | 118030/6769473 [00:16<15:33, 7129.09it/s][A[A

  2%|▏         | 119075/6769473 [00:16<14:04, 7878.71it/s][A[A

  2%|▏         | 119916/6769473 [00:16<13:48, 8029.53it/s][A[A

  2%|▏         | 120928/6769473 [00:16<12:56, 8558.54it/s][A[A

  2%|▏         | 121814/6769473 [00:16<13:39, 8109.54it/s][A[A

  2%|▏         | 122651/6769473 [00:16<14:08, 7830.44it/s][A[A

  2%|▏         | 123558/6769473 [00:16<13:34, 8162.94it/s][A[A

  2%|▏         | 124409/6769473 [00:16<13:24, 8263.57it/s][A[A

  2%|▏    

  3%|▎         | 222374/6769473 [00:28<12:31, 8710.67it/s][A[A

  3%|▎         | 223349/6769473 [00:28<12:07, 8996.05it/s][A[A

  3%|▎         | 224260/6769473 [00:28<12:10, 8964.52it/s][A[A

  3%|▎         | 225165/6769473 [00:28<12:30, 8723.03it/s][A[A

  3%|▎         | 226045/6769473 [00:28<12:46, 8536.33it/s][A[A

  3%|▎         | 226906/6769473 [00:29<12:44, 8557.67it/s][A[A

  3%|▎         | 227766/6769473 [00:29<13:35, 8020.14it/s][A[A

  3%|▎         | 228624/6769473 [00:29<13:19, 8178.43it/s][A[A

  3%|▎         | 229450/6769473 [00:29<13:42, 7955.56it/s][A[A

  3%|▎         | 230253/6769473 [00:29<14:00, 7776.40it/s][A[A

  3%|▎         | 231037/6769473 [00:29<14:12, 7673.38it/s][A[A

  3%|▎         | 231864/6769473 [00:29<13:53, 7842.12it/s][A[A

  3%|▎         | 232653/6769473 [00:29<14:03, 7748.17it/s][A[A

  3%|▎         | 233615/6769473 [00:29<13:14, 8227.38it/s][A[A

  3%|▎         | 234599/6769473 [00:29<12:35, 8651.44it/s][A[A

  3%|▎    

  5%|▍         | 332409/6769473 [00:41<13:55, 7701.81it/s][A[A

  5%|▍         | 333196/6769473 [00:41<13:50, 7751.39it/s][A[A

  5%|▍         | 333975/6769473 [00:41<13:57, 7688.59it/s][A[A

  5%|▍         | 334747/6769473 [00:42<14:35, 7351.33it/s][A[A

  5%|▍         | 335652/6769473 [00:42<13:46, 7788.79it/s][A[A

  5%|▍         | 336442/6769473 [00:42<15:41, 6831.32it/s][A[A

  5%|▍         | 337370/6769473 [00:42<14:27, 7417.06it/s][A[A

  5%|▍         | 338216/6769473 [00:42<13:55, 7701.82it/s][A[A

  5%|▌         | 339014/6769473 [00:42<14:24, 7438.95it/s][A[A

  5%|▌         | 339839/6769473 [00:42<13:58, 7664.48it/s][A[A

  5%|▌         | 340622/6769473 [00:42<14:36, 7332.30it/s][A[A

  5%|▌         | 341370/6769473 [00:42<14:58, 7150.69it/s][A[A

  5%|▌         | 342247/6769473 [00:43<14:09, 7568.58it/s][A[A

  5%|▌         | 343019/6769473 [00:43<14:04, 7608.40it/s][A[A

  5%|▌         | 343930/6769473 [00:43<13:22, 8002.57it/s][A[A

  5%|▌    

  7%|▋         | 441797/6769473 [00:54<14:23, 7329.51it/s][A[A

  7%|▋         | 442569/6769473 [00:54<14:10, 7440.66it/s][A[A

  7%|▋         | 443371/6769473 [00:55<13:52, 7603.21it/s][A[A

  7%|▋         | 444140/6769473 [00:55<14:01, 7517.55it/s][A[A

  7%|▋         | 444941/6769473 [00:55<14:12, 7420.72it/s][A[A

  7%|▋         | 445688/6769473 [00:55<16:35, 6354.27it/s][A[A

  7%|▋         | 446398/6769473 [00:55<16:03, 6560.80it/s][A[A

  7%|▋         | 447077/6769473 [00:55<16:27, 6400.76it/s][A[A

  7%|▋         | 447971/6769473 [00:55<15:03, 6995.65it/s][A[A

  7%|▋         | 448791/6769473 [00:55<14:23, 7316.89it/s][A[A

  7%|▋         | 449547/6769473 [00:56<15:39, 6726.46it/s][A[A

  7%|▋         | 450246/6769473 [00:56<16:02, 6565.73it/s][A[A

  7%|▋         | 450932/6769473 [00:56<15:50, 6649.64it/s][A[A

  7%|▋         | 451812/6769473 [00:56<14:40, 7174.44it/s][A[A

  7%|▋         | 452551/6769473 [00:56<15:11, 6926.50it/s][A[A

  7%|▋    

  8%|▊         | 541868/6769473 [01:08<12:47, 8113.96it/s][A[A

  8%|▊         | 542695/6769473 [01:08<12:46, 8122.85it/s][A[A

  8%|▊         | 543713/6769473 [01:08<12:00, 8645.19it/s][A[A

  8%|▊         | 544595/6769473 [01:08<12:07, 8552.59it/s][A[A

  8%|▊         | 545513/6769473 [01:09<11:52, 8730.26it/s][A[A

  8%|▊         | 546504/6769473 [01:09<11:27, 9052.14it/s][A[A

  8%|▊         | 547515/6769473 [01:09<11:05, 9343.65it/s][A[A

  8%|▊         | 548459/6769473 [01:09<11:08, 9309.13it/s][A[A

  8%|▊         | 549397/6769473 [01:09<11:20, 9137.14it/s][A[A

  8%|▊         | 550316/6769473 [01:09<11:47, 8786.13it/s][A[A

  8%|▊         | 551330/6769473 [01:09<11:19, 9151.27it/s][A[A

  8%|▊         | 552254/6769473 [01:09<12:18, 8413.04it/s][A[A

  8%|▊         | 553176/6769473 [01:09<11:59, 8637.57it/s][A[A

  8%|▊         | 554054/6769473 [01:09<13:03, 7933.52it/s][A[A

  8%|▊         | 555078/6769473 [01:10<12:10, 8507.97it/s][A[A

  8%|▊    

 10%|▉         | 648687/6769473 [01:21<12:57, 7875.57it/s][A[A

 10%|▉         | 649484/6769473 [01:21<13:06, 7777.03it/s][A[A

 10%|▉         | 650347/6769473 [01:21<12:43, 8014.08it/s][A[A

 10%|▉         | 651223/6769473 [01:22<12:24, 8222.40it/s][A[A

 10%|▉         | 652185/6769473 [01:22<11:51, 8595.03it/s][A[A

 10%|▉         | 653053/6769473 [01:22<12:29, 8161.29it/s][A[A

 10%|▉         | 653880/6769473 [01:22<12:47, 7968.18it/s][A[A

 10%|▉         | 654686/6769473 [01:22<13:01, 7826.04it/s][A[A

 10%|▉         | 655542/6769473 [01:22<12:41, 8030.86it/s][A[A

 10%|▉         | 656351/6769473 [01:22<12:40, 8035.21it/s][A[A

 10%|▉         | 657159/6769473 [01:22<13:06, 7775.90it/s][A[A

 10%|▉         | 657942/6769473 [01:22<13:20, 7637.12it/s][A[A

 10%|▉         | 658789/6769473 [01:23<12:56, 7868.27it/s][A[A

 10%|▉         | 659581/6769473 [01:23<12:55, 7878.25it/s][A[A

 10%|▉         | 660426/6769473 [01:23<12:39, 8041.43it/s][A[A

 10%|▉    

 11%|█         | 758687/6769473 [01:34<11:32, 8675.41it/s][A[A

 11%|█         | 759573/6769473 [01:35<11:28, 8728.60it/s][A[A

 11%|█         | 760468/6769473 [01:35<11:23, 8791.62it/s][A[A

 11%|█         | 761414/6769473 [01:35<11:08, 8981.45it/s][A[A

 11%|█▏        | 762315/6769473 [01:35<12:12, 8196.59it/s][A[A

 11%|█▏        | 763150/6769473 [01:35<12:50, 7794.59it/s][A[A

 11%|█▏        | 763945/6769473 [01:35<13:24, 7468.67it/s][A[A

 11%|█▏        | 764808/6769473 [01:35<12:51, 7781.33it/s][A[A

 11%|█▏        | 765801/6769473 [01:35<12:01, 8319.42it/s][A[A

 11%|█▏        | 766742/6769473 [01:35<11:36, 8617.41it/s][A[A

 11%|█▏        | 767770/6769473 [01:36<11:02, 9056.81it/s][A[A

 11%|█▏        | 768693/6769473 [01:36<11:34, 8638.02it/s][A[A

 11%|█▏        | 769584/6769473 [01:36<11:28, 8717.64it/s][A[A

 11%|█▏        | 770495/6769473 [01:36<11:19, 8829.65it/s][A[A

 11%|█▏        | 771387/6769473 [01:36<11:19, 8821.87it/s][A[A

 11%|█▏   

 13%|█▎        | 868200/6769473 [01:48<11:37, 8454.85it/s][A[A

 13%|█▎        | 869116/6769473 [01:48<11:21, 8654.60it/s][A[A

 13%|█▎        | 869998/6769473 [01:48<11:23, 8633.64it/s][A[A

 13%|█▎        | 870992/6769473 [01:48<10:56, 8986.92it/s][A[A

 13%|█▎        | 871952/6769473 [01:48<10:43, 9161.00it/s][A[A

 13%|█▎        | 872878/6769473 [01:48<11:11, 8779.24it/s][A[A

 13%|█▎        | 873766/6769473 [01:48<11:50, 8298.29it/s][A[A

 13%|█▎        | 874609/6769473 [01:48<12:33, 7819.50it/s][A[A

 13%|█▎        | 875462/6769473 [01:49<12:14, 8019.36it/s][A[A

 13%|█▎        | 876276/6769473 [01:49<12:25, 7904.93it/s][A[A

 13%|█▎        | 877075/6769473 [01:49<12:35, 7799.82it/s][A[A

 13%|█▎        | 877864/6769473 [01:49<12:32, 7824.75it/s][A[A

 13%|█▎        | 878766/6769473 [01:49<12:03, 8146.67it/s][A[A

 13%|█▎        | 879675/6769473 [01:49<11:40, 8408.33it/s][A[A

 13%|█▎        | 880574/6769473 [01:49<11:26, 8573.17it/s][A[A

 13%|█▎   

 14%|█▍        | 979148/6769473 [02:01<10:53, 8857.72it/s][A[A

 14%|█▍        | 980037/6769473 [02:01<10:54, 8844.80it/s][A[A

 14%|█▍        | 980924/6769473 [02:01<11:49, 8161.70it/s][A[A

 15%|█▍        | 981833/6769473 [02:01<11:27, 8419.29it/s][A[A

 15%|█▍        | 982686/6769473 [02:01<11:45, 8207.80it/s][A[A

 15%|█▍        | 983591/6769473 [02:01<11:25, 8443.49it/s][A[A

 15%|█▍        | 984443/6769473 [02:01<11:35, 8321.03it/s][A[A

 15%|█▍        | 985357/6769473 [02:01<11:16, 8549.63it/s][A[A

 15%|█▍        | 986218/6769473 [02:02<11:57, 8059.91it/s][A[A

 15%|█▍        | 987214/6769473 [02:02<11:16, 8547.37it/s][A[A

 15%|█▍        | 988084/6769473 [02:02<11:57, 8052.84it/s][A[A

 15%|█▍        | 988906/6769473 [02:02<12:03, 7990.91it/s][A[A

 15%|█▍        | 989717/6769473 [02:02<12:13, 7874.37it/s][A[A

 15%|█▍        | 990553/6769473 [02:02<12:01, 8012.44it/s][A[A

 15%|█▍        | 991457/6769473 [02:02<11:36, 8294.20it/s][A[A

 15%|█▍   

 16%|█▌        | 1088909/6769473 [02:14<11:40, 8106.09it/s][A[A

 16%|█▌        | 1089732/6769473 [02:14<12:28, 7587.17it/s][A[A

 16%|█▌        | 1090506/6769473 [02:14<13:54, 6806.64it/s][A[A

 16%|█▌        | 1091213/6769473 [02:14<13:49, 6844.08it/s][A[A

 16%|█▌        | 1091946/6769473 [02:14<13:33, 6981.84it/s][A[A

 16%|█▌        | 1092792/6769473 [02:14<12:50, 7366.84it/s][A[A

 16%|█▌        | 1093544/6769473 [02:14<13:20, 7088.26it/s][A[A

 16%|█▌        | 1094266/6769473 [02:15<13:36, 6954.54it/s][A[A

 16%|█▌        | 1095256/6769473 [02:15<12:23, 7633.69it/s][A[A

 16%|█▌        | 1096047/6769473 [02:15<12:38, 7483.54it/s][A[A

 16%|█▌        | 1096815/6769473 [02:15<12:52, 7342.00it/s][A[A

 16%|█▌        | 1097564/6769473 [02:15<12:58, 7288.31it/s][A[A

 16%|█▌        | 1098316/6769473 [02:15<12:51, 7355.37it/s][A[A

 16%|█▌        | 1099093/6769473 [02:15<12:38, 7474.23it/s][A[A

 16%|█▌        | 1099871/6769473 [02:15<12:29, 7561.04it/s][A

 18%|█▊        | 1196405/6769473 [02:27<11:19, 8197.90it/s][A[A

 18%|█▊        | 1197325/6769473 [02:27<10:57, 8473.18it/s][A[A

 18%|█▊        | 1198560/6769473 [02:27<09:55, 9354.03it/s][A[A

 18%|█▊        | 1199534/6769473 [02:27<10:42, 8670.25it/s][A[A

 18%|█▊        | 1200438/6769473 [02:27<10:44, 8638.59it/s][A[A

 18%|█▊        | 1201328/6769473 [02:27<10:48, 8586.75it/s][A[A

 18%|█▊        | 1202205/6769473 [02:27<10:50, 8553.60it/s][A[A

 18%|█▊        | 1203074/6769473 [02:27<11:48, 7851.14it/s][A[A

 18%|█▊        | 1203998/6769473 [02:28<11:16, 8220.81it/s][A[A

 18%|█▊        | 1204839/6769473 [02:28<11:36, 7984.47it/s][A[A

 18%|█▊        | 1205664/6769473 [02:28<11:30, 8062.18it/s][A[A

 18%|█▊        | 1206504/6769473 [02:28<11:21, 8158.92it/s][A[A

 18%|█▊        | 1207473/6769473 [02:28<10:49, 8563.59it/s][A[A

 18%|█▊        | 1208345/6769473 [02:28<10:46, 8608.07it/s][A[A

 18%|█▊        | 1209214/6769473 [02:28<10:58, 8437.44it/s][A

 19%|█▉        | 1308901/6769473 [02:40<10:48, 8424.89it/s][A[A

 19%|█▉        | 1309749/6769473 [02:40<10:46, 8440.77it/s][A[A

 19%|█▉        | 1310703/6769473 [02:40<10:24, 8741.88it/s][A[A

 19%|█▉        | 1311646/6769473 [02:40<10:10, 8936.14it/s][A[A

 19%|█▉        | 1312547/6769473 [02:40<10:09, 8957.19it/s][A[A

 19%|█▉        | 1313505/6769473 [02:40<09:57, 9134.78it/s][A[A

 19%|█▉        | 1314422/6769473 [02:40<09:58, 9112.76it/s][A[A

 19%|█▉        | 1315336/6769473 [02:40<10:53, 8340.22it/s][A[A

 19%|█▉        | 1316185/6769473 [02:41<11:29, 7912.24it/s][A[A

 19%|█▉        | 1317043/6769473 [02:41<11:13, 8099.22it/s][A[A

 19%|█▉        | 1317879/6769473 [02:41<11:06, 8174.02it/s][A[A

 19%|█▉        | 1318705/6769473 [02:41<11:20, 8010.85it/s][A[A

 19%|█▉        | 1319560/6769473 [02:41<11:07, 8163.96it/s][A[A

 20%|█▉        | 1320416/6769473 [02:41<10:58, 8278.05it/s][A[A

 20%|█▉        | 1321478/6769473 [02:41<10:14, 8863.09it/s][A

 21%|██        | 1417433/6769473 [02:53<11:58, 7449.75it/s][A[A

 21%|██        | 1418186/6769473 [02:53<12:13, 7299.73it/s][A[A

 21%|██        | 1419080/6769473 [02:53<11:32, 7722.98it/s][A[A

 21%|██        | 1419872/6769473 [02:53<11:27, 7780.10it/s][A[A

 21%|██        | 1420840/6769473 [02:53<10:47, 8264.50it/s][A[A

 21%|██        | 1421681/6769473 [02:53<12:05, 7374.99it/s][A[A

 21%|██        | 1422584/6769473 [02:53<11:25, 7803.17it/s][A[A

 21%|██        | 1423511/6769473 [02:54<10:52, 8191.85it/s][A[A

 21%|██        | 1424370/6769473 [02:54<10:43, 8304.59it/s][A[A

 21%|██        | 1425390/6769473 [02:54<10:07, 8792.50it/s][A[A

 21%|██        | 1426289/6769473 [02:54<10:11, 8738.03it/s][A[A

 21%|██        | 1427183/6769473 [02:54<10:07, 8794.72it/s][A[A

 21%|██        | 1428109/6769473 [02:54<09:58, 8926.36it/s][A[A

 21%|██        | 1429009/6769473 [02:54<10:04, 8838.17it/s][A[A

 21%|██        | 1429899/6769473 [02:54<10:14, 8685.85it/s][A

 23%|██▎       | 1527662/6769473 [03:06<10:54, 8003.04it/s][A[A

 23%|██▎       | 1528469/6769473 [03:06<11:17, 7738.30it/s][A[A

 23%|██▎       | 1529250/6769473 [03:06<11:48, 7399.52it/s][A[A

 23%|██▎       | 1529998/6769473 [03:06<12:04, 7233.50it/s][A[A

 23%|██▎       | 1530849/6769473 [03:06<11:31, 7570.98it/s][A[A

 23%|██▎       | 1531648/6769473 [03:06<11:21, 7689.88it/s][A[A

 23%|██▎       | 1532424/6769473 [03:06<11:47, 7405.81it/s][A[A

 23%|██▎       | 1533226/6769473 [03:06<11:30, 7579.36it/s][A[A

 23%|██▎       | 1534026/6769473 [03:07<11:19, 7699.32it/s][A[A

 23%|██▎       | 1534804/6769473 [03:07<11:17, 7721.15it/s][A[A

 23%|██▎       | 1535580/6769473 [03:07<11:23, 7662.83it/s][A[A

 23%|██▎       | 1536397/6769473 [03:07<11:10, 7807.22it/s][A[A

 23%|██▎       | 1537180/6769473 [03:07<11:18, 7706.21it/s][A[A

 23%|██▎       | 1538056/6769473 [03:07<10:54, 7994.59it/s][A[A

 23%|██▎       | 1539379/6769473 [03:07<09:36, 9071.18it/s][A

 24%|██▍       | 1637811/6769473 [03:19<10:56, 7817.76it/s][A[A

 24%|██▍       | 1639032/6769473 [03:19<09:45, 8763.49it/s][A[A

 24%|██▍       | 1639948/6769473 [03:19<09:40, 8837.62it/s][A[A

 24%|██▍       | 1641068/6769473 [03:19<09:03, 9432.65it/s][A[A

 24%|██▍       | 1642057/6769473 [03:19<08:56, 9563.78it/s][A[A

 24%|██▍       | 1643036/6769473 [03:19<10:11, 8384.10it/s][A[A

 24%|██▍       | 1643916/6769473 [03:19<10:46, 7923.83it/s][A[A

 24%|██▍       | 1644743/6769473 [03:19<10:53, 7846.86it/s][A[A

 24%|██▍       | 1645645/6769473 [03:20<10:27, 8164.47it/s][A[A

 24%|██▍       | 1646831/6769473 [03:20<09:28, 9004.93it/s][A[A

 24%|██▍       | 1647917/6769473 [03:20<08:59, 9490.71it/s][A[A

 24%|██▍       | 1648902/6769473 [03:20<09:24, 9066.44it/s][A[A

 24%|██▍       | 1649838/6769473 [03:20<09:42, 8794.21it/s][A[A

 24%|██▍       | 1650740/6769473 [03:20<09:47, 8716.54it/s][A[A

 24%|██▍       | 1651713/6769473 [03:20<09:28, 8997.39it/s][A

 26%|██▌       | 1748967/6769473 [03:32<09:48, 8538.11it/s][A[A

 26%|██▌       | 1749827/6769473 [03:32<09:59, 8377.59it/s][A[A

 26%|██▌       | 1750678/6769473 [03:32<09:56, 8416.34it/s][A[A

 26%|██▌       | 1751592/6769473 [03:32<09:42, 8620.07it/s][A[A

 26%|██▌       | 1752508/6769473 [03:32<09:31, 8773.34it/s][A[A

 26%|██▌       | 1753389/6769473 [03:32<10:00, 8354.61it/s][A[A

 26%|██▌       | 1754233/6769473 [03:32<09:58, 8378.52it/s][A[A

 26%|██▌       | 1755076/6769473 [03:32<10:06, 8261.96it/s][A[A

 26%|██▌       | 1755947/6769473 [03:32<09:57, 8388.98it/s][A[A

 26%|██▌       | 1756789/6769473 [03:32<09:59, 8355.90it/s][A[A

 26%|██▌       | 1757677/6769473 [03:33<09:49, 8504.14it/s][A[A

 26%|██▌       | 1758616/6769473 [03:33<09:32, 8750.59it/s][A[A

 26%|██▌       | 1759534/6769473 [03:33<09:24, 8873.09it/s][A[A

 26%|██▌       | 1760425/6769473 [03:33<09:26, 8841.67it/s][A[A

 26%|██▌       | 1761571/6769473 [03:33<08:47, 9491.81it/s][A

 27%|██▋       | 1859416/6769473 [03:45<10:13, 8004.63it/s][A[A

 27%|██▋       | 1860385/6769473 [03:45<09:41, 8444.16it/s][A[A

 27%|██▋       | 1861249/6769473 [03:45<10:00, 8175.46it/s][A[A

 28%|██▊       | 1862097/6769473 [03:45<09:53, 8263.24it/s][A[A

 28%|██▊       | 1863077/6769473 [03:45<09:25, 8669.82it/s][A[A

 28%|██▊       | 1863957/6769473 [03:45<09:30, 8604.29it/s][A[A

 28%|██▊       | 1864827/6769473 [03:45<09:44, 8390.11it/s][A[A

 28%|██▊       | 1865674/6769473 [03:45<09:54, 8248.97it/s][A[A

 28%|██▊       | 1866505/6769473 [03:45<11:19, 7219.26it/s][A[A

 28%|██▊       | 1867282/6769473 [03:45<11:04, 7375.85it/s][A[A

 28%|██▊       | 1868183/6769473 [03:46<10:28, 7797.43it/s][A[A

 28%|██▊       | 1869217/6769473 [03:46<09:42, 8417.82it/s][A[A

 28%|██▊       | 1870184/6769473 [03:46<09:19, 8757.15it/s][A[A

 28%|██▊       | 1871101/6769473 [03:46<09:11, 8876.83it/s][A[A

 28%|██▊       | 1872006/6769473 [03:46<09:12, 8857.98it/s][A

 29%|██▉       | 1966066/6769473 [03:58<10:20, 7746.26it/s][A[A

 29%|██▉       | 1966892/6769473 [03:58<10:08, 7893.33it/s][A[A

 29%|██▉       | 1967692/6769473 [03:58<11:13, 7129.15it/s][A[A

 29%|██▉       | 1968581/6769473 [03:58<10:33, 7578.64it/s][A[A

 29%|██▉       | 1969362/6769473 [03:58<10:37, 7534.00it/s][A[A

 29%|██▉       | 1970132/6769473 [03:58<11:29, 6965.11it/s][A[A

 29%|██▉       | 1970986/6769473 [03:58<10:50, 7371.77it/s][A[A

 29%|██▉       | 1971744/6769473 [03:59<12:28, 6413.38it/s][A[A

 29%|██▉       | 1972422/6769473 [03:59<14:45, 5415.45it/s][A[A

 29%|██▉       | 1973016/6769473 [03:59<17:20, 4608.66it/s][A[A

 29%|██▉       | 1973704/6769473 [03:59<15:37, 5114.75it/s][A[A

 29%|██▉       | 1974345/6769473 [03:59<14:40, 5444.11it/s][A[A

 29%|██▉       | 1974936/6769473 [03:59<14:55, 5355.96it/s][A[A

 29%|██▉       | 1975505/6769473 [03:59<15:11, 5256.79it/s][A[A

 29%|██▉       | 1976331/6769473 [03:59<13:32, 5899.32it/s][A

 31%|███       | 2066519/6769473 [04:11<08:46, 8926.52it/s][A[A

 31%|███       | 2067420/6769473 [04:11<09:43, 8058.67it/s][A[A

 31%|███       | 2068248/6769473 [04:11<10:03, 7790.08it/s][A[A

 31%|███       | 2069065/6769473 [04:11<09:54, 7899.87it/s][A[A

 31%|███       | 2069905/6769473 [04:11<09:44, 8042.99it/s][A[A

 31%|███       | 2070719/6769473 [04:12<09:53, 7912.67it/s][A[A

 31%|███       | 2071518/6769473 [04:12<09:58, 7844.00it/s][A[A

 31%|███       | 2072308/6769473 [04:12<10:32, 7429.34it/s][A[A

 31%|███       | 2073103/6769473 [04:12<10:19, 7577.72it/s][A[A

 31%|███       | 2073927/6769473 [04:12<10:04, 7764.17it/s][A[A

 31%|███       | 2074709/6769473 [04:12<10:30, 7450.33it/s][A[A

 31%|███       | 2075461/6769473 [04:12<10:31, 7435.14it/s][A[A

 31%|███       | 2076218/6769473 [04:12<10:27, 7474.14it/s][A[A

 31%|███       | 2076969/6769473 [04:12<10:32, 7414.28it/s][A[A

 31%|███       | 2078097/6769473 [04:12<09:27, 8262.69it/s][A

 32%|███▏      | 2170633/6769473 [04:24<11:12, 6837.76it/s][A[A

 32%|███▏      | 2171345/6769473 [04:24<11:08, 6882.14it/s][A[A

 32%|███▏      | 2172204/6769473 [04:24<10:28, 7317.70it/s][A[A

 32%|███▏      | 2173115/6769473 [04:24<09:51, 7775.79it/s][A[A

 32%|███▏      | 2173994/6769473 [04:24<09:30, 8053.56it/s][A[A

 32%|███▏      | 2175004/6769473 [04:25<08:55, 8572.75it/s][A[A

 32%|███▏      | 2176047/6769473 [04:25<08:27, 9055.81it/s][A[A

 32%|███▏      | 2176976/6769473 [04:25<08:25, 9087.43it/s][A[A

 32%|███▏      | 2177901/6769473 [04:25<08:35, 8910.78it/s][A[A

 32%|███▏      | 2178805/6769473 [04:25<09:07, 8388.25it/s][A[A

 32%|███▏      | 2179659/6769473 [04:25<10:07, 7553.66it/s][A[A

 32%|███▏      | 2180441/6769473 [04:25<10:26, 7327.87it/s][A[A

 32%|███▏      | 2181237/6769473 [04:25<10:11, 7505.93it/s][A[A

 32%|███▏      | 2182216/6769473 [04:25<09:28, 8070.81it/s][A[A

 32%|███▏      | 2183046/6769473 [04:26<09:24, 8126.14it/s][A

 34%|███▎      | 2277053/6769473 [04:37<08:04, 9272.10it/s][A[A

 34%|███▎      | 2277994/6769473 [04:37<08:22, 8932.49it/s][A[A

 34%|███▎      | 2278899/6769473 [04:37<08:27, 8855.76it/s][A[A

 34%|███▎      | 2279818/6769473 [04:37<08:21, 8952.21it/s][A[A

 34%|███▎      | 2280810/6769473 [04:37<08:06, 9219.79it/s][A[A

 34%|███▎      | 2281951/6769473 [04:38<07:38, 9781.47it/s][A[A

 34%|███▎      | 2283020/6769473 [04:38<07:27, 10036.16it/s][A[A

 34%|███▎      | 2284041/6769473 [04:38<07:24, 10087.40it/s][A[A

 34%|███▍      | 2285058/6769473 [04:38<08:08, 9181.14it/s] [A[A

 34%|███▍      | 2285998/6769473 [04:38<08:12, 9109.14it/s][A[A

 34%|███▍      | 2286924/6769473 [04:38<08:39, 8626.09it/s][A[A

 34%|███▍      | 2287803/6769473 [04:38<09:11, 8119.65it/s][A[A

 34%|███▍      | 2288633/6769473 [04:38<09:37, 7758.90it/s][A[A

 34%|███▍      | 2289425/6769473 [04:39<09:54, 7530.10it/s][A[A

 34%|███▍      | 2290191/6769473 [04:39<10:42, 6972.77it/s]

 35%|███▌      | 2385339/6769473 [04:50<09:15, 7891.70it/s][A[A

 35%|███▌      | 2386259/6769473 [04:50<08:51, 8242.49it/s][A[A

 35%|███▌      | 2387254/6769473 [04:50<08:24, 8687.82it/s][A[A

 35%|███▌      | 2388173/6769473 [04:51<08:16, 8829.95it/s][A[A

 35%|███▌      | 2389067/6769473 [04:51<09:06, 8019.48it/s][A[A

 35%|███▌      | 2390050/6769473 [04:51<08:36, 8487.22it/s][A[A

 35%|███▌      | 2390976/6769473 [04:51<08:23, 8704.72it/s][A[A

 35%|███▌      | 2391864/6769473 [04:51<08:23, 8693.21it/s][A[A

 35%|███▌      | 2392746/6769473 [04:51<08:24, 8680.30it/s][A[A

 35%|███▌      | 2393623/6769473 [04:51<08:28, 8606.42it/s][A[A

 35%|███▌      | 2394490/6769473 [04:51<08:41, 8396.77it/s][A[A

 35%|███▌      | 2395429/6769473 [04:51<08:32, 8535.42it/s][A[A

 35%|███▌      | 2396287/6769473 [04:52<08:43, 8346.54it/s][A[A

 35%|███▌      | 2397268/6769473 [04:52<08:20, 8735.66it/s][A[A

 35%|███▌      | 2398150/6769473 [04:52<08:19, 8757.25it/s][A

 37%|███▋      | 2495133/6769473 [05:03<08:07, 8759.26it/s][A[A

 37%|███▋      | 2496012/6769473 [05:04<08:27, 8416.01it/s][A[A

 37%|███▋      | 2496886/6769473 [05:04<08:22, 8508.66it/s][A[A

 37%|███▋      | 2497741/6769473 [05:04<08:35, 8288.36it/s][A[A

 37%|███▋      | 2498574/6769473 [05:04<08:50, 8051.86it/s][A[A

 37%|███▋      | 2499384/6769473 [05:04<09:15, 7686.98it/s][A[A

 37%|███▋      | 2500188/6769473 [05:04<09:08, 7788.38it/s][A[A

 37%|███▋      | 2501120/6769473 [05:04<08:41, 8191.43it/s][A[A

 37%|███▋      | 2502241/6769473 [05:04<07:58, 8910.56it/s][A[A

 37%|███▋      | 2503157/6769473 [05:04<08:00, 8875.78it/s][A[A

 37%|███▋      | 2504062/6769473 [05:04<07:58, 8922.86it/s][A[A

 37%|███▋      | 2504967/6769473 [05:05<08:02, 8832.76it/s][A[A

 37%|███▋      | 2505873/6769473 [05:05<07:59, 8898.88it/s][A[A

 37%|███▋      | 2506770/6769473 [05:05<08:20, 8517.19it/s][A[A

 37%|███▋      | 2507630/6769473 [05:05<08:59, 7901.67it/s][A

 39%|███▊      | 2606420/6769473 [05:17<08:33, 8099.67it/s][A[A

 39%|███▊      | 2607446/6769473 [05:17<08:01, 8643.20it/s][A[A

 39%|███▊      | 2608568/6769473 [05:17<07:28, 9282.72it/s][A[A

 39%|███▊      | 2609521/6769473 [05:17<07:38, 9074.16it/s][A[A

 39%|███▊      | 2610555/6769473 [05:17<07:21, 9417.84it/s][A[A

 39%|███▊      | 2611636/6769473 [05:17<07:04, 9794.87it/s][A[A

 39%|███▊      | 2612631/6769473 [05:17<07:48, 8864.03it/s][A[A

 39%|███▊      | 2613545/6769473 [05:17<08:04, 8582.45it/s][A[A

 39%|███▊      | 2614466/6769473 [05:17<07:54, 8761.33it/s][A[A

 39%|███▊      | 2615358/6769473 [05:17<08:06, 8537.70it/s][A[A

 39%|███▊      | 2616225/6769473 [05:18<08:21, 8280.97it/s][A[A

 39%|███▊      | 2617064/6769473 [05:18<08:36, 8045.19it/s][A[A

 39%|███▊      | 2617878/6769473 [05:18<08:49, 7838.07it/s][A[A

 39%|███▊      | 2618709/6769473 [05:18<08:40, 7972.65it/s][A[A

 39%|███▊      | 2619722/6769473 [05:18<08:07, 8515.42it/s][A

 40%|████      | 2723834/6769473 [05:30<07:05, 9516.96it/s][A[A

 40%|████      | 2724791/6769473 [05:30<07:07, 9471.82it/s][A[A

 40%|████      | 2725742/6769473 [05:30<07:38, 8820.19it/s][A[A

 40%|████      | 2726636/6769473 [05:30<07:59, 8427.27it/s][A[A

 40%|████      | 2727647/6769473 [05:30<07:35, 8869.48it/s][A[A

 40%|████      | 2728549/6769473 [05:30<07:39, 8797.02it/s][A[A

 40%|████      | 2729576/6769473 [05:30<07:19, 9189.85it/s][A[A

 40%|████      | 2730543/6769473 [05:30<07:12, 9328.65it/s][A[A

 40%|████      | 2731555/6769473 [05:31<07:02, 9550.82it/s][A[A

 40%|████      | 2732518/6769473 [05:31<07:31, 8949.61it/s][A[A

 40%|████      | 2733535/6769473 [05:31<07:14, 9282.67it/s][A[A

 40%|████      | 2734476/6769473 [05:31<07:45, 8676.68it/s][A[A

 40%|████      | 2735361/6769473 [05:31<08:10, 8230.86it/s][A[A

 40%|████      | 2736201/6769473 [05:31<08:29, 7918.63it/s][A[A

 40%|████      | 2737007/6769473 [05:31<08:31, 7876.75it/s][A

 42%|████▏     | 2837151/6769473 [05:43<06:39, 9849.41it/s][A[A

 42%|████▏     | 2838172/6769473 [05:43<07:22, 8888.23it/s][A[A

 42%|████▏     | 2839104/6769473 [05:43<07:26, 8796.66it/s][A[A

 42%|████▏     | 2840014/6769473 [05:43<07:55, 8262.47it/s][A[A

 42%|████▏     | 2840898/6769473 [05:43<07:46, 8426.06it/s][A[A

 42%|████▏     | 2841761/6769473 [05:43<08:03, 8124.57it/s][A[A

 42%|████▏     | 2842590/6769473 [05:44<08:08, 8031.06it/s][A[A

 42%|████▏     | 2843467/6769473 [05:44<07:56, 8237.62it/s][A[A

 42%|████▏     | 2844338/6769473 [05:44<07:48, 8371.86it/s][A[A

 42%|████▏     | 2845206/6769473 [05:44<07:43, 8459.26it/s][A[A

 42%|████▏     | 2846075/6769473 [05:44<07:40, 8526.09it/s][A[A

 42%|████▏     | 2846941/6769473 [05:44<07:38, 8563.41it/s][A[A

 42%|████▏     | 2847809/6769473 [05:44<07:36, 8595.24it/s][A[A

 42%|████▏     | 2848674/6769473 [05:44<07:35, 8609.02it/s][A[A

 42%|████▏     | 2849542/6769473 [05:44<07:34, 8627.84it/s][A

 44%|████▎     | 2951926/6769473 [05:56<06:31, 9757.32it/s] [A[A

 44%|████▎     | 2952958/6769473 [05:56<07:00, 9080.40it/s][A[A

 44%|████▎     | 2953915/6769473 [05:56<07:11, 8842.81it/s][A[A

 44%|████▎     | 2954835/6769473 [05:56<07:21, 8644.94it/s][A[A

 44%|████▎     | 2955994/6769473 [05:56<06:47, 9357.07it/s][A[A

 44%|████▎     | 2956965/6769473 [05:56<07:13, 8788.92it/s][A[A

 44%|████▎     | 2957876/6769473 [05:56<07:21, 8628.56it/s][A[A

 44%|████▎     | 2958786/6769473 [05:56<07:14, 8763.35it/s][A[A

 44%|████▎     | 2959679/6769473 [05:57<07:14, 8768.81it/s][A[A

 44%|████▎     | 2960760/6769473 [05:57<06:49, 9293.96it/s][A[A

 44%|████▍     | 2961706/6769473 [05:57<06:55, 9172.21it/s][A[A

 44%|████▍     | 2962695/6769473 [05:57<06:46, 9374.01it/s][A[A

 44%|████▍     | 2963642/6769473 [05:57<06:54, 9187.86it/s][A[A

 44%|████▍     | 2964569/6769473 [05:57<07:33, 8391.76it/s][A[A

 44%|████▍     | 2965427/6769473 [05:57<07:53, 8025.86it/s][

 45%|████▌     | 3063376/6769473 [06:09<08:20, 7398.33it/s][A[A

 45%|████▌     | 3064234/6769473 [06:09<08:00, 7715.22it/s][A[A

 45%|████▌     | 3065016/6769473 [06:09<08:11, 7542.20it/s][A[A

 45%|████▌     | 3065778/6769473 [06:09<08:29, 7264.75it/s][A[A

 45%|████▌     | 3066512/6769473 [06:09<08:32, 7229.69it/s][A[A

 45%|████▌     | 3067308/6769473 [06:09<08:18, 7433.23it/s][A[A

 45%|████▌     | 3068242/6769473 [06:09<07:47, 7917.81it/s][A[A

 45%|████▌     | 3069081/6769473 [06:09<07:39, 8051.99it/s][A[A

 45%|████▌     | 3070128/6769473 [06:10<07:07, 8651.07it/s][A[A

 45%|████▌     | 3071019/6769473 [06:10<07:03, 8726.34it/s][A[A

 45%|████▌     | 3071906/6769473 [06:10<07:23, 8336.47it/s][A[A

 45%|████▌     | 3072892/6769473 [06:10<07:02, 8740.93it/s][A[A

 45%|████▌     | 3073807/6769473 [06:10<06:57, 8859.50it/s][A[A

 45%|████▌     | 3074706/6769473 [06:10<06:55, 8897.54it/s][A[A

 45%|████▌     | 3075604/6769473 [06:10<06:54, 8911.65it/s][A

 47%|████▋     | 3175528/6769473 [06:22<07:26, 8042.24it/s][A[A

 47%|████▋     | 3176423/6769473 [06:22<07:13, 8294.37it/s][A[A

 47%|████▋     | 3177407/6769473 [06:22<06:52, 8704.23it/s][A[A

 47%|████▋     | 3178295/6769473 [06:22<06:56, 8619.37it/s][A[A

 47%|████▋     | 3179170/6769473 [06:22<07:04, 8461.61it/s][A[A

 47%|████▋     | 3180026/6769473 [06:22<07:08, 8379.05it/s][A[A

 47%|████▋     | 3180871/6769473 [06:23<07:24, 8072.56it/s][A[A

 47%|████▋     | 3181991/6769473 [06:23<06:47, 8809.11it/s][A[A

 47%|████▋     | 3182964/6769473 [06:23<06:35, 9065.13it/s][A[A

 47%|████▋     | 3183908/6769473 [06:23<06:30, 9172.07it/s][A[A

 47%|████▋     | 3184882/6769473 [06:23<06:24, 9333.80it/s][A[A

 47%|████▋     | 3185829/6769473 [06:23<06:22, 9372.94it/s][A[A

 47%|████▋     | 3186774/6769473 [06:23<06:27, 9236.16it/s][A[A

 47%|████▋     | 3187704/6769473 [06:23<06:46, 8809.60it/s][A[A

 47%|████▋     | 3188593/6769473 [06:23<06:58, 8562.54it/s][A

 49%|████▊     | 3287957/6769473 [06:35<07:14, 8007.28it/s][A[A

 49%|████▊     | 3288930/6769473 [06:35<06:51, 8454.05it/s][A[A

 49%|████▊     | 3289787/6769473 [06:35<06:51, 8457.47it/s][A[A

 49%|████▊     | 3290743/6769473 [06:35<06:37, 8758.32it/s][A[A

 49%|████▊     | 3291627/6769473 [06:35<06:52, 8440.99it/s][A[A

 49%|████▊     | 3292545/6769473 [06:35<06:45, 8583.87it/s][A[A

 49%|████▊     | 3293410/6769473 [06:36<07:19, 7916.84it/s][A[A

 49%|████▊     | 3294217/6769473 [06:36<07:20, 7890.95it/s][A[A

 49%|████▊     | 3295180/6769473 [06:36<06:56, 8342.08it/s][A[A

 49%|████▊     | 3296029/6769473 [06:36<06:59, 8284.74it/s][A[A

 49%|████▊     | 3296957/6769473 [06:36<06:45, 8558.68it/s][A[A

 49%|████▊     | 3297923/6769473 [06:36<06:31, 8860.03it/s][A[A

 49%|████▊     | 3299004/6769473 [06:36<06:10, 9366.74it/s][A[A

 49%|████▉     | 3300213/6769473 [06:36<05:45, 10043.24it/s][A[A

 49%|████▉     | 3301241/6769473 [06:36<05:51, 9868.66it/s] 

 50%|█████     | 3397935/6769473 [06:48<07:10, 7823.09it/s][A[A

 50%|█████     | 3398734/6769473 [06:48<07:19, 7674.16it/s][A[A

 50%|█████     | 3399565/6769473 [06:48<07:09, 7853.84it/s][A[A

 50%|█████     | 3400619/6769473 [06:48<06:36, 8503.43it/s][A[A

 50%|█████     | 3401492/6769473 [06:48<06:35, 8515.46it/s][A[A

 50%|█████     | 3402462/6769473 [06:49<06:20, 8838.71it/s][A[A

 50%|█████     | 3403360/6769473 [06:49<06:37, 8464.19it/s][A[A

 50%|█████     | 3404285/6769473 [06:49<06:27, 8682.76it/s][A[A

 50%|█████     | 3405164/6769473 [06:49<06:30, 8617.69it/s][A[A

 50%|█████     | 3406281/6769473 [06:49<06:03, 9251.15it/s][A[A

 50%|█████     | 3407261/6769473 [06:49<05:57, 9408.07it/s][A[A

 50%|█████     | 3408216/6769473 [06:49<06:01, 9298.71it/s][A[A

 50%|█████     | 3409156/6769473 [06:49<06:06, 9161.01it/s][A[A

 50%|█████     | 3410080/6769473 [06:49<06:06, 9160.05it/s][A[A

 50%|█████     | 3411002/6769473 [06:49<06:20, 8826.38it/s][A

 52%|█████▏    | 3498043/6769473 [07:01<10:16, 5304.67it/s][A[A

 52%|█████▏    | 3498624/6769473 [07:02<10:15, 5314.21it/s][A[A

 52%|█████▏    | 3499458/6769473 [07:02<09:08, 5961.13it/s][A[A

 52%|█████▏    | 3500359/6769473 [07:02<08:12, 6633.67it/s][A[A

 52%|█████▏    | 3501082/6769473 [07:02<08:41, 6271.01it/s][A[A

 52%|█████▏    | 3501892/6769473 [07:02<08:05, 6724.91it/s][A[A

 52%|█████▏    | 3502658/6769473 [07:02<07:48, 6979.37it/s][A[A

 52%|█████▏    | 3503583/6769473 [07:02<07:13, 7533.72it/s][A[A

 52%|█████▏    | 3504754/6769473 [07:02<06:27, 8435.27it/s][A[A

 52%|█████▏    | 3505739/6769473 [07:02<06:10, 8813.87it/s][A[A

 52%|█████▏    | 3506667/6769473 [07:03<06:24, 8484.08it/s][A[A

 52%|█████▏    | 3507645/6769473 [07:03<06:09, 8833.97it/s][A[A

 52%|█████▏    | 3508770/6769473 [07:03<05:45, 9441.98it/s][A[A

 52%|█████▏    | 3509746/6769473 [07:03<06:00, 9033.96it/s][A[A

 52%|█████▏    | 3510676/6769473 [07:03<06:17, 8625.46it/s][A

In [None]:
pd.read_csv(base_dir + f'p{partition}/members.csv').head()

## Groupby 

Another option is to compute the partitions all at once, groupby the partition, and write the grouped dataframe to disk.

## Reusable Hashing Dataframe Function

To make the process reusable, we'll write a function that does this for us. It will take in a dataframe, a name for the file to save the lines to, and an optional progress argument. The partition is determined by hashing the customer id and then modulo dividing by the number of partitions. Each line of the dataframe will be saved to the appropriate file in the partition.

In [22]:
def partition_by_hashing(df, name, progress = None):
    """Partition a dataframe into N_PARTITIONS by hashing the id.
    
    Params
    --------
        df (pandas dataframe): dataframe for partition. Must have 'msno' column.
        name (str): name of dataframe. Used for saving the row data.
        progress (int, optional): number of rows to be processed before displaying information.
                                  Defaults to None
                                  
    Returns:
    --------
        Nothing returned. Dataframe is saved one line at a time as csv files to the N_PARTITIONS 
    """
    
    start = timer()
    
    # Iterate through one row at a time
    for i, row in df.iterrows():
        # Find the partition number by hashing the id
        partition = id_to_hash(row['msno']) % N_PARTITIONS
        
        # Open file for appending
        with open(base_dir + f'p{partition}/{name}.csv', 'a') as f:
            # Write a new line and then data
            f.write('\n')
            f.write(','.join([str(x) for x in row.values]))
            
        # Record progress every `progress` steps
        if progress is not None:
            if i % progress == 0:
                print(f'{100 * round(i / df.shape[0], 2)}% complete. {round(timer() - start)} seconds elapsed.')
    
    end = timer()
    
    print(f'{i} rows processed in {round(end - start)} seconds.')

## Training Data

Now we can use this function to partition the training data.