In [18]:
# Parallel vs. sequential HDF5 I/O benchmark
############################################

import numpy as np
import h5py
import glob
import os
import sys
import timeit
import concurrent.futures

# Benchmark inputs: at most the first ten HDF5 files of the scan directory.
# glob.glob() does not guarantee any ordering of its matches, so the list is
# sorted before slicing to make the selected subset the same on every run.
data_files = sorted(glob.glob("/data/shared/LCDLargeWindow/fixedangle/ChPiEscan/temp/*.h5"))[:10]
# Names of the datasets read from each file.
features = ['ECAL', 'HCAL', 'pdgID', 'ECAL_E', 'HCAL_E', 'HCAL_ECAL_ERatio', 'energy']
# Loaded arrays, keyed by file name concatenated with feature name.
data_dict = {}

def timer(test_code, setup, iter_num):
    """Return the mean time in seconds of one execution of ``test_code``.

    Args:
        test_code: Statement handed to ``timeit.Timer`` as ``stmt``.
        setup: Statement handed to ``timeit.Timer`` as ``setup``.
        iter_num: Number of times ``test_code`` is executed.

    Returns:
        The total measured time divided by ``iter_num``.
    """
    stopwatch = timeit.Timer(stmt=test_code, setup=setup)
    elapsed = stopwatch.timeit(number=iter_num)
    return elapsed / iter_num

def access(file):
    """Load every dataset listed in ``features`` from one HDF5 file.

    Each dataset is converted to a NumPy array and stored in the module-level
    ``data_dict`` under the key ``f.filename + feat``.

    Args:
        file: Path of the HDF5 file to read.

    Note:
        When this runs inside a worker process (as ``pIO`` does via
        ``ProcessPoolExecutor``), the assignment updates that process's copy
        of ``data_dict``; the parent process's dictionary is not modified.
    """
    # Open the file once for all features instead of re-opening it on every
    # loop iteration.
    with h5py.File(file, 'r') as f:
        for feat in features:
            data_dict[f.filename + feat] = np.array(f[feat])

In [19]:
def pIO():
    """Run ``access`` over ``data_files`` with a pool of worker processes.

    A default-sized ``ProcessPoolExecutor`` maps ``access`` over the files;
    "P - Job Done" is printed once for each result yielded by the map.
    """
    with concurrent.futures.ProcessPoolExecutor() as pool:
        finished = pool.map(access, data_files)
        for _result in finished:
            print("P - Job Done")
            
def IO():
    """Run ``access`` over ``data_files`` one file at a time in this process.

    "R - Job Done" is printed after each call to ``access`` returns.
    """
    for path in data_files:
        access(path)
        print("R - Job Done")

In [20]:
# Time a single pass of the process-pool reader.
setup, test_code = "from __main__ import pIO", "pIO()"
tpio = timer(test_code, setup, 1)

# Time a single pass of the sequential reader.
setup, test_code = "from __main__ import IO", "IO()"
tio = timer(test_code, setup, 1)

# Report both timings in seconds.
print("Parallel %.3f" % tpio)
print("Regular %.3f" % tio)

P - Job Done
P - Job Done
P - Job Done
P - Job Done
P - Job Done
P - Job Done
P - Job Done
P - Job Done
P - Job Done
P - Job Done
R - Job Done
R - Job Done
R - Job Done
R - Job Done
R - Job Done
R - Job Done
R - Job Done
R - Job Done
R - Job Done
R - Job Done
Parallel 55.662
Regular 308.753


In [22]:
# Time saved by the parallel run, as a percentage of the sequential time.
# The file count comes from data_files rather than a hard-coded 10, so the
# message stays accurate when fewer files were matched.
print("Percentage speed up for %d files: %.3f percent!!!"%(len(data_files), (tio - tpio)* 100/tio))

Percentage speed up for 10 files: 81.972 percent!!!
