### Example of how to use joblib Parallel to speed up calculations by running them on multiple processors

In [8]:
import os
import time

import netCDF4 as nc
import numpy as np

# Library for running on multiple cores:
from joblib import Parallel

The main calculation for this example:

In [3]:
def main_calc(filenameU, folder='/data/brogalla/ANHA12/'):
    """Read the 'vozocrtx' variable from one netCDF file and return its square.

    Parameters
    ----------
    filenameU : str
        Name of the netCDF file to read, relative to ``folder``.
    folder : str, optional
        Directory that contains the file. Defaults to the ANHA12 data
        directory used throughout this notebook.

    Returns
    -------
    numpy.ndarray
        Element-wise product of the 'vozocrtx' array with itself.
    """
    # Load file. The context manager closes the dataset as soon as the data
    # has been read (even if reading raises), so file handles do not pile up
    # when this function is called for many files or from many threads.
    with nc.Dataset(os.path.join(folder, filenameU)) as file_u:
        # np.array(...) copies the variable into memory, so the result stays
        # valid after the file is closed.
        u_vel = np.array(file_u.variables['vozocrtx'])

    # Whatever larger computation I want to do (I usually end up calling another function)
    calc = np.multiply(u_vel, u_vel)

    return calc

In [13]:
# U-grid files to loop over (one entry per file; brackets already allow the
# list to span several lines, so no backslash continuations are needed).
gridU_files = [
    'ANHA12-EXH006_y2015m01d05_gridU.nc',
    'ANHA12-EXH006_y2015m01d10_gridU.nc',
    'ANHA12-EXH006_y2015m01d15_gridU.nc',
    'ANHA12-EXH006_y2015m01d20_gridU.nc',
    'ANHA12-EXH006_y2015m01d25_gridU.nc',
    'ANHA12-EXH006_y2015m01d30_gridU.nc',
    'ANHA12-EXH006_y2015m02d04_gridU.nc',
    'ANHA12-EXH006_y2015m02d09_gridU.nc',
]

Time the calculation without threading:

In [14]:
# Baseline: process the files one after another and time the whole loop.
# time.perf_counter() is the clock meant for measuring elapsed intervals;
# unlike time.time() it cannot jump if the system clock is adjusted mid-run.
start = time.perf_counter()

for fileU in gridU_files:
    # Only the most recent result is kept; this cell is about timing.
    calc = main_calc(fileU)

end = time.perf_counter()
print('Calculation took: ', end - start)

Calculation took:  42.54135608673096


In [15]:
# Thin adapter handed to joblib: each job calls this with the calculation
# function and the file that function should process.
def joblib_solver(main_calc, fileU):
    """Apply ``main_calc`` to ``fileU`` and return whatever it returns."""
    return main_calc(fileU)

# Build the list of jobs to run. Each job is a
# (function, positional arguments, keyword arguments) tuple: here every job
# reads in one file and performs the calculation on it, with no keyword
# arguments.
joblist = [
    (joblib_solver, [main_calc, fileU], {})
    for fileU in gridU_files
]

In [16]:
# time.perf_counter() is the clock meant for measuring elapsed intervals;
# unlike time.time() it cannot jump if the system clock is adjusted mid-run.
start = time.perf_counter()

# Indicate how many jobs may run at the same time (8 in this case)
# and then perform the calculations.
# NOTE: with backend='threading' the jobs run as threads inside this process,
# not as separate processes.
ncores=8
with Parallel(n_jobs=ncores,backend='threading') as parallel:
    # results holds one return value per entry of joblist.
    results = parallel(joblist)

end = time.perf_counter()
print('Calculation took: ', end - start)

Calculation took:  29.805630207061768


If your calculation function returns multiple variables, it is easiest to zip the results at the end:

In [40]:
# Regroup the per-job results: the k-th item produced by the zip collects the
# k-th element of every job's result, in job order.
# NOTE: in Python 3, zip returns a lazy, single-use iterator; wrap it in
# list(...) if the grouped values need to be indexed or reused.
results_zip = zip(*results)