# PARALLEL NOTEBOOK EXAMPLE

**Import parallel module client**

*Either run ipcluster start --profile=profile_name -n 6 (start 6 engine cluster)*

*OR*

*From notebook home page select clusters and run start from there (default is all cores so select number before you start!!)*

Where profile refers to the profile you've used to run this notebook. (profile_default if none selected)

In [1]:
from IPython.parallel import Client
# Path to the controller's client connection file. It is stored in:
# ~/.ipython/profile_name/security/ipcontroller-client.json
connection_file = '/noc/users/hb1g13/.ipython/profile_thalassa/security/ipcontroller-client.json'
rc = Client(connection_file)

# A direct view over all engines is the simplest way of executing in parallel
dview = rc[:]

print("Worker engine IDs: {}".format(rc.ids))

Worker engine IDs: [0, 1, 2, 3]


You need to feed modules and functions to each engine through:

**%%px --local**

Put this cell magic at the top of every cell whose contents are required by all workers.

You can work out a variable once and then pass it on to save memory.

In [2]:
%%px --local
from scipy.io import netcdf
import numpy as np
import os
import csv
import sys
import netCDF4
from numba import autojit
import glob
from pylab import *
sys.path.append('/noc/users/hb1g13/Python/python_functions/')
from useful import *
###        

**This script remaps eddy fluxes to the grid used by the MITgcm Layers package.
It involves a lot of regridding and interpolation, so it is ideal to run in parallel.**

In [None]:
%%px --local
## Generate file structure and list of files to use
OP = 'Closeddaynokpp'
x = '/noc/msm/scratch/students/hb1g13/Mobilis/'
# Snapshot files to process and how many of them were found.
# (x already ends in '/', so the pattern contains a harmless double slash.)
lists = glob.glob(x+'/'+str(OP)+'/*alled.nc')
total = len(lists)

###
# Read in the time-averaged fields and the grid
print('Loading Tav and grid fields...')
file2 = netCDF4.Dataset(x+'/'+str(OP)+"/Tav.nc",'r')
file3 = netCDF4.Dataset(x+'/'+str(OP)+"/grid.nc",'r')
Temp = file2.variables['THETA'][:].squeeze()
Yc = file2.variables['Y'][:]
X = file2.variables['X'][:]
V = file2.variables['VVEL'][:].squeeze()
Yp1 = file2.variables['Yp1'][:]
Zp = file3.variables['Zp1'][:]
Z = file3.variables['Z'][:]
# Every field has been copied into memory, so release the file handles.
file2.close()
file3.close()

# Differences between consecutive Zp1 levels
dz = Zp[:-1]-Zp[1:]

# Regrid V in y (presumably onto cell-centre points -- numba_regridy is
# defined outside this notebook; TODO confirm).
V = numba_regridy(V)

# LAYERS STYLE INTERPOLATION
# Split every vertical cell into FineGridFact sub-levels
FineGridFact = 10
nz = len(Z)
ZFF = np.zeros(FineGridFact*nz)
for kk in range(nz-1):
    ZFF[FineGridFact*kk:FineGridFact*(kk+1)] = np.linspace(Z[kk],Z[kk+1],FineGridFact)
# The last cell runs from the deepest Z level to the last Zp1 level
ZFF[-FineGridFact:] = np.linspace(Z[-1],Zp[-1],FineGridFact)

# Time-mean T and V interpolated onto the fine vertical grid.
# VTavff carries one extra row in y (left at zero) before it is regridded below.
TTavff = np.zeros((len(ZFF),len(Yc),len(X)))
VTavff = np.zeros((len(ZFF),len(Yc)+1,len(X)))
for ii in range(len(X)):
    for jj in range(len(Yc)):
        # Z and the profiles are reversed together before being handed to interp
        TTavff[:,jj,ii]=interp(ZFF,Z[::-1], Temp[::-1,jj,ii])
        VTavff[:,jj,ii]=interp(ZFF,Z[::-1], V[::-1,jj,ii])

# NOTE(review): V was already passed through numba_regridy above and is
# regridded a second time here via VTavff -- confirm this is intended.
VTavff = numba_regridy(VTavff)

# Bin Temps layer: map every fine-grid temperature through find_nearest
# against the 0.1-spaced Rho axis.
Rho = np.arange(-2,11,0.1)
TTavffbin = zeros_like(TTavff)
for ii in range(len(X)):
    for jj in range(len(Yc)):
        for kk in range(len(ZFF)):
            TTavffbin[kk,jj,ii] = find_nearest(Rho,TTavff[kk,jj,ii])

# Load in each file and find V'T' and timeaverage it!
# Module-level accumulators; eddyfluxbin keeps its own local copies.
VTprimetav20 = 0
VTbar20 = 0

# Work arrays filled in place by eddyfluxbin. The leading dimension (10) is the
# number of time records it processes per chunk.
Tpff = np.zeros((10,len(ZFF),len(Yc),len(X)))
Vpff = np.zeros((10,len(ZFF),len(Yc),len(X)))

**I'm now going to define a function that I want to run in parallel**

In [None]:
%%px --local
def eddyfluxbin():
    '''Remap V'T' and VT onto the fine vertical grid and time-average them.

    Every file matching ``*ALLED.nc`` under ``x/OP`` is read, V is passed
    through numba_regridy, and the records are processed in 60 chunks of 10.
    Each T and V profile is interpolated onto ZFF, T is binned onto the Rho
    axis with find_nearest, and the chunk means of V'T' and VT are
    accumulated into a running average over all chunks and files.

    Args = none. Reads the module-level names x, OP, X, Yc, Z, ZFF, Rho,
           VTavff and TTavffbin, and overwrites the work arrays Tpff and
           Vpff in place.
    Returns remapped time averaged V'T' and VT as the tuple
           (VTprimetav20, VTbar20). Both are the integer 0 if no file matches.
    '''
    nchunks = 60    # chunks processed per file
    chunklen = 10   # time records per chunk (leading dimension of Tpff/Vpff)
    lists = glob.glob(x+'/'+str(OP)+'/*ALLED.nc')
    # Normalise by the files actually processed here, so the result is a true
    # mean even if a file count computed elsewhere used a different pattern.
    nfiles = len(lists)
    VTprimetav20 = 0
    VTbar20 = 0
    for fname in lists:
        file2 = netCDF4.Dataset(fname,'r')
        Temp = file2.variables['THETA'][:]
        V = file2.variables['VVEL'][:]
        # Both fields are in memory now, so release the file handle.
        file2.close()
        Vc = numba_regridy(V)
        # Split into chunks to make RAM use ~20GB per core
        for yr in range(nchunks):
            yr1 = chunklen*yr
            yr2 = yr1+chunklen
            Vchnk = Vc[yr1:yr2,:,:,:]
            Tchnk = Temp[yr1:yr2,:,:,:]
            # Interpolate and bin into 0.1C temp bins as before
            for tt in range(chunklen):
                for ii in range(len(X)):
                    for jj in range(len(Yc)):
                        Tpff[tt,:,jj,ii] = interp(ZFF, Z[::-1], Tchnk[tt,::-1,jj,ii])
                        Vpff[tt,:,jj,ii] = interp(ZFF, Z[::-1], Vchnk[tt,::-1,jj,ii])
                        # Bin every column. This loop must sit inside the jj
                        # loop, otherwise only the last Y row gets binned.
                        for kk in range(len(ZFF)):
                            Tpff[tt,kk,jj,ii] = find_nearest(Rho,Tpff[tt,kk,jj,ii])
            # Anomalies relative to the time-mean fields
            Vprime = Vpff-VTavff
            Tprime = Tpff-TTavffbin
            VTprime = Vprime*Tprime
            VTprimetav = np.mean(VTprime,axis=0)
            VTprimetav20 = VTprimetav20 + VTprimetav/(nchunks*nfiles)
            VTbar = np.mean(Vpff*Tpff,axis=0)
            VTbar20 = VTbar20 + VTbar/(nchunks*nfiles)
    return VTprimetav20, VTbar20
# Wrap eddyfluxbin with numba's autojit decorator (numba is a just-in-time
# compiler, not a C wrapper).
# Should see noticeable improvement in speed
numba_eddyfluxbin = autojit()(eddyfluxbin)
# Give the wrapped object the original function's name.
numba_eddyfluxbin.func_name = "eddyfluxbin"     

# PARALLEL EXECUTION

In [None]:
# Run the jitted function on every engine and block until all have finished.
dview.execute( 'A_local = numba_eddyfluxbin()', block=True)
# Gather results. Each engine holds a (V'T', VT) tuple and the gathered list
# interleaves them: [VTprime_0, VTbar_0, VTprime_1, VTbar_1, ...].
A = dview.gather('A_local').get()
# Average over however many engines returned a result, rather than assuming
# exactly four.
VTprime_parts = A[0::2]
VTbar_parts = A[1::2]
VTbar20 = sum(VTbar_parts)/len(VTbar_parts)
VTprimebar20 = sum(VTprime_parts)/len(VTprime_parts)
# NOTE(review): every engine appears to run eddyfluxbin over the same file
# list, so this averages identical results -- consider scattering the file
# list across the engines instead.

In [12]:
def _write_vt_field(path, field):
    '''Write a 3-D array to a new netCDF file as the variable 'VT'.

    Args:
        path:  output file name (an existing file is overwritten).
        field: array indexed as (ZFF, Y, X).
    '''
    nzff, ny, nx = field.shape
    f = netcdf.netcdf_file(path,'w')
    try:
        f.createDimension('X',nx)
        f.createDimension('Y',ny)
        f.createDimension('ZFF',nzff)
        VT = f.createVariable('VT','double',('ZFF','Y','X'))
        VT[:] = field
    finally:
        # Close the file even if one of the writes above fails.
        f.close()

# Write to nc format
_write_vt_field(x+'/'+str(OP)+'/VTprimebar.nc', VTprimebar20)
_write_vt_field(x+'/'+str(OP)+'/VTbar.nc', VTbar20)
