## Preprocessing the PNNL data using Multiprocessing
Preprocessing takes about 2 minutes per 500-frame simulation.

In [None]:
import os, sys
from glob import glob
import pickle
import numpy as np
from tqdm import tqdm
import pandas as pd
from scipy.interpolate import interp2d, griddata
from multiprocessing import Pool
from time import time

In [None]:
def loadfile(fn, channel=1,Nx=128,Ny=128):
    """Read one CSV file and resample a single column onto a regular grid.

    Parameters
    ----------
    fn : str
        Path of the CSV file. The file must contain the columns
        'X (m)' and 'Y (m)', which supply the scattered sample positions.
    channel : int
        Positional index into the CSV's columns selecting the quantity
        to interpolate.
    Nx, Ny : int
        Number of grid points requested from interpData along x and y.

    Returns
    -------
    numpy.ndarray or str
        The interpolated grid as float32 on success. If anything goes
        wrong (unreadable file, missing column, interpolation error) the
        input path `fn` is returned instead, so the caller can tell which
        file failed without the whole batch aborting.
    """
    try:
        D = pd.read_csv(fn)
        x = D['X (m)'].values.astype('float32')
        y = D['Y (m)'].values.astype('float32')
        z = D[D.columns[channel]].values.astype('float32')
        _, _, grid_z = interpData(x,y,z,
                                  Nx,Ny,
                                  delta_x=None,nextPow2=None,
                                  method='linear')
        return grid_z.astype('float32')
    except Exception:
        # Deliberate best-effort: report the failing path to the caller.
        # `Exception` (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate and can stop a long run.
        return fn

def interpData(x,y,z,Nx=None,Ny=None,delta_x=None,nextPow2=False,method='linear'):
    '''
    Map scattered samples (x, y, z) onto a rectangular grid.

    The grid size is given either directly (Nx and/or Ny) or through a
    target spacing (delta_x), e.g.

    x = y = z = np.random.rand(30)
    grid_x, grid_y, grid_z = interpData(x,y,z,Nx=128,Ny=128)

    or

    grid_x, grid_y, grid_z = interpData(x,y,z,delta_x=1e-3,nextPow2=True)

    Parameters
    ----------
    x, y, z : numpy.ndarray
        1-D arrays of equal length: sample coordinates and sample values.
    Nx, Ny : int or None
        Number of grid points along x and y. If only one of them is given,
        the missing one is derived from delta_x when delta_x is set,
        otherwise it is set equal to the one that was given.
    delta_x : float or None
        Grid spacing used (for both axes) to derive any missing count as
        int(extent / delta_x).
    nextPow2 : bool
        If true, round Nx and Ny up to the next power of two.
    method : str
        Interpolation method forwarded to scipy.interpolate.griddata.

    Returns
    -------
    (grid_x, grid_y, grid_z)
        Coordinate grids from np.mgrid and the interpolated values.

    Raises
    ------
    ValueError
        If none of Nx, Ny and delta_x is provided.
    '''

    # The grid is shrunk by eps on every side so that no grid node falls
    # outside the data extent, where the interpolation would yield nans.
    eps = 1e-4

    def _next_power_of_two(number):
        # Smallest power of two that is >= 'number', as a plain int.
        return int(2 ** np.ceil(np.log2(number)))

    if Nx is None or Ny is None:
        if delta_x is not None:
            # The same spacing is used along both axes.
            if Nx is None:
                Nx = int((x.max() - x.min())/delta_x)
            if Ny is None:
                Ny = int((y.max() - y.min())/delta_x)
        elif Nx is None and Ny is None:
            raise ValueError('interpData needs Nx, Ny or delta_x to be set')
        elif Nx is None:
            Nx = Ny
        else:
            Ny = Nx

    if nextPow2:
        Nx = _next_power_of_two(Nx)
        Ny = _next_power_of_two(Ny)

    # A complex "step" tells np.mgrid to produce that many points,
    # endpoints included.
    grid_x, grid_y = np.mgrid[x.min()+eps:x.max()-eps:Nx*1j,
                              y.min()+eps:y.max()-eps:Ny*1j]
    grid_z = griddata(np.array([x,y]).T, z, (grid_x, grid_y), method=method)
    return grid_x, grid_y, grid_z


def getInt(f):
    """Return the integer found after the last underscore of a '.csv' name.

    Everything up to and including the final '_' is discarded, every
    '.csv' occurrence is removed from the remainder, and the result is
    converted with int().
    """
    tail = f.rpartition('_')[2]
    digits = tail.replace('.csv', '')
    return int(digits)

In [None]:
# Input root: one sub-folder per simulation, each holding per-frame CSVs.
dataDir = '/data/ccsi/pnnl_liquid_inlet/data/'
# Output root; refined below into a per-channel / per-gridsize folder.
outdir = '/data/ccsi/pnnl_liquid_inlet/'
Nx = 128           # grid points along x
Ny = Nx            # grid points along y
channel = 2        # positional index of the CSV column to interpolate
numThreads = 32    # size of the multiprocessing Pool (worker processes)
# os.path.join avoids the doubled '/' that plain string formatting produced.
outdir = os.path.join(outdir,
                      'channel_{}'.format(channel),
                      'gridsize_{}'.format(Nx))

# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(outdir, exist_ok=True)

# Sorted, directories only: os.listdir returns entries in arbitrary order,
# which made slices such as folders[11:] differ between runs.
folders = sorted(fd for fd in os.listdir(dataDir)
                 if os.path.isdir(os.path.join(dataDir, fd))) # I had to run this lots of times because 3 data files were corrupted. 
#folders = ['041']  
#folders = folders[11:]
print(folders)

['041']


In [None]:
def process_file(fn):
    """Run loadfile on one CSV path and print progress plus elapsed time.

    The module-level settings `channel`, `Nx` and `Ny` are forwarded to
    loadfile. Whatever loadfile returns is passed back unchanged.
    """
    started = time()
    name = os.path.basename(fn)
    print('\t processing {}...'.format(name))
    result = loadfile(fn, channel=channel, Nx=Nx, Ny=Ny)
    elapsed = time() - started
    print('\t processing time: {} {}'.format(elapsed, name))
    return result


def mp_handler(orderedFiles):
    """Apply process_file to every path using a pool of worker processes.

    Parameters
    ----------
    orderedFiles : list of str
        CSV paths to process.

    Returns
    -------
    list
        One process_file result per input path, in input order
        (Pool.map preserves ordering).
    """
    # The context manager terminates the workers if map() raises, so a
    # failure no longer leaves `numThreads` orphaned processes behind.
    with Pool(numThreads) as pool_manager:
        out = pool_manager.map(process_file, orderedFiles)
        # Normal path: let the workers finish and exit cleanly.
        pool_manager.close()
        pool_manager.join()
    return out

In [None]:
# For every simulation folder: interpolate all of its CSV frames in
# parallel, stack them into one array and pickle it as <outdir>/<fd>.pkl.
for fd in folders:
    print('-'*80)
    print('processing {}...'.format(fd))
    t0 = time()
    fns = glob(os.path.join(dataDir,fd,'*.csv'))
    # Order frames by the integer at the end of the file name; a plain
    # string sort would put e.g. '100' before '20'.
    orderedFiles = sorted(fns, key=getInt)

    if not orderedFiles:
        print('no csv files found in {}, skipping'.format(fd))
        continue

    out = mp_handler(orderedFiles)

    # loadfile hands back the path (a str) instead of an array when a file
    # could not be processed. Stacking such entries with real frames would
    # produce an unusable array, so report them and do not write a pickle.
    failed = [res for res in out if isinstance(res, str)]
    if failed:
        print('skipping {}: {} file(s) could not be processed'.format(fd, len(failed)))
        for failed_fn in failed:
            print('\t failed: {}'.format(failed_fn))
        continue

    out = np.array(out) 
    print(out.shape)

    with open(os.path.join(outdir,fd + '.pkl'),'wb') as fid:
        pickle.dump(out,fid)
    print('rt {}'.format(time()-t0))

--------------------------------------------------------------------------------
processing 041...
	 processing XYZ_Internal_Table_table_50.csv...
	 processing XYZ_Internal_Table_table_10.csv...
	 processing XYZ_Internal_Table_table_370.csv...
	 processing XYZ_Internal_Table_table_130.csv...
	 processing XYZ_Internal_Table_table_330.csv...
	 processing XYZ_Internal_Table_table_450.csv...
	 processing XYZ_Internal_Table_table_90.csv...
	 processing XYZ_Internal_Table_table_810.csv...
	 processing XYZ_Internal_Table_table_730.csv...
	 processing XYZ_Internal_Table_table_530.csv...
	 processing XYZ_Internal_Table_table_290.csv...
	 processing XYZ_Internal_Table_table_570.csv...
	 processing XYZ_Internal_Table_table_610.csv...
	 processing XYZ_Internal_Table_table_250.csv...
	 processing XYZ_Internal_Table_table_690.csv...
	 processing XYZ_Internal_Table_table_210.csv...
	 processing XYZ_Internal_Table_table_410.csv...
	 processing XYZ_Internal_Table_table_490.csv...
	 processing XYZ_Inter

	 processing time: 7.270827531814575 XYZ_Internal_Table_table_260.csv
	 processing XYZ_Internal_Table_table_270.csv...
	 processing time: 7.00397253036499 XYZ_Internal_Table_table_20.csv
	 processing XYZ_Internal_Table_table_30.csv...
	 processing time: 7.34044623374939 XYZ_Internal_Table_table_620.csv
	 processing XYZ_Internal_Table_table_630.csv...
	 processing time: 6.846310615539551 XYZ_Internal_Table_table_420.csv
	 processing XYZ_Internal_Table_table_430.csv...
	 processing time: 6.940137147903442 XYZ_Internal_Table_table_460.csv
	 processing XYZ_Internal_Table_table_470.csv...
	 processing time: 7.126295804977417 XYZ_Internal_Table_table_540.csv
	 processing XYZ_Internal_Table_table_550.csv...
	 processing time: 6.911552906036377 XYZ_Internal_Table_table_900.csv
	 processing XYZ_Internal_Table_table_910.csv...
	 processing time: 7.189158201217651 XYZ_Internal_Table_table_340.csv
	 processing XYZ_Internal_Table_table_350.csv...
	 processing time: 6.8602235317230225 XYZ_Internal_T

	 processing XYZ_Internal_Table_table_2330.csv...
	 processing time: 6.292459011077881 XYZ_Internal_Table_table_640.csv
	 processing XYZ_Internal_Table_table_2370.csv...
	 processing time: 6.441187620162964 XYZ_Internal_Table_table_360.csv
	 processing XYZ_Internal_Table_table_2410.csv...
	 processing time: 6.876932621002197 XYZ_Internal_Table_table_240.csv
	 processing time: 6.658028602600098 XYZ_Internal_Table_table_120.csv
	 processing XYZ_Internal_Table_table_2450.csv...
	 processing time: 5.84432053565979 XYZ_Internal_Table_table_1330.csv
	 processing XYZ_Internal_Table_table_1340.csv...
	 processing time: 6.731402397155762 XYZ_Internal_Table_table_40.csv
	 processing XYZ_Internal_Table_table_2490.csv...
	 processing XYZ_Internal_Table_table_2530.csv...
	 processing time: 5.082823753356934 XYZ_Internal_Table_table_1370.csv
	 processing XYZ_Internal_Table_table_1380.csv...
	 processing time: 5.728322267532349 XYZ_Internal_Table_table_1410.csv
	 processing XYZ_Internal_Table_table_1

	 processing XYZ_Internal_Table_table_2430.csv...
	 processing time: 5.46602988243103 XYZ_Internal_Table_table_1430.csv
	 processing time: 6.179396390914917 XYZ_Internal_Table_table_2180.csv
	 processing XYZ_Internal_Table_table_1440.csv...
	 processing XYZ_Internal_Table_table_2190.csv...
	 processing time: 5.71816873550415 XYZ_Internal_Table_table_1470.csv
	 processing XYZ_Internal_Table_table_1480.csv...
	 processing time: 6.700037002563477 XYZ_Internal_Table_table_2500.csv
	 processing XYZ_Internal_Table_table_2510.csv...
	 processing time: 5.633841276168823 XYZ_Internal_Table_table_1590.csv
	 processing XYZ_Internal_Table_table_1600.csv...
	 processing time: 5.443450450897217 XYZ_Internal_Table_table_1510.csv
	 processing XYZ_Internal_Table_table_1520.csv...
	 processing time: 6.345210790634155 XYZ_Internal_Table_table_2540.csv
	 processing XYZ_Internal_Table_table_2550.csv...
	 processing time: 6.373346567153931 XYZ_Internal_Table_table_2460.csv
	 processing XYZ_Internal_Table_ta

	 processing XYZ_Internal_Table_table_3650.csv...
	 processing time: 5.279205083847046 XYZ_Internal_Table_table_2580.csv
	 processing XYZ_Internal_Table_table_2590.csv...
	 processing time: 5.4301252365112305 XYZ_Internal_Table_table_2520.csv
	 processing XYZ_Internal_Table_table_3690.csv...
	 processing time: 6.355997800827026 XYZ_Internal_Table_table_2440.csv
	 processing XYZ_Internal_Table_table_3730.csv...
	 processing time: 5.951228857040405 XYZ_Internal_Table_table_2480.csv
	 processing XYZ_Internal_Table_table_3770.csv...
	 processing time: 5.814550876617432 XYZ_Internal_Table_table_2930.csv
	 processing XYZ_Internal_Table_table_2940.csv...
	 processing time: 6.177381277084351 XYZ_Internal_Table_table_2890.csv
	 processing XYZ_Internal_Table_table_2900.csv...
	 processing time: 6.262732982635498 XYZ_Internal_Table_table_2560.csv
	 processing XYZ_Internal_Table_table_3810.csv...
	 processing time: 4.69987154006958 XYZ_Internal_Table_table_2620.csv
	 processing XYZ_Internal_Table_

	 processing XYZ_Internal_Table_table_2960.csv...
	 processing time: 5.287590026855469 XYZ_Internal_Table_table_2680.csv
	 processing time: 5.2966272830963135 XYZ_Internal_Table_table_3190.csv
	 processing XYZ_Internal_Table_table_3200.csv...
	 processing time: 5.528543710708618 XYZ_Internal_Table_table_2910.csv
	 processing XYZ_Internal_Table_table_3930.csv...
	 processing XYZ_Internal_Table_table_2920.csv...
	 processing time: 6.5136942863464355 XYZ_Internal_Table_table_3620.csv
	 processing XYZ_Internal_Table_table_3630.csv...
	 processing time: 5.726405382156372 XYZ_Internal_Table_table_2990.csv
	 processing XYZ_Internal_Table_table_3000.csv...
	 processing time: 6.311307907104492 XYZ_Internal_Table_table_3740.csv
	 processing XYZ_Internal_Table_table_3750.csv...
	 processing time: 0.24034333229064941 XYZ_Internal_Table_table_3000.csv
	 processing XYZ_Internal_Table_table_3970.csv...
	 processing time: 5.454442262649536 XYZ_Internal_Table_table_3030.csv
	 processing XYZ_Internal_Ta

	 processing XYZ_Internal_Table_table_4300.csv...
	 processing time: 5.675356864929199 XYZ_Internal_Table_table_4370.csv
	 processing XYZ_Internal_Table_table_4380.csv...
	 processing time: 5.7467944622039795 XYZ_Internal_Table_table_3980.csv
	 processing XYZ_Internal_Table_table_3990.csv...
	 processing time: 5.568667888641357 XYZ_Internal_Table_table_4020.csv
	 processing XYZ_Internal_Table_table_4030.csv...
	 processing time: 6.189192533493042 XYZ_Internal_Table_table_4330.csv
	 processing XYZ_Internal_Table_table_4340.csv...
	 processing time: 5.610597610473633 XYZ_Internal_Table_table_4410.csv
	 processing XYZ_Internal_Table_table_4420.csv...
	 processing time: 6.0732128620147705 XYZ_Internal_Table_table_3760.csv
	 processing time: 5.30759596824646 XYZ_Internal_Table_table_4450.csv
	 processing XYZ_Internal_Table_table_4460.csv...
	 processing time: 6.012724161148071 XYZ_Internal_Table_table_3640.csv
	 processing time: 5.3685102462768555 XYZ_Internal_Table_table_4490.csv
	 process

	 processing XYZ_Internal_Table_table_4600.csv...
	 processing time: 4.984355926513672 XYZ_Internal_Table_table_4710.csv
	 processing XYZ_Internal_Table_table_4720.csv...
	 processing time: 4.640959024429321 XYZ_Internal_Table_table_4080.csv
	 processing time: 4.764352083206177 XYZ_Internal_Table_table_4750.csv
	 processing XYZ_Internal_Table_table_4760.csv...
	 processing time: 4.794953107833862 XYZ_Internal_Table_table_4870.csv
	 processing XYZ_Internal_Table_table_4880.csv...
	 processing time: 4.808486461639404 XYZ_Internal_Table_table_4160.csv
	 processing time: 4.5728065967559814 XYZ_Internal_Table_table_4240.csv
	 processing time: 5.070862054824829 XYZ_Internal_Table_table_4830.csv
	 processing XYZ_Internal_Table_table_4840.csv...
	 processing time: 5.000413656234741 XYZ_Internal_Table_table_4120.csv
	 processing time: 4.921011209487915 XYZ_Internal_Table_table_4790.csv
	 processing XYZ_Internal_Table_table_4800.csv...
	 processing time: 5.039155721664429 XYZ_Internal_Table_tabl

## Scratch Work 

In [None]:
# Take the second entry of orderedFiles (as left by the last cell that set
# it) and echo its path for inspection.
fn = orderedFiles[1]
fn

'/data/ccsi/pnnl_liquid_inlet/data/041/XYZ_Internal_Table_table_20.csv'

In [None]:
# Interactive replay of the loadfile steps on a single file, keeping the
# coordinate grids (which loadfile discards) so they can be saved below.
D = pd.read_csv(fn)
x = D['X (m)'].values.astype('float32')
y = D['Y (m)'].values.astype('float32')
X = []  # not used by the rest of this cell
columns = D.columns
# `channel` is a positional column index, not a column name.
z = D[columns[channel]].values.astype('float32')
grid_x, grid_y, grid_z = interpData(x,y,z,
                                    Nx,Ny,
                                    delta_x=None,nextPow2=None,
                                    method='linear')
# Echo the grid shapes as a sanity check.
grid_x.shape, grid_y.shape

((128, 128), (128, 128))

In [None]:
# Persist the coordinate grids computed above, tagged with the grid size Nx.
# Note the file goes to the hard-coded data root, not to `outdir`.
outdict = {'grid_x':grid_x, 'grid_y':grid_y}
with open(os.path.join('/data/ccsi/pnnl_liquid_inlet/','grid_x_grid_y_{}.pkl'.format(Nx)),'wb') as fid:
    pickle.dump(outdict, fid)

In [None]:
# Display the path a grid file would have under `outdir`; nothing is written.
os.path.join(outdir,'grid_x_grid_y_{}.pkl'.format(Nx))

'/data/ccsi/pnnl_liquid_inlet//channel_2/gridsize_512/grid_x_grid_y_512.pkl'

In [None]:
# Folder name (relative to dataDir) examined by the following scratch cell.
fd = '005'

In [None]:
# Scan every CSV of folder `fd` in frame order and print its table shape;
# files that cannot be read are reported by index and path instead.
out = []
fns = glob(os.path.join(dataDir,fd,'*.csv'))
L = np.argsort(list(map(getInt,fns)))
orderedFiles = [fns[i] for i in L]
for idx,fn in enumerate(orderedFiles):
    # The read itself is what fails on a corrupted file, so it has to be
    # inside the try; previously it sat outside and aborted the scan.
    try:
        D = pd.read_csv(fn)
    except Exception:
        print(idx,fn)
    else:
        print(idx,D.shape)

0 (150073, 7)
1 (150073, 7)
2 (150073, 7)
3 (150073, 7)
4 (150073, 7)
5 (150073, 7)
6 (150073, 7)
7 (150073, 7)
8 (150073, 7)
9 (150073, 7)
10 (150073, 7)
11 (150073, 7)


KeyboardInterrupt: 

In [None]:
# Second-to-last path component of the first file, i.e. its parent folder name.
orderedFiles[0].split('/')[-2]

'034'

In [None]:
# Echo the current output directory.
outdir

'/data/ccsi/pnnl_liquid_inlet//channel_2/gridsize_512'

In [None]:
# Reload the pickle written for the first folder and print the array shape.
# `A` stays defined afterwards and is used by the cells below.
for fd in folders[:1]:
    fn = os.path.join(outdir,fd+ '.pkl')
    with open(fn,'rb') as fid:
        A = pickle.load(fid)
    print(A.shape)

(500, 512, 512)


In [None]:
# Keep an independent copy of A for the comparison further down.
B = A.copy()

In [None]:
# Value range of the loaded array.
A.max(), A.min()

(1.0, 0.0)

In [None]:
# Largest absolute element-wise difference between A and B.
d = A-B
np.max(np.abs(d))

1.0