# Import irregularly gridded reservoir models and resample them regularly with nearest neighbour

In [1]:
import numpy as np
import pandas as pd
import scipy.spatial

### Create artificial, irregular model

In [2]:
# size of dataset: number of irregular data points to generate
size = 100
# full-size alternative, kept for reference
#size = 120*166*105

In [3]:
# Assume a parameter of the grid cells, named zone
# (randint's upper bound is exclusive, hence 64+1 to allow values 0..64)
zone = np.random.randint(0,64+1, size=size)
zone

array([ 6, 42, 49, 63, 31, 18, 29, 22,  1, 11, 11, 10,  9, 59,  5, 25, 41,
       18, 45,  2, 44, 10, 19, 14,  4, 50, 32, 10, 44, 62, 35, 43, 32, 15,
       54, 54, 58, 51, 21, 16, 34, 60, 41, 13, 62, 19, 13, 45, 30, 23, 26,
       41, 37, 31, 11, 26,  2, 62,  8, 60, 14,  6,  7, 56, 28, 17, 27, 59,
       15,  7, 38, 21, 45, 52, 32, 53, 56,  7, 27, 57, 51, 55, 35, 63, 51,
       18, 11, 48, 58, 13, 37, 61, 46, 29,  6, 30, 13, 49, 57, 30])

In [4]:
# random x coordinates, drawn uniformly from [0, 300)
x = np.random.random(size=size) * 300
x[0:10]

array([293.10223042, 232.50123866, 221.50980979, 208.3381015 ,
        28.04580832,  26.54252101,  44.52493462,  43.803259  ,
       285.93164893,  25.72777042])

In [5]:
# random y coordinates, drawn uniformly from [0, 400)
y = np.random.random(size=size) * 400
y[0:10]

array([ 33.10883767, 142.16585485, 185.64572981, 229.11387477,
       256.82414824, 215.91856075, 308.84221516, 353.48808211,
       294.47920925, 245.17598044])

In [6]:
# random z coordinates, drawn uniformly from [0, 100)
z = np.random.random(size=size) * 100
z[0:10]

array([75.52959224, 90.50087812, 90.19008858, 75.45058348,  8.03363798,
        6.41302624,  3.13539414, 27.67146609, 18.58180224, 37.4575374 ])

In [7]:
#300*400*100

In [8]:
# Gather the coordinates and the zone parameter in one pandas DataFrame
# (one row per irregular point) so they are easier to inspect
data = pd.DataFrame(
    {
        'x': x,
        'y': y,
        'z': z,
        'zone': zone,
    }
)
data.head()

Unnamed: 0,x,y,z,zone
0,293.10223,33.108838,75.529592,6
1,232.501239,142.165855,90.500878,42
2,221.50981,185.64573,90.190089,49
3,208.338102,229.113875,75.450583,63
4,28.045808,256.824148,8.033638,31


In [9]:
# xarray representation, might be useful at some point
#xd = data.set_index(['x','y','z']).to_xarray()
#xd

## Create regular grid for lookup

In [10]:
# Axes of the regular lookup grid; np.arange excludes the stop value, so this
# gives 10 x-nodes (step 30), 20 y-nodes (step 20) and 10 z-nodes (step 10)
rx = np.arange(0,300,30)
ry = np.arange(0,400,20)
rz = np.arange(0,100,10)

In [11]:
# Expand the three axes into coordinate arrays covering every grid node.
# NOTE: meshgrid's default indexing='xy' returns arrays of shape
# (len(ry), len(rx), len(rz)), so z varies fastest after ravel().
a,b,c = np.meshgrid(rx,ry,rz)

In [12]:
# Flatten the meshgrid arrays into one row per regular grid node
grid = pd.DataFrame(
    {
        'x': a.ravel(),
        'y': b.ravel(),
        'z': c.ravel(),
    }
)
grid.head()

Unnamed: 0,x,y,z
0,0,0,0
1,0,0,10
2,0,0,20
3,0,0,30
4,0,0,40


### Develop nearest neighbour algorithm

In [13]:
# Check shapes: data has one row per irregular point with columns x, y, z, zone;
# grid has one row per regular node with columns x, y, z
data.values.shape, grid.values.shape

((100, 4), (2000, 3))

In [14]:
def nearest_neighbour(data, grid, threshold=10):
    """Map every regular grid point to its closest irregular data point.

    The full pairwise distance matrix is computed in one go. Pairs that are
    not closer than ``threshold`` are discarded, and grid points left without
    any remaining candidate are flagged as invalid in the returned mask.

    Args:
        data (array): n x 3 array with x,y,z coordinates of irregular grid points
        grid (array): m x 3 array with x,y,z coordinates of regular grid points
        threshold (float or None): Only distances strictly below this value are
            accepted as neighbours; ``None`` disables the distance filter

    Returns:
        mask (bool array): 1D array of length m, True where a grid point has an accepted neighbour
        idx (int array): 1D array of size grid[mask], with indices pointing to the nearest point in data
        dis (float array): 1D array of size grid[mask], with distances to the indexed neighbour (for testing)
    """
    # rows correspond to grid points, columns to data points
    pairwise = scipy.spatial.distance.cdist(grid, data)

    if threshold is not None:
        # NaN marks rejected pairs so the nan-aware reductions below skip them
        pairwise[~(pairwise < threshold)] = np.nan

    # a grid point is valid as soon as one candidate distance survived
    mask = np.any(~np.isnan(pairwise), axis=1)
    candidates = pairwise[mask]

    # per valid grid point: which data point is closest, and how far away it is
    idx = np.nanargmin(candidates, axis=1)
    dis = np.nanmin(candidates, axis=1)
    return mask, idx, dis

In [24]:
def nearest_neighbour_2(data, grid, threshold=10):
    """Find nearest neighbour of a new grid-cell in a set of data-grid-cells

    The nearest data point is determined for every grid point first; the
    threshold is applied afterwards to the resulting minimum distances.

    Args:
        data (array): n x 3 array with x,y,z coordinates of irregular grid points
        grid (array): m x 3 array with x,y,z coordinates of regular grid points
        threshold (float or None): Only distances strictly below this value are
            accepted as neighbours. With ``None`` no filtering is applied and
            every grid point is reported as valid.

    Returns:
        mask (bool array): 1D array of length m defining the validity of grid cells dependent on threshold
        idx (int array): 1D array of size grid[mask], with indices pointing to the nearest point in data
        mindist (float array): 1D array of size grid[mask], with distances to the indexed neighbour (for testing)
    """

    # calculate distances between all data points and grid points
    # (rows: grid points, columns: data points)
    dist = scipy.spatial.distance.cdist(grid, data)

    # nearest data point and its distance for each grid cell
    idx = np.argmin(dist, axis=1)
    mindist = np.min(dist, axis=1)

    if threshold is None:
        # no filtering requested: keep every grid cell (previously `mask` was
        # left undefined on this path and the return raised UnboundLocalError)
        mask = np.ones(mindist.shape, dtype=bool)
    else:
        mask = mindist < threshold

    return mask, idx[mask], mindist[mask]

In [42]:
def nearest_neighbour_elemwise(data, grid, threshold=10):
    """Find nearest neighbour of a new grid-cell in a set of data-grid-cells

    Processes one grid point at a time, so only a single row of distances
    (one value per data point) is held in memory instead of the full
    grid x data distance matrix.

    Args:
        data (array): n x 3 array with x,y,z coordinates of irregular grid points
        grid (array): m x 3 array with x,y,z coordinates of regular grid points
        threshold (float or None): Only distances strictly below this value are
            accepted as neighbours. With ``None`` no filtering is applied and
            every grid point is reported as valid.

    Returns:
        mask (bool array): 1D array of length m defining the validity of grid cells dependent on threshold
        idx (int array): 1D array of size grid[mask], with indices pointing to the nearest point in data
        mindist (float array): 1D array of size grid[mask], with distances to the indexed neighbour (for testing)
    """

    grid = np.asarray(grid)
    n_grid = grid.shape[0]

    # np.intp is the dtype np.argmin returns; the former `np.int` alias no
    # longer exists in current NumPy releases
    idx = np.empty(n_grid, dtype=np.intp)
    mindist = np.empty(n_grid)

    for i in range(n_grid):
        # distances from this single grid point to every data point
        row = scipy.spatial.distance.cdist(grid[i:i + 1], data)[0]

        # read the distance at the argmin position so that index and
        # distance always refer to the same data point
        nearest = np.argmin(row)
        idx[i] = nearest
        mindist[i] = row[nearest]

    if threshold is None:
        # no filtering requested: keep every grid cell (previously `mask` was
        # left undefined on this path and the return raised UnboundLocalError)
        mask = np.ones(n_grid, dtype=bool)
    else:
        mask = mindist < threshold

    return mask, idx[mask], mindist[mask]

In [43]:
# Maximum neighbour distance, passed to all three implementations below
threshold = 20

### Application 1

In [44]:
# Run the fully vectorised variant on the coordinate columns only
# (the zone column is not part of the distance computation)
mask, idx, dis = nearest_neighbour(
    data[['x', 'y', 'z']].values,
    grid.values,
    threshold=threshold,
)
# summary: number of valid grid cells, largest index used, largest accepted distance
print('Mask: ', mask.shape, mask.sum())
print('Indices: ', idx.shape, idx.max())
print('Distances: ', dis.shape, dis.max())

Mask:  (2000,) 409
Indices:  (409,) 99
Distances:  (409,) 19.998208539898517


In [45]:
# Keep only the grid nodes that found a neighbour; copy so that columns can
# be added without touching the original grid frame
valid_grid = grid[mask].copy()
valid_grid.head()

Unnamed: 0,x,y,z
29,60,0,90
31,90,0,10
32,90,0,20
36,90,0,60
37,90,0,70


In [46]:
# idx holds row positions in data (argmin output), so look the zone up
# positionally; a label-based lookup is only equivalent while data keeps its
# default 0..n-1 index
valid_grid['zone'] = data['zone'].values[idx]
valid_grid['dis'] = dis
valid_grid.head(6)

Unnamed: 0,x,y,z,zone,dis
29,60,0,90,19,19.064208
31,90,0,10,51,16.045523
32,90,0,20,51,16.400062
36,90,0,60,23,14.702194
37,90,0,70,23,8.419593
38,90,0,80,23,11.208236


In [47]:
# check validity of threshold: the largest accepted distance must stay below it
valid_grid.dis.max()

19.998208539898517

### Application 2

In [48]:
# Run the argmin-first variant on the same coordinate columns
mask2, idx2, dis2 = nearest_neighbour_2(
    data[['x', 'y', 'z']].values,
    grid.values,
    threshold=threshold,
)
# summary: number of valid grid cells, largest index used, largest accepted distance
print('Mask: ', mask2.shape, mask2.sum())
print('Indices: ', idx2.shape, idx2.max())
print('Distances: ', dis2.shape, dis2.max())

Mask:  (2000,) 409
Indices:  (409,) 99
Distances:  (409,) 19.998208539898517


In [49]:
# check conformity of functions: masks, indices and distances of variants 1 and 2
# must agree element by element (exact comparison, no float tolerance)
np.all(mask == mask2), np.all(idx == idx2), np.all(dis == dis2)

(True, True, True)

### Application element-wise

In [50]:
# Run the memory-saving element-wise variant on the same coordinate columns
mask3, idx3, dis3 = nearest_neighbour_elemwise(
    data[['x', 'y', 'z']].values,
    grid.values,
    threshold=threshold,
)
# summary: number of valid grid cells, largest index used, largest accepted distance
print('Mask: ', mask3.shape, mask3.sum())
print('Indices: ', idx3.shape, idx3.max())
print('Distances: ', dis3.shape, dis3.max())

Mask:  (2000,) 409
Indices:  (409,) 99
Distances:  (409,) 19.998208539898517


In [51]:
# check conformity of functions: masks, indices and distances of variant 1 and the
# element-wise variant must agree element by element (exact comparison, no float tolerance)
np.all(mask == mask3), np.all(idx == idx3), np.all(dis == dis3)

(True, True, True)

# Application element-wise to large data-sets

In [53]:
# size of dataset: number of irregular points for the large-scale run
size = 120*166*105

In [54]:
# Assume a parameter of the grid cells, named zone
# (randint's upper bound is exclusive, hence 64+1 to allow values 0..64)
zone = np.random.randint(0,64+1, size=size)
zone

array([57, 48, 40, ..., 64,  8,  4])

In [57]:
# random coordinates, drawn uniformly from [0, 300) x [0, 400) x [0, 100)
x = np.random.random(size=size) * 300
y = np.random.random(size=size) * 400
z = np.random.random(size=size) * 100

In [6]:

z[0:10]

array([75.52959224, 90.50087812, 90.19008858, 75.45058348,  8.03363798,
        6.41302624,  3.13539414, 27.67146609, 18.58180224, 37.4575374 ])

In [7]:
#300*400*100

In [8]:
# Gather the coordinates and the zone parameter in one pandas DataFrame
# (one row per irregular point) so they are easier to inspect
data = pd.DataFrame(
    {
        'x': x,
        'y': y,
        'z': z,
        'zone': zone,
    }
)
data.head()

Unnamed: 0,x,y,z,zone
0,293.10223,33.108838,75.529592,6
1,232.501239,142.165855,90.500878,42
2,221.50981,185.64573,90.190089,49
3,208.338102,229.113875,75.450583,63
4,28.045808,256.824148,8.033638,31


In [9]:
# xarray representation, might be useful at some point
#xd = data.set_index(['x','y','z']).to_xarray()
#xd

## Create regular grid for lookup

In [10]:
# Axes of the regular lookup grid; np.arange excludes the stop value, so this
# gives 10 x-nodes (step 30), 20 y-nodes (step 20) and 10 z-nodes (step 10)
rx = np.arange(0,300,30)
ry = np.arange(0,400,20)
rz = np.arange(0,100,10)

In [11]:
# Expand the three axes into coordinate arrays covering every grid node.
# NOTE: meshgrid's default indexing='xy' returns arrays of shape
# (len(ry), len(rx), len(rz)), so z varies fastest after ravel().
a,b,c = np.meshgrid(rx,ry,rz)

In [12]:
# Flatten the meshgrid arrays into one row per regular grid node
grid = pd.DataFrame(
    {
        'x': a.ravel(),
        'y': b.ravel(),
        'z': c.ravel(),
    }
)
grid.head()

Unnamed: 0,x,y,z
0,0,0,0
1,0,0,10
2,0,0,20
3,0,0,30
4,0,0,40
