# Import irregularly gridded reservoir models and resample them regularly with nearest neighbour

In [253]:
import numpy as np
import pandas as pd
import scipy.spatial

### Create artificial, irregular model

In [266]:
# Number of irregular data points to generate; used as the length of the
# zone, x, y and z arrays created below
size = 100

In [259]:
# Assume a parameter of the grid cells, named zone:
# one random integer in [0, 64] per data point
zone = np.random.randint(0, 64 + 1, size=size)
zone

array([60, 16, 33, 42, 63, 43, 21, 23, 18,  5, 62, 30, 39,  8, 15, 30, 23,
       25, 57, 30, 22, 63, 14, 33, 39, 64, 50, 26, 40, 44,  9, 52, 10, 12,
       40, 61, 31, 44,  6, 28, 19, 54, 18,  7, 58, 27, 21, 19, 13, 25, 40,
        6,  2, 36,  6, 27, 31, 38, 40, 50, 20, 19,  3, 44, 29,  6, 34, 62,
       57,  7, 55, 49,  9, 62, 16, 58, 36, 13, 53, 46, 33, 24, 30, 42, 40,
       10, 42, 24, 63, 17, 59, 49, 35, 36, 24, 19, 42,  3, 49, 35])

In [260]:
# x coordinates of the irregular points, drawn uniformly from [0, 100)
x = 100 * np.random.random(size=size)
x[:10]

array([ 1.51233826, 81.01059078, 22.68816015, 76.56471176, 33.12751578,
       49.46316335, 93.42542677, 12.14335179, 46.72815939, 32.88239164])

In [261]:
# y coordinates of the irregular points, drawn uniformly from [0, 200)
y = 200 * np.random.random(size=size)
y[:10]

array([ 80.26568594, 116.71884472,  96.97626831,  37.23686584,
        86.54438411, 137.97636137, 172.96980786, 178.22230768,
        21.38051922,   6.97748026])

In [262]:
# z coordinates of the irregular points, drawn uniformly from [0, 20)
z = 20 * np.random.random(size=size)
z[:10]

array([ 6.62432816, 19.28473329,  6.92481585, 10.87283854, 14.21435851,
        7.96628053, 15.39058253, 10.41046319,  5.82541016, 18.00091136])

In [272]:
# Collect the property and the coordinates in one pandas DataFrame for
# readability; column order is zone, x, y, z
columns = {'zone': zone, 'x': x, 'y': y, 'z': z}
data = pd.DataFrame(columns)
data.head()

Unnamed: 0,zone,x,y,z
0,60,1.512338,80.265686,6.624328
1,16,81.010591,116.718845,19.284733
2,33,22.68816,96.976268,6.924816
3,42,76.564712,37.236866,10.872839
4,63,33.127516,86.544384,14.214359


In [268]:
# xarray representation, might be useful at some point
#xd = data.set_index(['x','y','z']).to_xarray()
#xd

## Create regular grid for lookup

In [269]:
# Node positions of the regular lookup grid along each axis
# (start, stop exclusive, spacing)
rx = np.arange(0, 100, 10)
ry = np.arange(0, 200, 20)
rz = np.arange(0, 20, 4)

In [270]:
# Expand the three axes into full 3D coordinate arrays (a: x, b: y, c: z)
a, b, c = np.meshgrid(rx, ry, rz)

In [271]:
# Flatten the coordinate arrays so that every row is one regular grid node
grid = pd.DataFrame({'x': np.ravel(a), 'y': np.ravel(b), 'z': np.ravel(c)})
grid.head()

Unnamed: 0,x,y,z
0,0,0,0
1,0,0,4
2,0,0,8
3,0,0,12
4,0,0,16


### Develop nearest neighbour algorithm

In [274]:
# Check shapes: (rows, columns) of the irregular data and the regular grid
data.shape, grid.shape

((100, 4), (500, 3))

In [283]:
def nearest_neighbour(data, grid, threshold=None):
    """Find the nearest data point for every point of a new (regular) grid.

    Args:
        data (array): m x 4 array whose columns 1..3 hold the x, y, z
            coordinates of the irregular points; column 0 (e.g. the zone)
            is ignored. A plain m x 3 array of x, y, z coordinates is
            accepted as well.
        grid (array): n x 3 array with x, y, z coordinates of the regular
            grid points.
        threshold (float, optional): Maximum distance (exclusive) within
            which a data point is accepted as a neighbour. With ``None``
            the nearest point is accepted at any distance.

    Returns:
        mask (bool array): 1D array of length n, True for every grid point
            that has at least one data point closer than ``threshold``.
        idx (int array): 1D array of size grid[mask], with row indices
            pointing to the nearest point in ``data``.
        dis (float array): 1D array of size grid[mask], with the distance
            to the indexed neighbour (for testing).
    """
    data = np.asarray(data)
    grid = np.asarray(grid)

    # A pure coordinate array is used as-is; otherwise the leading property
    # column is skipped and columns 1..3 are taken as x, y, z.
    if data.ndim == 2 and data.shape[1] == 3:
        coords = data
    else:
        coords = data[:, 1:4]

    # Without any data points no grid cell can have a neighbour.
    if coords.shape[0] == 0:
        return (
            np.zeros(grid.shape[0], dtype=bool),
            np.empty(0, dtype=np.intp),
            np.empty(0, dtype=float),
        )

    # Full n x m matrix of distances between grid points and data points.
    dist = scipy.spatial.distance.cdist(grid, coords)

    # NaN out all distances that are too large so they are ignored below.
    if threshold is not None:
        dist = np.where(dist < threshold, dist, np.nan)

    # A grid cell is valid if at least one distance survived the threshold.
    mask = ~np.all(np.isnan(dist), axis=1)
    valid_dist = dist[mask, :]

    # Index of the smallest remaining distance per valid grid cell; the
    # distance itself is gathered with that index instead of a second scan.
    idx = np.nanargmin(valid_dist, axis=1)
    dis = valid_dist[np.arange(idx.size), idx]

    return mask, idx, dis

In [284]:
# Resample onto the regular grid; `data` and `grid` are the DataFrames built
# above (columns zone, x, y, z and x, y, z respectively)
mask, idx, dis = nearest_neighbour(data.values, grid.values, threshold=20)
np.sum(mask), idx.shape

(493, (493,))

In [285]:
# Keep only the grid nodes that have a neighbour within the threshold;
# copy so that new columns can be added without touching `grid`
valid_grid = grid.loc[mask].copy()
valid_grid.head()

Unnamed: 0,x,y,z
0,0,0,0
1,0,0,4
2,0,0,8
3,0,0,12
4,0,0,16


In [286]:
# idx holds row positions into `data`, so index the underlying array by
# position rather than looking the values up by index label
valid_grid['zone'] = data['zone'].values[idx]
valid_grid['dis'] = dis
valid_grid.head(15)

Unnamed: 0,x,y,z,zone,dis
0,0,0,0,19,14.282145
1,0,0,4,19,12.641108
2,0,0,8,63,11.566399
3,0,0,12,63,7.909722
4,0,0,16,63,4.831752
5,10,0,0,64,12.916901
6,10,0,4,64,10.704791
7,10,0,8,64,9.712815
8,10,0,12,63,9.358453
9,10,0,16,63,6.954335


In [287]:
# Check validity of the threshold: the largest accepted distance must stay
# below the threshold passed to nearest_neighbour
valid_grid['dis'].max()

19.970669574783372