In [1]:
%matplotlib

import matplotlib.pyplot as plt
import numpy as np
from numpy.random import default_rng
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KernelDensity

Using matplotlib backend: Qt5Agg


In [2]:
def rtv_org(ui, uf, a):
    """
    Rotate vectors via transvection.

    Reference: http://xahlee.info/math/i/transvection_for_rotations.pdf

    The rotation is the composition of two reflections: ``a`` is first
    reflected about the axis ``ui`` and the result is then reflected about
    the axis ``ui + uf``.  The combined map carries ``ui`` onto ``uf``.

    Parameters
    ----------
    ui, uf : 1-D array_like, shape (M,)
        Start and end directions.  Both are assumed to be unit length.
    a : array_like, shape (M,) or (N, M)
        A single vector, or N row vectors, to rotate.

    Returns
    -------
    ndarray
        The rotated vector(s), with the same shape as ``a``.

    Notes
    -----
    When ``uf == -ui`` the sum ``ui + uf`` is the zero vector and the
    scale factor divides by zero, so the result is not finite.
    """
    ui = np.asarray(ui)
    uf = np.asarray(uf)
    a = np.asarray(a)

    s = ui + uf
    c = 2.0 / np.dot(s, s)

    # np.dot(a, ui) is a scalar for a 1-D `a` and has shape (N,) for a 2-D
    # `a`.  The trailing axis makes each projection scale its own row
    # instead of being multiplied element-wise against the M components.
    w = (2.0 * np.asarray(np.dot(a, ui))[..., None]) * ui - a
    r = (c * np.asarray(np.dot(w, s))[..., None]) * s - w

    return r

def rtv(ui, uf, a):
    """
    Rotate vectors via transvection.

    Reference: http://xahlee.info/math/i/transvection_for_rotations.pdf

    Applies to ``a`` the rotation built from two reflections: first about
    the axis ``ui``, then about the axis ``ui + uf``.  ``ui`` and ``uf``
    are assumed to be unit vectors.

    Parameters
    ----------
    ui, uf : 1-D ndarray, shape (M,)
        Start and end directions (the scale factor is computed with
        ``np.dot`` of their sum with itself, so they must be 1-D).
    a : array_like, shape (M,) or (N, M)
        One vector, or N row vectors, to rotate.

    Returns
    -------
    ndarray
        Shape (M,) when ``a`` is 1-D, otherwise shape (N, M).
    """
    single_vector = np.ndim(a) == 1
    rows = np.atleast_2d(a)

    bisector = ui + uf
    scale = 2.0 / np.dot(bisector, bisector)

    # First reflection: about the axis ui.
    along_ui = 2.0 * np.dot(rows, ui)
    reflected = along_ui[:, None] * ui - rows

    # Second reflection: about the axis ui + uf.
    along_bisector = scale * np.dot(reflected, bisector)
    rotated = along_bisector[:, None] * bisector - reflected

    if single_vector:
        return rotated[0]
    return rotated

def rtv_einsum(ui, uf, a):
    """
    Rotate vectors via transvection, one rotation per row.

    Reference: http://xahlee.info/math/i/transvection_for_rotations.pdf

    Each row vector in ``a`` is rotated by the rotation that carries the
    matching row of ``ui`` onto the matching row of ``uf``.  The row
    vectors in ``ui`` and ``uf`` are assumed to be unit length.  The
    row-wise dot products are computed with ``np.einsum``.

    Parameters
    ----------
    ui, uf : array_like, shape (N, M) or (M,)
        Start and end directions; each row vector is unit length in an
        M-dimensional space.
    a : array_like, shape (N, M) or (M,)
        The row vectors to rotate.

    Returns
    -------
    a_rotated : ndarray
        An (N, M) array where each row of ``a`` has been rotated from the
        matching row of ``ui`` to the matching row of ``uf``.  When ``a``
        is 1-D, a 1-D array of shape (M,) is returned instead.

    Notes
    -----
    If a row of ``uf`` is the exact opposite of the matching row of
    ``ui``, their sum is the zero vector, the scale factor divides by
    zero and that row of the result is not finite.

    Examples
    --------
    >>> ui = np.array([[1, 0, 0, 0, 0], [0, 0, 0, 0, 1]])
    >>> uf = np.array([[0, 1, 0, 0, 0], [0, 0, 0, 1, 0]])
    >>> aa = np.array([[1, 1, 0, 0, 0], [0, 0, 0, 1, 1]])

    >>> rtv_einsum(ui, uf, aa)
    array([[-1.,  1.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  1., -1.]])

    """
    just_one = np.ndim(a) == 1
    a = np.atleast_2d(a)
    # The 'ij,ij->i' subscripts need 2-D operands, so promote the
    # directions as well; otherwise a fully 1-D call would fail.
    ui = np.atleast_2d(ui)
    uf = np.atleast_2d(uf)

    s = ui + uf
    c = 2.0 / np.einsum('ij,ij->i', s, s)
    # Reflect each row about its ui, then about its ui + uf.
    w = (2.0 * np.einsum('ij,ij->i', a, ui))[:, None] * ui - a
    r = (c * np.einsum('ij,ij->i', w, s))[:, None] * s - w

    return r[0] if just_one else r

def normalized(a, axis=-1, order=2):
    """
    Scale ``a`` to unit norm along ``axis``.

    Parameters
    ----------
    a : array_like
        A scalar, a single vector, or an N-D array of vectors.
    axis : int, optional
        Axis along which the norm is taken (default: the last axis).
    order : optional
        Norm order, passed to ``np.linalg.norm`` as ``ord`` (default 2).

    Returns
    -------
    ndarray or scalar
        ``a`` divided by its norm along ``axis``, with the same shape as
        ``a``.  Vectors whose norm is zero are returned unchanged instead
        of producing a division by zero.
    """
    scalar_input = np.ndim(a) == 0
    if scalar_input:
        # A 0-d value has no axis to reduce over; treat it as a
        # length-1 vector and unwrap the result at the end.
        a = np.reshape(a, 1)

    # keepdims=True leaves the norms broadcastable against `a` for any
    # axis, so no manual expand_dims / unwrapping is needed.
    norms = np.linalg.norm(a, order, axis, keepdims=True)
    # Divide zero-length vectors by 1 so they stay zero.
    norms = np.where(norms == 0, 1, norms)

    ret = a / norms
    return ret[0] if scalar_input else ret

def einsumdot(x, y):
    """
    Row-by-row dot product of two 2-D arrays.

    Element ``i`` of the result is ``sum_j x[i, j] * y[i, j]``.
    """
    rowwise_subscripts = 'ij,ij->i'
    return np.einsum(rowwise_subscripts, x, y)

def uniform_sampler(X, n_samples=1, random_state=None):
    """
    Draw samples uniformly within the per-column range of the data set.

    Each column of the result is drawn from a uniform distribution
    bounded by that column's minimum and maximum in ``X``.  The result
    has shape ``(n_samples, X.shape[1])``; ``random_state`` seeds the
    ``np.random.default_rng`` generator used for the draw.
    """
    generator = np.random.default_rng(random_state)
    column_low = X.min(axis=0)
    column_high = X.max(axis=0)
    n_features = X.shape[1]
    return generator.uniform(low=column_low, high=column_high, size=(n_samples, n_features))
    
def kd_sampler(X, n_samples=1, random_state=None, bandwidth=0.5, kernel='gaussian'):
    """
    Use a kernel density estimator to sample the data set's columns.

    A ``KernelDensity`` model is fitted to ``X`` and new points are drawn
    from the fitted density.

    Parameters
    ----------
    X : array_like, shape (n_rows, n_features)
        Data the density estimate is fitted to.
    n_samples : int, optional
        Number of samples to draw (default 1).
    random_state : optional
        Forwarded to ``KernelDensity.sample`` to make the draw repeatable.
    bandwidth : float, optional
        Kernel bandwidth passed to ``KernelDensity`` (default 0.5).  It is
        expressed in the units of ``X``, so it should be chosen with the
        scaling of the data in mind.
    kernel : str, optional
        Kernel name passed to ``KernelDensity`` (default ``'gaussian'``).
        NOTE(review): scikit-learn documents sampling as supported only
        for some kernels - confirm before using a non-default value.

    Returns
    -------
    The array returned by ``KernelDensity.sample``.
    """
    kde = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(X)
    return kde.sample(n_samples, random_state)

In [3]:
uiii = np.array([[1,0,0,0,0],[0,0,0,0,1]])
ufff = np.array([[0,1,0,0,0],[0,0,0,1,0]])
aaaa = np.array([[1,1,0,0,0],[0,0,0,1,1]])

In [4]:
rtv_einsum(uiii, ufff, aaaa)

array([[-1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1., -1.]])

In [5]:
rng = np.random.default_rng(12345)

In [6]:
type(rng)

numpy.random._generator.Generator

In [7]:
x = np.array([[0,1,2], [3,4,5], [6,7,8], [9,10,11]])
x

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [8]:
np.repeat(x, 2, axis=0)

array([[ 0,  1,  2],
       [ 0,  1,  2],
       [ 3,  4,  5],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [ 9, 10, 11]])

In [9]:
np.repeat([x[0]], 2, axis=0)

array([[0, 1, 2],
       [0, 1, 2]])

In [10]:
x[:,2] #+= 100

array([ 2,  5,  8, 11])

In [11]:
x_min = x.min(axis=0)
x_max = x.max(axis=0)

In [12]:
rints = rng.integers(low=x_min, high=x_max, size=(4,3))

In [13]:
rints

array([[ 6,  3,  9],
       [ 2,  2,  9],
       [ 5,  7, 10],
       [ 3,  8,  4]], dtype=int64)

In [14]:
xx = np.arange(14*4, dtype=float).reshape([14,4])
xx

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.],
       [12., 13., 14., 15.],
       [16., 17., 18., 19.],
       [20., 21., 22., 23.],
       [24., 25., 26., 27.],
       [28., 29., 30., 31.],
       [32., 33., 34., 35.],
       [36., 37., 38., 39.],
       [40., 41., 42., 43.],
       [44., 45., 46., 47.],
       [48., 49., 50., 51.],
       [52., 53., 54., 55.]])

In [15]:
xx_min = xx.min(axis=0)
xx_max = xx.max(axis=0)

In [16]:
xx_min.shape

(4,)

In [17]:
rfloats_init = rng.uniform(low=xx_min, high=xx_max, size=(14,4))

In [18]:
rfloats_init

array([[31.11205519, 10.71017765, 36.98331429, 51.97374899],
       [12.90877716, 50.3418199 , 36.69634756,  7.98669265],
       [22.97566264, 47.09695581, 38.26758199, 19.97658893],
       [38.16426449, 12.44701769,  6.24291762, 11.31457126],
       [17.68520962, 25.19004399, 15.85389347, 45.42037298],
       [10.05130824,  7.73239196,  6.76656708, 34.12553671],
       [44.44657903, 32.28430457, 50.46339478, 40.68863078],
       [44.7486685 , 49.32556568, 30.40167247, 51.75899386],
       [25.73937288, 15.23620549, 25.49249279, 37.58202402],
       [17.20632838, 47.97960835, 15.36785711, 20.67107356],
       [13.46037673, 19.48321696,  2.26116135, 35.68743629],
       [14.68390079,  4.54055985, 34.07510682, 12.16896865],
       [15.82819614, 23.92611417,  9.81052174, 14.33230088],
       [24.665322  , 25.77118046, 15.2720824 , 18.47339394]])

In [19]:
uniform_sampler(xx, 14, 12345)

array([[11.82147317, 17.47143366, 43.46300378, 38.16524288],
       [20.33769663, 18.30632425, 33.11205519, 12.71017765],
       [34.98331429, 49.97374899, 14.90877716, 52.3418199 ],
       [34.69634756,  5.98669265, 24.97566264, 49.09695581],
       [36.26758199, 17.97658893, 40.16426449, 14.44701769],
       [ 4.24291762,  9.31457126, 19.68520962, 27.19004399],
       [13.85389347, 43.42037298, 12.05130824,  9.73239196],
       [ 4.76656708, 32.12553671, 46.44657903, 34.28430457],
       [48.46339478, 38.68863078, 46.7486685 , 51.32556568],
       [28.40167247, 49.75899386, 27.73937288, 17.23620549],
       [23.49249279, 35.58202402, 19.20632838, 49.97960835],
       [13.36785711, 18.67107356, 15.46037673, 21.48321696],
       [ 0.26116135, 33.68743629, 16.68390079,  6.54055985],
       [32.07510682, 10.16896865, 17.82819614, 25.92611417]])

In [20]:
kd_sampler(xx, 14, 12345)

array([[52.50359468, 52.35188945, 54.13749582, 55.11445644],
       [16.67645842, 17.44321467, 16.99918135, 18.81407873],
       [ 8.83451265,  8.78071513,  9.73012928, 11.23849251],
       [ 9.62447196,  8.48938624,  9.71145635, 11.06206064],
       [28.15130678, 29.26188603, 30.00047014, 31.6719049 ],
       [31.64322801, 32.58442323, 32.81488417, 34.06961961],
       [51.5696213 , 53.28007265, 53.36703275, 55.05991356],
       [35.46824378, 37.16644136, 36.8202906 , 38.90022852],
       [39.22900224, 40.51463204, 41.34648487, 43.14317487],
       [36.18899206, 36.62305673, 38.16564283, 39.67487111],
       [40.03493834, 41.12333705, 41.9940692 , 43.5024058 ],
       [52.66359731, 52.54036922, 53.22544678, 55.0110923 ],
       [ 0.37918157,  0.66973784,  2.43129004,  2.99498405],
       [ 4.02500468,  5.3351078 ,  6.42648252,  6.52206557]])

In [21]:
print(rfloats_init[0])

[31.11205519 10.71017765 36.98331429 51.97374899]


In [22]:
scaler = MinMaxScaler()
rfloats = scaler.fit_transform(rfloats_init)
rfloats

array([[0.60698413, 0.13470411, 0.72034324, 1.        ],
       [0.08235407, 1.        , 0.71438985, 0.        ],
       [0.37248812, 0.92915339, 0.74698656, 0.27257783],
       [0.81023329, 0.17262533, 0.08260522, 0.07565586],
       [0.22001389, 0.4508497 , 0.28199382, 0.85101581],
       [0.        , 0.06968874, 0.09346882, 0.59423945],
       [0.99129359, 0.60574195, 1.        , 0.74344457],
       [1.        , 0.97781165, 0.58380098, 0.99511777],
       [0.45214   , 0.23352296, 0.4819555 , 0.67281909],
       [0.20621223, 0.94842475, 0.27191055, 0.28836621],
       [0.09825152, 0.32624991, 0.        , 0.62974761],
       [0.13351426, 0.        , 0.66000978, 0.0950797 ],
       [0.16649358, 0.42325373, 0.15661848, 0.14426081],
       [0.42118518, 0.46353792, 0.26992361, 0.23840425]])

In [23]:
rfloats.shape

(14, 4)

In [24]:
cr = np.array([True, False, True, False, False, False, False, False, False, False, False, False, False, False], dtype=bool)
cr.shape

(14,)

In [25]:
samples = np.array([[0, 0, 2], [1, 0, 0], [0, 0, 1], [0, 1, 0]], dtype=float)
samples

array([[0., 0., 2.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.]])

#### A flag array for  our target class

In [26]:
c = np.array([True, False, True, False], dtype=bool)
c

array([ True, False,  True, False])

In [27]:
neigh = NearestNeighbors(n_neighbors=2)
neigh.fit(samples)

NearestNeighbors(n_neighbors=2)

#### The test_set is our synthetic samples

In [28]:
test_set = np.array([[0, 0, 1.3], [0.1, 0, 0], [0.1, 0.1, 1.3]])

In [29]:
nnd, nni = neigh.kneighbors(test_set, return_distance=True)
nni

array([[2, 0],
       [1, 2],
       [2, 0]], dtype=int64)

In [30]:
nnd

array([[0.3       , 0.7       ],
       [0.9       , 1.00498756],
       [0.33166248, 0.71414284]])

In [31]:
nni.shape

(3, 2)

In [32]:
nns = samples[nni]
nns

array([[[0., 0., 1.],
        [0., 0., 2.]],

       [[1., 0., 0.],
        [0., 0., 1.]],

       [[0., 0., 1.],
        [0., 0., 2.]]])

#### Map the target flags into our nn index array
This gives an array that tells which neighbors of our test_set are in our target class

In [33]:
cx = c[nni]
cx

array([[ True,  True],
       [False,  True],
       [ True,  True]])

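And this flags the records in our test_set whose neighbors are not all in our target class, i.e. at least one neighbor belongs to another class. (Records with no neighbors at all in the target class would be `~cx.any(axis=1)`.)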

In [34]:
cs = ~cx.all(axis=1)
cs

array([False,  True, False])

#### The neighbors of the test_set records flagged by `cs` (records with at least one neighbor outside our target class)
*** 2.1.4 Update the generated instances - rule 1 ***

If all the neighbours belong to the majority class, we remove that generated instance (addressing RQ1).

In [35]:
nns[cs]

array([[[1., 0., 0.],
        [0., 0., 1.]]])

In [36]:
nni[cs]

array([[1, 2]], dtype=int64)

#### Sort by distance to the neighbors for each test_set element

In [37]:
srt = np.argsort(nnd, axis=1)
srt

array([[0, 1],
       [0, 1],
       [0, 1]], dtype=int64)

In [38]:
nni

array([[2, 0],
       [1, 2],
       [2, 0]], dtype=int64)

In [39]:
nni_srt = c[np.take_along_axis(nni, srt, axis=1)[:,0]]
nni_srt

array([ True, False,  True])

#### Get the set of test_set elements whose nearest neighbor is of our target class
*** 2.1.4 Update the generated instances - rule 2 ***

If the condition of rule (1) is not met, we order the neighbours based on their distance from the generated instance. If the closest neighbour belongs to the minority class, we keep that generated instance and update it as su (updated instance).

In [40]:
test_set[nni_srt]

array([[0. , 0. , 1.3],
       [0.1, 0.1, 1.3]])

## Now we will build rule 3

### ^^^^^^^

In [41]:
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [42]:
len(y)

569

In [43]:
sum(y)

357

In [44]:
len(y) - sum(y)

212

In [45]:
jjj = np.arange(10 * 10).reshape(10, 10)
jjj

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [46]:
jjj[:,:-3]

array([[ 0,  1,  2,  3,  4,  5,  6],
       [10, 11, 12, 13, 14, 15, 16],
       [20, 21, 22, 23, 24, 25, 26],
       [30, 31, 32, 33, 34, 35, 36],
       [40, 41, 42, 43, 44, 45, 46],
       [50, 51, 52, 53, 54, 55, 56],
       [60, 61, 62, 63, 64, 65, 66],
       [70, 71, 72, 73, 74, 75, 76],
       [80, 81, 82, 83, 84, 85, 86],
       [90, 91, 92, 93, 94, 95, 96]])

#### Trim off a few features so we don't end up with a square matrix

In [47]:
trim_features = -3

#### Scale the data

D_norm

In [48]:
rfloats = scaler.fit_transform(X_train[:,:trim_features])

In [49]:
rfloats.shape

(426, 27)

#### We cast the class data to bool so we have a mask array, then invert it, because the class flagged as one (1) in the original data is the WDBC-Benign class.  We need it to be the other way around: we want the target / minority class to be the positive class, 1.

In [50]:
sum(y_train)

268

#### A binary flag array of the minority class rows in D_norm

In [51]:
cr = ~np.array(y_train, dtype=bool)

In [52]:
cr

array([False,  True, False,  True,  True,  True, False,  True, False,
       False, False, False,  True,  True, False, False, False, False,
       False, False, False,  True, False, False, False, False,  True,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False,  True,  True,  True, False, False,
        True, False,  True, False, False, False, False,  True, False,
       False,  True, False, False, False,  True, False,  True,  True,
       False, False, False,  True, False, False, False, False,  True,
       False, False, False, False, False,  True, False,  True,  True,
       False, False,  True, False, False, False, False, False, False,
       False,  True,  True,  True, False, False,  True, False, False,
        True, False,  True, False,  True, False,  True, False, False,
        True, False, False, False,  True, False,  True, False,  True,
       False,  True, False, False,  True, False, False, False, False,
        True, False,

In [53]:
cr.shape

(426,)

In [54]:
sum(cr)

158

####  We will be using the X_test set in place of synthetic samples.  Scale to the X_train range too.

In [55]:
test_setr = scaler.transform(X_test[:,:trim_features])

In [56]:
test_setr.shape

(143, 27)

In [57]:
neighr = NearestNeighbors(n_neighbors=5)
neighr.fit(rfloats)

NearestNeighbors()

#### Find the nearest neighbors to our test_setr data, the synthetic samples

In [58]:
nndr, nnir = neighr.kneighbors(test_setr, return_distance=True)

In [59]:
nndr

array([[0.22258887, 0.25295094, 0.26743372, 0.2798781 , 0.28479658],
       [0.21170343, 0.30000496, 0.31824835, 0.37200185, 0.37565166],
       [0.24049137, 0.25795871, 0.30746756, 0.3302952 , 0.33833015],
       [0.2702441 , 0.29867018, 0.29986279, 0.31260432, 0.32775909],
       [0.30681   , 0.32709584, 0.34410999, 0.34784848, 0.36405133],
       [0.73074048, 0.75216903, 0.78389683, 0.78606968, 0.89498035],
       [0.34742508, 0.40008254, 0.44795745, 0.468386  , 0.47144265],
       [0.3076041 , 0.35450466, 0.36202072, 0.36631365, 0.36828544],
       [0.20667004, 0.32668664, 0.33725458, 0.35111556, 0.35897535],
       [0.19131964, 0.23087909, 0.23218645, 0.26325294, 0.27500253],
       [0.1533544 , 0.21592567, 0.23038885, 0.24536373, 0.24820088],
       [0.24867786, 0.25678725, 0.32199003, 0.36535096, 0.37731924],
       [0.20396369, 0.23763505, 0.24384069, 0.249601  , 0.25651756],
       [0.50681097, 0.52342461, 0.53225564, 0.53227167, 0.54869143],
       [0.21343223, 0.21598096, 0.

In [60]:
nnir

array([[ 37, 311,  87, 181, 146],
       [195, 121, 371, 312,  99],
       [160, 156, 322, 230, 202],
       [228,  22,  37, 357, 339],
       [ 22, 293, 155,  67,  72],
       [191,  12, 251, 389, 192],
       [179,  83, 320, 332, 381],
       [ 40, 156,  41, 360, 163],
       [303, 306, 127, 101, 362],
       [386,  28,  60,  31, 137],
       [415,  86, 325, 327, 122],
       [346,  40, 244, 241, 151],
       [100, 212, 174,  57,  98],
       [241, 204, 211, 380, 417],
       [185, 364, 214, 378, 237],
       [379, 196, 370,  13, 136],
       [201, 137,  15, 275,  57],
       [284, 153, 278, 269, 355],
       [321, 158, 282, 284, 123],
       [157, 413, 320, 183, 206],
       [127, 374, 107, 271, 420],
       [311, 272, 268, 169, 267],
       [192, 320, 389, 191, 157],
       [343, 110,  64, 425, 117],
       [ 51,  84,  50, 213, 340],
       [330, 199,  87, 113, 411],
       [395, 317, 261, 125, 145],
       [250, 215, 245,  82, 391],
       [ 10,  35, 141, 147, 209],
       [265, 3

In [61]:
nnir.shape

(143, 5)

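#### We sort each sample's neighbors by distance.  NOTE: the neighbors already come back in order of increasing distance (the argsort below is the identity permutation for every row shown), so this step is a safeguard rather than a reordering.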

In [62]:
srtr = np.argsort(nndr, axis=1)
srtr

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2,

#### For each synthetic element in test_setr map the class flag to each of its neighbors. e.g., if all are True/1 then all the neighbors are from the target / minority class.

In [63]:
nni_srtr = cr[np.take_along_axis(nnir, srtr, axis=1)]
nni_srtr

array([[False, False, False, False, False],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True, False,  True,  True],
       [False, False, False, False, False],
       [False, False,  True, False, False],
       [ True,  True,  True,  True, False],
       [False, False, False, False, False],
       [ True,  True, False,  True,  True],
       [False, False, False, False, False],
       [ True,  True,  True,  True,  True],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [ True,  True,  True,  True,  True],
       [False,  True, False, False, False],
       [False, False, False, False, False],
       [ True,  True,  True,  Tr

In [64]:
nni_srtr.shape

(143, 5)

#### Mask all test / synthetic elements where their closest neighbor, the first column in the neighbors list, is from the target class

In [65]:
nni_srtr[:,0]

array([False,  True,  True, False, False,  True,  True,  True,  True,
       False, False,  True, False,  True, False,  True, False, False,
       False,  True, False, False,  True, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
        True, False,  True, False, False,  True, False, False, False,
       False, False, False, False, False,  True,  True,  True, False,
       False, False, False,  True,  True, False, False,  True,  True,
       False, False, False,  True,  True, False, False,  True,  True,
       False,  True, False, False, False, False, False, False,  True,
       False,  True,  True,  True,  True,  True,  True, False, False,
       False,  True, False, False, False, False,  True,  True, False,
        True,  True, False,  True,  True, False, False, False,  True,
       False, False,  True, False,  True,  True, False,  True, False,
       False, False,  True, False, False, False,  True, False,  True,
        True, False,

In [66]:
test_setr[nni_srtr[:,0]]

array([[0.55090847, 0.39228948, 0.53834116, ..., 0.32690348, 0.22656241,
        0.27966278],
       [0.38047897, 0.33040243, 0.38255797, ..., 0.5666644 , 0.23293177,
        0.39456703],
       [0.6322053 , 0.66351031, 0.65571205, ..., 0.63730218, 0.92334809,
        0.97699833],
       ...,
       [0.28497968, 0.31044978, 0.28581591, ..., 0.48651769, 0.25225947,
        0.35782681],
       [0.33101523, 0.3554278 , 0.33127045, ..., 0.57073966, 0.41544679,
        0.51696503],
       [0.59351584, 0.42069665, 0.58386684, ..., 0.54357128, 0.31595304,
        0.5591174 ]])

In [125]:
test_setr[nni_srtr[:,0]].shape

(57, 27)

#### Mask all test / synthetic elements that have NO neighbors in the target class

In [67]:
np.all(~nni_srtr, axis=1)

array([ True, False, False,  True,  True, False, False, False, False,
        True, False, False,  True, False,  True, False,  True,  True,
        True, False, False,  True, False,  True,  True,  True,  True,
        True,  True, False,  True,  True,  True,  True,  True,  True,
       False,  True, False,  True,  True, False,  True,  True,  True,
       False,  True,  True,  True,  True, False, False, False,  True,
        True,  True,  True, False, False,  True,  True, False, False,
       False,  True, False, False, False,  True,  True, False, False,
        True, False,  True,  True, False, False,  True,  True, False,
        True, False, False, False, False, False, False,  True,  True,
        True, False,  True,  True,  True,  True, False, False,  True,
       False, False,  True, False, False,  True, False,  True, False,
        True, False, False,  True, False, False,  True, False,  True,
       False,  True, False,  True,  True,  True, False,  True, False,
       False,  True,

#### Find (mask) all test / synthetic elements where the first column is not in the target class and there is at least one column that has the target class

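The two tests compared below are (1) "the first column is in the target class" and (2) `np.all(~nni_srtr, axis=1)`, i.e. "no column is in the target class".

If the first column is true, then the 2nd test will always be false, TF = T.  We can never have the TT case.

If the first column is false, and all the other columns are false, then the 2nd test is true, FT = T.

If the first column is false, and there is at least one other column that is true, then the 2nd test is false, FF = F.

Because the code applies `~` to the 2nd test before the XOR, the final mask is the inverse of this table: it is True only in the FF case (closest neighbor not in the target class, but at least one other neighbor is).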

In [68]:
# `~` binds tighter than `^`, so this is
#     (closest neighbor is target) XOR (at least one neighbor is target).
# The first condition implies the second, so the XOR is True only when some
# neighbor is in the target class but the closest one is not.
at_least_1_trgt = nni_srtr[:,0] ^ ~np.all(~nni_srtr, axis=1)
at_least_1_trgt

array([False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True, False,  True, False, False, False, False, False, False,
       False, False, False, False,  True,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False,  True, False, False, False, False, False, False, False,
        True, False, False, False, False, False, False, False, False,
       False, False,

In [69]:
at_least_1_trgt.shape

(143,)

In [70]:
sum(at_least_1_trgt)

11

#### Indexes of nearest neighbors for each filtered synthetic element

We have removed synthetic samples that have no neighbors in the target class or that have their closest neighbor in the target class.

In [71]:
nnir_filt = nnir[at_least_1_trgt]
nnir_filt

array([[415,  86, 325, 327, 122],
       [127, 374, 107, 271, 420],
       [236, 325, 139, 146, 132],
       [176,  24, 335, 415, 122],
       [270, 418,  66, 366,  53],
       [ 32, 212, 415,  86, 325],
       [107, 271, 362, 127, 228],
       [222, 267,  37,  10, 164],
       [174,  28, 105, 240, 270],
       [377,  82, 269, 325, 169],
       [ 28, 174, 386, 100, 335]], dtype=int64)

In [72]:
nnir_filt.shape

(11, 5)

#### Flag the neighbors that are from the target class

In [73]:
nnir_filt_flag = cr[nnir_filt]
nnir_filt_flag

array([[False, False,  True, False, False],
       [False,  True, False, False, False],
       [False,  True, False, False, False],
       [False, False,  True, False, False],
       [False, False,  True, False, False],
       [False, False, False, False,  True],
       [False, False,  True, False, False],
       [False, False, False, False,  True],
       [False, False,  True, False, False],
       [False, False, False,  True, False],
       [False, False, False, False,  True]])

In [74]:
nnir_filt_flag.shape

(11, 5)

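Double cumsum to isolate the first 1 in each row: the first cumsum counts the target-class neighbors seen so far, and the cumsum of that count equals 1 only at the position of the first target-class neighbor.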

In [75]:
# Running-sum trick: the first cumsum turns each row into a count of the
# target-class neighbors seen so far; the second cumsum is exactly 1 only at
# the first True of the row (0 before it, >= 2 after it).
nnb_flag = nnir_filt_flag.cumsum(axis=1).cumsum(axis=1) == 1
nnb_flag

array([[False, False,  True, False, False],
       [False,  True, False, False, False],
       [False,  True, False, False, False],
       [False, False,  True, False, False],
       [False, False,  True, False, False],
       [False, False, False, False,  True],
       [False, False,  True, False, False],
       [False, False, False, False,  True],
       [False, False,  True, False, False],
       [False, False, False,  True, False],
       [False, False, False, False,  True]])

In [76]:
nnir[at_least_1_trgt]

array([[415,  86, 325, 327, 122],
       [127, 374, 107, 271, 420],
       [236, 325, 139, 146, 132],
       [176,  24, 335, 415, 122],
       [270, 418,  66, 366,  53],
       [ 32, 212, 415,  86, 325],
       [107, 271, 362, 127, 228],
       [222, 267,  37,  10, 164],
       [174,  28, 105, 240, 270],
       [377,  82, 269, 325, 169],
       [ 28, 174, 386, 100, 335]], dtype=int64)

#### Indexes to the closest neighbor that is in the target / minority class, nnb

In [77]:
nni_nnb = nnir[at_least_1_trgt][nnb_flag]
nni_nnb

array([325, 374, 325, 335,  66, 325, 362, 164, 105, 325, 335], dtype=int64)

In [78]:
nni_nnb.shape

(11,)

#### Rowwise index to the nnb for each synthetic sample

In [79]:
nnb_idx = np.argmax(nnb_flag, axis=1)
nnb_idx

array([2, 1, 1, 2, 2, 4, 2, 4, 2, 3, 4], dtype=int64)

#### Rowwise index to the closest majority neighbor to nnb for each synthetic sample

In [80]:
nna_max_idx = nnb_idx - 1
nna_max_idx

array([1, 0, 0, 1, 1, 3, 1, 3, 1, 2, 3], dtype=int64)

In [81]:
nna_max_idx.shape

(11,)

#### Indexes of our filtered synthetic elements

In [82]:
test_setr_idx = np.arange(test_setr.shape[0])[at_least_1_trgt]
test_setr_idx

array([ 10,  20,  45,  63,  65,  76,  77, 105, 109, 117, 132])

In [83]:
test_setr_idx.shape

(11,)

#### The filtered set of synthetic samples

In [84]:
test_setr_filt = test_setr[at_least_1_trgt]
test_setr_filt

array([[0.27028748, 0.52553263, 0.25736236, 0.15768472, 0.31849779,
        0.1118759 , 0.06494845, 0.1027833 , 0.24132408, 0.12384162,
        0.03508963, 0.21875   , 0.02624511, 0.02013082, 0.13784546,
        0.04378586, 0.03813131, 0.12220117, 0.10429654, 0.03224023,
        0.20729479, 0.59195096, 0.18707742, 0.1015532 , 0.39210759,
        0.08676601, 0.14467111],
       [0.29918213, 0.2056138 , 0.3023901 , 0.17750794, 0.43396226,
        0.37196082, 0.18249766, 0.25193837, 0.2642819 , 0.33066554,
        0.0605468 , 0.0575274 , 0.05654243, 0.03090785, 0.10211782,
        0.13817707, 0.04441919, 0.17438909, 0.08059278, 0.07722178,
        0.28842921, 0.23560768, 0.28371715, 0.14608972, 0.47497113,
        0.35900111, 0.28923813],
       [0.17870611, 0.39059858, 0.17470479, 0.09701364, 0.31560892,
        0.15879049, 0.1202671 , 0.0943837 , 0.17084891, 0.32287279,
        0.0445048 , 0.13832656, 0.0395797 , 0.01794553, 0.25155522,
        0.10002403, 0.06169192, 0.12189809, 0.1455

In [85]:
test_setr_filt.shape

(11, 27)

#### The location of our nearest target class neighbors

In [86]:
nnb = rfloats[nni_nnb]
nnb

array([[2.48738920e-01, 4.31518431e-01, 2.40432494e-01, 1.44125976e-01,
        3.42330956e-01, 1.30915691e-01, 1.10379569e-01, 1.34393638e-01,
        2.23171383e-01, 2.25147430e-01, 4.53376788e-02, 2.25380127e-01,
        3.29830844e-02, 2.44080105e-02, 1.96688989e-01, 7.33619731e-02,
        5.88888889e-02, 1.68971396e-01, 1.60289671e-01, 5.99132153e-02,
        2.12411373e-01, 5.69029851e-01, 1.90890143e-01, 1.06689162e-01,
        4.80404809e-01, 1.37281602e-01, 2.26582015e-01],
       [3.63827807e-01, 2.35711870e-01, 3.61786883e-01, 2.34360251e-01,
        3.26171346e-01, 2.61968358e-01, 1.76897844e-01, 2.02733598e-01,
        2.27976508e-01, 2.08508846e-01, 5.77946768e-02, 4.19908062e-04,
        5.73434481e-02, 3.66792554e-02, 1.27783255e-01, 1.27211824e-01,
        5.14898990e-02, 1.56469028e-01, 1.37145843e-01, 6.86193220e-02,
        3.32285652e-01, 2.19083156e-01, 3.21335977e-01, 1.90030766e-01,
        5.29307886e-01, 3.35829828e-01, 3.46273938e-01],
       [2.48738920e-01

In [87]:
nnb.shape

(11, 27)

#### Get the locations of all the neighbors

In [88]:
nnsr = rfloats[nnir_filt]
nnsr

array([[[0.26000294, 0.52147447, 0.24676341, ..., 0.34388372,
         0.09006051, 0.04644047],
        [0.22327244, 0.43050389, 0.2152511 , ..., 0.36969368,
         0.16814004, 0.12801832],
        [0.24873892, 0.43151843, 0.24043249, ..., 0.48040481,
         0.1372816 , 0.22658201],
        [0.24873892, 0.66689212, 0.23780054, ..., 0.35475107,
         0.08687583, 0.09006037],
        [0.27028748, 0.62056138, 0.2629108 , ..., 0.24471915,
         0.12168766, 0.11053289]],

       [[0.24286204, 0.28610078, 0.24733248, ..., 0.55783468,
         0.39194606, 0.37281432],
        [0.36382781, 0.23571187, 0.36178688, ..., 0.52930789,
         0.33582983, 0.34627394],
        [0.32954601, 0.23097734, 0.3288519 , ..., 0.39550363,
         0.34022249, 0.39082015],
        [0.28742838, 0.14169767, 0.28930147, ..., 0.4511988 ,
         0.26631599, 0.26821399],
        [0.26392086, 0.20290835, 0.26824584, ..., 0.4077294 ,
         0.27488167, 0.19671107]],

       [[0.20760076, 0.39972946, 0.2

In [89]:
nnsr.shape

(11, 5, 27)

#### Move the nearest neighbors so the origin of each subspace is at the location of the generated synthetic instance

In [90]:
nnsrss = nnsr - test_setr_filt[:,None,:]
nnsrss

array([[[-1.02845389e-02, -4.05816706e-03, -1.05989472e-02, ...,
         -4.82238674e-02,  3.29449490e-03, -9.82306411e-02],
        [-4.70150350e-02, -9.50287454e-02, -4.21112534e-02, ...,
         -2.24139102e-02,  8.13740240e-02, -1.66527893e-02],
        [-2.15485577e-02, -9.40142036e-02, -1.69298620e-02, ...,
          8.82972220e-02,  5.05155885e-02,  8.19109076e-02],
        [-2.15485577e-02,  1.41359486e-01, -1.95618153e-02, ...,
         -3.73565170e-02,  1.09816497e-04, -5.46107410e-02],
        [ 0.00000000e+00,  9.50287454e-02,  5.54844217e-03, ...,
         -1.47388440e-01,  3.49216459e-02, -3.41382182e-02]],

       [[-5.63200940e-02,  8.04869800e-02, -5.50576184e-02, ...,
          8.28635468e-02,  3.29449490e-02,  8.35761865e-02],
        [ 6.46456731e-02,  3.00980724e-02,  5.93967847e-02, ...,
          5.43367520e-02, -2.31712808e-02,  5.70358035e-02],
        [ 3.03638768e-02,  2.53635441e-02,  2.64618011e-02, ...,
         -7.94674998e-02, -1.87786209e-02,  1.01582

In [91]:
nnsrss.shape

(11, 5, 27)

#### Move nnb so the origin of each subspace is at the location of the generated synthetic instance

Note: the nnb vectors are already contained in nnsr, but we recompute them here so we don't have to identify them within nnsr.

In [92]:
# Offset of each nnb row from the matching row of test_setr_filt.
nnbss = np.subtract(nnb, test_setr_filt)
nnbss

array([[-2.15485577e-02, -9.40142036e-02, -1.69298620e-02,
        -1.35587402e-02,  2.38331678e-02,  1.90397918e-02,
         4.54311153e-02,  3.16103380e-02, -1.81526962e-02,
         1.01305813e-01,  1.02480536e-02,  6.63012730e-03,
         6.73797295e-03,  4.27719192e-03,  5.88435259e-02,
         2.95761108e-02,  2.07575758e-02,  4.67702216e-02,
         5.59931315e-02,  2.76729820e-02,  5.11658504e-03,
        -2.29211087e-02,  3.81271923e-03,  5.13596665e-03,
         8.82972220e-02,  5.05155885e-02,  8.19109076e-02],
       [ 6.46456731e-02,  3.00980724e-02,  5.93967847e-02,
         5.68523127e-02, -1.07790918e-01, -1.09992466e-01,
        -5.59981256e-03, -4.92047714e-02, -3.63053924e-02,
        -1.22156698e-01, -2.75212747e-03, -5.71074965e-02,
         8.01017764e-04,  5.77140744e-03,  2.56654316e-02,
        -1.09652417e-02,  7.07070707e-03, -1.79200606e-02,
         5.65530628e-02, -8.60246258e-03,  4.38564432e-02,
        -1.65245203e-02,  3.76188297e-02,  4.39410480e-

In [93]:
nnbss.shape

(11, 27)

#### The unit length direction (dir) vectors from the generated synthetic instances origin to the nnb's.

In [94]:
# normalized() is defined elsewhere in this notebook; presumably it scales each
# row of nnbss to unit length, as the heading above says -- verify against its definition.
# NOTE(review): the name `dir` shadows Python's builtin dir(). Later cells read
# `dir`, so a rename has to be applied to all of them at the same time.
dir = normalized(nnbss)
dir

array([[-9.35312524e-02, -4.08067506e-01, -7.34838599e-02,
        -5.88515469e-02,  1.03447575e-01,  8.26419845e-02,
         1.97193203e-01,  1.37204287e-01, -7.87915569e-02,
         4.39716648e-01,  4.44815518e-02,  2.87779868e-02,
         2.92460896e-02,  1.85650995e-02,  2.55409607e-01,
         1.28374749e-01,  9.00980048e-02,  2.03005578e-01,
         2.43037506e-01,  1.20114242e-01,  2.22084751e-02,
        -9.94887934e-02,  1.65490614e-02,  2.22926007e-02,
         3.83253017e-01,  2.19262297e-01,  3.55533297e-01],
       [ 2.37217560e-01,  1.10444999e-01,  2.17956743e-01,
         2.08619792e-01, -3.95539212e-01, -4.03617802e-01,
        -2.05485349e-02, -1.80557108e-01, -1.33222785e-01,
        -4.48254498e-01, -1.00989429e-02, -2.09556190e-01,
         2.93933794e-03,  2.11782030e-02,  9.41794053e-02,
        -4.02369989e-02,  2.59459883e-02, -6.57577352e-02,
         2.07521694e-01, -3.15667714e-02,  1.60931397e-01,
        -6.06367942e-02,  1.38042449e-01,  1.61241854e-

In [95]:
dir.shape

(11, 27)

### ui

In [96]:
# Duplicate each direction row nnsrss.shape[1] times (consecutively), so the rows
# line up with nnsrss once its first two axes are flattened together.
dir_rpt = dir.repeat(repeats=nnsrss.shape[1], axis=0)
dir_rpt

array([[-0.09353125, -0.40806751, -0.07348386, ...,  0.38325302,
         0.2192623 ,  0.3555333 ],
       [-0.09353125, -0.40806751, -0.07348386, ...,  0.38325302,
         0.2192623 ,  0.3555333 ],
       [-0.09353125, -0.40806751, -0.07348386, ...,  0.38325302,
         0.2192623 ,  0.3555333 ],
       ...,
       [-0.13899763,  0.39914535, -0.12408949, ...,  0.35145757,
        -0.04713215,  0.18732613],
       [-0.13899763,  0.39914535, -0.12408949, ...,  0.35145757,
        -0.04713215,  0.18732613],
       [-0.13899763,  0.39914535, -0.12408949, ...,  0.35145757,
        -0.04713215,  0.18732613]])

In [97]:
dir_rpt.shape

(55, 27)

### uf

In [98]:
# Unit vector along the first coordinate axis, with one entry per feature column.
axis0q = np.zeros(nnsrss.shape[-1])
axis0q[0] = 1.0
# Stack one copy of that unit vector per (instance, neighbor) row.
axis0 = np.tile(axis0q, (nnsrss.shape[0] * nnsrss.shape[1], 1))
axis0

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [99]:
axis0.shape

(55, 27)

In [100]:
nnsrss.shape

(11, 5, 27)

### a

In [101]:
# Merge the two leading axes of nnsrss so every row is a single vector;
# -1 lets NumPy work out the row count (shape[0] * shape[1]).
print(nnsrss.shape)
nnsrss_a = nnsrss.reshape(-1, nnsrss.shape[-1])
print(nnsrss_a.shape)
nnsrss_a.shape

(11, 5, 27)
(55, 27)


(55, 27)

In [102]:
# Short aliases mirroring the (ui, uf, a) arguments of rtv_einsum, used by the
# step-by-step cells that follow.
uii, uff, aa = dir_rpt, axis0, nnsrss_a

In [103]:
# s = ui + uf, computed row by row.
ss = np.add(uii, uff)
ss

array([[ 0.90646875, -0.40806751, -0.07348386, ...,  0.38325302,
         0.2192623 ,  0.3555333 ],
       [ 0.90646875, -0.40806751, -0.07348386, ...,  0.38325302,
         0.2192623 ,  0.3555333 ],
       [ 0.90646875, -0.40806751, -0.07348386, ...,  0.38325302,
         0.2192623 ,  0.3555333 ],
       ...,
       [ 0.86100237,  0.39914535, -0.12408949, ...,  0.35145757,
        -0.04713215,  0.18732613],
       [ 0.86100237,  0.39914535, -0.12408949, ...,  0.35145757,
        -0.04713215,  0.18732613],
       [ 0.86100237,  0.39914535, -0.12408949, ...,  0.35145757,
        -0.04713215,  0.18732613]])

In [104]:
ss.shape

(55, 27)

In [105]:
# c = 2 / (s . s); the einsum yields one dot product per row of ss.
cc = np.divide(2.0, np.einsum('ij,ij->i', ss, ss))
cc.shape

(55,)

In [106]:
# w = 2 (a . ui) ui - a; the row-wise dot products are turned into a column
# so they broadcast across the columns of uii.
ww = np.multiply(2.0, np.einsum('ij,ij->i', aa, uii))[:, np.newaxis] * uii - aa
ww.shape

(55, 27)

In [107]:
# r = c (w . s) s - w; again one dot product per row, broadcast as a column.
rr = np.multiply(cc, np.einsum('ij,ij->i', ww, ss))[:, np.newaxis] * ss - ww
rr.shape

(55, 27)

In [108]:
rr

array([[-7.66393880e-02, -4.31889481e-02, -1.76455285e-02, ...,
        -1.14726200e-02,  2.43201948e-02, -6.41375183e-02],
       [ 8.36085172e-02, -7.85553577e-02, -3.91447635e-02, ...,
        -3.78855550e-02,  7.25225650e-02, -3.10054092e-02],
       [ 2.30388850e-01,  1.38777878e-17,  0.00000000e+00, ...,
        -1.38777878e-17, -6.93889390e-18,  0.00000000e+00],
       ...,
       [-7.69090369e-02, -2.36933211e-02, -3.40395780e-02, ...,
        -2.62815038e-02, -1.07797424e-01, -2.85520248e-02],
       [ 1.18829898e-01, -5.10360562e-02, -2.30276697e-02, ...,
         3.65446807e-02, -1.49561538e-01, -7.12749399e-02],
       [ 2.88916260e-01, -4.16333634e-17,  6.93889390e-18, ...,
        -2.77555756e-17,  1.73472348e-18, -6.93889390e-18]])

In [109]:
rr.shape

(55, 27)

#### Rotate each subspace's nearest neighbors from the direction to nnb onto the 1st axis

In this configuration, the first coordinate of the rotated vectors in each synthetic sample's subspace is the projected distance along the line from the synthetic sample to nnb.

In [110]:
# Same computation as the step-by-step cells above, done through the helper:
# rotate each row of nnsrss_a in the frame taking its dir_rpt row to its axis0 row.
rr = rtv_einsum(ui=dir_rpt, uf=axis0, a=nnsrss_a)
rr

array([[-7.66393880e-02, -4.31889481e-02, -1.76455285e-02, ...,
        -1.14726200e-02,  2.43201948e-02, -6.41375183e-02],
       [ 8.36085172e-02, -7.85553577e-02, -3.91447635e-02, ...,
        -3.78855550e-02,  7.25225650e-02, -3.10054092e-02],
       [ 2.30388850e-01,  1.38777878e-17,  0.00000000e+00, ...,
        -1.38777878e-17, -6.93889390e-18,  0.00000000e+00],
       ...,
       [-7.69090369e-02, -2.36933211e-02, -3.40395780e-02, ...,
        -2.62815038e-02, -1.07797424e-01, -2.85520248e-02],
       [ 1.18829898e-01, -5.10360562e-02, -2.30276697e-02, ...,
         3.65446807e-02, -1.49561538e-01, -7.12749399e-02],
       [ 2.88916260e-01, -4.16333634e-17,  6.93889390e-18, ...,
        -2.77555756e-17,  1.73472348e-18, -6.93889390e-18]])

In [111]:
rr.shape

(55, 27)

In [112]:
# First coordinate of every rotated vector, folded back into the shape of
# the two leading axes of nnsrss.
ma = rr[:, 0].reshape(nnsrss.shape[:2])
ma

array([[-0.07663939,  0.08360852,  0.23038885, -0.03278154, -0.05099504],
       [-0.06389604,  0.27251639,  0.07749573,  0.00907458, -0.11048265],
       [ 0.16016827,  0.33480496,  0.06067855,  0.19767971,  0.18239977],
       [-0.02315304,  0.11411974,  0.27470669,  0.10605045,  0.03658089],
       [ 0.04640372,  0.02871833,  0.3579492 ,  0.05502559,  0.17012375],
       [ 0.09078196, -0.03470485,  0.05809832,  0.15363798,  0.27361253],
       [ 0.03674439, -0.07952057,  0.29795377,  0.13129858,  0.07615609],
       [ 0.0532187 ,  0.01401369, -0.08445033,  0.0874674 ,  0.33067592],
       [ 0.1135484 , -0.01778437,  0.25960944, -0.06223616,  0.05696151],
       [ 0.14145596, -0.0173206 ,  0.10810268,  0.34811584,  0.12374399],
       [ 0.03481057,  0.13835571, -0.07690904,  0.1188299 ,  0.28891626]])

In [113]:
ma.shape

(11, 5)

In [114]:
nna_max_idx[:,None]

array([[1],
       [0],
       [0],
       [1],
       [1],
       [3],
       [1],
       [3],
       [1],
       [2],
       [3]], dtype=int64)

#### Get the nearest neighbor projected distance of the majority class in nn subspace that is farthest from the synthetic instance

In [115]:
# For each row i of the reshaped first-coordinate grid, pick column nna_max_idx[i].
max_disp = rr[:, 0].reshape(nnsrss.shape[:2])[np.arange(nnsrss.shape[0]), nna_max_idx]
max_disp

array([ 0.08360852, -0.06389604,  0.16016827,  0.11411974,  0.02871833,
        0.15363798, -0.07952057,  0.0874674 , -0.01778437,  0.10810268,
        0.1188299 ])

In [116]:
max_disp.shape

(11,)

#### The displacement from the synthetic instance to nnb

In [117]:
# For each row i of the reshaped first-coordinate grid, pick column nnb_idx[i].
nnb_disp = rr[:, 0].reshape(nnsrss.shape[:2])[np.arange(nnsrss.shape[0]), nnb_idx]
nnb_disp

array([0.23038885, 0.27251639, 0.33480496, 0.27470669, 0.3579492 ,
       0.27361253, 0.29795377, 0.33067592, 0.25960944, 0.34811584,
       0.28891626])

In [118]:
nnb_disp.shape

(11,)

#### Pick a random displacement between max_disp and nnb_disp, then scale dir by it to get the new synthetic sample location for each subspace

In [119]:
# Step length per instance: max_disp plus a random fraction of the gap up to
# nnb_disp (rng is presumably a NumPy Generator, so the fraction is in [0, 1) -- confirm).
# The lengths become a column so each one scales its own row of dir.
syn_new_ss = dir * np.expand_dims(
    max_disp + (nnb_disp - max_disp) * rng.uniform(size=len(nnb_disp)),
    axis=1,
)
syn_new_ss

array([[-1.16511958e-02, -5.08330027e-02, -9.15389046e-03,
        -7.33114203e-03,  1.28864729e-02,  1.02947188e-02,
         2.45643734e-02,  1.70915493e-02, -9.81507071e-03,
         5.47755389e-02,  5.54107056e-03,  3.58487618e-03,
         3.64318778e-03,  2.31265596e-03,  3.18163958e-02,
         1.59916530e-02,  1.12235159e-02,  2.52884215e-02,
         3.02752020e-02,  1.49626409e-02,  2.76651156e-03,
        -1.23933272e-02,  2.06151794e-03,  2.77699110e-03,
         4.77418598e-02,  2.73135223e-02,  4.42888120e-02],
       [ 5.63772287e-03,  2.62484065e-03,  5.17996944e-03,
         4.95806707e-03, -9.40040214e-03, -9.59239825e-03,
        -4.88357376e-04, -4.29112810e-03, -3.16617851e-03,
        -1.06532359e-02, -2.40011917e-04, -4.98032154e-03,
         6.98564334e-05,  5.03322096e-04,  2.23827185e-03,
        -9.56274268e-04,  6.16633488e-04, -1.56280120e-03,
         4.93196961e-03, -7.50217264e-04,  3.82470258e-03,
        -1.44109669e-03,  3.28072284e-03,  3.83208093e-

In [120]:
syn_new_ss.shape

(11, 27)

#### Move the new synthetic locations back out of the subspaces

In [121]:
# Undo the earlier origin shift by adding test_setr_filt back.
syn_new = np.add(syn_new_ss, test_setr_filt)

In [122]:
syn_new.shape

(11, 27)

#### Invert the scaling to transfer back to the original feature space

In [123]:
# Map the new synthetic samples back through the scaler's inverse transform.
# NOTE(review): `scaler` is created in an earlier cell; presumably the fitted
# MinMaxScaler imported at the top of the notebook -- confirm.
syn_in_org = scaler.inverse_transform(syn_new)
syn_in_org

array([[1.29720942e+01, 2.37468681e+01, 8.28131461e+01, 5.20814040e+02,
        8.93374346e-02, 5.50562638e-02, 3.82040746e-02, 2.41188197e-02,
        1.60061637e-01, 5.84407426e-02, 2.23701666e-01, 1.36622085e+00,
        1.39131937e+00, 1.88181914e+01, 6.70394291e-03, 1.02112566e-02,
        1.95445123e-02, 7.78597577e-03, 1.50920848e-02, 2.26109663e-03,
        1.44256973e+01, 3.37650024e+01, 9.16955212e+01, 6.44092385e+02,
        1.35929034e-01, 1.31171967e-01, 1.81552691e-01],
       [1.39151167e+01, 1.58676165e+01, 9.11582001e+01, 5.95655271e+02,
        9.96587175e-02, 1.25198828e-01, 7.76815691e-02, 4.98266250e-02,
        1.65606975e-01, 6.51541844e-02, 2.78037207e-01, 5.97965041e-01,
        1.95848256e+00, 2.36194776e+01, 4.78284324e-03, 2.05226740e-02,
        1.78341869e-02, 9.12349972e-03, 1.24642451e-02, 3.10828481e-03,
        1.66746515e+01, 2.08059301e+01, 1.10945351e+02, 8.27844819e+02,
        1.41797677e-01, 3.52359878e-01, 2.82679086e-01],
       [1.23841927e+01

In [124]:
syn_in_org.shape

(11, 27)

In [130]:
# An all-False row mask selects nothing, leaving an empty array that still has
# test_setr's column count and dtype.
kkk = test_setr[np.zeros(test_setr.shape[:1], dtype=bool)]
kkk

array([], shape=(0, 27), dtype=float64)

In [132]:
# Joining the zero-row array with test_setr along the rows gives test_setr's rows back.
np.concatenate((kkk, test_setr), axis=0)

array([[0.23404672, 0.30064254, 0.23595106, ..., 0.48515927, 0.23117471,
        0.2779975 ],
       [0.55090847, 0.39228948, 0.53834116, ..., 0.32690348, 0.22656241,
        0.27966278],
       [0.38047897, 0.33040243, 0.38255797, ..., 0.5666644 , 0.23293177,
        0.39456703],
       ...,
       [0.16401391, 0.24078458, 0.16360791, ..., 0.44644434, 0.13277913,
        0.11105329],
       [0.59351584, 0.42069665, 0.58386684, ..., 0.54357128, 0.31595304,
        0.5591174 ],
       [0.12581419, 0.08488333, 0.1268317 , ..., 0.42878489, 0.1512283 ,
        0.08966486]])