In [1]:
import numpy as np

import pytest

from smote_variants import (array_array_index,
                            base_idx_neighbor_idx_simplices,
                            all_neighbor_simplices_real_idx,
                            reweight_simplex_vertices,
                            cartesian_product, vector_choice,
                            simplex_volume, simplex_volumes,
                            SimplexSamplingMixin,
                            random_samples_from_simplices)


2022-07-30 22:15:30.222871: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-07-30 22:15:30.227649: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-07-30 22:15:30.227695: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
X = np.array([[0, 1], [1, 0], [0, 0]])
X_vertices = np.array([[0, 1], [1, 0], [1, 1]])
simplices = np.array([[0, 1, 2]]*10)
vertex_weights_0 = np.array([1.0, 1.0, 0.0])

In [3]:
sample = random_samples_from_simplices(X, simplices)
np.all(np.dot(sample, np.array([1.0, 1.0])) - 1 < 0)

True

In [4]:
sample = random_samples_from_simplices(X, simplices, X_vertices=X)
np.all(np.dot(sample, np.array([1.0, 1.0])) - 1 < 0)

True

In [5]:
sample = random_samples_from_simplices(X, simplices, X_vertices=X_vertices)
np.all(np.dot(sample, np.array([1.0, 1.0])) - 1 > 0)

True

In [6]:
sample = random_samples_from_simplices(X, simplices, vertex_weights=vertex_weights_0)
np.testing.assert_almost_equal(np.dot(sample, np.array([1.0, 1.0])) - 1, 0)

In [7]:
sample = random_samples_from_simplices(X, simplices, X_vertices = X_vertices, vertex_weights=vertex_weights_0)
np.testing.assert_almost_equal(np.dot(sample, np.array([1.0, 1.0])) - 1, 0)

In [8]:
np.dot(sample, np.array([1.0, 1.0])) - 1

array([2.22044605e-16, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       2.22044605e-16, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00])

In [9]:
sample = random_samples_from_simplices(X, simplices)

In [10]:
sample

array([[0.67008034, 0.17083696],
       [0.24193552, 0.45503586],
       [0.32787891, 0.48440649],
       [0.08018775, 0.18643431],
       [0.10699822, 0.29827136],
       [0.42079889, 0.27842957],
       [0.32060835, 0.34767421],
       [0.35404587, 0.33058247],
       [0.22619646, 0.5139539 ],
       [0.45452972, 0.45628295]])

In [11]:
X_ssm = np.array([[0.0, 1.0], [1.0, 0.0], [0.0, 2.0], [0.01, 2.00], [0.01, 2.01]])
indices_ssm = np.array([[0, 2, 1], [1, 0, 2], [2, 0, 1], [3, 4, 4], [4, 3, 3]])
small_volume_edge_weight = 0.4 # (4 edges, but the small volume appears twice)
n_to_sample_ssm = 1000


In [12]:
ssm = SimplexSamplingMixin(simplex_sampling='volume',
                                within_simplex_sampling='deterministic',
                                n_dim=2,
                                random_state=5)

assert len(ssm.get_params()) > 0

samples = ssm.sample_simplex(X=X_ssm,
                                indices=indices_ssm,
                                n_to_sample=n_to_sample_ssm,
                                base_weights=np.array([0, 0, 0, 0.5, 0.5]),
                                vertex_weights=np.array([1.0, 1.0, 1.0, 1.0, 0.5]))

assert len(samples) == n_to_sample_ssm
assert np.all(np.logical_and(samples[:, 1] <= 2.005,
                                samples[:, 1] >= 2.0))

samples = ssm.sample_simplex(X=X_ssm,
                                indices=indices_ssm,
                                n_to_sample=n_to_sample_ssm,
                                base_weights=np.array([0, 0, 0, 1.0, 0]),
                                vertex_weights=np.array([1.0, 1.0, 1.0, 1.0, 0.5]),
                                X_vertices=X_ssm.copy())

assert len(samples) == n_to_sample_ssm
assert np.all(np.logical_and(samples[:, 1] <= 2.005,
                                samples[:, 1] >= 2.0))

In [13]:
small_volume_edge_weight = 0.4

In [14]:
ssm = SimplexSamplingMixin(simplex_sampling='uniform',
                                within_simplex_sampling='deterministic',
                                n_dim=2,
                                random_state=5)

samples = ssm.sample_simplex(X=X_ssm,
                                indices=indices_ssm,
                                n_to_sample=n_to_sample_ssm)

np.sum(samples[:, 0] == 0.01)

392

In [18]:
ssm = SimplexSamplingMixin(simplex_sampling='uniform',
                                within_simplex_sampling='random',
                                n_dim=1,
                                random_state=5)

In [19]:
all_neighbor_simplices_real_idx(n_dim=1, indices=indices_ssm)

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [20]:
samples = ssm.sample_simplex(X=X_ssm, indices=indices_ssm, n_to_sample=n_to_sample_ssm)

In [23]:
np.unique(samples, axis=0)

array([[0.  , 1.  ],
       [0.  , 2.  ],
       [0.01, 2.  ],
       [0.01, 2.01],
       [1.  , 0.  ]])

In [15]:
ssm = SimplexSamplingMixin(simplex_sampling='uniform',
                                within_simplex_sampling='random',
                                n_dim=2,
                                random_state=5)

samples = ssm.sample_simplex(X=X_ssm,
                                indices=indices_ssm,
                                n_to_sample=n_to_sample_ssm)

np.sum(samples[:, 0] == 0.01)

257

In [16]:
ssm = SimplexSamplingMixin(simplex_sampling='uniform',
                                within_simplex_sampling='random',
                                n_dim=2,
                                random_state=5)

In [17]:
simp0 = ssm.simplices(X_ssm, n_to_sample_ssm, indices=indices_ssm)

In [18]:
ssm = SimplexSamplingMixin(simplex_sampling='uniform',
                                within_simplex_sampling='deterministic',
                                n_dim=2,
                                random_state=5)

In [19]:
simp1 = ssm.simplices(X_ssm, n_to_sample_ssm, indices=indices_ssm)

In [20]:
np.testing.assert_array_equal(simp0, simp1)