In [3]:
import numpy as np

import logging
import pytest

import smote_variants as sv

In [14]:
X_1_min_some_maj = np.array([[1.0, 1.1],
                      [1.1, 1.2],
                      [1.05, 1.1],
                      [1.08, 1.05],
                      [1.1, 1.08],
                      [1.55, 1.55]])

y_1_min_some_maj = np.array([0, 0, 0, 0, 1, 1])

In [15]:
from smote_variants import NearestNeighborsWithMetricTensor

X = X_1_min_some_maj
y = y_1_min_some_maj

min_label = 1
n_neighbors = 5
nn_params = {}

X_min = X[y == min_label]

# fitting the model
n_neighbors = min([len(X_min), n_neighbors+1])

nn_mt= NearestNeighborsWithMetricTensor(n_neighbors=n_neighbors,
                                        n_jobs=1,
                                        **nn_params)
nn_mt.fit(X_min)
_, ind_min = nn_mt.kneighbors(X_min, return_distance=True)

2022-07-21 17:53:19,200:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-07-21 17:53:19,202:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski


In [16]:
sv.SMOTE_TomekLinks().sample(X_1_min_some_maj, y_1_min_some_maj)

2022-07-21 17:53:19,526:INFO:SMOTE_TomekLinks: Running sampling via ('SMOTE_TomekLinks', "{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {}, 'sampling_params': {}, 'n_jobs': 1, 'random_state': None}")
2022-07-21 17:53:19,528:INFO:{SMOTE}: Running sampling via {('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {'metric_tensor': None}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': None}")}
2022-07-21 17:53:19,529:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-07-21 17:53:19,530:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski
2022-07-21 17:53:19,640:INFO:TomekLinkRemoval: Running noise removal via TomekLinkRemoval
2022-07-21 17:53:19,643:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-07-21 17:53:19,647:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski


(array([[1.        , 1.1       ],
        [1.1       , 1.2       ],
        [1.05      , 1.1       ],
        [1.55      , 1.55      ],
        [1.22332086, 1.20880178],
        [1.43372516, 1.42855739]]),
 array([0, 0, 0, 1, 1, 1]))

In [17]:
sv.SimplexSamplingMixin().sample_simplex(X_min, indices=ind_min, n_to_sample=6)

array([[1.16428571, 1.14714286],
       [1.22857143, 1.21428571],
       [1.29285714, 1.28142857],
       [1.35714286, 1.34857143],
       [1.42142857, 1.41571429],
       [1.48571429, 1.48285714]])

In [20]:
parameters = sv.SMOTE_TomekLinks.parameter_combinations()

In [21]:
params = np.random.choice(parameters)

In [23]:
params2 = sv.SMOTE_TomekLinks(**params).get_params()

In [24]:
params

{'n_neighbors': 7, 'proportion': 0.75}

In [25]:
params2

{'proportion': 0.75,
 'n_neighbors': 7,
 'nn_params': {},
 'sampling_params': {},
 'n_jobs': 1,
 'random_state': None}

In [3]:
import numpy as np
import os
import shutil
from smote_variants import dump_dict, load_dict

test_dir = 'test_base_dir'
os.makedirs(test_dir, exist_ok=True)

filename = os.path.join(test_dir, 'dump.pickle')

obj = {'a': 2,
        'b': np.array([1, 2])}
dump_dict(obj, filename, 'pickle', ['b'])
obj_loaded = load_dict(filename, 'pickle', ['b'])

print(obj)
print(obj_loaded)

np.testing.assert_array_equal(obj['b'], obj_loaded['b'])

{'a': 2, 'b': array([1, 2])}
{'a': 2, 'b': array([1, 2])}


In [1]:
from smote_variants import instantiate_obj

In [2]:
%%timeit
instantiate_obj(('sklearn.tree', 'DecisionTreeClassifier', {}))

21 µs ± 4.02 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [3]:
from smote_variants import AMSCO

In [5]:
AMSCO().get_params()

{'n_pop': 5,
 'n_iter': 15,
 'omega': 0.1,
 'r1': 0.1,
 'r2': 0.1,
 'nn_params': {},
 'n_jobs': 1,
 'classifier': ('sklearn.tree', 'DecisionTreeClassifier', {'random_state': 2}),
 'sampling_params': {'n_dim': 2,
  'simplex_sampling': 'uniform',
  'within_simplex_sampling': 'random',
  'gaussian_component': {}},
 'random_state': None,
 'class_name': 'AMSCO'}

In [6]:
import numpy as np

In [8]:
np.min(np.array([[1, 2], [2, 3], [3, 4]]), axis=0) < np.array([2, 3])

array([ True,  True])