In [1]:
import numpy as np

from smote_variants import (construct_tensor, ClassifierImpliedDissimilarityMatrix,
                            estimate_mutual_information, NearestNeighborsWithMetricTensor,
                            n_neighbors_func)
from sklearn.linear_model import LinearRegression

In [2]:
# Toy data set: three points in the plane, one per row.
X = np.array([
    [1.0, 0.0],
    [0.0, 1.0],
    [1.0, 1.0],
])

# Pairwise Euclidean distances between the rows of X (symmetric, zero diagonal).
dissim_matrix = np.array([
    [0.0, np.sqrt(2.0), 1.0],
    [np.sqrt(2.0), 0.0, 1.0],
    [1.0, 1.0, 0.0],
])

In [3]:
# Call construct_tensor on the toy points and their pairwise dissimilarities.
# dissim_matrix holds the Euclidean distances between the rows of X, and the
# displayed result is the 2x2 identity matrix together with a scalar 1.0
# (presumably a fit-quality score -- TODO confirm against the library docs).
construct_tensor(X, dissim_matrix)

(array([[1., 0.],
        [0., 1.]]),
 1.0)

In [4]:
# Upper-triangle index pairs, computed once and reused by the cells below.
# k=0 keeps the diagonal, k=1 is the strictly-upper part.
X_tu_indices = np.triu_indices(n=X.shape[0], k=0)    # sample pairs (i <= j)
d_tu_indices_0 = np.triu_indices(n=X.shape[1], k=0)  # feature pairs (i <= j)
d_tu_indices_1 = np.triu_indices(n=X.shape[1], k=1)  # feature pairs (i < j)

# Number of sample pairs and number of upper-triangular feature pairs.
n_upper = X_tu_indices[0].size
n_d = d_tu_indices_0[0].size

In [5]:
# Difference vectors X[i] - X[j] for every sample pair with i <= j:
# broadcast to the full (n, n, d) difference cube, then keep the upper triangle.
cross_diff_all = (X[:, np.newaxis] - X[np.newaxis])[X_tu_indices]


In [6]:
# Regression target: squared dissimilarity of every sample pair with i <= j,
# in the same pair order as X_tu_indices.
y_target = dissim_matrix[X_tu_indices] ** 2

In [7]:
# Squared dissimilarities for the sample pairs (i <= j), in X_tu_indices order.
y_target

array([0., 2., 1., 0., 1., 0.])

In [8]:
# One row per sample pair (i <= j): the difference vector X[i] - X[j].
cross_diff_all

array([[ 0.,  0.],
       [ 1., -1.],
       [ 0., -1.],
       [ 0.,  0.],
       [-1.,  0.],
       [ 0.,  0.]])

In [9]:
# Outer product of every difference vector with itself, i.e.
# cross_diff_cross_products[p, i, j] = cross_diff_all[p, i] * cross_diff_all[p, j].
cross_diff_cross_products = np.einsum(
    '...i,...j->...ij', cross_diff_all, cross_diff_all
)

In [10]:
# One d x d outer product per sample pair; symmetric at this point.
cross_diff_cross_products

array([[[ 0.,  0.],
        [ 0.,  0.]],

       [[ 1., -1.],
        [-1.,  1.]],

       [[ 0.,  0.],
        [ 0.,  1.]],

       [[ 0.,  0.],
        [ 0.,  0.]],

       [[ 1.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.]]])

In [11]:
# Rebuild the outer products from cross_diff_all before scaling so that this
# cell is idempotent. Scaling the existing array in place (as this cell did
# before) multiplies the off-diagonal terms by another factor of 2 on every
# re-run of the cell.
cross_diff_cross_products = np.einsum(
    '...i,...j->...ij', cross_diff_all, cross_diff_all
)

# Double the strictly-upper (i < j) products. Only the upper triangle is used
# as regression features further down, so each off-diagonal term presumably
# has to stand in for both (i, j) and (j, i) of a symmetric tensor.
cross_diff_cross_products[:, d_tu_indices_1[0], d_tu_indices_1[1]] *= 2

In [12]:
# After the in-place scaling only the strictly-upper entries ([p, 0, 1] here)
# are doubled, so the individual 2x2 slices are no longer symmetric.
cross_diff_cross_products

array([[[ 0.,  0.],
        [ 0.,  0.]],

       [[ 1., -2.],
        [-1.,  1.]],

       [[ 0.,  0.],
        [ 0.,  1.]],

       [[ 0.,  0.],
        [ 0.,  0.]],

       [[ 1.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.]]])

In [13]:
# Regression design matrix: for every sample pair keep the upper-triangular
# entries of its outer product, giving one column per feature pair (i <= j).
X_target = cross_diff_cross_products[(slice(None), *d_tu_indices_0)]

In [14]:
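# Disabled experiment, kept for reference: overwrite the off-diagonal entry of
# row 1 of X_target (-2 in the display below) with +2.
#X_target[1, 1] = 2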

In [15]:
# Columns follow d_tu_indices_0, i.e. entries (0, 0), (0, 1), (1, 1) of each
# sample pair's outer product.
X_target

array([[ 0.,  0.,  0.],
       [ 1., -2.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [16]:
# Least-squares fit of the squared dissimilarities on the outer-product
# features; no intercept term is fitted.
linearr = LinearRegression(fit_intercept=False).fit(X=X_target, y=y_target)

In [17]:
# Fitted coefficients in d_tu_indices_0 order: entries (0, 0), (0, 1), (1, 1).
# For this toy data they come out as ~[1, 0, 1].
linearr.coef_

array([ 1.00000000e+00, -1.11022302e-16,  1.00000000e+00])

In [18]:
# Training data: the three distinct toy points, each listed twice, with a
# binary label per row.
X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0],
              [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
y = np.array([0, 0, 1, 0, 0, 1])

# The classifier is specified as a (module name, class name) pair plus its
# constructor parameters; random_state is fixed.
cidm = ClassifierImpliedDissimilarityMatrix(
    classifier=('sklearn.ensemble', 'RandomForestClassifier'),
    classifier_params={'n_estimators': 100,
                       'min_samples_leaf': 2,
                       'random_state': 5},
)
cidm.fit(X, y)

# Dissimilarities among the first three (distinct) points.
dissim = cidm.dissimilarity_matrix(X[:3])

2022-07-24 14:48:59,458:INFO:ClassifierImpliedDissimilarityMatrix: fitting


In [19]:
# Four labelled points: two of class 0 followed by two of class 1.
X = np.array([
    [1.0, 0.0],
    [0.0, 1.0],
    [1.0, 1.0],
    [2.0, 2.0],
])
y = np.array([0, 0, 1, 1])

In [20]:
from sklearn.neighbors import NearestNeighbors

In [21]:
# Sanity check: repeating a value zero times yields an empty integer array.
np.repeat(0, repeats=0)

array([], dtype=int64)

In [22]:
# Split the samples by class label.
X_min = X[y == 1]
X_maj = X[y == 0]

# Neighbours are searched in the full data set, which contains the query
# points themselves.
nearestn = NearestNeighbors(n_neighbors=2).fit(X)
_, ind_min = nearestn.kneighbors(X_min)
_, ind_maj = nearestn.kneighbors(X_maj)

# True where every returned neighbour carries the sample's own class label.
label_min = np.all(y[ind_min] == 1, axis=1)
label_maj = np.all(y[ind_maj] == 0, axis=1)

# Keep the samples that have at least one differently-labelled neighbour.
X_maj_kept = X_maj[~label_maj]
X_min_kept = X_min[~label_min]
X_final = np.vstack([X_maj_kept, X_min_kept])

# Fix: size the labels from the rows that were actually kept. Previously the
# counts came from label_maj / label_min (the rows that were dropped), so
# X_final and y_final could have different lengths.
# NOTE(review): this keeps the selection used for X_final; confirm that the
# negated masks (rather than the masks themselves) are the intended subset.
y_final = np.hstack([np.repeat(0, len(X_maj_kept)),
                     np.repeat(1, len(X_min_kept))])

assert len(X_final) == len(y_final), "X_final and y_final must stay aligned"

In [23]:
# Eight samples with two discrete-valued features and a balanced binary label.
X = np.array([
    [0.0, 1.0],
    [1.0, 2.0],
    [0.0, 1.0],
    [0.0, 2.0],
    [1.0, 0.0],
    [1.0, 2.0],
    [0.0, 1.0],
    [1.0, 0.0],
])
y = np.array([0, 1, 0, 0, 1, 1, 0, 1])

In [24]:
# The displayed result holds one value per column of X.
# NOTE(review): normalize=False presumably returns the raw, unnormalised
# estimates -- confirm against the library documentation.
estimate_mutual_information(X, y, normalize=False)

array([1.15744048, 0.48452381])

In [30]:
# Query points and the candidate neighbours they are matched against.
X_base = np.array([[1.0, 0.0],
                   [0.0, 1.0]])
X_neighbors = np.array([[2.0, 0.0],
                        [0.0, 2.0]])

# No metric tensor is supplied.
metric_params = dict(metric_tensor=None)

nearestn = NearestNeighborsWithMetricTensor(n_neighbors=2,
                                            metric='minkowski',
                                            metric_params=metric_params)
nearestn.fit(X_neighbors)

# Despite its name, `ind` receives the complete return value of kneighbors.
ind = nearestn.kneighbors(X_base)

2022-07-24 14:50:26,315:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-07-24 14:50:26,317:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski


In [31]:
# The kneighbors result is a (distances, indices) pair; the distances shown
# equal the plain Euclidean distances between X_base and X_neighbors.
ind

(array([[1.        , 2.23606798],
        [1.        , 2.23606798]]),
 array([[0, 1],
        [1, 0]]))

In [32]:
# Function-style query for the same points; with return_distance=True the
# displayed (distances, indices) pair equals the class-based result above.
n_neighbors_func(X_base, X_neighbors, n_neighbors=2, return_distance=True)

(array([[1.        , 2.23606798],
        [1.        , 2.23606798]]),
 array([[0, 1],
        [1, 0]]))