In [6]:
import numpy as np
from MaskingMap.Utilities.linearprog import lp_partial
from MaskingMap.Utilities.sinkhorn import sinkhorn_log_domain
from MaskingMap.Utilities.utils import cost_matrix, cost_matrix_1d, create_mask_KL, create_mask_binary, cost_matrix_aw
import matplotlib.pyplot as plt

In [7]:
def masking_map_non_linear(xs, xt, ratio=0.1, eps=1e-10, reg=0.0001, max_iterations=100000, thres=1e-5, algorithm="linear_programming", plot=False, is_aw=True):
    '''
    Transport cost between two sample sets under a binary mask derived from
    create_mask_KL.

    Both inputs receive uniform weights, the cost matrix is scaled by its
    maximum, and the masked transport plan is computed with the selected solver.

    Parameters
    ----------
        xs: ndarray
            source samples (len(xs) defines the number of source points)
        xt: ndarray
            target samples (len(xt) defines the number of target points)
        ratio: float
               forwarded to create_mask_binary together with the KL mask.
               Default is 0.1
        eps: float
             added to the maximum of the cost matrix before normalising, so the
             division never hits zero. Default is 1e-10
        reg: float
             forwarded to sinkhorn_log_domain (presumably the entropic
             regularisation strength); only used when algorithm is "sinkhorn".
             Default is 0.0001
        max_iterations: int
                        forwarded to sinkhorn_log_domain; only used when
                        algorithm is "sinkhorn". Default is 100000
        thres: float
               forwarded to sinkhorn_log_domain (presumably the stopping
               threshold); only used when algorithm is "sinkhorn".
               Default is 1e-5
        algorithm: str
                   algorithm to solve model. Default is "linear_programming". Choices should be
                   "linear_programming" and "sinkhorn"
        plot: bool
              status for plot the optimal transport matrix or not. Default is "False"
        is_aw: bool
               selects the ground cost: cost_matrix_aw when True (default, the
               behaviour before this flag existed), cost_matrix_1d when False.
    Returns
    -------
        cost: sum of the element-wise product of the transport plan and the
              normalised cost matrix. When plot is True the tuple (pi, cost)
              is returned instead, pi being the transport plan.
    Raises
    ------
        ValueError: if algorithm is not one of the two supported names, or if
                    xs or xt is empty.
    '''
    # Validate cheap preconditions first so a bad call fails before any
    # cost/mask computation is attempted.
    if algorithm not in ("linear_programming", "sinkhorn"):
        raise ValueError(
            "algorithm must be 'linear_programming' or 'sinkhorn'!")
    if len(xs) == 0 or len(xt) == 0:
        raise ValueError("xs and xt must be non-empty")

    # uniform marginals over source and target points
    p = np.ones(len(xs))/len(xs)
    q = np.ones(len(xt))/len(xt)

    # cost matrix, scaled by its maximum (out of place, so the helper's
    # return value is never mutated)
    build_cost = cost_matrix_aw if is_aw else cost_matrix_1d
    C = build_cost(xs, xt)
    C = C / (C.max() + eps)

    # mask matrix
    KL = create_mask_KL(xs, xt, type=2)
    M_hat = create_mask_binary(KL, ratio)

    # solving model
    if algorithm == "linear_programming":
        pi = lp_partial(p, q, C, M_hat)
    else:
        pi = sinkhorn_log_domain(
            p, q, C, M_hat, reg, max_iterations, thres)

    cost = np.sum(pi * C)
    if plot:
        plt.imshow(pi, cmap='viridis')
        plt.colorbar()
        plt.show()
        return pi, cost
    return cost


In [8]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from tqdm import tqdm
def knn_classifier_from_distance_matrix(distance_matrix, k, labels):
    '''
    Predict labels with a k-nearest-neighbour vote over precomputed distances.

    Parameters
    ----------
        distance_matrix: ndarray, (n_queries, n_train)
                         distance from every query sample (rows) to every
                         training sample (columns)
        k: int
           number of neighbours used for the vote
        labels: array-like, (n_train,)
                label of each training sample, in column order
    Returns
    -------
        predicted_labels: ndarray, (n_queries,)
    '''
    knn_clf = KNeighborsClassifier(
        n_neighbors=k, algorithm="brute", metric="precomputed"
    )
    n_train_samples = distance_matrix.shape[1]
    # With metric="precomputed" the classifier still requires a square
    # (n_train, n_train) matrix at fit time, but neighbours at predict time are
    # taken from the distances passed to predict(). A zero placeholder is
    # therefore sufficient, and unlike random data it is deterministic and
    # leaves the global NumPy RNG state untouched.
    placeholder = np.zeros((n_train_samples, n_train_samples))
    knn_clf.fit(placeholder, labels)
    return knn_clf.predict(distance_matrix)
def knn_masking_map_non_linear(X_train, X_test, y_train, y_test, ratio=0.1, k=1, is_aw=False):
    '''
    Accuracy of a k-NN classifier whose distances come from
    masking_map_non_linear.

    Every (test, train) pair is scored with masking_map_non_linear; the scores
    fill a (len(X_test), len(X_train)) matrix that is handed to
    knn_classifier_from_distance_matrix together with y_train.

    Parameters
    ----------
        X_train, X_test: indexable collections of samples
        y_train, y_test: labels for the training / test samples
        ratio: float
               forwarded to masking_map_non_linear. Default is 0.1
        k: int
           number of neighbours. Default is 1
        is_aw: bool
               printed, then forwarded to masking_map_non_linear. Default is False
    Returns
    -------
        accuracy: accuracy_score of the predictions against y_test
    '''
    print(f"Is aw: {is_aw}")
    n_train, n_test = len(X_train), len(X_test)
    # rows index test samples, columns index training samples
    pairwise = np.zeros((n_test, n_train))
    for col in tqdm(range(n_train)):
        # running total of this training sample's distances, reported below
        column_total = 0
        for row in tqdm(range(n_test), leave=False):
            pair_cost = masking_map_non_linear(
                np.array(X_train[col]), np.array(X_test[row]), ratio=ratio, is_aw=is_aw
            )
            pairwise[row, col] = pair_cost
            column_total += pair_cost
        print(f"Sum cost: {column_total}\n")
    predictions = knn_classifier_from_distance_matrix(pairwise, k, y_train)
    return accuracy_score(y_test, predictions)

In [9]:
from GetData.GetDataOneDimension import getData
# Load the "BME" data from ../Data/OneDimension/ via getData; the result is
# unpacked as (train samples, train labels, test samples, test labels).
X_train, y_train, X_test, y_test = getData("BME", "../Data/OneDimension/")
# Display the number of training and test samples and the shape of the first
# training sample.
len(X_train), len(X_test), X_train[0].shape


(30, 150, (128,))

In [11]:
# 1-NN accuracy on the loaded split with ratio=0.2 and is_aw=True.
accuracy1 = knn_masking_map_non_linear(
    X_train, X_test, y_train, y_test, ratio=0.2, k=1, is_aw=True
)
accuracy1

Is aw: True


  0%|          | 0/30 [00:00<?, ?it/s]


KeyboardInterrupt: 

In [14]:
# Cost matrix between the first two training samples as computed by
# cost_matrix_1d, displayed for inspection alongside the cost_matrix_aw
# result for the same pair.
A = cost_matrix_1d(X_train[0], X_train[1])
A

array([[1.39425702e-03, 2.81662678e-04, 1.24177305e-03, ...,
        8.98537003e-04, 7.34523732e-05, 2.53677133e-04],
       [2.95047611e-08, 4.29679486e-04, 5.16525729e-06, ...,
        5.67899974e-05, 8.37586380e-04, 4.65880955e-04],
       [1.26180832e-05, 2.89161455e-04, 2.10610741e-06, ...,
        1.45309643e-05, 6.35903192e-04, 3.18990745e-04],
       ...,
       [9.67102717e-04, 1.11119701e-04, 8.40844822e-04, ...,
        5.63308965e-04, 5.42405468e-06, 9.38141211e-05],
       [1.54940547e-03, 3.53648560e-04, 1.38842217e-03, ...,
        1.02389401e-03, 1.12215314e-04, 3.22201854e-04],
       [4.09492464e-04, 1.03055124e-07, 3.28877115e-04, ...,
        1.65682612e-04, 7.28188302e-05, 1.38435462e-06]])

In [15]:
# Cost matrix between the first two training samples as computed by
# cost_matrix_aw (the cost used inside masking_map_non_linear), displayed
# for inspection.
B = cost_matrix_aw(X_train[0], X_train[1])
B

array([[0.03733975, 0.01678281, 0.0352388 , ..., 0.02997561, 0.00857044,
        0.01592724],
       [0.00017177, 0.02072871, 0.00227272, ..., 0.00753591, 0.02894108,
        0.02158428],
       [0.00355219, 0.01700475, 0.00145124, ..., 0.00381195, 0.02521712,
        0.01786031],
       ...,
       [0.03109828, 0.01054133, 0.02899732, ..., 0.02373413, 0.00232896,
        0.00968577],
       [0.03936249, 0.01880555, 0.03726154, ..., 0.03199834, 0.01059317,
        0.01794998],
       [0.02023592, 0.00032102, 0.01813497, ..., 0.01287178, 0.0085334 ,
        0.00117659]])