In [219]:
%load_ext autoreload
%autoreload 2
import numpy as np
import svt
from bayes_opt import BayesianOptimization
import pickle
def eval_distM(M, triplets):
    correct = 0
    for a,p,n in triplets:
        if M[a,p] < M[a,n]: correct += 1
    return correct/len(triplets)
def eval_simM(M, triplets):
    correct = 0
    for a,p,n in triplets:
        if M[a,p] >= M[a,n]: correct += 1
    return correct/len(triplets)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [220]:
data_size = 120
triplets = np.array(pickle.load(open("data/bm_prolific_triplets.pkl","rb")))
# triplets = np.array(pickle.load(open("data/bm_prolific_triplets_filtered.pkl","rb")))

In [218]:
from sklearn.model_selection import KFold

X = np.arange(11)
kf = KFold(n_splits=5, shuffle=True)
for train, test in kf.split(X):
    print("%s %s" % (train, test))

[ 1  2  4  5  7  8  9 10] [0 3 6]
[0 1 2 3 5 6 7 8 9] [ 4 10]
[ 0  2  3  4  5  6  7  9 10] [1 8]
[ 0  1  3  4  5  6  7  8 10] [2 9]
[ 0  1  2  3  4  6  8  9 10] [5 7]


In [221]:
len_triplets = len(triplets)
valid_idx = np.random.choice(np.arange(len_triplets), int(0.2*len_triplets), replace=False)
train_idx = np.setdiff1d(np.arange(len_triplets), valid_idx)
train_triplets = triplets[train_idx]
valid_triplets = triplets[valid_idx]

constructing distance matrix from triplets

In [222]:
X = np.zeros([data_size,data_size])
mask = np.zeros([data_size,data_size])
np.fill_diagonal(mask, 1)

incrementing/decrementing by 1

In [223]:
for a,p,n in train_triplets:
    X[a,p] -= 1
    X[a,n] += 1
    X[p,a] -= 1
    X[n,a] += 1
    mask[a,p], mask[a,n], mask[p,a], mask[n,a] = 1, 1, 1, 1
np.fill_diagonal(X, 0)
eval_distM(X, train_triplets), eval_distM(X, valid_triplets)

(0.9572916666666667, 0.3770833333333333)

In [142]:
for a,p,n in train_triplets:
    X[a,p] += 1
    X[a,n] -= 1
    X[p,a] += 1
    X[n,a] -= 1
    mask[a,p], mask[a,n], mask[p,a], mask[n,a] = 1, 1, 1, 1
np.fill_diagonal(X, X.max()*2)
eval_simM(X, train_triplets), eval_simM(X, valid_triplets)

(0.99375, 0.78125)

incrementing/decrementing by random

In [181]:
for a,p,n in train_triplets:
    X[a,p] -= np.random.random()
    X[a,n] += np.random.random()
    X[p,a] -= np.random.random()
    X[n,a] += np.random.random()
    mask[a,p], mask[a,n], mask[p,a], mask[n,a] = 1, 1, 1, 1
np.fill_diagonal(X, X.min()*2)
eval_distM(X, train_triplets), eval_distM(X, valid_triplets)

(0.9479166666666666, 0.5854166666666667)

In [190]:
for a,p,n in train_triplets:
    X[a,p] += 1
    X[a,n] += 2
    X[p,a] += 1
    X[n,a] += 2
    mask[a,p], mask[a,n], mask[p,a], mask[n,a] = 1, 1, 1, 1
np.fill_diagonal(X, 0)
eval_distM(X, train_triplets), eval_distM(X, valid_triplets)

(0.778125, 0.534375)

## singular value thresholding with bayesian optimization

In [209]:
def svt_solve(threshold, eps):
    X_hat = svt.svt_solve(X, mask, threshold=threshold, eps=eps)
    return  - np.linalg.norm(X - X_hat)

In [210]:
S = np.linalg.svd(X)[1]
pbounds = {"threshold": (S.min(), S.max()/2),
    "eps": (1e-4, 1000)}

In [211]:
optimizer = BayesianOptimization(
    f= svt_solve,
    pbounds=pbounds,
    verbose=0, 
    random_state=1,
)
optimizer.maximize(
    init_points=2,
    n_iter=100,
)
optimizer.max

{'target': -0.14233965736598567,
 'params': {'eps': 7.364687302826743, 'threshold': 0.18263289239704822}}

In [212]:
threshold = optimizer.max["params"]["threshold"]
eps = optimizer.max["params"]["eps"]
X_hat = svt.svt_solve(X, mask, threshold=threshold, eps=eps, max_iters=1000)
eval_distM(X_hat, valid_triplets)

0.609375

In [224]:
from matrix_completion import svt_solve

In [225]:
X_hat = svt_solve(X, mask, max_iterations=100)
eval_distM(X_hat, valid_triplets)


0.6708333333333333

In [226]:
X_hat = svt_solve(X, mask, max_iterations=1000)
eval_distM(X_hat, valid_triplets)


0.6645833333333333