In [23]:
from rbf import train, utils
import numpy as np 
from sklearn.datasets import load_breast_cancer

# Load the scikit-learn breast-cancer data set; the targets are viewed as a
# column vector so they line up row-by-row with the feature matrix.
breast_cancer = load_breast_cancer()
X_samples = breast_cancer['data']
y_sample = np.reshape(breast_cancer['target'], (-1, 1))
# Recode class 0 as -1; the later cells compare these labels with np.sign()
# outputs. Note: y_sample is a reshaped view, so this writes into the loaded
# target array in place.
y_sample[y_sample == 0] = -1

# Feature columns handed to utils.delete_features (presumably the columns to
# drop -- confirm against rbf.utils).
ignored_idx = [2, 3, 9, 11, 12, 13, 14, 18, 19, 22, 23]
X = utils.delete_features(X_samples, ignored_idx)

# Per-column statistics of the full data set, computed before any split.
# NOTE(review): the held-out rows therefore contribute to these statistics.
X_mean, X_std = np.mean(X, axis=0), np.std(X, axis=0)
X = utils.normalize_features(X, X_mean, X_std)

# Initial train/test partition; the sweep cells below draw their own splits.
x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)

In [2]:
from rbf.km_rbf import km_rbf
# km_rbf sweep: for every value p in 1..19 (third constructor argument), call
# utils.split_set 10 times, fit a new model on each split and record the
# train/test accuracy.
accs = []
stds = []
for p in range(1, 20):
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        km = km_rbf(x_train, y_train, p)
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs.append((np.mean(acc), np.mean(acct)))
    stds.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so the curves of several models can be joined with `+`.
accs = (tuple(list(curve) for curve in zip(*accs)),)
stds = (tuple(list(curve) for curve in zip(*stds)),)

In [24]:
from rbf.rnd_rbf import rnd_rbf
# rnd_rbf sweep with same_radii=False: p runs over 1..19 and each p is
# evaluated on 10 calls to utils.split_set.
accs_2 = []
stds_2 = []
for p in range(1, 20):
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        rbfrnd = rnd_rbf(x_train, y_train, p, 'supremum', same_radii=False, mean=True)
        km = rbfrnd  # both names stay bound to the most recent model
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs_2.append((np.mean(acc), np.mean(acct)))
    stds_2.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so it can be joined with the other variant's curves using `+`.
accs_2 = (tuple(list(curve) for curve in zip(*accs_2)),)
stds_2 = (tuple(list(curve) for curve in zip(*stds_2)),)

In [25]:
# rnd_rbf sweep with same_radii=True: p runs over 1..19 and each p is
# evaluated on 10 calls to utils.split_set. `rnd_rbf` itself is imported in
# the previous cell.
accs_3 = []
stds_3 = []
for p in range(1, 20):
    print(p)   # progress indicator
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        rbfrnd = rnd_rbf(x_train, y_train, p, 'supremum', same_radii=True, mean=True)
        km = rbfrnd  # both names stay bound to the most recent model
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs_3.append((np.mean(acc), np.mean(acct)))
    stds_3.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so it can be joined with the other variant's curves using `+`.
accs_3 = (tuple(list(curve) for curve in zip(*accs_3)),)
stds_3 = (tuple(list(curve) for curve in zip(*stds_3)),)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [26]:
# Join the two 1-tuples into a 2-tuple: entry 0 is the same_radii=False sweep
# (accs_2 / stds_2), entry 1 the same_radii=True sweep (accs_3 / stds_3).
accs_test = (*accs_2, *accs_3)
stds_test = (*stds_2, *stds_3)
print(accs_test, stds_test, sep='\n')

(([0.6281407035175878, 0.6969849246231155, 0.8052763819095479, 0.7804020100502512, 0.8231155778894473, 0.8537688442211057, 0.8487437185929648, 0.8741206030150753, 0.8693467336683417, 0.8786432160804021, 0.8866834170854272, 0.8881909547738693, 0.8879396984924623, 0.8879396984924623, 0.8947236180904522, 0.8957286432160805, 0.9, 0.900251256281407, 0.9085427135678392], [0.6257309941520468, 0.6894736842105263, 0.7988304093567252, 0.7766081871345029, 0.8304093567251462, 0.8251461988304094, 0.8532163742690058, 0.864327485380117, 0.8567251461988304, 0.8842105263157896, 0.8690058479532166, 0.8894736842105262, 0.8730994152046783, 0.8865497076023392, 0.8853801169590643, 0.8871345029239764, 0.9076023391812866, 0.8865497076023393, 0.8666666666666668]), ([0.6025125628140703, 0.6389447236180905, 0.6517587939698493, 0.6582914572864322, 0.672110552763819, 0.6902010050251255, 0.6939698492462311, 0.7301507537688442, 0.7293969849246231, 0.7515075376884423, 0.7384422110552764, 0.7597989949748742, 0.7467336

In [23]:
names = ['rbf mean']
# `accs` and `stds` are tuples holding one entry per model, so they can be
# zipped with the display names directly.
for ac, st, n in zip(accs, stds, names):
    print(ac, st, n)

([0.6281407035175878, 0.678391959798995, 0.6746231155778896], [0.6257309941520468, 0.6549707602339181, 0.6590643274853802]) ([1.1102230246251565e-16, 1.1102230246251565e-16, 0.004388002310696711], [0.0, 0.0, 0.0026798688274595806]) rbf mean


In [27]:
# Draw the accuracy curves of the two rnd_rbf variants. The labels follow the
# order in which the series were concatenated into accs_test / stds_test
# (same_radii=False first, then same_radii=True).
fig = utils.plot_accuracy_std((1,20), accs_test, stds_test, ['Diff radius', 'Same radius'],
                              'in Breast Cancer dataset (10 iterations / k)')
fig.update_yaxes(title = 'Accuracy')
fig.update_xaxes(title = 'k (number of clusters)')
# The size keywords are `width` and `height`; a misspelled keyword is not a
# recognised option and would leave the figure height unset.
fig.show(width = 800, height = 500)

Diff radius
Same radius


In [14]:
# Heart data file: every column but the last is a feature, the last column is
# the class label (kept as a column vector).
data = np.loadtxt( 'heart.dat' )
X_samples = data[:, :-1].copy()
y_sample = data[:, -1].copy().reshape(-1, 1)
# Recode class 2 as -1; the later cells compare these labels with np.sign()
# outputs.
y_sample[y_sample == 2] = -1

# Feature columns handed to utils.delete_features (presumably the columns to
# drop -- confirm against rbf.utils).
ignored_idx = [4, 5]
X = utils.delete_features(X_samples, ignored_idx)

# Per-column statistics of the full data set, computed before any split.
X_mean, X_std = np.mean(X, axis=0), np.std(X, axis=0)
X = utils.normalize_features(X, X_mean, X_std)

In [19]:
from rbf.km_rbf import km_rbf
# km_rbf sweep on the current X / y_sample: p runs over 1..19 and each p is
# evaluated on 10 calls to utils.split_set.
accs = []
stds = []
for p in range(1, 20):
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        km = km_rbf(x_train, y_train, p)
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs.append((np.mean(acc), np.mean(acct)))
    stds.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so the curves of several models can be joined with `+`.
accs = (tuple(list(curve) for curve in zip(*accs)),)
stds = (tuple(list(curve) for curve in zip(*stds)),)

In [18]:
from rbf.rnd_rbf import rnd_rbf
# rnd_rbf sweep with same_radii=False on the current X / y_sample: p runs over
# 1..19 and each p is evaluated on 10 calls to utils.split_set.
accs_2 = []
stds_2 = []
for p in range(1, 20):
    print(p)   # progress indicator
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        rbfrnd = rnd_rbf(x_train, y_train, p, 'supremum', same_radii=False, mean=True)
        km = rbfrnd  # both names stay bound to the most recent model
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs_2.append((np.mean(acc), np.mean(acct)))
    stds_2.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so it can be joined with the other variant's curves using `+`.
accs_2 = (tuple(list(curve) for curve in zip(*accs_2)),)
stds_2 = (tuple(list(curve) for curve in zip(*stds_2)),)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [22]:
# rnd_rbf sweep with same_radii=True on the current X / y_sample: p runs over
# 1..19 and each p is evaluated on 10 calls to utils.split_set.
accs_3 = []
stds_3 = []
for p in range(1, 20):
    print(p)   # progress indicator
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        rbfrnd = rnd_rbf(x_train, y_train, p, 'supremum', same_radii=True, mean=True)
        km = rbfrnd  # both names stay bound to the most recent model
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs_3.append((np.mean(acc), np.mean(acct)))
    stds_3.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so it can be joined with the other variant's curves using `+`.
accs_3 = (tuple(list(curve) for curve in zip(*accs_3)),)
stds_3 = (tuple(list(curve) for curve in zip(*stds_3)),)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [24]:
# Join the two 1-tuples into a 2-tuple: entry 0 is the same_radii=False sweep
# (accs_2 / stds_2), entry 1 the same_radii=True sweep (accs_3 / stds_3).
accs_test = (*accs_2, *accs_3)
stds_test = (*stds_2, *stds_3)

In [25]:
# Draw the accuracy curves of the two rnd_rbf variants. The labels follow the
# order in which the series were concatenated into accs_test / stds_test
# (same_radii=False first, then same_radii=True).
fig = utils.plot_accuracy_std((1,20), accs_test, stds_test, ['Diff radius', 'Same radius'],
                              'in Statlog dataset (10 iterations / k)')
fig.update_yaxes(title = 'Accuracy')
fig.update_xaxes(title = 'k (number of clusters)')
# The size keywords are `width` and `height`; a misspelled keyword is not a
# recognised option and would leave the figure height unset.
fig.show(width = 800, height = 500)

Diff radius
Same radius


In [37]:
from sklearn.datasets import load_iris

# Load iris; the targets are viewed as a column vector so they line up
# row-by-row with the feature matrix.
iris = load_iris()
X_samples = iris['data']
y_sample = np.reshape(iris['target'], (-1, 1))

# Collapse the three classes into two: class 0 becomes -1, class 2 is merged
# into class 1. Note: y_sample is a reshaped view, so this writes into the
# loaded target array in place.
y_sample[y_sample == 2] = 1
y_sample[y_sample == 0] = -1

# All features are used unchanged here: no column removal and no
# normalisation, unlike the breast-cancer and heart cells.
X = X_samples.copy()

In [38]:
from rbf.km_rbf import km_rbf
# km_rbf sweep on the current X / y_sample: p runs over 1..19 and each p is
# evaluated on 10 calls to utils.split_set.
accs = []
stds = []
for p in range(1, 20):
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        km = km_rbf(x_train, y_train, p)
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs.append((np.mean(acc), np.mean(acct)))
    stds.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so the curves of several models can be joined with `+`.
accs = (tuple(list(curve) for curve in zip(*accs)),)
stds = (tuple(list(curve) for curve in zip(*stds)),)

In [49]:
from rbf.rnd_rbf import rnd_rbf
# rnd_rbf sweep with same_radii=False on the current X / y_sample.
# NOTE(review): only p = 1..5 is evaluated here, whereas the same_radii=True
# sweep in the next cell covers p = 1..19 -- confirm this is intentional,
# since both series are later plotted together.
accs_2 = []
stds_2 = []
for p in range(1, 6):
    print(p)   # progress indicator
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        rbfrnd = rnd_rbf(x_train, y_train, p, 'supremum', same_radii=False, mean=True)
        km = rbfrnd  # both names stay bound to the most recent model
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs_2.append((np.mean(acc), np.mean(acct)))
    stds_2.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so it can be joined with the other variant's curves using `+`.
accs_2 = (tuple(list(curve) for curve in zip(*accs_2)),)
stds_2 = (tuple(list(curve) for curve in zip(*stds_2)),)

1
2
3
4
5


In [57]:
# rnd_rbf sweep with same_radii=True on the current X / y_sample: p runs over
# 1..19 and each p is evaluated on 10 calls to utils.split_set.
accs_3 = []
stds_3 = []
for p in range(1, 20):
    print(p)   # progress indicator
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        rbfrnd = rnd_rbf(x_train, y_train, p, 'supremum', same_radii=True, mean=True)
        km = rbfrnd  # both names stay bound to the most recent model
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs_3.append((np.mean(acc), np.mean(acct)))
    stds_3.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so it can be joined with the other variant's curves using `+`.
accs_3 = (tuple(list(curve) for curve in zip(*accs_3)),)
stds_3 = (tuple(list(curve) for curve in zip(*stds_3)),)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [59]:
# Join the two 1-tuples into a 2-tuple: entry 0 is the same_radii=False sweep
# (accs_2 / stds_2), entry 1 the same_radii=True sweep (accs_3 / stds_3).
# The two sweeps above use different p ranges (1..5 vs 1..19), so the series
# have different lengths.
accs_test = (*accs_2, *accs_3)
stds_test = (*stds_2, *stds_3)

In [63]:
# Draw the accuracy curves of the two rnd_rbf variants. The labels follow the
# order in which the series were concatenated into accs_test / stds_test
# (same_radii=False first, then same_radii=True).
# NOTE(review): the range passed here is (1,10), while the sweeps that produced
# the series used range(1,6) and range(1,20) -- confirm how
# utils.plot_accuracy_std handles series whose length differs from the range.
fig = utils.plot_accuracy_std((1,10), accs_test, stds_test, ['Diff radius', 'Same radius'],
                              'in Iris dataset (10 iterations / k)')
fig.update_yaxes(title = 'Accuracy')
fig.update_xaxes(title = 'k (number of clusters)')
# The size keywords are `width` and `height`; a misspelled keyword is not a
# recognised option and would leave the figure height unset.
fig.show(width = 800, height = 500)

Diff radius
Same radius


In [2]:
from sklearn.datasets import load_digits

# Load the digits data and turn it into a two-class problem on the parity of
# the digit: odd digits keep label 1, even digits become -1.
digits = load_digits()
X_samples = digits['data']
y_sample = (digits['target'] % 2).reshape((-1, 1))
y_sample[y_sample == 0] = -1
# Features are used as loaded (no column removal, no normalisation).
X = X_samples.copy()

In [3]:
from rbf.km_rbf import km_rbf
# km_rbf sweep on the current X / y_sample: p runs over 1..19 and each p is
# evaluated on 10 calls to utils.split_set.
accs = []
stds = []
for p in range(1, 20):
    print(p)   # progress indicator
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        km = km_rbf(x_train, y_train, p)
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs.append((np.mean(acc), np.mean(acct)))
    stds.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so the curves of several models can be joined with `+`.
accs = (tuple(list(curve) for curve in zip(*accs)),)
stds = (tuple(list(curve) for curve in zip(*stds)),)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [4]:
from rbf.rnd_rbf import rnd_rbf
# rnd_rbf sweep with same_radii=False on the current X / y_sample: p runs over
# 1..19 and each p is evaluated on 10 calls to utils.split_set.
accs_2 = []
stds_2 = []
for p in range(1, 20):
    print(p)   # progress indicator
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        rbfrnd = rnd_rbf(x_train, y_train, p, 'supremum', same_radii=False, mean=True)
        km = rbfrnd  # both names stay bound to the most recent model
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs_2.append((np.mean(acc), np.mean(acct)))
    stds_2.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so it can be joined with the other variant's curves using `+`.
accs_2 = (tuple(list(curve) for curve in zip(*accs_2)),)
stds_2 = (tuple(list(curve) for curve in zip(*stds_2)),)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [7]:
# rnd_rbf sweep with same_radii=True on the current X / y_sample: p runs over
# 1..19 and each p is evaluated on 10 calls to utils.split_set.
accs_3 = []
stds_3 = []
for p in range(1, 20):
    print(p)   # progress indicator
    acc = []   # train accuracy of each repetition at this p
    acct = []  # test accuracy of each repetition at this p
    for _ in range(10):
        x_train, x_test, y_train, y_test = utils.split_set(X, y_sample)
        rbfrnd = rnd_rbf(x_train, y_train, p, 'supremum', same_radii=True, mean=True)
        km = rbfrnd  # both names stay bound to the most recent model
        # np.sign maps the raw model output onto -1 / 0 / +1 predictions.
        y_hat_train = np.sign(km.eval(x_train))
        y_hat_test = np.sign(km.eval(x_test))
        acc.append(utils.eval_accuracy(y_hat_train, y_train, 1))
        acct.append(utils.eval_accuracy(y_hat_test, y_test, 1))
    # One (train, test) summary pair per p.
    accs_3.append((np.mean(acc), np.mean(acct)))
    stds_3.append((np.std(acc), np.std(acct)))
# Regroup [(train, test), ...] into ([train, ...], [test, ...]) and wrap it in
# a 1-tuple so it can be joined with the other variant's curves using `+`.
accs_3 = (tuple(list(curve) for curve in zip(*accs_3)),)
stds_3 = (tuple(list(curve) for curve in zip(*stds_3)),)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [8]:
# Join the two 1-tuples into a 2-tuple: entry 0 is the same_radii=False sweep
# (accs_2 / stds_2), entry 1 the same_radii=True sweep (accs_3 / stds_3).
accs_test = (*accs_2, *accs_3)
stds_test = (*stds_2, *stds_3)

In [9]:
# Draw the accuracy curves of the two rnd_rbf variants. The labels follow the
# order in which the series were concatenated into accs_test / stds_test
# (same_radii=False first, then same_radii=True).
fig = utils.plot_accuracy_std((1,20), accs_test, stds_test, ['Diff radius', 'Same radius'],
                              'in Digits dataset (10 iterations / k)')
fig.update_yaxes(title = 'Accuracy')
fig.update_xaxes(title = 'k (number of clusters)')
# The size keywords are `width` and `height`; a misspelled keyword is not a
# recognised option and would leave the figure height unset.
fig.show(width = 800, height = 500)

Diff radius
Same radius


In [9]:
from rbf.rnd_rbf import rnd_rbf

# Fit one rnd_rbf model (size argument 8, same_radii=False) on whichever
# train/test split is currently left in the kernel, and predict both parts.
rbfrnd = rnd_rbf(x_train, y_train, 8, 'supremum', same_radii=False)
y_hat_train = np.sign(rbfrnd.eval(x_train))
y_hat_test = np.sign(rbfrnd.eval(x_test))
# Report, one item per line: train accuracy, test accuracy, a blank separator,
# then the fitted radii and weights.
print(
    utils.eval_accuracy(y_hat_train, y_train, 1),
    utils.eval_accuracy(y_hat_test, y_test, 1),
    '',
    rbfrnd.radii,
    rbfrnd.w,
    sep='\n',
)

[0.90703518]
[0.93567251]

[ 2.82217925  2.03530044  3.78884219  4.24857688  2.05844066  1.98639171
  6.56927916 11.43060514]
[[-1.53064998e-01]
 [ 7.40120849e+11]
 [-6.33457102e+09]
 [-3.11026089e+13]
 [ 7.52449088e+13]
 [ 2.72137778e+10]
 [ 1.85088567e+10]
 [ 4.55185060e+09]
 [-5.91285074e+08]]


In [13]:
# Class balance check, one value per line: sum of the +1 labels (their count),
# sum of the -1 labels (minus their count), and the total number of labels.
print(
    sum(y_sample[y_sample == 1]),
    sum(y_sample[y_sample == -1]),
    y_sample.size,
    sep='\n',
)

906
-891
1797


In [15]:
# Fit one rnd_rbf model (size argument 3, same_radii=False, mean=True) on
# whichever train/test split is currently left in the kernel.
# The recorded run of this cell stopped with
# "ValueError: number sections must be larger than 0."
rbfrnd = rnd_rbf(x_train, y_train, 3, 'supremum', same_radii=False, mean=True)
y_hat_train = np.sign(rbfrnd.eval(x_train))
y_hat_test = np.sign(rbfrnd.eval(x_test))
# Report, one item per line: train accuracy, test accuracy, a blank separator,
# then the fitted radii and weights.
print(
    utils.eval_accuracy(y_hat_train, y_train, 1),
    utils.eval_accuracy(y_hat_test, y_test, 1),
    '',
    rbfrnd.radii,
    rbfrnd.w,
    sep='\n',
)

ValueError: number sections must be larger than 0.

In [95]:
# Scratch check of how zip() walks several tuples in parallel.
a = ((1, 2), (3, 4))
b = (('a', 'b'), ('c', 'd'))
# Parentheses around a lone string do not create a tuple, so this is simply a
# 2-tuple of strings.
d = ('dad', 'title')

for x, y, z in zip(a, b, d):
    print(x, y, z)
    print(x[0], y[1])

(1, 2) ('a', 'b') dad
1 b
(3, 4) ('c', 'd') title
3 d


In [4]:
# Pick one row of X uniformly at random as the first centre. Indexing with a
# length-1 index array keeps C_set two-dimensional (one row).
rng = np.random.default_rng()
N = X.shape[0]
C_set = X[rng.integers(N, size=1)]
print(C_set, C_set.shape)

[[-0.24348302 -1.05407961 -1.62330374 -1.01941873 -0.70545524 -0.57879493
  -1.55390952  0.00190546 -0.84382803 -0.60722831 -0.15833887  0.0188052
  -0.54181764 -1.39260464 -0.82993996 -0.65816498 -0.22088042 -1.41354505
  -1.21282045]] (1, 19)


In [124]:
# Append a scaled copy of the current centres as extra rows.
# Not idempotent: every run of this cell doubles the number of rows in C_set.
C_set = np.r_[C_set, C_set * 2]

In [36]:
# Squared Euclidean distance of every point to every centre, laid out as
# (n_points, n_centres): X gets a middle axis that broadcasts against C_set.
dist = ((X[:, None, :] - C_set) ** 2).sum(axis=2)
print(dist.ravel())
# Cost = sum over points of the squared distance to the nearest centre.
cost = dist.min(axis=1).sum()
print(cost, dist.shape)

[147.85138212  26.91769365  72.22977878 239.76965107  39.12943018
  75.61049015  32.3208683   52.04094872  89.72604762 231.36165809
  19.16059435  47.88817182 121.63253402  21.66817232 117.34764914
 107.86648507  22.04811716  79.38491796  54.39344818  11.77523508
  15.8272808    6.96518388  98.07803544  49.88400115  68.30009279
 118.09470507  93.63492664  32.01698158  81.74488552  20.8622226
  84.1391209   89.34836471  74.54722376  79.50201317  66.65402795
  57.42562058  42.24148176   2.90630374  21.47609694  42.30126767
  12.99976874  50.18405858 154.68512709  39.49692609  34.58666628
  77.72295359   8.99802992  62.73265538  12.43851327  15.03642075
   5.10639554   5.38024669   4.43066203  39.12776248  19.08887264
   7.71336716  53.07976701  50.58570023   4.79973452  18.78792165
  29.96153473  26.60480495  90.04331764  20.56822973  56.03714346
  48.86508379  20.95936316   4.4516085  217.12568265   2.88285276
  28.43961329  44.11506415 100.41827845  21.34472203   5.20946028
  22.675438

In [37]:
# Same squared distances with the axes swapped to (n_centres, n_points):
# here C_set gets the extra axis and broadcasts against X.
dist = ((X - C_set[:, None]) ** 2).sum(axis=2)
# Second printed value: shape of the broadcast difference before the reduction.
print(dist, (X - C_set[:, None]).shape)
# Cost = sum over points of the squared distance to the nearest centre.
cost = dist.min(axis=0).sum()
print(cost, dist.shape)

[[147.85138212  26.91769365  72.22977878 239.76965107  39.12943018
   75.61049015  32.3208683   52.04094872  89.72604762 231.36165809
   19.16059435  47.88817182 121.63253402  21.66817232 117.34764914
  107.86648507  22.04811716  79.38491796  54.39344818  11.77523508
   15.8272808    6.96518388  98.07803544  49.88400115  68.30009279
  118.09470507  93.63492664  32.01698158  81.74488552  20.8622226
   84.1391209   89.34836471  74.54722376  79.50201317  66.65402795
   57.42562058  42.24148176   2.90630374  21.47609694  42.30126767
   12.99976874  50.18405858 154.68512709  39.49692609  34.58666628
   77.72295359   8.99802992  62.73265538  12.43851327  15.03642075
    5.10639554   5.38024669   4.43066203  39.12776248  19.08887264
    7.71336716  53.07976701  50.58570023   4.79973452  18.78792165
   29.96153473  26.60480495  90.04331764  20.56822973  56.03714346
   48.86508379  20.95936316   4.4516085  217.12568265   2.88285276
   28.43961329  44.11506415 100.41827845  21.34472203   5.20946

In [38]:
# Draw l rows of X, each with probability proportional to its distance to the
# nearest centre (nearest-centre distance divided by the total cost).
l = 2
prob = dist.min(axis=0) / cost
# The first printed value shows that the probabilities sum to (about) 1.
print(sum(prob), prob)
a = rng.choice(X, l, p=prob)
print(a)

0.9999999999999998 [7.29221856e-03 1.32761495e-03 3.56246473e-03 1.18257447e-02
 1.92991336e-03 3.72920572e-03 1.59410641e-03 2.56672590e-03
 4.42540300e-03 1.14110518e-02 9.45024928e-04 2.36190565e-03
 5.99907156e-03 1.06870187e-03 5.78773558e-03 5.32011249e-03
 1.08744123e-03 3.91536531e-03 2.68275417e-03 5.80769598e-04
 7.80621656e-04 3.43531744e-04 4.83733368e-03 2.46034250e-03
 3.36864761e-03 5.82458133e-03 4.61819389e-03 1.57911833e-03
 4.03176192e-03 1.02895140e-03 4.14984865e-03 4.40677520e-03
 3.67676407e-03 3.92114059e-03 3.28746159e-03 2.83230478e-03
 2.08340371e-03 1.43342604e-04 1.05922847e-03 2.08635242e-03
 6.41165159e-04 2.47514172e-03 7.62926757e-03 1.94803872e-03
 1.70585845e-03 3.83339511e-03 4.43794301e-04 3.09405450e-03
 6.13483324e-04 7.41615431e-04 2.51853935e-04 2.65360622e-04
 2.18525897e-04 1.92983110e-03 9.41487522e-04 3.80433097e-04
 2.61796174e-03 2.49495119e-03 2.36729023e-04 9.26644236e-04
 1.47774107e-03 1.31218288e-03 4.44105116e-03 1.01445130e-03
 2.76

In [39]:
print(X.shape, C_set.T.shape)
# Squared cosine similarity between every centre and every point, laid out as
# (n_centres, n_points): squared dot products over the product of the squared norms.
dist3 = (C_set @ X.T) ** 2 / ((C_set ** 2).sum(axis=1)[:, None] * (X ** 2).sum(axis=1))
print(dist3.shape, dist3)
# Same reduction as for the other metrics: per-point minimum over the centres,
# then summed over points.
cost3 = dist3.min(axis=0).sum()
print(cost3)

(569, 19) (19, 1)
(1, 569) [[4.67702449e-01 3.06276770e-04 5.33860443e-01 6.76108386e-01
  1.60795457e-02 5.14099121e-01 1.92575516e-01 5.60969468e-01
  7.34471863e-01 7.52051667e-01 7.75344246e-02 5.42437786e-01
  3.35600903e-01 7.41128099e-03 7.93251772e-01 8.74657550e-01
  5.38776899e-03 8.00822308e-01 2.36575321e-01 2.92362979e-01
  1.13508885e-01 6.01994385e-01 4.41717210e-01 1.44170082e-01
  5.89141685e-01 4.01956280e-01 7.59845048e-01 4.08413645e-02
  8.55142707e-01 4.09453316e-03 7.55824042e-01 5.76140633e-01
  6.99529120e-01 7.22450483e-01 6.00228792e-01 4.87130916e-01
  7.22588557e-01 8.36537473e-01 3.24313347e-01 3.96846066e-01
  2.48132824e-01 2.84499079e-01 6.22607816e-01 5.81535883e-01
  2.46200893e-01 5.46759354e-01 5.15705452e-01 5.81392603e-01
  2.63400188e-01 1.32759027e-01 6.99216015e-01 6.79434160e-01
  7.37065446e-01 1.39121282e-01 2.01348024e-02 5.36470727e-01
  4.00947450e-01 7.39063640e-01 7.50414108e-01 2.68025801e-01
  1.11153728e-01 2.88554403e-02 7.09588130e

In [40]:
# Manhattan (L1) distance of every centre to every point, laid out as
# (n_centres, n_points).
dist2 = np.sum(np.abs(X - C_set[:, None]), axis=2)
print(dist2, dist2.shape)
# Cost = sum over points of the L1 distance to the nearest centre.
cost2 = np.sum(np.min(dist2, axis=0))
# Report the values computed in this cell (cost2 / dist2), not the `cost` /
# `dist` left in the kernel by the squared-Euclidean cells.
print(cost2, dist2.shape)


[[48.68090698 18.75687209 35.85397944 56.75962092 24.10027761 31.49480169
  21.86418336 27.92950512 36.44961518 54.01889515 13.74543669 26.73021303
  43.27422271 17.84395337 41.54032977 39.59630334 17.00775188 35.64529784
  28.40996879 13.08769266 14.54260837  9.39893672 37.80739926 25.46142132
  33.09552183 42.51679742 36.79650905 23.05505219 35.13280132 18.38011017
  38.17160201 33.83097404 35.34902675 34.81453315 30.76306852 28.69844279
  25.4210236   5.30165163 12.93717259 24.12157606 11.98422798 24.88558218
  50.27550653 23.79064705 21.86031019 35.70993173 10.93451935 29.5169295
  12.29314712 13.10700143  5.56307497  8.10146661  6.76122947 25.39623108
  14.8338908   8.48056073 29.73758091 28.00932638  7.31749046 13.25174547
  14.30759045 17.6261418  37.79794285 15.06782998 28.16876301 27.04419907
  15.10092199  6.85884556 48.66061108  5.39282576 19.59935963 19.96845037
  39.19672049 16.47657228  8.60776462 19.24114504 16.8103865  36.98539188
  61.39922442 11.42868866 15.66894131 2

In [42]:
# Sampling probabilities from the squared-cosine scores: per-point minimum
# over the centres divided by the total.
prob = dist3.min(axis=0) / cost3
# The first printed value shows that the probabilities sum to (about) 1.
print(sum(prob), prob)
# Sanity check: indices of any negative probabilities.
print(np.nonzero(prob < 0))
a = rng.choice(X, l, p=prob)
print(a)

1.0000000000000004 [2.20711419e-03 1.44533732e-06 2.51931749e-03 3.19059355e-03
 7.58802817e-05 2.42606270e-03 9.08774705e-04 2.64724651e-03
 3.46601408e-03 3.54897417e-03 3.65889315e-04 2.55979446e-03
 1.58371956e-03 3.49742523e-05 3.74339978e-03 4.12755823e-03
 2.54251852e-05 3.77912556e-03 1.11641226e-03 1.37967736e-03
 5.35654820e-04 2.84084540e-03 2.08448839e-03 6.80346736e-04
 2.78019279e-03 1.89685432e-03 3.58575157e-03 1.92732699e-04
 4.03546659e-03 1.93223325e-05 3.56677621e-03 2.71884009e-03
 3.30111731e-03 3.40928451e-03 2.83251347e-03 2.29879822e-03
 3.40993609e-03 3.94766744e-03 1.53045294e-03 1.87273893e-03
 1.17095277e-03 1.34256718e-03 2.93812134e-03 2.74430058e-03
 1.16183588e-03 2.58018818e-03 2.43364307e-03 2.74362443e-03
 1.24300032e-03 6.26497322e-04 3.29963975e-03 3.20628806e-03
 3.47825334e-03 6.56521163e-04 9.50172662e-05 2.53163557e-03
 1.89209359e-03 3.48768293e-03 3.54124643e-03 1.26482885e-03
 5.24540701e-04 1.36170448e-04 3.34858634e-03 7.83000122e-04
 3.24

In [85]:
def distance(X, C, metric = 'euclidean'):
    """Dissimilarity between every row of `C` and every row of `X`.

    Parameters
    ----------
    X : ndarray, 2-D (n_points, n_features)
        Points.
    C : ndarray, 2-D (n_centres, n_features)
        Centres. `C[:, None]` is broadcast against `X`.
    metric : {'euclidean', 'supremum', 'manhattan', 'cosine'}
        * 'euclidean' -- *squared* Euclidean distance (no square root is taken;
          callers apply np.sqrt themselves when they need the true distance).
        * 'supremum'  -- largest absolute coordinate difference.
        * 'manhattan' -- sum of absolute coordinate differences.
        * 'cosine'    -- arccos(cos^2(theta) / pi), where theta is the angle
          between the two vectors.

    Returns
    -------
    ndarray of shape (n_centres, n_points)

    Raises
    ------
    ValueError
        If `metric` is not one of the supported names.
    """
    if metric == 'euclidean':
        dist = np.sum((X - C[:, None])**2, axis=2)
    elif metric == 'supremum':
        dist = (np.max(np.abs(X - C[:, None]), axis=2))
    elif metric == 'manhattan':
        dist = (np.sum(np.abs(X - C[:, None]), axis=2))
    elif metric == 'cosine':
        # NOTE(review): the division by pi happens inside arccos, so the result
        # is arccos(cos^2 / pi) rather than arccos(...) / pi, and it is never 0
        # even for identical vectors -- confirm this is the intended formula.
        dist = np.arccos((C @ X.T)**2/(np.sum(X**2, axis=1) * np.sum(C**2, axis=1)[:, None])/np.pi)
    else:
        # Fail with a clear message instead of reaching `return dist` with no
        # value assigned.
        raise ValueError(
            "unknown metric %r; expected 'euclidean', 'supremum', "
            "'manhattan' or 'cosine'" % (metric,)
        )
    return dist

def prob_kmp(dist, cost):
    """Sampling probabilities proportional to each point's nearest-centre distance.

    Parameters
    ----------
    dist : array of shape (n_centres, n_points)
        Centre-to-point dissimilarities; the minimum is taken over axis 0.
    cost : scalar
        Normaliser, normally `cost_kmp(dist)` (the sum of those minima).

    Returns
    -------
    ndarray of shape (n_points,)
        `min(dist, axis=0) / cost`. When `cost` is zero (every point coincides
        with a centre) a uniform distribution is returned instead of NaNs, so
        the result can still be used as the `p` argument of a sampler.
    """
    nearest = np.min(dist, axis=0)
    if np.size(nearest) and not np.any(cost):
        # 0 / 0 would give NaN probabilities; fall back to uniform weights.
        return np.full(np.shape(nearest), 1.0 / np.size(nearest))
    return nearest / cost

def cost_kmp(dist):
    """Total cost: for each point (column) take the smallest entry over the
    centres (axis 0), then add those minima up."""
    nearest = np.min(dist, axis=0)
    return np.sum(nearest)

def sample_kmp(X, prob, l):
    """Draw `l` rows of `X` at random, row i being chosen with probability `prob[i]`.

    A copy of `X` is sampled, so the caller's array is left untouched. A new,
    unseeded generator is created on every call, so results are not
    reproducible between calls.
    """
    pool = X.copy()
    generator = np.random.default_rng()
    return generator.choice(pool, l, p=prob)

In [86]:
# Oversampling-style seeding: start from one random training row, then for r
# rounds add l more rows sampled with probability proportional to their
# dissimilarity to the nearest centre chosen so far.
r, l = 5, 3
metric = 'cosine'

# NOTE: this rebinds the notebook-wide X to a copy of the current training split.
X = x_train.copy()
N = x_train.shape[0]
rng = np.random.default_rng()
C_set = X[rng.integers(N, size=1), :]

for _ in range(r):
    dist = distance(X, C_set, metric)
    cost = cost_kmp(dist)
    prob = prob_kmp(dist, cost)
    C_temp = sample_kmp(X, prob, l)
    # Grow the candidate set by the l newly sampled rows.
    C_set = np.r_[C_set, C_temp]

# Dissimilarities to the final candidate set (1 + r*l rows), used by the next cell.
dist = distance(X, C_set, metric)

In [87]:
# Scratch cell: rank the sampled candidate centres by how many points fall
# closest to them, keep the 5 most populated ones, and try several ways of
# deriving a radius from them. Relies on C_set / dist / X left by the previous cell.
print(C_set.shape)
print(dist.shape)
print(X.shape)
# One-hot assignment matrix: closest[c, i] == 1 when candidate c has the
# smallest dist value for point i.
closest = np.zeros(dist.shape)
# NOTE: test2 is not used anywhere else in this cell.
test2 = np.zeros(dist.shape)
closest[np.argmin(dist, axis=0), range(dist.shape[1])] = 1
# Number of points assigned to each candidate, and the same as a fraction.
count = np.array([np.count_nonzero(closest[i, :]) for i in range(C_set.shape[0])])
probxx = count/np.sum(count)
print(probxx)
print((np.sum(closest, axis=1)))
# Candidate indices ordered from most to least populated.
sidx = np.argsort(count)[::-1]
print(sidx)
# Keep the 5 most populated candidates.
C_new = C_set[sidx[:5],:]
# Re-assign every point to its nearest kept centre; distance() is called with
# its default metric here ('euclidean', i.e. squared distances), not with the
# `metric` used to build `dist` in the previous cell.
jijij= distance(X, C_new)
#print(distance(X, C_new)[0,:])
labels = np.argmin(jijij, axis=0)
print(labels)
# NOTE: aa is not used anywhere else in this cell.
aa= np.arange(10)
#print(C_new[0,:])
#print(C_new[0])
# For each metric: mean dissimilarity of every kept centre to all kept centres
# (itself included), then the smallest of those means. The euclidean variant
# takes the square root first because distance() returns squared values.
print(np.min(np.mean(np.sqrt((distance(C_new, C_new, 'euclidean'))), axis=0)))
print(np.min(np.mean(distance(C_new, C_new, 'supremum'), axis=0)))
print(np.min(np.mean(distance(C_new, C_new, 'manhattan'), axis=0)))
print(np.min(np.mean((distance(C_new, C_new, 'cosine')), axis=0)))
print(C_new.shape, C_new[0,None].shape)
# Largest dissimilarity between centre 0 and the points labelled 0. The first
# two lines print the squared value; the third takes the square root.
# NOTE(review): `X[np.nonzero(...), :]` passes the tuple returned by np.nonzero
# as a single index element, which appears to add a leading length-1 axis; the
# third line's `X[np.nonzero(...)]` gives the plain 2-D row selection -- confirm
# the first two lines behave as intended.
print(np.max(distance(X[np.nonzero(labels == 0), :], C_new[0, None])))
print(np.max(distance(X[np.nonzero(labels == 0), :], C_new[0, None], 'euclidean')))
print(np.max(np.sqrt(distance(X[np.nonzero(labels == 0)], C_new[0, None], 'euclidean'))))
#print(X[np.nonzero(labels == 1)].shape)
#print(np.arccos((C_new @ X[np.nonzero(labels == 0)].T)**2/(np.sum(X[np.nonzero(labels == 0)]**2, axis=1) * np.sum(C_new**2, axis=1)[:, None])/np.pi))

(16, 19)
(16, 398)
(398, 19)
[0.08291457 0.01256281 0.06532663 0.11306533 0.02763819 0.05527638
 0.05025126 0.01507538 0.08291457 0.04773869 0.13567839 0.02512563
 0.02512563 0.1080402  0.09045226 0.06281407]
[33.  5. 26. 45. 11. 22. 20.  6. 33. 19. 54. 10. 10. 43. 36. 25.]
[10  3 13 14  8  0  2 15  5  6  9  4 12 11  7  1]
[3 3 2 1 4 4 3 3 3 3 4 3 3 3 2 4 3 4 3 3 4 3 3 4 3 3 3 3 4 4 3 3 4 3 3 4 3
 4 3 0 3 3 3 4 3 2 4 3 4 4 4 1 3 1 1 3 3 3 3 3 3 3 3 1 1 4 3 3 1 3 3 4 4 2
 3 3 3 4 3 1 4 3 3 3 2 4 4 4 4 3 3 4 2 2 2 3 3 3 3 3 3 4 3 3 4 3 3 3 3 3 2
 4 3 3 3 3 2 4 3 3 4 4 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 1 4 4 3 3 3 4 3 3 3 3
 1 4 3 4 3 0 1 0 1 1 0 1 3 3 3 0 1 1 0 0 0 0 1 4 0 0 3 0 1 0 0 3 1 1 1 3 0
 1 1 3 1 3 1 0 1 0 0 0 3 1 1 4 0 1 0 4 1 1 0 4 1 1 1 1 0 0 1 1 0 0 1 3 0 3
 1 1 1 1 0 0 0 1 0 0 1 1 1 1 0 1 1 1 0 1 1 1 1 0 1 3 1 0 1 0 0 0 0 1 3 0 0
 0 0 1 1 0 1 1 0 0 1 0 4 1 3 1 3 0 0 0 0 1 0 0 0 1 1 1 0 0 1 0 3 0 1 1 1 1
 1 1 4 1 0 1 0 0 0 0 1 0 0 4 1 0 1 1 3 1 1 1 1 1 3 1 0 0 0 1 1 0 1 1 3 0 1


In [5]:
from sklearn.datasets import load_iris, load_breast_cancer, load_digits
import numpy as np
# Print the raw feature-matrix shape (n_samples, n_features) of each data set
# used in this notebook, in the order: breast cancer, heart, iris, digits.
breast_cancer = load_breast_cancer()
check = breast_cancer['data']
print(check.shape)

# The heart file keeps the class label in its last column, which is dropped here.
heart = np.loadtxt( 'heart.dat' )
check2 = heart[:, :-1].copy()
print(check2.shape)

iris = load_iris()
check3 = iris['data']
print(check3.shape)

digits = load_digits()
check4 = digits['data']
print(check4.shape)

(569, 30)
(270, 13)
(150, 4)
(1797, 64)


In [2]:
from rbf import rnd_rbf, km_rbf

# NOTE: after `from rbf import rnd_rbf, km_rbf` the name `km_rbf` refers to the
# module `rbf.km_rbf`, so this call fails with
# "AttributeError: module 'rbf.km_rbf' has no attribute 'eval'" (see the
# recorded output of this cell).
km_rbf.eval()

AttributeError: module 'rbf.km_rbf' has no attribute 'eval'

https://arxiv.org/pdf/0912.4540.pdf

https://math.stackexchange.com/questions/3291489/can-the-fibonacci-lattice-be-extended-to-dimensions-higher-than-3

https://math.stackexchange.com/questions/3184449/is-there-a-way-to-generate-individual-uniformly-distributed-points-on-a-sphere-f/3185736#3185736

https://math.stackexchange.com/questions/1358046/is-the-fibonacci-lattice-the-very-best-way-to-evenly-distribute-n-points-on-a-sp

https://sci-hub.se/https://onlinelibrary.wiley.com/doi/10.1002/dac.4625

https://en.wikipedia.org/wiki/Golden-section_search

http://vldb.org/pvldb/vol5/p622_bahmanbahmani_vldb2012.pdf

http://ilpubs.stanford.edu:8090/778/1/2006-13.pdf