In [None]:
import numpy as np
from keras.datasets import mnist
import time

In [None]:
def get_distance(x1, x2):
    return np.linalg.norm(x1-x2)

In [None]:
def kmeans(X, k, max_iters):
    # k = classes
    # X = test_x
    centroids = X[np.random.choice(range(len(X)), k, replace=False)]
    # centroids = [np.random.uniform(size=len(X[0])) for i in range(k)]

    converged = False
    current_iter = 0

    while (not converged) and (current_iter < max_iters):

        cluster_list = [[] for i in range(len(centroids))]

        for x in X:  # Go through each data point
            distances_list = []
            for c in centroids:
                distances_list.append(get_distance(c, x))
            cluster_list[int(np.argmin(distances_list))].append(x)

        cluster_list = list((filter(None, cluster_list)))

        prev_centroids = centroids.copy()

        centroids = []

        for j in range(len(cluster_list)):
            centroids.append(np.mean(cluster_list[j], axis=0))

        pattern = np.abs(np.sum(prev_centroids) - np.sum(centroids))

        # print('K-MEANS: ', int(pattern))

        converged = (pattern == 0)

        current_iter += 1

    return np.array(centroids), [np.std(x) for x in cluster_list]


In [None]:
class RBF:

  def __init__(self, X, y, tX, ty, classes,
                k, std_from_clusters=True):
    self.X = X
    self.y = y

    self.tX = tX
    self.ty = ty

    self.classes = classes
    self.k = k
    self.std_from_clusters = std_from_clusters

  def convert_to_one_hot(self, x, classes):
    arr = np.zeros((len(x), classes))
    for i in range(len(x)):
        c = int(x[i])
        arr[i][c] = 1
    return arr

  # Gaussian rbf function
  def get_rbf(self, x, c, s):
    distance = get_distance(x, c)
    return 1 / np.exp(-distance / s ** 2)

  def get_rbf_as_list(self, X, centroids, std_list):
    RBF_list = []
    for x in X:
        RBF_list.append([self.get_rbf(x, c, s) for (c, s) in zip(centroids, std_list)])
    return np.array(RBF_list)

  def fit(self):

    self.centroids = []*self.k
    self.std_list = []*self.k
    
    if self.std_from_clusters:
      self.centroids, self.std_list = kmeans(self.X, self.k, 1000)
    else:
      for i in range(self.k):
        self.centroids.append(self.X[i*2+10,])
        self.std_list.append(np.std(self.centroids[i]))
      # dMax = np.max([get_distance(c1, c2) for c1 in self.centroids for c2 in self.centroids])
      # self.std_list = np.repeat(dMax / np.sqrt(2 * self.k), self.k)


    RBF_X = self.get_rbf_as_list(self.X, self.centroids, self.std_list)
    self.w = np.linalg.pinv(RBF_X.T @ RBF_X) @ RBF_X.T @ self.convert_to_one_hot(self.y, self.classes)

    RBF_list_tst = self.get_rbf_as_list(self.tX, self.centroids, self.std_list)

    self.pred_ty = RBF_list_tst @ self.w

    self.pred_ty = np.array([np.argmax(x) for x in self.pred_ty])

    diff = self.pred_ty - self.ty

    print('Accuracy: ', len(np.where(diff == 0)[0]) / len(diff))


In [None]:
(train_x, train_y), (test_x, test_y) = mnist.load_data()

train_x = train_x[:3000].astype('float').reshape(3000,784)
train_y = train_y[:3000].astype('float')

test_x = test_x[:300].astype('float').reshape(300,784)
test_y = test_y[:300].astype('float')

In [None]:
for i,j in enumerate(train_y):
    if (j % 2 == 0):
        train_y[i] = 0
    else:
        train_y[i] = 1

for i,j in enumerate(test_y):
    if (j % 2 == 0):
        test_y[i] = 0
    else:
        test_y[i] = 1

In [None]:
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=10, std_from_clusters=False)

RBF_CLASSIFIER.fit()

Accuracy:  0.8166666666666667


In [None]:
start_time = time.time()

RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=60, std_from_clusters=True)

RBF_CLASSIFIER.fit()

print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.8833333333333333
--- 35.82745838165283 seconds ---


In [None]:
start_time = time.time()
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=2, std_from_clusters=True)

RBF_CLASSIFIER.fit()
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.71
--- 3.3110926151275635 seconds ---


In [None]:
start_time = time.time()
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=60, std_from_clusters=False)

RBF_CLASSIFIER.fit()
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.8866666666666667
--- 2.4501733779907227 seconds ---


In [None]:
start_time = time.time()
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=100, std_from_clusters=True)

RBF_CLASSIFIER.fit()
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.9466666666666667
--- 60.80492448806763 seconds ---


In [None]:
start_time = time.time()
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=100, std_from_clusters=False)

RBF_CLASSIFIER.fit()
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.9
--- 3.971332311630249 seconds ---


In [None]:
start_time = time.time()
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=100, std_from_clusters=False)

RBF_CLASSIFIER.fit()
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.9
--- 3.96287202835083 seconds ---


In [None]:
start_time = time.time()
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=10, std_from_clusters=False)

RBF_CLASSIFIER.fit()
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.8166666666666667
--- 0.46685791015625 seconds ---


In [None]:
start_time = time.time()
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=10, std_from_clusters=True)

RBF_CLASSIFIER.fit()
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.83
--- 7.382677316665649 seconds ---


In [None]:
start_time = time.time()
RBF_CLASSIFIER = RBF(train_x, train_y, test_x, test_y, classes=2,
                     k=1000, std_from_clusters=False)

RBF_CLASSIFIER.fit()
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy:  0.96
--- 40.83595108985901 seconds ---


In [None]:
from sklearn.neighbors import KNeighborsClassifier

start = time.time()
accuracies = []

# loop over various values of `k` for the k-Nearest Neighbor classifier
for k in range(1,4,2):
	# train the k-Nearest Neighbor classifier with the current value of `k`
	model = KNeighborsClassifier(n_neighbors=k)
	model.fit(train_x, train_y)

	# evaluate the model and update the accuracies list
	score = model.score(test_x, test_y)
	print("accuracy= ",  score)
	accuracies.append(score)
end = time.time()
print(end - start)

accuracy=  0.9333333333333333
accuracy=  0.93
2.6801791191101074


In [None]:
from sklearn.neighbors import NearestCentroid

start = time.time()
model2 = NearestCentroid()
model2.fit(train_x, train_y)

score = model2.score(test_x, test_y)
print("Nearest Centroid, accuracy= ", score)
end = time.time()
print(end - start)

Nearest Centroid, accuracy=  0.8333333333333334
0.01533365249633789
