In [1]:
import scipy.io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import random
import collections
from itertools import product
%matplotlib inline

In [2]:
def holdout_crossvalid(data, train, val, test, norm=False):

    np.random.seed(0)
    if isinstance(data, pd.DataFrame):
        vec = data.to_numpy()
    else:
        vec = data
    np.random.shuffle(vec)
    total = data.shape[0]

    train_len = int(train * total)
    val_len = int(val * total)
    test_len = int(test * total)
    train = vec[:train_len, :vec.shape[1] - 1]
    val = vec[train_len:train_len + val_len, :vec.shape[1] - 1]
    test = vec[train_len + val_len:, :vec.shape[1] - 1]

    train_label = vec[:train_len, vec.shape[1] - 1].reshape(-1, 1)
    val_label = vec[train_len:train_len + val_len,
                    vec.shape[1] - 1].reshape(-1, 1)
    test_label = vec[train_len + val_len:, vec.shape[1] - 1].reshape(-1, 1)

    if norm:

        x = []
        mean = []
        std = []
        for i in range(train.shape[1]):
            x.append(
                (train[:, i] - np.mean(train[:, i])) / np.std(train[:, i]))
            mean.append(np.mean(train[:, i]))
            std.append(np.std(train[:, i]))

        train = np.array(x).T

        x = []
        for i in range(test.shape[1]):
            x.append((test[:, i] - mean[i]) / std[i])

        test = np.array(x).T

        x = []
        for i in range(val.shape[1]):
            x.append((val[:, i] - mean[i]) / std[i])

        val = np.array(x).T

    return train, train_label, val, val_label, test, test_label

In [3]:
mat = scipy.io.loadmat('data5.mat')

data = np.array(mat['x'])

train, train_label, val, val_label, test, test_label = holdout_crossvalid( data, 0.7, 0.1, 0.2, True)

In [4]:
#Kmeans
def Kmeans(train, k=30, max_iter=150):
    k = k
    
    def dist(x, y):
        return (np.sum((x - y)**2, axis=-1))**0.5
    np.random.seed(0)
    points = np.random.randint(train.shape[0], size=(k))

    centroids = [train[i] for i in points]

    centroids = np.array(centroids)

    max_iter = max_iter

    for i in range(max_iter):
        pred = [np.argmin(dist(centroids, x.reshape(1, -1))) for x in train]
        cnt = collections.Counter(pred)
        temp = np.zeros((centroids.shape))
        for i, x in enumerate(pred):
            temp[x] += train[i] / cnt[x]
        centroids = temp

    return centroids, pred,k
m=0

centroids, _, k= Kmeans(train, k=2000)


def dist(x, y):
    return (np.sum((x - y)**2, axis=-1))**0.5


dmax = -1
for x in centroids:
    temp = np.max(dist(x, centroids))
    if temp > dmax:
        dmax = temp


def mat(train, centroids, dmax, k,b):
    G = []
    for x, y in product(train, centroids):
        G.append( (dist(x, y)+(dmax**2)/k)**(b))

    return G


In [5]:
s=-1
ar=np.linspace(0, 1, num=10,endpoint=False)
for b in ar[1:]:
    G = np.array(mat(train, centroids, dmax, k,b)).reshape(train.shape[0], k)

    W = np.dot(np.linalg.pinv(G), train_label)

    G_test = np.array(mat(test, centroids, dmax, k,b)).reshape(test.shape[0], k)

    prediction = np.dot(G_test, W)

    prediction = 0.5 * (np.sign(prediction - 0.5) + 1)

    cnt = 0
    for i in range(test_label.shape[0]):
        if prediction[i] == test_label[i]:
            cnt += 1
    score = cnt * 100 / test_label.shape[0]

    if score>s:
        s=score


In [6]:
s

96.2877030162413