In [2]:
import numpy as np
from numpy import linalg
import pandas as pd
from sklearn.model_selection import train_test_split


In [20]:
def rbf_kernel(x, y, sigma=5.0):
    """Gaussian (RBF) kernel between two points.

    Evaluates ``exp(-||x - y||**2 / (2 * sigma**2))``, where ``||.||`` is the
    default norm computed by ``numpy.linalg.norm``.

    Parameters
    ----------
    x, y : array-like operands that support ``x - y``.
    sigma : float, default 5.0
        Kernel width; larger values make the kernel decay more slowly.

    Returns
    -------
    The kernel value; 1.0 when ``x`` and ``y`` coincide.
    """
    distance = linalg.norm(x - y)
    width = 2 * (sigma ** 2)
    return np.exp(-distance ** 2 / width)

class KernelPerceptron:
    """Binary perceptron trained in its dual (kernelised) form.

    One mistake counter ``alpha[i]`` is kept per training sample. The decision
    value of a point is the sum, over the retained training samples, of
    ``alpha * label * kernel(point, sample)``; the predicted class is the sign
    of that value.

    Predictions are compared against ``np.sign`` output during training, so
    labels are expected to be -1 or +1.

    Parameters
    ----------
    kernel : callable, default ``rbf_kernel``
        Called as ``kernel(a, b)`` on two samples; must return a scalar.
    max_iter : int, default 100
        Upper bound on the number of passes over the training set.

    Attributes set by ``fit``
    -------------------------
    alpha : mistake counts of the retained samples.
    sv : the retained training samples (mistake count above 1e-5).
    sv_y : labels of the retained samples.
    """

    def __init__(self, kernel=rbf_kernel, max_iter=100):
        self.kernel = kernel
        self.max_iter = max_iter

    def fit(self, X, y):
        """Learn the mistake counts from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,), labels in {-1, +1}

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``y`` does not hold one label per row of ``X``.
        """
        # Coerce to ndarrays so positional indexing (y[i], y[mask]) is well
        # defined for lists and for pandas objects with a non-default index.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-D (n_samples, n_features), got shape %s" % (X.shape,))
        n_samples = X.shape[0]
        if y.shape != (n_samples,):
            raise ValueError(
                "y must have shape (%d,) to match X, got %s" % (n_samples, y.shape)
            )

        self.alpha = np.zeros(n_samples, dtype=np.float64)

        # Gram matrix: every pairwise kernel evaluation is done once up front
        # so the training passes below only do array arithmetic.
        Kernels = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                Kernels[i, j] = self.kernel(X[i], X[j])

        for _ in range(self.max_iter):
            mistakes = 0
            for i in range(n_samples):
                # np.sign(0) == 0 never equals a +/-1 label, so a zero decision
                # value is treated as a mistake.
                if np.sign(np.sum(Kernels[:, i] * self.alpha * y)) != y[i]:
                    self.alpha[i] += 1.0
                    mistakes += 1
            # A clean pass leaves alpha untouched, so every later pass would be
            # identical; stopping here changes nothing but the running time.
            if mistakes == 0:
                break

        # Keep only the samples that were ever misclassified; the others
        # contribute nothing to the decision value.
        sv = self.alpha > 1e-5
        self.alpha = self.alpha[sv]
        self.sv = X[sv]
        self.sv_y = y[sv]

    def project(self, X):
        """Return the raw (unsigned) decision value for each element of ``X``.

        ``X`` is iterated by index over its first axis and each element is
        passed to the kernel together with every retained training sample.
        Requires ``fit`` to have been called.
        """
        y_predict = np.zeros(len(X))

        for i in range(len(X)):
            s = 0
            for w, sv_y, sv in zip(self.alpha, self.sv_y, self.sv):
                s += w * sv_y * self.kernel(X[i], sv)
            y_predict[i] = s

        return y_predict

    def predict(self, X):
        """Return the sign of the decision value for each sample in ``X``.

        A single 1-D sample is promoted to a one-row 2-D array first. The
        result contains 0 where the decision value is exactly zero.
        """
        X = np.atleast_2d(X)
        return np.sign(self.project(X))
    

In [25]:
def gen_non_lin_separable_data(n_per_cluster=500, seed=None):
    """Generate a two-class 2-D dataset that no straight line can separate.

    Each class is the union of two Gaussian clusters that share one covariance
    matrix. The +1 class is centred at (-1, 2) and (4, -4); the -1 class is
    centred at (1, -1) and (-4, 4).

    Parameters
    ----------
    n_per_cluster : int, default 500
        Number of points drawn for each of the four clusters.
    seed : int or None, default None
        If None, points are drawn from NumPy's global ``np.random`` state
        (affected by ``np.random.seed``). Otherwise a private
        ``np.random.RandomState(seed)`` is used, so the result is reproducible
        and the global state is left untouched.

    Returns
    -------
    X : ndarray of shape (4 * n_per_cluster, 2)
        All +1 points first, then all -1 points.
    Y : ndarray of shape (4 * n_per_cluster,)
        Labels, +1.0 for the first half and -1.0 for the second half.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    cov = [[1.0, 0.8], [0.8, 1.0]]

    def draw(mean):
        return rng.multivariate_normal(mean, cov, n_per_cluster)

    # Draw order matters for reproducibility of the random stream:
    # both +1 clusters are sampled before the two -1 clusters.
    X1 = np.vstack((draw([-1, 2]), draw([4, -4])))
    X2 = np.vstack((draw([1, -1]), draw([-4, 4])))
    y1 = np.ones(len(X1))
    y2 = np.ones(len(X2)) * -1

    X = np.vstack((X1, X2))
    Y = np.concatenate((y1, y2))
    return X, Y


In [26]:
def _evaluate_holdout(X, Y, max_iter=10):
    """Fit a KernelPerceptron on an 80/20 split of (X, Y) and print its test accuracy.

    The split uses ``random_state=0`` so the same rows land in the test set on
    every run. Prints the first 50 predictions, the accuracy in percent, and
    the raw correct/total counts.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
    perceptron = KernelPerceptron(rbf_kernel, max_iter=max_iter)
    perceptron.fit(X_train, y_train)

    y_predict = perceptron.predict(X_test)
    print(y_predict[:50])
    correct = np.sum(y_predict == y_test)
    print("The accuracy is", correct / len(y_predict) * 100, "%")
    print("%d out of %d predictions correct" % (correct, len(y_predict)))


def test_kernel():
    """Evaluate the kernel perceptron on synthetic data and on the banknote dataset.

    The banknote file is read from a path relative to the working directory;
    its last column is the class label, with 0 remapped to -1.
    """
    # Testing on generated data
    X, Y = gen_non_lin_separable_data()
    _evaluate_holdout(X, Y)

    # Testing on Banknote data
    data = pd.read_csv('./banknote+authentication/data_banknote_authentication.txt', header=None)
    data = data.to_numpy()

    # Build the labels as a new array instead of writing into `data`: the
    # array returned by to_numpy() can be read-only when it shares memory
    # with the DataFrame (pandas Copy-on-Write).
    X = data[:, :-1]
    Y = np.where(data[:, -1] == 0, -1, data[:, -1])
    _evaluate_holdout(X, Y)


# Run both evaluations; the banknote CSV must exist at the relative path read inside test_kernel.
test_kernel()

[ 1. -1. -1.  1. -1. -1. -1. -1. -1.  1. -1. -1.  1.  1.  1. -1.  1. -1.
 -1. -1. -1. -1.  1. -1. -1. -1.  1. -1.  1. -1. -1. -1. -1. -1. -1.  1.
 -1.  1.  1.  1.  1. -1. -1.  1. -1.  1.  1.  1. -1. -1.]
The accuracy is 100.0 %
400 out of 400 predictions correct
[ 1. -1.  1. -1. -1. -1. -1. -1.  1.  1. -1. -1.  1. -1. -1. -1.  1.  1.
 -1. -1.  1. -1. -1.  1. -1.  1. -1.  1. -1. -1.  1. -1.  1.  1.  1. -1.
 -1.  1. -1.  1. -1.  1. -1. -1.  1.  1. -1. -1.  1. -1.]
The accuracy is 100.0 %
275 out of 275 predictions correct
