In [1]:
import numpy as np

In [10]:
def calculate_kernel(x, y, kernel, kernel_param):
    """
    Calculate the kernel value between two vectors x and y.

    Parameters
    ----------
    x, y : array-like
        The two vectors to compare (the matrix builder passes rows of X).
    kernel : str
        One of 'linear', 'poly' or 'rbf'.
    kernel_param : number
        Exponent for 'poly', the factor multiplying the squared distance
        (gamma) for 'rbf'. Not used by 'linear'.

    Returns
    -------
    The kernel value:
        linear: x . y
        poly:   (1 + x . y) ** kernel_param
        rbf:    exp(-kernel_param * ||x - y||^2)

    Raises
    ------
    ValueError
        If `kernel` is not one of the supported names. Previously the
        function fell through and returned None silently.
    """
    if kernel == 'linear':
        return np.dot(x, y)
    elif kernel == 'poly':
        return (1 + np.dot(x, y)) ** kernel_param
    elif kernel == 'rbf':
        # asarray lets plain sequences be subtracted element-wise as well
        diff = np.asarray(x) - np.asarray(y)
        return np.exp(-kernel_param * np.dot(diff, diff))
    raise ValueError(
        "Unknown kernel {!r}; expected 'linear', 'poly' or 'rbf'".format(kernel)
    )

def calculate_kernel_matrix_old(X, kernel, kernel_param):
    """
    Reference (loop-based) construction of the kernel matrix of X.

    Entry [i, j] of the result is calculate_kernel(X[i], X[j], kernel,
    kernel_param), evaluated one pair of rows at a time. The result is a
    square float array with one row/column per row of X.
    """
    n_samples = X.shape[0]
    gram = np.zeros((n_samples, n_samples))
    for row, x_row in enumerate(X):
        for col, x_col in enumerate(X):
            gram[row, col] = calculate_kernel(x_row, x_col, kernel, kernel_param)
    return gram

def calculate_kernel_matrix(X, kernel, kernel_param):
    """
    Calculate the kernel matrix for the data X (vectorized).

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        One sample per row.
    kernel : str
        One of 'linear', 'poly' or 'rbf'.
    kernel_param : number
        Exponent for 'poly', gamma for 'rbf'. Not used by 'linear'.

    Returns
    -------
    ndarray of shape (n_samples, n_samples)
        linear: X X^T
        poly:   (1 + X X^T) ** kernel_param
        rbf:    exp(-kernel_param * ||X_i - X_j||^2)

    Raises
    ------
    ValueError
        If `kernel` is not one of the supported names. Previously the
        function fell through and returned None silently.
    """
    if kernel not in ('linear', 'poly', 'rbf'):
        raise ValueError(
            "Unknown kernel {!r}; expected 'linear', 'poly' or 'rbf'".format(kernel)
        )

    gram = np.matmul(X, X.T)
    if kernel == 'linear':
        return gram
    if kernel == 'poly':
        return (1 + gram) ** kernel_param

    # rbf: ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, evaluated for all pairs
    sq_norms = np.sum(X**2, axis=1)
    sq_dists = sq_norms[:, np.newaxis] + sq_norms[np.newaxis, :] - 2 * gram
    # The expanded form can dip slightly below zero through floating-point
    # cancellation (identical or near-identical rows); a squared distance is
    # never negative, so clamp to keep kernel values from exceeding 1.
    sq_dists = np.maximum(sq_dists, 0)
    return np.exp(-kernel_param * sq_dists)

# 500 * 500 rand array
X_train = np.random.rand(500, 500)

kernel_X_old = calculate_kernel_matrix_old(X_train, 'rbf', 4)
kernel_X_new = calculate_kernel_matrix(X_train, 'rbf', 4)
# check if same
print(np.allclose(kernel_X_old, kernel_X_new, rtol=1e-05, atol=1e-08))
# time comparision
%timeit calculate_kernel_matrix_old(X_train, 'rbf', 4)
%timeit calculate_kernel_matrix(X_train, 'rbf', 4)

True
672 ms ± 17.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
6.61 ms ± 204 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
# Scratch check of float64 overflow: the argument is far above ~709.78 (the
# largest x for which exp(x) is still finite in float64), so numpy is expected
# to return inf and emit an overflow RuntimeWarning. This value is not shown,
# because a cell only displays its last expression.
np.exp(5874.674294394035)
# Last expression, so this is what the cell displays. np.sign returns 0 for a
# zero input.
# NOTE(review): the recorded output below shows 1.0, which does not match this
# expression - the cell was presumably edited after its last run; re-run it.
np.sign(0)

  np.exp(5874.674294394035)


1.0

In [6]:
# Inspect each dataset archive: list the stored array names, print the shape
# of the four arrays and the per-feature standard deviation of the training
# inputs. After the loop X_train / y_train / X_test / y_test hold the arrays
# of the last dataset.
for a in ['A', 'B', 'C', 'D']:
    # np.load on an .npz returns an NpzFile that keeps the archive open and
    # reads arrays on access; the context manager closes it instead of leaking
    # one file handle per dataset.
    with np.load("../Data/dataset_{}.npz".format(a)) as data:
        print(data.files)
        # Read each array once, while the archive is still open.
        X_train = data['arr_0']
        y_train = data['arr_1']
        X_test = data['arr_2']
        y_test = data['arr_3']

    for split in (X_train, y_train, X_test, y_test):
        print(split.shape)

    print(np.std(X_train, axis=0))

['arr_0', 'arr_1', 'arr_2', 'arr_3']
(1500, 2)
(1500,)
(500, 2)
(500,)
[0.87224379 0.50577632]
['arr_0', 'arr_1', 'arr_2', 'arr_3']
(1500, 2)
(1500,)
(500, 2)
(500,)
[0.57555848 0.58347663]
['arr_0', 'arr_1', 'arr_2', 'arr_3']
(1500, 64)
(1500,)
(297, 64)
(297,)
[0.         0.88941529 4.76789872 4.29231556 4.27677261 5.67625322
 3.44715303 1.06504126 0.10306417 3.16453261 5.40637693 3.98859574
 4.71803627 6.01451618 3.61807678 0.84184216 0.06823163 3.59937303
 5.63271965 5.79542268 6.13252908 6.18364452 3.35741303 0.46072937
 0.03649049 3.15830306 6.16701707 5.89149575 6.14099011 5.85311537
 3.71133399 0.05157088 0.         3.43167364 6.25080034 6.30328377
 5.93987935 5.82683673 3.55466335 0.         0.15250647 2.91618465
 6.51062598 6.37576662 6.26534582 5.63800816 4.42701459 0.33231511
 0.22343878 1.80229224 5.63023978 5.19196234 5.33584541 5.95461991
 4.97077307 1.02236241 0.02581128 0.89689439 5.10239926 4.347586
 4.83954249 5.94892614 4.3019939  2.01594136]
['arr_0', 'arr_1', 'arr

In [5]:
# Scratch check of a boolean exponent: `1 != 1` is False, which numpy treats
# as 0, so this evaluates 2 ** 0 (the recorded output is 1).
np.power(2, 1 != 1)

1