In [14]:
import numpy as np
from numpy.linalg import inv
from numpy import linalg as LA
from sklearn.metrics.pairwise import rbf_kernel

## Assume we have $10$ samples and $3$ features

In [2]:
T = 10
K = 3

In [3]:
Y_train = np.random.random((T, 1)) ## label

In [4]:
X_train = np.random.random((T, 3)) ## input size

## Ridge Regression V1：

Assuming $Y_{label}$ is training label in a form of column: $T \times 1$,
$X_{train}$ is training data in the form of matrix: $T \times K$, where
$K$ is the number of features.

### Step 1: transpose $Y_{train}$

Transpose $Y_{train}$: now $Y_train$ is in the form of $1 \times T$.

In [5]:
Y_train = Y_train.T

In [6]:
Y_train

array([[0.13161977, 0.01160174, 0.07528202, 0.12481427, 0.73429626,
        0.11108936, 0.315798  , 0.5833124 , 0.1974174 , 0.21730793]])

In [7]:
X_train

array([[0.36583163, 0.9601442 , 0.19581073],
       [0.17963002, 0.64370528, 0.11226642],
       [0.08098427, 0.42255776, 0.18975507],
       [0.98751032, 0.3405226 , 0.3617746 ],
       [0.3141216 , 0.13780046, 0.72934323],
       [0.66426454, 0.5610924 , 0.28890157],
       [0.5437049 , 0.57003855, 0.02410307],
       [0.10098238, 0.78735054, 0.52798686],
       [0.77987505, 0.74850491, 0.62940982],
       [0.21594251, 0.87183964, 0.97724911]])

### Step 2: Create $K$ matrix in the form of $T \times T$

Assuming we declare a penalty $\alpha$

In [15]:
X_new = np.random.random((100, 10))
Y_new = np.random.random((10, 10))

In [8]:
def guassian_kernel(x, y, gamma):
    ## TODO: NORM
    return np.exp(-gamma * LA.norm(x - y)**2)

In [16]:
guassian_kernel(X_new, Y_new)

TypeError: guassian_kernel() missing 1 required positional argument: 'gamma'

In [18]:
rbf_kernel(X_new, Y_new).shape

(100, 10)

In [13]:
K_matrix = np.zeros((T, T))
alpha = 0.1

for s in range(T):
    for t in range(T):
        K_matrix[s, t] = np.dot(X_train[s,:], X_train[t,:])

In [15]:
K_matrix.shape

(10, 10)

In [21]:
additive_term = np.eye(T) * alpha

K_matrix = K_matrix + additive_term

In [23]:
K_matrix = inv(K_matrix)

### Step 3: Construct k in the form $T \times 1$

$x_{input}$ is the new batch, we want to make prediction.

In [26]:
x_input = np.random.random((K))

In [27]:
k = np.zeros((T, 1))

In [28]:
for t in range(T):
    k[t] = np.dot(X_train[t, :], x_input)

### Final Step: find $w \cdot x$：

In [29]:
result = np.matmul(np.matmul(Y_train, K_matrix), k)

In [30]:
result

array([[0.03462727]])

As you could the see the result is a number, which means we successfully predict a new
value with training data.

However, this approach is very naive and incomplete. We will have to go back and
calculate the corresponding Lagrange Multiplier and then find the weights explicitly because we
have infinity possibility to compute weights otherwise.

Also, we haven't really implement kernel method. I believe we could replace the dot product
with kernel method. This will extend ridge regression to KRR.

I view the single output as failure because we will need to take advantage of parallelism, which I will
read the spec for np.dot() carefully and make sure our project have the feature.

Lastly, as you see, the inverse is very expensive. We will optimize it.

Otherwise, this V1 is a good place to demonstrate the algorithm which I will keep it.

In [32]:
K_matrix.shape

(10, 10)

In [33]:
Y_train.shape

(1, 10)

### Closed form solution

In [34]:
np.matmul(K_matrix, Y_train.T)

array([[ 2.33379631],
       [-1.62652515],
       [-1.33614138],
       [ 2.24641826],
       [-0.23258321],
       [ 3.78041239],
       [-1.03578979],
       [-0.29751108],
       [-1.41086957],
       [-1.08732719]])

In [36]:
lagrange = 2 * alpha * np.matmul(K_matrix, Y_train.T)

In [42]:
lagrange.shape

(10, 1)

In [39]:
weights = np.zeros(3)

In [44]:
for i in range(3):
    weights[i] = 1 / (2 * alpha) * np.dot(lagrange.T, X_train[:, i])

In [45]:
weights

array([-0.2898782 ,  0.91465022,  0.22331624])