# Radar Classification

## A Radar Classification Basics

In [1]:
import pandas as pd
import numpy as np
import numpy.linalg as la
import cvxpy as cp
from cvxpy.atoms.affine.wraps import psd_wrap
from read_data import *
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%       MGT - 418         %%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%      Convex Optimization - Project 2          %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%             2021-2022 Fall                    %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%      Learning the Kernel Function             %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

### (a) Read & Split data

**(5 points)** Read the data file ionosphere.data into memory by using the scriptsreaddata.pyorreaddata.m.Use the code skeletonsmain.ipnyb or main.m to randomly select 80% of the data for training.

In [2]:
data, labels = prepare_ionosphere_dataset()

In [3]:
from process_data import train_test_split

In [4]:
for i in range(3):
    print("#{}".format(i))
    # data_train, data_test, labels_train, labels_test
    data_train, _, _, _ = train_test_split(data, labels, train_size=0.8, random_seed=i)
    print(data_train[:1,:])

#0
[[1.0 0.97588 -0.10602 0.94601 -0.208 0.92806 -0.2835 0.85996 -0.27342
  0.79766 -0.47929 0.78225 -0.50764 0.74628 -0.61436 0.57945 -0.68086
  0.37852 -0.73641 0.36324 -0.76562 0.31898 -0.79753 0.22792 -0.81634
  0.13659 -0.8251 0.04606 -0.82395 -0.04262 -0.81318 -0.13832 -0.80975]]
#1
[[1.0 -0.205 0.2875 0.23 0.1 0.2825 0.3175 0.3225 0.35 0.36285 -0.34617
  0.0925 0.275 -0.095 0.21 -0.0875 0.235 -0.34187 0.31408 -0.48 -0.08
  0.29908 0.33176 -0.58 -0.24 0.3219 -0.28475 -0.47 0.185 -0.27104
  -0.31228 0.40445 0.0305]]
#2
[[1.0 1.0 0.5782 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0
  1.0 -1.0 1.0 -1.0 1.0 -0.62796 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0
  -1.0 1.0 -1.0]]


In [5]:
# just testing
df = pd.read_csv('ionosphere.data', sep=",", header=None)
data_array = np.array(df)
print(np.shape(data_array))
# delete the second row
print("Unique values in 2nd column:", np.unique(data_array[:,1]))
data_array = np.delete(data_array, 1, 1)
print(np.shape(data_array))
print(round(np.shape(data_array)[0]*0.8))
labels = data_array[:, -1]
labels[labels == 'g'] = -1
labels[labels == 'b'] = 1
data_array = data_array[:, :-1]
data_normalized = data_array / data_array.max(axis=0)

(351, 35)
Unique values in 2nd column: [0]
(351, 34)
281


- reference links

    - https://stackoverflow.com/questions/3674409/how-to-split-partition-a-dataset-into-training-and-test-datasets-for-e-g-cros
    - https://towardsdatascience.com/stop-using-numpy-random-seed-581a9972805f
    - https://stackoverflow.com/questions/49555991/can-i-create-a-local-numpy-random-seed

### (b) Define kernel function & Solve QCQP

**(15 points)** Define polynomial, Gaussian, and linear kernel function, and construct the kernel matrices $\hat{K}^l,\ l = 1,2,3$, for all training samples
Solve the QCQP in (5) for $\rho = 2$, $p = 2$, $\sigma = 2$ and $c = \sum_{l=1}^3 \mathrm{tr}(\hat{K}^l) $ with CVXPY and MOSEK in Python or with YALMIP and GUROBI in MATLAB, and record the optimal dualvariables $\mu_1^*$, $\mu_2^*$, and $\mu_3^*$. Use the code skeletons `kernel_learning` (in `main.ipynb`) or `kernel_learning.m`

In [6]:
def linear_kernel(x, y):
    return np.inner(x, y)


def gauss_kernel(x, y, sigma=0.5):
    return np.exp(-np.sqrt(la.norm(x - y) ** 2 / (2 * sigma ** 2)))


def poly_kernel(x, y, dimension=2, offset=1):
    return (offset + np.inner(x, y)) ** dimension


def compute_k_mat(X, kernel):
    """
    Input:
        X is an n_tr x n_feat matrix.
        kernel is a string with values 'linear', 'rfb', or 'poly'
            'linear': k(u,v) = u'*v.
            'gaussian':    k(u,v) = exp(-||u - v||^2 / (2*sigma)).
            'poly':   k(u,v) = (1 + u'*v)**degree.
            degree is a positive integer.
    Output:
        k_mat, an n_tr x n_tr matrix.
    """
    n_samples, _ = X.shape
    k_mat = np.zeros((n_samples, n_samples))
    for i, x_i in enumerate(X):
        for j, x_j in enumerate(X):
            if kernel == "linear":
                k_mat[i, j] = linear_kernel(x_i, x_j)
            elif kernel == "gaussian":
                k_mat[i, j] = gauss_kernel(x_i, x_j)
            elif kernel == "poly":
                k_mat[i, j] = poly_kernel(x_i, x_j)
    return k_mat


def compute_ks(x_mat):
    """compute all three kernel matrix"""
    kernels = ["poly", "gaussian", "linear"]
    k_mats = []
    for k in kernels:
        mat = compute_k_mat(x_mat, k)
        k_mats.append(mat)
    return k_mats


def compute_g_mat(k_mat, y_vec):
    n_tr = len(y_vec)
    g_mat = np.zeros((n_tr, n_tr))
    for i in range(n_tr):
        for j in range(n_tr):
            g_mat[i, j] = k_mat[i, j] * y_vec[i] * y_vec[j]
    return g_mat

```
for con in enumerate(cons):
    print(con)
    
(0, Inequality(Expression(CONVEX, NONNEGATIVE, (1, 1))))
(1, Equality(Expression(AFFINE, UNKNOWN, ()), Constant(CONSTANT, ZERO, ())))
(2, Inequality(Constant(CONSTANT, ZERO, ())))
(3, Inequality(Variable((281,))))
(4, Inequality(Expression(CONVEX, NONNEGATIVE, (1, 1))))
(5, Equality(Expression(AFFINE, UNKNOWN, ()), Constant(CONSTANT, ZERO, ())))
(6, Inequality(Constant(CONSTANT, ZERO, ())))
(7, Inequality(Variable((281,))))
(8, Inequality(Expression(CONVEX, NONNEGATIVE, (1, 1))))
(9, Equality(Expression(AFFINE, UNKNOWN, ()), Constant(CONSTANT, ZERO, ())))
(10, Inequality(Constant(CONSTANT, ZERO, ())))
(11, Inequality(Variable((281,))))
```

In [28]:
# def kernel_learning(K1, K2, K3, y_tr, rho)
def kernel_learning(k_mats, y_tr, rho):
    """
    Input: 
        Ks is list of (n_tr, n_tr) matrix.
        y_tr is (n_tr,) array
        
    Output: 
        
    Kernel learning for soft margin SVM. 
    Implementation of problem (5)
    Use cvxpy.atoms.affine.psd_wrap for each G(\hat K^l) matrix when 
    it appear in the constraints and in the objective
    """
    
    # r1 = np.trace(K1) 
    # r2 = np.trace(K2) 
    # r3 = np.trace(K3)
    # r_l = [r1, r2, r3]
    r_l = []
    g_mats = []
    for i in range(3):
        r_l.append(np.trace(k_mats[i]))
        g_mat = compute_g_mat(k_mats[i], y_tr)
        g_mats.append(g_mat)
    c = np.sum(r_l)
    
    
    n_tr = len(y_tr)
    lambda_ = cp.Variable(n_tr)
    z = cp.Variable(1)
    
    obj = cp.Maximize(cp.sum(lambda_) - c*z)
    cons = []
    for l in range(3):
        # Exception: Invalid dimensions for arguments.
        cons.append(z * r_l[i] >= 1/ (2 * rho) * cp.quad_form(lambda_, psd_wrap(g_mats[i]))) # G1
        cons.append(lambda_ @ y_tr == 0) # lambda_ * y_tr == 0
        cons.extend([lambda_>=0, lambda_<=1])
        
    prob = cp.Problem(obj, cons)
    prob.solve(solver=cp.MOSEK)
    # print("lambda_.value =", lambda_.value)
    
    # mu_opt_l (l=1,2,3) denote the optimal dual value of the constraint
    mu_opt1 = cons[0].dual_value
    mu_opt2 = cons[4].dual_value
    mu_opt3 = cons[8].dual_value
    print("mu_opt1 =", mu_opt1)
    print("mu_opt2 =", mu_opt2)
    print("mu_opt3 =", mu_opt3)

    # b = w^T x_i - y i x_i (n_d)
    # w = 1/rho \sum_{i=1}{m} \lambda_i x_i y_i (m = n_tr sample number)
    # from 4(c) b_opt is the dual variable of the constraint [1]
    # print(cons[1].dual_value, cons[5].dual_value, cons[9].dual_value)
    b_opt = np.max([cons[1].dual_value, cons[5].dual_value, cons[9].dual_value])
    # for idx in range(12):
    #     print(idx, cons[idx].dual_value)
    
    return mu_opt1, mu_opt2, mu_opt3, lambda_.value, b_opt

    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

In [30]:
def main_q4_b():
    # \rho = 2, p = 2, \sigma = 0.5 and c = \sum_{l=1}^3 \mathrm{tr}(\hat{K}^l)
    data, labels = prepare_ionosphere_dataset()
    
    x_tr, x_te, y_tr, y_te = train_test_split(data, labels, train_size=0.8)
    
    k_mats = compute_ks(x_tr)
    
    res = kernel_learning(k_mats, y_tr, rho=2)

if __name__ == "__main__":
    main_q4_b()

mu_opt1 = [19.03475044]
mu_opt2 = [0.72732141]
mu_opt3 = [0.72502455]
<class 'tuple'>


### (c) Apply kernel trick for SVM prediction

**(10 points)** Use the code skeletons `SVM_predict`(in `main.ipynb`) or `SVM_predict.m`.

> The size of the dual QP is independent of the feature
dimension D!

In [None]:
def svm_predict(kernel, y_tr, y_te, lambda_opt, b_opt, rho):
    """
    Predict function for kernel SVM. 
    See lecture slide 183.
    """
    n_te = len(y_te)
    n_tr = len(y_tr)
    ...
    
    # wx = \sum_{i=1}^{m} \lambda_i y_i k_mat
    wx = 0
    y_pre = np.sign(1/rho * (wx - b_opt))
    
    acc = ...
    return acc

In [None]:
def main_q4_c():
    # \rho = 2, p = 2, \sigma = 0.5 and c = \sum_{l=1}^3 \mathrm{tr}(\hat{K}^l)
    data, labels = prepare_ionosphere_dataset()
    
    x_tr, x_te, y_tr, y_te = train_test_split(data, labels, train_size=0.8)
    
    k_mats = compute_ks(x_te)
    
    (mu_opt1, mu_opt2, mu_opt3, lambda_.value, b_opt) = kernel_learning(k_mats, y_tr, rho=2)
    
    k_mat_sum = mu_opt1 * k_mats[0] + mu_opt2 * k_mats[1] + mu_opt3 * k_mats[2]
    
    acc = svm_predict(k_mat_sum, y_tr, y_te, lambda_opt, b_opt, rho=2)
    
    print("Current accuracy is ", acc)

if __name__ == "__main__":
    main_q4_c()

## B Repeat experiments

**(5 points)** Repeat the steps 4(a)–(c) 100 times with different seeds for the random partition of the data intotraining and test sets, and report the average test accuracy (correct classification rate) to Table 1

| Kernel function  | $\hat{k}^1$ | $\hat{k}^2$ | $\hat{k}^3$ | $\sum_{l=1}^3 \hat{k}^l$ |
| ---------------- | ----------- | ----------- | ----------- | ------------------------ |
| Average accuracy |             |             |             |                          |


## C Solve dual problem

**(10  points)** For  each  of  the  100  training  and  test  sets  constructed  in  5.,  solve  (2)  using  the  kernels  functions $\hat{k}^1$, $\hat{k}^2$, and $\hat{k}^3$, respectively, and report the average test accuracies in Table 1. Use the code skeletons `SVM_predict`(in `main.ipynb`) or `SVM_predict.m`.

| Kernel function  | $\hat{k}^1$ | $\hat{k}^2$ | $\hat{k}^3$ | $\sum_{l=1}^3 \hat{k}^l$ |
| ---------------- | ----------- | ----------- | ----------- | ------------------------ |
| Average accuracy |             |             |             |                          |


---

In [2]:
import pandas as pd
import numpy as np
import cvxpy as cp
from cvxpy.atoms.affine.wraps import psd_wrap
from read_data import *
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%       MGT - 418         %%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%      Convex Optimization - Project 2          %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%             2021-2022 Fall                    %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%      Learning the Kernel Function             %%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

In [75]:
def svm_fit(kernel, y_tr, rho):
    """
    Dual of soft-margin SVM problem (2)
    Use cvxpy.atoms.affine.psd_wrap for each G(\hat K^l) matrix when it appear in the constraints and in the objective
    """
    n_tr = len(y_tr)
    G =  ...
    lambda_ = cp.Variable(n_tr)
    dual_obj = cp.Maximize(... cp.quad_form(lambda_, psd_wrap(G)))
    cons = []
    ...
    prob = cp.Problem(dual_obj, cons)
    prob.solve(solver=cp.MOSEK)
    lambda_opt = lambda_.value
    b_opt =  ...
    return lambda_opt, b_opt


def svm_predict(kernel, y_tr, y_te, lambda_opt, b_opt, rho):
    """
    Predict function for kernel SVM. 
    See lecture slide 183.
    """
    n_te = len(y_te)
    n_tr = len(y_tr)
    ...
    acc = ...
    return acc

def kernel_learning(K1, K2, K3, y_tr, rho):
    """
    Kernel learning for soft margin SVM. 
    Implementation of problem (5)
    Use cvxpy.atoms.affine.psd_wrap for each G(\hat K^l) matrix when it appear in the constraints and in the objective
    """
    ...
    r1 = np.trace(K1) 
    ...
    lambda_ = cp.Variable(n_tr)
    z = cp.Variable(1)
    ...
    
    cons = []
    cons.append(z * r1 >= 1/ (2 * rho) * cp.quad_form(lambda_, psd_wrap(G1)))
    ...
    ...
    prob = cp.Problem(obj, cons)
    prob.solve(solver=cp.MOSEK)

    mu_opt1 = cons[0].dual_value
    ...
    b_opt = ....dual_value
    return mu_opt1, mu_opt2, mu_opt3, lambda_.value, b_opt

In [75]:
acc_opt_kernel = []    
acc_poly_kernel = []    
acc_gauss_kernel = []    
acc_linear_kernel = []    
rho = 0.01
# data, labels = prepare_ionosphere_dataset()
for iters in range(100): 
    ## Please do not change the random seed.
    np.random.seed(iters)
    ### Training-test split
    msk = np.random.rand(data_normalized.shape[0]) <=...
    x_tr = data[...]
    x_te = data[...]
    y_tr = labels[...]
    y_te = labels[...]
 
    n_tr = y_tr.shape[0]
    n_te = y_te.shape[0]
    n_tr = x_tr.shape[0]
    n_te = x_te.shape[0]
    
    x_all = np.vstack([x_tr, x_te])
    n_all = x_all.shape[0]

    ## Prepare the initial choice of kernels 
    # It is recommended to prepare the kernels for all the training and the test data
    # Then, the kernel size will be (n_tr + n_te)x(n_tr + n_te).
    # Use only the training block (like K1[0:n_tr, 0:n_tr] ) to learn the classifier 
    # (for the functions svm_fit and kernel_learning).
    # When predicting you may use the whole kernel as it is. 
    K1 = ...
    K2 = ...
    K3 = ...

    mu_opt1, mu_opt2, mu_opt3, lambda_opt, b_opt = kernel_learning(...)
    opt_kernel = ...
    acc_opt_kernel.append(svm_predict(...))
    
    lambda_opt, b_opt = svm_fit(...)
    acc_poly_kernel.append(svm_predict(...))
    
    lambda_opt, b_opt = svm_fit(...)
    acc_gauss_kernel.append(svm_predict(...))
    
    lambda_opt, b_opt = svm_fit(...)
    acc_linear_kernel.append(svm_predict(...)
    print('Iteration-->' + str(iters))
print('Average dual accuracy with optimal kernel is ' + str(np.mean(acc_opt_kernel)))
print('Average dual accuracy with polynomial kernel is ' + str(np.mean(acc_poly_kernel)))
print('Average dual accuracy with gaussian kernel is ' + str(np.mean(acc_gauss_kernel)))
print('Average dual accuracy with linear kernel is ' + str(np.mean(acc_linear_kernel)))