In [287]:
import cvxpy as cp
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import matthews_corrcoef


In [288]:
from sklearn.metrics.pairwise import chi2_kernel

In [289]:
class KernelManager:
  """Hold a train/test split and provide a small library of kernel functions.

  The kernel functions are static methods: they never read instance state, so
  they can be called either through the class
  (``KernelManager.linear_kernel(a, b)``) or through an instance.
  """

  def __init__(self, xTrain, yTrain, xTest, yTest):
    """Store the four parts of the split unchanged."""
    self.xTrain = xTrain
    self.yTrain = yTrain
    self.xTest = xTest
    self.yTest = yTest

  @staticmethod
  def linear_kernel(x1, x2):
    """Return the dot product of ``x1`` and ``x2``."""
    return np.dot(x1, x2)

  @staticmethod
  def polynomial_kernel(x, y, p=3):
    """Return ``(1 + <x, y>) ** p``."""
    return (1 + np.dot(x, y)) ** p

  @staticmethod
  def gaussian_kernel(x, y, sigma=5.0):
    """Return ``exp(-||x - y||^2 / (2 * sigma^2))``."""
    # asarray lets plain sequences be subtracted element-wise as well.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.exp(-np.linalg.norm(diff)**2 / (2 * (sigma ** 2)))

  @staticmethod
  def hellinger_kernel(X1, X2):
    """Return ``sqrt(X1) @ sqrt(X2)`` (element-wise square roots).

    Negative entries produce NaN through ``np.sqrt``.
    """
    return np.sqrt(X1) @ np.sqrt(X2)

  @staticmethod
  def chi_square_kernel(x,y):
    """Return ``sum_i 2*x_i*y_i / (x_i + y_i)``, skipping terms with ``x_i + y_i == 0``."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    denom = x + y
    keep = denom != 0  # these terms would divide by zero; they contribute nothing
    return float(np.sum(2.0 * x[keep] * y[keep] / denom[keep]))

  @staticmethod
  def intersection_kernel(x,y):
    """Return ``sum_i min(x_i, y_i)``."""
    return float(np.minimum(x, y).sum())

  def findAccuracy(self, modelLinear, initialText):
    """Fit ``modelLinear`` on the training split and report test accuracy.

    ``modelLinear`` must provide ``fit(X, y)`` and ``predict(X)``. The accuracy
    is printed prefixed by ``initialText`` and also returned.
    """
    modelLinear.fit(self.xTrain, self.yTrain)
    y_pred = modelLinear.predict(self.xTest)
    # sklearn's accuracy metric is `accuracy_score`; `accuracy_count` does not exist.
    accuracy = metrics.accuracy_score(y_true=self.yTest, y_pred=y_pred)
    print(initialText, accuracy, "\n")
    return accuracy


# Module-level aliases: other cells refer to the kernels by their bare names.
linear_kernel = KernelManager.linear_kernel
polynomial_kernel = KernelManager.polynomial_kernel
gaussian_kernel = KernelManager.gaussian_kernel
hellinger_kernel = KernelManager.hellinger_kernel
chi_square_kernel = KernelManager.chi_square_kernel
intersection_kernel = KernelManager.intersection_kernel
  

In [290]:
# Load the letter-recognition table and give its 17 columns readable names:
# the class label first, followed by the 16 numeric attributes.
LETTER_CSV_PATH = "/content/letter-recognition.csv"
FEATURE_NAMES = [
    'xbox', 'ybox', 'width', 'height', 'onpix', 'xbar', 'ybar', 'x2bar',
    'y2bar', 'xybar', 'x2ybar', 'xy2bar', 'xedge', 'xedgey', 'yedge', 'yedgex',
]

data = pd.read_csv(LETTER_CSV_PATH)
data.columns = ['letter'] + FEATURE_NAMES

In [291]:
# Min-max scale the 16 feature columns (positions 1..16) so each lies in [0, 1].
feature_cols = data.columns[1:17]
col_min = data[feature_cols].min()
col_range = data[feature_cols].max() - col_min
# A constant column has zero range; divide by 1 instead so it maps to 0
# rather than filling the column with NaN.
col_range = col_range.replace(0, 1)
data[feature_cols] = (data[feature_cols] - col_min) / col_range


In [292]:
# Select every sample of the letter 'A' and split it 80/20 in file order
# (no shuffling): the first `train` rows are for training, the remaining
# `test` rows are the positive part of the test set.
dataA_df = data.loc[data['letter']=='A']
total = len(dataA_df)
train = int(0.8*total)    # number of A samples in training
test = total - train      # number of A samples in testing

# Feature columns 1..16 as one (total, 16) array; copy so the array is
# independent of `data`.
dataA = dataA_df.iloc[:, 1:17].to_numpy(copy=True)

In [293]:
# Training set: the leading `train` rows of the class-A feature matrix.
train_X = dataA[:train]
print(train_X, train_X.shape)

[[0.06666667 0.06666667 0.2        ... 0.4        0.13333333 0.46666667]
 [0.2        0.46666667 0.33333333 ... 0.4        0.2        0.53333333]
 [0.2        0.53333333 0.33333333 ... 0.4        0.2        0.46666667]
 ...
 [0.2        0.53333333 0.33333333 ... 0.46666667 0.06666667 0.53333333]
 [0.06666667 0.         0.13333333 ... 0.46666667 0.06666667 0.53333333]
 [0.13333333 0.2        0.2        ... 0.4        0.06666667 0.53333333]] (631, 16)


In [294]:
# Test set = held-out 'A' samples (label +1) followed by the first
# NEGATIVES_PER_LETTER samples of each of the letters 'B'..'Z' (label -1).
NEGATIVES_PER_LETTER = 5

negative_blocks = []
for offset in range(1, 26):
  letter = chr(ord('A') + offset)
  letter_rows = data.loc[data['letter'] == letter].head(NEGATIVES_PER_LETTER)
  # drop() returns a new frame, so the slice of `data` is never mutated.
  negative_blocks.append(letter_rows.drop(columns='letter').to_numpy())

# Stack once instead of growing the array row by row inside the loop.
negatives_X = np.vstack(negative_blocks)
test_X = np.vstack((dataA[train:], negatives_X))
test_Y = np.concatenate((np.ones(test, dtype=int),
                         -np.ones(len(negatives_X), dtype=int)))

print(test_X,test_X.shape)
print(test_Y,test_Y.shape)

[[0.4        0.66666667 0.6        ... 0.53333333 0.53333333 0.26666667]
 [0.2        0.6        0.4        ... 0.46666667 0.26666667 0.33333333]
 [0.2        0.4        0.33333333 ... 0.4        0.13333333 0.53333333]
 ...
 [0.2        0.53333333 0.26666667 ... 0.53333333 0.53333333 0.53333333]
 [0.06666667 0.         0.06666667 ... 0.53333333 0.4        0.53333333]
 [0.4        0.66666667 0.53333333 ... 0.46666667 0.4        0.6       ]] (283, 16)
[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -

In [295]:
# Every training sample is the letter 'A', which the test labels encode as +1,
# so the training labels are all +1. (`pred_Y` holds test-set predictions and
# is only created in a later cell, so it cannot serve as training labels.)
train_Y = np.ones(len(train_X), dtype=int)
manager = KernelManager(train_X, train_Y, test_X, test_Y)

In [296]:
def kernel_matrix(X,kernel=None):
  """Return the Gram matrix ``K[i, j] = kernel(X[i], X[j])``.

  Parameters
  ----------
  X : array-like with one sample per row.
  kernel : callable taking two samples and returning a scalar. ``None``
      (the default) means the linear kernel, i.e. the dot product.

  Returns
  -------
  numpy.ndarray of shape ``(m, m)`` where ``m`` is the number of rows of ``X``.
  """
  if kernel is None:
    # Linear kernel by default; resolved here so the definition does not
    # depend on a module-level name existing when the cell runs.
    kernel = np.dot

  X = np.asarray(X)
  m = X.shape[0]
  K = np.zeros((m,m))
  # Every (i, j) pair is evaluated; no symmetry of `kernel` is assumed.
  for i in range(m):
    for j in range(m):
      K[i,j] = kernel(X[i], X[j])

  return K

In [297]:
# Display the Gram matrix of the training set under the Hellinger kernel.
kernel_matrix(train_X, kernel=KernelManager.hellinger_kernel)

array([[3.73333333, 4.4112756 , 4.22853921, ..., 4.02657957, 3.43691428,
        3.89152273],
       [4.4112756 , 5.46666667, 5.27764618, ..., 5.03454465, 3.91234393,
        4.68337527],
       [4.22853921, 5.27764618, 5.13333333, ..., 4.88078713, 3.68341075,
        4.48244205],
       ...,
       [4.02657957, 5.03454465, 4.88078713, ..., 4.93333333, 3.63932105,
        4.31316326],
       [3.43691428, 3.91234393, 3.68341075, ..., 3.63932105, 3.46666667,
        3.59692613],
       [3.89152273, 4.68337527, 4.48244205, ..., 4.31316326, 3.59692613,
        4.13333333]])

In [298]:
# Hyper-parameters of the quadratic program solved by `optimize`.
m = train_X.shape[0]   # number of training samples
v1 = v2 = 0.9          # scale the two box-constraint upper bounds below
e = 2 / 3              # required sum of the second multiplier vector
c1 = 1 / (v1 * m)      # upper bound for each entry of alpha
c2 = e / (v2 * m)      # upper bound for each entry of alpha1

In [299]:
def optimize(train_X,c1,c2,e,kernel=None):
  """Solve the dual quadratic program for the two multiplier vectors.

  The problem solved is::

      minimize    1/2 * (alpha - alpha1)^T K (alpha - alpha1)
      subject to  sum(alpha)  == 1,   0 <= alpha  <= c1
                  sum(alpha1) == e,   0 <= alpha1 <= c2

  where ``K`` is the Gram matrix of ``train_X`` under ``kernel``.
  NOTE(review): this has the form of a one-class "slab" SVM dual - confirm
  against the intended reference.

  Parameters
  ----------
  train_X : array with one training sample per row.
  c1, c2 : upper bounds for the entries of ``alpha`` and ``alpha1``.
  e : value the entries of ``alpha1`` must sum to.
  kernel : callable ``kernel(a, b) -> scalar``; ``None`` (default) means the
      linear kernel (dot product).

  Returns
  -------
  (alpha, alpha1) : two numpy arrays of length ``len(train_X)``.

  Raises
  ------
  RuntimeError
      If the solver finishes without producing values for the variables
      (for example because the problem is infeasible).
  """
  if kernel is None:
    kernel = np.dot

  m = len(train_X)     # number of samples

  alpha = cp.Variable(m)
  alpha1 = cp.Variable(m)

  K = kernel_matrix(train_X,kernel)

  objective = cp.Minimize((1/2)*cp.quad_form((alpha-alpha1),K))
  constraints = [cp.sum(alpha) == 1,
                 cp.sum(alpha1) == e,
                 alpha <= c1,
                 alpha1 <= c2,
                 alpha >= 0,
                 alpha1 >= 0]

  prob = cp.Problem(objective, constraints)
  prob.solve()

  # Without a solution the variables carry no value; fail here with the
  # solver status instead of handing None to the caller.
  if alpha.value is None or alpha1.value is None:
    raise RuntimeError(f"optimize: solver returned no solution (status: {prob.status})")

  return alpha.value,alpha1.value

In [300]:
def bias_find(alpha,alpha1,c1,c2,X,kernel=None,tol=0.0):
  """Estimate the two offsets ``p1`` and ``p2`` from the margin samples.

  For sample ``i`` the decision value is
  ``f(i) = sum_j (alpha[j] - alpha1[j]) * kernel(X[i], X[j])``.
  ``p1`` is the mean of ``f(i)`` over the samples whose ``alpha[i]`` lies
  strictly inside ``(tol, c1 - tol)``; ``p2`` is the same mean over the samples
  whose ``alpha1[i]`` lies strictly inside ``(tol, c2 - tol)``.

  Parameters
  ----------
  alpha, alpha1 : multiplier vectors, one entry per row of ``X``.
  c1, c2 : upper bounds of the box constraints on ``alpha`` / ``alpha1``.
  X : training samples, one per row.
  kernel : callable ``kernel(a, b) -> scalar``; ``None`` (default) means the
      linear kernel (dot product).
  tol : margin used when testing "strictly inside the box". The default 0.0
      reproduces the exact comparisons ``0 < a < c``; a small positive value
      ignores multipliers that sit on a bound only up to solver round-off.

  Returns
  -------
  (p1, p2)

  Raises
  ------
  ZeroDivisionError
      If no multiplier of ``alpha`` (or of ``alpha1``) lies strictly inside its
      box, so there is nothing to average over.
  """
  if kernel is None:
    kernel = np.dot

  m = len(X)
  coeffs = [alpha[j]-alpha1[j] for j in range(m)]

  def decision_value(i):
    # f(i) = sum_j (alpha_j - alpha1_j) * k(x_i, x_j)
    return sum((coeffs[j]*kernel(X[i],X[j]) for j in range(m)), 0.0)

  def mean_over_margin(multipliers, upper, label):
    # Average the decision value over samples strictly inside (0, upper).
    inside = [i for i in range(m) if tol < multipliers[i] < upper - tol]
    if not inside:
      raise ZeroDivisionError(
          f"bias_find: no entry of {label} lies strictly inside its box constraint")
    return sum((decision_value(i) for i in inside), 0.0) / len(inside)

  p1 = mean_over_margin(alpha, c1, "alpha")
  p2 = mean_over_margin(alpha1, c2, "alpha1")

  return p1,p2

In [301]:
def svm_count(x,train_X,alpha,alpha1,kernel=None):
  """Return ``sum_i (alpha[i] - alpha1[i]) * kernel(x, train_X[i])``.

  Parameters
  ----------
  x : the sample to score.
  train_X : training samples, one per row.
  alpha, alpha1 : multiplier vectors, one entry per training sample.
  kernel : callable ``kernel(a, b) -> scalar``; ``None`` (default) means the
      linear kernel (dot product).
  """
  if kernel is None:
    kernel = np.dot

  m = len(train_X)
  return sum(((alpha[i]-alpha1[i])*kernel(x,train_X[i]) for i in range(m)), 0.0)

In [302]:
def predict(x,train_X,p1,p2,alpha,alpha1,kernel=None):
  """Classify ``x`` by the sign of ``(f(x) - p1) * (p2 - f(x))``.

  ``f(x)`` is the value returned by ``svm_count``. The result is ``+1`` when
  ``f(x)`` lies strictly between ``p1`` and ``p2``, ``-1`` when it lies outside
  that interval, and ``0`` when it equals either bound (``np.sign`` of zero).

  ``kernel`` is a callable ``kernel(a, b) -> scalar``; ``None`` (default)
  means the linear kernel (dot product).
  """
  if kernel is None:
    kernel = np.dot

  count = svm_count(x,train_X,alpha,alpha1,kernel)
  return np.sign((count-p1)*(p2-count))

In [303]:
# Using Linear Kernel: fit the multipliers and offsets on the training set,
# classify every test sample, and report the Matthews correlation coefficient.
kernel = KernelManager.linear_kernel

alpha,alpha1 = optimize(train_X,c1,c2,e,kernel)
p1,p2 = bias_find(alpha,alpha1,c1,c2,train_X,kernel)

total_test = len(test_Y)
pred_Y = [int(predict(test_X[i],train_X,p1,p2,alpha,alpha1,kernel))
          for i in range(total_test)]

print("correlation coefficient Using Linear Kernel is :", matthews_corrcoef(test_Y, pred_Y))

correlation coefficient Using Intersection Kernel is : 0.1654787361413416


In [304]:
# Using Polynomial Kernel: fit the multipliers and offsets on the training set,
# classify every test sample, and report the Matthews correlation coefficient.
kernel = KernelManager.polynomial_kernel

alpha,alpha1 = optimize(train_X,c1,c2,e,kernel)
p1,p2 = bias_find(alpha,alpha1,c1,c2,train_X,kernel)

total_test = len(test_Y)
pred_Y = [int(predict(test_X[i],train_X,p1,p2,alpha,alpha1,kernel))
          for i in range(total_test)]

print("correlation coefficient Using Polynomial Kernel is :", matthews_corrcoef(test_Y, pred_Y))

correlation coefficient Using Intersection Kernel is : 0.21619143781205985


In [305]:
# Using Gaussian Kernel: fit the multipliers and offsets on the training set,
# classify every test sample, and report the Matthews correlation coefficient.
kernel = KernelManager.gaussian_kernel

alpha,alpha1 = optimize(train_X,c1,c2,e,kernel)
p1,p2 = bias_find(alpha,alpha1,c1,c2,train_X,kernel)

total_test = len(test_Y)
pred_Y = [int(predict(test_X[i],train_X,p1,p2,alpha,alpha1,kernel))
          for i in range(total_test)]

print("correlation coefficient Using Gaussian Kernel is :", matthews_corrcoef(test_Y, pred_Y))

correlation coefficient Using Intersection Kernel is : -0.1873314482020479


In [306]:
# Using Hellinger Kernel: fit the multipliers and offsets on the training set,
# classify every test sample, and report the Matthews correlation coefficient.
kernel = KernelManager.hellinger_kernel

alpha,alpha1 = optimize(train_X,c1,c2,e,kernel)
p1,p2 = bias_find(alpha,alpha1,c1,c2,train_X,kernel)

total_test = len(test_Y)
pred_Y = [int(predict(test_X[i],train_X,p1,p2,alpha,alpha1,kernel))
          for i in range(total_test)]

print("correlation coefficient Using Hellinger Kernel is :", matthews_corrcoef(test_Y, pred_Y))

correlation coefficient Using Intersection Kernel is : 0.21491918681694266


In [307]:
# Using Chi square Kernel: fit the multipliers and offsets on the training set,
# classify every test sample, and report the Matthews correlation coefficient.
kernel = KernelManager.chi_square_kernel

alpha,alpha1 = optimize(train_X,c1,c2,e,kernel)
p1,p2 = bias_find(alpha,alpha1,c1,c2,train_X,kernel)

total_test = len(test_Y)
pred_Y = [int(predict(test_X[i],train_X,p1,p2,alpha,alpha1,kernel))
          for i in range(total_test)]

print("correlation coefficient Using Chi square Kernel is :", matthews_corrcoef(test_Y, pred_Y))

correlation coefficient Using Intersection Kernel is : 0.26520668302954026


In [308]:
# Using Intersection Kernel: fit the multipliers and offsets on the training set,
# classify every test sample, and report the Matthews correlation coefficient.
kernel = KernelManager.intersection_kernel

alpha,alpha1 = optimize(train_X,c1,c2,e,kernel)
p1,p2 = bias_find(alpha,alpha1,c1,c2,train_X,kernel)

total_test = len(test_Y)
pred_Y = [int(predict(test_X[i],train_X,p1,p2,alpha,alpha1,kernel))
          for i in range(total_test)]

print("correlation coefficient Using Intersection Kernel is :", matthews_corrcoef(test_Y, pred_Y))

correlation coefficient Using Intersection Kernel is : 0.17667648470710767
