In [89]:
import numpy as np
from qpsolvers import solve_qp  
import pandas as pd
from sklearn.metrics import accuracy_score
class CustomSVM:
    def __init__(self, C=1.0, kernel='linear', degree=1, gamma='auto'):
        self.C = C
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.kernel_function = None
        
    def _linear_kernel(self, X1, X2):
        return np.dot(X1, X2.T)

    def _rbf_kernel(self, X1, X2, sigma=5.0):
        diff = X1[:, np.newaxis] - X2  
        norm = np.linalg.norm(diff, axis=2) 
        gamma = 1 / (2 * sigma**2)
        return np.exp(-gamma * norm ** 2)
    
    def _poly_kernel(self, X1, X2):
        return (np.dot(X1, X2.T) ) ** self.degree

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.X = X
        self.y = y

        if self.kernel == 'linear':
            self.kernel_function = self._linear_kernel
        elif self.kernel == 'rbf':
            self.kernel_function = self._rbf_kernel
        elif self.kernel == 'poly':
            self.kernel_function = self._poly_kernel
        else:
            raise ValueError("Invalid kernel type.")

        self.Gram = self.kernel_function(X, X)

        P = np.outer(y, y) * self.Gram
        q = -np.ones(n_samples)
        G = np.vstack((-np.eye(n_samples), np.eye(n_samples)))
        h = np.hstack((np.zeros(n_samples), np.ones(n_samples) * self.C))
        Aeq = y.reshape(1, -1)
        beq = np.array([0.0])
        lb = np.zeros(n_samples)
        ub = self.C * np.ones(n_samples)

        alpha = solve_qp(P, q, G, h, Aeq, beq, lb, ub, solver='clarabel')
        
        eps = 2.2204e-16
        for i in range(alpha.size):
            if alpha[i] >= self.C - np.sqrt(eps):
                alpha[i] = self.C
                alpha[i] = np.round(alpha[i], 6)
            elif alpha[i] <= 0 + np.sqrt(eps):
                alpha[i] = 0
                alpha[i] = np.round(alpha[i], 6)
            else:
                alpha[i] = np.round(alpha[i], 6)
                print(f"support vector: alpha = {alpha[i]}")

        support_vector_indices = np.where(alpha > 0)[0]
        self.support_vectors = X[support_vector_indices]
        self.support_vector_labels = y[support_vector_indices]

        bias = np.mean(self.support_vector_labels - np.dot(self.Gram[support_vector_indices][:, support_vector_indices], alpha[support_vector_indices] * self.support_vector_labels))
        self.alpha = alpha
        self.bias = bias

    def predict(self, X):
        n_samples = X.shape[0]  #樣本數
        decision_function = np.zeros(n_samples)  # 初始化决

        # 篩選支持向量
        support_vector_indices = np.where(self.alpha > 0)[0]
        support_vectors = self.support_vectors
        support_vector_labels = self.support_vector_labels

        # 计算结果
        for i in range(n_samples):
            decision_function[i] = np.sum(
                self.alpha[support_vector_indices] * support_vector_labels *
                self.kernel_function(X[i, :], support_vectors)
            ) + self.bias

        return np.sign(decision_function)


In [90]:
x_head = pd.read_csv('iris_head_23.txt', delim_whitespace=True)
x_tail = pd.read_csv('iris_tail_23.txt', delim_whitespace=True)
# 將標籤值替換為1 -1
x_head['species'] = x_head['species'].replace({2: -1, 3: 1})
x_tail['species'] = x_tail['species'].replace({2: -1, 3: 1})

y_head = x_head['species'].values
y_tail = x_tail['species'].values
columns_to_drop = ["species","Sepal_length","Sepal_width"]
x_head = x_head.drop(columns_to_drop, axis=1).values
x_tail = x_tail.drop(columns_to_drop, axis=1).values

In [91]:
#train
svm_linear = CustomSVM(C=1, kernel='linear')

svm_linear.fit(x_head, y_head)
print("Alpha (Lagrange Multipliers):", svm_linear.alpha)
print("Bias (Intercept):", svm_linear.bias)


support vector: alpha = 0.066667
support vector: alpha = 0.066667
support vector: alpha = 0.066667
support vector: alpha = 0.2
support vector: alpha = 0.0
Alpha (Lagrange Multipliers): [1.       0.066667 1.       0.       1.       0.       1.       0.
 0.       0.       0.       0.       0.       1.       0.       0.
 0.066667 0.       0.066667 0.       1.       0.       1.       0.
 0.       0.       1.       0.       0.       0.       0.       1.
 0.       0.       0.       1.       0.2      0.       1.       0.
 0.       0.       0.       0.       1.       0.       1.       0.
 1.       0.      ]
Bias (Intercept): -10.454198072222216


For best performance, build P as a scipy.sparse.csc_matrix rather than as a numpy.ndarray
For best performance, build G as a scipy.sparse.csc_matrix rather than as a numpy.ndarray
For best performance, build A as a scipy.sparse.csc_matrix rather than as a numpy.ndarray


In [92]:
y_pred = svm_linear.predict(x_tail)
from sklearn.metrics import accuracy_score, confusion_matrix

accuracy = accuracy_score(y_tail, y_pred)

print(f"Accuracy: {accuracy}")


Accuracy: 0.94
