In [1]:
import time
import numpy as np
import random
import math
from tqdm import tqdm

In [2]:
def load_data(file_name):
    """Load a comma-separated dataset and convert it to a binary problem.

    Every non-empty line holds an integer label followed by integer feature
    values. Each feature is divided by 255, label ``0`` is mapped to ``+1``
    and every other label is mapped to ``-1``.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A ``(data_arr, label_arr)`` tuple: the list of scaled feature rows
        and the list of ``+1`` / ``-1`` labels, both in file order.

    Raises:
        ValueError: If a label or feature field is not an integer literal.
    """
    data_arr = []
    label_arr = []

    # The context manager closes the file even when a line fails to parse.
    with open(file_name) as fr:
        for line in fr:
            cur_line = line.strip().split(',')
            if cur_line == ['']:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            data_arr.append([int(num) / 255 for num in cur_line[1:]])
            label_arr.append(1 if int(cur_line[0]) == 0 else -1)
    return data_arr, label_arr

In [6]:
class SVM:
    def __init__(self, train_data, train_label, sigma = 10, C = 200, toler = 0.001):
        self.train_data = np.mat(train_data)
        self.train_label = np.mat(train_label).T
        
        self.m, self.n = np.shape(self.train_data)
        self.sigma = sigma  # 高斯核分母的sigma
        self.C = C  # 惩罚参数
        self.toler = toler  # 松弛变量
        
        self.k = self.calcKernel()  # 核函数
        self.b = 0  # 偏置b
        self.alpha = [0] * self.train_data.shape[0]
        
        # SMO运算过程中的Ei
        self.E = [0 * self.train_data[i, 0] for i in range(self.train_label.shape[0])]
        self.supportVecIndex = []
        
    def calcKernel(self):
        """Build the m x m Gaussian kernel table for the training samples.

        Entry ``[i][j]`` is ``exp(-(Xi - Xj)(Xi - Xj)^T / (2 * sigma^2))``
        computed from rows ``i`` and ``j`` of ``self.train_data``. Only the
        upper triangle is evaluated; each value is mirrored into ``[j][i]``.

        Returns:
            A list of ``m`` lists with ``m`` entries each.
        """
        size = self.m
        gram = [[0 for _ in range(size)] for _ in range(size)]

        print("计算高斯核...")
        denominator = 2 * self.sigma**2
        for row in tqdm(range(size)):
            anchor = self.train_data[row, :]
            # The kernel is symmetric, so start at the diagonal.
            for col in range(row, size):
                delta = anchor - self.train_data[col, :]
                value = np.exp(-1 * (delta * delta.T) / denominator)
                gram[row][col] = value
                gram[col][row] = value
        return gram
    
    def calcSingleKernel(self, x1, x2):
        '''
        单独计算核函数
        '''
        result = (x1 - x2) * (x1 - x2).T
        result = np.exp(-1 * result / (2 * self.sigma**2))
        return np.exp(result)
    
    def train(self, epoch=100):
        iterStep = 0
        parameterChanged = 1
        
        # 如果parameter没有改变，则停止（说明参数收敛了）
        while(iterStep < epoch) and (parameterChanged > 0):
            iterStep += 1
            parameterChanged = 0 # reset
            
            # 找SMO的第一个变量
            for i in range(self.m):
                if self.isSatisfyKKT(i) == False:
                    # 如果找到了不满足KKT条件的，则开始找第2个
                    E1 = self.calcEi(i)
                    E2, j = self.getAlphaJ(E1, i)
                    