In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import  train_test_split
import matplotlib.pyplot as plt

In [2]:
def create_data():
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df['label'] = iris.target
    df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
    data = np.array(df.iloc[:100, [0, 1, -1]])
    for i in range(len(data)):
        if data[i,-1] == 0:
            data[i,-1] = -1
    # print(data)
    return data[:,:2], data[:,-1]

In [3]:
X, y = create_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [19]:
class SVC():
    def __init__(self,C=1.0,max_iter=1000,kernel='linear',epsilon=0.001,c=0.5,d=3):
        self.C=C
        self.max_iter=max_iter
        self.kernel=kernel
        self.epsilon=epsilon
        
        self.c=c
        self.d=d
        
        if self.C<0:
            raise ValueError('penalty term must be positive,but got (C=%r)' % self.C)
        if self.kernel not in ('linear','polynomial'):
            raise ValueError('we only support linear or polynomial keranl function !')
            
    def fit(self,X,Y):
        self.X=X
        self.Y=Y
        self.b=0.0
        self.m,self.n=X.shape
        self.alpha=[0.0]*self.m
        self.E=np.array([self._E_(i) for i in range(self.m)])
        self._smo()
        
        # the predict val of sample[i]
    def _g_(self,i):
        K=np.array([self._K_(j,i) for j in range(self.m)])
        return np.dot(self.alpha*self.Y,K)+self.b
        
    def _K_(self,i,j):
        if isinstance(i,int):
            if self.kernel=='linear':
                return np.inner(self.X[i],self.X[j])
            elif self.kernel=='polynomial':
                return np.dot(self.X[i],self.X[j]+self.c)**self.d
        else:
            if self.kernel=='linear':
                return np.inner(i,self.X[j])
            elif self.kernel=='polynomial':
                return np.dot(i,self.X[j]+self.c)**self.d
    
    # compute predict_val - true val
    def _E_(self,i):
        return self._g_(i)-self.Y[i]
    
    # check if alpha satisfy kkt
    def _satisfy_KKT_(self,i):
        tmp=self.Y[i]*self._g_(i)
        if abs(self.alpha[i])<self.epsilon:
            return tmp>=1
        elif abs(self.alpha[i]-self.C)<self.epsilon:
            return tmp<=1
        else :
            return abs(tmp-1)< self.epsilon
        
    #select 2 alpha to update,if return none ,stop:
    def _select_2(self):
        #first alpha,find not satisfy kkt  0<alpha<c
        alpha_1_index=[i for i in range(self.m) if self.alpha[i]>self.epsilon and \
                      abs(self.alpha[i]-self.C)>self.epsilon]
        alpha_2_index=list(set(list(range(self.m)))-set(alpha_1_index))
        
        alpha_1_index.extend(alpha_2_index) # 先找在 0<a<c之间的,再找其他的
        
        for i in alpha_1_index:
            if self._satisfy_KKT_(i):
                continue
            imax=(0,0)
            E1=self.E[i]
            alpha_1_index.remove(i)
            for j in alpha_1_index:
                E2=self.E[j]
                if abs(E1-E2)>imax[0]:
                    imax=(abs(E1-E2),j)
                    
            return i,imax[1]
        
    # classical sequential minimal optimization algorithm
    def _smo(self):
        for _ in range(self.max_iter):
            print('iter ronnd: %d' % (_+1))
            
            tup=self._select_2()
            if tup is None:
                print('satisfy stop condition')
                break
            else :
                i1,i2=tup
            E1,E2=self.E[i1],self.E[i2]
            eta=self._K_(i1,i1)+self._K_(i2,i2)-2*self._K_(i1,i2)
            alpha2_new_unc=self.alpha[i2]+self.Y[i2]*(E1-E2)/eta
            
            if self.Y[i1]==self.Y[i2]:
                L=max(0,self.alpha[i2]+self.alpha[i1]-self.C)
                H=min(self.C,self.alpha[i2]+self.alpha[i1])
                
            else:
                L=max(0,self.alpha[i2]-self.alpha[i1])
                H=min(self.C,self.C+self.alpha[i2]-self.alpha[i1])
            
            alpha2_new=H if alpha2_new_unc>H else L if alpha2_new_unc<L else alpha2_new_unc
            alpha1_new=self.alpha[i1]+self.Y[i1]*self.Y[i2]*(self.alpha[i2]-alpha2_new)
            
            b1_new=-E1-self.Y[i1]*self._K_(i1,i1)*(alpha1_new-self.alpha[i1]) \
            -self.Y[i2]*self._K_(i2,i1)*(alpha2_new-self.alpha[i2])+self.b
            b2_new=-E2-self.Y[i1]*self._K_(i1,i2)*(alpha1_new-self.alpha[i1]) \
            -self.Y[i2]*self._K_(i2,i2)*(alpha2_new-self.alpha[i2])+self.b
            
            #update b ,alpha,E1,E2
            
            if alpha1_new>0 and alpha1_new<self.C:
                self.b=b1_new
            elif alpha2_new>0 and alpha2_new<self.C:
                self.b=b2_new    
            else:
                self.b=(b1_new+b2_new)/2
                
            self.alpha[i1]=alpha1_new
            self.alpha[i2]=alpha2_new
            self.E[i1]=self._E_(i1)
            self.E[i2]=self._E_(i2)
    
    
    def _predict_(self, x):
        res = self.b
        for i in range(self.m):
            res += self.alpha[i]*self.Y[i]*self._K_(x, i)

        return 1 if res > 0 else -1

    def predict(self, X):
        res = [self._predict_(x) for x in X]
        return res


In [20]:
def pred_score(y_pred,y_true):
    count=0
    for i in y_pred:
        if y_pred[i]==y_true[i]:
            count+=1
    return count/len(y_pred)

In [18]:
svc = SVC()
svc.fit(X_train,y_train)
pred_score(svc.predict(X_test),y_test)

iter ronnd: 1
iter ronnd: 2
iter ronnd: 3
iter ronnd: 4
iter ronnd: 5
iter ronnd: 6
iter ronnd: 7
iter ronnd: 8
iter ronnd: 9
iter ronnd: 10
iter ronnd: 11
iter ronnd: 12
iter ronnd: 13
iter ronnd: 14
iter ronnd: 15
iter ronnd: 16
iter ronnd: 17
iter ronnd: 18
iter ronnd: 19
iter ronnd: 20
iter ronnd: 21
iter ronnd: 22
iter ronnd: 23
iter ronnd: 24
iter ronnd: 25
iter ronnd: 26
iter ronnd: 27
iter ronnd: 28
iter ronnd: 29
iter ronnd: 30
iter ronnd: 31
iter ronnd: 32
iter ronnd: 33
iter ronnd: 34
iter ronnd: 35
iter ronnd: 36
iter ronnd: 37
iter ronnd: 38
iter ronnd: 39
iter ronnd: 40
iter ronnd: 41
iter ronnd: 42
iter ronnd: 43
iter ronnd: 44
iter ronnd: 45
iter ronnd: 46
iter ronnd: 47
iter ronnd: 48
iter ronnd: 49
iter ronnd: 50
iter ronnd: 51
iter ronnd: 52
iter ronnd: 53
iter ronnd: 54
iter ronnd: 55
iter ronnd: 56
iter ronnd: 57
iter ronnd: 58
iter ronnd: 59
iter ronnd: 60
iter ronnd: 61
iter ronnd: 62
iter ronnd: 63
iter ronnd: 64
iter ronnd: 65
iter ronnd: 66
iter ronnd: 67
iter

iter ronnd: 645
iter ronnd: 646
iter ronnd: 647
iter ronnd: 648
iter ronnd: 649
iter ronnd: 650
iter ronnd: 651
iter ronnd: 652
iter ronnd: 653
iter ronnd: 654
iter ronnd: 655
iter ronnd: 656
iter ronnd: 657
iter ronnd: 658
iter ronnd: 659
iter ronnd: 660
iter ronnd: 661
iter ronnd: 662
iter ronnd: 663
iter ronnd: 664
iter ronnd: 665
iter ronnd: 666
iter ronnd: 667
iter ronnd: 668
iter ronnd: 669
iter ronnd: 670
iter ronnd: 671
iter ronnd: 672
iter ronnd: 673
iter ronnd: 674
iter ronnd: 675
iter ronnd: 676
iter ronnd: 677
iter ronnd: 678
iter ronnd: 679
iter ronnd: 680
iter ronnd: 681
iter ronnd: 682
iter ronnd: 683
iter ronnd: 684
iter ronnd: 685
iter ronnd: 686
iter ronnd: 687
iter ronnd: 688
iter ronnd: 689
iter ronnd: 690
iter ronnd: 691
iter ronnd: 692
iter ronnd: 693
iter ronnd: 694
iter ronnd: 695
iter ronnd: 696
iter ronnd: 697
iter ronnd: 698
iter ronnd: 699
iter ronnd: 700
iter ronnd: 701
iter ronnd: 702
iter ronnd: 703
iter ronnd: 704
iter ronnd: 705
iter ronnd: 706
iter ron

1.0

In [27]:
a=[2.0]*25
y_test*a

array([ 2.,  2., -2.,  2.,  2.,  2.,  2., -2., -2., -2.,  2., -2., -2.,
       -2., -2., -2., -2., -2.,  2., -2.,  2.,  2.,  2.,  2.,  2.])

In [30]:
b=[3.0]*25

In [39]:
[i * j for i in [1,2,3,4] for j in [1,2,3,4] ]

[1, 2, 3, 4, 2, 4, 6, 8, 3, 6, 9, 12, 4, 8, 12, 16]