In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import  train_test_split
import matplotlib.pyplot as plt

In [125]:
def create_data():
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df['label'] = iris.target
    df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
    data = np.array(df.iloc[:100, [0, 1, -1]])
    for i in range(len(data)):
        if data[i,-1] == 0:
            data[i,-1] = -1
    # print(data)
    return data[:,:2], data[:,-1]

In [126]:
X, y = create_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,random_state=13)

In [212]:
class SVM_SMO():
    def __init__(self,max_iter=1000,C=1.0,kernel='linear',c=0.5,d=3,b=0,gamma='auto'):
        self.C=C
        self.max_iter=max_iter
        self.kernel=kernel
        self.c=c
        self.d=d
        self.b=b
        self.gamma=gamma
        if self.kernel not in('linear','polynomial','gaussian'):
            raise ValueError('we only support linear,polynomial and gaussian kernel function!')

    
    def fit(self,x,y):
        self.x=x
        self.y=y
        self.m,self.n=x.shape
        if self.gamma=='auto':
            self.gamma=1/self.m
        self.alpha=[0.0]*self.m
        self.E=np.array([self._E_(i) for i in range(self.m)])
        print(self.E)
        self._smo()
        print(self.alpha)
        
    def _E_(self,i):
        return self._g_(i)-self.y[i]
    
    ### kernel f=sigma ayk(xi,x)+b
    def _g_(self,i):
        k=[self._k_(i,j) for j in range(self.m)]
        return np.dot(self.alpha*self.y,k)+self.b
    
    def _k_(self,i,j):
        if isinstance(i,int):
            if self.kernel=='linear':
                return np.inner(self.x[i],self.x[j])
            elif self.kernel=='polynomial':
                return (np.dot(self.x[i],self.x[j])+self.c)**self.d
            else :
                return np.exp((np.linalg.norm(self.x[i]-self.x[j])**2)*-1*self.gamma)
        else:
            if self.kernel=='linear':
                return np.inner(i,self.x[j])
            elif self.kernel=='polynomial':
                return (np.dot(i,self.x[j])+self.c)**self.d
            else:
                return np.exp((np.linalg.norm(i-self.x[j])**2)*-1*self.gamma)
    
    
    def _satisfy(self,i):
        tmp=self.y[i]*self._g_(i)
        if self.alpha[i] == 0:
            return tmp>=1
        elif self.alpha[i]>self.C:
            return tmp<=1
        else:
            return tmp ==1
        
    def _select_2(self):
        alpha_1_index=[i for i in range(self.m) if self.alpha[i]>0 and self.alpha[i]<self.C]
        alpha_2_index=list(set(list(range(self.m)))-set(alpha_1_index))
        alpha_1_index.extend(alpha_2_index)
        
        for i in alpha_1_index:
            if self._satisfy(i):
                continue
            imax=(0,0)
            E1=self.E[i]
            alpha_1_index.remove(i)
            for j in alpha_1_index:
                E2=self.E[j]
                if abs(E1-E2)>imax[0]:
                    imax=(abs(E1-E2),j)
            return i,imax[1]
    
    def _smo(self):
        print('max iter : '+str(self.max_iter))
        for _ in range(self.max_iter):
            if (_+1)%100==0 :
                print('iter round :%d' % (_+1))
            
            tup = self._select_2()
            if tup is None:
                print("alphas satisfy kkt condition,stop")
                break
            else :
                i1,i2=tup
            E1,E2=self.E[i1],self.E[i2]
            
            eta=self._k_(i1,i1)+self._k_(i2,i2)-2*self._k_(i1,i2)
            alpha2_new_unc=self.alpha[i2]+self.y[i2]*(E1-E2)/eta
            
            if self.y[i2]==self.y[i1]:
                L = max(0, self.alpha[i2] + self.alpha[i1] - self.C)
                H = min(self.C, self.alpha[i2] + self.alpha[i1])
            else:
                L = max(0, self.alpha[i2] - self.alpha[i1])
                H = min(self.C, self.C + self.alpha[i2] - self.alpha[i1])
            
            alpha2_new=H if alpha2_new_unc>H else L if alpha2_new_unc<L else alpha2_new_unc
            
            alpha1_new=self.alpha[i1]+self.y[i1]*self.y[i2]*(self.alpha[i2]-alpha2_new)
            
            b1_new=-E1-self.y[i1]*self._k_(i1,i1)*(alpha1_new-self.alpha[i1]) \
            -self.y[i2]*self._k_(i2,i2)*(alpha2_new-self.alpha[i2])+self.b
        
            b2_new=-E2-self.y[i1]*self._k_(i1,i2)*(alpha1_new-self.alpha[i1]) \
            -self.y[i2]*self._k_(i2,i2)*(alpha2_new-self.alpha[i2])+self.b
                
                
            if alpha1_new>0 and alpha1_new<self.C:
                self.b=b1_new
            elif alpha2_new>0 and alpha2_new<self.C:
                self.b=b2_new
            else:
                self.b=(b1_new+b2_new)/2
            
            self.alpha[i1]=alpha1_new
            self.alpha[i2]=alpha2_new
            self.E[i1]=self._E_(i1)
            self.E[i2]=self._E_(i2)
            
    def predict(self,x):
        res=[self._predict_(x1) for x1 in x]
        return res
    
    def _predict_(self,x):
        res=self.b
        for i in range(self.m):
            res+=self.alpha[i]*self.y[i]*self._k_(x,i)
        return 1 if res>0 else -1

In [213]:
smo=SVM_SMO(max_iter=5)
smo.fit(X_train,y_train)
smo.predict(X_test)

[-1. -1. -1. -1.  1.  1.  1.  1.  1.  1. -1.  1. -1.  1. -1. -1.  1. -1.
 -1.  1.  1. -1. -1. -1.  1. -1. -1.  1. -1.  1. -1. -1.  1. -1. -1.  1.
  1. -1. -1. -1.  1. -1.  1. -1.  1.  1. -1.  1. -1.  1.  1.  1.  1. -1.
  1. -1.  1.  1.  1.  1. -1. -1. -1. -1. -1. -1.  1. -1.  1. -1.  1.  1.
 -1.  1. -1.]
max iter : 5
[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


[-1,
 1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 1,
 -1,
 1,
 -1,
 -1,
 1,
 -1,
 1,
 1,
 -1,
 -1,
 1,
 1,
 -1]

In [214]:
def pred_score(y_pred,y_true):
    count=0
    for i in range(len(y_pred)):
        if y_pred[i]==y_true[i]:
            count+=1
    return count/len(y_pred)

In [215]:
pred_score(smo.predict(X_test),y_test)

0.96

In [216]:
print(y_test)

[-1.  1.  1. -1. -1. -1. -1. -1.  1.  1. -1.  1. -1.  1. -1. -1.  1. -1.
  1.  1. -1. -1.  1.  1. -1.]


In [217]:
print(smo.predict(X_test))

[-1, 1, 1, -1, -1, -1, -1, -1, 1, -1, -1, 1, -1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1, -1]


In [219]:
smo2=SVM_SMO(max_iter=50,kernel='gaussian1')
smo2.fit(X_train,y_train)
smo2.predict(X_test)

ValueError: we only support linear,polynomial and gaussian kernel function!

In [188]:
pred_score(smo2.predict(X_test),y_test)

0.96

sklearn

In [170]:
from sklearn.svm import SVC
clf=SVC()
clf.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [151]:
clf.score(X_test,y_test)

0.96

In [156]:
v1=[1,1]
v2=[2,2]
v3=[3,3]
vec1=np.array([v1])
vec2=np.array([v2])
vec3=np.array([v3])

In [153]:
from sklearn.metrics import euclidean_distances

In [154]:
euclidean_distances(vec1,vec2)

array([[1.41421356]])

In [155]:
np.linalg.norm(vec1-vec2)

1.4142135623730951

In [160]:
euclidean_distances(vec1,vec3)

array([[2.82842712]])

In [161]:
np.power(8,1/2)

2.8284271247461903

In [162]:
np.exp(2)

7.38905609893065

In [163]:
np.exp(1)

2.718281828459045

In [197]:
v1.var()

AttributeError: 'list' object has no attribute 'var'

In [202]:
X_test[0]

array([4.9, 3.1])

In [203]:
1/5

0.2