# 统计学习方法第二章 感知机代码
笔记见[http://jieguangzhou.github.io](http://jieguangzhou.github.io)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Debug helper used throughout the notebook to display intermediate values.
def print_var(var, name='x'):
    """Print a labelled value followed by its shape.

    Args:
        var: Value to display. Its ``shape`` attribute is printed when it
            has one; otherwise the shape field is left empty.
        name: Label printed on the line above the value.
    """
    label = name + ':\n'
    shape = getattr(var, 'shape', '')
    print(label, var, '\nshape:', shape)

## 以下为感知机学习算法的原始形式

In [3]:
# Training data from Example 2.1. The three points are transposed so that
# each column of x holds one sample.
x = np.transpose(np.array([[3, 3], [4, 3], [1, 1]]))
print_var(x, 'x')

# Label of each sample, in the same order as the columns of x.
y = np.array([1, 1, -1])
print_var(y, 'y')

# Step size used by the parameter updates.
lr = 1

In [4]:
# Initial parameters: an all-zero 1x2 weight row vector and a zero bias.
w = np.zeros([1, 2])
b = 0
print_var(w, 'w')
print_var(b, 'b')

In [5]:
def wx_plus_b(x, w, b):
    """Return the affine score ``np.dot(w, x) + b``.

    Args:
        x: Right-hand operand of the dot product (a sample).
        w: Left-hand operand of the dot product (the weights).
        b: Bias added to the product.

    Returns:
        The result of ``np.dot(w, x) + b``.
    """
    return np.dot(w, x) + b

In [6]:
# Primal-form perceptron training. Each pass scans the samples in order,
# corrects the first one that is not classified correctly, and then starts
# a new pass. Training ends after a pass that needs no correction.
# NOTE: there is no iteration cap; the loop only exits once a full pass
# classifies every sample correctly.
iter_num = 0
all_True = False

while not all_True:
    iter_num += 1
    for i in range(x.shape[1]):
        # Column i of x is the i-th sample; y[i] is its label.
        sample_x, sample_y = x[:, i], y[i]
        pred = wx_plus_b(sample_x, w, b)

        if not sample_y * pred > 0:
            # Not correctly classified: update w and b with this sample.
            w = w + lr * sample_y * sample_x
            b = b + lr * sample_y
            print(
                iter_num,
                ' x_%s ' % (i + 1),
                ' (%d,%d) ' % (w[0, 0], w[0, 1]),
                str(b).rjust(2),
                '分类超平面: %dx1+%dx2%+d' % (w[0, 0], w[0, 1], b),
            )
            # Start a fresh pass with the updated parameters.
            break
    else:
        # The pass completed without any update: all samples are correct.
        all_True = True
        


## 以下为感知机学习算法的对偶形式

In [7]:
# Same Example 2.1 data, re-created for the dual-form section.
# After the transpose every column of x is one sample.
x = np.transpose(np.array([[3, 3], [4, 3], [1, 1]]))
print_var(x, 'x')

# One label per column of x.
y = np.array([1, 1, -1])
print_var(y, 'y')

# Step size used by the parameter updates.
lr = 1

In [8]:
# Gram matrix: the product of x transposed with x, so entry (i, j) is the
# inner product of columns i and j of x.
Gram = np.dot(np.transpose(x), x)
print_var(Gram, 'Gram')

In [9]:
# Initial dual parameters: a zero bias and one zero coefficient per column
# of x.
b = 0
alpha = np.zeros(x.shape[1])
print_var(alpha, 'alpha')

In [10]:
# Decision value of the dual-form perceptron.
def calc_dual(x_i_idx, alphas_j, ys_j, b, gram=None):
    """Evaluate the dual-form perceptron score for one training sample.

    Computes ``sum_j(alpha_j * y_j * G[i, j]) + b`` where ``G`` is the Gram
    matrix and ``i`` is ``x_i_idx``. The sum is taken as a single dot
    product, so ``alphas_j``, ``ys_j`` and the Gram row must have the same
    length.

    Args:
        x_i_idx: Index of the sample being scored (row of the Gram matrix).
        alphas_j: All alpha coefficients, one per training sample.
        ys_j: All labels, one per training sample.
        b: Bias term added to the weighted sum.
        gram: Gram matrix to read inner products from. When omitted, the
            module-level ``Gram`` is used, so existing four-argument calls
            keep working.

    Returns:
        The weighted sum plus ``b``.
    """
    if gram is None:
        # Backward-compatible fallback to the matrix built by the notebook.
        gram = Gram
    coeffs = np.asarray(alphas_j) * np.asarray(ys_j)
    return np.dot(coeffs, np.asarray(gram)[x_i_idx]) + b

In [11]:
# Dual-form perceptron training. Each pass scans the samples in order,
# corrects the first one that is not classified correctly, and then starts
# a new pass. Training ends after a pass that needs no correction.
# NOTE: there is no iteration cap; the loop only exits once a full pass
# classifies every sample correctly.
all_True = False
iter_num = 0
while not all_True:
    iter_num += 1
    for i in range(x.shape[1]):
        sample_y = y[i]
        pred = calc_dual(i, alpha, y, b)
        if sample_y * pred > 0:
            # Correctly classified: move on to the next sample.
            continue

        # Not correctly classified: update alpha (in place) and b.
        alpha[i] += lr
        b += lr * sample_y

        # Rebuild w from alpha, x and y; it is only used for the printout.
        w = (np.multiply(x, y) * alpha).sum(1)
        S = ' 分类超平面: %dx1+%dx2%+d=0'%(w[0], w[1], b)

        # The alpha columns are generated from alpha itself rather than a
        # fixed "a1 a2 a3" template, so the progress line also works when
        # the number of samples is not three. For three samples the text
        # is identical to the fixed template.
        print(
            'k:%2d '%iter_num,
            ' x_%s '%(i+1),
            '  '.join('a%d:%2d' % (j + 1, a_j) for j, a_j in enumerate(alpha)) + '  b:%+d' % b,
            S,
        )

        # Start a fresh pass with the updated parameters.
        break
    else:
        # The pass completed without any update: all samples are correct.
        all_True = True