In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
def PLA_no_bias(X, y, max_iter=1000):
    """
    Perceptron learning algorithm for a separator through the origin.

    Starting from a random normal vector, repeatedly take the first sample i
    with (X[i] . normal) * y[i] <= 0 and update normal += y[i] * X[i],
    until no such sample is left or max_iter updates have been made.

    Input:
        X: array of shape (N, d), one sample per row
        y: N labels; the accuracy compares np.sign(X . normal) with y,
           so the labels are expected to be +1 / -1
        max_iter: maximum number of updates (default 1000)
    Output:
        normal: array of shape (d,)
        The training accuracy is printed after every 100th update
        and once more before returning.
    """
    N,d = X.shape
    # flatten the labels so that a column vector does not broadcast to (N, N)
    labels = np.asarray(y).reshape(-1)
    normal = np.random.randn(d,)

    for it in range(max_iter):
        # one vectorized pass instead of a Python loop over the samples;
        # a non-positive margin marks a misclassified (or on-the-line) sample
        wrong = np.flatnonzero(np.dot(X, normal) * labels <= 0)
        if wrong.size == 0:
            # every sample is on the correct side: converged
            break
        i = wrong[0]
        normal += labels[i] * X[i]
        # print accuracy
        if it % 100 == 99:
            print("Iter %s: %s"
                  %(it+1, (np.sign(np.dot(X, normal)) == labels).mean()))

    print((np.sign(np.dot(X, normal)) == labels).mean())
    return normal

def PLA_with_bias(X, y):
    """
    Run PLA_no_bias on X with a leading column of ones.

    Input:
        X: array of shape (N, d)
        y: labels, passed to PLA_no_bias unchanged
    Output:
        (bias, normal): the first entry of the vector returned by
        PLA_no_bias and its remaining d entries.
    """
    n_samples, _ = X.shape
    # the weight learned for the constant column acts as the bias
    ones_column = np.ones((n_samples, 1))
    augmented = np.hstack((ones_column, X))
    weights = PLA_no_bias(augmented, y)
    bias, normal = weights[0], weights[1:]
    return bias, normal

In [None]:
def draw_classifier_origin(X, y, normal, bias=0):
    """
    Draw 2-D samples together with the line {x : x . normal + bias = 0}.

    Input:
        X: array whose first two columns are the horizontal and
           vertical coordinates of the samples
        y: per-sample values used to colour the points
        normal: numpy array with two entries, a normal vector of the line
        bias: offset of the line; with the default 0 the line
              passes through the origin
    Output:
        an illustration of the classifier on a new 5x5 figure:
        the samples, the normal vector as a red arrow starting at the
        origin, and the line in red.  Nothing is returned.
        This function works only when X.shape[1] == 2.
    """
    fig = plt.figure(figsize=(5,5))
    ax = plt.axes()
    ### draw data points
    ax.scatter(X[:,0], X[:,1], c=y, cmap='viridis')

    ### set boundary: pad the shorter axis so that the view is a square
    xleft, xright = X[:,0].min(), X[:,0].max()
    yleft, yright = X[:,1].min(), X[:,1].max()
    xwidth = xright - xleft
    ywidth = yright - yleft
    width = max([xwidth, ywidth])
    xleft, xright = xleft - (width-xwidth)/2, xright + (width-xwidth)/2
    yleft, yright = yleft - (width-ywidth)/2, yright + (width-ywidth)/2
    ax.set_xlim(xleft, xright)
    ax.set_ylim(yleft, yright)

    ### draw normal vector and the line
    length = np.sqrt(np.sum(normal ** 2))
    unit = normal / length
    # the arrow is a quarter of the view width long
    c1,c2 = unit * (0.25*width)
    ax.arrow(0, 0, c1, c2, color='red', head_width=0.05*width)

    # point of the line closest to the origin
    foot = -bias / normal.dot(normal) * normal
    # Half-length measured in data units along the unit direction.
    # (Scaling the already scaled c1, c2 by 4*width gave width**2, which is
    # too short when the data extent is below 1.)  Distance from the foot
    # to the view centre plus one view width always reaches past the view.
    center = np.array([(xleft + xright) / 2, (yleft + yright) / 2])
    half = np.sqrt(np.sum((foot - center) ** 2)) + width
    ax.plot(np.array([-half*unit[1], half*unit[1]]) + foot[0],
            np.array([half*unit[0], -half*unit[0]]) + foot[1],
            color='red')
    # fig.savefig('linear_classifier.png')

In [None]:
# PLA no bias
# 100 random points in the plane, labelled by the side of the line x0 + x1 = 0
X = np.random.randn(100, 2)
y = np.sign(X @ np.array([1, 1]))
plt.axis('equal')
plt.scatter(X[:, 0], X[:, 1], c=y)

In [None]:
# learn a normal vector (no bias term) and draw the resulting line
normal = PLA_no_bias(X=X, y=y)
draw_classifier_origin(X=X, y=y, normal=normal)

In [None]:
# PLA with bias
# same construction, but the labelling line is shifted: x0 + x1 + 1 = 0
X = np.random.randn(100, 2)
y = np.sign(X @ np.array([1, 1]) + 1)
plt.axis('equal')
plt.scatter(X[:, 0], X[:, 1], c=y)

In [None]:
# learn bias and normal vector together, then draw the shifted line
bias, normal = PLA_with_bias(X=X, y=y)
draw_classifier_origin(X=X, y=y, normal=normal, bias=bias)

In [None]:
# still running or not separable?
# the same labelling rule as before, now with 100000 samples
X = np.random.randn(100000, 2)
y = np.sign(X @ np.array([1, 1]) + 1)
plt.axis('equal')
plt.scatter(X[:, 0], X[:, 1], c=y)

In [None]:
# fit the large sample and draw whatever separator PLA ends with
bias, normal = PLA_with_bias(X=X, y=y)
draw_classifier_origin(X=X, y=y, normal=normal, bias=bias)

In [None]:
# feature engineering
# keep only the points whose distance to the origin falls into one of two
# bands: 1 < r < 2 (label -1) and 3 < r < 4 (label +1)
X = 5 * np.random.randn(10000, 2)
lengths = np.linalg.norm(X, axis=1)
band1 = (1 < lengths) & (lengths < 2)
band2 = (3 < lengths) & (lengths < 4)
X = np.vstack((X[band1], X[band2]))
# labels follow the stacking order: band1 rows first, then band2 rows
y = np.array([-1] * int(band1.sum()) + [1] * int(band2.sum()))
plt.axis('equal')
plt.scatter(X[:, 0], X[:, 1], c=y)

In [None]:
# append the squared coordinates as two extra features before fitting
X_new = np.hstack((X, X * X))
bias, normal = PLA_with_bias(X_new, y)

(bias, normal)

In [None]:
# hand written digits of 0's and 1's
# load from NSYSU-digits dataset
# https://github.com/SageLabTW/auto-grading
import os
# `import urllib` alone does not load the `request` submodule, so
# `urllib.request.urlretrieve` only worked when another library had
# already imported it; import the submodule explicitly.
import urllib.request
import numpy as np

base = r"https://github.com/SageLabTW/auto-grading/raw/master/nsysu-digits/"
for c in ['X', 'y']:
    filename = "nsysu-digits-%s.csv"%c
    # download each file only once; later runs reuse the local copy
    if not os.path.exists(filename):
        print(filename, 'not found --- will download')
        urllib.request.urlretrieve(base + c + ".csv", filename)

Xsys = np.genfromtxt('nsysu-digits-X.csv', dtype=int, delimiter=',') ### flattened already
ysys = np.genfromtxt('nsysu-digits-y.csv', dtype=int, delimiter=',')
print(Xsys.shape)

In [None]:
# you may try different numbers
a, b = 0, 1
# keep the samples of the two chosen digits and relabel them: a -> -1, b -> +1
mask = (ysys == a) | (ysys == b)
X, y = Xsys[mask], ysys[mask]
y[y == a] = -1
y[y == b] = 1

# preview the first ten selected images on a 2 x 5 grid
fig, axs = plt.subplots(2, 5, figsize=(10,4))
for i, ax in enumerate(axs.ravel()):
    ax.axis('off')
    ax.imshow(X[i].reshape(28,28), vmin=0, vmax=255, cmap="binary")

In [None]:
from sklearn.model_selection import train_test_split

# random split of the selected digits into a training and a test part
(Xtrain, Xtest, ytrain, ytest) = train_test_split(X, y)
print(len(ytrain), len(ytest))

In [None]:
# train on the training part only
bias, normal = PLA_with_bias(Xtrain, ytrain)

# fraction of test samples whose predicted sign equals the label
acc = (np.sign(Xtest @ normal + bias) == ytest).mean()
print("Test set accuracy:", acc)