In [1]:
import numpy as np
import matplotlib.pyplot as plt


In [2]:
# Read the Spambase table, separate labels from features, standardise the
# features column-wise and append a constant column of ones.

spambase_path = "./data/spambase/spambase.data"
X = np.genfromtxt(spambase_path, delimiter=',')

# The last column holds the label; every column before it is a feature.
X, Y = X[:, :-1], X[:, -1]

# Labels below 0.01 are recoded to -1 (presumably turning a 0/1 encoding
# into -1/+1 -- confirm against the data file).
Y[Y < 0.01] = -1

# Zero mean and unit standard deviation per feature column.
feature_mean = X.mean(axis=0)
feature_std = X.std(axis=0)
X = (X - feature_mean) / feature_std

# The trailing column of ones lets a plain dot product carry a bias term.
X = np.column_stack((X, np.ones(X.shape[0])))


In [3]:
def perceptron(X, Y, T, eta=0.1):
    """Run T online perceptron steps on randomly drawn rows and log the hits.

    Each step draws one row index with ``np.random.randint`` (the global
    NumPy random state, so seed it beforehand for reproducible runs),
    predicts -1 when the dot product of the weights and the row is negative
    and +1 otherwise, and adds ``eta * label * row`` to the weights whenever
    ``label * prediction`` is negative.

    Parameters
    ----------
    X : 2-D array whose rows are the samples.
    Y : labels indexed like the rows of ``X`` (presumably -1 / +1).
    T : number of steps to run.
    eta : step size of the additive update.

    Returns
    -------
    Array of length ``T`` holding 1.0 for steps without a mistake and 0.0
    for steps that triggered an update. The weights are not returned.
    """
    n_samples, n_features = X.shape
    weights = np.zeros(n_features)
    correct = np.zeros(T)

    for step in range(T):
        idx = np.random.randint(n_samples)
        sample, label = X[idx], Y[idx]

        # A score of exactly zero is classified as +1.
        predicted = 1
        if weights.dot(sample) < 0:
            predicted = -1

        if label * predicted < 0:
            # Mistake: move the weights towards the true label.
            weights = weights + (eta * label) * sample
        else:
            correct[step] = 1

    return correct


In [4]:
def winnow(X, Y, T, eta=0.1):
    """Run T online steps of a multiplicative-update classifier and log the hits.

    The feature matrix is first widened to ``[X, -X]`` (presumably so that
    the strictly positive weights can still express negative coefficients).
    Weights start uniform and summing to one. Each step draws one row index
    with ``np.random.randint`` (global NumPy random state), predicts -1 when
    the dot product is negative and +1 otherwise, and on a mistake
    (``label * prediction`` negative) multiplies the weights element-wise by
    ``exp(eta * label * row)`` and rescales them to sum to one again.

    Parameters
    ----------
    X : 2-D array whose rows are the samples (not modified; a widened copy
        is used internally).
    Y : labels indexed like the rows of ``X`` (presumably -1 / +1).
    T : number of steps to run.
    eta : rate used inside the exponential update.

    Returns
    -------
    Array of length ``T`` holding 1.0 for steps without a mistake and 0.0
    for steps that triggered an update. The weights are not returned.
    """
    X = np.hstack([X, -X])
    n_samples, n_features = X.shape

    weights = np.ones(n_features) / n_features
    correct = np.zeros(T)

    for step in range(T):
        idx = np.random.randint(n_samples)
        sample, label = X[idx], Y[idx]

        # A score of exactly zero is classified as +1.
        predicted = 1
        if weights.dot(sample) < 0:
            predicted = -1

        if label * predicted < 0:
            # Mistake: exponentiated update followed by renormalisation.
            boosted = weights * np.exp((eta * label) * sample)
            weights = boosted / boosted.sum()
        else:
            correct[step] = 1

    return correct
    

In [22]:
def check_accuracy(X, Y, W):
    """Return the fraction of rows of X that W classifies correctly.

    Predictions follow the same rule the training loops in this notebook
    use: -1 when ``W.dot(x)`` is negative and +1 otherwise, so a score of
    exactly zero counts as a +1 prediction. A row is a mistake when
    ``label * prediction`` is negative.

    Parameters
    ----------
    X : 2-D array whose rows are the samples.
    Y : labels aligned with the rows of ``X`` (presumably -1 / +1).
    W : 1-D weight vector with one entry per column of ``X``.

    Returns
    -------
    float in [0, 1].

    Raises
    ------
    ZeroDivisionError
        If ``X`` has no rows.
    """
    scores = X.dot(W)
    # Tie-break at zero matches the trainers: only negative scores give -1.
    predictions = np.where(scores < 0, -1, 1)
    mistakes = int(np.count_nonzero(np.asarray(Y) * predictions < 0))

    n_samples = X.shape[0]
    return float(n_samples - mistakes) / n_samples

In [5]:
# Perceptron with eta=0.1: one row of per-step hit indicators per seeded run.
T, runs = 10000, 100
correct_perc1 = np.zeros((runs, T))

# Seeds are 0, 57, 114, ... so every run is distinct but reproducible.
for run, seed in enumerate(range(0, 57 * runs, 57)):
    np.random.seed(seed)
    correct_perc1[run] = perceptron(X, Y, T, eta=0.1)


In [11]:
# Winnow with eta=0.1: one row of per-step hit indicators per seeded run.
T, runs = 10000, 100
correct_winn1 = np.zeros((runs, T))

# Seeds are 0, 57, 114, ... so every run is distinct but reproducible.
for run, seed in enumerate(range(0, 57 * runs, 57)):
    np.random.seed(seed)
    correct_winn1[run] = winnow(X, Y, T, eta=0.1)


In [18]:
# Perceptron with eta=1.0: one row of per-step hit indicators per seeded run.
T, runs = 10000, 100
correct_perc2 = np.zeros((runs, T))

# Seeds are 0, 57, 114, ... so every run is distinct but reproducible.
for run, seed in enumerate(range(0, 57 * runs, 57)):
    np.random.seed(seed)
    correct_perc2[run] = perceptron(X, Y, T, eta=1.0)


In [19]:
# Winnow with eta=1.0: one row of per-step hit indicators per seeded run.
T, runs = 10000, 100
correct_winn2 = np.zeros((runs, T))

# Seeds are 0, 57, 114, ... so every run is distinct but reproducible.
for run, seed in enumerate(range(0, 57 * runs, 57)):
    np.random.seed(seed)
    correct_winn2[run] = winnow(X, Y, T, eta=1.0)


In [17]:
%matplotlib notebook

xlab = np.arange(T)
g = 100

mu = correct_perc1.mean(axis=0)
sig = correct_perc1.std(axis=0)**2
plt.errorbar(xlab[::g], mu[::g], yerr=sig[::g], fmt='o', label="perceptron(eta=0.1)")


plt.legend(loc=4)
plt.ylim(0, 1)
plt.ylabel("correct predictions")
plt.xlabel("time steps")
plt.title("Perceptron on Spambase database")
plt.show()



<IPython.core.display.Javascript object>