In [7]:
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
import random
import autograd.numpy as np
from autograd import grad
from autograd import elementwise_grad as egrad

In [8]:
# Current working directory of the kernel; the data file path is built from it
pwd = os.getcwd()

In [9]:
# Pull data

# Join path components with os.path.join rather than a hard-coded '/' so the
# separator is correct on every platform.
fname = os.path.join(pwd, 'data_banknote_authentication.txt')
# Comma-separated numeric rows -> 2-D float array
banknote_data = np.loadtxt(fname, delimiter = ',')
banknote_data

array([[  3.6216 ,   8.6661 ,  -2.8073 ,  -0.44699,   0.     ],
       [  4.5459 ,   8.1674 ,  -2.4586 ,  -1.4621 ,   0.     ],
       [  3.866  ,  -2.6383 ,   1.9242 ,   0.10645,   0.     ],
       ...,
       [ -3.7503 , -13.4586 ,  17.5932 ,  -2.7771 ,   1.     ],
       [ -3.5637 ,  -8.3827 ,  12.393  ,  -1.2823 ,   1.     ],
       [ -2.5419 ,  -0.65804,   2.6842 ,   1.1952 ,   1.     ]])

In [10]:
# (rows, columns) of the loaded array
banknote_data.shape

(1372, 5)

In [11]:
def onehot(y):
    """Turn an integer label array into a one-hot matrix.

    Row ``k`` of the result is all zeros except for a 1 in column ``y[k]``.
    The matrix has ``y.size`` rows and ``y.max() + 1`` columns.
    """
    n_rows = y.size
    n_classes = y.max() + 1
    encoded = np.zeros((n_rows, n_classes))
    row_idx = np.arange(n_rows)
    # Fancy indexing sets exactly one entry per row
    encoded[row_idx, y] = 1
    return encoded

In [12]:
# Separate features from labels, then hold out part of the rows for testing

# First four columns are the features; the fifth is the integer class label
banknote_x = banknote_data[:, :4]
banknote_y = onehot(banknote_data[:, 4].astype(int))

(banknote_x_train, banknote_x_test,
 banknote_y_train, banknote_y_test) = train_test_split(
    banknote_x, banknote_y, test_size=0.33, random_state=42)

# Re-orient every split so that each column is one example
banknote_x_train, banknote_x_test = banknote_x_train.T, banknote_x_test.T
banknote_y_train, banknote_y_test = banknote_y_train.T, banknote_y_test.T
print(f'Sizes of:\nx_train: {banknote_x_train.shape},\ty_train: {banknote_y_train.shape},\tx_test: {banknote_x_test.shape},\ty_test: {banknote_y_test.shape}')

Sizes of:
x_train: (4, 919),	y_train: (2, 919),	x_test: (4, 453),	y_test: (2, 453)


In [13]:
# Example counts live on the column axis of the transposed splits
NTRN = banknote_x_train.shape[1]
NTST = banknote_x_test.shape[1]

# Feature and class dimensionalities live on the row axis
XDIM = banknote_x_train.shape[0]
YDIM = banknote_y_train.shape[0]

In [14]:
# Random initial weight matrix with one row per class and one column per feature.
# NOTE(review): no RNG seed is set in the visible cells, so these values change on every run.
w = np.random.rand(YDIM, XDIM)
w

array([[0.81474619, 0.9805949 , 0.1390099 , 0.55829626],
       [0.71173289, 0.67536457, 0.95749833, 0.70042555]])

In [15]:
# Initial predictions
# Score every training column in one matrix product and take the best-scoring
# class per column (axis=0). This replaces a Python-level loop over columns
# and a transpose that was a no-op on the 1-D result.
ip = np.argmax(np.dot(w, banknote_x_train), axis=0)
print(ip.shape)
ip

(919,)


array([1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,

In [16]:
# correct answers
# The labels are one-hot columns, so the arg-max along axis 0 recovers the
# integer class of each example without a Python-level loop.
ca = np.argmax(banknote_y_train, axis=0)
print(ca.shape)
ca

(919,)


array([1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0,

In [17]:
# accuracy
def acc(w, x, y):
    """Fraction of columns whose predicted class matches the labelled class.

    The predicted class of a column is the arg-max of ``np.dot(w, x)`` along
    axis 0; the labelled class is the arg-max of ``y`` along axis 0.
    """
    predicted = np.argmax(np.dot(w, x), axis=0)
    labelled = np.argmax(y, axis=0)
    hits = predicted == labelled
    return np.mean(hits)

In [18]:
# Accuracy of the random initial weights on the train split, then on the test split
print(acc(w, banknote_x_train, banknote_y_train), acc(w, banknote_x_test, banknote_y_test))

0.5745375408052231 0.543046357615894


In [19]:
# Number of columns (training examples) in the transposed feature matrix
banknote_x_train.shape[1]

919

In [20]:
# training loop
def train(algo, x, y, num_epochs = int(2**20)):
    """Fit a linear multi-class model with single-example updates.

    Parameters
    ----------
    algo : callable
        Update rule invoked as ``algo(w, x_i, y_i)``. It is expected to modify
        ``w`` in place; its return value is ignored.
    x : array, indexed as ``x[:, i]``
        Inputs, one example per column.
    y : array, indexed as ``y[:, i]``
        Targets, one example per column.
    num_epochs : int
        Number of single-example updates to perform.

    Returns
    -------
    The weight matrix of shape ``(y.shape[0], x.shape[0])``, started at zero.

    Progress (accuracy on ``x``/``y`` and the norm of ``w``) is printed at
    iterations 1, 2, 4, 8, ... and the final weights are printed at the end.
    """
    w = np.zeros((y.shape[0], x.shape[0]))
    num_examples = x.shape[1]
    next_print = 1
    for epoch in range(num_epochs):
        # np.random.randint excludes the upper bound, so it must be
        # num_examples itself; num_examples-1 would never sample the last column.
        i = np.random.randint(0, num_examples)
        algo(w, x[:, i], y[:, i])
        if epoch == next_print:
            print(f'Iteration: {epoch}\tAccuracy: {acc(w, x, y)}\tW-norm: {np.linalg.norm(w)}')
            # Double the reporting interval each time
            next_print = min(2*epoch, num_epochs)
    print(f'Final W: {w}')
    return w

In [21]:
def perceptron(w, x, y):
    """Apply one multi-class perceptron update to ``w`` in place.

    If the highest-scoring row of ``np.dot(w, x)`` is not the arg-max of
    ``y``, ``x`` is added to the correct row and subtracted from the wrongly
    predicted row. A correct prediction leaves ``w`` untouched.
    """
    predicted = np.argmax(np.dot(w, x))
    target = np.argmax(y)
    if predicted == target:
        return
    # Mistake-driven step: reward the right class, penalise the predicted one
    w[target, :] += x
    w[predicted, :] -= x

In [22]:
# Fit with the perceptron update rule on the training split; train() prints progress and returns the weights
wperceptron = train(perceptron, banknote_x_train, banknote_y_train)

Iteration: 1	Accuracy: 0.5495103373231773	W-norm: 0.0
Iteration: 2	Accuracy: 0.5495103373231773	W-norm: 0.0
Iteration: 4	Accuracy: 0.7486398258977149	W-norm: 17.98002103970404
Iteration: 8	Accuracy: 0.7486398258977149	W-norm: 17.98002103970404
Iteration: 16	Accuracy: 0.8422198041349293	W-norm: 15.855448441877385
Iteration: 32	Accuracy: 0.8258977149075082	W-norm: 20.857371338772104
Iteration: 64	Accuracy: 0.9042437431991295	W-norm: 26.0578297028454
Iteration: 128	Accuracy: 0.8955386289445049	W-norm: 34.94874851606981
Iteration: 256	Accuracy: 0.9445048966267682	W-norm: 38.190544060445276
Iteration: 512	Accuracy: 0.9521218715995647	W-norm: 43.13281429351264
Iteration: 1024	Accuracy: 0.941240478781284	W-norm: 57.63097639142704
Iteration: 2048	Accuracy: 0.8803046789989118	W-norm: 77.12119455998047
Iteration: 4096	Accuracy: 0.9542981501632208	W-norm: 87.91508819335458
Iteration: 8192	Accuracy: 0.9466811751904244	W-norm: 99.01172486174328
Iteration: 16384	Accuracy: 0.9532100108813928	W-norm: 

In [23]:
def adaline(w, x, y, lr = 0.0001):
    """Apply one ADALINE (least-mean-squares) update to ``w`` in place.

    The linear output ``np.dot(w, x)`` is compared with the target ``y`` and
    ``w`` is moved against the outer product of that residual with ``x``,
    scaled by the learning rate ``lr``.
    """
    residual = np.dot(w, x) - y
    step = np.outer(residual, x)
    w -= lr * step

In [24]:
# Fit with the ADALINE update rule (default lr) on the training split
wadaline = train(adaline, banknote_x_train, banknote_y_train)

Iteration: 1	Accuracy: 0.6594124047878128	W-norm: 0.0013387284289652836
Iteration: 2	Accuracy: 0.6387377584330794	W-norm: 0.002539958652798517
Iteration: 4	Accuracy: 0.6496191512513602	W-norm: 0.003293002875756407
Iteration: 8	Accuracy: 0.6855277475516867	W-norm: 0.004332023188808136
Iteration: 16	Accuracy: 0.6702937976060935	W-norm: 0.0072471552739841
Iteration: 32	Accuracy: 0.6572361262241567	W-norm: 0.013278033235740193
Iteration: 64	Accuracy: 0.6670293797606094	W-norm: 0.02344093846172342
Iteration: 128	Accuracy: 0.6942328618063112	W-norm: 0.03899006126535003
Iteration: 256	Accuracy: 0.73449401523395	W-norm: 0.06545817508019897
Iteration: 512	Accuracy: 0.8607181719260065	W-norm: 0.10154600411907813
Iteration: 1024	Accuracy: 0.9151251360174102	W-norm: 0.15620650029336877
Iteration: 2048	Accuracy: 0.9445048966267682	W-norm: 0.21201066951523595
Iteration: 4096	Accuracy: 0.9466811751904244	W-norm: 0.24964101951190132
Iteration: 8192	Accuracy: 0.9488574537540805	W-norm: 0.26413233850414

In [25]:
def softmax(w, x, y, lr = 0.01):
    """Apply one softmax-regression gradient step to ``w`` in place.

    Parameters
    ----------
    w : 2-D array, modified in place
        Weight matrix; ``np.dot(w, x)`` gives one score per class.
    x : 1-D array
        A single input example.
    y : 1-D array
        Target vector for that example (same length as the score vector).
    lr : float
        Learning rate.

    The step is ``lr * outer(softmax(w @ x) - y, x)``.
    """
    scores = np.dot(w, x)
    # Softmax is invariant to adding a constant to every score. Subtracting
    # the max keeps exp() from overflowing to inf, which would otherwise turn
    # the probabilities (and then the weights) into nan.
    probs = np.exp(scores - np.max(scores))
    probs /= np.sum(probs)
    error = probs - y
    w -= lr * np.outer(error, x)

In [26]:
# Fit with the softmax update rule (default lr) on the training split
wsoftmax = train(softmax, banknote_x_train, banknote_y_train)

Iteration: 1	Accuracy: 0.6594124047878128	W-norm: 0.07181362484524383
Iteration: 2	Accuracy: 0.6550598476605005	W-norm: 0.11057691602017777
Iteration: 4	Accuracy: 0.6507072905331882	W-norm: 0.1461294334632158
Iteration: 8	Accuracy: 0.6833514689880305	W-norm: 0.18256762501025048
Iteration: 16	Accuracy: 0.8052230685527747	W-norm: 0.20736107309271445
Iteration: 32	Accuracy: 0.8335146898803046	W-norm: 0.30548225414236085
Iteration: 64	Accuracy: 0.9303590859630033	W-norm: 0.44392470443105697
Iteration: 128	Accuracy: 0.9466811751904244	W-norm: 0.6238812189672804
Iteration: 256	Accuracy: 0.9357997823721437	W-norm: 0.8610340698032355
Iteration: 512	Accuracy: 0.955386289445049	W-norm: 1.119461457074803
Iteration: 1024	Accuracy: 0.9608269858541894	W-norm: 1.3886828354333869
Iteration: 2048	Accuracy: 0.9488574537540805	W-norm: 1.729861317845446
Iteration: 4096	Accuracy: 0.9608269858541894	W-norm: 2.1290827803767414
Iteration: 8192	Accuracy: 0.9608269858541894	W-norm: 2.518999142070026
Iteration: 

In [56]:
def optimize(loss, x, y, lr = 1, num_epochs = int(2**20)):
    """Minimise a per-example loss by stochastic gradient descent.

    Parameters
    ----------
    loss : callable
        Called as ``loss(w, x_i, y_i)`` and returning a scalar. It is
        differentiated with respect to its first argument via autograd's ``grad``.
    x : array, indexed as ``x[:, i]``
        Inputs, one example per column.
    y : array, indexed as ``y[:, i]``
        Targets, one example per column.
    lr : float
        Learning rate applied to every gradient step.
    num_epochs : int
        Index of the last iteration; the loop runs ``num_epochs + 1`` updates
        so that the final iteration can be reported.

    Returns
    -------
    The weight matrix of shape ``(y.shape[0], x.shape[0])``, started at zero.

    Progress is printed at iterations 1, 2, 4, 8, ... and the final weights
    are printed at the end.
    """
    # Size everything from the arguments, not from the notebook-level
    # banknote_* arrays, so the function works on whatever data it is given.
    w = np.zeros((y.shape[0], x.shape[0]))
    num_examples = x.shape[1]
    # The gradient function does not change between iterations; build it once.
    loss_grad = grad(loss)
    next_print = 1
    for epoch in range(num_epochs + 1):
        # random.randint includes both endpoints, hence num_examples-1
        i = random.randint(0, num_examples-1)
        del_w = loss_grad(w, x[:, i], y[:, i])
        w -= lr*del_w
        if epoch == next_print:
            print(f'Iteration: {epoch}\tAccuracy: {acc(w, x, y)}\tW-norm: {np.linalg.norm(w)}')
            # Double the reporting interval each time
            next_print = min(2*epoch, num_epochs)
    print(f'Final W: {w}')
    return w

In [57]:
def perceptronloss(w, x, y):
    """Perceptron loss for one example.

    Returns the highest class score of ``np.dot(w, x)`` minus the score of
    the class at the arg-max of ``y``; this is zero when that class already
    has the top score.
    """
    scores = np.dot(w, x)
    target = np.argmax(y)
    top = np.argmax(scores)
    return scores[top] - scores[target]

In [58]:
# Same perceptron fit, but driven by the autograd gradient of perceptronloss (default lr)
wperceptron2 = optimize(perceptronloss, banknote_x_train, banknote_y_train)

Iteration: 1	Accuracy: 0.5495103373231773	W-norm: 0.0
Iteration: 2	Accuracy: 0.5495103373231773	W-norm: 0.0
Iteration: 4	Accuracy: 0.4211099020674646	W-norm: 5.290171870553924
Iteration: 8	Accuracy: 0.5625680087051143	W-norm: 5.6661670938298325
Iteration: 16	Accuracy: 0.6974972796517954	W-norm: 11.901057317936083
Iteration: 32	Accuracy: 0.940152339499456	W-norm: 15.165438098332027
Iteration: 64	Accuracy: 0.9096844396082698	W-norm: 26.37517203509059
Iteration: 128	Accuracy: 0.9064200217627857	W-norm: 38.866815190865545
Iteration: 256	Accuracy: 0.8356909684439608	W-norm: 51.32323597264314
Iteration: 512	Accuracy: 0.9521218715995647	W-norm: 58.94075301787031
Iteration: 1024	Accuracy: 0.9608269858541894	W-norm: 69.19395585091425
Iteration: 2048	Accuracy: 0.9532100108813928	W-norm: 77.09464376684446
Iteration: 4096	Accuracy: 0.941240478781284	W-norm: 84.00056980325063
Iteration: 8192	Accuracy: 0.9630032644178455	W-norm: 95.09367460462087
Iteration: 16384	Accuracy: 0.9630032644178455	W-norm:

In [49]:
def quadraticloss(w, x, y):
    """Half the sum of squared magnitudes of the residual ``np.dot(w, x) - y``."""
    residual = np.dot(w, x) - y
    squared_magnitudes = np.square(np.abs(residual))
    return 0.5 * np.sum(squared_magnitudes)

In [53]:
# ADALINE fit again, this time via the autograd gradient of quadraticloss, with lr = 0.0001
wadaline2 = optimize(quadraticloss, banknote_x_train, banknote_y_train, lr = 0.0001)

Iteration: 1	Accuracy: 0.44940152339499456	W-norm: 0.001085302968766251
Iteration: 2	Accuracy: 0.470076169749728	W-norm: 0.0011531065278076419
Iteration: 4	Accuracy: 0.4733405875952122	W-norm: 0.0017473255796710797
Iteration: 8	Accuracy: 0.5680087051142546	W-norm: 0.0022678408433794003
Iteration: 16	Accuracy: 0.7388465723612623	W-norm: 0.004769068841815052
Iteration: 32	Accuracy: 0.6800870511425462	W-norm: 0.009713912342420746
Iteration: 64	Accuracy: 0.7573449401523396	W-norm: 0.0161548517107483
Iteration: 128	Accuracy: 0.7290533188248096	W-norm: 0.03363579621705654
Iteration: 256	Accuracy: 0.7562568008705114	W-norm: 0.05859070637224487
Iteration: 512	Accuracy: 0.8313384113166485	W-norm: 0.09747627366790566
Iteration: 1024	Accuracy: 0.9031556039173014	W-norm: 0.15198118414721798
Iteration: 2048	Accuracy: 0.9477693144722524	W-norm: 0.21298300734927483
Iteration: 4096	Accuracy: 0.9532100108813928	W-norm: 0.2533507780288085
Iteration: 8192	Accuracy: 0.9423286180631121	W-norm: 0.2677176431

In [54]:
def NLL(w, x, y):
    """Negative log-likelihood of the labelled class under a softmax model.

    Parameters
    ----------
    w : 2-D array
        Weight matrix; ``np.dot(w, x)`` gives one score per class.
    x : 1-D array
        A single input example.
    y : 1-D array
        Target vector; its arg-max selects the labelled class.

    Returns
    -------
    ``-log(softmax(w @ x)[argmax(y)])`` as a scalar.
    """
    scores = np.dot(w, x)
    # Work in log space with the max score subtracted (log-sum-exp). This is
    # the same quantity as -log(exp(s_c) / sum(exp(s))) but cannot overflow
    # to inf/inf = nan when a score is large.
    shifted = scores - np.max(scores)
    correct_ans = np.argmax(y)
    return np.log(np.sum(np.exp(shifted))) - shifted[correct_ans]

In [55]:
# Softmax regression again, this time via the autograd gradient of NLL, with lr = 0.01
wsoftmax2 = optimize(NLL, banknote_x_train, banknote_y_train, lr = 0.01)

Iteration: 1	Accuracy: 0.6528835690968444	W-norm: 0.07269255326011799
Iteration: 2	Accuracy: 0.7736670293797606	W-norm: 0.07333482222414799
Iteration: 4	Accuracy: 0.7671381936887922	W-norm: 0.1171861871711065
Iteration: 8	Accuracy: 0.7377584330794341	W-norm: 0.13550389541116678
Iteration: 16	Accuracy: 0.7878128400435256	W-norm: 0.22137849556195635
Iteration: 32	Accuracy: 0.8019586507072906	W-norm: 0.3095547983674853
Iteration: 64	Accuracy: 0.9064200217627857	W-norm: 0.46598069359307953
Iteration: 128	Accuracy: 0.9575625680087051	W-norm: 0.6853846698130158
Iteration: 256	Accuracy: 0.9336235038084875	W-norm: 0.8938131235835516
Iteration: 512	Accuracy: 0.926006528835691	W-norm: 1.1536524591459862
Iteration: 1024	Accuracy: 0.9640914036996736	W-norm: 1.4497994229937088
Iteration: 2048	Accuracy: 0.9619151251360174	W-norm: 1.7618215742362024
Iteration: 4096	Accuracy: 0.9532100108813928	W-norm: 2.1128853649671084
Iteration: 8192	Accuracy: 0.955386289445049	W-norm: 2.501855243457162
Iteration: 