In [106]:
import numpy as np
import hrr_utils
import utils

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.datasets import fetch_mldata

In [107]:
# Load the 'MNIST original' dataset; data_home='mnist' is presumably the local
# cache directory used by the loader.
# NOTE(review): fetch_mldata is not available in recent scikit-learn releases --
# confirm the installed version, or migrate to fetch_openml.
mnist = fetch_mldata('MNIST original', data_home='mnist')

In [108]:
# Pull the samples and their targets out of the dataset object and pass them
# through utils.Shuffle together (presumably the same permutation is applied
# to both -- confirm against utils).
data, labels = utils.Shuffle(mnist.data, mnist.target)

# Width of the random bipolar vectors and of the projected image codes.
N = 1000

In [109]:
def project_mnist(inp, projection_mat):
    """Binarize an image and map it to a sign vector through a projection.

    Every pixel greater than zero counts as "on" (1.0); all others are 0.0.
    The binarized image is multiplied by ``projection_mat`` and only the sign
    of each resulting component is kept, so entries are -1, 0 or +1.
    """
    # Only the presence of ink matters, not its intensity.
    presence = np.asarray(inp > 0, dtype=np.float32)
    return np.sign(presence.dot(projection_mat))

def random_vec(num_elements, n=N):
    """Draw random bipolar (+1 / -1) vectors.

    Every entry is independently +1 or -1, each with probability one half.

    Parameters
    ----------
    num_elements : int
        Number of vectors to draw (rows of the result).
    n : int, optional
        Length of each vector (columns of the result). Defaults to the value
        the module-level ``N`` had when this function was defined.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(num_elements, n)`` containing only +1 / -1.
    """
    # Size the draw with the `n` argument. The previous body ignored `n` and
    # always read the global N, so an explicit width was silently discarded.
    return np.where(np.random.random((num_elements, n)) > .5, 1, -1)

def hamming(x, y):
    """Hamming-style similarity between the sign patterns of two vectors.

    Both inputs are flattened (``x`` to a row, ``y`` to a column), reduced to
    their signs and multiplied. The result is a (1, 1) array holding the sum
    of the element-wise sign products: agreeing positions contribute +1 and
    disagreeing positions contribute -1.
    """
    row_signs = np.sign(x.reshape((1, -1)))
    col_signs = np.sign(y.reshape((-1, 1)))
    return row_signs.dot(col_signs)
    

In [110]:
# Fraction of the (already shuffled) samples used for training.
ratio = .8

# Index of the first held-out sample; everything before it is training data.
split_at = int(len(data)*ratio)

train_set, test_set = data[:split_at], data[split_at:]
train_label, test_label = labels[:split_at], labels[split_at:]

In [111]:
# Re-shuffle the training split before the single training pass.
(train_set, train_label) = utils.Shuffle(train_set, train_label)

N = 1000
# W is the N x N associative memory mapping a projected image to the random
# code of its digit class.
W = np.zeros((N, N), dtype=np.float32)
# One random bipolar code per digit class (row index = class).
random_labels = random_vec(10, N)
# Random projection from the 784 pixels to N dimensions, entries in [-.5, .5).
projection_mat = np.random.random((784, N))-.5

# Training: one pass over the training samples, one update per sample.
for i in range(train_label.shape[0]):
    if i%1000 == 0:
        # Progress marker. Single-argument print(...) behaves the same on
        # Python 2 and Python 3, unlike the statement form `print i`.
        print(i)
    dat = project_mnist(train_set[i], projection_mat).reshape((1, N))
    label = random_labels[int(train_label[i])].reshape((1, N))

    # Difference between the target code and what W currently recalls for
    # this sample; it is written back into W as an outer product with the
    # sample's code.
    pred = (label-dat.dot(W)/N)
    W += dat.T.dot(pred)

# Scoring on the held-out samples:
#   k1 -- dot product between the recalled vector (scaled by 1/N) and the
#         true class code
#   k2 -- sign-agreement score (hamming) between the recalled vector and the
#         true class code
k1 = []
k2 = []
for j in range(test_label.shape[0]):
    dat = project_mnist(test_set[j], projection_mat).reshape((1, N))
    label = random_labels[int(test_label[j])].reshape((1, N))
    # Recall once and reuse it for both scores.
    recalled = dat.dot(W)
    k1.append((recalled/N).dot(label.T))
    k2.append(hamming(recalled, label))



0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000


In [112]:
# Mean of the per-sample scores collected in k1 (scaled dot product) and
# k2 (sign-agreement score). Single-argument print(...) gives the same output
# on Python 2 and Python 3.
print(np.array(k1).mean())
print(np.array(k2).mean())

722.198378133
705.555571429


In [114]:
# Number of correctly classified samples.
corr = 0

# Confusion counts: error[predicted_class, true_class].
error = np.zeros((10, 10))

# Evaluate on (at most) the first 1000 held-out samples.
n_eval = min(1000, len(test_set))
for j in range(n_eval):
    dat = project_mnist(test_set[j], projection_mat).reshape((1, N))

    # Similarity of the recalled vector to each of the 10 class codes; the
    # best-matching code is the prediction.
    vec = (dat.dot(W)/N).dot(random_labels.T)
    guess = int(np.argmax(vec))
    # Labels are cast with int() before being used as an index, as is done
    # for the random_labels lookup elsewhere; a float index is rejected by
    # current NumPy.
    truth = int(test_label[j])

    corr += guess == truth
    error[guess, truth] += 1



In [127]:
# Accuracy = correct predictions / number of evaluated samples. `error` gets
# exactly one count per evaluated sample, so its total is the right
# denominator whichever evaluation loop filled `corr` last (dividing by
# len(train_set) is wrong after an evaluation over test samples).
float(corr) / error.sum()

0.83755357142857145

In [None]:

# Confusion counts: error[predicted_class, true_class].
error = np.zeros((10, 10))
# Number of correctly classified samples.
corr = 0

# Same evaluation as on the held-out data, but over the training samples.
for j in range(len(train_set)):
    dat = project_mnist(train_set[j], projection_mat).reshape((1, N))

    # Similarity of the recalled vector to each of the 10 class codes; the
    # best-matching code is the prediction.
    vec = (dat.dot(W)/N).dot(random_labels.T)
    guess = int(np.argmax(vec))
    # Labels are cast with int() before being used as an index; a float
    # index is rejected by current NumPy.
    truth = int(train_label[j])

    corr += guess == truth
    error[guess, truth] += 1

In [158]:
# Mean over axis 0 of the training split, i.e. the average training image
# (one value per pixel). Read as a module-level value by project_mnist2.
ave = np.mean(train_set, axis=0)

def project_mnist2(inp, projection_mat):
    """Encode a mean-centred image as a bipolar (+1 / -1) row vector.

    The module-level ``ave`` is subtracted from ``inp``. Each pixel is then
    reduced to +1 (above the mean), -1 (below the mean) or 0 (equal to it).
    That ternary vector is multiplied by the sign pattern of
    ``projection_mat`` and the result is thresholded at zero, with ties
    mapped to +1.

    Parameters
    ----------
    inp : numpy.ndarray
        A single image, with the same layout as ``ave``.
    projection_mat : numpy.ndarray
        Projection matrix; only the sign of each entry is used.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(1, projection_mat.shape[1])`` with entries
        +1 or -1.
    """
    centered = inp - ave
    # The builtin float replaces the np.float alias, which current NumPy no
    # longer provides; the resulting dtype (float64) is the same.
    above = (centered > 0).astype(float)
    below = (centered < 0).astype(float)
    ternary = above - below

    signs = np.sign(projection_mat)
    scores = ternary.dot(signs)
    # Take the output width from the matrix itself rather than the global N,
    # so the function also works for projections of a different width.
    return np.where(scores.reshape((1, signs.shape[1])) >= 0, 1, -1)

# Quick look at the encoding: display the bipolar code of the first held-out
# image.
project_mnist2(test_set[0], projection_mat)
    

array([[ 1, -1,  1, -1,  1,  1, -1, -1,  1, -1,  1, -1, -1,  1,  1,  1, -1,
        -1, -1,  1, -1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
         1,  1,  1, -1,  1,  1,  1,  1,  1, -1, -1,  1,  1,  1, -1, -1, -1,
         1,  1,  1,  1,  1, -1, -1, -1,  1,  1, -1, -1,  1,  1,  1, -1, -1,
         1,  1, -1,  1, -1,  1, -1, -1,  1, -1,  1,  1, -1,  1, -1,  1, -1,
         1,  1,  1,  1,  1,  1, -1, -1, -1, -1,  1,  1, -1, -1, -1, -1, -1,
        -1,  1, -1, -1,  1, -1, -1,  1, -1,  1,  1,  1,  1, -1, -1, -1, -1,
        -1, -1, -1, -1,  1,  1,  1,  1,  1, -1, -1, -1,  1,  1, -1, -1,  1,
         1, -1,  1, -1,  1, -1,  1, -1,  1, -1, -1,  1, -1, -1,  1, -1, -1,
        -1, -1,  1,  1, -1, -1,  1, -1,  1, -1,  1,  1, -1, -1, -1,  1,  1,
        -1, -1,  1,  1, -1, -1,  1, -1,  1, -1, -1, -1,  1,  1,  1,  1, -1,
        -1,  1, -1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,  1,  1,  1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1,  1,  1,  1, -1,
        -1, 

In [160]:
# Re-shuffle the training split before the single training pass.
(train_set, train_label) = utils.Shuffle(train_set, train_label)

N = 1000
# W is the N x N associative memory mapping an encoded image to the random
# code of its digit class.
W = np.zeros((N, N), dtype=np.float32)
# One random bipolar code per digit class (row index = class).
random_labels = random_vec(10, N)
# Fresh random projection from the 784 pixels to N dimensions.
projection_mat = np.random.random((784, N))-.5

# Training: one pass over the training samples, one update per sample, this
# time with the mean-centred encoding (project_mnist2).
for i in range(train_label.shape[0]):
    if i%1000 == 0:
        # Progress marker plus the current largest weight. The formatted
        # string prints the same text on Python 2 and Python 3, unlike the
        # statement form `print i, np.max(W)`.
        print("%s %s" % (i, np.max(W)))
    dat = project_mnist2(train_set[i], projection_mat).reshape((1, N))
    label = random_labels[int(train_label[i])].reshape((1, N))

    # Difference between the target code and what W currently recalls for
    # this sample; it is written back into W as an outer product with the
    # sample's code.
    pred = (label-dat.dot(W)/N)
    W += dat.T.dot(pred)

# Scoring on the held-out samples:
#   k1 -- dot product between the recalled vector (scaled by 1/N) and the
#         true class code
#   k2 -- sign-agreement score (hamming) between the recalled vector and the
#         true class code
k1 = []
k2 = []
for j in range(test_label.shape[0]):
    dat = project_mnist2(test_set[j], projection_mat).reshape((1, N))
    label = random_labels[int(test_label[j])].reshape((1, N))
    # Recall once and reuse it for both scores.
    recalled = dat.dot(W)
    k1.append((recalled/N).dot(label.T))
    k2.append(hamming(recalled, label))



0 0.0
1000 86.0817
2000 90.4369
3000 104.916
4000 118.652
5000 122.507
6000 119.54
7000 122.783
8000 128.634
9000 114.259
10000 121.736
11000 126.908
12000 143.96
13000 147.363
14000 130.598
15000 128.099
16000 139.321
17000 128.377
18000 125.172
19000 122.686
20000 139.842
21000 149.125
22000 151.954
23000 134.538
24000 148.038
25000 158.966
26000 164.271
27000 171.283
28000 181.035
29000 182.162
30000 171.856
31000 181.242
32000 190.699
33000 192.39
34000 189.505
35000 185.982
36000 197.219
37000 180.742
38000 177.633
39000 174.26
40000 187.258
41000 175.11
42000 184.488
43000 191.038
44000 187.454
45000 185.747
46000 178.363
47000 189.159
48000 174.379
49000 170.272
50000 165.566
51000 164.561
52000 179.977
53000 168.8
54000 172.954
55000 167.377


array([[ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       ..., 
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan]], dtype=float32)