In [1]:
# Import needed files and basic setup
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision

import numpy as np

import matplotlib
import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D

import data_gen2
import tropical

from ipywidgets import Output
from IPython.display import display, Markdown, Latex, Math, clear_output

from sklearn import neighbors

import math

from cvxopt import solvers, matrix

import time

import scipy

%matplotlib notebook
#plt.ion()

In [2]:
# Hyperparameters
n_epochs = 100
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 100

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x7fe8505bd7b0>

In [3]:
# Load training and testing sets
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('files/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('files/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_test, shuffle=True)

In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)

In [5]:
doTrain = True
if doTrain:
    network = Net()
    optimizer = optim.SGD(network.parameters(), lr=learning_rate,
                          momentum=momentum)

In [6]:
if doTrain:
    train_losses = []
    train_acc = []
    train_counter = []
    test_losses = []
    test_acc = []
    test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

In [7]:
def train(epoch):
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = network(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            pred = output.data.max(1, keepdim=True)[1]
            correct = pred.eq(target.data.view_as(pred)).sum()
            
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(
                  epoch, batch_idx * len(data), len(train_loader.dataset),
                  100. * batch_idx / len(train_loader), loss.item(), 100. * correct / 64))
            
            train_losses.append(loss.item())
            train_acc.append(100. * correct / 64)
            train_counter.append((batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))

In [8]:
def test():
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = network(data)
            test_loss += F.nll_loss(output, target, size_average=False).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    test_acc.append(100. * correct / len(test_loader.dataset))
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
          test_loss, correct, len(test_loader.dataset),
          100. * correct / len(test_loader.dataset)))

In [9]:
if doTrain:
    test()
    for epoch in range(1, n_epochs + 1):
        train(epoch)
        test()

  if sys.path[0] == '':



Test set: Avg. loss: 2.2975, Accuracy: 1076/10000 (10%)


Test set: Avg. loss: 0.2697, Accuracy: 9248/10000 (92%)


Test set: Avg. loss: 0.2058, Accuracy: 9409/10000 (94%)


Test set: Avg. loss: 0.1739, Accuracy: 9506/10000 (95%)


Test set: Avg. loss: 0.1520, Accuracy: 9555/10000 (95%)


Test set: Avg. loss: 0.1405, Accuracy: 9580/10000 (95%)


Test set: Avg. loss: 0.1285, Accuracy: 9620/10000 (96%)


Test set: Avg. loss: 0.1204, Accuracy: 9643/10000 (96%)


Test set: Avg. loss: 0.1153, Accuracy: 9669/10000 (96%)


Test set: Avg. loss: 0.1100, Accuracy: 9671/10000 (96%)


Test set: Avg. loss: 0.1049, Accuracy: 9681/10000 (96%)




Test set: Avg. loss: 0.1008, Accuracy: 9707/10000 (97%)


Test set: Avg. loss: 0.0958, Accuracy: 9724/10000 (97%)


Test set: Avg. loss: 0.0954, Accuracy: 9708/10000 (97%)


Test set: Avg. loss: 0.0923, Accuracy: 9719/10000 (97%)


Test set: Avg. loss: 0.0924, Accuracy: 9721/10000 (97%)


Test set: Avg. loss: 0.0877, Accuracy: 9736/10000 (97%)


Test set: Avg. loss: 0.0867, Accuracy: 9742/10000 (97%)


Test set: Avg. loss: 0.0849, Accuracy: 9742/10000 (97%)


Test set: Avg. loss: 0.0840, Accuracy: 9750/10000 (97%)


Test set: Avg. loss: 0.0834, Accuracy: 9752/10000 (97%)


Test set: Avg. loss: 0.0830, Accuracy: 9758/10000 (97%)




Test set: Avg. loss: 0.0812, Accuracy: 9754/10000 (97%)


Test set: Avg. loss: 0.0785, Accuracy: 9766/10000 (97%)


Test set: Avg. loss: 0.0800, Accuracy: 9760/10000 (97%)


Test set: Avg. loss: 0.0782, Accuracy: 9761/10000 (97%)


Test set: Avg. loss: 0.0775, Accuracy: 9756/10000 (97%)


Test set: Avg. loss: 0.0781, Accuracy: 9766/10000 (97%)


Test set: Avg. loss: 0.0775, Accuracy: 9759/10000 (97%)


Test set: Avg. loss: 0.0764, Accuracy: 9766/10000 (97%)


Test set: Avg. loss: 0.0761, Accuracy: 9772/10000 (97%)


Test set: Avg. loss: 0.0755, Accuracy: 9770/10000 (97%)


Test set: Avg. loss: 0.0750, Accuracy: 9771/10000 (97%)




Test set: Avg. loss: 0.0746, Accuracy: 9771/10000 (97%)


Test set: Avg. loss: 0.0728, Accuracy: 9778/10000 (97%)


Test set: Avg. loss: 0.0724, Accuracy: 9782/10000 (97%)


Test set: Avg. loss: 0.0743, Accuracy: 9782/10000 (97%)


Test set: Avg. loss: 0.0721, Accuracy: 9780/10000 (97%)


Test set: Avg. loss: 0.0717, Accuracy: 9786/10000 (97%)


Test set: Avg. loss: 0.0717, Accuracy: 9780/10000 (97%)


Test set: Avg. loss: 0.0715, Accuracy: 9784/10000 (97%)


Test set: Avg. loss: 0.0711, Accuracy: 9778/10000 (97%)


Test set: Avg. loss: 0.0731, Accuracy: 9786/10000 (97%)


Test set: Avg. loss: 0.0712, Accuracy: 9780/10000 (97%)




Test set: Avg. loss: 0.0702, Accuracy: 9787/10000 (97%)


Test set: Avg. loss: 0.0715, Accuracy: 9782/10000 (97%)


Test set: Avg. loss: 0.0714, Accuracy: 9785/10000 (97%)


Test set: Avg. loss: 0.0704, Accuracy: 9793/10000 (97%)


Test set: Avg. loss: 0.0698, Accuracy: 9789/10000 (97%)


Test set: Avg. loss: 0.0671, Accuracy: 9794/10000 (97%)


Test set: Avg. loss: 0.0689, Accuracy: 9788/10000 (97%)


Test set: Avg. loss: 0.0693, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0699, Accuracy: 9780/10000 (97%)


Test set: Avg. loss: 0.0693, Accuracy: 9792/10000 (97%)


Test set: Avg. loss: 0.0680, Accuracy: 9788/10000 (97%)




Test set: Avg. loss: 0.0681, Accuracy: 9793/10000 (97%)


Test set: Avg. loss: 0.0673, Accuracy: 9789/10000 (97%)


Test set: Avg. loss: 0.0688, Accuracy: 9794/10000 (97%)


Test set: Avg. loss: 0.0702, Accuracy: 9792/10000 (97%)


Test set: Avg. loss: 0.0683, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0700, Accuracy: 9789/10000 (97%)


Test set: Avg. loss: 0.0685, Accuracy: 9802/10000 (98%)


Test set: Avg. loss: 0.0683, Accuracy: 9793/10000 (97%)


Test set: Avg. loss: 0.0676, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0703, Accuracy: 9789/10000 (97%)




Test set: Avg. loss: 0.0709, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0687, Accuracy: 9796/10000 (97%)


Test set: Avg. loss: 0.0675, Accuracy: 9791/10000 (97%)


Test set: Avg. loss: 0.0688, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0701, Accuracy: 9794/10000 (97%)


Test set: Avg. loss: 0.0700, Accuracy: 9785/10000 (97%)


Test set: Avg. loss: 0.0682, Accuracy: 9798/10000 (97%)


Test set: Avg. loss: 0.0693, Accuracy: 9794/10000 (97%)


Test set: Avg. loss: 0.0698, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0702, Accuracy: 9791/10000 (97%)


Test set: Avg. loss: 0.0691, Accuracy: 9797/10000 (97%)




Test set: Avg. loss: 0.0689, Accuracy: 9803/10000 (98%)


Test set: Avg. loss: 0.0691, Accuracy: 9796/10000 (97%)


Test set: Avg. loss: 0.0679, Accuracy: 9801/10000 (98%)


Test set: Avg. loss: 0.0685, Accuracy: 9802/10000 (98%)


Test set: Avg. loss: 0.0688, Accuracy: 9800/10000 (98%)


Test set: Avg. loss: 0.0697, Accuracy: 9800/10000 (98%)


Test set: Avg. loss: 0.0682, Accuracy: 9800/10000 (98%)


Test set: Avg. loss: 0.0687, Accuracy: 9794/10000 (97%)


Test set: Avg. loss: 0.0689, Accuracy: 9786/10000 (97%)


Test set: Avg. loss: 0.0685, Accuracy: 9797/10000 (97%)


Test set: Avg. loss: 0.0695, Accuracy: 9791/10000 (97%)




Test set: Avg. loss: 0.0704, Accuracy: 9798/10000 (97%)


Test set: Avg. loss: 0.0678, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0676, Accuracy: 9798/10000 (97%)


Test set: Avg. loss: 0.0685, Accuracy: 9802/10000 (98%)


Test set: Avg. loss: 0.0701, Accuracy: 9801/10000 (98%)


Test set: Avg. loss: 0.0687, Accuracy: 9803/10000 (98%)


Test set: Avg. loss: 0.0694, Accuracy: 9802/10000 (98%)


Test set: Avg. loss: 0.0692, Accuracy: 9805/10000 (98%)


Test set: Avg. loss: 0.0692, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0694, Accuracy: 9794/10000 (97%)


Test set: Avg. loss: 0.0699, Accuracy: 9792/10000 (97%)




Test set: Avg. loss: 0.0688, Accuracy: 9800/10000 (98%)


Test set: Avg. loss: 0.0701, Accuracy: 9795/10000 (97%)


Test set: Avg. loss: 0.0698, Accuracy: 9803/10000 (98%)



In [10]:
if doTrain:
    params = []
    for param in network.parameters():
        params.append(param.detach().numpy())

    A1 = params[0]
    b1 = params[1]
    A2 = params[2]
    b2 = params[3]

In [11]:
if doTrain:
    np.savez('parameters', A1=A1, b1=b1, A2=A2, b2=b2)

In [24]:
npzfile = np.load('parameters.npz')

A1 = npzfile['A1']
b1 = npzfile['b1']
A2 = npzfile['A2']
b2 = npzfile['b2']

In [14]:
# Get all the inputs and targets
inputs = np.zeros((0, 784))
targets = np.zeros((0, 1))
for batch_idx, (data, target) in enumerate(train_loader):
    x = data.view(-1, 28*28).numpy()
    y = target.numpy()
    
    inputs = np.vstack((inputs, x))
    targets = np.vstack([targets, y.reshape((-1, 1))])
    
# Create the KNN for the original datapoints
inputVector = neighbors.KNeighborsClassifier(3, weights='distance', p=2)
inputVector.fit(inputs, targets.ravel())

# Create the KNN for the feature vectors
features = np.matmul(inputs, A1.T) + b1
inputs = None
featureVector = neighbors.KNeighborsClassifier(3, weights='distance', p=2)
featureVector.fit(features, targets.ravel())

# Create the KNN for the binary vectors
featuresBinary = features > 0
features = None
binaryVector = neighbors.KNeighborsClassifier(3, weights='distance', metric='hamming')
binaryVector.fit(featuresBinary, targets.ravel())
featuresBinary = None

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='distance')

In [25]:
print(A2.shape)
print(A1.shape)

(1, 128)
(128, 784)


In [42]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(sorted(np.abs(np.multiply(A2, np.power(np.linalg.norm(A1, axis=1), 2))).T));
ax.set_title('Representative magnitude of weights')
ax.set_xlabel('Weight $i$')
ax.set_ylabel('$||A1_i||_2^2 * A2_i$');

<IPython.core.display.Javascript object>

In [17]:
# K nearest neighbors test
def test():
    vals = []
    for i in range(1, 11):
        start = time.time()
        # Get all the inputs and targets
        inputs = np.zeros((0, 784))
        targets = np.zeros((0, 1))
        for batch_idx, (data, target) in enumerate(train_loader):
            x = data.view(-1, 28*28).numpy()
            y = target.numpy()

            inputs = np.vstack((inputs, x))
            targets = np.vstack([targets, y.reshape((-1, 1))])

        # Create the KNN for the original datapoints
        inputVector = neighbors.KNeighborsClassifier(i, weights='distance', p=2)
        inputVector.fit(inputs, targets.ravel())

        # Create the KNN for the feature vectors
        features = np.matmul(inputs, A1.T) + b1
        inputs = None
        featureVector = neighbors.KNeighborsClassifier(i, weights='distance', p=2)
        featureVector.fit(features, targets.ravel())

        # Create the KNN for the binary vectors
        featuresBinary = features > 0
        features = None
        binaryVector = neighbors.KNeighborsClassifier(i, weights='distance', metric='hamming')
        binaryVector.fit(featuresBinary, targets.ravel())
        featuresBinary = None

        correctInput = 0
        correctFeature = 0
        correctBinary = 0

        timeInput = 0
        timeFeature = 0
        timeBinary = 0

        for data, target in test_loader:

            data = data.view(-1, 784).numpy()

            start = time.time()
            output1 = inputVector.predict(data)
            timeInput += time.time() - start

            data = np.matmul(data, A1.T) + b1
            start = time.time()
            output2 = featureVector.predict(data)
            timeFeature += time.time() - start

            start = time.time()
            data = data > 0
            output3 = binaryVector.predict(data)
            timeBinary += time.time() - start

            correctInput += np.sum(output1 == target.numpy())
            correctFeature += np.sum(output2 == target.numpy())
            correctBinary += np.sum(output3 == target.numpy())

        vals.append((correctInput, correctFeature, correctBinary))
        print(vals[-1], time.time() - start)
    return vals

In [18]:
vals = test()
# Original, feature representation, binary
# correct/time

(9691, 9757, 9746) 8.544295310974121
(9691, 9757, 9748) 8.81536078453064
(9717, 9767, 9747) 9.402190446853638
(9714, 9771, 9758) 9.48528504371643
(9691, 9757, 9758) 9.656675577163696
(9709, 9759, 9757) 9.344479084014893
(9700, 9748, 9751) 9.457987070083618
(9706, 9749, 9753) 9.763645887374878
(9673, 9740, 9751) 9.78117847442627
(9684, 9742, 9747) 9.717866659164429


In [19]:
original = [val[0]/100 for val in vals]
feature = [val[1]/100 for val in vals]
binary = [val[2]/100 for val in vals]

fig = plt.figure()
ax = fig.add_subplot(111)

ax.plot(original, label='Input')
ax.plot(feature, label='Feature')
ax.plot(binary, label='Binary')
ax.legend()
ax.set_xlabel('K')
ax.set_ylabel('Accuracy')

<IPython.core.display.Javascript object>

Text(0,0.5,'Accuracy')

In [None]:
# Connected components test

In [8]:
# Get and save the feature for each sample
features = np.zeros((0, 128))
binaryFeatures = np.zeros((0, 128), dtype=np.bool)
targets = np.zeros((0, 1))

for batch_idx, (data, target) in enumerate(train_loader):
    x = data.view(-1, 28*28).numpy()
    y = target.numpy()
    
    feature = np.matmul(x, A1.T) + b1
    
    features = np.vstack((features, feature))
    binaryFeatures = np.vstack((binaryFeatures, feature > 0))
    targets = np.vstack([targets, y.reshape((-1, 1))])
    
np.savez('features', features=features, targets=targets)
np.savez('binaryFeatures', binaryFeatures=binaryFeatures, targets=targets)

In [9]:
features = None

In [4]:
npzfile = np.load('binaryFeatures.npz')

binaryFeatures = npzfile['binaryFeatures']
targets = npzfile['targets']

In [5]:
print(binaryFeatures.shape)

(60000, 128)


In [11]:
distances = None

In [14]:
binaryFeatures = binaryFeatures[targets.ravel() == 5, :]

In [15]:
print(binaryFeatures.shape)

(5421, 128)


In [8]:
def calcDistDistribution(features):
    # Distances
    distances = scipy.spatial.distance.squareform((scipy.spatial.distance.pdist(features, metric='hamming')*128).astype(np.uint8))
    
    # How many of each distance
    dists = []
    for i in range(129):
        if i == 0:
            #print(i, (np.sum(distances == i) - binaryFeatures.shape[0])/2)
            dists.append((np.sum(distances == i) - binaryFeatures.shape[0])//2)
        else:
            #print(i, np.sum(distances == i)/2)
            dists.append(np.sum(distances == i)//2)
            
    # How many connected components
    comps = []
    foundOne = 0
    for i in range(1, 128):
        tempDistances = np.copy(distances)
        tempDistances[tempDistances > i] = 0
        comps.append(scipy.sparse.csgraph.connected_components(tempDistances)[0])
        if comps[-1] == 1:
            foundOne = i
            break
    for i in range(foundOne, 128):
        comps.append(1)
        
    return distances, dists, comps

In [9]:
# Plot distributions
fig = plt.figure(figsize=(10, 30))
fig.tight_layout()

axFull = fig.add_subplot(1, 2, 1)
axFull.set_title('Distance distribution')
axFull.set_axis_off()
axFull = fig.add_subplot(1, 2, 2)
axFull.set_title('Components distribution')
axFull.set_axis_off()

for i in range(10):
    axFull = fig.add_subplot(10, 1, i+1)
    ax1 = fig.add_subplot(10, 2, i*2+1)
    ax2 = fig.add_subplot(10, 2, i*2+2)
    
    distances, dists, comps = calcDistDistribution(binaryFeatures[targets.ravel() == i, :])
    
    print("{}:\t mean: {}\t median: {}\t max: {}\t min: {}".format(i, np.mean(distances), np.median(distances), np.amax(distances), np.amin(distances[distances > 0])))
    
    ax1.plot(dists/sum(dists))
    ax2.plot(comps)
    axFull.set_title('{}'.format(i))
    axFull.set_axis_off()

<IPython.core.display.Javascript object>

0:	 mean: 22.386270264671023	 median: 22.0	 max: 69	 min: 1
1:	 mean: 26.290450789496077	 median: 25.0	 max: 80	 min: 1
2:	 mean: 36.154774706373054	 median: 36.0	 max: 91	 min: 1
3:	 mean: 34.755386532835885	 median: 34.0	 max: 87	 min: 1
4:	 mean: 29.77592135524536	 median: 29.0	 max: 82	 min: 1
5:	 mean: 40.44144994761502	 median: 40.0	 max: 88	 min: 1
6:	 mean: 28.21706307025119	 median: 28.0	 max: 76	 min: 1
7:	 mean: 30.217072742895105	 median: 29.0	 max: 84	 min: 1
8:	 mean: 32.25725899079695	 median: 32.0	 max: 79	 min: 1
9:	 mean: 32.21811712098362	 median: 32.0	 max: 82	 min: 1


In [10]:
distances = np.zeros((binaryFeatures.shape[0], binaryFeatures.shape[0]), dtype=np.uint8)

for i in range(binaryFeatures.shape[0]):
    for j in range(i, binaryFeatures.shape[0]):
        distances[i, j] = np.sum(np.logical_xor(binaryFeatures[i, :], binaryFeatures[j, :]))
        distances[j, i] = distances[i, j]

In [11]:
np.save('fullHammingDistances', distances)

In [6]:
distances = np.load('fullHammingDistances.npy')
print(distances.shape)

(60000, 60000)


In [12]:
print("{}:\t mean: {}\t median: {}\t max: {}\t min: {}".format(i, np.mean(distances), np.median(distances), np.amax(distances), np.amin(distances[distances > 0])))

59999:	 mean: 52.79831396944444	 median: 54.0	 max: 104	 min: 1


In [7]:
# Calc full distribution
dists = []
for i in range(129):
    if i == 0:
        #print(i, (np.sum(distances == i) - binaryFeatures.shape[0])/2)
        dists.append((np.sum(distances == i) - binaryFeatures.shape[0])//2)
    else:
        #print(i, np.sum(distances == i)/2)
        dists.append(np.sum(distances == i)//2)

# How many connected components
# comps = []
# foundOne = 0
# for i in range(1, 128):
#     tempDistances = np.copy(distances)
#     tempDistances[tempDistances > i] = 0
#     comps.append(scipy.sparse.csgraph.connected_components(tempDistances)[0])
#     if comps[-1] == 1:
#         foundOne = i
#         break
# for i in range(foundOne, 128):
#     comps.append(1)

In [8]:
# Plot full distribution
fig = plt.figure(figsize=(5, 5))
ax1 = fig.add_subplot(1, 1, 1)

#print("{}:\t mean: {}\t median: {}\t max: {}\t min: {}".format(i, np.mean(distances), np.median(distances), np.amax(distances), np.amin(distances[distances > 0])))

ax1.plot(dists/np.sum(dists))
#ax2.plot(comps)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f999b2127b8>]

In [15]:
# Plot full distribution
fig = plt.figure(figsize=(10, 5))
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

#print("{}:\t mean: {}\t median: {}\t max: {}\t min: {}".format(i, np.mean(distances), np.median(distances), np.amax(distances), np.amin(distances[distances > 0])))

ax1.plot(dists/np.sum(dists))
#ax2.plot(comps)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f989ae3a240>]

In [7]:
# Plot box and whiskers
fig = plt.figure(figsize=(10, 50))
for i in range(10):
    ax = plt.subplot(10, 1, i+1)
    distances = scipy.spatial.distance.squareform((scipy.spatial.distance.pdist(binaryFeatures[targets.ravel() == i, :], metric='hamming')*128).astype(np.uint8))
    indices = np.eye(distances.shape[0], dtype=bool)
    
    means = np.mean(distances, axis=0)
    medians = np.median(distances, axis=0)
    maxs = np.amax(distances, axis=0)
    distances[indices] = 129
    mins = np.amin(distances, axis=0)
    ax.boxplot([means, medians, maxs, mins])
    ax.set_xticklabels(['mean', 'median', 'max', 'min'])
    ax.set_title('Class {}'.format(i))
    ax.set_ylim([0, 85])

<IPython.core.display.Javascript object>

In [41]:
npzfile = np.load('binaryFeatures.npz')

binaryFeatures = npzfile['binaryFeatures']
targets = npzfile['targets']

npzfile = np.load('parameters.npz')

A1 = npzfile['A1']
b1 = npzfile['b1']
A2 = npzfile['A2']
b2 = npzfile['b2']

In [50]:
print(binaryFeatures.shape)
print(A1.shape)
print(A2.shape)

wVecs = np.zeros((0, 784))
for i in range(binaryFeatures.shape[0]):
    wVecs = np.vstack((wVecs, np.matmul(A2[:, binaryFeatures[i].ravel()], A1[binaryFeatures[i].ravel(), :])))
print(wVecs.shape)

# fig = plt.figure(figsize=(10, 50))
# for i in range(10):
#     ax = plt.subplot(10, 1, i+1)
#     distances = scipy.spatial.distance.squareform((scipy.spatial.distance.pdist(binaryFeatures[targets.ravel() == i, :], metric='hamming')*128).astype(np.uint8))
#     indices = np.eye(distances.shape[0], dtype=bool)




(60000, 128)
(128, 784)
(1, 128)


KeyboardInterrupt: 

In [53]:
np.savez('wVecs', wVecs=wVecs, targets=targets)

In [20]:
npzfile = np.load('wVecs.npz')

wVecs = npzfile['wVecs']
targets = npzfile['targets']

In [63]:
from sklearn.decomposition import PCA

In [59]:
print(targets.shape)

(60000, 1)


In [81]:
# PCA
fig = plt.figure(figsize=(10, 30))

axFull = fig.add_subplot(1, 2, 1)
axFull.set_title('Singular values')
axFull.set_axis_off()
axFull = fig.add_subplot(1, 2, 2)
axFull.set_title('Cumulative explained variance')
axFull.set_axis_off()

for i in range(10):
    axFull = fig.add_subplot(10, 1, i+1)
    ax1 = fig.add_subplot(10, 2, 2*i + 1)
    ax2 = fig.add_subplot(10, 2, 2*i + 2)
    
    wVecsClass = wVecs[targets.ravel() == i, :]
    pika = PCA(n_components=40)
    pika.fit(wVecsClass)
    ax1.plot(pika.singular_values_)
    ax2.plot(np.cumsum(pika.explained_variance_ratio_))
    ax2.set_ylim([0, 1])
    axFull.set_title(i)
    axFull.set_axis_off()

<IPython.core.display.Javascript object>

In [21]:
# Plot box and whisker plots
fig = plt.figure(figsize=(10, 50))
fig.tight_layout()

for i in range(10):
    ax = plt.subplot(10, 1, i+1)
    
    wVecsClass = wVecs[targets.ravel() == i, :]
    distances = scipy.spatial.distance.squareform((scipy.spatial.distance.pdist(wVecsClass, metric='cosine')))
    
    indices = np.eye(distances.shape[0], dtype=bool)
    means = np.mean(distances, axis=0)
    medians = np.median(distances, axis=0)
    maxs = np.amax(distances, axis=0)
    distances[indices] = 101
    mins = np.amin(distances, axis=0)
    
    #print("{}:\t mean: {}\t median: {}\t max: {}\t min: {}".format(i, np.mean(distances), np.median(distances), np.amax(distances), np.amin(distances[distances > 0])))
    
    ax.boxplot([means, medians, maxs, mins])
    ax.set_xticklabels(['mean', 'median', 'max', 'min'])
    ax.set_title('Class {}'.format(i))
    ax.set_ylim([0, 2])

<IPython.core.display.Javascript object>

In [43]:
# Plot distributions
fig = plt.figure(figsize=(10, 30))
fig.tight_layout()

for i in range(10):
    axFull = fig.add_subplot(10, 1, i+1)
    
    wVecsClass = wVecs[targets.ravel() == i, :]
    distances = scipy.spatial.distance.squareform((scipy.spatial.distance.pdist(wVecsClass, metric='cosine')))
    
    dists = []
    for j in range(100):
        if j == 0:
            dists.append((np.sum(np.logical_and(j*(2/100) < distances, distances < (j+1)*(2/100))) - distances.shape[0])//2)
        else:
            dists.append(np.sum(np.logical_and(j*(2/100) < distances, distances < (j+1)*(2/100)))//2)
    
    axFull.plot(np.linspace(0, 2, 100), dists/sum(dists))
    axFull.set_title('{}'.format(i))

<IPython.core.display.Javascript object>