In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import math
import random
import sys
import time

import joblib
import numpy as np
import sklearn
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from tqdm import tqdm_notebook
from tqdm.notebook import tqdm

import Config
import Dataloader as DL
import HD_basis as HDB
import HD_encoder as HDE
import HD_classifier as HDC


In [3]:
# Load the dataset through the project Dataloader; getParam() yields the
# feature/class counts together with the train and test splits.
dl = DL.Dataloader()
nFeatures, nClasses, traindata, trainlabels, testdata, testlabels = dl.getParam()

# Only the first N_TRAIN training samples are used. The labels are truncated
# together with the samples so the two stay aligned when they are encoded,
# saved and passed to the classifier in later cells.
N_TRAIN = 20000
traindata = traindata[:N_TRAIN]
trainlabels = trainlabels[:N_TRAIN]

Loading dataset MNIST from MNIST
Loading train data... train data of shape (60000, 784) loaded
Loading test data...  test  data of shape (10000, 784) loaded
Data Loaded. Num of features = 784 Num of Classes = 10

In [4]:
# Start from the project-wide configuration and record the dataset dimensions.
# NOTE(review): `param` is bound to the Config.config object itself (no copy is
# made here), so the two assignments below are written into Config.config too.
param = Config.config
param["nFeatures"] = nFeatures
param["nClasses"] = nClasses
print(param)

{'data_location': '../dataset/', 'directory': 'MNIST', 'dataset': 'MNIST', 'D': 2500, 'vector': 'Gaussian', 'mu': 0, 'sigma': 1, 'binarize': 0, 'lr': 0.037, 'sparse': 0, 's': 0.1, 'binaryModel': 0, 'width': None, 'height': None, 'nLayers': 5, 'uniform_dim': 1, 'uniform_ker': 1, 'dArr': None, 'k': 5, 'kArr': None, 'one_shot': 0, 'data_percentages': [1.0, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5], 'train_percent': 1, 'dropout': 0, 'drop_percentages': [0, 0.1, 0.2, 0.5], 'dropout_rate': 0, 'update_type': <Update_T.FULL: 1>, 'iter_per_trial': 3, 'iter_per_encoding': 5, 'epochs': 50, 'nFeatures': 784, 'nClasses': 10}


In [5]:
################# VANILLA #################
# Build an HD basis with the Vanilla generator from the current parameters.
hdb = HDB.HD_basis(HDB.Generator.Vanilla, param)
basis = hdb.getBasis()
# The "id" entry of the generator's parameters is passed to HDE.saveEncoded and
# HDC.HD_classifier in later cells.
bid = hdb.getParam()["id"]
# Update param with bid
param = hdb.getParam()
print(bid)

Generating vanilla HD basis of shape... 

HBox(children=(IntProgress(value=0, description='vectors', max=2500, style=ProgressStyle(description_width='in…

(2500, 784)
Encoding time: 0.11269903182983398 
Dumping basis into file: base_4719.pkl 



4719


In [6]:
# Retrieve info up to the basis generator, given the correct bid
#bid = 6679
#basis, param = HDB.loadBasis("base_%d.pkl"%bid)

In [7]:
# Encode the training and test sets with the current basis, and save each
# encoding together with its labels, tagged with the basis id.
hde = HDE.HD_encoder(basis)

trainencoded = hde.encodeData(traindata)
HDE.saveEncoded(trainencoded, trainlabels, bid, "train")

testencoded = hde.encodeData(testdata)
# Last expression of the cell: its return value is displayed as the cell output.
HDE.saveEncoded(testencoded, testlabels, bid, "test")

Encoding data of shape (20000, 784)


HBox(children=(IntProgress(value=0, description='samples encoded', max=20000, style=ProgressStyle(description_…




Time spent: 274 sec
Dumping data into encoded_4719_train.pkl 
Encoding data of shape (10000, 784)


HBox(children=(IntProgress(value=0, description='samples encoded', max=10000, style=ProgressStyle(description_…




Time spent: 145 sec
Dumping data into encoded_4719_test.pkl 


'encoded_4719_test.pkl'

In [8]:
# Retrieve info up to the encoder, given the correct bid
#bid = 6679
#basis, param = HDB.loadBasis("base_%d.pkl"%bid)
#trainencoded, trainlabels = HDE.loadEncoded("encoded_%d_train.pkl"%bid)
#testencoded, testlabels = HDE.loadEncoded("encoded_%d_test.pkl"%bid)

In [5]:
# train data 
def train(hdc, traindata, trainlabels, testdata, testlabels, param=None):
    """Fit ``hdc`` epoch by epoch, evaluating on the test set after each epoch.

    Each epoch calls ``hdc.fit(traindata, trainlabels, param)`` and
    ``hdc.test(testdata, testlabels)``; the returned values are recorded as
    that epoch's training and test accuracy. Training stops early as soon as
    the recorded training accuracy equals 1.

    A progress line is printed every 5th epoch and once more at early stop;
    when both coincide the line is printed only once.

    Args:
        hdc: classifier object exposing ``fit`` and ``test`` as used above.
        traindata, trainlabels: training inputs forwarded to ``hdc.fit``.
        testdata, testlabels: evaluation inputs forwarded to ``hdc.test``.
        param: configuration mapping; ``param["epochs"]`` bounds the number
            of epochs and the whole mapping is forwarded to ``hdc.fit``.
            Defaults to ``Config.config``, looked up at call time.

    Returns:
        Tuple ``(train_acc, test_acc)`` of NumPy arrays with one entry per
        completed epoch.
    """
    if param is None:
        # Resolved on every call (not once at definition time) so that an
        # edited or auto-reloaded Config module is honoured.
        param = Config.config

    train_acc = []
    test_acc = []
    for _ in tqdm(range(param["epochs"]), desc='epochs'):
        train_acc.append(hdc.fit(traindata, trainlabels, param))
        test_acc.append(hdc.test(testdata, testlabels))

        converged = train_acc[-1] == 1
        # A single condition prevents the duplicate line that appeared when
        # convergence happened on an epoch that is a multiple of 5.
        if converged or len(train_acc) % 5 == 0:
            print("Train: %f \t \t Test: %f"%(train_acc[-1], test_acc[-1]))
        if converged:
            break
    return np.asarray(train_acc), np.asarray(test_acc)

In [6]:
def dump_log(param, train_acc, test_acc, filename):
    """Persist one run's configuration and accuracy curves.

    Writes the tuple ``(param, train_acc, test_acc)`` to ``filename`` with
    ``joblib.dump`` and compression enabled.

    Args:
        param: configuration of the run.
        train_acc: training accuracies to store.
        test_acc: test accuracies to store.
        filename: path of the file to create or overwrite.
    """
    # The context manager closes the handle even if dumping raises; the
    # original left the opened file object unclosed.
    with open(filename, "wb") as log_file:
        joblib.dump((param, train_acc, test_acc), log_file, compress=True)

In [11]:
# Should have 90%
# Train a fresh classifier on the current encodings and log the accuracies.
hdc = HDC.HD_classifier(param["D"], param["nClasses"], bid)
# `param` is passed explicitly so that training uses the same configuration
# that is written to the log below, instead of train()'s default.
train_acc, test_acc = train(hdc, trainencoded, trainlabels, testencoded, testlabels, param)
dump_log(param, train_acc, test_acc, "Baseline_"+param["id"]+".pickle")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """


HBox(children=(IntProgress(value=0, description='epochs', max=50, style=ProgressStyle(description_width='initi…

Fitting with configuration: [('one_shot', 0), ('dropout', 0), ('lr', 0.037)] 


Train: 0.981800 	 	 Test: 0.948700
Train: 0.996550 	 	 Test: 0.947200
Train: 0.997500 	 	 Test: 0.952500
Train: 0.997800 	 	 Test: 0.953500
Train: 0.999250 	 	 Test: 0.955800
Train: 1.000000 	 	 Test: 0.955400
Train: 1.000000 	 	 Test: 0.955400


In [7]:
################# BAKLAVA #################
# Disabled: re-derive param from the base configuration before generating.
#param = Config.config
#param["nFeatures"] = nFeatures
#param["nClasses"] = nClasses

# Build an HD basis with the Baklava generator from the current parameters.
hdb = HDB.HD_basis(HDB.Generator.Baklava, param)
basis = hdb.getBasis()
# The "id" entry of the generator's parameters is passed to HDE.saveEncoded and
# HDC.HD_classifier in later cells.
bid = hdb.getParam()["id"]
# Update param with bid
param = hdb.getParam()
print(bid)

(28, 28, 500)


Configuring baklava HD basis of 5 layers:
Layer No.0: 	 500 dimension vector with kernel of size 5
Layer No.1: 	 500 dimension vector with kernel of size 5
Layer No.2: 	 500 dimension vector with kernel of size 5
Layer No.3: 	 500 dimension vector with kernel of size 5
Layer No.4: 	 500 dimension vector with kernel of size 5
Encoding time: 0.10272550582885742 
Dumping basis into file: base_6687.pkl 


(28, 28, 500)
(28, 28, 500)
(28, 28, 500)
(28, 28, 500)
6687


In [8]:
# Retrieve info up to the basis generator, given the correct bid
#bid = 2775
#basis, param = HDB.loadBasis("base_%d.pkl"%bid)

In [9]:
# Encode the training and test sets with the current basis, and save each
# encoding together with its labels, tagged with the basis id.
hde = HDE.HD_encoder(basis)

trainencoded = hde.encodeData(traindata)
HDE.saveEncoded(trainencoded, trainlabels, bid, "train")

testencoded = hde.encodeData(testdata)
# Last expression of the cell: its return value is displayed as the cell output.
HDE.saveEncoded(testencoded, testlabels, bid, "test")

Encoding data of shape (20000, 784)


HBox(children=(IntProgress(value=0, description='samples encoded', max=20000, style=ProgressStyle(description_…




Time spent: 391 sec
Dumping data into encoded_6687_train.pkl 
Encoding data of shape (10000, 784)


HBox(children=(IntProgress(value=0, description='samples encoded', max=10000, style=ProgressStyle(description_…




Time spent: 197 sec
Dumping data into encoded_6687_test.pkl 


'encoded_6687_test.pkl'

In [10]:
# Should have 96%
# Train a fresh classifier on the current encodings and log the accuracies.
hdc = HDC.HD_classifier(param["D"], param["nClasses"], bid)
# `param` is passed explicitly so that training uses the same configuration
# that is written to the log below, instead of train()'s default.
train_acc, test_acc = train(hdc, trainencoded, trainlabels, testencoded, testlabels, param)
dump_log(param, train_acc, test_acc, "ConvHD_"+param["id"]+".pkl")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """


HBox(children=(IntProgress(value=0, description='epochs', max=50, style=ProgressStyle(description_width='initi…

Fitting with configuration: [('one_shot', 0), ('dropout', 0), ('lr', 0.037)] 


Train: 0.870650 	 	 Test: 0.687000
Train: 0.931450 	 	 Test: 0.743600
Train: 0.956500 	 	 Test: 0.737100
Train: 0.975500 	 	 Test: 0.756500
Train: 0.981350 	 	 Test: 0.753200
Train: 0.989350 	 	 Test: 0.756800
Train: 0.991250 	 	 Test: 0.756600
Train: 0.993350 	 	 Test: 0.755300
Train: 0.993450 	 	 Test: 0.753400
Train: 0.994200 	 	 Test: 0.752800



In [None]:
# Print the current parameter dictionary for inspection.
print(param)

In [29]:
# Override individual entries of the parameter dictionary in place.
# NOTE(review): these look like settings for a small debugging basis
# (49 features, presumably a 7x7 input) - confirm against the basis built later.
param["D"] = 18
param["nFeatures"] = 49
param["k"] = 3
param["width"] = None

In [106]:
# Display the current parameter dictionary as the cell output.
param

{'data_location': '../dataset/',
 'directory': 'MNIST',
 'dataset': 'MNIST',
 'D': 18,
 'vector': 'Gaussian',
 'mu': 0,
 'sigma': 1,
 'binarize': 0,
 'lr': 0.037,
 'sparse': 0,
 's': 0.1,
 'binaryModel': 0,
 'width': None,
 'height': None,
 'nLayers': 1,
 'uniform_dim': 1,
 'uniform_ker': 1,
 'dArr': [20],
 'k': 3,
 'kArr': [2],
 'one_shot': 0,
 'data_percentages': [1.0, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5],
 'train_percent': 1,
 'dropout': 0,
 'drop_percentages': [0, 0.1, 0.2, 0.5],
 'dropout_rate': 0,
 'update_type': <Update_T.FULL: 1>,
 'iter_per_trial': 3,
 'iter_per_encoding': 5,
 'epochs': 300,
 'nFeatures': 49,
 'nClasses': 10,
 'id': '4244',
 'gen_type': <Generator.Baklava: 2>}

In [107]:
# Print floats in fixed-point notation and never summarize arrays with "...",
# so every element of the arrays is printed in full.
np.set_printoptions(suppress=True)
np.set_printoptions(threshold=sys.maxsize)
# NOTE(review): despite the `hde_` prefix this is an HD_basis object, not an encoder.
hde_2 = HDB.HD_basis(HDB.Generator.Baklava, param)

tl shape(3, 3, 2)
tl shape(3, 3, 2)
tl shape(3, 3, 2)
tl shape(3, 3, 2)
(7, 7, 18)
BASIS
[[[ 0.89465018]
  [-0.3636299 ]
  [-0.93736594]
  [-0.16924253]
  [ 2.24553608]
  [-2.12263862]
  [ 1.16083571]
  [-1.30004509]
  [-0.25013847]
  [-1.03056662]
  [ 1.4592357 ]
  [ 0.04249397]
  [-2.02961641]
  [ 0.18363528]
  [ 0.61052385]
  [-0.27854874]
  [-0.3557393 ]
  [-1.15588722]]

 [[ 0.89465018]
  [-0.3636299 ]
  [-0.93736594]
  [-0.16924253]
  [-2.61121691]
  [-0.3140668 ]
  [ 1.16083571]
  [-1.30004509]
  [-0.25013847]
  [-1.03056662]
  [-0.4747853 ]
  [-1.61771779]
  [-2.02961641]
  [ 0.18363528]
  [ 0.61052385]
  [-0.27854874]
  [-1.17748125]
  [ 0.80410633]]

 [[ 0.89465018]
  [-0.3636299 ]
  [-0.81440009]
  [-0.19997482]
  [-2.61121691]
  [-0.3140668 ]
  [ 1.16083571]
  [-1.30004509]
  [ 0.55118527]
  [-0.52476617]
  [-0.4747853 ]
  [-1.61771779]
  [-2.02961641]
  [ 0.18363528]
  [ 1.15001779]
  [-0.45018479]
  [-1.17748125]
  [ 0.80410633]]

 [[-0.75662376]
  [-1.50591467]
  [-0.814

Configuring baklava HD basis of 1 layers:
Layer No.0: 	 18 dimension vector with kernel of size 3
Encoding time: 0.026927471160888672 
Dumping basis into file: base_4438.pkl 


In [108]:
# Fetch the generated basis from the HD_basis object built above.
base = hde_2.getBasis()

In [111]:
# Print how columns 0, 3, 21 and 24 of the basis differ from column 8.
reference_column = base[:, 8]
for column_index in (0, 3, 21, 24):
    print(base[:, column_index] - reference_column)

[ 0.          0.          0.          0.          4.856753   -1.80857182
  0.          0.          0.          0.          1.934021    1.66021177
 -2.30312327 -0.12713046  0.75668558 -1.05574931 -0.31279303 -0.35796743]
[-1.65127394 -1.14228477  0.12296585 -0.03073228  0.          0.
 -1.54863198  3.21190008  0.80132375  0.50580045  0.          0.
 -0.59613286 -2.03311389  1.29617953 -1.22738537 -1.13453498  1.60202612]
[-1.96270204  0.27931018 -0.12009234 -0.93319169  3.56221362 -0.05951439
 -1.74512608 -0.45263226  1.77347006  1.43881149 -0.11010587  1.55199348
  0.          0.          0.          0.          1.11343119  0.96399651]
[-2.92040673  0.6519046   0.08489368  0.03383545  1.44071439  0.15933281
 -1.35439863  0.76430806  0.17542158  0.64044691 -0.25521443  2.77741923
  0.01493934 -0.1128453  -1.03941094 -0.91095963  0.          0.        ]


In [None]:
## Random code
# Scratch experiments with np.concatenate along axis 2.
# np.empty allocates without initializing, so e and q hold arbitrary values.
e = np.empty((2,2,2), dtype = int)
q = np.empty((2,2,2), dtype = int)
f = np.asarray([[[1,2],[3,4]],[[5,6],[7,8]]])
g = f * 10
# Neither assigned nor the last expression of the cell, so this result is discarded.
np.concatenate((e, f, g), axis = 2)


# Four flat 9-element arrays in distinct value ranges (10s, 20s, 30s, 40s),
# which makes it easy to see where each input ends up in the result.
tl = np.asarray([10,11,12,13,14,15,16,17,18])
tr = np.asarray([20,21,22,23,24,25,26,27,28])
bl = np.asarray([30,31,32,33,34,35,36,37,38])
br = np.asarray([40,41,42,43,44,45,46,47,48])
#print(tl.shape)
#print(tl[0:2,0:2])
# NOTE(review): presumably tl/tr/bl/br stand for top-left/top-right/bottom-left/
# bottom-right and 3, 3 are kernel dimensions - confirm against HDB.bak_extend.
e = HDB.bak_extend(tl,tr,bl,br, 3, 3)
# Join three references to the same result along axis 2; as the last
# expression of the cell, the concatenated array is displayed.
f = []
f.append(e)
f.append(e)
f.append(e)
np.concatenate(f, axis = 2)