In [1]:
from zipfile import ZipFile
import numpy as np

'''load your data here'''

class DataLoader(object):
    def __init__(self):
        DIR = '../data/'
        pass
    
    # Returns images and labels corresponding for training and testing. Default mode is train. 
    # For retrieving test data pass mode as 'test' in function call.
    def load_data(self, mode = 'train'):
        label_filename = mode + '_labels'
        image_filename = mode + '_images'
        label_zip = '../data/' + label_filename + '.zip'
        image_zip = '../data/' + image_filename + '.zip'
        with ZipFile(label_zip, 'r') as lblzip:
            labels = np.frombuffer(lblzip.read(label_filename), dtype=np.uint8, offset=8)
        with ZipFile(image_zip, 'r') as imgzip:
            images = np.frombuffer(imgzip.read(image_filename), dtype=np.uint8, offset=16).reshape(len(labels), 784)
        return images, labels

In [2]:
data_loader = DataLoader()
train_images, train_labels = data_loader.load_data('train')
test_images, test_labels = data_loader.load_data('test')

In [3]:
NUM_TRAIN = int(.7*len(train_labels))
NUM_TRAIN

42000

In [4]:
#SHUFFLE
np.random.seed(42)
perm = np.random.permutation(train_images.shape[0])
train_images = train_images[perm]
train_labels = train_labels[perm]

#Split into train and val
val_images = train_images[NUM_TRAIN:]
val_labels = train_labels[NUM_TRAIN:]
train_images = train_images[:NUM_TRAIN]
train_labels = train_labels[:NUM_TRAIN]

In [5]:
import mxnet as mx
from mxnet import nd, autograd, gluon

  from numpy.testing.decorators import setastest
  import OpenSSL.SSL


In [6]:
#Network 1
def network_1():
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(512, activation="relu"))
        net.add(gluon.nn.Dense(128, activation="relu"))
        net.add(gluon.nn.Dense(64, activation="relu"))
        net.add(gluon.nn.Dense(32, activation="relu"))
        net.add(gluon.nn.Dense(16, activation="relu"))
        net.add(gluon.nn.Dense(10))
    
    return net
 
#Network 2
def network_2():
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(1024, activation="relu"))
        net.add(gluon.nn.Dense(512, activation="relu"))
        net.add(gluon.nn.Dense(256, activation="relu"))
        net.add(gluon.nn.Dense(10))
        
    return net

In [7]:
NUM_EPOCHS = 1000
net1 = network_2()
net1.collect_params().initialize(mx.init.Normal(sigma=.1))
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net1.collect_params(), 'sgd', {'learning_rate': .0001})

In [8]:
losses = []
train_images = mx.nd.array(train_images)
train_labels = mx.nd.array(train_labels)
val_images = mx.nd.array(val_images)
val_labels = mx.nd.array(val_labels)
for e in range(NUM_EPOCHS):
    for i in range(5):
        with autograd.record():
            output = net1(train_images[i*7000:(i+1)*7000])
            loss = softmax_cross_entropy(output,train_labels[i*7000:(i+1)*7000])
        loss.backward()
        trainer.step(7000)
        #losses.append(loss)
        acc = mx.metric.Accuracy()
        out_val = net1(val_images)
        acc.update(preds=nd.argmax(out_val,axis=1),labels=val_labels)
        print("Epoch %d : Loss : %f, Val Accuracy : %f"%(e,nd.sum(loss).asscalar(),acc.get()[1]))

Epoch 0 : Loss : 9835163.000000, Val Accuracy : 0.134389
Epoch 0 : Loss : 21202528.000000, Val Accuracy : 0.181778


KeyboardInterrupt: 

In [None]:
lss = [nd.sum(loss).asscalar() for loss in losses]

In [None]:
lss

In [9]:
dc = net1.collect_params()
i = 0
w1 = 0
b1 = 0
all_params = []
for k,v in dc.items():
    if(i == 2):
        break
    all_params.append(v.data().asnumpy())
    i+=1
    print(k)
    print(v)
    print("============================")

sequential0_dense0_weight
Parameter sequential0_dense0_weight (shape=(1024, 784), dtype=float32)
sequential0_dense0_bias
Parameter sequential0_dense0_bias (shape=(1024,), dtype=float32)


In [10]:
w1 = all_params[0]
b1 = all_params[1]

In [13]:
y_test = test_images.dot(w1.T) + b1

In [14]:
y = train_images.asnumpy().dot(w1.T) + b1

In [15]:
from sklearn.linear_model import LogisticRegression

In [19]:
model = LogisticRegression(max_iter=10)

In [20]:
model.fit(y,train_labels.asnumpy())

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=10, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [22]:
pred = model.predict(y_test)

In [23]:
np.sum(pred==test_labels)/len(test_labels)

0.8368

In [24]:
len(test_labels)

10000

In [25]:
np.sum(pred==test_labels)

8368

In [28]:
print(pred)

[9. 2. 1. ... 8. 1. 5.]


In [29]:
print(test_labels)

[9 2 1 ... 8 1 5]


In [31]:
ct = 0
for i in range(len(test_labels)):
    if pred[i] != test_labels[i]:
        print(i)
        ct +=1

12
17
21
23
25
40
45
49
50
51
53
57
66
68
72
89
91
98
103
127
135
141
147
150
151
153
155
170
183
193
219
222
227
239
241
243
244
249
255
271
273
282
285
289
299
308
312
313
316
324
325
332
337
341
344
354
359
367
378
381
382
396
406
409
441
444
454
457
460
474
476
483
485
490
510
511
526
527
529
531
546
548
557
560
562
563
565
569
572
577
578
586
587
595
608
616
622
623
628
632
634
635
639
661
663
664
665
669
670
685
686
688
689
701
702
711
716
722
725
732
737
750
753
760
761
787
793
800
801
823
844
846
851
852
858
881
890
902
905
909
921
925
926
930
935
938
946
956
958
960
963
965
966
968
971
976
977
979
986
993
994
1002
1004
1005
1008
1019
1025
1027
1053
1055
1056
1058
1065
1074
1082
1096
1101
1104
1107
1108
1110
1111
1124
1125
1130
1131
1139
1152
1160
1162
1174
1194
1197
1207
1210
1213
1218
1223
1231
1235
1236
1249
1251
1254
1259
1277
1280
1284
1285
1294
1300
1305
1326
1329
1334
1336
1339
1353
1359
1365
1374
1377
1388
1396
1399
1408
1449
1455
1458
1462
1483
1487
1489
1495
1496
1501

In [32]:
ct

1632