Project 1 data test

In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import os
import gzip


def load_mnist_images(filename):
    """Load a gzip-compressed MNIST image file as a 2-D float array.

    The file is expected to be in Yann LeCun's binary IDX format. The first
    16 bytes are skipped as header and the remaining bytes are read as
    unsigned 8-bit pixel values.

    Args:
        filename: Path of the ``.gz`` image archive. When the path does not
            exist, ``download(filename)`` is called first.
            NOTE(review): ``download`` is not defined in the visible part of
            this file -- confirm it is provided elsewhere.

    Returns:
        An array with one row per image and 784 columns (each image is kept
        flattened), with every pixel byte divided by 256. Values therefore
        lie in [0, 255/256], for compatibility with the version provided at
        http://deeplearning.net/data/mnist/mnist.pkl.gz.
    """
    if not os.path.exists(filename):
        download(filename)

    with gzip.open(filename, 'rb') as archive:
        raw_bytes = archive.read()

    # Skip the 16-byte header; everything after it is pixel data.
    pixels = np.frombuffer(raw_bytes, np.uint8, offset=16)
    # One flattened 784-value row per example.
    flat_images = pixels.reshape(-1, 784)
    scale = np.float32(256)
    return flat_images / scale

def load_mnist_labels(filename):
    """Load a gzip-compressed MNIST label file as a vector of integers.

    The file is expected to be in Yann LeCun's binary IDX format. The first
    8 bytes are skipped as header and every remaining byte is one label.

    Args:
        filename: Path of the ``.gz`` label archive. When the path does not
            exist, ``download(filename)`` is called first.
            NOTE(review): ``download`` is not defined in the visible part of
            this file -- confirm it is provided elsewhere.

    Returns:
        A 1-D ``uint8`` array holding one label per example.
    """
    if not os.path.exists(filename):
        download(filename)

    with gzip.open(filename, 'rb') as archive:
        raw_bytes = archive.read()

    # The labels need no conversion: the bytes after the header are returned
    # unchanged.
    labels = np.frombuffer(raw_bytes, np.uint8, offset=8)
    return labels

# Load the MNIST training and test archives from the working directory.
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

# Each print call receives a single pre-formatted argument so the statement
# is valid, and prints the same text, on both Python 2 and Python 3.
print("train Data")
print("%s %s" % (X_train.shape, y_train.shape))
print("test data")
print("%s %s" % (X_test.shape, y_test.shape))

print(X_train.shape)
# Hold out the last 10000 training examples as a validation set; the
# remaining examples are what the classifier is trained on.
X_train, X_val = X_train[:-10000], X_train[-10000:]
y_train, y_val = y_train[:-10000], y_train[-10000:]

# Multi-layer perceptron with hidden_layer_sizes=(50,), trained with plain
# SGD. max_iter=10 keeps the run short, random_state=1 makes it repeatable,
# and verbose=10 prints the loss after every iteration.
mlp = MLPClassifier(
    hidden_layer_sizes=(50,),
    solver='sgd',
    learning_rate_init=.1,
    alpha=1e-4,
    max_iter=10,
    tol=1e-4,
    random_state=1,
    verbose=10
)

# Train on the training split only; the held-out data is not seen here.
mlp.fit(X_train, y_train)

# Report accuracy twice: once through the classifier's own score() method
# and once through sklearn.metrics.accuracy_score on explicit predictions.
# Each print call receives a single pre-formatted argument so the statement
# is valid, and prints the same text, on both Python 2 and Python 3.
print('***********1')
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))

y_hat = mlp.predict(X_train)
print("Training accuracy using accuracy_score function %s"
      % accuracy_score(y_train, y_hat))

# y_hat is left holding the TEST predictions; the code that follows
# compares it against y_test.
y_hat = mlp.predict(X_test)
# This line evaluates the test set, so it is labelled "Test" (it used to be
# reported as "Training accuracy").
print("Test accuracy using accuracy_score function %s"
      % accuracy_score(y_test, y_hat))

print('*****************2')
# Boolean mask over the test set: True where the predicted label differs
# from the true label.
k = y_test != y_hat
print(k)

print('*****************3')
# np.where on a boolean mask already returns the positions of its True
# entries (as a one-element tuple), so no comparison against True is needed.
itemindex = np.where(k)
print(itemindex[0])
print(itemindex[0].shape)

print('*****************4')
# Number of misclassified test examples.
print(len(itemindex[0]))

# Pick the two showcase samples from the actual prediction results instead of
# hard-coding indices: a fixed index is not guaranteed to be classified the
# way its variable name claims (the recorded run did not list 1422 among the
# misclassified samples, although it was shown as the "no match" example).
match_indices = np.where(~k)[0]
no_match_indices = itemindex[0]
# None means "no such sample exists" (e.g. a classifier with no errors).
random_index_match = match_indices[0] if len(match_indices) else None
random_index_no_match = no_match_indices[0] if len(no_match_indices) else None

print('*****************5')
if random_index_match is not None:
    print(X_test[random_index_match].shape)
    match_image = np.reshape(X_test[random_index_match], (28, 28))
    # Use a fresh figure per image: two imshow calls on the same implicit
    # axes would draw the second image over the first.
    plt.figure()
    plt.imshow(match_image, cmap='gray')

    print(y_test[random_index_match])

print('*****************6')
if random_index_no_match is not None:
    print(X_test[random_index_no_match].shape)
    no_match_image = np.reshape(X_test[random_index_no_match], (28, 28))
    plt.figure()
    plt.imshow(no_match_image, cmap='gray')

print('*****************7')
if random_index_no_match is not None:
    # True label followed by the (different) predicted label.
    print("%s %s" % (y_test[random_index_no_match],
                     y_hat[random_index_no_match]))

train Data
(60000, 784) (60000,)
test data
(10000, 784) (10000,)
(60000, 784)
Iteration 1, loss = 0.34707208
Iteration 2, loss = 0.16803904
Iteration 3, loss = 0.12461142
Iteration 4, loss = 0.10228343
Iteration 5, loss = 0.08809855
Iteration 6, loss = 0.07480725
Iteration 7, loss = 0.06668371
Iteration 8, loss = 0.05893584
Iteration 9, loss = 0.05195750
Iteration 10, loss = 0.04597532
***********1




Training set score: 0.988760
Test set score: 0.970100
Training accuracy using accuracy_score function 0.98876
Training accuracy using accuracy_score function 0.9701
*****************2
[False False False ... False False False]
*****************3
[ 139  149  233  247  274  320  321  448  449  495  582  613  619  674
  684  691  707  720  726  740  760  844  874  883  895  947  951  956
  965 1003 1014 1039 1044 1107 1112 1178 1194 1224 1226 1232 1242 1247
 1251 1279 1299 1319 1325 1328 1378 1393 1414 1464 1500 1520 1522 1527
 1530 1549 1553 1569 1601 1607 1609 1621 1671 1681 1709 1730 1751 1754
 1790 1878 1901 1903 1913 1940 1941 1952 1955 1984 2004 2016 2018 2033
 2043 2044 2073 2074 2109 2118 2130 2135 2145 2182 2185 2186 2266 2272
 2293 2326 2333 2369 2371 2387 2406 2414 2441 2488 2526 2534 2607 2611
 2654 2720 2736 2743 2760 2810 2836 2863 2896 2921 2927 2939 2945 2952
 2953 3060 3108 3114 3115 3117 3122 3284 3289 3330 3333 3406 3422 3475
 3503 3520 3533 3542 3559 3597 3607 3674 3681