## MNIST at a glance

In [None]:
# Load the 70,000-image MNIST digits dataset into `mnist`.
#
# `fetch_mldata` was deprecated in scikit-learn 0.20 and removed in 0.22, and the
# mldata.org server it downloaded from is no longer reachable, so prefer
# `fetch_openml` and only fall back to `fetch_mldata` on very old installs.
import numpy as np

try:
    from sklearn.datasets import fetch_openml
except ImportError:  # scikit-learn < 0.20 has no fetch_openml
    fetch_openml = None

if fetch_openml is not None:
    mnist = fetch_openml('mnist_784', version=1)
    # Depending on the scikit-learn version the bunch may hold pandas objects,
    # and the OpenML labels are strings. Normalise both to NumPy arrays with
    # numeric labels so positional indexing (X[i]) and comparisons such as
    # `y == 3` keep working in the cells below.
    _data = np.asarray(mnist["data"])
    _target = np.asarray(mnist["target"]).astype(np.float64)
    # The old mldata copy was ordered by label within the 60,000 training and
    # the 10,000 test images, whereas the OpenML copy is not sorted. Reproduce
    # that ordering (stable sort) so fixed indices used later fall in the same
    # label region as before.
    _order = np.concatenate([
        np.argsort(_target[:60000], kind="mergesort"),
        np.argsort(_target[60000:], kind="mergesort") + 60000,
    ])
    mnist["data"], mnist["target"] = _data[_order], _target[_order]
else:
    from sklearn.datasets import fetch_mldata
    mnist = fetch_mldata('MNIST original')
# Datasets loaded by Scikit-Learn generally have a similar dictionary structure including:
# • A DESCR key describing the dataset
# • A "data" key containing an array with one row per instance and one column per feature
# • A "target" key containing an array with the labels

In [None]:
# Pull the feature matrix and the label vector out of the dataset bunch.
X = mnist["data"]
y = mnist["target"]

# One row per instance, one column per feature.
N, d = X.shape
summary = "Dataset size: {}, dimension: {}".format(N, d)
print( summary )

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

digit = X[20000]
digit = digit.reshape( 28, 28 )
plt.imshow( digit, cmap = matplotlib.cm.binary, interpolation = "nearest" )
plt.axis("off")

In [None]:
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

# One stratified shuffle split holding out 10% of the instances for testing;
# the fixed random_state makes the split repeatable.
sss = StratifiedShuffleSplit( n_splits = 1, test_size = 0.1, random_state = 42 )

# With n_splits = 1 the splitter yields exactly one (train, test) index pair.
train_index, test_index = next( sss.split( X, y ) )

Xtrain, ytrain = X[train_index], y[train_index]
Xtest, ytest = X[test_index], y[test_index]
Xtrain.shape

## Training a Binary Classifier (3-Detector)

In [None]:
# Boolean targets for the binary task: True wherever the label equals 3.
ytrain_th, ytest_th = (ytrain == 3), (ytest == 3)

### SGD

In [None]:
from sklearn.linear_model import SGDClassifier

# Linear classifier trained with stochastic gradient descent on the
# "is it a 3?" targets; random_state is fixed for repeatable results.
sgd_clf = SGDClassifier( random_state = 42, max_iter = 1000, tol = 1e-3 )
sgd_clf.fit( Xtrain, ytrain_th )

# Spot-check a single instance; it is wrapped in a list so that predict()
# receives a batch containing one sample.
sample = X[20000]
sgd_clf.predict( [sample] )

In [None]:
from sklearn.model_selection import cross_val_score

# Accuracy of the SGD 3-detector on each of 10 cross-validation folds.
cv_options = dict( cv = 10, scoring = "accuracy" )
cross_val_score( sgd_clf, Xtrain, ytrain_th, **cv_options )

## Random forests

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict

forest_clf = RandomForestClassifier( random_state = 42 )

# Out-of-fold class probabilities from 10-fold cross-validation.
# NOTE(review): this uses the full label vector `ytrain`, not the binary
# `ytrain_th` used for the SGD 3-detector — confirm that is intended.
y_probas_forest = cross_val_predict(
    forest_clf,
    Xtrain,
    ytrain,
    method = "predict_proba",
    cv = 10,
)

# Neural Nets with TensorFlow

In [6]:
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

# CONSTRUCTION PHASE (TensorFlow 1.x graph API)
# Start from an empty default graph so that re-running this cell does not
# clash with the scopes created by a previous run.
tf.reset_default_graph()

# architecture: 28*28 inputs -> 300 -> 100 hidden units -> 10 outputs
d, n_hid1, n_hid2, n_out = 28*28, 300, 100, 10

# NOTE: from here on X, y (and d) refer to the graph placeholders defined
# below, not to the NumPy arrays of the scikit-learn cells above; re-run those
# cells before going back to the scikit-learn part of the notebook.
#
# during the execution phase, X will be replaced with one training batch at a time
# all the instances in a training batch will be processed simultaneously by the network
X = tf.placeholder( tf.float32, shape = (None, d), name = "X" ) # first axis: batch size (left open)
# `(None)` is just None (shape left unspecified); `(None,)` declares the
# intended 1-D vector holding one integer label per instance.
y = tf.placeholder( tf.int64, shape = (None,), name = "y" )

with tf.name_scope("dnn"):
    hidden1 = fully_connected( X, n_hid1, scope = "hidden1" )
    hidden2 = fully_connected( hidden1, n_hid2, scope = "hidden2" )
    # no activation on the output layer: the loss below works on raw logits
    logits = fully_connected( hidden2, n_out, scope = "outputs", activation_fn = None )

with tf.name_scope("loss"):
    # the "sparse" variant takes integer class ids as labels;
    # softmax_cross_entropy_with_logits() -> takes one-hot vectors
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels = y, logits = logits )
    # average the per-instance cross entropy over the batch
    loss = tf.reduce_mean( xentropy, name = "loss" )

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer( learning_rate )
    training_op = optimizer.minimize( loss )

with tf.name_scope("eval"):
    # an instance counts as correct when its label is the top-1 logit
    correct = tf.nn.in_top_k( logits, y, 1 )
    # fraction of correct predictions in the fed batch
    accuracy = tf.reduce_mean( tf.cast( correct, tf.float32 ) )

init = tf.global_variables_initializer()
saver = tf.train.Saver()

# EXECUTION PHASE
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST files under /tmp/data/ (see the "Extracting ..." lines in the
# output). This rebinds `mnist` to the TensorFlow dataset object.
mnist = input_data.read_data_sets("/tmp/data/")

n_epochs, batch_size = 400, 50
n_batches = mnist.train.num_examples // batch_size
test_feed = { X: mnist.test.images, y: mnist.test.labels }

with tf.Session() as sesh:
    sesh.run( init )
    for epoch in range(n_epochs):
        # one pass over the training set, one mini-batch per step
        for batch_idx in range( n_batches ):
            Xbatch, ybatch = mnist.train.next_batch( batch_size )
            batch_feed = { X: Xbatch, y: ybatch }
            sesh.run( training_op, feed_dict = batch_feed )
        # "Train accuracy" is measured on the last mini-batch of the epoch only
        acc_train = sesh.run( accuracy, feed_dict = { X: Xbatch, y: ybatch } )
        acc_test = sesh.run( accuracy, feed_dict = test_feed )
        print( epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test )
    # persist the trained variables once all epochs are done
    save_path = saver.save( sesh, "./mnist_final_model.ckpt" )

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
0 Train accuracy: 0.94 Test accuracy: 0.902
1 Train accuracy: 0.9 Test accuracy: 0.9207
2 Train accuracy: 0.96 Test accuracy: 0.9296
3 Train accuracy: 0.98 Test accuracy: 0.9344
4 Train accuracy: 0.98 Test accuracy: 0.9425
5 Train accuracy: 0.94 Test accuracy: 0.9471
6 Train accuracy: 0.98 Test accuracy: 0.9524
7 Train accuracy: 0.94 Test accuracy: 0.9541
8 Train accuracy: 0.92 Test accuracy: 0.9567
9 Train accuracy: 1.0 Test accuracy: 0.9602
10 Train accuracy: 0.96 Test accuracy: 0.9613
11 Train accuracy: 0.98 Test accuracy: 0.964
12 Train accuracy: 0.98 Test accuracy: 0.9651
13 Train accuracy: 0.96 Test accuracy: 0.9664
14 Train accuracy: 0.98 Test accuracy: 0.9666
15 Train accuracy: 0.96 Test accuracy: 0.9683
16 Train accuracy: 1.0 Test accuracy: 0.9683
17 Train accuracy: 0.98 Test accuracy: 0.96

177 Train accuracy: 1.0 Test accuracy: 0.9789
178 Train accuracy: 1.0 Test accuracy: 0.979
179 Train accuracy: 1.0 Test accuracy: 0.9788
180 Train accuracy: 1.0 Test accuracy: 0.9791
181 Train accuracy: 1.0 Test accuracy: 0.9789
182 Train accuracy: 1.0 Test accuracy: 0.9788
183 Train accuracy: 1.0 Test accuracy: 0.9792
184 Train accuracy: 1.0 Test accuracy: 0.979
185 Train accuracy: 1.0 Test accuracy: 0.9791
186 Train accuracy: 1.0 Test accuracy: 0.9791
187 Train accuracy: 1.0 Test accuracy: 0.9788
188 Train accuracy: 1.0 Test accuracy: 0.9791
189 Train accuracy: 1.0 Test accuracy: 0.9793
190 Train accuracy: 1.0 Test accuracy: 0.9789
191 Train accuracy: 1.0 Test accuracy: 0.979
192 Train accuracy: 1.0 Test accuracy: 0.9792
193 Train accuracy: 1.0 Test accuracy: 0.9792
194 Train accuracy: 1.0 Test accuracy: 0.9791
195 Train accuracy: 1.0 Test accuracy: 0.979
196 Train accuracy: 1.0 Test accuracy: 0.9789
197 Train accuracy: 1.0 Test accuracy: 0.979
198 Train accuracy: 1.0 Test accuracy: 

356 Train accuracy: 1.0 Test accuracy: 0.979
357 Train accuracy: 1.0 Test accuracy: 0.9792
358 Train accuracy: 1.0 Test accuracy: 0.979
359 Train accuracy: 1.0 Test accuracy: 0.9792
360 Train accuracy: 1.0 Test accuracy: 0.9793
361 Train accuracy: 1.0 Test accuracy: 0.9791
362 Train accuracy: 1.0 Test accuracy: 0.979
363 Train accuracy: 1.0 Test accuracy: 0.9791
364 Train accuracy: 1.0 Test accuracy: 0.9792
365 Train accuracy: 1.0 Test accuracy: 0.9792
366 Train accuracy: 1.0 Test accuracy: 0.9792
367 Train accuracy: 1.0 Test accuracy: 0.9793
368 Train accuracy: 1.0 Test accuracy: 0.9791
369 Train accuracy: 1.0 Test accuracy: 0.9792
370 Train accuracy: 1.0 Test accuracy: 0.9794
371 Train accuracy: 1.0 Test accuracy: 0.9793
372 Train accuracy: 1.0 Test accuracy: 0.9791
373 Train accuracy: 1.0 Test accuracy: 0.979
374 Train accuracy: 1.0 Test accuracy: 0.9791
375 Train accuracy: 1.0 Test accuracy: 0.9791
376 Train accuracy: 1.0 Test accuracy: 0.9793
377 Train accuracy: 1.0 Test accuracy: