In [1]:
!pip install imageio

Collecting imageio
[?25l  Downloading https://files.pythonhosted.org/packages/a7/1d/33c8686072148b3b0fcc12a2e0857dd8316b8ae20a0fa66c8d6a6d01c05c/imageio-2.3.0-py2.py3-none-any.whl (3.3MB)
[K    100% |████████████████████████████████| 3.3MB 7.8MB/s 
Installing collected packages: imageio
Successfully installed imageio-2.3.0


In [2]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt
import pandas as pd
import urllib.request
import os, tarfile
import imageio
import tensorflow as tf
# from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

tf.test.gpu_device_name()

'/device:GPU:0'

In [0]:
# Remote location of the training CSV that is passed to fetch_data further down.
MNIST_URL = 'https://www.dropbox.com/s/z80cxadhmslpxay/train.csv?dl=1'

## Load Data

In [0]:
# Directory the dataset is stored in. Keep the trailing '/': a later cell builds
# the CSV path as `root_folder+'train.csv'`.
root_folder = 'drive/app/mnist/'
# root_folder = 'D:/dev/data/'

In [0]:
def fetch_data(URL, DOWNLOAD_FOLDER, DOWNLOAD_FILE, overwrite=True):
    """Download ``URL`` and store it as ``DOWNLOAD_FILE`` inside ``DOWNLOAD_FOLDER``.

    Args:
        URL: Address handed to ``urllib.request.urlretrieve``.
        DOWNLOAD_FOLDER: Target directory, with or without a trailing
            separator. It is created (including parents) when missing.
        DOWNLOAD_FILE: File name to write inside ``DOWNLOAD_FOLDER``.
        overwrite: When True (default) the file is always downloaded again.
            Pass False to keep an existing file and skip the network request.

    Returns:
        The path of the local file.
    """
    # An empty folder means "current directory"; os.makedirs('') would raise.
    if DOWNLOAD_FOLDER:
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

    # os.path.join inserts the separator only when it is missing, so callers
    # no longer have to remember the trailing '/'.
    destination = os.path.join(DOWNLOAD_FOLDER, DOWNLOAD_FILE)

    if overwrite or not os.path.isfile(destination):
        urllib.request.urlretrieve(URL, destination)

    return destination

In [6]:
# Fetch train.csv into root_folder. Re-running this cell calls fetch_data again.
print('Beginning file download with urllib2...')
fetch_data(MNIST_URL, root_folder, 'train.csv')

Beginning file download with urllib2...


In [7]:
!ls -l drive/app/mnist/

total 74976
-rw-r--r-- 1 root root 76775041 May 13 10:26 train.csv


#### MNIST

#### ToDos
- Create a function to fetch data from a url.
- Check if it is already downloaded.
- Check whether the file is a CSV, a tar.gz archive, etc.
- Add cross-validation code to be able to use sklearn cross_val_score function to quickly evaluate the performance.

In [0]:
def split_train_test(XY, n_splits=1, test_size=0.2, random_state=42):
    """Stratified shuffle split of a ``(features, labels)`` pair.

    Args:
        XY: Indexable pair; ``XY[0]`` is a 2-D feature array and ``XY[1]`` the
            label array used for stratification.
        n_splits: Number of splits generated by ``StratifiedShuffleSplit``.
        test_size: Passed straight to ``StratifiedShuffleSplit``.
        random_state: Passed straight to ``StratifiedShuffleSplit``.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)`` taken from the last split that
        the splitter yields.
    """
    features, labels = XY[0], XY[1]
    splitter = StratifiedShuffleSplit(
        n_splits=n_splits,
        test_size=test_size,
        random_state=random_state,
    )

    # Walk through every generated split; only the final pair of index arrays
    # survives the loop and is used below.
    for train_rows, test_rows in splitter.split(features, labels):
        pass

    return (
        features[train_rows, :],
        labels[train_rows],
        features[test_rows, :],
        labels[test_rows],
    )

In [0]:
def get_mnist_data(file_name, split_data=False):
    """Load a label-first CSV and standardise every row.

    The first column is taken as the label and the remaining columns as the
    features. Each row is shifted to zero mean and scaled to unit standard
    deviation.

    Args:
        file_name: Path of the CSV file (read with ``pd.read_csv``).
        split_data: When True, the data is passed to ``split_train_test``
            (test_size=0.2, random_state=42) and its result is returned.

    Returns:
        ``(X, Y)`` when ``split_data`` is False, the 4-tuple returned by
        ``split_train_test`` when it is True, or ``None`` (after printing a
        message) when ``file_name`` does not exist.
    """
    if not os.path.isfile(file_name):
        print('File does not exist')
        return

    # DataFrame.as_matrix() was removed from pandas; .values works on old and
    # new releases alike.
    data = pd.read_csv(file_name).values
    Y = data[:, 0]
    X = data[:, 1:]

    row_mean = X.mean(axis=1, keepdims=True)
    row_std = X.std(axis=1, keepdims=True)
    # A row whose values are all identical has zero spread; dividing by 1
    # instead maps it to all zeros rather than NaN.
    row_std[row_std == 0] = 1.0
    X = (X - row_mean) / row_std

    # Honour the caller's flag (the function object `split_train_test` itself
    # is always truthy and must not be used as the condition).
    if split_data:
        return split_train_test((X, Y), n_splits=1, test_size=0.2, random_state=42)

    return X, Y

In [0]:
def one_hot_encoder(label):
    """One-hot encode a label array with a freshly fitted ``OneHotEncoder``.

    Args:
        label: Array of class labels; it is reshaped to a single column
            before encoding.

    Returns:
        Whatever ``OneHotEncoder().fit_transform`` produces for that column
        (the notebook calls ``.toarray()`` on it).
    """
    as_column = label.reshape(-1, 1)
    return OneHotEncoder().fit_transform(as_column)

In [11]:
# NOTE: despite its name, mnist_folder holds the path of the CSV file itself.
mnist_folder = root_folder+'train.csv'
# get_mnist_data returns None when the file is missing, in which case this
# unpacking fails.
X_train, Y_train, X_test, Y_test = get_mnist_data(file_name=mnist_folder, split_data=True)
print("Train: [{}, {}], Test: [{}, {}]".format(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))

Train: [(33600, 784), (33600,)], Test: [(8400, 784), (8400,)]


In [0]:
# Dense one-hot label matrices for training and evaluation.
# NOTE(review): each call fits its own encoder, so the two matrices presumably
# share a column layout only if both label sets contain the same classes.
Y_train_1hot = one_hot_encoder(Y_train).toarray()
Y_test_1hot = one_hot_encoder(Y_test).toarray()
# print(Y_train_1hot[0:10])

In [0]:
def init_weights(in_features, out_features):
    """Create initial parameters for a dense layer.

    Args:
        in_features: Number of inputs to the layer (rows of ``W``).
        out_features: Number of outputs of the layer (columns of ``W``).

    Returns:
        ``(W, b)``: ``W`` has shape ``(in_features, out_features)`` and holds
        standard-normal samples divided by ``sqrt(in_features)``; ``b`` is a
        zero vector of length ``out_features``.
    """
    # np.sqrt rather than a bare `sqrt`, which is not imported in this notebook.
    W = np.random.randn(in_features, out_features) / np.sqrt(in_features)
    b = np.zeros(out_features)
    return W, b

In [0]:
class HiddenLinearLayer(object):
    """Dense layer: ``activation_fn(x @ W + b)`` with TensorFlow variables."""

    def __init__(self, in_features, out_features, activation_fn):
        """Create the layer's weight and bias variables.

        Args:
            in_features: Input width, forwarded to ``init_weights``.
            out_features: Output width, forwarded to ``init_weights``.
            activation_fn: Callable applied to the affine output in ``forward``.
        """
        initial_W, initial_b = init_weights(in_features, out_features)

        self.in_features = in_features
        self.out_features = out_features
        self.activation_fn = activation_fn

        # Parameters are stored as float32 TensorFlow variables.
        self.W = tf.Variable(initial_W.astype(np.float32))
        self.b = tf.Variable(initial_b.astype(np.float32))

    def forward(self, x):
        """Return the activation of the affine transform of ``x``."""
        pre_activation = tf.matmul(x, self.W) + self.b
        return self.activation_fn(pre_activation)
        

In [0]:
def weight_variable(shape):
  """Return a ``tf.Variable`` of ``shape`` drawn from a truncated normal (stddev 0.1)."""
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [0]:
def bias_variable(shape):
  """Return a ``tf.Variable`` of ``shape`` with every element set to 0.1."""
  return tf.Variable(tf.constant(0.1, shape=shape))

In [0]:
def conv2d(x, W):
  """Convolve ``x`` with filter ``W`` using stride 1 in every dimension and 'SAME' padding."""
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

In [0]:
def max_pool_2x2(x):
  """Max-pool ``x`` with a [1, 2, 2, 1] window and matching stride, 'SAME' padding."""
  window = [1, 2, 2, 1]
  stride = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')

In [0]:
class CNN(object):
  """Unfinished CNN skeleton: it only reshapes its input into image form.

  No convolution layers are built yet; the notebook uses ``CNN_fn`` instead.
  """

  def __init__(self, width, height, n_channels, inputs=None):
    """Reshape the input tensor to ``[-1, width, height, n_channels]``.

    Args:
      width: Second dimension of the reshaped tensor.
      height: Third dimension of the reshaped tensor.
      n_channels: Last dimension of the reshaped tensor.
      inputs: Tensor to reshape. When omitted, the notebook-level ``X`` is
        used, so ``CNN(width, height, n_channels)`` keeps working; ``X`` must
        already be defined in that case.
    """
    if inputs is None:
      inputs = X
    self.input = tf.reshape(inputs, [-1, width, height, n_channels])
    # TODO: build the first convolution layer. The attribute is initialised
    # explicitly so that constructing the object does not raise AttributeError.
    self.conv1 = None

In [0]:
def CNN_fn(X, Y, width, height, n_channels):
    """Build the convolutional network with ``tf.layers`` and return its logits.

    Layout: two 5x5 conv layers (32, 64 filters) + 2x2 max-pool, two 5x5 conv
    layers (128, 256 filters) + 2x2 max-pool, flatten, a 1024-unit ReLU dense
    layer, and a final 10-unit dense layer without activation.

    Args:
        X: Input tensor; reshaped to ``[-1, width, height, n_channels]``.
        Y: Not used inside this function.
        width, height, n_channels: Dimensions used for the reshape.

    Returns:
        The output tensor of the final 10-unit dense layer.
    """
    input_layer = tf.reshape(X, [-1, width, height, n_channels])
    conv_11 = tf.layers.conv2d(input_layer, filters=32, kernel_size=[5,5], padding='SAME', activation=tf.nn.relu)
    conv_12 = tf.layers.conv2d(conv_11, filters=64, kernel_size=[5,5], padding='SAME', activation=tf.nn.relu)
    pool_11 = tf.layers.max_pooling2d(conv_12, pool_size=[2,2], strides=2)

    conv_21 = tf.layers.conv2d(pool_11, filters=128, kernel_size=[5,5], padding='SAME', activation=tf.nn.relu)
    conv_22 = tf.layers.conv2d(conv_21, filters=256, kernel_size=[5,5], padding='SAME', activation=tf.nn.relu)
    pool_21 = tf.layers.max_pooling2d(conv_22, pool_size=[2,2], strides=2)

    flat_3 = tf.layers.flatten(pool_21)

    fc_4 = tf.layers.dense(inputs=flat_3, units=1024, activation=tf.nn.relu)
    # `keep_prob` is not a parameter; it is read from the enclosing notebook scope.
    # NOTE(review): drop_5 is never consumed -- fc_5 below reads fc_4 -- so
    # dropout has no effect on the returned logits. Connecting it would make
    # every session.run of the logits require a keep_prob feed.
    # NOTE(review): the second positional argument of tf.layers.dropout is
    # presumably the drop `rate` (not a keep probability), and the layer is
    # presumably inactive unless `training=True` -- confirm against the API docs.
    drop_5 = tf.layers.dropout(fc_4, keep_prob)
    fc_5 = tf.layers.dense(inputs=fc_4, units=10)
    return fc_5

In [0]:
# Image geometry; WIDTH*HEIGHT is the row length of the flattened X placeholder.
WIDTH = 28
HEIGHT = 28
N_CHANNELS = 1
# Width of the one-hot label placeholder Y.
N_CLASSES = 10
BATCH_SIZE = 100
# Number of passes over the training data made by the training loop.
MAX_ITER = 10
# Whole mini-batches per pass; rows beyond N_BATCHES*BATCH_SIZE are not visited.
N_BATCHES = X_train.shape[0]//BATCH_SIZE
# Test metrics are printed whenever the batch index is a multiple of this value.
# NOTE: this is 0 when N_BATCHES < 2, which makes `ibatch%PRINT_TIME` raise
# ZeroDivisionError in the training loop.
PRINT_TIME = N_BATCHES//2

In [0]:
# Graph inputs: flattened images, one-hot labels, and the dropout keep
# probability (fed as 0.5 while training and 1.0 while evaluating).
X = tf.placeholder(tf.float32, shape=(None, WIDTH*HEIGHT), name='X')
Y = tf.placeholder(tf.float32, shape=(None, N_CLASSES), name='Y')
# Created exactly once so that every consumer shares the same placeholder.
keep_prob = tf.placeholder(tf.float32)

# --- Hand-built network using the low-level helpers defined above ----------
# First block: 5x5 convolution to 32 feature maps, ReLU, 2x2 max-pool.
W_conv1 = weight_variable([5, 5, N_CHANNELS, 32])
b_conv1 = bias_variable([32])
# Reshape the placeholder X (capital) back into image form.
x_image = tf.reshape(X, [-1, WIDTH, HEIGHT, N_CHANNELS])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second block: 5x5 convolution from 32 to 64 feature maps, ReLU, 2x2 max-pool.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Two stride-2 poolings reduce 28x28 inputs to 7x7, hence 7*7*64 features.
# These literals assume WIDTH == HEIGHT == 28.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer: one logit per class.
W_fc2 = weight_variable([1024, N_CLASSES])
b_fc2 = bias_variable([N_CLASSES])

y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

In [0]:
# Logits of the tf.layers network; Y is passed along but CNN_fn does not use it.
Y_pred = CNN_fn(X, Y, WIDTH, HEIGHT, N_CHANNELS)
# Mean softmax cross-entropy between the one-hot labels Y and the logits.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=Y_pred))

In [0]:
# Training op: one RMSProp update step that minimises `cost`.
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001, decay=0.95, momentum=0.9).minimize(cost)

In [0]:
# Predicted class per row: index of the largest logit along axis 1.
predictions = tf.argmax(Y_pred, axis=1)

In [43]:
# The session is deliberately left open so that later cells can keep
# evaluating tensors with the trained variables.
session = tf.InteractiveSession()
init = tf.global_variables_initializer()
session.run(init)

# `epoch` instead of `iter`, which would shadow the builtin for the rest of
# the notebook.
for epoch in range(MAX_ITER):
    # Fresh ordering every epoch; features and labels are shuffled together.
    X_shuffled, Y_shuffled = shuffle(X_train, Y_train_1hot)
    for ibatch in range(N_BATCHES):
        rows = slice(ibatch*BATCH_SIZE, (ibatch+1)*BATCH_SIZE)
        X_batch = X_shuffled[rows, :]
        Y_batch = Y_shuffled[rows, :]

        # One optimisation step on the current mini-batch.
        session.run(optimizer, feed_dict={X:X_batch, Y:Y_batch, keep_prob:0.5})

        if ibatch%PRINT_TIME == 0:
            # Fetch loss and predictions in a single run so the test set goes
            # through the network once, and always feed keep_prob so the
            # evaluation keeps working if dropout is part of the graph.
            test_cost, test_predictions = session.run(
                [cost, predictions],
                feed_dict={X:X_test, Y:Y_test_1hot, keep_prob:1.0})
            error_rate = np.sum(test_predictions!=Y_test)*100/len(test_predictions)
            print('Epoch:{}, Batch:{}, Test loss:{}, Test accuracy:{}%'.format(epoch, ibatch, test_cost, np.round(100-error_rate, 3)))
            

Epoch:0, Batch:0, Test loss:2.309131383895874, Test accuracy:8.274%
Epoch:0, Batch:168, Test loss:0.36426299810409546, Test accuracy:88.929%
Epoch:1, Batch:0, Test loss:0.10789676010608673, Test accuracy:96.571%
Epoch:1, Batch:168, Test loss:0.08194460719823837, Test accuracy:97.619%
Epoch:2, Batch:0, Test loss:0.05785835161805153, Test accuracy:98.321%
Epoch:2, Batch:168, Test loss:0.07808986306190491, Test accuracy:97.917%
Epoch:3, Batch:0, Test loss:0.03999604284763336, Test accuracy:98.857%
Epoch:3, Batch:168, Test loss:0.03863728046417236, Test accuracy:98.952%
Epoch:4, Batch:0, Test loss:0.044862981885671616, Test accuracy:98.75%
Epoch:4, Batch:168, Test loss:0.050590235739946365, Test accuracy:98.762%
Epoch:5, Batch:0, Test loss:0.04827107861638069, Test accuracy:98.774%
Epoch:5, Batch:168, Test loss:0.03972402587532997, Test accuracy:99.06%
Epoch:6, Batch:0, Test loss:0.04635478928685188, Test accuracy:99.119%
Epoch:6, Batch:168, Test loss:0.04560421034693718, Test accuracy:98.