### Setup

In [1]:
import glob
import io
import math
import os

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import style
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

# Only let TensorFlow log messages of ERROR severity or higher through.
tf.logging.set_verbosity(tf.logging.ERROR)

# pandas display settings: cap rendered frames at 10 rows and format floats
# with a single decimal place.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", "{:.1f}".format)

# Draw all matplotlib figures with the 'ggplot' style sheet.
style.use('ggplot')

  from ._conv import register_converters as _register_converters


### Load Dataset and Randomization

In [3]:
# Load the first 10,000 rows of the CSV (the file has no header row).
# The path is handed straight to pandas so that pandas opens and closes the
# file itself; wrapping it in io.open() left a file handle that was never
# closed. nrows stops parsing after 10,000 rows instead of reading the whole
# file and discarding the rest.
mnist_dataframe = pd.read_csv(
    "mnist_train_small.csv",
    sep=",",
    header=None,
    nrows=10000)

# Shuffle the rows by reindexing against a random permutation of the index.
# NOTE: the permutation is unseeded, so the row order differs on every run;
# seed NumPy's global RNG beforehand if reproducible output is required.
mnist_dataframe = mnist_dataframe.reindex(np.random.permutation(mnist_dataframe.index))
mnist_dataframe.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
7697,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
276,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
895,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6604,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1005,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Render three views of the frame in one call:
#   1. a label-based slice of rows (index label 2 through label 60, inclusive)
#      restricted to columns 71-75,
#   2. column 0 on its own,
#   3. columns 1 through 784 for every row.
display.display(
    mnist_dataframe.loc[2:60, 71:75],
    mnist_dataframe[0],
    mnist_dataframe.loc[:, 1:784],
)

Unnamed: 0,71,72,73,74,75
2,0,0,0,0,0
4441,0,0,0,0,0
3845,18,255,81,0,0
8122,0,0,0,0,0
7224,0,0,0,0,0
...,...,...,...,...,...
6258,0,0,0,0,0
3331,0,0,0,0,0
5444,0,0,0,0,0
8061,0,0,0,0,0


7697    5
276     9
895     3
6604    8
1005    0
       ..
4733    8
3129    7
747     1
3117    9
1340    1
Name: 0, Length: 10000, dtype: int64

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,775,776,777,778,779,780,781,782,783,784
7697,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
276,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
895,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6604,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1005,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4733,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3129,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
747,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3117,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Parse out Labels and Features

In [31]:
# Column 0 holds the labels.
# Columns 1 through 784 hold the features (grayscale pixel values).
def parse_labels_and_features(dataset):
    """Split a frame into its label column and its scaled feature columns.

    Args:
        dataset: A pandas DataFrame whose column 0 holds the labels and whose
            columns 1 through 784 hold the feature values.

    Returns:
        A `(labels, features)` tuple: `labels` is column 0 of `dataset`, and
        `features` is columns 1 through 784 with every value divided by 255.
    """
    digit_labels = dataset[0]

    # .loc slices by label and includes both endpoints, so 1:784 picks up
    # column 784 as well.
    raw_pixels = dataset.loc[:, 1:784]

    # 255 is the maximum value, so dividing by it maps the data into [0, 1].
    scaled_pixels = raw_pixels.div(255)
    return digit_labels, scaled_pixels