# NN Multiclass with TensorFlow2

**Objectives:**
  - Train both a neural network to classify handwritten digits from the classic MNIST data set

The [data](https://developers.google.com/machine-learning/crash-course/california-housing-data-description) is based on 1990 census data from California.

## Setup
In this first cell, we'll load the necessary libraries.

In [1]:
import math
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
if tf.__version__[0] == "1":
    !pip install tensorflow==2.0.0-alpha0
print(tf.__version__)

2.0.0-alpha0


In [0]:
data = pd.read_csv(
  "https://download.mlcc.google.com/mledu-datasets/mnist_train_small.csv",
  sep=",",
  header=None)

In [3]:
data.shape

(20000, 785)

In [4]:
data.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
data.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
count,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,...,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0
mean,4.4702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.215,0.0877,0.0365,0.01365,0.0326,0.006,0.0,0.0,0.0,0.0
std,2.892755,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.314663,3.921566,2.712459,0.950794,2.718034,0.600318,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,253.0,254.0,253.0,79.0,254.0,62.0,0.0,0.0,0.0,0.0


## Split dataset

In [6]:
feature_idx = data.columns[1:]
pixel_cols = ["pix{}".format(i) for i in feature_idx]
data.columns = ["label"] + pixel_cols
data = data.sample(frac=1).reset_index(drop=True)
data.head(5)

Unnamed: 0,label,pix1,pix2,pix3,pix4,pix5,pix6,pix7,pix8,pix9,...,pix775,pix776,pix777,pix778,pix779,pix780,pix781,pix782,pix783,pix784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
train_df = data.loc[:7500, :]
train_df.shape

(7501, 785)

In [8]:
validation_df = data.loc[7500:10000, :]
validation_df.shape

(2501, 785)

In [0]:
NUMERIC_COLUMNS = pixel_cols
feature_columns = []
for i in NUMERIC_COLUMNS:
  feature_columns.append(tf.feature_column.numeric_column(i, dtype=tf.float32))

In [0]:
def make_input_fn(data_df, label_df, num_epochs, shuffle, batch_size=100):
  def input_function():
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      ds = ds.shuffle(1000)
    ds = ds.batch(batch_size).repeat(num_epochs)
    return ds
  return input_function

## Training

In [0]:
train_targets = train_df.loc[:, "label"]
train_features = train_df.loc[:, pixel_cols]
train_input_fn = make_input_fn(train_features, train_targets, num_epochs=10, shuffle=True)

In [12]:
estimator = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[100, 100],
    n_classes=10
)
estimator.train(train_input_fn, steps=100)
result = estimator.evaluate(train_input_fn)
print(result)

W0504 20:27:57.308820 140597610059648 estimator.py:1799] Using temporary folder as model directory: /tmp/tmp_8jf389n
W0504 20:27:57.335705 140597610059648 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/training_util.py:238: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
W0504 20:27:58.363629 140597610059648 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1257: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0504 20:27:58.611766 140597610059648 deprecation.py:323] From 

{'accuracy': 0.5845887, 'average_loss': 1.2459158, 'loss': 1.2421411, 'global_step': 100}


In [13]:
train_predictions = estimator.predict(make_input_fn(
    train_features, 
    train_targets, 
    num_epochs=1, 
    shuffle=False)) # generator
train_predictions = [pd.Series(i["probabilities"]).idxmax() for i in train_predictions]
acc = metrics.accuracy_score(train_targets, train_predictions)
print("Accuracy: {}".format(acc))

Accuracy: 0.5845887215037995


## Validation

In [14]:
validation_targets = validation_df.loc[:, "label"]
validation_features = validation_df.loc[:, pixel_cols]
validation_predictions = estimator.predict(make_input_fn(
    validation_features, 
    validation_targets, 
    num_epochs=1, 
    shuffle=False)) # generator
validation_predictions = [pd.Series(i["probabilities"]).idxmax() for i in validation_predictions]
acc = metrics.accuracy_score(validation_targets, validation_predictions)
print("Accuracy: {}".format(acc))

Accuracy: 0.5757696921231508


In [15]:
calibration_data = pd.DataFrame()
calibration_data["validation_predictions"] = pd.Series(validation_predictions)
calibration_data["validation_targets"] = pd.Series(validation_targets.values)
calibration_data.describe()

Unnamed: 0,validation_predictions,validation_targets
count,2501.0,2501.0
mean,5.805678,4.453818
std,3.023082,2.922322
min,0.0,0.0
25%,3.0,2.0
50%,7.0,4.0
75%,8.0,7.0
max,9.0,9.0


## Testing

In [16]:
test_data = pd.read_csv(
  "https://download.mlcc.google.com/mledu-datasets/mnist_test.csv",
  sep=",",
  header=None)
test_data.columns = ["label"] + pixel_cols
test_data.shape

(10000, 785)

In [17]:
test_data.describe()

Unnamed: 0,label,pix1,pix2,pix3,pix4,pix5,pix6,pix7,pix8,pix9,...,pix775,pix776,pix777,pix778,pix779,pix780,pix781,pix782,pix783,pix784
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,...,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,4.4434,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.1793,0.1636,0.0526,0.0006,0.0,0.0,0.0,0.0,0.0,0.0
std,2.895865,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.674149,5.736072,2.420004,0.06,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,253.0,253.0,156.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
test_targets = test_data.loc[:, "label"]
test_features = test_data.loc[:, pixel_cols]
test_predictions = estimator.predict(make_input_fn(
    test_features, 
    test_targets, 
    num_epochs=1, 
    shuffle=False)) # generator
test_predictions = [pd.Series(i["probabilities"]).idxmax() for i in test_predictions]
acc = metrics.accuracy_score(test_targets, test_predictions)
print("Accuracy: {}".format(acc))

Accuracy: 0.566


In [19]:
calibration_data = pd.DataFrame()
calibration_data["test_predictions"] = pd.Series(test_predictions)
calibration_data["test_targets"] = pd.Series(test_targets.values)
calibration_data.describe()

Unnamed: 0,test_predictions,test_targets
count,10000.0,10000.0
mean,5.7368,4.4434
std,3.039712,2.895865
min,0.0,0.0
25%,3.0,2.0
50%,7.0,4.0
75%,8.0,7.0
max,9.0,9.0


## Keras

In [0]:
train_targets_keras = pd.get_dummies(train_targets, prefix="pix")

In [21]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(
    100, 
    activation="relu", 
    input_shape=(train_features.shape[1],)))
model.add(tf.keras.layers.Dense(100, activation="relu"))
model.add(tf.keras.layers.Dense(10, activation="softmax"))
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"])
model.fit(
    train_features.values, 
    train_targets_keras.values, 
    epochs=10, 
    batch_size=100,
#     validation_data=(validation_features.values, validation_targets.values)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fdf31e9ea20>

In [22]:
train_predictions = model.predict(train_features.values, batch_size=32)
train_predictions = [pd.Series(i).idxmax() for i in train_predictions]
acc = metrics.accuracy_score(train_targets, train_predictions)
print("Accuracy: {}".format(acc))

Accuracy: 0.9840021330489268


In [23]:
validation_predictions = model.predict(validation_features.values, batch_size=32)
validation_predictions = [pd.Series(i).idxmax() for i in validation_predictions]
acc = metrics.accuracy_score(validation_targets, validation_predictions)
print("Accuracy: {}".format(acc))

Accuracy: 0.897640943622551


In [24]:
test_predictions = model.predict(test_features.values, batch_size=32)
test_predictions = [pd.Series(i).idxmax() for i in test_predictions]
acc = metrics.accuracy_score(test_targets, test_predictions)
print("Accuracy: {}".format(acc))

Accuracy: 0.901
