## Quick Setup

In [None]:
import numpy as np

# URL of the SystemML PySpark wrapper module, pinned to one specific commit
# of the incubator-systemml repository so the API used below stays fixed.
systemml_py_url = "https://raw.githubusercontent.com/apache/incubator-systemml/3d5f9b11741f6d6ecc6af7cbaa1069cde32be838/src/main/java/org/apache/sysml/api/python/SystemML.py"

# Register the wrapper with `sc` (assumed to be the SparkContext supplied by
# the PySpark kernel -- it is not created anywhere in this notebook).
sc.addPyFile(systemml_py_url)

# Import only after addPyFile: presumably that call is what makes the
# SystemML module importable here. `ml` is the handle every later cell uses
# to run DML scripts.
from SystemML import MLContext
ml = MLContext(sc)

## Download Data - MNIST

The MNIST dataset contains labeled images of handwritten digits, where each example is a 28x28 pixel image of grayscale values scaled to [0,1] and stretched out as 784 pixels, and each label is a one-hot encoding over 10 possible digits.  Here, we use TensorFlow's API for accessing the data, and retrieve 55,000 training examples, 5,000 validation examples, and 10,000 test examples (the 5,000 validation examples are held out of MNIST's original 60,000 training images).  [Note: TensorFlow can easily be installed via [these instructions](https://www.tensorflow.org/versions/r0.9/get_started/os_setup.html#pip-installation).]

In [None]:
%%sh
# Create the directory the CSV exports in the next cell are written into.
# -p creates missing parent directories and is a no-op if it already exists,
# so this cell is safe to re-run.
mkdir -p examples/data/mnist/

In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST through TensorFlow's tutorial helper, using 'MNIST_data' as its
# data directory and requesting one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Every (destination CSV, array) pair to export: images and labels for the
# train, validation and test splits.
csv_exports = [
    ("examples/data/mnist/train_images.csv", mnist.train.images),
    ("examples/data/mnist/train_labels.csv", mnist.train.labels),
    ("examples/data/mnist/val_images.csv", mnist.validation.images),
    ("examples/data/mnist/val_labels.csv", mnist.validation.labels),
    ("examples/data/mnist/test_images.csv", mnist.test.images),
    ("examples/data/mnist/test_labels.csv", mnist.test.labels),
]

# Write each array as comma-separated text so the DML scripts below can
# read it with format="csv".
for csv_path, array in csv_exports:
    np.savetxt(csv_path, array, delimiter=",")

## SystemML Softmax Model

### 1. Train

In [None]:
# DML program executed by SystemML: it sources the softmax implementation,
# reads the training and validation CSVs named by the $-parameters, trains,
# and writes the resulting W and b.
# NOTE(review): $Wout and $bout are not supplied in the arguments below;
# presumably executeScript fills them in for the names listed in `outputs`
# -- confirm against the pinned SystemML.py.
script = """
source("examples/mnist_softmax.dml") as mnist_softmax

# Read data
X = read($X, format="csv")
y = read($y, format="csv")

X_val = read($X_val, format="csv")
y_val = read($y_val, format="csv")

# Train
[W, b] = mnist_softmax::train(X, y, X_val, y_val)

# Write model out
write(W, $Wout)
write(b, $bout)

print("")
print("")
"""

# Values for the script's $X, $y, $X_val and $y_val parameters: the CSV files
# exported by the data-download cell.
train_args = {
    "X": "examples/data/mnist/train_images.csv",
    "y": "examples/data/mnist/train_labels.csv",
    "X_val": "examples/data/mnist/val_images.csv",
    "y_val": "examples/data/mnist/val_labels.csv",
}

# Reset the MLContext before running, then execute the script and request
# W and b as outputs; `out` is what the next cell reads them from.
ml.reset()
out = ml.executeScript(script, train_args, outputs=["W", "b"])

### 2. Extract model from SystemML back into PySpark

In [None]:
# Fetch each trained matrix from the SystemML output as a DataFrame.
# `sqlContext` is assumed to be provided by the PySpark kernel.
W_with_ids = out.getDF(sqlContext, "W")
b_with_ids = out.getDF(sqlContext, "b")

# Order the rows by the "ID" column, then drop that column so only the
# matrix values remain.
W = W_with_ids.sort("ID").drop("ID")
b = b_with_ids.sort("ID").drop("ID")

### 3. Compute Test Accuracy

In [None]:
# DML program executed by SystemML: it reads the test CSVs and the model
# coefficients, evaluates the softmax model, and prints the accuracy.
script = """
source("examples/mnist_softmax.dml") as mnist_softmax

# Read data & coefficients
X_test = read($X_test, format="csv")
y_test = read($y_test, format="csv")
W = read($W)
b = read($b)

# Eval on test set
[loss, accuracy] = mnist_softmax::eval(X_test, y_test, W, b)

print("Accuracy: " + accuracy)

print("")
print("")
"""

# $X_test / $y_test point at the exported test CSVs; $W / $b are bound to the
# W and b objects produced by the extraction cell rather than to file paths.
eval_args = {
    "X_test": "examples/data/mnist/test_images.csv",
    "y_test": "examples/data/mnist/test_labels.csv",
    "W": W,
    "b": b,
}

# Reset the MLContext before running; no outputs are requested because the
# script reports the accuracy through its own print statement.
ml.reset()
out = ml.executeScript(script, eval_args)