# Autoencoder
This notebook demonstrates the invocation of the SystemML autoencoder script, and alternative ways of passing in/out data.

# Install SystemML Python package and jar file
You can install the SystemML Python tgz file from one of the following: 
  1. The distribution location (https://dist.apache.org/repos/dist/release/incubator/systemml/), or 
  2. The latest daily build (https://sparktc.ibmcloud.com/repo/latest/), or
  3. Your local system, if you have checked out recent code and built it locally 
     (e.g. ~/git/incubator-systemml/target/systemml-1.0.0-incubating-SNAPSHOT-python.tgz).
     
### This notebook requires SystemML 0.14.0 or above.

In [None]:
# Install the SystemML Python package. Only one install command is active; the
# others are kept as commented-out alternatives for different package sources.

# Alternative: install by name from the package index. The version spec is quoted
# so that the shell does not interpret ">" as an output redirection.
# !pip install --user "systemml>=0.14.0"

# Active: the 0.14.0-incubating release archive from the Apache distribution site.
!pip install  https://dist.apache.org/repos/dist/release/incubator/systemml/0.14.0-incubating/systemml-0.14.0-incubating-python.tgz

# Alternative: the latest daily-build snapshot archive.
# !pip install https://sparktc.ibmcloud.com/repo/latest/systemml-1.0.0-incubating-SNAPSHOT-python.tgz

# Alternative: a snapshot archive built locally from a source checkout.
# !pip install ~/git/incubator-systemml/target/systemml-1.0.0-incubating-SNAPSHOT-python.tgz

In [None]:
# Display the metadata pip has recorded for the systemml package, as a quick check
# that the installation step succeeded.
!pip show systemml

In [None]:
import pandas as pd
from systemml import MLContext, dml

# `sc` is not defined anywhere in this notebook — presumably it is the SparkContext
# that a PySpark-enabled kernel provides; TODO confirm how the notebook is launched.
ml = MLContext(sc)
# Print whatever summary the MLContext reports about itself via ml.info().
print(ml.info())
# Last expression of the cell, so the value of sc.version is shown as the cell output.
sc.version

## Reading/writing SystemML data from/to the local file system

In [None]:
# Root directory on the local file system used for all files in this notebook.
FsPath = "/tmp/data/"
# Sub-directories for the script inputs (inp) and outputs (outp); both keep a
# trailing slash so file names can be appended directly.
inp, outp = (FsPath + leaf for leaf in ("Input/", "Output/"))

Generate data and write it out to a file.

In [None]:
# Build a 100 x 20 float matrix holding the values 1.0 .. 2000.0 (row-major).
# Despite the name, X_pd is the underlying NumPy array (.values), not a DataFrame.
X_pd = (
    pd.DataFrame(range(1, 2001, 1), dtype=float)
    .values
    .reshape(100, 20)
)

# DML program: write the bound matrix X to the path supplied as the $Xfile argument.
# NOTE(review): no format= is passed to write() although the target is named
# "X.csv" — confirm the on-disk format is the one intended.
script = """
    write(X, $Xfile)
"""

# "$Xfile" is not a valid Python identifier, so it is passed via dict unpacking.
xfile_arg = {"$Xfile": inp + "X.csv"}
prog = dml(script).input(X=X_pd).input(**xfile_arg)
ml.execute(prog)

In [None]:
!ls -l /tmp/data/Input

In [None]:
# Run the two-layer autoencoder DML script, exchanging data through files:
# the input matrix is read from inp + "X.csv" and the script is given one output
# path per "$..._out" argument under outp.
# NOTE(review): the URL tracks the `master` branch, so the script may change
# underneath this notebook — consider pinning it to a release tag.
autoencoderURL = "https://raw.githubusercontent.com/apache/incubator-systemml/master/scripts/staging/autoencoder-2layer.dml"
# Names of the DML variables to fetch back into Python after execution.
rets = ("iter", "num_iters_per_epoch", "beg", "end", "o")

# "$"-prefixed script arguments are not valid Python identifiers, so they are
# collected in dicts and passed with ** unpacking.
# presumably H1/H2 are layer sizes and BATCH/EPOCH training settings — see the script header.
hyperparams = {"$H1": 500, "$H2": 2, "$BATCH": 36, "$EPOCH": 5}
# Destination path for each "$..._out" argument of the script.
model_files = {
    "$W1_out": outp + "W1_out", "$b1_out": outp + "b1_out",
    "$W2_out": outp + "W2_out", "$b2_out": outp + "b2_out",
    "$W3_out": outp + "W3_out", "$b3_out": outp + "b3_out",
    "$W4_out": outp + "W4_out", "$b4_out": outp + "b4_out",
}

prog = (
    dml(autoencoderURL)
    .input(**{"$X": inp + "X.csv"})
    .input(**hyperparams)
    .input(**model_files)
    .output(*rets)
)

# The first result is bound to `iter_` (trailing underscore) so that the built-in
# iter() is not rebound for the rest of the kernel session.
iter_, num_iters_per_epoch, beg, end, o = ml.execute(prog).get(*rets)
print(iter_, num_iters_per_epoch, beg, end, o)

In [None]:
!ls -l /tmp/data/Output

## Alternatively, instead of passing file names in/out, use Python variables.

In [None]:
# Run the two-layer autoencoder DML script, exchanging data through Python
# objects: X is bound directly to the in-memory matrix X_pd, and the variables
# named in rets2 are requested as outputs instead of being written to files.
# NOTE(review): the URL tracks the `master` branch, so the script may change
# underneath this notebook — consider pinning it to a release tag.
autoencoderURL = "https://raw.githubusercontent.com/apache/incubator-systemml/master/scripts/staging/autoencoder-2layer.dml"
# Names of the DML variables to fetch back into Python after execution.
rets = ("iter", "num_iters_per_epoch", "beg", "end", "o")
rets2 = ("W1", "b1", "W2", "b2", "W3", "b3", "W4", "b4")

# "$"-prefixed script arguments are not valid Python identifiers, so they are
# collected in a dict and passed with ** unpacking.
hyperparams = {"$H1": 500, "$H2": 2, "$BATCH": 36, "$EPOCH": 5}

prog = (
    dml(autoencoderURL)
    .input(X=X_pd)
    .input(**hyperparams)
    .output(*rets)
    .output(*rets2)
)
result = ml.execute(prog)

# The first result is bound to `iter_` (trailing underscore) so that the built-in
# iter() is not rebound for the rest of the kernel session.
iter_, num_iters_per_epoch, beg, end, o = result.get(*rets)
# Retrieved so they are available for inspection in later cells.
W1, b1, W2, b2, W3, b3, W4, b4 = result.get(*rets2)

print(iter_, num_iters_per_epoch, beg, end, o)

## Uninstall/Clean up SystemML Python package and jar file

In [None]:
!yes | pip uninstall systemml