In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import systemml  # pip3 install systemml
from systemml import MLContext, dml, pydml

# Default size, as (width, height), for every matplotlib figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (10, 6)

In [None]:
# Create the SystemML MLContext that every `ml.execute(...)` call below runs through.
# NOTE(review): `sc` is never defined in this notebook — presumably the SparkContext
# injected by the PySpark kernel/shell; confirm before running on a fresh kernel.
ml = MLContext(sc)

# Read in train & val data

In [None]:
# Load the training and validation DataFrames from their on-disk datasets.
# NOTE(review): `sqlContext` is never defined in this notebook — presumably it is
# provided by the PySpark kernel/shell; confirm before running on a fresh kernel.
train, val = [
    sqlContext.read.load(path)
    for path in (
        "data/train_100_grayscale.parquet",
        "data/val_100_grayscale.parquet",
    )
]
train, val

In [None]:
# Image geometry handed to the LeNet scripts further down:
#   c    -> passed as `C`
#   size -> passed as both `Hin` and `Win`
# NOTE(review): presumably 1 channel (grayscale) and 256x256-pixel samples — confirm
# against the dataset.
c, size = 1, 256

In [None]:
# Count the rows of the training DataFrame and display the result; the trailing
# comment records the value the author observed.
tc = train.count()
tc  # 100

In [None]:
# Count the rows of the validation DataFrame and display the result; the trailing
# comment records the value the author observed.
vc = val.count()
vc  # 100

# Extract X and Y

In [None]:
# Split each DataFrame into a feature frame and a label frame. Both keep the
# "__INDEX" column alongside the payload column ("sample" or "tumor_score").
feature_cols = ("__INDEX", "sample")
label_cols = ("__INDEX", "tumor_score")

X_df, X_val_df = (frame.select(*feature_cols) for frame in (train, val))
Y_df, Y_val_df = (frame.select(*label_cols) for frame in (train, val))
X_df, X_val_df, Y_df, Y_val_df

# Convert to SystemML Matrices

In [None]:
# DML preprocessing: rescale the features with `X * 2 - 1` and one-hot encode the
# labels into `num_tumor_classes` (3) columns using `table(...)`.
# NOTE(review): the `/ 255` scaling inside the script is commented out, so the
# `* 2 - 1` step only yields values in [-1, 1] if the stored samples are already
# in [0, 1] — confirm against the dataset.
# NOTE(review): `table(seq(1, n), Y, ...)` uses the label values as column indices,
# so labels are presumably 1..3 — confirm.
preprocess_dml = """
# # Scale images to [0,1]
# X = X / 255
# X_val = X_val / 255
X = X * 2 - 1
X_val = X_val * 2 - 1

# One-hot encode the labels
num_tumor_classes = 3
n = nrow(Y)
n_val = nrow(Y_val)
Y = table(seq(1, n), Y, n, num_tumor_classes)
Y_val = table(seq(1, n_val), Y_val, n_val, num_tumor_classes)
"""
outputs = ("X", "X_val", "Y", "Y_val")

# Bind the Spark DataFrames to the DML variable names, then run the script and
# pull the resulting SystemML matrices back out under the same names.
preprocess_inputs = dict(X=X_df, X_val=X_val_df, Y=Y_df, Y_val=Y_val_df)
script = dml(preprocess_dml).input(**preprocess_inputs).output(*outputs)
preprocess_results = ml.execute(script)
X, X_val, Y, Y_val = preprocess_results.get(*outputs)
X, X_val, Y, Y_val

# Softmax Classifier

## Train

In [None]:
# Train the softmax classifier defined in "mnist_softmax.dml" on the preprocessed
# matrices. All hyperparameters live inside the DML script; the trained weight
# matrix `W` and bias `b` are returned.
softmax_train_dml = """
source("mnist_softmax.dml") as clf

# Hyperparameters & Settings
lr = 5e-5  # learning rate
mu = 0.9  # momentum
decay = 1  #0.99  # learning rate decay constant
batch_size = 50
epochs = 100
iters = ceil(nrow(Y) / batch_size)

# Train
[W, b] = clf::train(X, Y, X_val, Y_val, lr, mu, decay, batch_size, epochs, iters)
"""
softmax_train_inputs = dict(X=X, Y=Y, X_val=X_val, Y_val=Y_val)
script = dml(softmax_train_dml).input(**softmax_train_inputs).output("W", "b")
softmax_train_results = ml.execute(script)
W, b = softmax_train_results.get("W", "b")
W, b

## Eval

In [None]:
# Evaluate the trained softmax model on both splits. The DML script predicts class
# probabilities with (W, b) and computes loss and accuracy for train and validation.
softmax_eval_dml = """
source("mnist_softmax.dml") as clf

# Eval
probs = clf::predict(X, W, b)
[loss, accuracy] = clf::eval(probs, Y)
probs_val = clf::predict(X_val, W, b)
[loss_val, accuracy_val] = clf::eval(probs_val, Y_val)
"""
outputs = ("loss", "accuracy", "loss_val", "accuracy_val")
softmax_eval_inputs = dict(X=X, Y=Y, X_val=X_val, Y_val=Y_val, W=W, b=b)
script = dml(softmax_eval_dml).input(**softmax_eval_inputs).output(*outputs)
softmax_eval_results = ml.execute(script)
# The DML outputs "accuracy"/"accuracy_val" are bound to the shorter Python names
# `acc`/`acc_val`.
loss, acc, loss_val, acc_val = softmax_eval_results.get(*outputs)
loss, acc, loss_val, acc_val

In [None]:
# Re-display the softmax metrics computed by the Eval cell above:
# (train loss, train accuracy, validation loss, validation accuracy).
loss, acc, loss_val, acc_val

---

# LeNet ConvNet

## Train

In [None]:
# Train the LeNet ConvNet defined in "mnist_lenet.dml". Besides the data matrices,
# the script receives the image geometry (C, Hin, Win) from the notebook-level
# `c` and `size` values. It returns ten parameter matrices: the Wc*/bc* and
# Wa*/ba* weight/bias pairs.
lenet_train_dml = """
source("mnist_lenet.dml") as clf

# Hyperparameters & Settings
lr = 5e-5  # learning rate
mu = 0.9  # momentum
decay = 1  #0.99  # learning rate decay constant
lambda = 5e-04
batch_size = 50
epochs = 2
iters = ceil(nrow(Y) / batch_size)
dir = "models/lenet/"

# Train
[Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2] = clf::train(X, Y, X_val, Y_val, C, Hin, Win, lr, mu, decay, lambda, batch_size, epochs, iters, dir)
"""
outputs = ("Wc1", "bc1", "Wc2", "bc2", "Wc3", "bc3", "Wa1", "ba1", "Wa2", "ba2")
lenet_train_inputs = dict(
    X=X, X_val=X_val, Y=Y, Y_val=Y_val,
    C=c, Hin=size, Win=size,
)
script = dml(lenet_train_dml).input(**lenet_train_inputs).output(*outputs)
lenet_train_results = ml.execute(script)
Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2 = lenet_train_results.get(*outputs)
Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2

In [None]:
# Re-display the ten LeNet parameter matrices returned by the training cell above.
Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2

## Eval

In [None]:
# Evaluate the trained LeNet model on both splits. The DML script predicts class
# probabilities from the trained parameters and computes loss and accuracy for
# train and validation.
lenet_eval_dml = """
source("mnist_lenet.dml") as clf

# Eval
probs = clf::predict(X, C, Hin, Win, Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2)
[loss, accuracy] = clf::eval(probs, Y)
probs_val = clf::predict(X_val, C, Hin, Win, Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2)
[loss_val, accuracy_val] = clf::eval(probs_val, Y_val)
"""
outputs = ("loss", "accuracy", "loss_val", "accuracy_val")
# Inputs in the same order as before: data, image geometry, then trained parameters.
lenet_eval_inputs = dict(
    X=X, X_val=X_val, Y=Y, Y_val=Y_val,
    C=c, Hin=size, Win=size,
    Wc1=Wc1, bc1=bc1,
    Wc2=Wc2, bc2=bc2,
    Wc3=Wc3, bc3=bc3,
    Wa1=Wa1, ba1=ba1,
    Wa2=Wa2, ba2=ba2,
)
script = dml(lenet_eval_dml).input(**lenet_eval_inputs).output(*outputs)
lenet_eval_results = ml.execute(script)
# The DML outputs "accuracy"/"accuracy_val" are bound to the shorter Python names
# `acc`/`acc_val`.
loss, acc, loss_val, acc_val = lenet_eval_results.get(*outputs)
loss, acc, loss_val, acc_val

In [None]:
# Re-display the LeNet metrics computed by the Eval cell above:
# (train loss, train accuracy, validation loss, validation accuracy).
loss, acc, loss_val, acc_val