In [0]:
from tensorflow import keras
from keras.datasets import fashion_mnist

import matplotlib.pyplot as plt
import numpy as np

In [0]:
# Load Fashion-MNIST; load_data() yields a (train, test) pair, each of which is
# itself an (images, labels) pair.
train_split, test_split = fashion_mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [0]:
# Report the array shape of every split as a quick sanity check.
for split_name, split_array in (
  ("train_images: ", train_images),
  ("train_labels: ", train_labels),
  ("test_images:  ", test_images),
  ("test_labels:  ", test_labels),
):
  print(split_name + str(split_array.shape))

In [0]:
# Show the values of the first training image as loaded (this cell sits before
# the later division by 255.0).
print(train_images[0])

In [0]:
# Show the label stored for the first training image.
print(train_labels[0])

In [0]:
# Display names for the labels. The position in the list is the integer label:
# the preview cell titles each image with classes[train_labels[i]].
classes = [
  "t_shirt_top",  # 0
  "trouser",      # 1
  "pullover",     # 2
  "dress",        # 3
  "coat",         # 4
  "sandal",       # 5
  "shirt",        # 6
  "sneaker",      # 7
  "bag",          # 8
  "ankle_boot"    # 9
]

# Derived from the list so the count can never drift from the names.
num_classes = len(classes)

# Preview the first `num_classes` training images in a 2 x 5 grid, titling each
# panel with the class name of that image's label.
for panel_idx in range(num_classes):
  ax = plt.subplot(2, 5, panel_idx + 1)
  # The image is shown directly: reshaping it to (1, 28, 28) and column-stacking
  # the single 28 x 28 slice gives back the same 28 x 28 array.
  ax.imshow(train_images[panel_idx], cmap = plt.cm.binary)
  ax.axis("off")
  ax.set_title(classes[train_labels[panel_idx]])

In [0]:
# Flatten every image into a single row (samples x values) for storage.
# These arrays keep the values as loaded: the later scaling cell rebinds
# train_images / test_images to new arrays instead of modifying them in place.
train_images_saved = train_images.reshape(len(train_images), -1)
test_images_saved = test_images.reshape(len(test_images), -1)

In [0]:
# Confirm the flattened shapes.
for saved_name, saved_array in (
  ("train_images_saved: ", train_images_saved),
  ("test_images_saved:  ", test_images_saved),
):
  print(saved_name + str(saved_array.shape))

In [0]:
# Integer markers written to the img_use column so that training rows (1) and
# test rows (2) can be told apart once both splits share one table.
train_code, test_code = 1, 2

In [0]:
# Build one (index, values, label, split code) tuple per image using plain
# Python ints and lists, matching the field order of the Spark schema that the
# next cell applies to these rows.
train_data = [
  (row_idx, flat_image.astype(int).tolist(), int(label), train_code)
  for row_idx, (flat_image, label)
  in enumerate(zip(train_images_saved, train_labels))
]

test_data = [
  (row_idx, flat_image.astype(int).tolist(), int(label), test_code)
  for row_idx, (flat_image, label)
  in enumerate(zip(test_images_saved, test_labels))
]

In [0]:
from pyspark.sql.types import *

# Column layout shared by the training and test DataFrames; every column is
# declared nullable.
image_fields = [
  StructField("img_idx", IntegerType(), nullable = True),
  StructField("img", ArrayType(IntegerType()), nullable = True),
  StructField("img_label", IntegerType(), nullable = True),
  StructField("img_use", IntegerType(), nullable = True),
]
schema = StructType(image_fields)

# One DataFrame per split, both built with the same schema so they can be
# unioned afterwards.
train_df = spark.createDataFrame(train_data, schema = schema)
test_df = spark.createDataFrame(test_data, schema = schema)

In [0]:
# Stack the training and test rows into one DataFrame; the img_use column
# still records which split each row came from.
tf_images_df = train_df.union(test_df)

In [0]:
# Peek at the first five rows of the combined DataFrame.
tf_images_df.show(n = 5)

In [0]:
import array, binascii

def vector_to_hex(vector):
  """Encode a sequence of non-negative integers as a hexadecimal string.

  Every element is packed as a 4-byte little-endian unsigned integer and the
  resulting bytes are rendered as lowercase hex, so [1, 2] becomes
  "0100000002000000".

  Args:
    vector: Iterable of integers in the range 0 .. 2**32 - 1, or None.

  Returns:
    The hex string (empty for an empty vector), or None when vector is None.

  Raises:
    struct.error: If an element is not an integer or does not fit in 32 bits.
  """
  import struct  # local import keeps this block self-contained

  # The source column is declared nullable; let a null cell stay null instead
  # of raising inside the UDF.
  if vector is None:
    return None

  values = list(vector)
  # "<" pins both the byte order and the 4-byte item size. array.array("I", ...)
  # uses the platform's native layout, which is not guaranteed to be the same
  # on every machine that runs this function.
  packed = struct.pack("<%dI" % len(values), *values)
  return packed.hex()

# Imported explicitly: no visible cell brings `udf` into scope, so the wrapping
# below would otherwise depend on the runtime pre-defining that name.
from pyspark.sql.functions import udf

# Wrap the Python function as a Spark UDF that returns a string. The name is
# rebound on purpose: the following cell calls vector_to_hex("img") as a
# column expression.
vector_to_hex = udf(vector_to_hex, StringType())

# Register the same UDF under the name "vector_to_hex" with the Spark session.
spark.udf.register("vector_to_hex", vector_to_hex)

In [0]:
# Add the hex-encoded form of each img array as a new img_vector column.
img_vector_column = vector_to_hex("img")
tf_images_df = tf_images_df.withColumn("img_vector", img_vector_column)

In [0]:
# Peek at five rows now that img_vector has been added.
tf_images_df.show(n = 5)

In [0]:
# Remove the integer-array img column; img_vector now carries the same data
# in hex-encoded form.
tf_images_df = tf_images_df.drop("img")

In [0]:
# Peek at five rows of the DataFrame after the img column was dropped.
tf_images_df.show(n = 5)

In [0]:
# Scale the image values down by 255.0. The range check makes the cell safe to
# re-run: once an array has been scaled its maximum is at most 1.0, so a second
# execution leaves it alone instead of dividing again.
# NOTE(review): assumes the arrays as loaded contain values above 1.0 (the
# 0..255 range implied by the divisor) — confirm.
if train_images.max() > 1.0:
  train_images = train_images / 255.0

if test_images.max() > 1.0:
  test_images = test_images / 255.0

In [0]:
# Fully connected classifier: flatten each 28 x 28 input, pass it through one
# 128-unit ReLU layer, then emit a 10-way softmax (one output per class name).
layer_stack = [
  keras.layers.Flatten(input_shape = (28, 28)),
  keras.layers.Dense(units = 128, activation = "relu"),
  keras.layers.Dense(units = 10, activation = "softmax"),
]
model = keras.Sequential(layers = layer_stack)

# The "accuracy" metric is what the later plots read back from the training
# history.
compile_options = {
  "optimizer" : "adam",
  "loss" : "sparse_categorical_crossentropy",
  "metrics" : ["accuracy"],
}
model.compile(**compile_options)

model.summary()

In [0]:
# Train for 10 epochs in batches of 60, holding out 20% of the training data
# for validation. The returned object is kept because the plotting cells read
# its per-epoch metrics.
history = model.fit(
  x = train_images,
  y = train_labels,
  batch_size = 60,
  epochs = 10,
  validation_split = 0.2,
  verbose = 2
)

In [0]:
# Plot training against validation accuracy for every epoch.
for metric_key, series_label in (
  ("accuracy", "Train"),
  ("val_accuracy", "Validation"),
):
  plt.plot(history.history[metric_key], label = series_label)

plt.title("Model Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()

plt.show()

In [0]:
# Plot training against validation loss for every epoch.
for metric_key, series_label in (
  ("loss", "Train"),
  ("val_loss", "Validation"),
):
  plt.plot(history.history[metric_key], label = series_label)

plt.title("Model Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

plt.show()

In [0]:
# Score the trained model on the test split; the result unpacks into the loss
# followed by the single metric configured at compile time.
loss, accuracy = model.evaluate(x = test_images, y = test_labels, verbose = 2)

In [0]:
# Run the model over every test image and keep the per-class outputs.
predictions = model.predict(x = test_images)

# Show the output row for the first test image.
print(predictions[0])

In [0]:
from sklearn.metrics import confusion_matrix
from keras.utils import np_utils

# The true labels are already integer class ids, so they are passed directly:
# one-hot encoding them and taking the arg-max again gives back the same
# values. `labels` fixes the matrix to num_classes x num_classes, in the same
# order as `classes`, even if some class never occurs.
cm = confusion_matrix(
  test_labels,
  np.argmax(predictions, axis = 1),
  labels = np.arange(num_classes)
)

In [0]:
import plotly.graph_objects as go

# Heatmap of the confusion matrix. The z rows and the y labels are reversed
# together, so every row stays paired with its own class name.
data = go.Heatmap(
  z = cm[::-1],
  x = classes,
  y = classes[::-1],
  colorscale = "Reds"
)

# Cells above half of the largest count get white text, the rest black.
thresh = cm.max() / 2

# One text annotation per matrix cell, positioned by class name on both axes.
annotations = [
  {
    "x" : classes[col_idx],
    "y" : classes[row_idx],
    "font" : {"color" : "white" if count > thresh else "black"},
    "text" : str(count),
    "xref" : "x1",
    "yref" : "y1",
    "showarrow" : False
  }
  for row_idx, counts in enumerate(cm)
  for col_idx, count in enumerate(counts)
]

layout = {
  "title" : "Confusion Matrix",
  "xaxis" : {"title" : "Predicted"},
  "yaxis" : {"title" : "True"},
  "annotations" : annotations
}

fig = go.Figure(data = data, layout = layout)
fig.show()

In [0]:
import plotly.express as px
from sklearn.metrics import precision_score

# Per-class precision (average = None returns one score per label). The true
# labels are already integer class ids, so they are used directly instead of
# being one-hot encoded and arg-maxed back. `labels` keeps the result at one
# score per entry of `classes`, in the same order.
precision_scores = precision_score(
  test_labels,
  np.argmax(predictions, axis = 1),
  labels = np.arange(num_classes),
  average = None
)

# Bar chart with one bar per class name.
fig = px.bar(precision_scores,
             x = classes,
             y = precision_scores,
             labels = dict(x = "Classes", y = "Precision"),
             title = "Precision Scores")

fig.update_xaxes(tickangle = 45)
fig.show()

In [0]:
from sklearn.metrics import recall_score

# Per-class recall (average = None returns one score per label). The true
# labels are already integer class ids, so they are used directly instead of
# being one-hot encoded and arg-maxed back. `labels` keeps the result at one
# score per entry of `classes`, in the same order.
recall_scores = recall_score(
  test_labels,
  np.argmax(predictions, axis = 1),
  labels = np.arange(num_classes),
  average = None
)

# Bar chart with one bar per class name.
fig = px.bar(recall_scores,
             x = classes,
             y = recall_scores,
             labels = dict(x = "Classes", y = "Recall"),
             title = "Recall Scores")

fig.update_xaxes(tickangle = 45)
fig.show()

In [0]:
# Build one (index, per-class outputs, true label, split code) tuple per test
# image using plain Python floats and ints, ready to be turned into a Spark
# DataFrame.
prediction_results = [
  (row_idx, class_outputs.astype(float).tolist(), int(true_label), test_code)
  for row_idx, (class_outputs, true_label)
  in enumerate(zip(predictions, test_labels))
]

In [0]:
# Show the tuple built for the first test image.
print(prediction_results[0])

In [0]:
# Column layout for the prediction rows: the per-class outputs start out as a
# single array-of-floats column.
prediction_fields = [
  StructField("img_idx", IntegerType()),
  StructField("prediction_results", ArrayType(FloatType())),
  StructField("img_label", IntegerType()),
  StructField("img_use", IntegerType()),
]
prediction_schema = StructType(prediction_fields)

prediction_results_df = spark.createDataFrame(
  prediction_results,
  schema = prediction_schema
)

In [0]:
# Peek at the first five prediction rows.
prediction_results_df.show(n = 5)

In [0]:
import pyspark.sql.functions as F

# Split the prediction_results array into one column per class and name each
# of those columns after its class.
key_columns = ["img_idx", "img_label", "img_use"]
class_output_columns = [
  F.col("prediction_results")[class_idx] for class_idx in range(num_classes)
]

col_names = key_columns + classes[:num_classes]

prediction_results_df = (
  prediction_results_df
    .select(key_columns + class_output_columns)
    .toDF(*col_names)
)

In [0]:
# Peek at five rows now that there is one column per class.
prediction_results_df.show(n = 5)

In [0]:
%run ./Setup

In [0]:
# Settings for the "singlestore" data source used by the write cells.
# `cluster` and `password` are not defined in any visible cell — presumably
# supplied by the preceding %run ./Setup; verify.
singlestore_conf = {
  "spark.datasource.singlestore.ddlEndpoint" : cluster,
  "spark.datasource.singlestore.user" : "admin",
  "spark.datasource.singlestore.password" : password,
  "spark.datasource.singlestore.disablePushdown" : "false",
}

for conf_key, conf_value in singlestore_conf.items():
  spark.conf.set(conf_key, conf_value)

In [0]:
# Write the image rows to ml.tf_images through the singlestore data source,
# using LZ4 load compression and the "ignore" save mode.
images_writer = (
  tf_images_df.write
    .format("singlestore")
    .option("loadDataCompression", "LZ4")
    .mode("ignore")
)
images_writer.save("ml.tf_images")

In [0]:
# Write the prediction rows to ml.prediction_results through the singlestore
# data source, using LZ4 load compression and the "ignore" save mode.
predictions_writer = (
  prediction_results_df.write
    .format("singlestore")
    .option("loadDataCompression", "LZ4")
    .mode("ignore")
)
predictions_writer.save("ml.prediction_results")