In [None]:
from pyalink.alink import *
useLocalEnv(4)

from utils import *
import os
import pandas as pd

# Directory holding the MNIST files. ROOT_DIR is not defined in this notebook
# (presumably provided by `from utils import *`) and is concatenated directly,
# so it is assumed to already end with a path separator — TODO confirm.
Chap13_DATA_DIR = ROOT_DIR + "mnist" + os.sep

# File names of the dense train / test sets; joined with Chap13_DATA_DIR where used.
Chap13_DENSE_TRAIN_FILE = "dense_train.ak"
Chap13_DENSE_TEST_FILE = "dense_test.ak"

# File names under which the TensorFlow / PyTorch / ONNX pipeline models are saved.
PIPELINE_TF_MODEL = "pipeline_tf_model.ak"
PIPELINE_PYTORCH_MODEL = "pipeline_pytorch_model.ak"
PIPELINE_ONNX_MODEL = "pipeline_onnx_model.ak"

# Enable Alink's global "print process info" setting.
AlinkGlobalConfiguration.setPrintProcessInfo(True)


In [None]:
# c_1_3
# Inspect the plugin configuration: the plugin directory, whether automatic
# plugin download is switched on, and the plugins the downloader lists as available.

print(AlinkGlobalConfiguration.getPluginDir())

print("Auto Plugin Download : ")

print(AlinkGlobalConfiguration.getAutoPluginDownload())

# Kept in a variable so the downloader object can be reused interactively.
downloader = AlinkGlobalConfiguration.getPluginDownloader()

print(downloader.listAvailablePlugins())

In [None]:
def softmax(train_set, test_set):
    """Fit a Softmax classifier on ``train_set`` and print multi-class metrics for ``test_set``.

    Args:
        train_set: operator passed to ``Pipeline.fit``; must expose the "vec"
            and "label" columns configured below.
        test_set: operator passed to ``transform``; predictions are written to "pred".

    Returns:
        None. Metrics are printed lazily when the batch job executes.
    """
    classifier = (
        Softmax()
        .setVectorCol("vec")
        .setLabelCol("label")
        .setPredictionCol("pred")
    )
    predictions = Pipeline().add(classifier).fit(train_set).transform(test_set)

    # The evaluator is configured only after transform() so that the lazy-print
    # registration happens at the same point in the call sequence as before.
    evaluator = (
        EvalMultiClassBatchOp()
        .setLabelCol("label")
        .setPredictionCol("pred")
        .lazyPrintMetrics()
    )
    predictions.link(evaluator)

    # Run the job; this is what triggers the lazily registered metric output.
    BatchOperator.execute()

In [None]:
def dnn(train_set, test_set):
    """Train a two-hidden-layer Keras classifier on ``train_set`` and evaluate it on ``test_set``.

    The pipeline applies the "Scale" vector function (factor 1/255) to "vec",
    converts the vector to a float tensor, and trains a
    ``KerasSequentialClassifier`` on that tensor.

    Args:
        train_set: operator passed to ``Pipeline.fit``; needs "vec" and "label" columns.
        test_set: operator passed to ``transform``; predictions are written to "pred".

    Returns:
        None. Multi-class metrics are printed lazily when the batch job executes.
    """
    # Presumably maps 0-255 pixel intensities into [0, 1] — TODO confirm the input range.
    scaler = (
        VectorFunction()
        .setSelectedCol("vec")
        .setFuncName("Scale")
        .setWithVariable(1.0 / 255.0)
    )
    to_tensor = (
        VectorToTensor()
        .setTensorDataType("float")
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols(["label"])
    )
    hidden_layers = [
        "Dense(256, activation='relu')",
        "Dense(128, activation='relu')",
    ]
    # NOTE(review): no explicit output layer is listed; presumably the classifier
    # appends it itself — confirm against the component documentation.
    classifier = (
        KerasSequentialClassifier()
        .setTensorCol("tensor")
        .setLabelCol("label")
        .setPredictionCol("pred")
        .setLayers(hidden_layers)
        .setNumEpochs(50)
        .setBatchSize(512)
        .setValidationSplit(0.1)
        .setSaveBestOnly(True)
        .setBestMetric("sparse_categorical_accuracy")
        .setNumWorkers(1)
        .setNumPSs(0)
    )

    predictions = (
        Pipeline()
        .add(scaler)
        .add(to_tensor)
        .add(classifier)
        .fit(train_set)
        .transform(test_set)
    )

    # Configured after transform() to keep the lazy-print registration order unchanged.
    evaluator = (
        EvalMultiClassBatchOp()
        .setLabelCol("label")
        .setPredictionCol("pred")
        .lazyPrintMetrics()
    )
    predictions.link(evaluator)

    # Run the job; this is what triggers the lazily registered metric output.
    BatchOperator.execute()

In [None]:
def cnn(train_set, test_set):
    """Train a small convolutional Keras classifier on ``train_set`` and evaluate it on ``test_set``.

    The pipeline applies the "Scale" vector function (factor 1/255) to "vec",
    converts the vector to a float tensor of shape [28, 28], and trains a
    ``KerasSequentialClassifier`` whose first layer reshapes to (28, 28, 1).

    Args:
        train_set: operator passed to ``Pipeline.fit``; needs "vec" and "label" columns.
        test_set: operator passed to ``transform``; predictions are written to "pred".

    Returns:
        None. Multi-class metrics are printed lazily when the batch job executes.
    """
    # Presumably maps 0-255 pixel intensities into [0, 1] — TODO confirm the input range.
    scaler = (
        VectorFunction()
        .setSelectedCol("vec")
        .setFuncName("Scale")
        .setWithVariable(1.0 / 255.0)
    )
    to_tensor = (
        VectorToTensor()
        .setTensorDataType("float")
        .setTensorShape([28, 28])
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols(["label"])
    )
    # Two Conv2D + MaxPooling2D stages, then Flatten and Dropout.
    # NOTE(review): no explicit output layer is listed; presumably the classifier
    # appends it itself — confirm against the component documentation.
    conv_layers = [
        "Reshape((28, 28, 1))",
        "Conv2D(32, kernel_size=(3, 3), activation='relu')",
        "MaxPooling2D(pool_size=(2, 2))",
        "Conv2D(64, kernel_size=(3, 3), activation='relu')",
        "MaxPooling2D(pool_size=(2, 2))",
        "Flatten()",
        "Dropout(0.5)",
    ]
    classifier = (
        KerasSequentialClassifier()
        .setTensorCol("tensor")
        .setLabelCol("label")
        .setPredictionCol("pred")
        .setLayers(conv_layers)
        .setNumEpochs(20)
        .setValidationSplit(0.1)
        .setSaveBestOnly(True)
        .setBestMetric("sparse_categorical_accuracy")
        .setNumWorkers(1)
        .setNumPSs(0)
    )

    predictions = (
        Pipeline()
        .add(scaler)
        .add(to_tensor)
        .add(classifier)
        .fit(train_set)
        .transform(test_set)
    )

    # Configured after transform() to keep the lazy-print registration order unchanged.
    evaluator = (
        EvalMultiClassBatchOp()
        .setLabelCol("label")
        .setPredictionCol("pred")
        .lazyPrintMetrics()
    )
    predictions.link(evaluator)

    # Run the job; this is what triggers the lazily registered metric output.
    BatchOperator.execute()

In [None]:
# c_2
# Run the three MNIST classifiers (softmax, dnn, cnn) one after another on the
# dense train/test sets and print the total elapsed time.
# Stopwatch is not defined in this notebook (presumably provided by `utils`).
sw = Stopwatch()
sw.start()

# Both sets are read from .ak files under Chap13_DATA_DIR.
train_set = AkSourceBatchOp().setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TRAIN_FILE)
test_set = AkSourceBatchOp().setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)

# Each helper calls BatchOperator.execute() itself, so the models run sequentially.
softmax(train_set, test_set)

dnn(train_set, test_set)

cnn(train_set, test_set)

sw.stop()
print(sw.getElapsedTimeSpan())

In [None]:
# Wine-quality data set: directory, file names, and the feature columns that
# the regression helpers in this notebook read.
# ROOT_DIR is assumed to end with a path separator — TODO confirm.
Chap16_DATA_DIR = ROOT_DIR + "wine" + os.sep
Chap16_TRAIN_FILE = "train.ak"
Chap16_TEST_FILE = "test.ak"
Chap16_FEATURE_COL_NAMES = [
    "fixedAcidity",
    "volatileAcidity",
    "citricAcid",
    "residualSugar",
    "chlorides",
    "freeSulfurDioxide",
    "totalSulfurDioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
]

def linearReg(train_set, test_set):
    """Fit a linear regression for "quality" on ``train_set`` and evaluate it on ``test_set``.

    Args:
        train_set: operator passed to ``fit``; needs the columns listed in
            ``Chap16_FEATURE_COL_NAMES`` plus "quality".
        test_set: operator passed to ``transform``; predictions are written to "pred".

    Returns:
        None. Model info, prediction statistics and regression metrics are
        printed lazily when the batch job executes.
    """
    regressor = (
        LinearRegression()
        .setFeatureCols(Chap16_FEATURE_COL_NAMES)
        .setLabelCol("quality")
        .setPredictionCol("pred")
        .enableLazyPrintModelInfo()
    )
    predictions = regressor.fit(train_set).transform(test_set).lazyPrintStatistics()

    # Configured after lazyPrintStatistics() to keep the lazy-print registration order unchanged.
    evaluator = (
        EvalRegressionBatchOp()
        .setLabelCol("quality")
        .setPredictionCol("pred")
        .lazyPrintMetrics()
    )
    predictions.link(evaluator)

    # Run the job; this is what triggers the lazily registered output.
    BatchOperator.execute()

In [None]:
def dnnReg(train_set, test_set):
    """Train a five-layer Keras regressor for "quality" on ``train_set`` and evaluate it on ``test_set``.

    The pipeline standard-scales the feature columns, assembles them into the
    "vec" column, converts that vector to a tensor, and trains a
    ``KerasSequentialRegressor`` on it.

    Args:
        train_set: operator passed to ``Pipeline.fit``; needs the columns listed
            in ``Chap16_FEATURE_COL_NAMES`` plus "quality".
        test_set: operator passed to ``transform``; predictions are written to "pred".

    Returns:
        None. Prediction statistics and regression metrics are printed lazily
        when the batch job executes.
    """
    scaler = StandardScaler().setSelectedCols(Chap16_FEATURE_COL_NAMES)
    assembler = (
        VectorAssembler()
        .setSelectedCols(Chap16_FEATURE_COL_NAMES)
        .setOutputCol("vec")
    )
    to_tensor = (
        VectorToTensor()
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols(["quality"])
    )
    # Five identical hidden layers, kept as an explicit list of layer strings.
    hidden_layers = [
        "Dense(64, activation='relu')",
        "Dense(64, activation='relu')",
        "Dense(64, activation='relu')",
        "Dense(64, activation='relu')",
        "Dense(64, activation='relu')",
    ]
    regressor = (
        KerasSequentialRegressor()
        .setTensorCol("tensor")
        .setLabelCol("quality")
        .setPredictionCol("pred")
        .setLayers(hidden_layers)
        .setNumEpochs(20)
        .setNumWorkers(1)
        .setNumPSs(0)
    )

    predictions = (
        Pipeline()
        .add(scaler)
        .add(assembler)
        .add(to_tensor)
        .add(regressor)
        .fit(train_set)
        .transform(test_set)
        .lazyPrintStatistics()
    )

    # Configured after lazyPrintStatistics() to keep the lazy-print registration order unchanged.
    evaluator = (
        EvalRegressionBatchOp()
        .setLabelCol("quality")
        .setPredictionCol("pred")
        .lazyPrintMetrics()
    )
    predictions.link(evaluator)

    # Run the job; this is what triggers the lazily registered output.
    BatchOperator.execute()

In [None]:
# c_3
# Run both wine-quality regressors (linearReg, dnnReg) on the same train/test
# split and print the total elapsed time.
# Stopwatch is not defined in this notebook (presumably provided by `utils`).
sw = Stopwatch()
sw.start()

# Note: this rebinds train_set / test_set, which earlier cells used for MNIST.
train_set = AkSourceBatchOp().setFilePath(Chap16_DATA_DIR + Chap16_TRAIN_FILE)
test_set = AkSourceBatchOp().setFilePath(Chap16_DATA_DIR + Chap16_TEST_FILE)

linearReg(train_set, test_set)

dnnReg(train_set, test_set)

sw.stop()
print(sw.getElapsedTimeSpan())

In [None]:
import numpy as np

@udf(input_types=[AlinkDataTypes.TENSOR()], result_type=AlinkDataTypes.INT())
def get_max_index(tensor: np.ndarray):
    """Return the index of the largest element of ``tensor`` as a Python scalar.

    ``argmax`` is called without an axis, so the index refers to the flattened
    array. Registered as an Alink UDF taking one TENSOR and returning an INT.
    """
    flat_argmax = tensor.argmax()
    return flat_argmax.item()


In [None]:
# c_4_2
# Batch inference with a TensorFlow SavedModel: scale the vector, convert it to a
# [1, 28, 28, 1] float tensor, predict, take the arg-max as the class, and evaluate.
(
    AkSourceBatchOp()
    .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    .link(
        # Presumably maps 0-255 pixel intensities into [0, 1] — TODO confirm.
        VectorFunctionBatchOp()
        .setSelectedCol("vec")
        .setFuncName("Scale")
        .setWithVariable(1.0 / 255.0)
    )
    .link(
        # The output column is named "input_1" because the predictor below selects it by that name.
        VectorToTensorBatchOp()
        .setTensorDataType("float")
        .setTensorShape([1, 28, 28, 1])
        .setSelectedCol("vec")
        .setOutputCol("input_1")
        .setReservedCols(["label"])
    )
    .link(
        TFSavedModelPredictBatchOp()
        .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/mnist_model_tf.zip")
        .setSelectedCols(["input_1"])
        .setOutputSchemaStr("output_1 FLOAT_TENSOR")
    )
    .lazyPrint(3)
    .link(
        # Collapse the output tensor to a single predicted class index.
        UDFBatchOp()
        .setFunc(get_max_index)
        .setSelectedCols(["output_1"])
        .setOutputCol("pred")
    )
    .lazyPrint(3)
    .link(
        EvalMultiClassBatchOp()
        .setLabelCol("label")
        .setPredictionCol("pred")
        .lazyPrintMetrics()
    )
)

BatchOperator.execute()

In [None]:
# c_4_3
# Streaming counterpart of the TensorFlow SavedModel inference: same steps built
# from stream operators, printing a 0.001 sample of the predicted rows.
(
    AkSourceStreamOp()
    .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    .link(
        # Presumably maps 0-255 pixel intensities into [0, 1] — TODO confirm.
        VectorFunctionStreamOp()
        .setSelectedCol("vec")
        .setFuncName("Scale")
        .setWithVariable(1.0 / 255.0)
    )
    .link(
        VectorToTensorStreamOp()
        .setTensorDataType("float")
        .setTensorShape([1, 28, 28, 1])
        .setSelectedCol("vec")
        .setOutputCol("input_1")
        .setReservedCols(["label"])
    )
    .link(
        TFSavedModelPredictStreamOp()
        .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/mnist_model_tf.zip")
        .setSelectedCols(["input_1"])
        .setOutputSchemaStr("output_1 FLOAT_TENSOR")
    )
    .link(
        # Collapse the output tensor to a single predicted class index.
        UDFStreamOp()
        .setFunc(get_max_index)
        .setSelectedCols(["output_1"])
        .setOutputCol("pred")
    )
    .sample(0.001)
    .print()
)

StreamOperator.execute()

In [None]:
# c_4_4
# Bundle the preprocessing steps and the TensorFlow SavedModel predictor into a
# PipelineModel and save it to disk.
# NOTE(review): the second argument to save() is presumably the overwrite flag — confirm.
PipelineModel(
    VectorFunction()
    .setSelectedCol("vec")
    .setFuncName("Scale")
    .setWithVariable(1.0 / 255.0),
    VectorToTensor()
    .setTensorDataType("float")
    .setTensorShape([1, 28, 28, 1])
    .setSelectedCol("vec")
    .setOutputCol("input_1")
    .setReservedCols(["label"]),
    TFSavedModelPredictor()
    .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/mnist_model_tf.zip")
    .setSelectedCols(["input_1"])
    .setOutputSchemaStr("output_1 FLOAT_TENSOR"),
).save(Chap13_DATA_DIR + PIPELINE_TF_MODEL, True)
BatchOperator.execute()

# Reload the saved pipeline and apply it to the test set as a stream; the UDF
# turns the output tensor into a class index, and a 0.001 sample is printed.
(
    PipelineModel
    .load(Chap13_DATA_DIR + PIPELINE_TF_MODEL)
    .transform(
        AkSourceStreamOp()
        .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    )
    .link(
        UDFStreamOp()
        .setFunc(get_max_index)
        .setSelectedCols(["output_1"])
        .setOutputCol("pred")
    )
    .sample(0.001)
    .print()
)
StreamOperator.execute()

In [None]:
# c_4_5
# Score a single test row locally with the saved TensorFlow pipeline model.
source = AkSourceBatchOp().setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)

print(source.getSchemaStr())

# Pull just the first row into a pandas DataFrame.
df = source.firstN(1).collectToDataframe()

# Build the input row as a plain list; .item() converts the second cell to a
# Python scalar (assumes it is a NumPy scalar — TODO confirm).
row = [df.iat[0,0], df.iat[0,1].item()]

# The second argument is the input schema string for the rows passed to map().
localPredictor = LocalPredictor(Chap13_DATA_DIR + PIPELINE_TF_MODEL, "vec string, label int")

print(localPredictor.getOutputSchemaStr())

r = localPredictor.map(row)
# Prints output fields 0 and 2 — presumably the label and the model output tensor;
# check against the output schema printed above.
print(str(r[0]) + " | " + str(r[2]))


In [None]:
# c_5_2
# Batch inference with a PyTorch model file: convert the vector to a
# [1, 1, 28, 28] float tensor, predict, take the arg-max as the class, and evaluate.
(
    AkSourceBatchOp()
    .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    .link(
        VectorToTensorBatchOp()
        .setTensorDataType("float")
        .setTensorShape([1, 1, 28, 28])
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols(["label"])
    )
    .link(
        TorchModelPredictBatchOp()
        .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/mnist_model_pytorch.pt")
        .setSelectedCols(["tensor"])
        .setOutputSchemaStr("output_1 FLOAT_TENSOR")
    )
    .lazyPrint(3)
    .link(
        # Collapse the output tensor to a single predicted class index.
        UDFBatchOp()
        .setFunc(get_max_index)
        .setSelectedCols(["output_1"])
        .setOutputCol("pred")
    )
    .lazyPrint(3)
    .link(
        EvalMultiClassBatchOp()
        .setLabelCol("label")
        .setPredictionCol("pred")
        .lazyPrintMetrics()
    )
)

BatchOperator.execute()

In [None]:
# c_5_3
# Streaming counterpart of the PyTorch inference: same steps built from stream
# operators, printing a 0.001 sample of the predicted rows.
(
    AkSourceStreamOp()
    .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    .link(
        VectorToTensorStreamOp()
        .setTensorDataType("float")
        .setTensorShape([1, 1, 28, 28])
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols(["label"])
    )
    .link(
        TorchModelPredictStreamOp()
        .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/mnist_model_pytorch.pt")
        .setSelectedCols(["tensor"])
        .setOutputSchemaStr("output_1 FLOAT_TENSOR")
    )
    .link(
        # Collapse the output tensor to a single predicted class index.
        UDFStreamOp()
        .setFunc(get_max_index)
        .setSelectedCols(["output_1"])
        .setOutputCol("pred")
    )
    .sample(0.001)
    .print()
)

StreamOperator.execute()

In [None]:
# c_5_4
# Bundle the tensor conversion and the PyTorch predictor into a PipelineModel
# and save it to disk.
# NOTE(review): the second argument to save() is presumably the overwrite flag — confirm.
PipelineModel(
    VectorToTensor()
    .setTensorDataType("float")
    .setTensorShape([1, 1, 28, 28])
    .setSelectedCol("vec")
    .setOutputCol("tensor")
    .setReservedCols(["label"]),
    TorchModelPredictor()
    .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/mnist_model_pytorch.pt")
    .setSelectedCols(["tensor"])
    .setOutputSchemaStr("output_1 FLOAT_TENSOR"),
).save(Chap13_DATA_DIR + PIPELINE_PYTORCH_MODEL, True)
BatchOperator.execute()

# Reload the saved pipeline and apply it to the test set as a stream; the UDF
# turns the output tensor into a class index, and a 0.001 sample is printed.
(
    PipelineModel
    .load(Chap13_DATA_DIR + PIPELINE_PYTORCH_MODEL)
    .transform(
        AkSourceStreamOp()
        .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    )
    .link(
        UDFStreamOp()
        .setFunc(get_max_index)
        .setSelectedCols(["output_1"])
        .setOutputCol("pred")
    )
    .sample(0.001)
    .print()
)
StreamOperator.execute()

In [None]:
# c_5_5
# Score a single test row locally with the saved PyTorch pipeline model.
source = AkSourceBatchOp().setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)

print(source.getSchemaStr())

# Pull just the first row into a pandas DataFrame.
df = source.firstN(1).collectToDataframe()

# Build the input row as a plain list; .item() converts the second cell to a
# Python scalar (assumes it is a NumPy scalar — TODO confirm).
row = [df.iat[0,0], df.iat[0,1].item()]

# The second argument is the input schema string for the rows passed to map().
localPredictor = LocalPredictor(Chap13_DATA_DIR + PIPELINE_PYTORCH_MODEL, "vec string, label int")

print(localPredictor.getOutputSchemaStr())

r = localPredictor.map(row)
# Prints output fields 0 and 2 — presumably the label and the model output tensor;
# check against the output schema printed above.
print(str(r[0]) + " | " + str(r[2]))


In [None]:
# c_6_2
# Train with a user-supplied TensorFlow script on a random 100 x 10 table.
# The feature columns and the label column name are handed to the script as JSON
# user parameters; the op's output is declared as (model_id, model_info).
import json

source = (
    RandomTableSourceBatchOp()
    .setNumRows(100)
    .setNumCols(10)
)

# Captured before the label column is appended, so it lists feature columns only.
colNames = source.getColNames()
label = "label"

userParams = {
    'featureCols': json.dumps(colNames),
    'labelCol': label,
    'batch_size': 16,
    'num_epochs': 1
}

# The same script is both shipped as a user file and used as the entry point,
# so the URL is defined once.
tf_script_url = "https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/tf_dnn_batch.py"

tensorFlowBatchOp = (
    TensorFlowBatchOp()
    .setUserFiles([tf_script_url])
    .setMainScriptFile(tf_script_url)
    .setUserParams(json.dumps(userParams))
    .setOutputSchemaStr("model_id long, model_info string")
    .setNumWorkers(1)
    .setNumPSs(0)
)

# Append a random 0/1 label column, named from `label` so it always matches the
# 'labelCol' parameter given to the script, then train and print the result.
# The result of print() is deliberately NOT assigned back to `source`: doing so
# would replace the source operator with an unrelated value.
(
    source
    .select("*, case when RAND() > 0.5 then 1. else 0. end as " + label)
    .link(tensorFlowBatchOp)
    .print()
)

In [None]:
# c_7_1
# Batch inference with an ONNX model: convert the vector to a [1, 1, 28, 28]
# float tensor, predict, take the arg-max as the class, and evaluate.
(
    AkSourceBatchOp()
    .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    .link(
        VectorToTensorBatchOp()
        .setTensorDataType("float")
        .setTensorShape([1, 1, 28, 28])
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols(["label"])
    )
    .link(
        # "0" and "21" are the model's input / output names
        # (presumably node names from the ONNX export — TODO confirm).
        OnnxModelPredictBatchOp()
        .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/cnn_mnist_pytorch.onnx")
        .setSelectedCols(["tensor"])
        .setInputNames(["0"])
        .setOutputNames(["21"])
        .setOutputSchemaStr("probabilities FLOAT_TENSOR")
    )
    .link(
        # Collapse the output tensor to a single predicted class index.
        UDFBatchOp()
        .setFunc(get_max_index)
        .setSelectedCols(["probabilities"])
        .setOutputCol("pred")
    )
    .lazyPrint(3)
    .link(
        EvalMultiClassBatchOp()
        .setLabelCol("label")
        .setPredictionCol("pred")
        .lazyPrintMetrics()
    )
)

BatchOperator.execute()


In [None]:
# c_7_2
# Streaming counterpart of the ONNX inference: same steps built from stream
# operators, printing a 0.001 sample of the predicted rows.
(
    AkSourceStreamOp()
    .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    .link(
        VectorToTensorStreamOp()
        .setTensorDataType("float")
        .setTensorShape([1, 1, 28, 28])
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols(["label"])
    )
    .link(
        # "0" and "21" are the model's input / output names
        # (presumably node names from the ONNX export — TODO confirm).
        OnnxModelPredictStreamOp()
        .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/cnn_mnist_pytorch.onnx")
        .setSelectedCols(["tensor"])
        .setInputNames(["0"])
        .setOutputNames(["21"])
        .setOutputSchemaStr("probabilities FLOAT_TENSOR")
    )
    .link(
        # Collapse the output tensor to a single predicted class index.
        UDFStreamOp()
        .setFunc(get_max_index)
        .setSelectedCols(["probabilities"])
        .setOutputCol("pred")
    )
    .sample(0.001)
    .print()
)

StreamOperator.execute()

In [None]:
# c_7_3
# Bundle the tensor conversion and the ONNX predictor into a PipelineModel and
# save it to disk.
# NOTE(review): the second argument to save() is presumably the overwrite flag — confirm.
PipelineModel(
    VectorToTensor()
    .setTensorDataType("float")
    .setTensorShape([1, 1, 28, 28])
    .setSelectedCol("vec")
    .setOutputCol("tensor")
    .setReservedCols(["label"]),
    OnnxModelPredictor()
    .setModelPath("https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/cnn_mnist_pytorch.onnx")
    .setSelectedCols(["tensor"])
    .setInputNames(["0"])
    .setOutputNames(["21"])
    .setOutputSchemaStr("probabilities FLOAT_TENSOR"),
).save(Chap13_DATA_DIR + PIPELINE_ONNX_MODEL, True)
BatchOperator.execute()

# Reload the saved pipeline and apply it to the test set as a stream; the UDF
# turns the output tensor into a class index, and a 0.001 sample is printed.
(
    PipelineModel
    .load(Chap13_DATA_DIR + PIPELINE_ONNX_MODEL)
    .transform(
        AkSourceStreamOp()
        .setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)
    )
    .link(
        UDFStreamOp()
        .setFunc(get_max_index)
        .setSelectedCols(["probabilities"])
        .setOutputCol("pred")
    )
    .sample(0.001)
    .print()
)
StreamOperator.execute()

In [None]:
# c_7_4
# Score a single test row locally with the saved ONNX pipeline model.
source = AkSourceBatchOp().setFilePath(Chap13_DATA_DIR + Chap13_DENSE_TEST_FILE)

print(source.getSchemaStr())

# Pull just the first row into a pandas DataFrame.
df = source.firstN(1).collectToDataframe()

# Build the input row as a plain list; .item() converts the second cell to a
# Python scalar (assumes it is a NumPy scalar — TODO confirm).
row = [df.iat[0,0], df.iat[0,1].item()]

# The second argument is the input schema string for the rows passed to map().
localPredictor = LocalPredictor(Chap13_DATA_DIR + PIPELINE_ONNX_MODEL, "vec string, label int")

print(localPredictor.getOutputSchemaStr())

r = localPredictor.map(row)
# Prints output fields 0 and 2 — presumably the label and the model output tensor;
# check against the output schema printed above.
print(str(r[0]) + " | " + str(r[2]))