IGNITE-10234: Inference workflow for ML #5415

Closed · wants to merge 10 commits
@@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.examples.ml.inference;

import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import javax.cache.Cache;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.Ignition;
import org.apache.ignite.cache.query.QueryCursor;
import org.apache.ignite.cache.query.ScanQuery;
import org.apache.ignite.examples.ml.regression.linear.LinearRegressionLSQRTrainerExample;
import org.apache.ignite.examples.ml.util.MLSandboxDatasets;
import org.apache.ignite.examples.ml.util.SandboxMLCache;
import org.apache.ignite.ml.inference.InfModel;
import org.apache.ignite.ml.inference.builder.IgniteDistributedInfModelBuilder;
import org.apache.ignite.ml.inference.parser.IgniteFunctionInfModelParser;
import org.apache.ignite.ml.inference.parser.InfModelParser;
import org.apache.ignite.ml.inference.reader.InMemoryInfModelReader;
import org.apache.ignite.ml.inference.reader.InfModelReader;
import org.apache.ignite.ml.math.primitives.vector.Vector;
import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer;
import org.apache.ignite.ml.regressions.linear.LinearRegressionModel;

/**
* This example is based on {@link LinearRegressionLSQRTrainerExample}, but to perform inference it uses the approach
* implemented in the {@link org.apache.ignite.ml.inference} package.
*/
public class IgniteFunctionDistributedInferenceExample {
    /** Run example. */
    public static void main(String... args) throws IOException, ExecutionException, InterruptedException {
        System.out.println();
        System.out.println(">>> Linear regression model over cache based dataset usage example started.");
        // Start ignite grid.
        try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
            System.out.println(">>> Ignite grid started.");

            IgniteCache<Integer, Vector> dataCache = new SandboxMLCache(ignite)
                .fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);

            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer();

            System.out.println(">>> Perform the training to get the model.");
            LinearRegressionModel mdl = trainer.fit(
                ignite,
                dataCache,
                (k, v) -> v.copyOfRange(1, v.size()),
                (k, v) -> v.get(0)
            );

            System.out.println(">>> Linear regression model: " + mdl);

            System.out.println(">>> Preparing model reader and model parser.");
            InfModelReader reader = new InMemoryInfModelReader(mdl);
Member: Why not simplify to ModelReader?

Contributor (Author): We already have another Model interface in our code base. From my perspective it is better to keep the Inf prefix to distinguish the two.

            InfModelParser<Vector, Double> parser = new IgniteFunctionInfModelParser<>();
Member: Why not simplify to ModelParser?

Contributor (Author): See the comment above.

            try (InfModel<Vector, Future<Double>> infMdl = new IgniteDistributedInfModelBuilder(ignite, 4, 4)
                .build(reader, parser)) {
                System.out.println(">>> Inference model is ready.");

                System.out.println(">>> ---------------------------------");
                System.out.println(">>> | Prediction\t| Ground Truth\t|");
                System.out.println(">>> ---------------------------------");

                try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
                    for (Cache.Entry<Integer, Vector> observation : observations) {
                        Vector val = observation.getValue();
                        Vector inputs = val.copyOfRange(1, val.size());
                        double groundTruth = val.get(0);

                        double prediction = infMdl.predict(inputs).get();

                        System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth);
                    }
                }
            }

            System.out.println(">>> ---------------------------------");

            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        }
    }
}
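The distributed model above returns a Future<Double> per prediction. Where a blocking call is more convenient, a thin adapter can hide the future. A minimal sketch, assuming InfModel declares only the predict(I) method and the no-arg close() used in this diff; the adapter is hypothetical, not part of the PR:

// Hedged sketch: blocking adapter over the Future-based distributed model.
// `asyncMdl` stands for the InfModel<Vector, Future<Double>> built above.
InfModel<Vector, Double> syncMdl = new InfModel<Vector, Double>() {
    @Override public Double predict(Vector input) {
        try {
            // Block until the distributed prediction completes.
            return asyncMdl.predict(input).get();
        }
        catch (InterruptedException | ExecutionException e) {
            throw new IllegalStateException("Inference failed", e);
        }
    }

    @Override public void close() {
        asyncMdl.close();
    }
};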
@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.examples.ml.inference;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.ignite.Ignite;
import org.apache.ignite.Ignition;
import org.apache.ignite.internal.util.IgniteUtils;
import org.apache.ignite.ml.inference.InfModel;
import org.apache.ignite.ml.inference.builder.IgniteDistributedInfModelBuilder;
import org.apache.ignite.ml.inference.parser.InfModelParser;
import org.apache.ignite.ml.inference.parser.TensorFlowSavedModelInfModelParser;
import org.apache.ignite.ml.inference.reader.FileSystemInfModelReader;
import org.apache.ignite.ml.inference.reader.InfModelReader;
import org.apache.ignite.ml.util.MnistUtils;
import org.tensorflow.Tensor;

/**
 * This example demonstrates how to load a TensorFlow model into Java and perform inference in a distributed
 * environment using Apache Ignite services.
 */
public class TensorFlowDistributedInferenceExample {
    /** Path to the directory with saved TensorFlow model. */
    private static final String MODEL_PATH = "examples/src/main/resources/ml/mnist_tf_model";

    /** Path to the MNIST images data. */
    private static final String MNIST_IMG_PATH = "org/apache/ignite/examples/ml/util/datasets/t10k-images-idx3-ubyte";

    /** Path to the MNIST labels data. */
    private static final String MNIST_LBL_PATH = "org/apache/ignite/examples/ml/util/datasets/t10k-labels-idx1-ubyte";

    /** Run example. */
    public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
        try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
            File mdlRsrc = IgniteUtils.resolveIgnitePath(MODEL_PATH);
            if (mdlRsrc == null)
                throw new IllegalArgumentException("Resource not found [resource_path=" + MODEL_PATH + "]");

            InfModelReader reader = new FileSystemInfModelReader(mdlRsrc.getPath());

            InfModelParser<double[], Long> parser = new TensorFlowSavedModelInfModelParser<double[], Long>("serve")
Member: TFModelParser is better.

Contributor (Author): See the comment above.


.withInput("Placeholder", doubles -> {
float[][][] reshaped = new float[1][28][28];
Member: Is it a Tensor-like structure here?

Contributor (Author): Yep (see the index-mapping sketch after this expression).

                    for (int i = 0; i < doubles.length; i++)
                        reshaped[0][i / 28][i % 28] = (float)doubles[i];
                    return Tensor.create(reshaped);
                })
                .withOutput(Collections.singletonList("ArgMax"), collectedTensors -> {
                    return collectedTensors.get("ArgMax").copyTo(new long[1])[0];
                });
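As the reply above confirms, the lambda packs the flat 784-pixel MNIST vector row-major into a 1x28x28 tensor-shaped array. A self-contained check of the index arithmetic, in plain Java with no Ignite or TensorFlow dependencies (the class name is illustrative):

public class ReshapeIndexCheck {
    public static void main(String[] args) {
        double[] flat = new double[28 * 28];   // one MNIST image, flattened row-major
        flat[2 * 28 + 5] = 1.0;                // mark the pixel at row 2, column 5

        float[][][] reshaped = new float[1][28][28];
        for (int i = 0; i < flat.length; i++)
            reshaped[0][i / 28][i % 28] = (float)flat[i]; // i / 28 = row, i % 28 = column

        System.out.println(reshaped[0][2][5]); // prints 1.0: the marked pixel landed at (2, 5)
    }
}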

            List<MnistUtils.MnistLabeledImage> images = MnistUtils.mnistAsListFromResource(
                MNIST_IMG_PATH,
                MNIST_LBL_PATH,
                new Random(0),
                10000
            );

            long t0 = System.currentTimeMillis();

            try (InfModel<double[], Future<Long>> distributedMdl = new IgniteDistributedInfModelBuilder(ignite, 4, 4)
                .build(reader, parser)) {
                List<Future<?>> futures = new ArrayList<>(images.size());
                for (MnistUtils.MnistLabeledImage image : images)
                    futures.add(distributedMdl.predict(image.getPixels()));
                for (Future<?> f : futures)
                    f.get();
            }

            long t1 = System.currentTimeMillis();

            System.out.println("Distributed model throughput: " + images.size() / ((t1 - t0) / 1000.0) + " req/sec");
        }
    }
}
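Millisecond wall-clock timing over a single cold pass is a coarse measure. A sketch of the same loop with a warm-up pass and System.nanoTime(), using only the predict(double[]) call shown above; `mdl` stands for the InfModel<double[], Future<Long>> built in this example:

// Hedged sketch: warm-up pass plus nanoTime-based throughput measurement.
for (int i = 0; i < 1_000; i++)                        // warm up services and caches
    mdl.predict(images.get(i % images.size()).getPixels()).get();

long start = System.nanoTime();

List<Future<Long>> futures = new ArrayList<>(images.size());
for (MnistUtils.MnistLabeledImage image : images)
    futures.add(mdl.predict(image.getPixels()));
for (Future<Long> f : futures)
    f.get();

long elapsedNs = System.nanoTime() - start;
System.out.printf("Throughput: %.1f req/sec%n", images.size() * 1e9 / elapsedNs);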
@@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.examples.ml.inference;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import org.apache.ignite.internal.util.IgniteUtils;
import org.apache.ignite.ml.inference.InfModel;
import org.apache.ignite.ml.inference.builder.SingleInfModelBuilder;
import org.apache.ignite.ml.inference.parser.InfModelParser;
import org.apache.ignite.ml.inference.parser.TensorFlowSavedModelInfModelParser;
import org.apache.ignite.ml.inference.reader.FileSystemInfModelReader;
import org.apache.ignite.ml.inference.reader.InfModelReader;
import org.apache.ignite.ml.util.MnistUtils;
import org.tensorflow.Tensor;

/**
 * This example demonstrates how to load a TensorFlow model into Java and perform inference with this model in a
 * single thread.
 */
public class TensorFlowLocalInferenceExample {
    /** Path to the directory with saved TensorFlow model. */
    private static final String MODEL_PATH = "examples/src/main/resources/ml/mnist_tf_model";

    /** Path to the MNIST images data. */
    private static final String MNIST_IMG_PATH = "org/apache/ignite/examples/ml/util/datasets/t10k-images-idx3-ubyte";

    /** Path to the MNIST labels data. */
    private static final String MNIST_LBL_PATH = "org/apache/ignite/examples/ml/util/datasets/t10k-labels-idx1-ubyte";

    /** Run example. */
    public static void main(String[] args) throws IOException {
        File mdlRsrc = IgniteUtils.resolveIgnitePath(MODEL_PATH);
        if (mdlRsrc == null)
            throw new IllegalArgumentException("Resource not found [resource_path=" + MODEL_PATH + "]");

        InfModelReader reader = new FileSystemInfModelReader(mdlRsrc.getPath());

        InfModelParser<double[], Long> parser = new TensorFlowSavedModelInfModelParser<double[], Long>("serve")
            .withInput("Placeholder", doubles -> {
Member: Refactor the lambdas passed as parameters into separate variables and give them appropriate names (see the sketch after this expression).

                float[][][] reshaped = new float[1][28][28];
                for (int i = 0; i < doubles.length; i++)
                    reshaped[0][i / 28][i % 28] = (float)doubles[i];
                return Tensor.create(reshaped);
            })
            .withOutput(Collections.singletonList("ArgMax"), collectedTensors -> {
                return collectedTensors.get("ArgMax").copyTo(new long[1])[0];
            });
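A sketch of the refactoring suggested above. The IgniteFunction types are an assumption inferred from the lambda shapes in this diff, not confirmed signatures of withInput/withOutput:

// Requires: import java.util.Map; import org.apache.ignite.ml.math.functions.IgniteFunction;
// The functional types below are assumptions; the real parameter types may differ.
IgniteFunction<double[], Tensor<?>> imageToTensor = doubles -> {
    float[][][] reshaped = new float[1][28][28]; // 1 x 28 x 28 input tensor
    for (int i = 0; i < doubles.length; i++)
        reshaped[0][i / 28][i % 28] = (float)doubles[i];
    return Tensor.create(reshaped);
};

IgniteFunction<Map<String, Tensor<?>>, Long> argMaxToLabel =
    collectedTensors -> collectedTensors.get("ArgMax").copyTo(new long[1])[0];

InfModelParser<double[], Long> parser = new TensorFlowSavedModelInfModelParser<double[], Long>("serve")
    .withInput("Placeholder", imageToTensor)
    .withOutput(Collections.singletonList("ArgMax"), argMaxToLabel);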

        List<MnistUtils.MnistLabeledImage> images = MnistUtils.mnistAsListFromResource(
            MNIST_IMG_PATH,
            MNIST_LBL_PATH,
            new Random(0),
            10000
        );

        long t0 = System.currentTimeMillis();

        try (InfModel<double[], Long> locMdl = new SingleInfModelBuilder().build(reader, parser)) {
            for (MnistUtils.MnistLabeledImage image : images)
                locMdl.predict(image.getPixels());
        }

        long t1 = System.currentTimeMillis();

        // Divide by 1000.0 so integer division does not truncate the elapsed seconds.
        System.out.println("Local model throughput: " + images.size() / ((t1 - t0) / 1000.0) + " req/sec");
    }
}
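The local example above reports only throughput. A sketch that also tracks accuracy over the same pass, to sit inside the try block; getLabel() is an assumed accessor on MnistUtils.MnistLabeledImage and does not appear in this diff:

// Hedged sketch: accuracy over the evaluation pass (getLabel() is assumed).
int correct = 0;
for (MnistUtils.MnistLabeledImage image : images) {
    long prediction = locMdl.predict(image.getPixels());
    if (prediction == image.getLabel()) // compare predicted digit to the MNIST label
        correct++;
}
System.out.printf("Accuracy: %.2f%%%n", 100.0 * correct / images.size());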
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.examples.ml.inference;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.ignite.internal.util.IgniteUtils;
import org.apache.ignite.ml.inference.InfModel;
import org.apache.ignite.ml.inference.builder.ThreadedInfModelBuilder;
import org.apache.ignite.ml.inference.parser.InfModelParser;
import org.apache.ignite.ml.inference.parser.TensorFlowSavedModelInfModelParser;
import org.apache.ignite.ml.inference.reader.FileSystemInfModelReader;
import org.apache.ignite.ml.inference.reader.InfModelReader;
import org.apache.ignite.ml.util.MnistUtils;
import org.tensorflow.Tensor;

/**
 * This example demonstrates how to load a TensorFlow model into Java and perform inference with this model in
 * multiple threads.
 */
public class TensorFlowThreadedInferenceExample {
    /** Path to the directory with saved TensorFlow model. */
    private static final String MODEL_PATH = "examples/src/main/resources/ml/mnist_tf_model";

    /** Path to the MNIST images data. */
    private static final String MNIST_IMG_PATH = "org/apache/ignite/examples/ml/util/datasets/t10k-images-idx3-ubyte";

    /** Path to the MNIST labels data. */
    private static final String MNIST_LBL_PATH = "org/apache/ignite/examples/ml/util/datasets/t10k-labels-idx1-ubyte";

    /** Run example. */
    public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
        File mdlRsrc = IgniteUtils.resolveIgnitePath(MODEL_PATH);
        if (mdlRsrc == null)
            throw new IllegalArgumentException("Resource not found [resource_path=" + MODEL_PATH + "]");

        InfModelReader reader = new FileSystemInfModelReader(mdlRsrc.getPath());

        InfModelParser<double[], Long> parser = new TensorFlowSavedModelInfModelParser<double[], Long>("serve")
            .withInput("Placeholder", doubles -> {
                float[][][] reshaped = new float[1][28][28];
                for (int i = 0; i < doubles.length; i++)
                    reshaped[0][i / 28][i % 28] = (float)doubles[i];
                return Tensor.create(reshaped);
            })
            .withOutput(Collections.singletonList("ArgMax"), collectedTensors -> {
                return collectedTensors.get("ArgMax").copyTo(new long[1])[0];
            });

        List<MnistUtils.MnistLabeledImage> images = MnistUtils.mnistAsListFromResource(
            MNIST_IMG_PATH,
            MNIST_LBL_PATH,
            new Random(0),
            10000
        );

        long t0 = System.currentTimeMillis();

        try (InfModel<double[], Future<Long>> threadedMdl = new ThreadedInfModelBuilder(8)
Member: Do we really need this kind of model? For what cases can it be used? (See the note at the end of this example.)

            .build(reader, parser)) {
            List<Future<?>> futures = new ArrayList<>(images.size());
            for (MnistUtils.MnistLabeledImage image : images)
                futures.add(threadedMdl.predict(image.getPixels()));
            for (Future<?> f : futures)
                f.get();
        }

        long t1 = System.currentTimeMillis();

        // Divide by 1000.0 so integer division does not truncate the elapsed seconds.
        System.out.println("Threaded model throughput: " + images.size() / ((t1 - t0) / 1000.0) + " req/sec");
    }
}
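One plausible answer to the reviewer's question above, offered as an illustration rather than a confirmed design statement from this PR: ThreadedInfModelBuilder parallelizes inference over a local thread pool without requiring a running cluster, so the builder can be chosen per deployment. A sketch reusing the reader and parser from this example; the distributed variant additionally needs a started Ignite node:

// One parsed model, three serving strategies; all three usages appear in this PR.
InfModel<double[], Long> localMdl =
    new SingleInfModelBuilder().build(reader, parser);            // single thread, no cluster

InfModel<double[], Future<Long>> threadPoolMdl =
    new ThreadedInfModelBuilder(8).build(reader, parser);         // local thread pool

InfModel<double[], Future<Long>> clusterMdl =
    new IgniteDistributedInfModelBuilder(ignite, 4, 4).build(reader, parser); // Ignite services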