diff --git a/CHANGELOG.md b/CHANGELOG.md index ee545e3c4d0..401cabb90d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## [v0.4.5](https://github.com/gojek/feast/tree/v0.4.5) (2020-02-14) + +[Full Changelog](https://github.com/gojek/feast/compare/v0.4.4...v0.4.5) + +**Merged pull requests:** +- Use bzip2 compressed feature set json as pipeline option [\#466](https://github.com/gojek/feast/pull/466) ([khorshuheng](https://github.com/khorshuheng)) +- Make redis key creation more determinisitic [\#471](https://github.com/gojek/feast/pull/471) ([zhilingc](https://github.com/zhilingc)) +- Helm Chart Upgrades [\#458](https://github.com/gojek/feast/pull/458) ([Yanson](https://github.com/Yanson)) +- Exclude version from grouping [\#441](https://github.com/gojek/feast/pull/441) ([khorshuheng](https://github.com/khorshuheng)) +- Use concrete class for AvroCoder compatibility [\#465](https://github.com/gojek/feast/pull/465) ([zhilingc](https://github.com/zhilingc)) +- Fix typo in split string length check [\#464](https://github.com/gojek/feast/pull/464) ([zhilingc](https://github.com/zhilingc)) +- Update README.md and remove versions from Helm Charts [\#457](https://github.com/gojek/feast/pull/457) ([woop](https://github.com/woop)) +- Deduplicate example notebooks [\#456](https://github.com/gojek/feast/pull/456) ([woop](https://github.com/woop)) +- Allow users not to set max age for batch retrieval [\#446](https://github.com/gojek/feast/pull/446) ([zhilingc](https://github.com/zhilingc)) + ## [v0.4.4](https://github.com/gojek/feast/tree/v0.4.4) (2020-01-28) [Full Changelog](https://github.com/gojek/feast/compare/v0.4.3...v0.4.4) diff --git a/README.md b/README.md index d9b16748266..97cbcde421d 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,29 @@ my_model = ml.fit(data) prediction = my_model.predict(fs.get_online_features(customer_features, customer_entities)) ``` +## Getting Started with Docker Compose +The following commands will start Feast 
in online-only mode. +``` +git clone https://github.com/gojek/feast.git +cd feast/infra/docker-compose +cp .env.sample .env +docker-compose up -d +``` + +A [Jupyter Notebook](http://localhost:8888/tree/feast/examples) is now available to start using Feast. + +Please see the links below to set up Feast for batch/historical serving with BigQuery. + ## Important resources - * [Why Feast?](docs/why-feast.md) - * [Concepts](docs/concepts.md) - * [Installation](docs/getting-started/installing-feast.md) - * [Getting Help](docs/community.md) + +Please refer to the official documentation at + + * [Why Feast?](https://docs.feast.dev/why-feast) + * [Concepts](https://docs.feast.dev/concepts) + * [Installation](https://docs.feast.dev/installing-feast/overview) + * [Examples](https://github.com/gojek/feast/blob/master/examples/) + * [Change Log](https://github.com/gojek/feast/blob/master/CHANGELOG.md) + * [Slack (#Feast)](https://join.slack.com/t/kubeflow/shared_invite/enQtNDg5MTM4NTQyNjczLTdkNTVhMjg1ZTExOWI0N2QyYTQ2MTIzNTJjMWRiOTFjOGRlZWEzODc1NzMwNTMwM2EzNjY1MTFhODczNjk4MTk) ## Notice diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index 2de46ae1f2d..d80d6547186 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -22,7 +22,6 @@ import com.google.common.base.Strings; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; -import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto; import feast.core.SourceProto; import feast.core.StoreProto; @@ -30,15 +29,13 @@ import feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; -import feast.core.model.FeatureSet; -import feast.core.model.Job; -import feast.core.model.JobStatus; -import 
feast.core.model.Project; -import feast.core.model.Source; -import feast.core.model.Store; +import feast.core.job.option.FeatureSetJsonByteConverter; +import feast.core.model.*; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionCompressor; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -88,7 +85,12 @@ public Job startJob(Job job) { job.getStore().toProto(), false); } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(String.format("Unable to start job %s", job.getId()), e); + log.error(e.getMessage()); + throw new IllegalArgumentException( + String.format( + "DataflowJobManager failed to START job with id '%s' because the job" + + "has an invalid spec. Please check the FeatureSet, Source and Store specs. Actual error message: %s", + job.getId(), e.getMessage())); } } @@ -103,12 +105,15 @@ public Job updateJob(Job job) { try { List featureSetProtos = job.getFeatureSets().stream().map(FeatureSet::toProto).collect(Collectors.toList()); - return submitDataflowJob( job.getId(), featureSetProtos, job.getSource().toProto(), job.getStore().toProto(), true); - } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(String.format("Unable to update job %s", job.getId()), e); + log.error(e.getMessage()); + throw new IllegalArgumentException( + String.format( + "DataflowJobManager failed to UPDATE job with id '%s' because the job" + + "has an invalid spec. Please check the FeatureSet, Source and Store specs. 
Actual error message: %s", + job.getId(), e.getMessage())); } } @@ -210,13 +215,12 @@ private ImportOptions getPipelineOptions( throws IOException { String[] args = TypeConversion.convertMapToArgs(defaultOptions); ImportOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).as(ImportOptions.class); - Printer printer = JsonFormat.printer(); - List featureSetsJson = new ArrayList<>(); - for (FeatureSetProto.FeatureSet featureSet : featureSets) { - featureSetsJson.add(printer.print(featureSet.getSpec())); - } - pipelineOptions.setFeatureSetJson(featureSetsJson); - pipelineOptions.setStoreJson(Collections.singletonList(printer.print(sink))); + + OptionCompressor> featureSetJsonCompressor = + new BZip2Compressor<>(new FeatureSetJsonByteConverter()); + + pipelineOptions.setFeatureSetJson(featureSetJsonCompressor.compress(featureSets)); + pipelineOptions.setStoreJson(Collections.singletonList(JsonFormat.printer().print(sink))); pipelineOptions.setProject(projectId); pipelineOptions.setUpdate(update); pipelineOptions.setRunner(DataflowRunner.class); diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index fdf3aad9bc3..35ab45e6306 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -17,9 +17,7 @@ package feast.core.job.direct; import com.google.common.base.Strings; -import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; -import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.StoreProto; @@ -27,12 +25,15 @@ import feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; +import feast.core.job.option.FeatureSetJsonByteConverter; import feast.core.model.FeatureSet; 
import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionCompressor; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -92,17 +93,15 @@ public Job startJob(Job job) { } private ImportOptions getPipelineOptions( - List featureSets, StoreProto.Store sink) - throws InvalidProtocolBufferException { + List featureSets, StoreProto.Store sink) throws IOException { String[] args = TypeConversion.convertMapToArgs(defaultOptions); ImportOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).as(ImportOptions.class); - Printer printer = JsonFormat.printer(); - List featureSetsJson = new ArrayList<>(); - for (FeatureSetProto.FeatureSet featureSet : featureSets) { - featureSetsJson.add(printer.print(featureSet.getSpec())); - } - pipelineOptions.setFeatureSetJson(featureSetsJson); - pipelineOptions.setStoreJson(Collections.singletonList(printer.print(sink))); + + OptionCompressor> featureSetJsonCompressor = + new BZip2Compressor<>(new FeatureSetJsonByteConverter()); + + pipelineOptions.setFeatureSetJson(featureSetJsonCompressor.compress(featureSets)); + pipelineOptions.setStoreJson(Collections.singletonList(JsonFormat.printer().print(sink))); pipelineOptions.setRunner(DirectRunner.class); pipelineOptions.setProject(""); // set to default value to satisfy validation if (metrics.isEnabled()) { diff --git a/core/src/main/java/feast/core/job/option/FeatureSetJsonByteConverter.java b/core/src/main/java/feast/core/job/option/FeatureSetJsonByteConverter.java new file mode 100644 index 00000000000..dbd04d668fd --- /dev/null +++ b/core/src/main/java/feast/core/job/option/FeatureSetJsonByteConverter.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.job.option; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.JsonFormat; +import feast.core.FeatureSetProto; +import feast.ingestion.options.OptionByteConverter; +import java.util.ArrayList; +import java.util.List; + +public class FeatureSetJsonByteConverter + implements OptionByteConverter> { + + /** + * Convert list of feature sets to json strings joined by new line, represented as byte arrays + * + * @param featureSets List of feature set protobufs + * @return Byte array representation of the json strings + * @throws InvalidProtocolBufferException + */ + @Override + public byte[] toByte(List featureSets) + throws InvalidProtocolBufferException { + JsonFormat.Printer printer = + JsonFormat.printer().omittingInsignificantWhitespace().printingEnumsAsInts(); + List featureSetsJson = new ArrayList<>(); + for (FeatureSetProto.FeatureSet featureSet : featureSets) { + featureSetsJson.add(printer.print(featureSet.getSpec())); + } + return String.join("\n", featureSetsJson).getBytes(); + } +} diff --git a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java index c263515ed08..9f26c6919e4 100644 --- a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java +++ b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java @@ -19,11 +19,7 @@ import static 
org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; +import static org.mockito.Mockito.*; import static org.mockito.MockitoAnnotations.initMocks; import com.google.api.services.dataflow.Dataflow; @@ -44,14 +40,15 @@ import feast.core.config.FeastProperties.MetricsProperties; import feast.core.exception.JobExecutionException; import feast.core.job.Runner; -import feast.core.model.FeatureSet; -import feast.core.model.Job; -import feast.core.model.JobStatus; -import feast.core.model.Source; -import feast.core.model.Store; +import feast.core.job.option.FeatureSetJsonByteConverter; +import feast.core.model.*; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionCompressor; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.beam.runners.dataflow.DataflowPipelineJob; import org.apache.beam.runners.dataflow.DataflowRunner; @@ -131,8 +128,11 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { expectedPipelineOptions.setAppName("DataflowJobManager"); expectedPipelineOptions.setJobName(jobName); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); + + OptionCompressor> featureSetJsonCompressor = + new BZip2Compressor<>(new FeatureSetJsonByteConverter()); expectedPipelineOptions.setFeatureSetJson( - Lists.newArrayList(printer.print(featureSet.getSpec()))); + featureSetJsonCompressor.compress(Collections.singletonList(featureSet))); ArgumentCaptor captor = ArgumentCaptor.forClass(ImportOptions.class); @@ -170,7 +170,19 @@ public void 
shouldStartJobWithCorrectPipelineOptions() throws IOException { // Assume the files that are staged are correct expectedPipelineOptions.setFilesToStage(actualPipelineOptions.getFilesToStage()); - assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); + assertThat( + actualPipelineOptions.getFeatureSetJson(), + equalTo(expectedPipelineOptions.getFeatureSetJson())); + assertThat( + actualPipelineOptions.getDeadLetterTableSpec(), + equalTo(expectedPipelineOptions.getDeadLetterTableSpec())); + assertThat( + actualPipelineOptions.getStatsdHost(), equalTo(expectedPipelineOptions.getStatsdHost())); + assertThat( + actualPipelineOptions.getMetricsExporterType(), + equalTo(expectedPipelineOptions.getMetricsExporterType())); + assertThat( + actualPipelineOptions.getStoreJson(), equalTo(expectedPipelineOptions.getStoreJson())); assertThat(actual.getExtId(), equalTo(expectedExtJobId)); } diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index 2dd87cfc6e3..64412f4391e 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -40,14 +40,19 @@ import feast.core.StoreProto.Store.Subscription; import feast.core.config.FeastProperties.MetricsProperties; import feast.core.job.Runner; +import feast.core.job.option.FeatureSetJsonByteConverter; import feast.core.model.FeatureSet; import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.model.Source; import feast.core.model.Store; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionCompressor; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.beam.runners.direct.DirectRunner; import 
org.apache.beam.sdk.PipelineResult; @@ -121,8 +126,11 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { expectedPipelineOptions.setProject(""); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); expectedPipelineOptions.setProject(""); + + OptionCompressor> featureSetJsonCompressor = + new BZip2Compressor<>(new FeatureSetJsonByteConverter()); expectedPipelineOptions.setFeatureSetJson( - Lists.newArrayList(printer.print(featureSet.getSpec()))); + featureSetJsonCompressor.compress(Collections.singletonList(featureSet))); String expectedJobId = "feast-job-0"; ArgumentCaptor pipelineOptionsCaptor = @@ -150,7 +158,20 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { expectedPipelineOptions.setOptionsId( actualPipelineOptions.getOptionsId()); // avoid comparing this value - assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); + assertThat( + actualPipelineOptions.getFeatureSetJson(), + equalTo(expectedPipelineOptions.getFeatureSetJson())); + assertThat( + actualPipelineOptions.getDeadLetterTableSpec(), + equalTo(expectedPipelineOptions.getDeadLetterTableSpec())); + assertThat( + actualPipelineOptions.getStatsdHost(), equalTo(expectedPipelineOptions.getStatsdHost())); + assertThat( + actualPipelineOptions.getMetricsExporterType(), + equalTo(expectedPipelineOptions.getMetricsExporterType())); + assertThat( + actualPipelineOptions.getStoreJson(), equalTo(expectedPipelineOptions.getStoreJson())); + assertThat(jobStarted.getPipelineResult(), equalTo(mockPipelineResult)); assertThat(jobStarted.getJobId(), equalTo(expectedJobId)); assertThat(actual.getExtId(), equalTo(expectedJobId)); diff --git a/core/src/test/java/feast/core/job/option/FeatureSetJsonByteConverterTest.java b/core/src/test/java/feast/core/job/option/FeatureSetJsonByteConverterTest.java new file mode 100644 index 00000000000..2dfeef1d969 --- /dev/null +++ 
b/core/src/test/java/feast/core/job/option/FeatureSetJsonByteConverterTest.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.job.option; + +import static org.junit.Assert.*; + +import com.google.protobuf.InvalidProtocolBufferException; +import feast.core.FeatureSetProto; +import feast.core.SourceProto; +import feast.types.ValueProto; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.junit.Test; + +public class FeatureSetJsonByteConverterTest { + + private FeatureSetProto.FeatureSet newFeatureSet(Integer version, Integer numberOfFeatures) { + List features = + IntStream.range(1, numberOfFeatures + 1) + .mapToObj( + i -> + FeatureSetProto.FeatureSpec.newBuilder() + .setValueType(ValueProto.ValueType.Enum.FLOAT) + .setName("feature".concat(Integer.toString(i))) + .build()) + .collect(Collectors.toList()); + + return FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetProto.FeatureSetSpec.newBuilder() + .setSource( + SourceProto.Source.newBuilder() + .setType(SourceProto.SourceType.KAFKA) + .setKafkaSourceConfig( + SourceProto.KafkaSourceConfig.newBuilder() + .setBootstrapServers("somebrokers:9092") + .setTopic("sometopic"))) + .addAllFeatures(features) + .setVersion(version) + .addEntities( + FeatureSetProto.EntitySpec.newBuilder() + .setName("entity") + 
.setValueType(ValueProto.ValueType.Enum.STRING))) + .build(); + } + + @Test + public void shouldConvertFeatureSetsAsJsonStringBytes() throws InvalidProtocolBufferException { + int nrOfFeatureSet = 1; + int nrOfFeatures = 1; + List featureSets = + IntStream.range(1, nrOfFeatureSet + 1) + .mapToObj(i -> newFeatureSet(i, nrOfFeatures)) + .collect(Collectors.toList()); + + String expectedOutputString = + "{\"version\":1," + + "\"entities\":[{\"name\":\"entity\",\"valueType\":2}]," + + "\"features\":[{\"name\":\"feature1\",\"valueType\":6}]," + + "\"source\":{" + + "\"type\":1," + + "\"kafkaSourceConfig\":{" + + "\"bootstrapServers\":\"somebrokers:9092\"," + + "\"topic\":\"sometopic\"}}}"; + FeatureSetJsonByteConverter byteConverter = new FeatureSetJsonByteConverter(); + assertEquals(expectedOutputString, new String(byteConverter.toByte(featureSets))); + } +} diff --git a/datatypes/java/README.md b/datatypes/java/README.md index 535fac73d2e..d5124eabb46 100644 --- a/datatypes/java/README.md +++ b/datatypes/java/README.md @@ -16,7 +16,7 @@ Dependency Coordinates dev.feast datatypes-java - 0.4.0-SNAPSHOT + 0.4.5-SNAPSHOT ``` diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 535fbe6081c..d2cc03a20bd 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -6,9 +6,15 @@ * [Getting Help](getting-help.md) * [Contributing](contributing.md) -## Getting Started +## Installing Feast + +* [Overview](installing-feast/overview.md) +* [Docker Compose](installing-feast/docker-compose.md) +* [Google Kubernetes Engine \(GKE\)](installing-feast/gke.md) +* [Troubleshooting](installing-feast/troubleshooting.md) + +## Using Feast -* [Installing Feast](getting-started/installing-feast.md) * [Using Feast](https://github.com/gojek/feast/blob/master/examples/basic/basic.ipynb) ## Reference diff --git a/docs/concepts.md b/docs/concepts.md index 860515c3699..ae158f8f829 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -2,7 +2,7 @@ ## Architecture -![Logical diagram of a typical Feast 
deployment](.gitbook/assets/basic-architecture-diagram.svg) +![Logical diagram of a typical Feast deployment](.gitbook/assets/basic-architecture-diagram%20%282%29.svg) The core components of a Feast deployment are @@ -106,13 +106,13 @@ Feast supports the following types for feature values * DOUBLE * FLOAT * BOOL -* BYTES_LIST -* STRING_LIST -* INT32_LIST -* INT64_LIST -* DOUBLE_LIST -* FLOAT_LIST -* BOOL_LIST +* BYTES\_LIST +* STRING\_LIST +* INT32\_LIST +* INT64\_LIST +* DOUBLE\_LIST +* FLOAT\_LIST +* BOOL\_LIST ## Glossary diff --git a/docs/contributing.md b/docs/contributing.md index 38caffd654b..d4189191f09 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -300,7 +300,7 @@ docker run --rm \ ## Code reviews -Code submission to Feast \(including submission from project maintainers\) requires review and approval. Please submit a **pull request** to initiate the code review process. We use [prow](https://github.com/kubernetes/test-infra/tree/master/prow) to manage the testing and reviewing of pull requests. Please refer to [config.yaml](../.prow/config.yaml) for details on the test jobs. +Code submission to Feast \(including submission from project maintainers\) requires review and approval. Please submit a **pull request** to initiate the code review process. We use [prow](https://github.com/kubernetes/test-infra/tree/master/prow) to manage the testing and reviewing of pull requests. Please refer to [config.yaml](https://github.com/gojek/feast/tree/4cd928d1d3b7972b15f0c5dd29593fcedecea9f5/.prow/config.yaml) for details on the test jobs. 
## Code conventions diff --git a/docs/getting-help.md b/docs/getting-help.md index d8180ab7842..597a782d606 100644 --- a/docs/getting-help.md +++ b/docs/getting-help.md @@ -1,23 +1,36 @@ # Getting Help -## Chat +### Chat -* Come and chat with us in the [\#Feast Slack channel in the Kubeflow workspace](https://join.slack.com/t/kubeflow/shared_invite/enQtNDg5MTM4NTQyNjczLTdkNTVhMjg1ZTExOWI0N2QyYTQ2MTIzNTJjMWRiOTFjOGRlZWEzODc1NzMwNTMwM2EzNjY1MTFhODczNjk4MTk) and catch up on all things Feast! +* Come and say hello in [\#Feast](https://join.slack.com/t/kubeflow/shared_invite/enQtNDg5MTM4NTQyNjczLTdkNTVhMjg1ZTExOWI0N2QyYTQ2MTIzNTJjMWRiOTFjOGRlZWEzODc1NzMwNTMwM2EzNjY1MTFhODczNjk4MTk) over in the Kubeflow Slack. -## GitHub +### GitHub * Feast's GitHub repo can be [found here](https://github.com/gojek/feast/). * Found a bug or need a feature? [Create an issue on GitHub](https://github.com/gojek/feast/issues/new) -## Mailing list +### Community Call -### Feast discussion +We have a community call every 2 weeks, alternating between two times: + +* 11 am \(UTC + 8\) +* 5 pm \(UTC + 8\) + +Please join the [**feast-dev**](getting-help.md#feast-development) mailing list to receive the calendar invitation. + +### Mailing list + +#### Feast discussion * Google Group: [https://groups.google.com/d/forum/feast-discuss](https://groups.google.com/d/forum/feast-discuss) * Mailing List: [feast-discuss@googlegroups.com](mailto:feast-discuss@googlegroups.com) -### Feast development +#### Feast development * Google Group: [https://groups.google.com/d/forum/feast-dev](https://groups.google.com/d/forum/feast-dev) * Mailing List: [feast-dev@googlegroups.com](mailto:feast-dev@googlegroups.com) +### Google Drive + +The Feast community also maintains a [Google Drive](https://drive.google.com/drive/u/0/folders/0AAe8j7ZK3sxSUk9PVA) with documents like RFCs, meeting notes, or roadmaps. Please join one of the above mailing lists \(feast-dev or feast-discuss\) to gain access to the drive. 
+ diff --git a/docs/getting-started/installing-feast.md b/docs/getting-started/installing-feast.md deleted file mode 100644 index 699dd5fa8f1..00000000000 --- a/docs/getting-started/installing-feast.md +++ /dev/null @@ -1,433 +0,0 @@ -# Installing Feast - -## Overview - -This installation guide will demonstrate three ways of installing Feast: - -* \*\*\*\*[**Docker Compose \(Quickstart\):**](installing-feast.md#docker-compose) Fastest way to get Feast up and running. Provides a pre-installed Jupyter Notebook with the Feast Python SDK and sample code. -* [**Minikube**](installing-feast.md#minikube)**:** This installation has no external dependencies, but does not have a historical feature store installed. It allows users to quickly get a feel for Feast. -* [**Google Kubernetes Engine:**](installing-feast.md#google-kubernetes-engine) This guide installs a single cluster Feast installation on Google's GKE. It has Google Cloud specific dependencies like BigQuery, Dataflow, and Google Cloud Storage. - -## Docker Compose \(Quickstart\) - -### Overview - -A docker compose file is provided to quickly test Feast with the official docker images. There is no hard dependency on GCP, unless batch serving is required. Once you have set up Feast using Docker Compose, you will be able to: - -* Create, register, and manage feature sets -* Ingest feature data into Feast -* Retrieve features for online serving - -{% hint style="info" %} -The docker compose setup uses Direct Runner for the Apache Beam jobs. Running Beam with the Direct Runner means it does not need a dedicated runner like Flink or Dataflow, but this comes at the cost of performance. We recommend the use of a full runner when running Feast with very large workloads. -{% endhint %} - -### 0. Requirements - -* [Docker compose](https://docs.docker.com/compose/install/) should be installed. -* TCP ports 6565, 6566, 8888, and 9094 should not be in use. 
Otherwise, modify the port mappings in `infra/docker-compose/docker-compose.yml` to use unoccupied ports. -* \(for batch serving only\) For batch serving you will also need a [GCP service account key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) that has access to GCS and BigQuery. Port 6567 will be used for the batch serving endpoint. -* \(for batch serving only\) [Google Cloud SDK ](https://cloud.google.com/sdk/install)installed, authenticated, and configured to the project you will use. - -### 1. Step-by-step guide \(Online serving only\) - -Clone the [Feast repository](https://github.com/gojek/feast/) and navigate to the `docker-compose` sub-directory: - -```bash -git clone https://github.com/gojek/feast.git && \ -cd feast && export FEAST_HOME_DIR=$(pwd) && \ -cd infra/docker-compose -``` - -Make a copy of the `.env.sample` file: - -```bash -cp .env.sample .env -``` - -Start Feast: - -```javascript -docker-compose up -d -``` - -A Jupyter notebook is now available to use Feast: - -[http://localhost:8888/notebooks/feast-notebooks/feast-quickstart.ipynb](http://localhost:8888/notebooks/feast-notebooks/feast-quickstart.ipynb) - -### 2. Step-by-step guide \(Batch and online serving\) - -Clone the [Feast repository](https://github.com/gojek/feast/) and navigate to the `docker-compose` sub-directory: - -```bash -git clone https://github.com/gojek/feast.git && \ -cd feast && export FEAST_HOME_DIR=$(pwd) && \ -cd infra/docker-compose -``` - -Create a [service account ](https://cloud.google.com/iam/docs/creating-managing-service-accounts)from the GCP console and copy it to the `gcp-service-accounts` folder: - -```javascript -cp my-service-account.json ${FEAST_HOME_DIR}/infra/docker-compose/gcp-service-accounts -``` - -Create a Google Cloud Storage bucket. 
Make sure that your service account above has read/write permissions to this bucket: - -```bash -gsutil mb gs://my-feast-staging-bucket -``` - -Make a copy of the `.env.sample` file: - -```bash -cp .env.sample .env -``` - -Customize the `.env` file based on your environment. At the very least you have to modify: - -* **FEAST\_CORE\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. -* **FEAST\_BATCH\_SERVING\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. -* **FEAST\_JUPYTER\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. -* **FEAST\_JOB\_STAGING\_LOCATION:** Google Cloud Storage bucket that Feast will use to stage data exports and batch retrieval requests. - -We will also need to customize the `bq-store.yml` file inside `infra/docker-compose/serving/` to configure the BigQuery storage configuration as well as the feature sets that the store subscribes to. At a minimum you will need to set: - -* **project\_id:** This is you GCP project id. -* **dataset\_id:** This is the name of the BigQuery dataset that tables will be created in. Each feature set will have one table in BigQuery. - -Start Feast: - -```javascript -docker-compose -f docker-compose.yml -f docker-compose.batch.yml up -d -``` - -A Jupyter notebook is now available to use Feast: - -[http://localhost:8888/notebooks/feast-notebooks](http://localhost:8888/tree/feast-notebooks) - -## Minikube - -### Overview - -This guide will install Feast into [Minikube](https://github.com/kubernetes/minikube). Once Feast is installed you will be able to: - -* Define and register features. -* Load feature data from both batch and streaming sources. -* Retrieve features for online serving. - -{% hint style="warning" %} -This Minikube installation guide is for demonstration purposes only. 
It is not meant for production use, and does not install a historical feature store. -{% endhint %} - -### 0. Requirements - -The following software should be installed prior to starting: - -1. [Minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/) should be installed. -2. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) installed and configured to work with Minikube. -3. [Helm](https://helm.sh/3) \(2.16.0 or greater\). Helm 3 has not been tested yet. - -### 1. Set up Minikube - -Start Minikube. Note the minimum cpu and memory below: - -```bash -minikube start --cpus=3 --memory=4096 --kubernetes-version='v1.15.5' -``` - -Set up your Feast environmental variables - -```bash -export FEAST_IP=$(minikube ip) -export FEAST_CORE_URL=${FEAST_IP}:32090 -export FEAST_SERVING_URL=${FEAST_IP}:32091 -``` - -### 2. Install Feast with Helm - -Clone the [Feast repository](https://github.com/gojek/feast/) and navigate to the `charts` sub-directory: - -```bash -git clone https://github.com/gojek/feast.git && \ -cd feast && export FEAST_HOME_DIR=$(pwd) && \ -cd infra/charts/feast -``` - -Copy the `values-demo.yaml` file for your installation: - -```bash -cp values-demo.yaml my-feast-values.yaml -``` - -Update all occurrences of the domain `feast.example.com` inside of `my-feast-values.yaml` with your Minikube IP. This is to allow external access to the services in the cluster. You can find your Minikube IP by running the following command `minikube ip`, or simply replace the text from the command line: - -```bash -sed -i "s/feast.example.com/${FEAST_IP}/g" my-feast-values.yaml -``` - -Install Tiller: - -```bash -helm init -``` - -Install the Feast Helm chart: - -```bash -helm install --name feast -f my-feast-values.yaml . -``` - -Ensure that the system comes online. 
This will take a few minutes - -```bash -watch kubectl get pods -``` - -```bash -NAME READY STATUS RESTARTS AGE -pod/feast-feast-core-666fd46db4-l58l6 1/1 Running 0 5m -pod/feast-feast-serving-online-84d99ddcbd 1/1 Running 0 6m -pod/feast-kafka-0 1/1 Running 0 3m -pod/feast-kafka-1 1/1 Running 0 4m -pod/feast-kafka-2 1/1 Running 0 4m -pod/feast-postgresql-0 1/1 Running 0 5m -pod/feast-redis-master-0 1/1 Running 0 5m -pod/feast-zookeeper-0 1/1 Running 0 5m -pod/feast-zookeeper-1 1/1 Running 0 5m -pod/feast-zookeeper-2 1/1 Running 0 5m -``` - -### 3. Connect to Feast with the Python SDK - -Install the Python SDK using pip: - -```bash -pip install -e ${FEAST_HOME_DIR}/sdk/python -``` - -Configure the Feast Python SDK: - -```bash -feast config set core_url ${FEAST_CORE_URL} -feast config set serving_url ${FEAST_SERVING_URL} -``` - -That's it! You can now start to use Feast! - -## Google Kubernetes Engine - -### Overview - -This guide will install Feast into a Kubernetes cluster on GCP. It assumes that all of your services will run within a single K8s cluster. Once Feast is installed you will be able to: - -* Define and register features. -* Load feature data from both batch and streaming sources. -* Retrieve features for model training. -* Retrieve features for online serving. - -{% hint style="info" %} -This guide requires [Google Cloud Platform](https://cloud.google.com/) for installation. - -* [BigQuery](https://cloud.google.com/bigquery/) is used for storing historical features. -* [Cloud Dataflow](https://cloud.google.com/dataflow/) is used for running data ingestion jobs. -* [Google Cloud Storage](https://cloud.google.com/storage/) is used for intermediate data storage. -{% endhint %} - -### 0. Requirements - -1. [Google Cloud SDK ](https://cloud.google.com/sdk/install)installed, authenticated, and configured to the project you will use. -2. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) installed. -3. 
[Helm](https://helm.sh/3) \(2.16.0 or greater\) installed on your local machine with Tiller installed in your cluster. Helm 3 has not been tested yet. - -### 1. Set up GCP - -First define the environmental variables that we will use throughout this installation. Please customize these to reflect your environment. - -```bash -export FEAST_GCP_PROJECT_ID=my-gcp-project -export FEAST_GCP_REGION=us-central1 -export FEAST_GCP_ZONE=us-central1-a -export FEAST_BIGQUERY_DATASET_ID=feast -export FEAST_GCS_BUCKET=${FEAST_GCP_PROJECT_ID}_feast_bucket -export FEAST_GKE_CLUSTER_NAME=feast -export FEAST_S_ACCOUNT_NAME=feast-sa -``` - -Create a Google Cloud Storage bucket for Feast to stage data during exports: - -```bash -gsutil mb gs://${FEAST_GCS_BUCKET} -``` - -Create a BigQuery dataset for storing historical features: - -```bash -bq mk ${FEAST_BIGQUERY_DATASET_ID} -``` - -Create the service account that Feast will run as: - -```bash -gcloud iam service-accounts create ${FEAST_S_ACCOUNT_NAME} - -gcloud projects add-iam-policy-binding ${FEAST_GCP_PROJECT_ID} \ - --member serviceAccount:${FEAST_S_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com \ - --role roles/editor - -gcloud iam service-accounts keys create key.json --iam-account \ -${FEAST_S_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com -``` - -Ensure that [Dataflow API is enabled](https://console.cloud.google.com/apis/api/dataflow.googleapis.com/overview): - -```bash -gcloud services enable dataflow.googleapis.com -``` - -### 2. Set up a Kubernetes \(GKE\) cluster - -{% hint style="warning" %} -Provisioning a GKE cluster can expose your services publicly. This guide does not cover securing access to the cluster. 
-{% endhint %} - -Create a GKE cluster: - -```bash -gcloud container clusters create ${FEAST_GKE_CLUSTER_NAME} \ - --machine-type n1-standard-4 -``` - -Create a secret in the GKE cluster based on your local key `key.json`: - -```bash -kubectl create secret generic feast-gcp-service-account --from-file=key.json -``` - -For this guide we will use `NodePort` for exposing Feast services. In order to do so, we must find an internal IP of at least one GKE node. - -```bash -export FEAST_IP=$(kubectl describe nodes | grep InternalIP | awk '{print $2}' | head -n 1) -export FEAST_CORE_URL=${FEAST_IP}:32090 -export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 -export FEAST_BATCH_SERVING_URL=${FEAST_IP}:32092 -``` - -Confirm that you are able to access this node: - -```bash -ping $FEAST_IP -``` - -```bash -PING 10.123.114.11 (10.203.164.22) 56(84) bytes of data. -64 bytes from 10.123.114.11: icmp_seq=1 ttl=63 time=54.2 ms -64 bytes from 10.123.114.11: icmp_seq=2 ttl=63 time=51.2 ms -``` - -Add firewall rules in gcloud to open up ports: -```bash -gcloud compute firewall-rules create feast-core-port --allow tcp:32090 -gcloud compute firewall-rules create feast-online-port --allow tcp:32091 -gcloud compute firewall-rules create feast-batch-port --allow tcp:32092 -gcloud compute firewall-rules create feast-redis-port --allow tcp:32101 -gcloud compute firewall-rules create feast-kafka-ports --allow tcp:31090-31095 -``` - -### 3. Set up Helm - -Run the following command to provide Tiller with authorization to install Feast: - -```bash -kubectl apply -f - < + +This guide will install Feast into a Kubernetes cluster on GCP. It assumes that all of your services will run within a single Kubernetes cluster. Once Feast is installed you will be able to: + +* Define and register features. +* Load feature data from both batch and streaming sources. +* Retrieve features for model training. +* Retrieve features for online serving. 
+ +{% hint style="info" %} +This guide requires [Google Cloud Platform](https://cloud.google.com/) for installation. + +* [BigQuery](https://cloud.google.com/bigquery/) is used for storing historical features. +* [Google Cloud Storage](https://cloud.google.com/storage/) is used for intermediate data storage. +{% endhint %} + +## 0. Requirements + +1. [Google Cloud SDK ](https://cloud.google.com/sdk/install)installed, authenticated, and configured to the project you will use. +2. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) installed. +3. [Helm](https://helm.sh/3) \(2.16.0 or greater\) installed on your local machine with Tiller installed in your cluster. Helm 3 has not been tested yet. + +## 1. Set up GCP + +First define the environmental variables that we will use throughout this installation. Please customize these to reflect your environment. + +```bash +export FEAST_GCP_PROJECT_ID=my-gcp-project +export FEAST_GCP_REGION=us-central1 +export FEAST_GCP_ZONE=us-central1-a +export FEAST_BIGQUERY_DATASET_ID=feast +export FEAST_GCS_BUCKET=${FEAST_GCP_PROJECT_ID}_feast_bucket +export FEAST_GKE_CLUSTER_NAME=feast +export FEAST_SERVICE_ACCOUNT_NAME=feast-sa +``` + +Create a Google Cloud Storage bucket for Feast to stage batch data exports: + +```bash +gsutil mb gs://${FEAST_GCS_BUCKET} +``` + +Create the service account that Feast will run as: + +```bash +gcloud iam service-accounts create ${FEAST_SERVICE_ACCOUNT_NAME} + +gcloud projects add-iam-policy-binding ${FEAST_GCP_PROJECT_ID} \ + --member serviceAccount:${FEAST_SERVICE_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com \ + --role roles/editor + +gcloud iam service-accounts keys create key.json --iam-account \ +${FEAST_SERVICE_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com +``` + +## 2. Set up a Kubernetes \(GKE\) cluster + +{% hint style="warning" %} +Provisioning a GKE cluster can expose your services publicly. 
This guide does not cover securing access to the cluster. +{% endhint %} + +Create a GKE cluster: + +```bash +gcloud container clusters create ${FEAST_GKE_CLUSTER_NAME} \ + --machine-type n1-standard-4 +``` + +Create a secret in the GKE cluster based on your local key `key.json`: + +```bash +kubectl create secret generic feast-gcp-service-account --from-file=key.json +``` + +For this guide we will use `NodePort` for exposing Feast services. In order to do so, we must find an External IP of at least one GKE node. This should be a public IP. + +```bash +export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) +export FEAST_CORE_URL=${FEAST_IP}:32090 +export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 +export FEAST_BATCH_SERVING_URL=${FEAST_IP}:32092 +``` + +Add firewall rules to open up ports on your Google Cloud Platform project: + +```bash +gcloud compute firewall-rules create feast-core-port --allow tcp:32090 +gcloud compute firewall-rules create feast-online-port --allow tcp:32091 +gcloud compute firewall-rules create feast-batch-port --allow tcp:32092 +gcloud compute firewall-rules create feast-redis-port --allow tcp:32101 +gcloud compute firewall-rules create feast-kafka-ports --allow tcp:31090-31095 +``` + +## 3. 
Set up Helm + +Run the following command to provide Tiller with authorization to install Feast: + +```bash +kubectl apply -f - <8888/tcp feast_jupyter_1 +8e49dbe81b92 gcr.io/kf-feast/feast-serving:latest "java -Xms1024m -Xmx…" 2 minutes ago Up 5 seconds 0.0.0.0:6567->6567/tcp feast_batch-serving_1 +b859494bd33a gcr.io/kf-feast/feast-serving:latest "java -jar /opt/feas…" 2 minutes ago Up About a minute 0.0.0.0:6566->6566/tcp feast_online-serving_1 +5c4962811767 gcr.io/kf-feast/feast-core:latest "java -jar /opt/feas…" 2 minutes ago Up 2 minutes 0.0.0.0:6565->6565/tcp feast_core_1 +1ba7239e0ae0 confluentinc/cp-kafka:5.2.1 "/etc/confluent/dock…" 2 minutes ago Up 2 minutes 0.0.0.0:9092->9092/tcp, 0.0.0.0:9094->9094/tcp feast_kafka_1 +e2779672735c confluentinc/cp-zookeeper:5.2.1 "/etc/confluent/dock…" 2 minutes ago Up 2 minutes 2181/tcp, 2888/tcp, 3888/tcp feast_zookeeper_1 +39ac26f5c709 postgres:12-alpine "docker-entrypoint.s…" 2 minutes ago Up 2 minutes 5432/tcp feast_db_1 +3c4ee8616096 redis:5-alpine "docker-entrypoint.s…" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp feast_redis_1 +``` + +### Google Kubernetes Engine + +All services should either be in a `running` state or `complete`state: + +```text +kubectl get pods +``` + +```text +NAME READY STATUS RESTARTS AGE +feast-feast-core-5ff566f946-4wlbh 1/1 Running 1 32m +feast-feast-serving-batch-848d74587b-96hq6 1/1 Running 2 32m +feast-feast-serving-online-df69755d5-fml8v 1/1 Running 2 32m +feast-kafka-0 1/1 Running 1 32m +feast-kafka-1 1/1 Running 0 30m +feast-kafka-2 1/1 Running 0 29m +feast-kafka-config-3e860262-zkzr8 0/1 Completed 0 32m +feast-postgresql-0 1/1 Running 0 32m +feast-prometheus-statsd-exporter-554db85b8d-r4hb8 1/1 Running 0 32m +feast-redis-master-0 1/1 Running 0 32m +feast-zookeeper-0 1/1 Running 0 32m +feast-zookeeper-1 1/1 Running 0 32m +feast-zookeeper-2 1/1 Running 0 31m +``` + +## How can I verify that I can connect to all services? + +First find the `IP:Port` combination of your services. 
+ +### **Docker Compose \(from inside the docker cluster\)** + +You will probably need to connect using the hostnames of services and standard Feast ports: + +```bash +export FEAST_CORE_URL=core:6565 +export FEAST_ONLINE_SERVING_URL=online-serving:6566 +export FEAST_BATCH_SERVING_URL=batch-serving:6567 +``` + +### **Docker Compose \(from outside the docker cluster\)** + +You will probably need to connect using `localhost` and standard ports: + +```bash +export FEAST_CORE_URL=localhost:6565 +export FEAST_ONLINE_SERVING_URL=localhost:6566 +export FEAST_BATCH_SERVING_URL=localhost:6567 +``` + +### **Google Kubernetes Engine \(GKE\)** + +You will need to find the external IP of one of the nodes as well as the NodePorts. Please make sure that your firewall is open for these ports: + +```bash +export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) +export FEAST_CORE_URL=${FEAST_IP}:32090 +export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 +export FEAST_BATCH_SERVING_URL=${FEAST_IP}:32092 +``` + +`netcat`, `telnet`, or even `curl` can be used to test whether all services are available and ports are open, but `grpc_cli` is the most powerful. It can be installed from [here](https://github.com/grpc/grpc/blob/master/doc/command_line_tool.md). + +### Testing Feast Core: + +```bash +grpc_cli ls ${FEAST_CORE_URL} feast.core.CoreService +``` + +```text +GetFeastCoreVersion +GetFeatureSet +ListFeatureSets +ListStores +ApplyFeatureSet +UpdateStore +CreateProject +ArchiveProject +ListProjects +``` + +### Testing Feast Batch Serving and Online Serving + +```bash +grpc_cli ls ${FEAST_BATCH_SERVING_URL} feast.serving.ServingService +``` + +```text +GetFeastServingInfo +GetOnlineFeatures +GetBatchFeatures +GetJob +``` + +```bash +grpc_cli ls ${FEAST_ONLINE_SERVING_URL} feast.serving.ServingService +``` + +```text +GetFeastServingInfo +GetOnlineFeatures +GetBatchFeatures +GetJob +``` + +## How can I print logs from the Feast Services? 
+ +Feast will typically have three services that you need to monitor if something goes wrong. + +* Feast Core +* Feast Serving \(Online\) +* Feast Serving \(Batch\) + +In order to print the logs from these services, please run the commands below. + +### Docker Compose + +```text + docker logs -f feast_core_1 +``` + +```text +docker logs -f feast_batch-serving_1 +``` + +```text +docker logs -f feast_online-serving_1 +``` + +### Google Kubernetes Engine + +```text +kubectl logs $(kubectl get pods | grep feast-core | awk '{print $1}') +``` + +```text +kubectl logs $(kubectl get pods | grep feast-serving-batch | awk '{print $1}') +``` + +```text +kubectl logs $(kubectl get pods | grep feast-serving-online | awk '{print $1}') +``` + diff --git a/examples/basic/basic.ipynb b/examples/basic/basic.ipynb index 49658b42357..94fc82f2ce9 100644 --- a/examples/basic/basic.ipynb +++ b/examples/basic/basic.ipynb @@ -15,15 +15,15 @@ "1. Create a synthetic customer feature dataset\n", "2. Register a feature set to represent these features in Feast\n", "3. Ingest these features into Feast\n", - "4. Create a feature query and retrieve historical feature data\n", - "5. Create a feature query and retrieve online feature data" + "4. Create a feature query and retrieve online feature data\n", + "5. Create a feature query and retrieve historical feature data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 1. Clone Feast and install all dependencies" + "### 0. Configuration" ] }, { @@ -32,9 +32,79 @@ "metadata": {}, "outputs": [], "source": [ - "!git clone https://github.com/gojek/feast.git \\\n", - "&& cd feast/sdk/python/ && pip install --upgrade --quiet -e . 
\\\n", - "&& pip install --quiet --upgrade pandas numpy protobuf" + "import os\n", + "\n", + "# Feast Core acts as the central feature registry\n", + "FEAST_CORE_URL = os.getenv('FEAST_CORE_URL', 'core:6565')\n", + "\n", + "# Feast Online Serving allows for the retrieval of real-time feature data\n", + "FEAST_ONLINE_SERVING_URL = os.getenv('FEAST_ONLINE_SERVING_URL', 'online-serving:6566')\n", + "\n", + "# Feast Batch Serving allows for the retrieval of historical feature data\n", + "FEAST_BATCH_SERVING_URL = os.getenv('FEAST_BATCH_SERVING_URL', 'batch-serving:6567')\n", + "\n", + "# PYTHON_REPOSITORY_PATH is the path to the Python SDK inside the Feast Git Repo\n", + "PYTHON_REPOSITORY_PATH = os.getenv('PYTHON_REPOSITORY_PATH', '../../')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Install Feast SDK" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install from PyPi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --ignore-installed --upgrade feast" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(Alternative) Install from local repository" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "os.environ['PYTHON_SDK_PATH'] = os.path.join(PYTHON_REPOSITORY_PATH, 'sdk/python')\n", + "sys.path.append(os.environ['PYTHON_SDK_PATH'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!echo $PYTHON_SDK_PATH" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!python -m pip install --ignore-installed --upgrade -e ${PYTHON_SDK_PATH}" ] }, { @@ -66,7 +136,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Configure Feast services and connect the Feast client" + "### 3. 
Configure Feast services and connect the Feast client\n", + "\n", + "Connect to Feast Core and Feast Online Serving" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = Client(core_url=FEAST_CORE_URL, serving_url=FEAST_ONLINE_SERVING_URL)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a project workspace" ] }, { @@ -75,9 +163,14 @@ "metadata": {}, "outputs": [], "source": [ - "CORE_URL = 'localhost:6565'\n", - "ONLINE_SERVING_URL = 'localhost:6566'\n", - "BATCH_SERVING_URL = 'localhost:6567'" + "client.create_project('customer_project')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the active project" ] }, { @@ -86,8 +179,6 @@ "metadata": {}, "outputs": [], "source": [ - "client = Client(core_url=CORE_URL, serving_url=BATCH_SERVING_URL) # Connect to Feast Core\n", - "client.create_project('customer_project')\n", "client.set_project('customer_project')" ] }, @@ -95,7 +186,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Create synthetic customer features" + "### 4. Create customer features" ] }, { @@ -132,7 +223,7 @@ " }\n", ")\n", "\n", - "print(customer_features.head(10))" + "print(customer_features.head(500))" ] }, { @@ -147,9 +238,7 @@ "metadata": {}, "source": [ "Now we will create a feature set for these features. Feature sets are essentially a schema that represent\n", - "feature values. Feature sets allow Feast to both identify feature values and their structure. \n", - "\n", - "In this case we need to define any entity columns as well as the maximum age. The entity column in this case is \"customer_id\". Max age is set to 1 day (defined in seconds). This means that for each feature query during retrieval, the serving API will only retrieve features up to a maximum of 1 day per provided timestamp and entity combination. " + "feature values. 
Feature sets allow Feast to both identify feature values and their structure. The following feature set contains no features yet." ] }, { @@ -160,8 +249,8 @@ "source": [ "customer_fs = FeatureSet(\n", " \"customer_transactions\",\n", - " max_age=Duration(seconds=86400),\n", - " entities=[Entity(name='customer_id', dtype=ValueType.INT64)]\n", + " entities=[Entity(name='customer_id', dtype=ValueType.INT64)],\n", + " max_age=Duration(seconds=432000) \n", ")" ] }, @@ -169,7 +258,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here we are automatically inferring the schema from the provided dataset" + "Here we are automatically inferring the schema from the provided dataset. The two features from the dataset will be added to the feature set" ] }, { @@ -241,16 +330,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 8. Create a batch retrieval query" + "### 8. Retrieve online features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In order to retrieve historical feature data, the user must provide an entity_rows dataframe. This dataframe contains a combination of timestamps and entities. In this case, the user must provide both customer_ids and timestamps. \n", - "\n", - "We will randomly generate timestamps over the last 30 days, and assign customer_ids to them. When these entity rows are sent to the Feast Serving API to retrieve feature values, along with a list of feature ids, Feast is then able to attach the correct feature values to each entity row. The one exception is if the feature values fall outside of the maximum age window." + "The process of retrieving features from the online API is very similar to that of the batch API. 
The only major difference is that users do not have to provide timestamps (only the latest features are returned, as long as they are within the maximum age window)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The example below retrieves online features for a single customer: \"1001\". It is possible to retrieve any features from feast, even outside of the current project." ] }, { @@ -259,23 +353,51 @@ "metadata": {}, "outputs": [], "source": [ - "event_timestamps = [datetime.utcnow().replace(tzinfo=utc) - timedelta(days=randrange(15), hours=randrange(24), minutes=randrange(60)) for day in range(30)]\n", - "\n", - "entity_rows = pd.DataFrame(\n", - " {\n", - " \"datetime\": event_timestamps,\n", - " \"customer_id\": [customers[idx % len(customers)] for idx in range(len(event_timestamps))],\n", - " }\n", + "online_features = client.get_online_features(\n", + " feature_refs=[\n", + " f\"daily_transactions\",\n", + " f\"total_transactions\",\n", + " ],\n", + " entity_rows=[\n", + " GetOnlineFeaturesRequest.EntityRow(\n", + " fields={\n", + " \"customer_id\": Value(\n", + " int64_val=1001)\n", + " }\n", + " )\n", + " ],\n", ")\n", - "\n", - "print(entity_rows.head(10))" + "print(online_features)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 9. Retrieve historical/batch features" + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The following section requires Google Cloud Platform (Google Cloud Storage and BigQuery)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9. Create a batch retrieval query" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to retrieve historical feature data, the user must provide an entity_rows dataframe. This dataframe contains a combination of timestamps and entities. In this case, the user must provide both customer_ids and timestamps. 
\n", + "\n", + "We will randomly generate timestamps over the last 30 days, and assign customer_ids to them. When these entity rows are sent to the Feast Serving API to retrieve feature values, along with a list of feature ids, Feast is then able to attach the correct feature values to each entity row. " ] }, { @@ -284,29 +406,30 @@ "metadata": {}, "outputs": [], "source": [ - "job = client.get_batch_features(\n", - " feature_refs=[\n", - " f\"daily_transactions\", \n", - " f\"total_transactions\", \n", - " ],\n", - " entity_rows=entity_rows\n", - " )\n", - "df = job.to_dataframe()\n", - "print(df.head(10))" + "event_timestamps = [datetime.utcnow().replace(tzinfo=utc) - timedelta(days=randrange(15), hours=randrange(24), minutes=randrange(60)) for day in range(30)]\n", + "\n", + "entity_rows = pd.DataFrame(\n", + " {\n", + " \"datetime\": event_timestamps,\n", + " \"customer_id\": [customers[idx % len(customers)] for idx in range(len(event_timestamps))],\n", + " }\n", + ")\n", + "\n", + "print(entity_rows.head(10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 10. Retrieve online features" + "### 10. Retrieve historical/batch features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The process of retrieving features from the online API is very similar to that of the batch API. The only major difference is that users do not have to provide timestamps (only the latest features are returned, as long as they are within the maximum age window)" + "Next we will create a new client object, but this time we will configure it to connect to the Batch Serving Service. This service will allow us to retrieve historical feature data." 
] }, { @@ -315,37 +438,39 @@ "metadata": {}, "outputs": [], "source": [ - "online_client = Client(core_url=CORE_URL, serving_url=ONLINE_SERVING_URL)\n", - "online_client.set_project(\"customer_project\")" + "batch_client = Client(core_url=FEAST_CORE_URL, serving_url=FEAST_BATCH_SERVING_URL)\n", + "batch_client.set_project(\"customer_project\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The example below retrieves online features for a single customer: \"1001\"" + "By calling the `get_batch_features` method we are able to retrieve a `job` object for the exporting of feature data. For every entity and timestamp combination in `entity_rows` we will be receiving a row with feature values joined to it." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "online_features = online_client.get_online_features(\n", - " feature_refs=[\n", - " f\"daily_transactions\",\n", - " f\"total_transactions\",\n", - " ],\n", - " entity_rows=[\n", - " GetOnlineFeaturesRequest.EntityRow(\n", - " fields={\n", - " \"customer_id\": Value(\n", - " int64_val=1001)\n", - " }\n", - " )\n", - " ],\n", - ")" + "job = batch_client.get_batch_features(\n", + " feature_refs=[\n", + " f\"customer_project/daily_transactions\", \n", + " f\"customer_project/total_transactions\", \n", + " ],\n", + " entity_rows=entity_rows\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the job is complete, it is possible to retrieve the exported data (from Google Cloud Storage) and load it into memory as a Pandas Dataframe." 
] }, { @@ -354,7 +479,8 @@ "metadata": {}, "outputs": [], "source": [ - "print(online_features)" + "df = job.to_dataframe()\n", + "print(df.head(10))" ] } ], @@ -374,7 +500,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.4" }, "pycharm": { "stem_cell": { diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml index c8f328548a9..e4ca21aa62f 100644 --- a/infra/charts/feast/Chart.yaml +++ b/infra/charts/feast/Chart.yaml @@ -1,4 +1,4 @@ apiVersion: v1 description: A Helm chart to install Feast on kubernetes name: feast -version: 0.4.4 +version: 0.4.5 diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index 0463a9a3f89..ca526ad0b9f 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -36,10 +36,10 @@ helm repo add feast-charts https://feast-charts.storage.googleapis.com helm repo update ``` -Install Feast release with minimal features, without batch serving and persistency: +Install Feast release with minimal features, without batch serving and persistence: ```bash RELEASE_NAME=demo -helm install feast-charts/feast --name $RELEASE_NAME --version 0.3.2 -f values-demo.yaml +helm install feast-charts/feast --name $RELEASE_NAME -f values-demo.yaml ``` Install Feast release for typical use cases, with batch and online serving: @@ -60,7 +60,7 @@ PROJECT_ID=google-cloud-project-id DATASET_ID=bigquery-dataset-id # Install the Helm release using default values.yaml -helm install feast-charts/feast --name feast --version 0.3.2 \ +helm install feast-charts/feast --name feast \ --set feast-serving-batch."application\.yaml".feast.jobs.staging-location=$STAGING_LOCATION \ --set feast-serving-batch."store\.yaml".bigquery_config.project_id=$PROJECT_ID \ --set feast-serving-batch."store\.yaml".bigquery_config.dataset_id=$DATASET_ID @@ -81,17 +81,26 @@ The following table lists the configurable parameters of the Feast chart and the | 
`feast-core.kafka.topics[0].name` | Default topic name in Kafka| `feast` | `feast-core.kafka.topics[0].replicationFactor` | No of replication factor for the topic| `1` | `feast-core.kafka.topics[0].partitions` | No of partitions for the topic | `1` +| `feast-core.prometheus-statsd-exporter.enabled` | Flag to install Prometheus StatsD Exporter | `false` +| `feast-core.prometheus-statsd-exporter.*` | Refer to this [link](charts/feast-core/charts/prometheus-statsd-exporter/values.yaml) | | `feast-core.replicaCount` | No of pods to create | `1` | `feast-core.image.repository` | Repository for Feast Core Docker image | `gcr.io/kf-feast/feast-core` -| `feast-core.image.tag` | Tag for Feast Core Docker image | `0.3.2` +| `feast-core.image.tag` | Tag for Feast Core Docker image | `0.4.5` | `feast-core.image.pullPolicy` | Image pull policy for Feast Core Docker image | `IfNotPresent` +| `feast-core.prometheus.enabled` | Add annotations to enable Prometheus scraping | `false` | `feast-core.application.yaml` | Configuration for Feast Core application | Refer to this [link](charts/feast-core/values.yaml) | `feast-core.springConfigMountPath` | Directory to mount application.yaml | `/etc/feast/feast-core` | `feast-core.gcpServiceAccount.useExistingSecret` | Flag to use existing secret for GCP service account | `false` | `feast-core.gcpServiceAccount.existingSecret.name` | Secret name for the service account | `feast-gcp-service-account` | `feast-core.gcpServiceAccount.existingSecret.key` | Secret key for the service account | `key.json` | `feast-core.gcpServiceAccount.mountPath` | Directory to mount the JSON key file | `/etc/gcloud/service-accounts` +| `feast-core.gcpProjectId` | Project ID to set `GOOGLE_CLOUD_PROJECT` to change default project used by SDKs | `""` +| `feast-core.jarPath` | Path to Jar file in the Docker image | `/opt/feast/feast-core.jar` | `feast-core.jvmOptions` | Options for the JVM | `[]` +| `feast-core.logLevel` | Application logging level | `warn` +|
`feast-core.logType` | Application logging type (`JSON` or `Console`) | `JSON` +| `feast-core.springConfigProfiles` | Map of profile name to file content for additional Spring profiles | `{}` +| `feast-core.springConfigProfilesActive` | CSV of profiles to enable from `springConfigProfiles` | `""` | `feast-core.livenessProbe.enabled` | Flag to enable liveness probe | `true` | `feast-core.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `60` | `feast-core.livenessProbe.periodSeconds` | How often to perform the probe | `10` @@ -109,6 +118,7 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-core.grpc.port` | Kubernetes Service port for GRPC request| `6565` | `feast-core.grpc.targetPort` | Container port for GRPC request| `6565` | `feast-core.resources` | CPU and memory allocation for the pod | `{}` +| `feast-core.ingress` | See *Ingress Parameters* [below](#ingress-parameters) | `{}` | `feast-serving-online.enabled` | Flag to install Feast Online Serving | `true` | `feast-serving-online.redis.enabled` | Flag to install Redis in Feast Serving | `false` | `feast-serving-online.redis.usePassword` | Flag to use password to access Redis | `false` @@ -116,8 +126,9 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-online.core.enabled` | Flag for Feast Serving to use Feast Core in the same Helm release | `true` | `feast-serving-online.replicaCount` | No of pods to create | `1` | `feast-serving-online.image.repository` | Repository for Feast Serving Docker image | `gcr.io/kf-feast/feast-serving` -| `feast-serving-online.image.tag` | Tag for Feast Serving Docker image | `0.3.2` +| `feast-serving-online.image.tag` | Tag for Feast Serving Docker image | `0.4.5` | `feast-serving-online.image.pullPolicy` | Image pull policy for Feast Serving Docker image | `IfNotPresent` +| `feast-serving-online.prometheus.enabled` | Add annotations to enable Prometheus 
scraping | `true` | `feast-serving-online.application.yaml` | Application configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) | `feast-serving-online.store.yaml` | Store configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) | `feast-serving-online.springConfigMountPath` | Directory to mount application.yaml and store.yaml | `/etc/feast/feast-serving` @@ -125,7 +136,13 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-online.gcpServiceAccount.existingSecret.name` | Secret name for the service account | `feast-gcp-service-account` | `feast-serving-online.gcpServiceAccount.existingSecret.key` | Secret key for the service account | `key.json` | `feast-serving-online.gcpServiceAccount.mountPath` | Directory to mount the JSON key file | `/etc/gcloud/service-accounts` +| `feast-serving-online.gcpProjectId` | Project ID to set `GOOGLE_CLOUD_PROJECT` to change default project used by SDKs | `""` +| `feast-serving-online.jarPath` | Path to Jar file in the Docker image | `/opt/feast/feast-serving.jar` | `feast-serving-online.jvmOptions` | Options for the JVM | `[]` +| `feast-serving-online.logLevel` | Application logging level | `warn` +| `feast-serving-online.logType` | Application logging type (`JSON` or `Console`) | `JSON` +| `feast-serving-online.springConfigProfiles` | Map of profile name to file content for additional Spring profiles | `{}` +| `feast-serving-online.springConfigProfilesActive` | CSV of profiles to enable from `springConfigProfiles` | `""` | `feast-serving-online.livenessProbe.enabled` | Flag to enable liveness probe | `true` | `feast-serving-online.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `60` | `feast-serving-online.livenessProbe.periodSeconds` | How often to perform the probe | `10` @@ -143,6 +160,7 @@ The following table lists the configurable parameters of the Feast chart and the | 
`feast-serving-online.grpc.port` | Kubernetes Service port for GRPC request| `6566` | `feast-serving-online.grpc.targetPort` | Container port for GRPC request| `6566` | `feast-serving-online.resources` | CPU and memory allocation for the pod | `{}` +| `feast-serving-online.ingress` | See *Ingress Parameters* [below](#ingress-parameters) | `{}` | `feast-serving-batch.enabled` | Flag to install Feast Batch Serving | `true` | `feast-serving-batch.redis.enabled` | Flag to install Redis in Feast Serving | `false` | `feast-serving-batch.redis.usePassword` | Flag to use password to access Redis | `false` @@ -150,8 +168,9 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-batch.core.enabled` | Flag for Feast Serving to use Feast Core in the same Helm release | `true` | `feast-serving-batch.replicaCount` | No of pods to create | `1` | `feast-serving-batch.image.repository` | Repository for Feast Serving Docker image | `gcr.io/kf-feast/feast-serving` -| `feast-serving-batch.image.tag` | Tag for Feast Serving Docker image | `0.3.2` +| `feast-serving-batch.image.tag` | Tag for Feast Serving Docker image | `0.4.5` | `feast-serving-batch.image.pullPolicy` | Image pull policy for Feast Serving Docker image | `IfNotPresent` +| `feast-serving-batch.prometheus.enabled` | Add annotations to enable Prometheus scraping | `true` | `feast-serving-batch.application.yaml` | Application configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) | `feast-serving-batch.store.yaml` | Store configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) | `feast-serving-batch.springConfigMountPath` | Directory to mount application.yaml and store.yaml | `/etc/feast/feast-serving` @@ -159,7 +178,13 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-batch.gcpServiceAccount.existingSecret.name` | Secret name for the service account | 
`feast-gcp-service-account` | `feast-serving-batch.gcpServiceAccount.existingSecret.key` | Secret key for the service account | `key.json` | `feast-serving-batch.gcpServiceAccount.mountPath` | Directory to mount the JSON key file | `/etc/gcloud/service-accounts` +| `feast-serving-batch.gcpProjectId` | Project ID to set `GOOGLE_CLOUD_PROJECT` to change default project used by SDKs | `""` +| `feast-serving-batch.jarPath` | Path to Jar file in the Docker image | `/opt/feast/feast-serving.jar` | `feast-serving-batch.jvmOptions` | Options for the JVM | `[]` +| `feast-serving-batch.logLevel` | Application logging level | `warn` +| `feast-serving-batch.logType` | Application logging type (`JSON` or `Console`) | `JSON` +| `feast-serving-batch.springConfigProfiles` | Map of profile name to file content for additional Spring profiles | `{}` +| `feast-serving-batch.springConfigProfilesActive` | CSV of profiles to enable from `springConfigProfiles` | `""` | `feast-serving-batch.livenessProbe.enabled` | Flag to enable liveness probe | `true` | `feast-serving-batch.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `60` | `feast-serving-batch.livenessProbe.periodSeconds` | How often to perform the probe | `10` @@ -176,4 +201,51 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-batch.http.targetPort` | Container port for HTTP request | `8080` | `feast-serving-batch.grpc.port` | Kubernetes Service port for GRPC request| `6566` | `feast-serving-batch.grpc.targetPort` | Container port for GRPC request| `6566` -| `feast-serving-batch.resources` | CPU and memory allocation for the pod | `{}` \ No newline at end of file +| `feast-serving-batch.resources` | CPU and memory allocation for the pod | `{}` +| `feast-serving-batch.ingress` | See *Ingress Parameters* [below](#ingress-parameters) | `{}` + +## Ingress Parameters + +The following table lists the configurable parameters of the ingress section for each 
Feast module. + +Note, there are two ingresses available for each module - `grpc` and `http`. + +| Parameter | Description | Default +| ----------------------------- | ----------- | ------- +| `ingress.grpc.enabled` | Enables an ingress (endpoint) for the gRPC server | `false` +| `ingress.grpc.*` | See below | +| `ingress.http.enabled` | Enables an ingress (endpoint) for the HTTP server | `false` +| `ingress.http.*` | See below | +| `ingress.*.class` | Value for `kubernetes.io/ingress.class` | `nginx` +| `ingress.*.hosts` | List of host-names for the ingress | `[]` +| `ingress.*.annotations` | Additional ingress annotations | `{}` +| `ingress.*.https.enabled` | Add a tls section to the ingress | `true` +| `ingress.*.https.secretNames` | Map of hostname to TLS secret name | `{}` If not specified, defaults to `domain-tld-tls` e.g. `feast.example.com` uses secret `example-com-tls` +| `ingress.*.auth.enabled` | Enable auth on the ingress (only applicable for `nginx` type) | `false` +| `ingress.*.auth.signinHost` | External hostname of the OAuth2 proxy to use | First item in `ingress.hosts`, replacing the sub-domain with 'auth' e.g. `feast.example.com` uses `auth.example.com` +| `ingress.*.auth.authUrl` | Internal URI to internal auth endpoint | `http://auth-server.auth-ns.svc.cluster.local/auth` +| `ingress.*.whitelist` | Subnet masks to whitelist (i.e. 
value for `nginx.ingress.kubernetes.io/whitelist-source-range`) | `""` + +To enable all the ingresses, use a config like the following (while also adding the hosts etc): + +```yaml +feast-core: + ingress: + grpc: + enabled: true + http: + enabled: true +feast-serving-online: + ingress: + grpc: + enabled: true + http: + enabled: true +feast-serving-batch: + ingress: + grpc: + enabled: true + http: + enabled: true +``` + diff --git a/infra/charts/feast/charts/feast-core/Chart.yaml b/infra/charts/feast/charts/feast-core/Chart.yaml index 86d0699b9ac..28b3297bba9 100644 --- a/infra/charts/feast/charts/feast-core/Chart.yaml +++ b/infra/charts/feast/charts/feast-core/Chart.yaml @@ -1,4 +1,4 @@ apiVersion: v1 description: A Helm chart for core component of Feast name: feast-core -version: 0.4.4 +version: 0.4.5 diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/.helmignore b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/.helmignore similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/.helmignore rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/.helmignore diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/Chart.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/Chart.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/Chart.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/Chart.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/README.md b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/README.md similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/README.md rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/README.md diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/NOTES.txt 
b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/NOTES.txt similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/NOTES.txt rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/NOTES.txt diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/_helpers.tpl b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/_helpers.tpl similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/_helpers.tpl rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/_helpers.tpl diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/config.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/config.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/config.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/config.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/deployment.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/deployment.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/deployment.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/deployment.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/pvc.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/pvc.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/pvc.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/pvc.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/service.yaml 
b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/service.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/service.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/service.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/serviceaccount.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/serviceaccount.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/serviceaccount.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/serviceaccount.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/values.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/values.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/values.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/values.yaml diff --git a/infra/charts/feast/charts/feast-core/requirements.yaml b/infra/charts/feast/charts/feast-core/requirements.yaml index efe9fec508a..ef1e39a7d0f 100644 --- a/infra/charts/feast/charts/feast-core/requirements.yaml +++ b/infra/charts/feast/charts/feast-core/requirements.yaml @@ -6,4 +6,10 @@ dependencies: - name: kafka version: 0.20.1 repository: "@incubator" - condition: kafka.enabled \ No newline at end of file + condition: kafka.enabled +- name: common + version: 0.0.5 + repository: "@incubator" +- name: prometheus-statsd-exporter + version: 0.1.2 + condition: prometheus-statsd-exporter.enabled \ No newline at end of file diff --git a/infra/charts/feast/charts/feast-core/templates/_ingress.yaml b/infra/charts/feast/charts/feast-core/templates/_ingress.yaml new file mode 100644 index 00000000000..5bed6df0470 --- /dev/null +++ 
b/infra/charts/feast/charts/feast-core/templates/_ingress.yaml @@ -0,0 +1,68 @@ +{{- /* +This takes an array of four values: +- the top context +- the feast component +- the service protocol +- the ingress context +*/ -}} +{{- define "feast.ingress" -}} +{{- $top := (index . 0) -}} +{{- $component := (index . 1) -}} +{{- $protocol := (index . 2) -}} +{{- $ingressValues := (index . 3) -}} +apiVersion: extensions/v1beta1 +kind: Ingress +{{ include "feast.ingress.metadata" . }} +spec: + rules: + {{- range $host := $ingressValues.hosts }} + - host: {{ $host }} + http: + paths: + - path: / + backend: + serviceName: {{ include (printf "feast-%s.fullname" $component) $top }} + servicePort: {{ index $top.Values "service" $protocol "port" }} + {{- end }} +{{- if $ingressValues.https.enabled }} + tls: + {{- range $host := $ingressValues.hosts }} + - secretName: {{ index $ingressValues.https.secretNames $host | default (splitList "." $host | rest | join "-" | printf "%s-tls") }} + hosts: + - {{ $host }} + {{- end }} +{{- end -}} +{{- end -}} + +{{- define "feast.ingress.metadata" -}} +{{- $commonMetadata := fromYaml (include "common.metadata" (first .)) }} +{{- $overrides := fromYaml (include "feast.ingress.metadata-overrides" .) -}} +{{- toYaml (merge $overrides $commonMetadata) -}} +{{- end -}} + +{{- define "feast.ingress.metadata-overrides" -}} +{{- $top := (index . 0) -}} +{{- $component := (index . 1) -}} +{{- $protocol := (index . 2) -}} +{{- $ingressValues := (index . 
3) -}} +{{- $commonFullname := include "common.fullname" $top }} +metadata: + name: {{ $commonFullname }}-{{ $component }}-{{ $protocol }} + annotations: + kubernetes.io/ingress.class: {{ $ingressValues.class | quote }} + {{- if (and (eq $ingressValues.class "nginx") $ingressValues.auth.enabled) }} + nginx.ingress.kubernetes.io/auth-url: {{ $ingressValues.auth.authUrl | quote }} + nginx.ingress.kubernetes.io/auth-response-headers: "x-auth-request-email, x-auth-request-user" + nginx.ingress.kubernetes.io/auth-signin: "https://{{ $ingressValues.auth.signinHost | default (splitList "." (index $ingressValues.hosts 0) | rest | join "." | printf "auth.%s")}}/oauth2/start?rd=/r/$host/$request_uri" + {{- end }} + {{- if (and (eq $ingressValues.class "nginx") $ingressValues.whitelist) }} + nginx.ingress.kubernetes.io/whitelist-source-range: {{ $ingressValues.whitelist | quote -}} + {{- end }} + {{- if (and (eq $ingressValues.class "nginx") (eq $protocol "grpc") ) }} + # TODO: Allow choice of GRPC/GRPCS + nginx.ingress.kubernetes.io/backend-protocol: "GRPC" + {{- end }} + {{- if $ingressValues.annotations -}} + {{ include "common.annote" $ingressValues.annotations | indent 4 }} + {{- end }} +{{- end -}} diff --git a/infra/charts/feast/charts/feast-core/templates/configmap.yaml b/infra/charts/feast/charts/feast-core/templates/configmap.yaml index 68dc45c0571..da45cad5bdf 100644 --- a/infra/charts/feast/charts/feast-core/templates/configmap.yaml +++ b/infra/charts/feast/charts/feast-core/templates/configmap.yaml @@ -11,22 +11,43 @@ metadata: heritage: {{ .Release.Service }} data: application.yaml: | -{{- $config := index .Values "application.yaml"}} +{{- toYaml (index .Values "application.yaml") | nindent 4 }} {{- if .Values.postgresql.enabled }} -{{- $datasource := dict "url" (printf "jdbc:postgresql://%s:%s/%s" (printf "%s-postgresql" .Release.Name) (.Values.postgresql.service.port | toString) (.Values.postgresql.postgresqlDatabase)) "driverClassName" "org.postgresql.Driver" 
}} -{{- $newConfig := dict "spring" (dict "datasource" $datasource) }} -{{- $config := mergeOverwrite $config $newConfig }} + application-bundled-postgresql.yaml: | + spring: + datasource: + url: {{ printf "jdbc:postgresql://%s:%s/%s" (printf "%s-postgresql" .Release.Name) (.Values.postgresql.service.port | toString) (.Values.postgresql.postgresqlDatabase) }} + driverClassName: org.postgresql.Driver {{- end }} -{{- if .Values.kafka.enabled }} -{{- $topic := index .Values.kafka.topics 0 }} -{{- $options := dict "topic" $topic.name "replicationFactor" $topic.replicationFactor "partitions" $topic.partitions }} -{{- if not .Values.kafka.external.enabled }} -{{- $_ := set $options "bootstrapServers" (printf "%s:9092" (printf "%s-kafka" .Release.Name)) }} +{{ if .Values.kafka.enabled }} + {{- $topic := index .Values.kafka.topics 0 }} + application-bundled-kafka.yaml: | + feast: + stream: + type: kafka + options: + topic: {{ $topic.name | quote }} + replicationFactor: {{ $topic.replicationFactor }} + partitions: {{ $topic.partitions }} + {{- if not .Values.kafka.external.enabled }} + bootstrapServers: {{ printf "%s:9092" (printf "%s-kafka" .Release.Name) }} + {{- end }} {{- end }} -{{- $newConfig := dict "feast" (dict "stream" (dict "type" "kafka" "options" $options))}} -{{- $config := mergeOverwrite $config $newConfig }} + +{{- if (index .Values "prometheus-statsd-exporter" "enabled" )}} + application-bundled-statsd.yaml: | + feast: + jobs: + metrics: + enabled: true + type: statsd + host: prometheus-statsd-exporter + port: 9125 {{- end }} -{{- toYaml $config | nindent 4 }} +{{- range $name, $content := .Values.springConfigProfiles }} + application-{{ $name }}.yaml: | +{{- toYaml $content | nindent 4 }} +{{- end }} diff --git a/infra/charts/feast/charts/feast-core/templates/deployment.yaml b/infra/charts/feast/charts/feast-core/templates/deployment.yaml index 02a533c2637..df834b6749e 100644 --- a/infra/charts/feast/charts/feast-core/templates/deployment.yaml +++ 
b/infra/charts/feast/charts/feast-core/templates/deployment.yaml @@ -18,6 +18,13 @@ spec: release: {{ .Release.Name }} template: metadata: + {{- if .Values.prometheus.enabled }} + annotations: + {{ $config := index .Values "application.yaml" }} + prometheus.io/path: /metrics + prometheus.io/port: "{{ $config.server.port }}" + prometheus.io/scrape: "true" + {{- end }} labels: app: {{ template "feast-core.name" . }} component: core @@ -40,9 +47,9 @@ spec: containers: - name: {{ .Chart.Name }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + image: '{{ .Values.image.repository }}:{{ required "No .image.tag found. This must be provided as input." .Values.image.tag }}' imagePullPolicy: {{ .Values.image.pullPolicy }} - + volumeMounts: - name: {{ template "feast-core.fullname" . }}-config mountPath: "{{ .Values.springConfigMountPath }}" @@ -53,31 +60,48 @@ spec: {{- end }} env: + - name: LOG_TYPE + value: {{ .Values.logType | quote }} + - name: LOG_LEVEL + value: {{ .Values.logLevel | quote }} + {{- if .Values.postgresql.enabled }} - name: SPRING_DATASOURCE_USERNAME - value: {{ .Values.postgresql.postgresqlUsername }} + value: {{ .Values.postgresql.postgresqlUsername | quote }} - name: SPRING_DATASOURCE_PASSWORD - value: {{ .Values.postgresql.postgresqlPassword }} + value: {{ .Values.postgresql.postgresqlPassword | quote }} {{- end }} {{- if .Values.gcpServiceAccount.useExistingSecret }} - name: GOOGLE_APPLICATION_CREDENTIALS value: {{ .Values.gcpServiceAccount.mountPath }}/{{ .Values.gcpServiceAccount.existingSecret.key }} {{- end }} + {{- if .Values.gcpProjectId }} + - name: GOOGLE_CLOUD_PROJECT + value: {{ .Values.gcpProjectId | quote }} + {{- end }} command: - java {{- range .Values.jvmOptions }} - - {{ . }} + - {{ . 
| quote }} + {{- end }} + - -jar + - {{ .Values.jarPath | quote }} + - "--spring.config.location=file:{{ .Values.springConfigMountPath }}/" + {{- $profilesArray := splitList "," .Values.springConfigProfilesActive -}} + {{- $profilesArray = append $profilesArray (.Values.postgresql.enabled | ternary "bundled-postgresql" "") -}} + {{- $profilesArray = append $profilesArray (.Values.kafka.enabled | ternary "bundled-kafka" "") -}} + {{- $profilesArray = append $profilesArray (index .Values "prometheus-statsd-exporter" "enabled" | ternary "bundled-statsd" "") -}} + {{- $profilesArray = compact $profilesArray -}} + {{- if $profilesArray }} + - "--spring.profiles.active={{ join "," $profilesArray }}" {{- end }} - - -jar - - /opt/feast/feast-core.jar - - "--spring.config.location=file:{{ .Values.springConfigMountPath }}/application.yaml" ports: - name: http containerPort: {{ .Values.service.http.targetPort }} - - name: grpc + - name: grpc containerPort: {{ .Values.service.grpc.targetPort }} {{- if .Values.livenessProbe.enabled }} @@ -103,6 +127,6 @@ spec: timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} failureThreshold: {{ .Values.readinessProbe.failureThreshold }} {{- end }} - + resources: {{- toYaml .Values.resources | nindent 10 }} diff --git a/infra/charts/feast/charts/feast-core/templates/ingress.yaml b/infra/charts/feast/charts/feast-core/templates/ingress.yaml index 86fc2d3f175..7f453e1a75f 100644 --- a/infra/charts/feast/charts/feast-core/templates/ingress.yaml +++ b/infra/charts/feast/charts/feast-core/templates/ingress.yaml @@ -1,28 +1,7 @@ -{{- if .Values.ingress.enabled -}} -{{- $fullName := include "feast-core.fullname" . -}} -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: {{ $fullName }} - labels: - app: {{ template "feast-core.name" . 
}} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - component: core - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - annotations: -{{- with .Values.ingress.annotations }} -{{ toYaml . | indent 4 }} +{{- if .Values.ingress.http.enabled -}} +{{ template "feast.ingress" (list . "core" "http" .Values.ingress.http) }} +{{- end }} +--- +{{ if .Values.ingress.grpc.enabled -}} +{{ template "feast.ingress" (list . "core" "grpc" .Values.ingress.grpc) }} {{- end }} -spec: - rules: - {{- range .Values.ingress.hosts }} - - host: {{ .host | quote }} - http: - paths: - - path: / - backend: - serviceName: {{ $fullName }} - servicePort: {{ .port | quote }} - {{- end }} -{{- end }} \ No newline at end of file diff --git a/infra/charts/feast/charts/feast-core/values.yaml b/infra/charts/feast/charts/feast-core/values.yaml index 321d71c844d..077906dc35d 100644 --- a/infra/charts/feast/charts/feast-core/values.yaml +++ b/infra/charts/feast/charts/feast-core/values.yaml @@ -1,12 +1,15 @@ -# postgresql configures Postgresql that is installed as part of Feast Core. +# ============================================================ +# Bundled PostgreSQL +# ============================================================ + # Refer to https://github.com/helm/charts/tree/c42002a21abf8eff839ff1d2382152bde2bbe596/stable/postgresql # for additional configuration. postgresql: # enabled specifies whether Postgresql should be installed as part of Feast Core. # - # Feast Core requires a database to store data such as the created FeatureSets + # Feast Core requires a database to store data such as the created FeatureSets # and job statuses. If enabled, the database and service port specified below - # will override "spring.datasource.url" value in application.yaml. The + # will override "spring.datasource.url" value in application.yaml. The # username and password will also be set as environment variables that will # override "spring.datasource.username/password" in application.yaml. 
enabled: true @@ -20,12 +23,15 @@ postgresql: # port is the TCP port that Postgresql will listen to port: 5432 -# kafka configures Kafka that is installed as part of Feast Core. +# ============================================================ +# Bundled Kafka +# ============================================================ + # Refer to https://github.com/helm/charts/tree/c42002a21abf8eff839ff1d2382152bde2bbe596/incubator/kafka # for additional configuration. kafka: # enabled specifies whether Kafka should be installed as part of Feast Core. - # + # # Feast Core requires a Kafka instance to be set as the default source for # FeatureRows. If enabled, "feast.stream" option in application.yaml will # be overridden by this installed Kafka configuration. @@ -36,22 +42,38 @@ kafka: replicationFactor: 1 partitions: 1 + +# ============================================================ +# Bundled Prometheus StatsD Exporter +# ============================================================ + +prometheus-statsd-exporter: + enabled: false + +# ============================================================ +# Feast Core +# ============================================================ + # replicaCount is the number of pods that will be created. replicaCount: 1 # image configures the Docker image for Feast Core image: repository: gcr.io/kf-feast/feast-core - tag: 0.3.2 pullPolicy: IfNotPresent +# Add prometheus scraping annotations to the Pod metadata. +# If enabled, you must also ensure server.port is specified under application.yaml +prometheus: + enabled: false + # application.yaml is the main configuration for Feast Core application. -# +# # Feast Core is a Spring Boot app which uses this yaml configuration file. # Refer to https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/core/src/main/resources/application.yml # for a complete list and description of the configuration. 
# -# Note that some properties defined in application.yaml may be overriden by +# Note that some properties defined in application.yaml may be overriden by # Helm under certain conditions. For example, if postgresql and kafka dependencies # are enabled. application.yaml: @@ -97,7 +119,14 @@ application.yaml: host: localhost port: 8125 -# springConfigMountPath is the directory path where application.yaml will be +springConfigProfiles: {} +# db: | +# spring: +# datasource: +# driverClassName: org.postgresql.Driver +# url: jdbc:postgresql://${DB_HOST:127.0.0.1}:${DB_PORT:5432}/${DB_DATABASE:postgres} +springConfigProfilesActive: "" +# springConfigMountPath is the directory path where application.yaml will be # mounted in the container. springConfigMountPath: /etc/feast/feast-core @@ -108,7 +137,7 @@ gcpServiceAccount: useExistingSecret: false existingSecret: # name is the secret name of the existing secret for the service account. - name: feast-gcp-service-account + name: feast-gcp-service-account # key is the secret key of the existing secret for the service account. # key is normally derived from the file name of the JSON key file. key: key.json @@ -116,19 +145,29 @@ gcpServiceAccount: # the value of "existingSecret.key" is file name of the service account file. mountPath: /etc/gcloud/service-accounts -# jvmOptions are options that will be passed to the Java Virtual Machine (JVM) +# Project ID picked up by the Cloud SDK (e.g. BigQuery run against this project) +gcpProjectId: "" + +# Path to Jar file in the Docker image. +# If you are using gcr.io/kf-feast/feast-core this should not need to be changed +jarPath: /opt/feast/feast-core.jar + +# jvmOptions are options that will be passed to the Java Virtual Machine (JVM) # running Feast Core. -# +# # For example, it is good practice to set min and max heap size in JVM. 
# https://stackoverflow.com/questions/6902135/side-effect-for-increasing-maxpermsize-and-max-heap-size # # Refer to https://docs.oracle.com/cd/E22289_01/html/821-1274/configuring-the-default-jvm-and-java-arguments.html # to see other JVM options that can be set. # -# jvmOptions: -# - -Xms1024m +jvmOptions: [] +# - -Xms1024m # - -Xmx1024m +logType: JSON +logLevel: warn + livenessProbe: enabled: true initialDelaySeconds: 60 @@ -163,12 +202,29 @@ service: # nodePort: ingress: - enabled: false - annotations: {} - # kubernetes.io/ingress.class: nginx - hosts: - # - host: chart-example.local - # port: http + grpc: + enabled: false + class: nginx + hosts: [] + annotations: {} + https: + enabled: true + secretNames: {} + whitelist: "" + auth: + enabled: false + http: + enabled: false + class: nginx + hosts: [] + annotations: {} + https: + enabled: true + secretNames: {} + whitelist: "" + auth: + enabled: false + authUrl: http://auth-server.auth-ns.svc.cluster.local/auth resources: {} # We usually recommend not to specify default resources and to leave this as a conscious diff --git a/infra/charts/feast/charts/feast-serving/Chart.yaml b/infra/charts/feast/charts/feast-serving/Chart.yaml index 2e9cf89243d..c610474c3e5 100644 --- a/infra/charts/feast/charts/feast-serving/Chart.yaml +++ b/infra/charts/feast/charts/feast-serving/Chart.yaml @@ -1,4 +1,4 @@ apiVersion: v1 description: A Helm chart for serving component of Feast name: feast-serving -version: 0.4.4 +version: 0.4.5 diff --git a/infra/charts/feast/charts/feast-serving/requirements.yaml b/infra/charts/feast/charts/feast-serving/requirements.yaml index fa4c1df4c10..2cee3f81494 100644 --- a/infra/charts/feast/charts/feast-serving/requirements.yaml +++ b/infra/charts/feast/charts/feast-serving/requirements.yaml @@ -3,3 +3,6 @@ dependencies: version: 9.5.0 repository: "@stable" condition: redis.enabled +- name: common + version: 0.0.5 + repository: "@incubator" diff --git 
a/infra/charts/feast/charts/feast-serving/templates/_helpers.tpl b/infra/charts/feast/charts/feast-serving/templates/_helpers.tpl index 49abb6b8e50..ab670cc8cc7 100644 --- a/infra/charts/feast/charts/feast-serving/templates/_helpers.tpl +++ b/infra/charts/feast/charts/feast-serving/templates/_helpers.tpl @@ -43,3 +43,10 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/managed-by: {{ .Release.Service }} {{- end -}} + +{{/* +Helpers +*/}} +{{- define "bq_store_and_no_job_options" -}} +{{ and (eq (index .Values "store.yaml" "type") "BIGQUERY") (empty (index .Values "application.yaml" "feast" "jobs" "store-options")) }} +{{- end -}} diff --git a/infra/charts/feast/charts/feast-serving/templates/_ingress.yaml b/infra/charts/feast/charts/feast-serving/templates/_ingress.yaml new file mode 100644 index 00000000000..5bed6df0470 --- /dev/null +++ b/infra/charts/feast/charts/feast-serving/templates/_ingress.yaml @@ -0,0 +1,68 @@ +{{- /* +This takes an array of four values: +- the top context +- the feast component +- the service protocol +- the ingress context +*/ -}} +{{- define "feast.ingress" -}} +{{- $top := (index . 0) -}} +{{- $component := (index . 1) -}} +{{- $protocol := (index . 2) -}} +{{- $ingressValues := (index . 3) -}} +apiVersion: extensions/v1beta1 +kind: Ingress +{{ include "feast.ingress.metadata" . }} +spec: + rules: + {{- range $host := $ingressValues.hosts }} + - host: {{ $host }} + http: + paths: + - path: / + backend: + serviceName: {{ include (printf "feast-%s.fullname" $component) $top }} + servicePort: {{ index $top.Values "service" $protocol "port" }} + {{- end }} +{{- if $ingressValues.https.enabled }} + tls: + {{- range $host := $ingressValues.hosts }} + - secretName: {{ index $ingressValues.https.secretNames $host | default (splitList "." 
$host | rest | join "-" | printf "%s-tls") }} + hosts: + - {{ $host }} + {{- end }} +{{- end -}} +{{- end -}} + +{{- define "feast.ingress.metadata" -}} +{{- $commonMetadata := fromYaml (include "common.metadata" (first .)) }} +{{- $overrides := fromYaml (include "feast.ingress.metadata-overrides" .) -}} +{{- toYaml (merge $overrides $commonMetadata) -}} +{{- end -}} + +{{- define "feast.ingress.metadata-overrides" -}} +{{- $top := (index . 0) -}} +{{- $component := (index . 1) -}} +{{- $protocol := (index . 2) -}} +{{- $ingressValues := (index . 3) -}} +{{- $commonFullname := include "common.fullname" $top }} +metadata: + name: {{ $commonFullname }}-{{ $component }}-{{ $protocol }} + annotations: + kubernetes.io/ingress.class: {{ $ingressValues.class | quote }} + {{- if (and (eq $ingressValues.class "nginx") $ingressValues.auth.enabled) }} + nginx.ingress.kubernetes.io/auth-url: {{ $ingressValues.auth.authUrl | quote }} + nginx.ingress.kubernetes.io/auth-response-headers: "x-auth-request-email, x-auth-request-user" + nginx.ingress.kubernetes.io/auth-signin: "https://{{ $ingressValues.auth.signinHost | default (splitList "." (index $ingressValues.hosts 0) | rest | join "." 
| printf "auth.%s")}}/oauth2/start?rd=/r/$host/$request_uri" + {{- end }} + {{- if (and (eq $ingressValues.class "nginx") $ingressValues.whitelist) }} + nginx.ingress.kubernetes.io/whitelist-source-range: {{ $ingressValues.whitelist | quote -}} + {{- end }} + {{- if (and (eq $ingressValues.class "nginx") (eq $protocol "grpc") ) }} + # TODO: Allow choice of GRPC/GRPCS + nginx.ingress.kubernetes.io/backend-protocol: "GRPC" + {{- end }} + {{- if $ingressValues.annotations -}} + {{ include "common.annote" $ingressValues.annotations | indent 4 }} + {{- end }} +{{- end -}} diff --git a/infra/charts/feast/charts/feast-serving/templates/configmap.yaml b/infra/charts/feast/charts/feast-serving/templates/configmap.yaml index 0ec80252c16..934216a9d5f 100644 --- a/infra/charts/feast/charts/feast-serving/templates/configmap.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/configmap.yaml @@ -11,37 +11,43 @@ metadata: heritage: {{ .Release.Service }} data: application.yaml: | -{{- $config := index .Values "application.yaml" }} +{{- toYaml (index .Values "application.yaml") | nindent 4 }} {{- if .Values.core.enabled }} -{{- $newConfig := dict "feast" (dict "core-host" (printf "%s-feast-core" .Release.Name)) }} -{{- $config := mergeOverwrite $config $newConfig }} + application-bundled-core.yaml: | + feast: + core-host: {{ printf "%s-feast-core" .Release.Name }} {{- end }} -{{- $store := index .Values "store.yaml" }} -{{- if and (eq $store.type "BIGQUERY") (not (hasKey $config.feast.jobs "store-options")) }} -{{- $jobStore := dict "host" (printf "%s-redis-headless" .Release.Name) "port" 6379 }} -{{- $newConfig := dict "feast" (dict "jobs" (dict "store-options" $jobStore)) }} -{{- $config := mergeOverwrite $config $newConfig }} +{{- if eq (include "bq_store_and_no_job_options" .) 
"true" }} + application-bundled-redis.yaml: | + feast: + jobs: + store-options: + host: {{ printf "%s-redis-headless" .Release.Name }} + port: 6379 {{- end }} -{{- toYaml $config | nindent 4 }} - store.yaml: | -{{- $config := index .Values "store.yaml"}} +{{- $store := index .Values "store.yaml"}} -{{- if and .Values.redis.enabled (eq $config.type "REDIS") }} +{{- if and .Values.redis.enabled (eq $store.type "REDIS") }} {{- if eq .Values.redis.master.service.type "ClusterIP" }} {{- $newConfig := dict "redis_config" (dict "host" (printf "%s-redis-headless" .Release.Name) "port" .Values.redis.redisPort) }} -{{- $config := mergeOverwrite $config $newConfig }} +{{- $config := mergeOverwrite $store $newConfig }} {{- end }} {{- if and (eq .Values.redis.master.service.type "LoadBalancer") (not (empty .Values.redis.master.service.loadBalancerIP)) }} {{- $newConfig := dict "redis_config" (dict "host" .Values.redis.master.service.loadBalancerIP "port" .Values.redis.redisPort) }} -{{- $config := mergeOverwrite $config $newConfig }} +{{- $config := mergeOverwrite $store $newConfig }} {{- end }} {{- end }} -{{- toYaml $config | nindent 4 }} +{{- toYaml $store | nindent 4 }} + +{{- range $name, $content := .Values.springConfigProfiles }} + application-{{ $name }}.yaml: | +{{- toYaml $content | nindent 4 }} +{{- end }} diff --git a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml index 5be636df96b..64dd3955d0c 100644 --- a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml @@ -47,9 +47,9 @@ spec: containers: - name: {{ .Chart.Name }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + image: '{{ .Values.image.repository }}:{{ required "No .image.tag found. This must be provided as input." 
.Values.image.tag }}' imagePullPolicy: {{ .Values.image.pullPolicy }} - + volumeMounts: - name: {{ template "feast-serving.fullname" . }}-config mountPath: "{{ .Values.springConfigMountPath }}" @@ -60,24 +60,40 @@ spec: {{- end }} env: + - name: LOG_TYPE + value: {{ .Values.logType | quote }} + - name: LOG_LEVEL + value: {{ .Values.logLevel | quote }} + {{- if .Values.gcpServiceAccount.useExistingSecret }} - name: GOOGLE_APPLICATION_CREDENTIALS value: {{ .Values.gcpServiceAccount.mountPath }}/{{ .Values.gcpServiceAccount.existingSecret.key }} {{- end }} + {{- if .Values.gcpProjectId }} + - name: GOOGLE_CLOUD_PROJECT + value: {{ .Values.gcpProjectId | quote }} + {{- end }} command: - java {{- range .Values.jvmOptions }} - - {{ . }} + - {{ . | quote }} + {{- end }} + - -jar + - {{ .Values.jarPath | quote }} + - "--spring.config.location=file:{{ .Values.springConfigMountPath }}/" + {{- $profilesArray := splitList "," .Values.springConfigProfilesActive -}} + {{- $profilesArray = append $profilesArray (.Values.core.enabled | ternary "bundled-core" "") -}} + {{- $profilesArray = append $profilesArray (eq (include "bq_store_and_no_job_options" .) 
"true" | ternary "bundled-redis" "") -}} + {{- $profilesArray = compact $profilesArray -}} + {{- if $profilesArray }} + - "--spring.profiles.active={{ join "," $profilesArray }}" {{- end }} - - -jar - - /opt/feast/feast-serving.jar - - "--spring.config.location=file:{{ .Values.springConfigMountPath }}/application.yaml" ports: - name: http containerPort: {{ .Values.service.http.targetPort }} - - name: grpc + - name: grpc containerPort: {{ .Values.service.grpc.targetPort }} {{- if .Values.livenessProbe.enabled }} @@ -101,6 +117,6 @@ spec: timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} failureThreshold: {{ .Values.readinessProbe.failureThreshold }} {{- end }} - + resources: {{- toYaml .Values.resources | nindent 10 }} diff --git a/infra/charts/feast/charts/feast-serving/templates/ingress.yaml b/infra/charts/feast/charts/feast-serving/templates/ingress.yaml index c6b4cb07a81..1bcd176147a 100644 --- a/infra/charts/feast/charts/feast-serving/templates/ingress.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/ingress.yaml @@ -1,28 +1,7 @@ -{{- if .Values.ingress.enabled -}} -{{- $fullName := include "feast-serving.fullname" . -}} -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: {{ $fullName }} - labels: - app: {{ template "feast-serving.name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - component: serving - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - annotations: -{{- with .Values.ingress.annotations }} -{{ toYaml . | indent 4 }} +{{- if .Values.ingress.http.enabled -}} +{{ template "feast.ingress" (list . "serving" "http" .Values.ingress.http) }} {{- end }} -spec: - rules: - {{- range .Values.ingress.hosts }} - - host: {{ .host | quote }} - http: - paths: - - path: / - backend: - serviceName: {{ $fullName }} - servicePort: {{ .port | quote }} - {{- end }} +--- +{{ if .Values.ingress.grpc.enabled -}} +{{ template "feast.ingress" (list . 
"serving" "grpc" .Values.ingress.grpc) }} {{- end }} diff --git a/infra/charts/feast/charts/feast-serving/values.yaml b/infra/charts/feast/charts/feast-serving/values.yaml index d489a48748d..52d10cd7440 100644 --- a/infra/charts/feast/charts/feast-serving/values.yaml +++ b/infra/charts/feast/charts/feast-serving/values.yaml @@ -3,23 +3,23 @@ # for additional configuration redis: # enabled specifies whether Redis should be installed as part of Feast Serving. - # + # # If enabled, "redis_config" in store.yaml will be overwritten by Helm # to the configuration in this Redis installation. enabled: false # usePassword specifies if password is required to access Redis. Note that # Feast 0.3 does not support Redis with password. - usePassword: false + usePassword: false # cluster configuration for Redis. cluster: # enabled specifies if Redis should be installed in cluster mode. enabled: false -# core configures Feast Core in the same parent feast chart that this Feast +# core configures Feast Core in the same parent feast chart that this Feast # Serving connects to. core: # enabled specifies that Feast Serving will use Feast Core installed - # in the same parent feast chart. If enabled, Helm will overwrite + # in the same parent feast chart. If enabled, Helm will overwrite # "feast.core-host" in application.yaml with the correct value. enabled: true @@ -29,7 +29,6 @@ replicaCount: 1 # image configures the Docker image for Feast Serving image: repository: gcr.io/kf-feast/feast-serving - tag: 0.3.2 pullPolicy: IfNotPresent # application.yaml is the main configuration for Feast Serving application. @@ -38,7 +37,7 @@ image: # Refer to https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/serving/src/main/resources/application.yml # for a complete list and description of the configuration. 
# -# Note that some properties defined in application.yaml may be overridden by +# Note that some properties defined in application.yaml may be overridden by # Helm under certain conditions. For example, if core is enabled, then # "feast.core-host" will be overridden. Also, if "type: BIGQUERY" is specified # in store.yaml, "feast.jobs.store-options" will be overridden as well with @@ -67,19 +66,19 @@ application.yaml: port: 8080 # store.yaml is the configuration for Feast Store. -# +# # Refer to this link for description: # https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/protos/feast/core/Store.proto # # Use the correct store configuration depending on whether the installed # Feast Serving is "online" or "batch", by uncommenting the correct store.yaml. # -# Note that if "redis.enabled: true" and "type: REDIS" in store.yaml, +# Note that if "redis.enabled: true" and "type: REDIS" in store.yaml, # Helm will override "redis_config" with configuration of Redis installed # in this chart. -# +# # Note that if "type: BIGQUERY" in store.yaml, Helm assumes Feast Online serving -# is also installed with Redis store. Helm will then override "feast.jobs.store-options" +# is also installed with Redis store. Helm will then override "feast.jobs.store-options" # in application.yaml with the installed Redis store configuration. This is # because in Feast 0.3, Redis job store is required. # @@ -105,7 +104,14 @@ application.yaml: # name: "*" # version: "*" -# springConfigMountPath is the directory path where application.yaml and +springConfigProfiles: {} +# db: | +# spring: +# datasource: +# driverClassName: org.postgresql.Driver +# url: jdbc:postgresql://${DB_HOST:127.0.0.1}:${DB_PORT:5432}/${DB_DATABASE:postgres} +springConfigProfilesActive: "" +# springConfigMountPath is the directory path where application.yaml and # store.yaml will be mounted in the container. 
springConfigMountPath: /etc/feast/feast-serving @@ -116,7 +122,7 @@ gcpServiceAccount: useExistingSecret: false existingSecret: # name is the secret name of the existing secret for the service account. - name: feast-gcp-service-account + name: feast-gcp-service-account # key is the secret key of the existing secret for the service account. # key is normally derived from the file name of the JSON key file. key: key.json @@ -124,19 +130,29 @@ gcpServiceAccount: # the value of "existingSecret.key" is file name of the service account file. mountPath: /etc/gcloud/service-accounts -# jvmOptions are options that will be passed to the Java Virtual Machine (JVM) +# Project ID picked up by the Cloud SDK (e.g. BigQuery run against this project) +gcpProjectId: "" + +# Path to Jar file in the Docker image. +# If using gcr.io/kf-feast/feast-serving this should not need to be changed. +jarPath: /opt/feast/feast-serving.jar + +# jvmOptions are options that will be passed to the Java Virtual Machine (JVM) # running Feast Core. -# +# # For example, it is good practice to set min and max heap size in JVM. # https://stackoverflow.com/questions/6902135/side-effect-for-increasing-maxpermsize-and-max-heap-size # # Refer to https://docs.oracle.com/cd/E22289_01/html/821-1274/configuring-the-default-jvm-and-java-arguments.html # to see other JVM options that can be set. 
# -# jvmOptions: -# - -Xms768m +jvmOptions: [] +# - -Xms768m # - -Xmx768m +logType: JSON +logLevel: warn + livenessProbe: enabled: false initialDelaySeconds: 60 @@ -171,12 +187,29 @@ service: # nodePort: ingress: - enabled: false - annotations: {} - # kubernetes.io/ingress.class: nginx - hosts: - # - host: chart-example.local - # port: http + grpc: + enabled: false + class: nginx + hosts: [] + annotations: {} + https: + enabled: true + secretNames: {} + whitelist: "" + auth: + enabled: false + http: + enabled: false + class: nginx + hosts: [] + annotations: {} + https: + enabled: true + secretNames: {} + whitelist: "" + auth: + enabled: false + authUrl: http://auth-server.auth-ns.svc.cluster.local/auth prometheus: enabled: true @@ -186,6 +219,7 @@ resources: {} # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # # limits: # cpu: 100m # memory: 128Mi diff --git a/infra/charts/feast/requirements.lock b/infra/charts/feast/requirements.lock index 8afd9521573..e441790dc76 100644 --- a/infra/charts/feast/requirements.lock +++ b/infra/charts/feast/requirements.lock @@ -1,12 +1,6 @@ dependencies: -- name: feast-core - repository: "" - version: 0.3.2 -- name: feast-serving - repository: "" - version: 0.3.2 -- name: feast-serving - repository: "" - version: 0.3.2 -digest: sha256:7ee4cd271cbd4ace44817dd12ba65f490a8e3529adf199604a2c2bdad9c2fac3 -generated: "2019-11-27T13:35:41.334054+08:00" +- name: common + repository: https://kubernetes-charts-incubator.storage.googleapis.com + version: 0.0.5 +digest: sha256:935bfb09e9ed90ff800826a7df21adaabe3225511c3ad78df44e1a5a60e93f14 +generated: 2019-12-10T14:47:49.57569Z diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index ed280d64b6e..b30635dcdb9 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feast-core - version: 0.3.2 + version: 0.4.5 condition: feast-core.enabled - name: feast-serving alias: feast-serving-batch - version: 0.3.2 + version: 0.4.5 condition: feast-serving-batch.enabled - name: feast-serving alias: feast-serving-online - version: 0.3.2 - condition: feast-serving-online.enabled + version: 0.4.5 + condition: feast-serving-online.enabled \ No newline at end of file diff --git a/infra/charts/feast/values-demo.yaml b/infra/charts/feast/values-demo.yaml index fad4bc0afb0..2cb5ccbe741 100644 --- a/infra/charts/feast/values-demo.yaml +++ b/infra/charts/feast/values-demo.yaml @@ -1,7 +1,7 @@ # The following are values for installing Feast for demonstration purpose: # - Persistence is disabled since for demo purpose data is not expected # to be durable -# - Only online serving (no batch serving) is installed to remove dependency +# - Only online serving (no batch serving) is installed to remove dependency # 
on Google Cloud services. Batch serving requires BigQuery dependency. # - Replace all occurrences of "feast.example.com" with the domain name or # external IP pointing to your cluster @@ -68,4 +68,17 @@ feast-serving-online: version: "*" feast-serving-batch: - enabled: false +# enabled: false + enabled: true + store.yaml: + name: bigquery + type: BIGQUERY + bigquery_config: + project_id: PROJECT_ID + dataset_id: DATASET_ID + subscriptions: + - project: "*" + name: "*" + version: "*" + redis: + enabled: false \ No newline at end of file diff --git a/infra/charts/feast/values.yaml b/infra/charts/feast/values.yaml index db3ec44f330..dd2174ae46d 100644 --- a/infra/charts/feast/values.yaml +++ b/infra/charts/feast/values.yaml @@ -2,20 +2,18 @@ # - Feast Core # - Feast Serving Online # - Feast Serving Batch +# - Prometheus StatsD Exporter # # The configuration for different components can be referenced from: # - charts/feast-core/values.yaml # - charts/feast-serving/values.yaml +# - charts/prometheus-statsd-exporter/values.yaml # # Note that "feast-serving-online" and "feast-serving-batch" are # aliases to "feast-serving" chart since in typical scenario two instances # of Feast Serving: online and batch will be deployed. Both described # using the same chart "feast-serving". # -# The following are default values for typical Feast deployment, but not -# for production setting. Refer to "values-production.yaml" for recommended -# values in production environment. -# # Note that the import job by default uses DirectRunner # https://beam.apache.org/documentation/runners/direct/ # in this configuration since it allows Feast to run in more environments @@ -45,16 +43,18 @@ # ============================================================ feast-core: - # enabled specifies whether to install Feast Core component. + # enabled specifies whether to install the Feast Core component.
# # Normally, this is set to "false" when Feast users need access to low latency # Feast Serving, by deploying multiple instances of Feast Serving closest # to the client. These instances of Feast Serving however can still use # the same shared Feast Core. enabled: true - # Specify what image tag to use. Keep this consistent for all components + + # Specify which image tag to use. Keep this consistent for all components image: - tag: "0.4.4" + tag: "0.4.5" + # jvmOptions are options that will be passed to the Java Virtual Machine (JVM) # running Feast Core. # @@ -63,6 +63,7 @@ feast-core: jvmOptions: - -Xms1024m - -Xmx1024m + # resources that should be allocated to Feast Core. resources: requests: @@ -70,6 +71,7 @@ feast-core: memory: 1024Mi limits: memory: 2048Mi + # gcpServiceAccount is the Google service account that Feast Core will use. gcpServiceAccount: # useExistingSecret specifies Feast to use an existing secret containing @@ -119,7 +121,7 @@ feast-serving-online: enabled: true # Specify what image tag to use. Keep this consistent for all components image: - tag: "0.4.4" + tag: "0.4.5" # redis.enabled specifies whether Redis should be installed as part of Feast Serving. # # If enabled is set to "false", Feast admin has to ensure there is an @@ -178,7 +180,7 @@ feast-serving-batch: enabled: true # Specify what image tag to use. Keep this consistent for all components image: - tag: "0.4.4" + tag: "0.4.5" # redis.enabled specifies whether Redis should be installed as part of Feast Serving. # # This is usually set to "false" for Feast Serving Batch because the default @@ -235,11 +237,11 @@ feast-serving-batch: # enabled as well. So Feast Serving Batch will share the same # Redis instance to store job statuses. 
store-type: REDIS - store-options: - # Use the externally exposed redis instance deployed by Online service - # Please set EXTERNAL_IP to your cluster's external IP - host: EXTERNAL_IP - port: 32101 + # Default to use the internal hostname of the redis instance deployed by Online service, + # otherwise use externally exposed by setting EXTERNAL_IP to your cluster's external IP + # store-options: + # host: EXTERNAL_IP + # port: 32101 # store.yaml is the configuration for Feast Store. # # Refer to this link for more description: diff --git a/infra/docker-compose/.env.sample b/infra/docker-compose/.env.sample index e14bde27728..c8652e8fe0c 100644 --- a/infra/docker-compose/.env.sample +++ b/infra/docker-compose/.env.sample @@ -1,19 +1,21 @@ +# General COMPOSE_PROJECT_NAME=feast - FEAST_VERSION=latest +# Feast Core FEAST_CORE_IMAGE=gcr.io/kf-feast/feast-core -FEAST_CORE_CONFIG=direct-runner -FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY=placeholder +FEAST_CORE_CONFIG=direct-runner.yml +FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY=placeholder.json +# Feast Serving FEAST_SERVING_IMAGE=gcr.io/kf-feast/feast-serving -FEAST_ONLINE_SERVING_CONFIG=online-serving -FEAST_ONLINE_STORE_CONFIG=redis-store -FEAST_BATCH_SERVING_CONFIG=batch-serving -FEAST_BATCH_STORE_CONFIG=bq-store -FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY=placeholder -FEAST_JOB_STAGING_LOCATION=gs://your-gcp-project/bucket +FEAST_ONLINE_SERVING_CONFIG=online-serving.yml +FEAST_ONLINE_STORE_CONFIG=redis-store.yml +FEAST_BATCH_SERVING_CONFIG=batch-serving.yml +FEAST_BATCH_STORE_CONFIG=bq-store.yml +FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY=placeholder.json +FEAST_JOB_STAGING_LOCATION=gs://your-gcs-bucket/staging -FEAST_JUPYTER_IMAGE=gcr.io/kf-feast/feast-jupyter -FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY=placeholder +# Jupyter +FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY=placeholder.json diff --git a/infra/docker-compose/docker-compose.batch.yml b/infra/docker-compose/docker-compose.batch.yml deleted file mode 100644 index 
c00ac9475bd..00000000000 --- a/infra/docker-compose/docker-compose.batch.yml +++ /dev/null @@ -1,25 +0,0 @@ -version: "3.7" - -services: - batch-serving: - image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION} - volumes: - - ./serving/${FEAST_BATCH_SERVING_CONFIG}.yml:/etc/feast/application.yml - - ./serving/${FEAST_BATCH_STORE_CONFIG}.yml:/etc/feast/store.yml - - ./gcp-service-accounts/${FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json - depends_on: - - core - - redis - ports: - - 6567:6567 - restart: on-failure - environment: - GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json - FEAST_JOB_STAGING_LOCATION: ${FEAST_JOB_STAGING_LOCATION} - command: - - "java" - - "-Xms1024m" - - "-Xmx1024m" - - "-jar" - - "/opt/feast/feast-serving.jar" - - "--spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml" \ No newline at end of file diff --git a/infra/docker-compose/docker-compose.yml b/infra/docker-compose/docker-compose.yml index 44750650cec..27d82efc3ca 100644 --- a/infra/docker-compose/docker-compose.yml +++ b/infra/docker-compose/docker-compose.yml @@ -4,8 +4,8 @@ services: core: image: ${FEAST_CORE_IMAGE}:${FEAST_VERSION} volumes: - - ./core/${FEAST_CORE_CONFIG}.yml:/etc/feast/application.yml - - ./gcp-service-accounts/${FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json + - ./core/${FEAST_CORE_CONFIG}:/etc/feast/application.yml + - ./gcp-service-accounts/${FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json environment: DB_HOST: db GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json @@ -24,8 +24,8 @@ services: online-serving: image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION} volumes: - - ./serving/${FEAST_ONLINE_SERVING_CONFIG}.yml:/etc/feast/application.yml - - ./serving/${FEAST_ONLINE_STORE_CONFIG}.yml:/etc/feast/store.yml + - ./serving/${FEAST_ONLINE_SERVING_CONFIG}:/etc/feast/application.yml + - 
./serving/${FEAST_ONLINE_STORE_CONFIG}:/etc/feast/store.yml depends_on: - core - redis @@ -38,12 +38,34 @@ services: - /opt/feast/feast-serving.jar - --spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml + batch-serving: + image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION} + volumes: + - ./serving/${FEAST_BATCH_SERVING_CONFIG}:/etc/feast/application.yml + - ./serving/${FEAST_BATCH_STORE_CONFIG}:/etc/feast/store.yml + - ./gcp-service-accounts/${FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json + depends_on: + - core + - redis + ports: + - 6567:6567 + restart: on-failure + environment: + GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json + FEAST_JOB_STAGING_LOCATION: ${FEAST_JOB_STAGING_LOCATION} + command: + - "java" + - "-Xms1024m" + - "-Xmx1024m" + - "-jar" + - "/opt/feast/feast-serving.jar" + - "--spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml" + jupyter: - image: ${FEAST_JUPYTER_IMAGE}:${FEAST_VERSION} + image: jupyter/datascience-notebook:latest volumes: - - ./jupyter/notebooks:/home/jovyan/feast-notebooks - - ./jupyter/features:/home/jovyan/features - - ./gcp-service-accounts/${FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json + - ../../:/home/jovyan/feast + - ./gcp-service-accounts/${FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json depends_on: - core - online-serving diff --git a/infra/docker-compose/jupyter/features/cust_trans_fs.yaml b/infra/docker-compose/jupyter/features/cust_trans_fs.yaml deleted file mode 100644 index eb21ce9b35b..00000000000 --- a/infra/docker-compose/jupyter/features/cust_trans_fs.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: customer_transactions -kind: feature_set -entities: -- name: customer_id - valueType: INT64 -features: -- name: daily_transactions - valueType: FLOAT -- name: total_transactions - valueType: FLOAT -maxAge: 3600s \ No newline at end of file diff 
--git a/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml b/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml deleted file mode 100644 index 8293d04b881..00000000000 --- a/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: customer_transactions -kind: feature_set -entities: -- name: customer_id - valueType: INT64 -features: -- name: daily_transactions - valueType: FLOAT -- name: total_transactions - valueType: FLOAT -- name: discounts - valueType: FLOAT -maxAge: 3600s \ No newline at end of file diff --git a/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb b/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb deleted file mode 100644 index c288093f07b..00000000000 --- a/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb +++ /dev/null @@ -1,504 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Feast Batch Serving\n", - "This is an extension to `feast-quickstart` notebook to demonstrate the batch serving capability of Feast.\n", - "\n", - "## Prerequisite\n", - "- A running Feast Serving service with store configuration that supports batch retrieval. (eg. 
BigQuery store)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Preparation\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import feast\n", - "import numpy as np\n", - "import pandas as pd\n", - "from datetime import datetime, timedelta\n", - "from feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest\n", - "from feast.types.Value_pb2 import Value as Value\n", - "from feast.client import Client\n", - "from feast.feature_set import FeatureSet" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "client = feast.Client(core_url=\"core:6565\", serving_url=\"batch-serving:6567\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "cust_trans_fs = FeatureSet.from_yaml(\"../features/cust_trans_fs.yaml\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature set updated/created: \"customer_transactions:1\".\n" - ] - } - ], - "source": [ - "client.apply(cust_trans_fs)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datetimecustomer_iddaily_transactionstotal_transactions
02019-12-06 02:17:46.899904100002.797627175.978266
12019-12-06 02:17:46.899915100014.931632153.871975
22019-12-06 02:17:46.899922100020.206628108.558844
32019-12-06 02:17:46.899929100032.354937119.549455
42019-12-06 02:17:46.899937100047.171423115.345183
\n", - "
" - ], - "text/plain": [ - " datetime customer_id daily_transactions \\\n", - "0 2019-12-06 02:17:46.899904 10000 2.797627 \n", - "1 2019-12-06 02:17:46.899915 10001 4.931632 \n", - "2 2019-12-06 02:17:46.899922 10002 0.206628 \n", - "3 2019-12-06 02:17:46.899929 10003 2.354937 \n", - "4 2019-12-06 02:17:46.899937 10004 7.171423 \n", - "\n", - " total_transactions \n", - "0 175.978266 \n", - "1 153.871975 \n", - "2 108.558844 \n", - "3 119.549455 \n", - "4 115.345183 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "offset = 10000\n", - "nr_of_customers = 5\n", - "customer_df = pd.DataFrame(\n", - " {\n", - " \"datetime\": [datetime.utcnow() for _ in range(nr_of_customers)],\n", - " \"customer_id\": [offset + inc for inc in range(nr_of_customers)],\n", - " \"daily_transactions\": [np.random.uniform(0, 10) for _ in range(nr_of_customers)],\n", - " \"total_transactions\": [np.random.uniform(100, 200) for _ in range(nr_of_customers)],\n", - " }\n", - ")\n", - "customer_df" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 5/5 [00:00<00:00, 7.24rows/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Ingested 5 rows into customer_transactions:1\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "client.ingest(cust_trans_fs, dataframe=customer_df)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Batch Retrieval\n", - "Batch retrieval takes a dataframe containing the entities column and event timestamp as an input. The result would be the outer join of the input and the features. The input dataframe needs to have a column named `datetime` as event timestamp. 
No results will be returned if the difference between the feature ingestion timestamp and the `event_timestamp` is greater than the `maxAge` parameter specified in the feature set." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_transactions_v1_feature_timestampcustomer_idevent_timestampcustomer_transactions_v1_daily_transactionscustomer_transactions_v1_total_transactions
02019-12-06 02:17:46+00:00100012019-12-06 02:17:55.612449+00:004.931632153.871980
12019-12-06 02:17:46+00:00100042019-12-06 02:17:55.612449+00:007.171423115.345184
22019-12-06 02:17:46+00:00100002019-12-06 02:17:55.612449+00:002.797627175.978270
32019-12-06 02:17:46+00:00100022019-12-06 02:17:55.612449+00:000.206628108.558846
42019-12-06 02:17:46+00:00100032019-12-06 02:17:55.612449+00:002.354937119.549450
\n", - "
" - ], - "text/plain": [ - " customer_transactions_v1_feature_timestamp customer_id \\\n", - "0 2019-12-06 02:17:46+00:00 10001 \n", - "1 2019-12-06 02:17:46+00:00 10004 \n", - "2 2019-12-06 02:17:46+00:00 10000 \n", - "3 2019-12-06 02:17:46+00:00 10002 \n", - "4 2019-12-06 02:17:46+00:00 10003 \n", - "\n", - " event_timestamp \\\n", - "0 2019-12-06 02:17:55.612449+00:00 \n", - "1 2019-12-06 02:17:55.612449+00:00 \n", - "2 2019-12-06 02:17:55.612449+00:00 \n", - "3 2019-12-06 02:17:55.612449+00:00 \n", - "4 2019-12-06 02:17:55.612449+00:00 \n", - "\n", - " customer_transactions_v1_daily_transactions \\\n", - "0 4.931632 \n", - "1 7.171423 \n", - "2 2.797627 \n", - "3 0.206628 \n", - "4 2.354937 \n", - "\n", - " customer_transactions_v1_total_transactions \n", - "0 153.871980 \n", - "1 115.345184 \n", - "2 175.978270 \n", - "3 108.558846 \n", - "4 119.549450 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "entity_df = customer_df[[\"customer_id\"]].assign(datetime=datetime.utcnow())\n", - "feature_ids=[\n", - " \"customer_transactions:1:daily_transactions\",\n", - " \"customer_transactions:1:total_transactions\",\n", - "]\n", - "batch_job = client.get_batch_features(feature_ids, entity_df)\n", - "batch_job.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_transactions_v1_feature_timestampcustomer_idevent_timestampcustomer_transactions_v1_daily_transactionscustomer_transactions_v1_total_transactions
0None100002020-01-05 02:18:43.900732+00:00NoneNone
1None100012020-01-05 02:18:43.900732+00:00NoneNone
2None100022020-01-05 02:18:43.900732+00:00NoneNone
3None100032020-01-05 02:18:43.900732+00:00NoneNone
4None100042020-01-05 02:18:43.900732+00:00NoneNone
\n", - "
" - ], - "text/plain": [ - " customer_transactions_v1_feature_timestamp customer_id \\\n", - "0 None 10000 \n", - "1 None 10001 \n", - "2 None 10002 \n", - "3 None 10003 \n", - "4 None 10004 \n", - "\n", - " event_timestamp \\\n", - "0 2020-01-05 02:18:43.900732+00:00 \n", - "1 2020-01-05 02:18:43.900732+00:00 \n", - "2 2020-01-05 02:18:43.900732+00:00 \n", - "3 2020-01-05 02:18:43.900732+00:00 \n", - "4 2020-01-05 02:18:43.900732+00:00 \n", - "\n", - " customer_transactions_v1_daily_transactions \\\n", - "0 None \n", - "1 None \n", - "2 None \n", - "3 None \n", - "4 None \n", - "\n", - " customer_transactions_v1_total_transactions \n", - "0 None \n", - "1 None \n", - "2 None \n", - "3 None \n", - "4 None " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "stale_entity_df = customer_df[[\"customer_id\"]].assign(datetime=datetime.utcnow() + timedelta(days=30))\n", - "feature_ids=[\n", - " \"customer_transactions:1:daily_transactions\",\n", - " \"customer_transactions:1:total_transactions\",\n", - "]\n", - "batch_job = client.get_batch_features(feature_ids, stale_entity_df)\n", - "batch_job.to_dataframe()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "pycharm": { - "stem_cell": { - "cell_type": "raw", - "metadata": { - "collapsed": false - }, - "source": [] - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb b/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb deleted file mode 100644 index b89e59b1e49..00000000000 --- a/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb +++ 
/dev/null @@ -1,569 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Feast Quick Start\n", - "This is a quick example to demonstrate:\n", - "- Register a feature set on Feast\n", - "- Ingest features into Feast\n", - "- Retrieve the ingested features from Feast\n", - "- Update a feature" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import feast\n", - "import numpy as np\n", - "import pandas as pd\n", - "from datetime import datetime\n", - "from feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest\n", - "from feast.types.Value_pb2 import Value as Value\n", - "from feast.client import Client\n", - "from feast.feature_set import FeatureSet" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, instantiate the client.\n", - "Feast endpoints can be set via the following environmental variables: `FEAST_CORE_URL`, `FEAST_SERVING_URL`.\n", - "Alternatively, they can also be passed in explicitly as follows:\n", - " \n", - "`client = feast.Client(core_url=core:6565, serving_url=online-serving:6566)`" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "client = feast.Client()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Register a feature set\n", - "\n", - "Let's create and register our first feature set. 
Below is an example of a basic customer transactions feature set that has been exported to YAML:\n", - "```\n", - "name: customer_transactions\n", - "kind: feature_set\n", - "entities:\n", - "- name: customer_id\n", - " valueType: INT64\n", - "features:\n", - "- name: daily_transactions\n", - " valueType: FLOAT\n", - "- name: total_transactions\n", - " valueType: FLOAT\n", - "maxAge: 3600s \n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "cust_trans_fs = FeatureSet.from_yaml(\"../features/cust_trans_fs.yaml\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature set updated/created: \"customer_transactions:1\".\n" - ] - } - ], - "source": [ - "client.apply(cust_trans_fs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Ingest features into Feast\n", - "The dataframe below contains the features and entities of the above feature set." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datetimecustomer_iddaily_transactionstotal_transactions
02019-11-26 12:03:47.320634100005.178112110.670651
12019-11-26 12:03:47.320644100010.268114195.393913
22019-11-26 12:03:47.320651100021.486614136.929052
32019-11-26 12:03:47.320658100039.676433166.022999
42019-11-26 12:03:47.320665100045.928573165.687951
\n", - "
" - ], - "text/plain": [ - " datetime customer_id daily_transactions \\\n", - "0 2019-11-26 12:03:47.320634 10000 5.178112 \n", - "1 2019-11-26 12:03:47.320644 10001 0.268114 \n", - "2 2019-11-26 12:03:47.320651 10002 1.486614 \n", - "3 2019-11-26 12:03:47.320658 10003 9.676433 \n", - "4 2019-11-26 12:03:47.320665 10004 5.928573 \n", - "\n", - " total_transactions \n", - "0 110.670651 \n", - "1 195.393913 \n", - "2 136.929052 \n", - "3 166.022999 \n", - "4 165.687951 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "offset = 10000\n", - "nr_of_customers = 5\n", - "customer_df = pd.DataFrame(\n", - " {\n", - " \"datetime\": [datetime.utcnow() for _ in range(nr_of_customers)],\n", - " \"customer_id\": [offset + inc for inc in range(nr_of_customers)],\n", - " \"daily_transactions\": [np.random.uniform(0, 10) for _ in range(nr_of_customers)],\n", - " \"total_transactions\": [np.random.uniform(100, 200) for _ in range(nr_of_customers)],\n", - " }\n", - ")\n", - "customer_df" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/5 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datetimecustomer_iddaily_transactionstotal_transactionsdiscounts
02019-11-26 12:03:47.320634100005.178112110.6706518.389938
12019-11-26 12:03:47.320644100010.268114195.3939130.430047
22019-11-26 12:03:47.320651100021.486614136.9290527.408917
32019-11-26 12:03:47.320658100039.676433166.0229991.192721
42019-11-26 12:03:47.320665100045.928573165.6879512.051037
\n", - "" - ], - "text/plain": [ - " datetime customer_id daily_transactions \\\n", - "0 2019-11-26 12:03:47.320634 10000 5.178112 \n", - "1 2019-11-26 12:03:47.320644 10001 0.268114 \n", - "2 2019-11-26 12:03:47.320651 10002 1.486614 \n", - "3 2019-11-26 12:03:47.320658 10003 9.676433 \n", - "4 2019-11-26 12:03:47.320665 10004 5.928573 \n", - "\n", - " total_transactions discounts \n", - "0 110.670651 8.389938 \n", - "1 195.393913 0.430047 \n", - "2 136.929052 7.408917 \n", - "3 166.022999 1.192721 \n", - "4 165.687951 2.051037 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "discounts = [np.random.uniform(0, 10) for _ in range(nr_of_customers)]\n", - "customer_df_updated = customer_df.assign(discounts=discounts)\n", - "customer_df_updated" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/5 [00:00 featureSets = - SpecUtil.parseFeatureSetSpecJsonList(options.getFeatureSetJson()); + BZip2Decompressor> decompressor = + new BZip2Decompressor<>(new StringListStreamConverter()); + List featureSetJson = decompressor.decompress(options.getFeatureSetJson()); + List featureSets = SpecUtil.parseFeatureSetSpecJsonList(featureSetJson); List stores = SpecUtil.parseStoreJsonList(options.getStoreJson()); for (Store store : stores) { diff --git a/ingestion/src/main/java/feast/ingestion/options/BZip2Compressor.java b/ingestion/src/main/java/feast/ingestion/options/BZip2Compressor.java new file mode 100644 index 00000000000..b7e4e6ee0af --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/BZip2Compressor.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; + +public class BZip2Compressor implements OptionCompressor { + + private final OptionByteConverter byteConverter; + + public BZip2Compressor(OptionByteConverter byteConverter) { + this.byteConverter = byteConverter; + } + /** + * Compress pipeline option using BZip2 + * + * @param option Pipeline option value + * @return BZip2 compressed option value + * @throws IOException + */ + @Override + public byte[] compress(T option) throws IOException { + ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); + try (BZip2CompressorOutputStream bzip2Output = + new BZip2CompressorOutputStream(compressedStream)) { + bzip2Output.write(byteConverter.toByte(option)); + } + + return compressedStream.toByteArray(); + } +} diff --git a/ingestion/src/main/java/feast/ingestion/options/BZip2Decompressor.java b/ingestion/src/main/java/feast/ingestion/options/BZip2Decompressor.java new file mode 100644 index 00000000000..ce49c1be6e6 --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/BZip2Decompressor.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; + +public class BZip2Decompressor implements OptionDecompressor { + + private final InputStreamConverter inputStreamConverter; + + public BZip2Decompressor(InputStreamConverter inputStreamConverter) { + this.inputStreamConverter = inputStreamConverter; + } + + @Override + public T decompress(byte[] compressed) throws IOException { + try (ByteArrayInputStream inputStream = new ByteArrayInputStream(compressed); + BZip2CompressorInputStream bzip2Input = new BZip2CompressorInputStream(inputStream)) { + return inputStreamConverter.readStream(bzip2Input); + } + } +} diff --git a/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java b/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java index b299bb47e55..6afdd80dd72 100644 --- a/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java +++ b/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java @@ -28,16 +28,16 @@ public interface ImportOptions extends PipelineOptions, DataflowPipelineOptions, DirectOptions { @Required @Description( - "JSON string representation of the FeatureSet that the import job will process." + "JSON string representation of the FeatureSet that the import job will process, in BZip2 binary format." + "FeatureSet follows the format in feast.core.FeatureSet proto." 
+ "Multiple FeatureSetSpec can be passed by specifying '--featureSet={...}' multiple times" + "The conversion of Proto message to JSON should follow this mapping:" + "https://developers.google.com/protocol-buffers/docs/proto3#json" + "Please minify and remove all insignificant whitespace such as newline in the JSON string" + "to prevent error when parsing the options") - List getFeatureSetJson(); + byte[] getFeatureSetJson(); - void setFeatureSetJson(List featureSetJson); + void setFeatureSetJson(byte[] featureSetJson); @Required @Description( diff --git a/ingestion/src/main/java/feast/ingestion/options/InputStreamConverter.java b/ingestion/src/main/java/feast/ingestion/options/InputStreamConverter.java new file mode 100644 index 00000000000..e2fef732368 --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/InputStreamConverter.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import java.io.IOException; +import java.io.InputStream; + +public interface InputStreamConverter { + + /** + * Used in conjunction with {@link OptionDecompressor} to decompress the pipeline option + * + * @param inputStream Input byte stream in compressed format + * @return Decompressed pipeline option value + */ + T readStream(InputStream inputStream) throws IOException; +} diff --git a/ingestion/src/main/java/feast/ingestion/options/OptionByteConverter.java b/ingestion/src/main/java/feast/ingestion/options/OptionByteConverter.java new file mode 100644 index 00000000000..ff5a41a627d --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/OptionByteConverter.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.IOException; + +public interface OptionByteConverter { + + /** + * Used in conjunction with {@link OptionCompressor} to compress the pipeline option + * + * @param option Pipeline option value + * @return byte representation of the pipeline option value, without compression. 
+ */ + byte[] toByte(T option) throws IOException; +} diff --git a/ingestion/src/main/java/feast/ingestion/options/OptionCompressor.java b/ingestion/src/main/java/feast/ingestion/options/OptionCompressor.java new file mode 100644 index 00000000000..b2345fc3eb1 --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/OptionCompressor.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.IOException; + +public interface OptionCompressor { + + /** + * Compress pipeline option into bytes format. This is necessary as some Beam runner has + * limitation in terms of pipeline option size. + * + * @param option Pipeline option value + * @return Compressed values of the option, as byte array + */ + byte[] compress(T option) throws IOException; +} diff --git a/ingestion/src/main/java/feast/ingestion/options/OptionDecompressor.java b/ingestion/src/main/java/feast/ingestion/options/OptionDecompressor.java new file mode 100644 index 00000000000..affeafdaa0b --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/OptionDecompressor.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.IOException; + +public interface OptionDecompressor { + + /** + * Decompress pipeline option from byte array. + * + * @param compressed Compressed pipeline option value + * @return Decompressed pipeline option + */ + T decompress(byte[] compressed) throws IOException; +} diff --git a/ingestion/src/main/java/feast/ingestion/options/StringListStreamConverter.java b/ingestion/src/main/java/feast/ingestion/options/StringListStreamConverter.java new file mode 100644 index 00000000000..d7277f3c7d6 --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/StringListStreamConverter.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.List; +import java.util.stream.Collectors; + +public class StringListStreamConverter implements InputStreamConverter> { + + /** + * Convert Input byte stream to newline separated strings + * + * @param inputStream Input byte stream + * @return List of string + */ + @Override + public List readStream(InputStream inputStream) throws IOException { + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + List stringList = reader.lines().collect(Collectors.toList()); + reader.close(); + return stringList; + } +} diff --git a/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java b/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java index 27cca2ffb2e..4b744d0fe6b 100644 --- a/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java +++ b/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java @@ -24,8 +24,9 @@ import feast.store.serving.redis.RedisCustomIO.RedisMutation; import feast.types.FeatureRowProto.FeatureRow; import feast.types.FieldProto.Field; +import java.util.HashMap; +import java.util.List; import java.util.Map; -import java.util.Set; import java.util.stream.Collectors; import org.apache.beam.sdk.transforms.DoFn; import org.slf4j.Logger; @@ -42,17 +43,24 @@ public FeatureRowToRedisMutationDoFn(Map featureSets) { private RedisKey getKey(FeatureRow featureRow) { FeatureSet featureSet = featureSets.get(featureRow.getFeatureSet()); - Set entityNames = + List entityNames = featureSet.getSpec().getEntitiesList().stream() .map(EntitySpec::getName) - .collect(Collectors.toSet()); + .sorted() + .collect(Collectors.toList()); + Map entityFields = new HashMap<>(); Builder redisKeyBuilder = 
RedisKey.newBuilder().setFeatureSet(featureRow.getFeatureSet()); for (Field field : featureRow.getFieldsList()) { if (entityNames.contains(field.getName())) { - redisKeyBuilder.addEntities(field); + entityFields.putIfAbsent( + field.getName(), + Field.newBuilder().setName(field.getName()).setValue(field.getValue()).build()); } } + for (String entityName : entityNames) { + redisKeyBuilder.addEntities(entityFields.get(entityName)); + } return redisKeyBuilder.build(); } diff --git a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java index 290b38dabee..58ecae8f045 100644 --- a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java +++ b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java @@ -30,13 +30,16 @@ import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; import feast.core.StoreProto.Store.Subscription; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionByteConverter; import feast.storage.RedisProto.RedisKey; import feast.test.TestUtil; import feast.test.TestUtil.LocalKafka; import feast.test.TestUtil.LocalRedis; import feast.types.FeatureRowProto.FeatureRow; import feast.types.ValueProto.ValueType.Enum; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -48,6 +51,7 @@ import org.apache.beam.sdk.PipelineResult; import org.apache.beam.sdk.PipelineResult.State; import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import org.apache.kafka.common.serialization.ByteArraySerializer; import org.joda.time.Duration; import org.junit.AfterClass; @@ -162,12 +166,13 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() .build(); ImportOptions options = 
PipelineOptionsFactory.create().as(ImportOptions.class); - options.setFeatureSetJson( - Collections.singletonList( - JsonFormat.printer().omittingInsignificantWhitespace().print(featureSet.getSpec()))); - options.setStoreJson( - Collections.singletonList( - JsonFormat.printer().omittingInsignificantWhitespace().print(redis))); + BZip2Compressor compressor = new BZip2Compressor<>(option -> { + JsonFormat.Printer printer = + JsonFormat.printer().omittingInsignificantWhitespace().printingEnumsAsInts(); + return printer.print(option).getBytes(); + }); + options.setFeatureSetJson(compressor.compress(spec)); + options.setStoreJson(Collections.singletonList(JsonFormat.printer().print(redis))); options.setProject(""); options.setBlockOnRun(false); diff --git a/ingestion/src/test/java/feast/ingestion/options/BZip2CompressorTest.java b/ingestion/src/test/java/feast/ingestion/options/BZip2CompressorTest.java new file mode 100644 index 00000000000..cd03b18c793 --- /dev/null +++ b/ingestion/src/test/java/feast/ingestion/options/BZip2CompressorTest.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.junit.Assert; +import org.junit.Test; + +public class BZip2CompressorTest { + + @Test + public void shouldHaveBZip2CompatibleOutput() throws IOException { + BZip2Compressor compressor = new BZip2Compressor<>(String::getBytes); + String origString = "somestring"; + try (ByteArrayInputStream inputStream = + new ByteArrayInputStream(compressor.compress(origString)); + BZip2CompressorInputStream bzip2Input = new BZip2CompressorInputStream(inputStream); + BufferedReader reader = new BufferedReader(new InputStreamReader(bzip2Input))) { + Assert.assertEquals(origString, reader.readLine()); + } + } +} diff --git a/ingestion/src/test/java/feast/ingestion/options/BZip2DecompressorTest.java b/ingestion/src/test/java/feast/ingestion/options/BZip2DecompressorTest.java new file mode 100644 index 00000000000..fe7cc789d86 --- /dev/null +++ b/ingestion/src/test/java/feast/ingestion/options/BZip2DecompressorTest.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ */ +package feast.ingestion.options; + +import static org.junit.Assert.*; + +import java.io.*; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.junit.Test; + +public class BZip2DecompressorTest { + + @Test + public void shouldDecompressBZip2Stream() throws IOException { + BZip2Decompressor decompressor = + new BZip2Decompressor<>( + inputStream -> { + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + String output = reader.readLine(); + reader.close(); + return output; + }); + + String originalString = "abc"; + ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); + try (BZip2CompressorOutputStream bzip2Output = + new BZip2CompressorOutputStream(compressedStream)) { + bzip2Output.write(originalString.getBytes()); + } + + String decompressedString = decompressor.decompress(compressedStream.toByteArray()); + assertEquals(originalString, decompressedString); + } +} diff --git a/ingestion/src/test/java/feast/ingestion/options/StringListStreamConverterTest.java b/ingestion/src/test/java/feast/ingestion/options/StringListStreamConverterTest.java new file mode 100644 index 00000000000..5ce9f054bc9 --- /dev/null +++ b/ingestion/src/test/java/feast/ingestion/options/StringListStreamConverterTest.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import org.junit.Test; + +public class StringListStreamConverterTest { + + @Test + public void shouldReadStreamAsNewlineSeparatedStrings() throws IOException { + StringListStreamConverter converter = new StringListStreamConverter(); + String originalString = "abc\ndef"; + InputStream stringStream = new ByteArrayInputStream(originalString.getBytes()); + assertEquals(Arrays.asList("abc", "def"), converter.readStream(stringStream)); + } +} diff --git a/ingestion/src/test/java/feast/ingestion/util/DateUtilTest.java b/ingestion/src/test/java/feast/ingestion/utils/DateUtilTest.java similarity index 92% rename from ingestion/src/test/java/feast/ingestion/util/DateUtilTest.java rename to ingestion/src/test/java/feast/ingestion/utils/DateUtilTest.java index 71d4e67beaa..151d501a596 100644 --- a/ingestion/src/test/java/feast/ingestion/util/DateUtilTest.java +++ b/ingestion/src/test/java/feast/ingestion/utils/DateUtilTest.java @@ -14,15 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package feast.ingestion.util; +package feast.ingestion.utils; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.*; import com.google.protobuf.Timestamp; -import feast.ingestion.utils.DateUtil; import junit.framework.TestCase; import org.joda.time.DateTime; diff --git a/ingestion/src/test/java/feast/ingestion/util/JsonUtilTest.java b/ingestion/src/test/java/feast/ingestion/utils/JsonUtilTest.java similarity index 95% rename from ingestion/src/test/java/feast/ingestion/util/JsonUtilTest.java rename to ingestion/src/test/java/feast/ingestion/utils/JsonUtilTest.java index 02af4d819f9..62c74dfc345 100644 --- a/ingestion/src/test/java/feast/ingestion/util/JsonUtilTest.java +++ b/ingestion/src/test/java/feast/ingestion/utils/JsonUtilTest.java @@ -14,12 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package feast.ingestion.util; +package feast.ingestion.utils; import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; -import feast.ingestion.utils.JsonUtil; import java.util.Collections; import java.util.HashMap; import java.util.Map; diff --git a/ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java b/ingestion/src/test/java/feast/ingestion/utils/StoreUtilTest.java similarity index 91% rename from ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java rename to ingestion/src/test/java/feast/ingestion/utils/StoreUtilTest.java index 4e2297e405d..82988121bc8 100644 --- a/ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java +++ b/ingestion/src/test/java/feast/ingestion/utils/StoreUtilTest.java @@ -14,22 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package feast.ingestion.util; +package feast.ingestion.utils; -import static feast.types.ValueProto.ValueType.Enum.BOOL; -import static feast.types.ValueProto.ValueType.Enum.BOOL_LIST; -import static feast.types.ValueProto.ValueType.Enum.BYTES; -import static feast.types.ValueProto.ValueType.Enum.BYTES_LIST; -import static feast.types.ValueProto.ValueType.Enum.DOUBLE; -import static feast.types.ValueProto.ValueType.Enum.DOUBLE_LIST; -import static feast.types.ValueProto.ValueType.Enum.FLOAT; -import static feast.types.ValueProto.ValueType.Enum.FLOAT_LIST; -import static feast.types.ValueProto.ValueType.Enum.INT32; -import static feast.types.ValueProto.ValueType.Enum.INT32_LIST; -import static feast.types.ValueProto.ValueType.Enum.INT64; -import static feast.types.ValueProto.ValueType.Enum.INT64_LIST; -import static feast.types.ValueProto.ValueType.Enum.STRING; -import static feast.types.ValueProto.ValueType.Enum.STRING_LIST; +import static feast.types.ValueProto.ValueType.Enum.*; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.Field; @@ -40,7 +27,6 @@ import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.FeatureSetProto.FeatureSpec; -import feast.ingestion.utils.StoreUtil; import java.util.Arrays; import org.junit.Assert; import org.junit.Test; diff --git a/ingestion/src/test/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFnTest.java b/ingestion/src/test/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFnTest.java new file mode 100644 index 00000000000..92bb6e41c38 --- /dev/null +++ b/ingestion/src/test/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFnTest.java @@ -0,0 +1,183 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.store.serving.redis; + +import static org.junit.Assert.*; + +import com.google.protobuf.Timestamp; +import feast.core.FeatureSetProto; +import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSpec; +import feast.storage.RedisProto.RedisKey; +import feast.store.serving.redis.RedisCustomIO.RedisMutation; +import feast.types.FeatureRowProto.FeatureRow; +import feast.types.FieldProto.Field; +import feast.types.ValueProto.Value; +import feast.types.ValueProto.ValueType.Enum; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.beam.sdk.extensions.protobuf.ProtoCoder; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.values.PCollection; +import org.junit.Rule; +import org.junit.Test; + +public class FeatureRowToRedisMutationDoFnTest { + + @Rule public transient TestPipeline p = TestPipeline.create(); + + private FeatureSetProto.FeatureSet fs = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder() + .setName("feature_set") + .setVersion(1) + .addEntities( + EntitySpec.newBuilder() + .setName("entity_id_primary") + .setValueType(Enum.INT32) + .build()) + .addEntities( + EntitySpec.newBuilder() + .setName("entity_id_secondary") + 
 .setValueType(Enum.STRING) + .build()) + .addFeatures( + FeatureSpec.newBuilder() + .setName("feature_1") + .setValueType(Enum.STRING) + .build()) + .addFeatures( + FeatureSpec.newBuilder() + .setName("feature_2") + .setValueType(Enum.INT64) + .build())) + .build(); + + @Test + public void shouldConvertRowWithDuplicateEntitiesToValidKey() { + Map<String, FeatureSetProto.FeatureSet> featureSets = new HashMap<>(); + featureSets.put("feature_set", fs); + + FeatureRow offendingRow = + FeatureRow.newBuilder() + .setFeatureSet("feature_set") + .setEventTimestamp(Timestamp.newBuilder().setSeconds(10)) + .addFields( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(1))) + .addFields( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(2))) + .addFields( + Field.newBuilder() + .setName("entity_id_secondary") + .setValue(Value.newBuilder().setStringVal("a"))) + .build(); + + PCollection<RedisMutation> output = + p.apply(Create.of(Collections.singletonList(offendingRow))) + .setCoder(ProtoCoder.of(FeatureRow.class)) + .apply(ParDo.of(new FeatureRowToRedisMutationDoFn(featureSets))); + + RedisKey expectedKey = + RedisKey.newBuilder() + .setFeatureSet("feature_set") + .addEntities( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(1))) + .addEntities( + Field.newBuilder() + .setName("entity_id_secondary") + .setValue(Value.newBuilder().setStringVal("a"))) + .build(); + + PAssert.that(output) + .satisfies( + (SerializableFunction<Iterable<RedisMutation>, Void>) + input -> { + input.forEach( + rm -> { + assert (Arrays.equals(rm.getKey(), expectedKey.toByteArray())); + assert (Arrays.equals(rm.getValue(), offendingRow.toByteArray())); + }); + return null; + }); + p.run(); + } + + @Test + public void shouldConvertRowWithOutOfOrderEntitiesToValidKey() { + Map<String, FeatureSetProto.FeatureSet> featureSets = new HashMap<>(); + featureSets.put("feature_set", fs); + + FeatureRow offendingRow = + FeatureRow.newBuilder() + .setFeatureSet("feature_set") + 
 .setEventTimestamp(Timestamp.newBuilder().setSeconds(10)) + .addFields( + Field.newBuilder() + .setName("entity_id_secondary") + .setValue(Value.newBuilder().setStringVal("a"))) + .addFields( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(1))) + .build(); + + PCollection<RedisMutation> output = + p.apply(Create.of(Collections.singletonList(offendingRow))) + .setCoder(ProtoCoder.of(FeatureRow.class)) + .apply(ParDo.of(new FeatureRowToRedisMutationDoFn(featureSets))); + + RedisKey expectedKey = + RedisKey.newBuilder() + .setFeatureSet("feature_set") + .addEntities( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(1))) + .addEntities( + Field.newBuilder() + .setName("entity_id_secondary") + .setValue(Value.newBuilder().setStringVal("a"))) + .build(); + + PAssert.that(output) + .satisfies( + (SerializableFunction<Iterable<RedisMutation>, Void>) + input -> { + input.forEach( + rm -> { + assert (Arrays.equals(rm.getKey(), expectedKey.toByteArray())); + assert (Arrays.equals(rm.getValue(), offendingRow.toByteArray())); + }); + return null; + }); + p.run(); + } +} diff --git a/pom.xml b/pom.xml index 821d3b72321..3ba6a592cfa 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ - <revision>0.4.2-SNAPSHOT</revision> + <revision>0.4.5-SNAPSHOT</revision> <github.url>https://github.com/gojek/feast</github.url> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> diff --git a/protos/feast/storage/Redis.proto b/protos/feast/storage/Redis.proto index ae287f4e6bf..f58b137e9c1 100644 --- a/protos/feast/storage/Redis.proto +++ b/protos/feast/storage/Redis.proto @@ -32,6 +32,7 @@ message RedisKey { string feature_set = 2; // List of fields containing entity names and their respective values - // contained within this feature row. + // contained within this feature row. The entities should be sorted + // by the entity name alphabetically in ascending order. 
 repeated feast.types.Field entities = 3; } diff --git a/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java b/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java index 075c570c4e9..874196e92bd 100644 --- a/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java +++ b/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java @@ -61,7 +61,7 @@ public static List<FeatureReference> createFeatureRefs( "Feature id '%s' contains invalid version. Expected format: <project>/<feature-name>:<version>.", featureRefString)); } - } else if (projectSplit.length == 1) { + } else if (featureSplit.length == 1) { name = featureSplit[0]; } else { throw new IllegalArgumentException( diff --git a/sdk/java/src/main/java/com/gojek/feast/Row.java b/sdk/java/src/main/java/com/gojek/feast/Row.java index 9366fe1bb03..ceef139aa13 100644 --- a/sdk/java/src/main/java/com/gojek/feast/Row.java +++ b/sdk/java/src/main/java/com/gojek/feast/Row.java @@ -31,7 +31,7 @@ @SuppressWarnings("UnusedReturnValue") public class Row { private Timestamp entity_timestamp; - private Map<String, Value> fields; + private HashMap<String, Value> fields; public static Row create() { Row row = new Row(); diff --git a/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java b/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java index 1c58e9435c6..3b9429ad8f6 100644 --- a/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java +++ b/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java @@ -60,7 +60,7 @@ private static Stream<Arguments> provideValidFeatureIds() { Arrays.asList( "driver_project/driver_id:1", "driver_project/driver_name:1", - "booking_project/driver_name:1"), + "booking_project/driver_name"), Arrays.asList( FeatureReference.newBuilder() .setProject("driver_project") @@ -74,7 +74,6 @@ private static Stream<Arguments> provideValidFeatureIds() { .build(), FeatureReference.newBuilder() .setProject("booking_project") - .setVersion(1) .setName("driver_name") .build()))); } diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 1617f83852f..d0b37ad9419 100644 --- 
a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import subprocess from setuptools import find_packages, setup @@ -48,7 +49,13 @@ ] # README file from Feast repo root directory -README_FILE = os.path.join(os.path.dirname(__file__), "..", "..", "README.md") +repo_root = ( + subprocess.Popen(["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE) + .communicate()[0] + .rstrip() + .decode("utf-8") +) +README_FILE = os.path.join(repo_root, "README.md") with open(os.path.join(README_FILE), "r") as f: LONG_DESCRIPTION = f.read() diff --git a/serving/src/main/java/feast/serving/specs/CachedSpecService.java b/serving/src/main/java/feast/serving/specs/CachedSpecService.java index 040a870ffe1..1184f6da95a 100644 --- a/serving/src/main/java/feast/serving/specs/CachedSpecService.java +++ b/serving/src/main/java/feast/serving/specs/CachedSpecService.java @@ -49,7 +49,6 @@ import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; -import org.apache.commons.lang3.tuple.Triple; import org.slf4j.Logger; /** In-memory cache of specs. 
*/ @@ -199,8 +198,8 @@ private Map getFeatureToFeatureSetMapping( .collect( groupingBy( featureSet -> - Triple.of( - featureSet.getProject(), featureSet.getName(), featureSet.getVersion()))) + Pair.of( + featureSet.getProject(), featureSet.getName()))) .forEach( (group, groupedFeatureSets) -> { groupedFeatureSets = diff --git a/serving/src/main/resources/templates/single_featureset_pit_join.sql b/serving/src/main/resources/templates/single_featureset_pit_join.sql index 1f4612b3503..f3f20828ff1 100644 --- a/serving/src/main/resources/templates/single_featureset_pit_join.sql +++ b/serving/src/main/resources/templates/single_featureset_pit_join.sql @@ -29,7 +29,8 @@ SELECT created_timestamp, {{ featureSet.entities | join(', ')}}, false AS is_entity_table -FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second) +FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' +{% if featureSet.maxAge == 0 %}{% else %}AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second){% endif %} ), /* 2. 
Window the data in the unioned dataset, partitioning by entity and ordering by event_timestamp, as @@ -47,7 +48,7 @@ SELECT event_timestamp, {{ featureSet.entities | join(', ')}}, {% for featureName in featureSet.features %} - IF(event_timestamp >= {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp AND Timestamp_sub(event_timestamp, interval {{ featureSet.maxAge }} second) < {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}, NULL) as {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} + IF(event_timestamp >= {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp {% if featureSet.maxAge == 0 %}{% else %}AND Timestamp_sub(event_timestamp, interval {{ featureSet.maxAge }} second) < {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp{% endif %}, {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}, NULL) as {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} {% endfor %} FROM ( SELECT @@ -72,7 +73,8 @@ SELECT {% for featureName in featureSet.features %} {{ featureName }} as {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} {% endfor %} -FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second) +FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' +{% if featureSet.maxAge == 0 %}{% else %}AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', 
interval {{ featureSet.maxAge }} second){% endif %} ) USING ({{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, created_timestamp, {{ featureSet.entities | join(', ')}}) WHERE is_entity_table ) diff --git a/tests/e2e/bq-batch-retrieval.py b/tests/e2e/bq-batch-retrieval.py index 8616dd37a92..0cf05e77e1d 100644 --- a/tests/e2e/bq-batch-retrieval.py +++ b/tests/e2e/bq-batch-retrieval.py @@ -118,6 +118,14 @@ def test_apply_all_featuresets(client): client.apply(fs1) client.apply(fs2) + no_max_age_fs = FeatureSet( + "no_max_age", + features=[Feature("feature_value8", ValueType.INT64)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=0), + ) + client.apply(no_max_age_fs) + def test_get_batch_features_with_file(client): file_fs1 = client.get_feature_set(name="file_feature_set", version=1) @@ -327,3 +335,28 @@ def test_multiple_featureset_joins(client): assert output["entity_id"].to_list() == [int(i) for i in output["feature_value6"].to_list()] assert output["other_entity_id"].to_list() == output["other_feature_value7"].to_list() + + +def test_no_max_age(client): + no_max_age_fs = client.get_feature_set(name="no_max_age", version=1) + + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + N_ROWS = 10 + features_8_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value8": [i for i in range(N_ROWS)], + } + ) + client.ingest(no_max_age_fs, features_8_df) + + time.sleep(15) + feature_retrieval_job = client.get_batch_features( + entity_rows=features_8_df[["datetime", "entity_id"]], feature_refs=[f"{PROJECT_NAME}/feature_value8:1"] + ) + + output = feature_retrieval_job.to_dataframe() + print(output.head()) + + assert output["entity_id"].to_list() == output["feature_value8"].to_list() \ No newline at end of file