From 84cbf22ed8f2854439da37ffd2bd16b82c0fce80 Mon Sep 17 00:00:00 2001 From: Chen Zhiling Date: Tue, 28 Jan 2020 11:05:47 +0800 Subject: [PATCH 01/15] Allow users not to set max age for batch retrieval (#446) * Allow users not to set max age for batch retrieval * Fix typo in test assertion --- .../templates/single_featureset_pit_join.sql | 8 +++-- tests/e2e/bq-batch-retrieval.py | 33 +++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/serving/src/main/resources/templates/single_featureset_pit_join.sql b/serving/src/main/resources/templates/single_featureset_pit_join.sql index 1f4612b3503..f3f20828ff1 100644 --- a/serving/src/main/resources/templates/single_featureset_pit_join.sql +++ b/serving/src/main/resources/templates/single_featureset_pit_join.sql @@ -29,7 +29,8 @@ SELECT created_timestamp, {{ featureSet.entities | join(', ')}}, false AS is_entity_table -FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second) +FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' +{% if featureSet.maxAge == 0 %}{% else %}AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second){% endif %} ), /* 2. 
Window the data in the unioned dataset, partitioning by entity and ordering by event_timestamp, as @@ -47,7 +48,7 @@ SELECT event_timestamp, {{ featureSet.entities | join(', ')}}, {% for featureName in featureSet.features %} - IF(event_timestamp >= {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp AND Timestamp_sub(event_timestamp, interval {{ featureSet.maxAge }} second) < {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}, NULL) as {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} + IF(event_timestamp >= {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp {% if featureSet.maxAge == 0 %}{% else %}AND Timestamp_sub(event_timestamp, interval {{ featureSet.maxAge }} second) < {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp{% endif %}, {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}, NULL) as {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} {% endfor %} FROM ( SELECT @@ -72,7 +73,8 @@ SELECT {% for featureName in featureSet.features %} {{ featureName }} as {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} {% endfor %} -FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second) +FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' +{% if featureSet.maxAge == 0 %}{% else %}AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', 
interval {{ featureSet.maxAge }} second){% endif %} ) USING ({{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, created_timestamp, {{ featureSet.entities | join(', ')}}) WHERE is_entity_table ) diff --git a/tests/e2e/bq-batch-retrieval.py b/tests/e2e/bq-batch-retrieval.py index 8616dd37a92..0cf05e77e1d 100644 --- a/tests/e2e/bq-batch-retrieval.py +++ b/tests/e2e/bq-batch-retrieval.py @@ -118,6 +118,14 @@ def test_apply_all_featuresets(client): client.apply(fs1) client.apply(fs2) + no_max_age_fs = FeatureSet( + "no_max_age", + features=[Feature("feature_value8", ValueType.INT64)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=0), + ) + client.apply(no_max_age_fs) + def test_get_batch_features_with_file(client): file_fs1 = client.get_feature_set(name="file_feature_set", version=1) @@ -327,3 +335,28 @@ def test_multiple_featureset_joins(client): assert output["entity_id"].to_list() == [int(i) for i in output["feature_value6"].to_list()] assert output["other_entity_id"].to_list() == output["other_feature_value7"].to_list() + + +def test_no_max_age(client): + no_max_age_fs = client.get_feature_set(name="no_max_age", version=1) + + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + N_ROWS = 10 + features_8_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value8": [i for i in range(N_ROWS)], + } + ) + client.ingest(no_max_age_fs, features_8_df) + + time.sleep(15) + feature_retrieval_job = client.get_batch_features( + entity_rows=features_8_df[["datetime", "entity_id"]], feature_refs=[f"{PROJECT_NAME}/feature_value8:1"] + ) + + output = feature_retrieval_job.to_dataframe() + print(output.head()) + + assert output["entity_id"].to_list() == output["feature_value8"].to_list() \ No newline at end of file From 90c9786e0598f3ed037be2003aa1e24b3c15d043 Mon Sep 17 00:00:00 2001 From: Willem Pienaar 
<6728866+woop@users.noreply.github.com> Date: Sun, 2 Feb 2020 15:39:48 +0800 Subject: [PATCH 02/15] Deduplicate example notebooks (#456) * Deduplicate example notebooks * Merge docker-compose.yml for both batch and online serving. --- examples/basic/basic.ipynb | 256 ++++++-- infra/docker-compose/.env.sample | 24 +- infra/docker-compose/docker-compose.batch.yml | 25 - infra/docker-compose/docker-compose.yml | 38 +- .../jupyter/features/cust_trans_fs.yaml | 11 - .../features/cust_trans_fs_updated.yaml | 13 - .../notebooks/feast-batch-serving.ipynb | 504 ---------------- .../jupyter/notebooks/feast-quickstart.ipynb | 569 ------------------ infra/docker/jupyter/Dockerfile | 3 - infra/docker/jupyter/Dockerfile.dev | 8 - sdk/python/setup.py | 9 +- 11 files changed, 242 insertions(+), 1218 deletions(-) delete mode 100644 infra/docker-compose/docker-compose.batch.yml delete mode 100644 infra/docker-compose/jupyter/features/cust_trans_fs.yaml delete mode 100644 infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml delete mode 100644 infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb delete mode 100644 infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb delete mode 100644 infra/docker/jupyter/Dockerfile delete mode 100644 infra/docker/jupyter/Dockerfile.dev diff --git a/examples/basic/basic.ipynb b/examples/basic/basic.ipynb index 49658b42357..94fc82f2ce9 100644 --- a/examples/basic/basic.ipynb +++ b/examples/basic/basic.ipynb @@ -15,15 +15,15 @@ "1. Create a synthetic customer feature dataset\n", "2. Register a feature set to represent these features in Feast\n", "3. Ingest these features into Feast\n", - "4. Create a feature query and retrieve historical feature data\n", - "5. Create a feature query and retrieve online feature data" + "4. Create a feature query and retrieve online feature data\n", + "5. Create a feature query and retrieve historical feature data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 1. 
Clone Feast and install all dependencies" + "### 0. Configuration" ] }, { @@ -32,9 +32,79 @@ "metadata": {}, "outputs": [], "source": [ - "!git clone https://github.com/gojek/feast.git \\\n", - "&& cd feast/sdk/python/ && pip install --upgrade --quiet -e . \\\n", - "&& pip install --quiet --upgrade pandas numpy protobuf" + "import os\n", + "\n", + "# Feast Core acts as the central feature registry\n", + "FEAST_CORE_URL = os.getenv('FEAST_CORE_URL', 'core:6565')\n", + "\n", + "# Feast Online Serving allows for the retrieval of real-time feature data\n", + "FEAST_ONLINE_SERVING_URL = os.getenv('FEAST_ONLINE_SERVING_URL', 'online-serving:6566')\n", + "\n", + "# Feast Batch Serving allows for the retrieval of historical feature data\n", + "FEAST_BATCH_SERVING_URL = os.getenv('FEAST_BATCH_SERVING_URL', 'batch-serving:6567')\n", + "\n", + "# PYTHON_REPOSITORY_PATH is the path to the Python SDK inside the Feast Git Repo\n", + "PYTHON_REPOSITORY_PATH = os.getenv('PYTHON_REPOSITORY_PATH', '../../')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. 
Install Feast SDK" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install from PyPi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --ignore-installed --upgrade feast" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(Alternative) Install from local repository" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "os.environ['PYTHON_SDK_PATH'] = os.path.join(PYTHON_REPOSITORY_PATH, 'sdk/python')\n", + "sys.path.append(os.environ['PYTHON_SDK_PATH'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!echo $PYTHON_SDK_PATH" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!python -m pip install --ignore-installed --upgrade -e ${PYTHON_SDK_PATH}" ] }, { @@ -66,7 +136,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Configure Feast services and connect the Feast client" + "### 3. 
Configure Feast services and connect the Feast client\n", + "\n", + "Connect to Feast Core and Feast Online Serving" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = Client(core_url=FEAST_CORE_URL, serving_url=FEAST_ONLINE_SERVING_URL)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a project workspace" ] }, { @@ -75,9 +163,14 @@ "metadata": {}, "outputs": [], "source": [ - "CORE_URL = 'localhost:6565'\n", - "ONLINE_SERVING_URL = 'localhost:6566'\n", - "BATCH_SERVING_URL = 'localhost:6567'" + "client.create_project('customer_project')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the active project" ] }, { @@ -86,8 +179,6 @@ "metadata": {}, "outputs": [], "source": [ - "client = Client(core_url=CORE_URL, serving_url=BATCH_SERVING_URL) # Connect to Feast Core\n", - "client.create_project('customer_project')\n", "client.set_project('customer_project')" ] }, @@ -95,7 +186,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Create synthetic customer features" + "### 4. Create customer features" ] }, { @@ -132,7 +223,7 @@ " }\n", ")\n", "\n", - "print(customer_features.head(10))" + "print(customer_features.head(500))" ] }, { @@ -147,9 +238,7 @@ "metadata": {}, "source": [ "Now we will create a feature set for these features. Feature sets are essentially a schema that represent\n", - "feature values. Feature sets allow Feast to both identify feature values and their structure. \n", - "\n", - "In this case we need to define any entity columns as well as the maximum age. The entity column in this case is \"customer_id\". Max age is set to 1 day (defined in seconds). This means that for each feature query during retrieval, the serving API will only retrieve features up to a maximum of 1 day per provided timestamp and entity combination. " + "feature values. 
Feature sets allow Feast to both identify feature values and their structure. The following feature set contains no features yet." ] }, { @@ -160,8 +249,8 @@ "source": [ "customer_fs = FeatureSet(\n", " \"customer_transactions\",\n", - " max_age=Duration(seconds=86400),\n", - " entities=[Entity(name='customer_id', dtype=ValueType.INT64)]\n", + " entities=[Entity(name='customer_id', dtype=ValueType.INT64)],\n", + " max_age=Duration(seconds=432000) \n", ")" ] }, @@ -169,7 +258,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here we are automatically inferring the schema from the provided dataset" + "Here we are automatically inferring the schema from the provided dataset. The two features from the dataset will be added to the feature set" ] }, { @@ -241,16 +330,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 8. Create a batch retrieval query" + "### 8. Retrieve online features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In order to retrieve historical feature data, the user must provide an entity_rows dataframe. This dataframe contains a combination of timestamps and entities. In this case, the user must provide both customer_ids and timestamps. \n", - "\n", - "We will randomly generate timestamps over the last 30 days, and assign customer_ids to them. When these entity rows are sent to the Feast Serving API to retrieve feature values, along with a list of feature ids, Feast is then able to attach the correct feature values to each entity row. The one exception is if the feature values fall outside of the maximum age window." + "The process of retrieving features from the online API is very similar to that of the batch API. 
The only major difference is that users do not have to provide timestamps (only the latest features are returned, as long as they are within the maximum age window)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The example below retrieves online features for a single customer: \"1001\". It is possible to retrieve any features from feast, even outside of the current project." ] }, { @@ -259,23 +353,51 @@ "metadata": {}, "outputs": [], "source": [ - "event_timestamps = [datetime.utcnow().replace(tzinfo=utc) - timedelta(days=randrange(15), hours=randrange(24), minutes=randrange(60)) for day in range(30)]\n", - "\n", - "entity_rows = pd.DataFrame(\n", - " {\n", - " \"datetime\": event_timestamps,\n", - " \"customer_id\": [customers[idx % len(customers)] for idx in range(len(event_timestamps))],\n", - " }\n", + "online_features = client.get_online_features(\n", + " feature_refs=[\n", + " f\"daily_transactions\",\n", + " f\"total_transactions\",\n", + " ],\n", + " entity_rows=[\n", + " GetOnlineFeaturesRequest.EntityRow(\n", + " fields={\n", + " \"customer_id\": Value(\n", + " int64_val=1001)\n", + " }\n", + " )\n", + " ],\n", ")\n", - "\n", - "print(entity_rows.head(10))" + "print(online_features)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 9. Retrieve historical/batch features" + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The following section requires Google Cloud Platform (Google Cloud Storage and BigQuery)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9. Create a batch retrieval query" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to retrieve historical feature data, the user must provide an entity_rows dataframe. This dataframe contains a combination of timestamps and entities. In this case, the user must provide both customer_ids and timestamps. 
\n", + "\n", + "We will randomly generate timestamps over the last 30 days, and assign customer_ids to them. When these entity rows are sent to the Feast Serving API to retrieve feature values, along with a list of feature ids, Feast is then able to attach the correct feature values to each entity row. " ] }, { @@ -284,29 +406,30 @@ "metadata": {}, "outputs": [], "source": [ - "job = client.get_batch_features(\n", - " feature_refs=[\n", - " f\"daily_transactions\", \n", - " f\"total_transactions\", \n", - " ],\n", - " entity_rows=entity_rows\n", - " )\n", - "df = job.to_dataframe()\n", - "print(df.head(10))" + "event_timestamps = [datetime.utcnow().replace(tzinfo=utc) - timedelta(days=randrange(15), hours=randrange(24), minutes=randrange(60)) for day in range(30)]\n", + "\n", + "entity_rows = pd.DataFrame(\n", + " {\n", + " \"datetime\": event_timestamps,\n", + " \"customer_id\": [customers[idx % len(customers)] for idx in range(len(event_timestamps))],\n", + " }\n", + ")\n", + "\n", + "print(entity_rows.head(10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 10. Retrieve online features" + "### 10. Retrieve historical/batch features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The process of retrieving features from the online API is very similar to that of the batch API. The only major difference is that users do not have to provide timestamps (only the latest features are returned, as long as they are within the maximum age window)" + "Next we will create a new client object, but this time we will configure it to connect to the Batch Serving Service. This service will allow us to retrieve historical feature data." 
] }, { @@ -315,37 +438,39 @@ "metadata": {}, "outputs": [], "source": [ - "online_client = Client(core_url=CORE_URL, serving_url=ONLINE_SERVING_URL)\n", - "online_client.set_project(\"customer_project\")" + "batch_client = Client(core_url=FEAST_CORE_URL, serving_url=FEAST_BATCH_SERVING_URL)\n", + "batch_client.set_project(\"customer_project\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The example below retrieves online features for a single customer: \"1001\"" + "By calling the `get_batch_features` method we are able to retrieve a `job` object for the exporting of feature data. For every entity and timestamp combination in `entity_rows` we will be receiving a row with feature values joined to it." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "online_features = online_client.get_online_features(\n", - " feature_refs=[\n", - " f\"daily_transactions\",\n", - " f\"total_transactions\",\n", - " ],\n", - " entity_rows=[\n", - " GetOnlineFeaturesRequest.EntityRow(\n", - " fields={\n", - " \"customer_id\": Value(\n", - " int64_val=1001)\n", - " }\n", - " )\n", - " ],\n", - ")" + "job = batch_client.get_batch_features(\n", + " feature_refs=[\n", + " f\"customer_project/daily_transactions\", \n", + " f\"customer_project/total_transactions\", \n", + " ],\n", + " entity_rows=entity_rows\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the job is complete, it is possible to retrieve the exported data (from Google Cloud Storage) and load it into memory as a Pandas Dataframe." 
] }, { @@ -354,7 +479,8 @@ "metadata": {}, "outputs": [], "source": [ - "print(online_features)" + "df = job.to_dataframe()\n", + "print(df.head(10))" ] } ], @@ -374,7 +500,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.4" }, "pycharm": { "stem_cell": { diff --git a/infra/docker-compose/.env.sample b/infra/docker-compose/.env.sample index e14bde27728..c8652e8fe0c 100644 --- a/infra/docker-compose/.env.sample +++ b/infra/docker-compose/.env.sample @@ -1,19 +1,21 @@ +# General COMPOSE_PROJECT_NAME=feast - FEAST_VERSION=latest +# Feast Core FEAST_CORE_IMAGE=gcr.io/kf-feast/feast-core -FEAST_CORE_CONFIG=direct-runner -FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY=placeholder +FEAST_CORE_CONFIG=direct-runner.yml +FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY=placeholder.json +# Feast Serving FEAST_SERVING_IMAGE=gcr.io/kf-feast/feast-serving -FEAST_ONLINE_SERVING_CONFIG=online-serving -FEAST_ONLINE_STORE_CONFIG=redis-store -FEAST_BATCH_SERVING_CONFIG=batch-serving -FEAST_BATCH_STORE_CONFIG=bq-store -FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY=placeholder -FEAST_JOB_STAGING_LOCATION=gs://your-gcp-project/bucket +FEAST_ONLINE_SERVING_CONFIG=online-serving.yml +FEAST_ONLINE_STORE_CONFIG=redis-store.yml +FEAST_BATCH_SERVING_CONFIG=batch-serving.yml +FEAST_BATCH_STORE_CONFIG=bq-store.yml +FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY=placeholder.json +FEAST_JOB_STAGING_LOCATION=gs://your-gcs-bucket/staging -FEAST_JUPYTER_IMAGE=gcr.io/kf-feast/feast-jupyter -FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY=placeholder +# Jupyter +FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY=placeholder.json diff --git a/infra/docker-compose/docker-compose.batch.yml b/infra/docker-compose/docker-compose.batch.yml deleted file mode 100644 index c00ac9475bd..00000000000 --- a/infra/docker-compose/docker-compose.batch.yml +++ /dev/null @@ -1,25 +0,0 @@ -version: "3.7" - -services: - batch-serving: - image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION} - volumes: - - 
./serving/${FEAST_BATCH_SERVING_CONFIG}.yml:/etc/feast/application.yml - - ./serving/${FEAST_BATCH_STORE_CONFIG}.yml:/etc/feast/store.yml - - ./gcp-service-accounts/${FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json - depends_on: - - core - - redis - ports: - - 6567:6567 - restart: on-failure - environment: - GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json - FEAST_JOB_STAGING_LOCATION: ${FEAST_JOB_STAGING_LOCATION} - command: - - "java" - - "-Xms1024m" - - "-Xmx1024m" - - "-jar" - - "/opt/feast/feast-serving.jar" - - "--spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml" \ No newline at end of file diff --git a/infra/docker-compose/docker-compose.yml b/infra/docker-compose/docker-compose.yml index 44750650cec..27d82efc3ca 100644 --- a/infra/docker-compose/docker-compose.yml +++ b/infra/docker-compose/docker-compose.yml @@ -4,8 +4,8 @@ services: core: image: ${FEAST_CORE_IMAGE}:${FEAST_VERSION} volumes: - - ./core/${FEAST_CORE_CONFIG}.yml:/etc/feast/application.yml - - ./gcp-service-accounts/${FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json + - ./core/${FEAST_CORE_CONFIG}:/etc/feast/application.yml + - ./gcp-service-accounts/${FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json environment: DB_HOST: db GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json @@ -24,8 +24,8 @@ services: online-serving: image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION} volumes: - - ./serving/${FEAST_ONLINE_SERVING_CONFIG}.yml:/etc/feast/application.yml - - ./serving/${FEAST_ONLINE_STORE_CONFIG}.yml:/etc/feast/store.yml + - ./serving/${FEAST_ONLINE_SERVING_CONFIG}:/etc/feast/application.yml + - ./serving/${FEAST_ONLINE_STORE_CONFIG}:/etc/feast/store.yml depends_on: - core - redis @@ -38,12 +38,34 @@ services: - /opt/feast/feast-serving.jar - --spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml + 
batch-serving: + image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION} + volumes: + - ./serving/${FEAST_BATCH_SERVING_CONFIG}:/etc/feast/application.yml + - ./serving/${FEAST_BATCH_STORE_CONFIG}:/etc/feast/store.yml + - ./gcp-service-accounts/${FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json + depends_on: + - core + - redis + ports: + - 6567:6567 + restart: on-failure + environment: + GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json + FEAST_JOB_STAGING_LOCATION: ${FEAST_JOB_STAGING_LOCATION} + command: + - "java" + - "-Xms1024m" + - "-Xmx1024m" + - "-jar" + - "/opt/feast/feast-serving.jar" + - "--spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml" + jupyter: - image: ${FEAST_JUPYTER_IMAGE}:${FEAST_VERSION} + image: jupyter/datascience-notebook:latest volumes: - - ./jupyter/notebooks:/home/jovyan/feast-notebooks - - ./jupyter/features:/home/jovyan/features - - ./gcp-service-accounts/${FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json + - ../../:/home/jovyan/feast + - ./gcp-service-accounts/${FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json depends_on: - core - online-serving diff --git a/infra/docker-compose/jupyter/features/cust_trans_fs.yaml b/infra/docker-compose/jupyter/features/cust_trans_fs.yaml deleted file mode 100644 index eb21ce9b35b..00000000000 --- a/infra/docker-compose/jupyter/features/cust_trans_fs.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: customer_transactions -kind: feature_set -entities: -- name: customer_id - valueType: INT64 -features: -- name: daily_transactions - valueType: FLOAT -- name: total_transactions - valueType: FLOAT -maxAge: 3600s \ No newline at end of file diff --git a/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml b/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml deleted file mode 100644 index 8293d04b881..00000000000 --- 
a/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: customer_transactions -kind: feature_set -entities: -- name: customer_id - valueType: INT64 -features: -- name: daily_transactions - valueType: FLOAT -- name: total_transactions - valueType: FLOAT -- name: discounts - valueType: FLOAT -maxAge: 3600s \ No newline at end of file diff --git a/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb b/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb deleted file mode 100644 index c288093f07b..00000000000 --- a/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb +++ /dev/null @@ -1,504 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Feast Batch Serving\n", - "This is an extension to `feast-quickstart` notebook to demonstrate the batch serving capability of Feast.\n", - "\n", - "## Prerequisite\n", - "- A running Feast Serving service with store configuration that supports batch retrieval. (eg. 
BigQuery store)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Preparation\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import feast\n", - "import numpy as np\n", - "import pandas as pd\n", - "from datetime import datetime, timedelta\n", - "from feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest\n", - "from feast.types.Value_pb2 import Value as Value\n", - "from feast.client import Client\n", - "from feast.feature_set import FeatureSet" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "client = feast.Client(core_url=\"core:6565\", serving_url=\"batch-serving:6567\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "cust_trans_fs = FeatureSet.from_yaml(\"../features/cust_trans_fs.yaml\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature set updated/created: \"customer_transactions:1\".\n" - ] - } - ], - "source": [ - "client.apply(cust_trans_fs)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datetimecustomer_iddaily_transactionstotal_transactions
02019-12-06 02:17:46.899904100002.797627175.978266
12019-12-06 02:17:46.899915100014.931632153.871975
22019-12-06 02:17:46.899922100020.206628108.558844
32019-12-06 02:17:46.899929100032.354937119.549455
42019-12-06 02:17:46.899937100047.171423115.345183
\n", - "
" - ], - "text/plain": [ - " datetime customer_id daily_transactions \\\n", - "0 2019-12-06 02:17:46.899904 10000 2.797627 \n", - "1 2019-12-06 02:17:46.899915 10001 4.931632 \n", - "2 2019-12-06 02:17:46.899922 10002 0.206628 \n", - "3 2019-12-06 02:17:46.899929 10003 2.354937 \n", - "4 2019-12-06 02:17:46.899937 10004 7.171423 \n", - "\n", - " total_transactions \n", - "0 175.978266 \n", - "1 153.871975 \n", - "2 108.558844 \n", - "3 119.549455 \n", - "4 115.345183 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "offset = 10000\n", - "nr_of_customers = 5\n", - "customer_df = pd.DataFrame(\n", - " {\n", - " \"datetime\": [datetime.utcnow() for _ in range(nr_of_customers)],\n", - " \"customer_id\": [offset + inc for inc in range(nr_of_customers)],\n", - " \"daily_transactions\": [np.random.uniform(0, 10) for _ in range(nr_of_customers)],\n", - " \"total_transactions\": [np.random.uniform(100, 200) for _ in range(nr_of_customers)],\n", - " }\n", - ")\n", - "customer_df" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 5/5 [00:00<00:00, 7.24rows/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Ingested 5 rows into customer_transactions:1\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "client.ingest(cust_trans_fs, dataframe=customer_df)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Batch Retrieval\n", - "Batch retrieval takes a dataframe containing the entities column and event timestamp as an input. The result would be the outer join of the input and the features. The input dataframe needs to have a column named `datetime` as event timestamp. 
No results will be returned if the difference between the feature ingestion timestamp and the `event_timestamp` is greater than the `maxAge` parameter specified in the feature set." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_transactions_v1_feature_timestampcustomer_idevent_timestampcustomer_transactions_v1_daily_transactionscustomer_transactions_v1_total_transactions
02019-12-06 02:17:46+00:00100012019-12-06 02:17:55.612449+00:004.931632153.871980
12019-12-06 02:17:46+00:00100042019-12-06 02:17:55.612449+00:007.171423115.345184
22019-12-06 02:17:46+00:00100002019-12-06 02:17:55.612449+00:002.797627175.978270
32019-12-06 02:17:46+00:00100022019-12-06 02:17:55.612449+00:000.206628108.558846
42019-12-06 02:17:46+00:00100032019-12-06 02:17:55.612449+00:002.354937119.549450
\n", - "
" - ], - "text/plain": [ - " customer_transactions_v1_feature_timestamp customer_id \\\n", - "0 2019-12-06 02:17:46+00:00 10001 \n", - "1 2019-12-06 02:17:46+00:00 10004 \n", - "2 2019-12-06 02:17:46+00:00 10000 \n", - "3 2019-12-06 02:17:46+00:00 10002 \n", - "4 2019-12-06 02:17:46+00:00 10003 \n", - "\n", - " event_timestamp \\\n", - "0 2019-12-06 02:17:55.612449+00:00 \n", - "1 2019-12-06 02:17:55.612449+00:00 \n", - "2 2019-12-06 02:17:55.612449+00:00 \n", - "3 2019-12-06 02:17:55.612449+00:00 \n", - "4 2019-12-06 02:17:55.612449+00:00 \n", - "\n", - " customer_transactions_v1_daily_transactions \\\n", - "0 4.931632 \n", - "1 7.171423 \n", - "2 2.797627 \n", - "3 0.206628 \n", - "4 2.354937 \n", - "\n", - " customer_transactions_v1_total_transactions \n", - "0 153.871980 \n", - "1 115.345184 \n", - "2 175.978270 \n", - "3 108.558846 \n", - "4 119.549450 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "entity_df = customer_df[[\"customer_id\"]].assign(datetime=datetime.utcnow())\n", - "feature_ids=[\n", - " \"customer_transactions:1:daily_transactions\",\n", - " \"customer_transactions:1:total_transactions\",\n", - "]\n", - "batch_job = client.get_batch_features(feature_ids, entity_df)\n", - "batch_job.to_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_transactions_v1_feature_timestampcustomer_idevent_timestampcustomer_transactions_v1_daily_transactionscustomer_transactions_v1_total_transactions
0None100002020-01-05 02:18:43.900732+00:00NoneNone
1None100012020-01-05 02:18:43.900732+00:00NoneNone
2None100022020-01-05 02:18:43.900732+00:00NoneNone
3None100032020-01-05 02:18:43.900732+00:00NoneNone
4None100042020-01-05 02:18:43.900732+00:00NoneNone
\n", - "
" - ], - "text/plain": [ - " customer_transactions_v1_feature_timestamp customer_id \\\n", - "0 None 10000 \n", - "1 None 10001 \n", - "2 None 10002 \n", - "3 None 10003 \n", - "4 None 10004 \n", - "\n", - " event_timestamp \\\n", - "0 2020-01-05 02:18:43.900732+00:00 \n", - "1 2020-01-05 02:18:43.900732+00:00 \n", - "2 2020-01-05 02:18:43.900732+00:00 \n", - "3 2020-01-05 02:18:43.900732+00:00 \n", - "4 2020-01-05 02:18:43.900732+00:00 \n", - "\n", - " customer_transactions_v1_daily_transactions \\\n", - "0 None \n", - "1 None \n", - "2 None \n", - "3 None \n", - "4 None \n", - "\n", - " customer_transactions_v1_total_transactions \n", - "0 None \n", - "1 None \n", - "2 None \n", - "3 None \n", - "4 None " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "stale_entity_df = customer_df[[\"customer_id\"]].assign(datetime=datetime.utcnow() + timedelta(days=30))\n", - "feature_ids=[\n", - " \"customer_transactions:1:daily_transactions\",\n", - " \"customer_transactions:1:total_transactions\",\n", - "]\n", - "batch_job = client.get_batch_features(feature_ids, stale_entity_df)\n", - "batch_job.to_dataframe()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "pycharm": { - "stem_cell": { - "cell_type": "raw", - "metadata": { - "collapsed": false - }, - "source": [] - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb b/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb deleted file mode 100644 index b89e59b1e49..00000000000 --- a/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb +++ 
/dev/null @@ -1,569 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Feast Quick Start\n", - "This is a quick example to demonstrate:\n", - "- Register a feature set on Feast\n", - "- Ingest features into Feast\n", - "- Retrieve the ingested features from Feast\n", - "- Update a feature" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import feast\n", - "import numpy as np\n", - "import pandas as pd\n", - "from datetime import datetime\n", - "from feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest\n", - "from feast.types.Value_pb2 import Value as Value\n", - "from feast.client import Client\n", - "from feast.feature_set import FeatureSet" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, instantiate the client.\n", - "Feast endpoints can be set via the following environmental variables: `FEAST_CORE_URL`, `FEAST_SERVING_URL`.\n", - "Alternatively, they can also be passed in explicitly as follows:\n", - " \n", - "`client = feast.Client(core_url=core:6565, serving_url=online-serving:6566)`" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "client = feast.Client()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Register a feature set\n", - "\n", - "Let's create and register our first feature set. 
Below is an example of a basic customer transactions feature set that has been exported to YAML:\n", - "```\n", - "name: customer_transactions\n", - "kind: feature_set\n", - "entities:\n", - "- name: customer_id\n", - " valueType: INT64\n", - "features:\n", - "- name: daily_transactions\n", - " valueType: FLOAT\n", - "- name: total_transactions\n", - " valueType: FLOAT\n", - "maxAge: 3600s \n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "cust_trans_fs = FeatureSet.from_yaml(\"../features/cust_trans_fs.yaml\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature set updated/created: \"customer_transactions:1\".\n" - ] - } - ], - "source": [ - "client.apply(cust_trans_fs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Ingest features into Feast\n", - "The dataframe below contains the features and entities of the above feature set." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datetimecustomer_iddaily_transactionstotal_transactions
02019-11-26 12:03:47.320634100005.178112110.670651
12019-11-26 12:03:47.320644100010.268114195.393913
22019-11-26 12:03:47.320651100021.486614136.929052
32019-11-26 12:03:47.320658100039.676433166.022999
42019-11-26 12:03:47.320665100045.928573165.687951
\n", - "
" - ], - "text/plain": [ - " datetime customer_id daily_transactions \\\n", - "0 2019-11-26 12:03:47.320634 10000 5.178112 \n", - "1 2019-11-26 12:03:47.320644 10001 0.268114 \n", - "2 2019-11-26 12:03:47.320651 10002 1.486614 \n", - "3 2019-11-26 12:03:47.320658 10003 9.676433 \n", - "4 2019-11-26 12:03:47.320665 10004 5.928573 \n", - "\n", - " total_transactions \n", - "0 110.670651 \n", - "1 195.393913 \n", - "2 136.929052 \n", - "3 166.022999 \n", - "4 165.687951 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "offset = 10000\n", - "nr_of_customers = 5\n", - "customer_df = pd.DataFrame(\n", - " {\n", - " \"datetime\": [datetime.utcnow() for _ in range(nr_of_customers)],\n", - " \"customer_id\": [offset + inc for inc in range(nr_of_customers)],\n", - " \"daily_transactions\": [np.random.uniform(0, 10) for _ in range(nr_of_customers)],\n", - " \"total_transactions\": [np.random.uniform(100, 200) for _ in range(nr_of_customers)],\n", - " }\n", - ")\n", - "customer_df" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/5 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datetimecustomer_iddaily_transactionstotal_transactionsdiscounts
02019-11-26 12:03:47.320634100005.178112110.6706518.389938
12019-11-26 12:03:47.320644100010.268114195.3939130.430047
22019-11-26 12:03:47.320651100021.486614136.9290527.408917
32019-11-26 12:03:47.320658100039.676433166.0229991.192721
42019-11-26 12:03:47.320665100045.928573165.6879512.051037
\n", - "" - ], - "text/plain": [ - " datetime customer_id daily_transactions \\\n", - "0 2019-11-26 12:03:47.320634 10000 5.178112 \n", - "1 2019-11-26 12:03:47.320644 10001 0.268114 \n", - "2 2019-11-26 12:03:47.320651 10002 1.486614 \n", - "3 2019-11-26 12:03:47.320658 10003 9.676433 \n", - "4 2019-11-26 12:03:47.320665 10004 5.928573 \n", - "\n", - " total_transactions discounts \n", - "0 110.670651 8.389938 \n", - "1 195.393913 0.430047 \n", - "2 136.929052 7.408917 \n", - "3 166.022999 1.192721 \n", - "4 165.687951 2.051037 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "discounts = [np.random.uniform(0, 10) for _ in range(nr_of_customers)]\n", - "customer_df_updated = customer_df.assign(discounts=discounts)\n", - "customer_df_updated" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/5 [00:00 Date: Sun, 2 Feb 2020 03:37:50 +0000 Subject: [PATCH 03/15] GitBook: [master] 8 pages modified --- docs/SUMMARY.md | 9 +- docs/concepts.md | 16 +- docs/contributing.md | 2 +- docs/getting-help.md | 2 +- docs/getting-started/installing-feast.md | 433 ----------------------- docs/installing-feast/docker-compose.md | 104 ++++++ docs/installing-feast/gke.md | 218 ++++++++++++ docs/installing-feast/overview.md | 14 + 8 files changed, 353 insertions(+), 445 deletions(-) delete mode 100644 docs/getting-started/installing-feast.md create mode 100644 docs/installing-feast/docker-compose.md create mode 100644 docs/installing-feast/gke.md create mode 100644 docs/installing-feast/overview.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 535fbe6081c..d522e12bc9a 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -6,9 +6,14 @@ * [Getting Help](getting-help.md) * [Contributing](contributing.md) -## Getting Started +## Installing Feast + +* [Overview](installing-feast/overview.md) +* [Docker 
Compose](installing-feast/docker-compose.md) +* [Google Kubernetes Engine \(GKE\)](installing-feast/gke.md) + +## Using Feast -* [Installing Feast](getting-started/installing-feast.md) * [Using Feast](https://github.com/gojek/feast/blob/master/examples/basic/basic.ipynb) ## Reference diff --git a/docs/concepts.md b/docs/concepts.md index 860515c3699..ae158f8f829 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -2,7 +2,7 @@ ## Architecture -![Logical diagram of a typical Feast deployment](.gitbook/assets/basic-architecture-diagram.svg) +![Logical diagram of a typical Feast deployment](.gitbook/assets/basic-architecture-diagram%20%282%29.svg) The core components of a Feast deployment are @@ -106,13 +106,13 @@ Feast supports the following types for feature values * DOUBLE * FLOAT * BOOL -* BYTES_LIST -* STRING_LIST -* INT32_LIST -* INT64_LIST -* DOUBLE_LIST -* FLOAT_LIST -* BOOL_LIST +* BYTES\_LIST +* STRING\_LIST +* INT32\_LIST +* INT64\_LIST +* DOUBLE\_LIST +* FLOAT\_LIST +* BOOL\_LIST ## Glossary diff --git a/docs/contributing.md b/docs/contributing.md index 38caffd654b..d4189191f09 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -300,7 +300,7 @@ docker run --rm \ ## Code reviews -Code submission to Feast \(including submission from project maintainers\) requires review and approval. Please submit a **pull request** to initiate the code review process. We use [prow](https://github.com/kubernetes/test-infra/tree/master/prow) to manage the testing and reviewing of pull requests. Please refer to [config.yaml](../.prow/config.yaml) for details on the test jobs. +Code submission to Feast \(including submission from project maintainers\) requires review and approval. Please submit a **pull request** to initiate the code review process. We use [prow](https://github.com/kubernetes/test-infra/tree/master/prow) to manage the testing and reviewing of pull requests. 
Please refer to [config.yaml](https://github.com/gojek/feast/tree/4cd928d1d3b7972b15f0c5dd29593fcedecea9f5/.prow/config.yaml) for details on the test jobs. ## Code conventions diff --git a/docs/getting-help.md b/docs/getting-help.md index d8180ab7842..b87bd82b928 100644 --- a/docs/getting-help.md +++ b/docs/getting-help.md @@ -2,7 +2,7 @@ ## Chat -* Come and chat with us in the [\#Feast Slack channel in the Kubeflow workspace](https://join.slack.com/t/kubeflow/shared_invite/enQtNDg5MTM4NTQyNjczLTdkNTVhMjg1ZTExOWI0N2QyYTQ2MTIzNTJjMWRiOTFjOGRlZWEzODc1NzMwNTMwM2EzNjY1MTFhODczNjk4MTk) and catch up on all things Feast! +* Come and say hello in [\#Feast](https://join.slack.com/t/kubeflow/shared_invite/enQtNDg5MTM4NTQyNjczLTdkNTVhMjg1ZTExOWI0N2QyYTQ2MTIzNTJjMWRiOTFjOGRlZWEzODc1NzMwNTMwM2EzNjY1MTFhODczNjk4MTk) over in the Kubeflow Slack. ## GitHub diff --git a/docs/getting-started/installing-feast.md b/docs/getting-started/installing-feast.md deleted file mode 100644 index 699dd5fa8f1..00000000000 --- a/docs/getting-started/installing-feast.md +++ /dev/null @@ -1,433 +0,0 @@ -# Installing Feast - -## Overview - -This installation guide will demonstrate three ways of installing Feast: - -* \*\*\*\*[**Docker Compose \(Quickstart\):**](installing-feast.md#docker-compose) Fastest way to get Feast up and running. Provides a pre-installed Jupyter Notebook with the Feast Python SDK and sample code. -* [**Minikube**](installing-feast.md#minikube)**:** This installation has no external dependencies, but does not have a historical feature store installed. It allows users to quickly get a feel for Feast. -* [**Google Kubernetes Engine:**](installing-feast.md#google-kubernetes-engine) This guide installs a single cluster Feast installation on Google's GKE. It has Google Cloud specific dependencies like BigQuery, Dataflow, and Google Cloud Storage. 
- -## Docker Compose \(Quickstart\) - -### Overview - -A docker compose file is provided to quickly test Feast with the official docker images. There is no hard dependency on GCP, unless batch serving is required. Once you have set up Feast using Docker Compose, you will be able to: - -* Create, register, and manage feature sets -* Ingest feature data into Feast -* Retrieve features for online serving - -{% hint style="info" %} -The docker compose setup uses Direct Runner for the Apache Beam jobs. Running Beam with the Direct Runner means it does not need a dedicated runner like Flink or Dataflow, but this comes at the cost of performance. We recommend the use of a full runner when running Feast with very large workloads. -{% endhint %} - -### 0. Requirements - -* [Docker compose](https://docs.docker.com/compose/install/) should be installed. -* TCP ports 6565, 6566, 8888, and 9094 should not be in use. Otherwise, modify the port mappings in `infra/docker-compose/docker-compose.yml` to use unoccupied ports. -* \(for batch serving only\) For batch serving you will also need a [GCP service account key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) that has access to GCS and BigQuery. Port 6567 will be used for the batch serving endpoint. -* \(for batch serving only\) [Google Cloud SDK ](https://cloud.google.com/sdk/install)installed, authenticated, and configured to the project you will use. - -### 1. 
Step-by-step guide \(Online serving only\) - -Clone the [Feast repository](https://github.com/gojek/feast/) and navigate to the `docker-compose` sub-directory: - -```bash -git clone https://github.com/gojek/feast.git && \ -cd feast && export FEAST_HOME_DIR=$(pwd) && \ -cd infra/docker-compose -``` - -Make a copy of the `.env.sample` file: - -```bash -cp .env.sample .env -``` - -Start Feast: - -```javascript -docker-compose up -d -``` - -A Jupyter notebook is now available to use Feast: - -[http://localhost:8888/notebooks/feast-notebooks/feast-quickstart.ipynb](http://localhost:8888/notebooks/feast-notebooks/feast-quickstart.ipynb) - -### 2. Step-by-step guide \(Batch and online serving\) - -Clone the [Feast repository](https://github.com/gojek/feast/) and navigate to the `docker-compose` sub-directory: - -```bash -git clone https://github.com/gojek/feast.git && \ -cd feast && export FEAST_HOME_DIR=$(pwd) && \ -cd infra/docker-compose -``` - -Create a [service account ](https://cloud.google.com/iam/docs/creating-managing-service-accounts)from the GCP console and copy it to the `gcp-service-accounts` folder: - -```javascript -cp my-service-account.json ${FEAST_HOME_DIR}/infra/docker-compose/gcp-service-accounts -``` - -Create a Google Cloud Storage bucket. Make sure that your service account above has read/write permissions to this bucket: - -```bash -gsutil mb gs://my-feast-staging-bucket -``` - -Make a copy of the `.env.sample` file: - -```bash -cp .env.sample .env -``` - -Customize the `.env` file based on your environment. At the very least you have to modify: - -* **FEAST\_CORE\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. -* **FEAST\_BATCH\_SERVING\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. -* **FEAST\_JUPYTER\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. 
-* **FEAST\_JOB\_STAGING\_LOCATION:** Google Cloud Storage bucket that Feast will use to stage data exports and batch retrieval requests. - -We will also need to customize the `bq-store.yml` file inside `infra/docker-compose/serving/` to configure the BigQuery storage configuration as well as the feature sets that the store subscribes to. At a minimum you will need to set: - -* **project\_id:** This is you GCP project id. -* **dataset\_id:** This is the name of the BigQuery dataset that tables will be created in. Each feature set will have one table in BigQuery. - -Start Feast: - -```javascript -docker-compose -f docker-compose.yml -f docker-compose.batch.yml up -d -``` - -A Jupyter notebook is now available to use Feast: - -[http://localhost:8888/notebooks/feast-notebooks](http://localhost:8888/tree/feast-notebooks) - -## Minikube - -### Overview - -This guide will install Feast into [Minikube](https://github.com/kubernetes/minikube). Once Feast is installed you will be able to: - -* Define and register features. -* Load feature data from both batch and streaming sources. -* Retrieve features for online serving. - -{% hint style="warning" %} -This Minikube installation guide is for demonstration purposes only. It is not meant for production use, and does not install a historical feature store. -{% endhint %} - -### 0. Requirements - -The following software should be installed prior to starting: - -1. [Minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/) should be installed. -2. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) installed and configured to work with Minikube. -3. [Helm](https://helm.sh/3) \(2.16.0 or greater\). Helm 3 has not been tested yet. - -### 1. Set up Minikube - -Start Minikube. 
Note the minimum cpu and memory below: - -```bash -minikube start --cpus=3 --memory=4096 --kubernetes-version='v1.15.5' -``` - -Set up your Feast environmental variables - -```bash -export FEAST_IP=$(minikube ip) -export FEAST_CORE_URL=${FEAST_IP}:32090 -export FEAST_SERVING_URL=${FEAST_IP}:32091 -``` - -### 2. Install Feast with Helm - -Clone the [Feast repository](https://github.com/gojek/feast/) and navigate to the `charts` sub-directory: - -```bash -git clone https://github.com/gojek/feast.git && \ -cd feast && export FEAST_HOME_DIR=$(pwd) && \ -cd infra/charts/feast -``` - -Copy the `values-demo.yaml` file for your installation: - -```bash -cp values-demo.yaml my-feast-values.yaml -``` - -Update all occurrences of the domain `feast.example.com` inside of `my-feast-values.yaml` with your Minikube IP. This is to allow external access to the services in the cluster. You can find your Minikube IP by running the following command `minikube ip`, or simply replace the text from the command line: - -```bash -sed -i "s/feast.example.com/${FEAST_IP}/g" my-feast-values.yaml -``` - -Install Tiller: - -```bash -helm init -``` - -Install the Feast Helm chart: - -```bash -helm install --name feast -f my-feast-values.yaml . -``` - -Ensure that the system comes online. This will take a few minutes - -```bash -watch kubectl get pods -``` - -```bash -NAME READY STATUS RESTARTS AGE -pod/feast-feast-core-666fd46db4-l58l6 1/1 Running 0 5m -pod/feast-feast-serving-online-84d99ddcbd 1/1 Running 0 6m -pod/feast-kafka-0 1/1 Running 0 3m -pod/feast-kafka-1 1/1 Running 0 4m -pod/feast-kafka-2 1/1 Running 0 4m -pod/feast-postgresql-0 1/1 Running 0 5m -pod/feast-redis-master-0 1/1 Running 0 5m -pod/feast-zookeeper-0 1/1 Running 0 5m -pod/feast-zookeeper-1 1/1 Running 0 5m -pod/feast-zookeeper-2 1/1 Running 0 5m -``` - -### 3. 
Connect to Feast with the Python SDK - -Install the Python SDK using pip: - -```bash -pip install -e ${FEAST_HOME_DIR}/sdk/python -``` - -Configure the Feast Python SDK: - -```bash -feast config set core_url ${FEAST_CORE_URL} -feast config set serving_url ${FEAST_SERVING_URL} -``` - -That's it! You can now start to use Feast! - -## Google Kubernetes Engine - -### Overview - -This guide will install Feast into a Kubernetes cluster on GCP. It assumes that all of your services will run within a single K8s cluster. Once Feast is installed you will be able to: - -* Define and register features. -* Load feature data from both batch and streaming sources. -* Retrieve features for model training. -* Retrieve features for online serving. - -{% hint style="info" %} -This guide requires [Google Cloud Platform](https://cloud.google.com/) for installation. - -* [BigQuery](https://cloud.google.com/bigquery/) is used for storing historical features. -* [Cloud Dataflow](https://cloud.google.com/dataflow/) is used for running data ingestion jobs. -* [Google Cloud Storage](https://cloud.google.com/storage/) is used for intermediate data storage. -{% endhint %} - -### 0. Requirements - -1. [Google Cloud SDK ](https://cloud.google.com/sdk/install)installed, authenticated, and configured to the project you will use. -2. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) installed. -3. [Helm](https://helm.sh/3) \(2.16.0 or greater\) installed on your local machine with Tiller installed in your cluster. Helm 3 has not been tested yet. - -### 1. Set up GCP - -First define the environmental variables that we will use throughout this installation. Please customize these to reflect your environment. 
- -```bash -export FEAST_GCP_PROJECT_ID=my-gcp-project -export FEAST_GCP_REGION=us-central1 -export FEAST_GCP_ZONE=us-central1-a -export FEAST_BIGQUERY_DATASET_ID=feast -export FEAST_GCS_BUCKET=${FEAST_GCP_PROJECT_ID}_feast_bucket -export FEAST_GKE_CLUSTER_NAME=feast -export FEAST_S_ACCOUNT_NAME=feast-sa -``` - -Create a Google Cloud Storage bucket for Feast to stage data during exports: - -```bash -gsutil mb gs://${FEAST_GCS_BUCKET} -``` - -Create a BigQuery dataset for storing historical features: - -```bash -bq mk ${FEAST_BIGQUERY_DATASET_ID} -``` - -Create the service account that Feast will run as: - -```bash -gcloud iam service-accounts create ${FEAST_S_ACCOUNT_NAME} - -gcloud projects add-iam-policy-binding ${FEAST_GCP_PROJECT_ID} \ - --member serviceAccount:${FEAST_S_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com \ - --role roles/editor - -gcloud iam service-accounts keys create key.json --iam-account \ -${FEAST_S_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com -``` - -Ensure that [Dataflow API is enabled](https://console.cloud.google.com/apis/api/dataflow.googleapis.com/overview): - -```bash -gcloud services enable dataflow.googleapis.com -``` - -### 2. Set up a Kubernetes \(GKE\) cluster - -{% hint style="warning" %} -Provisioning a GKE cluster can expose your services publicly. This guide does not cover securing access to the cluster. -{% endhint %} - -Create a GKE cluster: - -```bash -gcloud container clusters create ${FEAST_GKE_CLUSTER_NAME} \ - --machine-type n1-standard-4 -``` - -Create a secret in the GKE cluster based on your local key `key.json`: - -```bash -kubectl create secret generic feast-gcp-service-account --from-file=key.json -``` - -For this guide we will use `NodePort` for exposing Feast services. In order to do so, we must find an internal IP of at least one GKE node. 
- -```bash -export FEAST_IP=$(kubectl describe nodes | grep InternalIP | awk '{print $2}' | head -n 1) -export FEAST_CORE_URL=${FEAST_IP}:32090 -export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 -export FEAST_BATCH_SERVING_URL=${FEAST_IP}:32092 -``` - -Confirm that you are able to access this node: - -```bash -ping $FEAST_IP -``` - -```bash -PING 10.123.114.11 (10.203.164.22) 56(84) bytes of data. -64 bytes from 10.123.114.11: icmp_seq=1 ttl=63 time=54.2 ms -64 bytes from 10.123.114.11: icmp_seq=2 ttl=63 time=51.2 ms -``` - -Add firewall rules in gcloud to open up ports: -```bash -gcloud compute firewall-rules create feast-core-port --allow tcp:32090 -gcloud compute firewall-rules create feast-online-port --allow tcp:32091 -gcloud compute firewall-rules create feast-batch-port --allow tcp:32092 -gcloud compute firewall-rules create feast-redis-port --allow tcp:32101 -gcloud compute firewall-rules create feast-kafka-ports --allow tcp:31090-31095 -``` - -### 3. Set up Helm - -Run the following command to provide Tiller with authorization to install Feast: - -```bash -kubectl apply -f - < + +This guide will install Feast into a Kubernetes cluster on GCP. It assumes that all of your services will run within a single Kubernetes cluster. Once Feast is installed you will be able to: + +* Define and register features. +* Load feature data from both batch and streaming sources. +* Retrieve features for model training. +* Retrieve features for online serving. + +{% hint style="info" %} +This guide requires [Google Cloud Platform](https://cloud.google.com/) for installation. + +* [BigQuery](https://cloud.google.com/bigquery/) is used for storing historical features. +* [Cloud Dataflow](https://cloud.google.com/dataflow/) is used for running data ingestion jobs. +* [Google Cloud Storage](https://cloud.google.com/storage/) is used for intermediate data storage. +{% endhint %} + +## 0. Requirements + +1. 
[Google Cloud SDK ](https://cloud.google.com/sdk/install)installed, authenticated, and configured to the project you will use. +2. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) installed. +3. [Helm](https://helm.sh/3) \(2.16.0 or greater\) installed on your local machine with Tiller installed in your cluster. Helm 3 has not been tested yet. + +## 1. Set up GCP + +First define the environmental variables that we will use throughout this installation. Please customize these to reflect your environment. + +```bash +export FEAST_GCP_PROJECT_ID=my-gcp-project +export FEAST_GCP_REGION=us-central1 +export FEAST_GCP_ZONE=us-central1-a +export FEAST_BIGQUERY_DATASET_ID=feast +export FEAST_GCS_BUCKET=${FEAST_GCP_PROJECT_ID}_feast_bucket +export FEAST_GKE_CLUSTER_NAME=feast +export FEAST_S_ACCOUNT_NAME=feast-sa +``` + +Create a Google Cloud Storage bucket for Feast to stage data during exports: + +```bash +gsutil mb gs://${FEAST_GCS_BUCKET} +``` + +Create a BigQuery dataset for storing historical features: + +```bash +bq mk ${FEAST_BIGQUERY_DATASET_ID} +``` + +Create the service account that Feast will run as: + +```bash +gcloud iam service-accounts create ${FEAST_S_ACCOUNT_NAME} + +gcloud projects add-iam-policy-binding ${FEAST_GCP_PROJECT_ID} \ + --member serviceAccount:${FEAST_S_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com \ + --role roles/editor + +gcloud iam service-accounts keys create key.json --iam-account \ +${FEAST_S_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com +``` + +Ensure that [Dataflow API is enabled](https://console.cloud.google.com/apis/api/dataflow.googleapis.com/overview): + +```bash +gcloud services enable dataflow.googleapis.com +``` + +## 2. Set up a Kubernetes \(GKE\) cluster + +{% hint style="warning" %} +Provisioning a GKE cluster can expose your services publicly. This guide does not cover securing access to the cluster. 
+{% endhint %} + +Create a GKE cluster: + +```bash +gcloud container clusters create ${FEAST_GKE_CLUSTER_NAME} \ + --machine-type n1-standard-4 +``` + +Create a secret in the GKE cluster based on your local key `key.json`: + +```bash +kubectl create secret generic feast-gcp-service-account --from-file=key.json +``` + +For this guide we will use `NodePort` for exposing Feast services. In order to do so, we must find an internal IP of at least one GKE node. + +```bash +export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) +export FEAST_CORE_URL=${FEAST_IP}:32090 +export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 +export FEAST_BATCH_SERVING_URL=${FEAST_IP}:32092 +``` + +Confirm that you are able to access this node \(please make sure that no firewall rules are preventing access to these ports\): + +```bash +ping $FEAST_IP +``` + +```bash +PING 10.123.114.11 (10.203.164.22) 56(84) bytes of data. +64 bytes from 10.123.114.11: icmp_seq=1 ttl=63 time=54.2 ms +64 bytes from 10.123.114.11: icmp_seq=2 ttl=63 time=51.2 ms +``` + +Add firewall rules to open up ports on your Google Cloud Platform project: + +```bash +gcloud compute firewall-rules create feast-core-port --allow tcp:32090 +gcloud compute firewall-rules create feast-online-port --allow tcp:32091 +gcloud compute firewall-rules create feast-batch-port --allow tcp:32092 +gcloud compute firewall-rules create feast-redis-port --allow tcp:32101 +gcloud compute firewall-rules create feast-kafka-ports --allow tcp:31090-31095 +``` + +## 3. 
Set up Helm + +Run the following command to provide Tiller with authorization to install Feast: + +```bash +kubectl apply -f - < Date: Sun, 2 Feb 2020 13:29:22 +0000 Subject: [PATCH 04/15] GitBook: [master] 5 pages modified --- docs/SUMMARY.md | 1 + docs/installing-feast/docker-compose.md | 32 +++-- docs/installing-feast/gke.md | 67 ++++----- docs/installing-feast/overview.md | 2 +- docs/installing-feast/troubleshooting.md | 168 +++++++++++++++++++++++ 5 files changed, 220 insertions(+), 50 deletions(-) create mode 100644 docs/installing-feast/troubleshooting.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d522e12bc9a..d2cc03a20bd 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -11,6 +11,7 @@ * [Overview](installing-feast/overview.md) * [Docker Compose](installing-feast/docker-compose.md) * [Google Kubernetes Engine \(GKE\)](installing-feast/gke.md) +* [Troubleshooting](installing-feast/troubleshooting.md) ## Using Feast diff --git a/docs/installing-feast/docker-compose.md b/docs/installing-feast/docker-compose.md index b8c9efbe3a1..fbc491b4376 100644 --- a/docs/installing-feast/docker-compose.md +++ b/docs/installing-feast/docker-compose.md @@ -46,6 +46,8 @@ cp .env.sample .env ## 2. Docker Compose for Online Serving Only +### 2.1 Start Feast \(without batch retrieval support\) + If you do not require batch serving, then its possible to simply bring up Feast: ```javascript @@ -56,13 +58,15 @@ A Jupyter Notebook environment is now available to use Feast: [http://localhost:8888/tree/feast/examples](http://localhost:8888/tree/feast/examples) -## 2. Docker Compose for Online and Batch Serving +## 3. Docker Compose for Online and Batch Serving {% hint style="info" %} Batch serving requires Google Cloud Storage to function, specifically Google Cloud Storage \(GCP\) and BigQuery. 
{% endhint %} -Create a [service account ](https://cloud.google.com/iam/docs/creating-managing-service-accounts)from the GCP console and copy it to the `gcp-service-accounts` folder: +### 3.1 Set up Google Cloud Platform + +Create a [service account ](https://cloud.google.com/iam/docs/creating-managing-service-accounts)from the GCP console and copy it to the `infra/docker-compose/gcp-service-accounts` folder: ```javascript cp my-service-account.json ${FEAST_HOME_DIR}/infra/docker-compose/gcp-service-accounts @@ -74,28 +78,32 @@ Create a Google Cloud Storage bucket. Make sure that your service account above gsutil mb gs://my-feast-staging-bucket ``` -### 2.1 Configure .env +### 3.2 Configure .env Configure the `.env` file based on your environment. At the very least you have to modify: -* **FEAST\_CORE\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name, for example `key.json`. -* **FEAST\_BATCH\_SERVING\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name, for example `key.json`. -* **FEAST\_JUPYTER\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name, for example `key.json`. -* **FEAST\_JOB\_STAGING\_LOCATION:** Google Cloud Storage bucket that Feast will use to stage data exports and batch retrieval requests, for example `gs://your-gcs-bucket/staging` +| Parameter | Description | +| :--- | :--- | +| FEAST\_CORE\_GCP\_SERVICE\_ACCOUNT\_KEY | This should be your service account file name, for example `key.json`. 
| +| FEAST\_BATCH\_SERVING\_GCP\_SERVICE\_ACCOUNT\_KEY | This should be your service account file name, for example `key.json` | +| FEAST\_JUPYTER\_GCP\_SERVICE\_ACCOUNT\_KEY | This should be your service account file name, for example `key.json` | +| FEAST\_JOB\_STAGING\_LOCATION | Google Cloud Storage bucket that Feast will use to stage data exports and batch retrieval requests, for example `gs://your-gcs-bucket/staging` | -### 2.2 Configure .bq-store.yml +### 3.3 Configure .bq-store.yml We will also need to configure the `bq-store.yml` file inside `infra/docker-compose/serving/` to configure the BigQuery storage configuration as well as the feature sets that the store subscribes to. At a minimum you will need to set: -* **project\_id:** This is you [GCP project Id](https://cloud.google.com/resource-manager/docs/creating-managing-projects). -* **dataset\_id:** This is the name of the BigQuery dataset that tables will be created in. Each feature set will have one table in BigQuery. +| Parameter | Description | +| :--- | :--- | +| bigquery\_config.project\_id | This is you [GCP project Id](https://cloud.google.com/resource-manager/docs/creating-managing-projects). | +| bigquery\_config.dataset\_id | This is the name of the BigQuery dataset that tables will be created in. Each feature set will have one table in BigQuery. | -### 2.3 Start Feast with batch retrieval support +### 3.4 Start Feast \(with batch retrieval support\) Start Feast: ```javascript -docker-compose -f docker-compose.yml -f docker-compose.batch.yml up -d +docker-compose up -d ``` A Jupyter Notebook environment is now available to use Feast: diff --git a/docs/installing-feast/gke.md b/docs/installing-feast/gke.md index e48609fb82e..162f0a26064 100644 --- a/docs/installing-feast/gke.md +++ b/docs/installing-feast/gke.md @@ -13,7 +13,6 @@ This guide will install Feast into a Kubernetes cluster on GCP. 
It assumes that This guide requires [Google Cloud Platform](https://cloud.google.com/) for installation. * [BigQuery](https://cloud.google.com/bigquery/) is used for storing historical features. -* [Cloud Dataflow](https://cloud.google.com/dataflow/) is used for running data ingestion jobs. * [Google Cloud Storage](https://cloud.google.com/storage/) is used for intermediate data storage. {% endhint %} @@ -34,38 +33,26 @@ export FEAST_GCP_ZONE=us-central1-a export FEAST_BIGQUERY_DATASET_ID=feast export FEAST_GCS_BUCKET=${FEAST_GCP_PROJECT_ID}_feast_bucket export FEAST_GKE_CLUSTER_NAME=feast -export FEAST_S_ACCOUNT_NAME=feast-sa +export FEAST_SERVICE_ACCOUNT_NAME=feast-sa ``` -Create a Google Cloud Storage bucket for Feast to stage data during exports: +Create a Google Cloud Storage bucket for Feast to stage batch data exports: ```bash gsutil mb gs://${FEAST_GCS_BUCKET} ``` -Create a BigQuery dataset for storing historical features: - -```bash -bq mk ${FEAST_BIGQUERY_DATASET_ID} -``` - Create the service account that Feast will run as: ```bash -gcloud iam service-accounts create ${FEAST_S_ACCOUNT_NAME} +gcloud iam service-accounts create ${FEAST_SERVICE_ACCOUNT_NAME} gcloud projects add-iam-policy-binding ${FEAST_GCP_PROJECT_ID} \ - --member serviceAccount:${FEAST_S_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com \ + --member serviceAccount:${FEAST_SERVICE_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com \ --role roles/editor gcloud iam service-accounts keys create key.json --iam-account \ -${FEAST_S_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com -``` - -Ensure that [Dataflow API is enabled](https://console.cloud.google.com/apis/api/dataflow.googleapis.com/overview): - -```bash -gcloud services enable dataflow.googleapis.com +${FEAST_SERVICE_ACCOUNT_NAME}@${FEAST_GCP_PROJECT_ID}.iam.gserviceaccount.com ``` ## 2. 
Set up a Kubernetes \(GKE\) cluster @@ -87,7 +74,7 @@ Create a secret in the GKE cluster based on your local key `key.json`: kubectl create secret generic feast-gcp-service-account --from-file=key.json ``` -For this guide we will use `NodePort` for exposing Feast services. In order to do so, we must find an internal IP of at least one GKE node. +For this guide we will use `NodePort` for exposing Feast services. In order to do so, we must find an External IP of at least one GKE node. This should be a public IP. ```bash export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) @@ -96,18 +83,6 @@ export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 export FEAST_BATCH_SERVING_URL=${FEAST_IP}:32092 ``` -Confirm that you are able to access this node \(please make sure that no firewall rules are preventing access to these ports\): - -```bash -ping $FEAST_IP -``` - -```bash -PING 10.123.114.11 (10.203.164.22) 56(84) bytes of data. -64 bytes from 10.123.114.11: icmp_seq=1 ttl=63 time=54.2 ms -64 bytes from 10.123.114.11: icmp_seq=2 ttl=63 time=51.2 ms -``` - Add firewall rules to open up ports on your Google Cloud Platform project: ```bash @@ -170,8 +145,10 @@ cp values.yaml my-feast-values.yaml Update `my-feast-values.yaml` based on your GCP and GKE environment. * Required fields are paired with comments which indicate whether they need to be replaced. -* All occurrences of `EXTERNAL_IP` should be replaced with either your domain name or the IP stored in `$FEAST_IP`. +* All occurrences of `EXTERNAL_IP` should be replaced with either a domain pointing to a load balancer for the cluster or the IP stored in `$FEAST_IP`. * Replace all occurrences of `YOUR_BUCKET_NAME` with your bucket name stored in `$FEAST_GCS_BUCKET` +* Change `feast-serving-batch.store.yaml.bigquery_config.project_id` to your GCP project Id. +* Change `feast-serving-batch.store.yaml.bigquery_config.dataset_id` to the BigQuery dataset that Feast should use. 
Install the Feast Helm chart: @@ -179,16 +156,18 @@ Install the Feast Helm chart: helm install --name feast -f my-feast-values.yaml . ``` -Ensure that the system comes online. This will take a few minutes +Ensure that the system comes online. This will take a few minutes. ```bash -watch kubectl get pods +kubectl get pods ``` +There may be pod restarts while waiting for Kafka to come online. + ```bash NAME READY STATUS RESTARTS AGE -pod/feast-feast-core-666fd46db4-l58l6 1/1 Running 0 5m -pod/feast-feast-serving-online-84d99ddcbd 1/1 Running 0 6m +pod/feast-feast-core-666fd46db4-l58l6 1/1 Running 2 5m +pod/feast-feast-serving-online-84d99ddcbd 1/1 Running 3 6m pod/feast-kafka-0 1/1 Running 0 3m pod/feast-kafka-1 1/1 Running 0 4m pod/feast-kafka-2 1/1 Running 0 4m @@ -204,7 +183,7 @@ pod/feast-zookeeper-2 1/1 Running 0 5m Install the Python SDK using pip: ```bash -pip install -e ${FEAST_HOME_DIR}/sdk/python +pip install feast ``` Configure the Feast Python SDK: @@ -214,5 +193,19 @@ feast config set core_url ${FEAST_CORE_URL} feast config set serving_url ${FEAST_ONLINE_SERVING_URL} ``` +Test whether you are able to connect to Feast Core + +```text +feast projects list +``` + +Should print an empty list: + +```text +NAME +``` + That's it! You can now start to use Feast! +Please see our [examples](https://github.com/gojek/feast/blob/master/examples/) to get started. + diff --git a/docs/installing-feast/overview.md b/docs/installing-feast/overview.md index 1b69a5f77ad..e0cd44e962a 100644 --- a/docs/installing-feast/overview.md +++ b/docs/installing-feast/overview.md @@ -8,7 +8,7 @@ This installation guide will demonstrate three ways of installing Feast: * Does not officially support a production job manager like Dataflow * \*\*\*\*[**Google Kubernetes Engine**](gke.md)**:** * Recommended way to install Feast for production use. - * The guide has dependencies on BigQuery, Dataflow, and Google Cloud Storage. 
+ * The guide has dependencies on BigQuery, and Google Cloud Storage. diff --git a/docs/installing-feast/troubleshooting.md b/docs/installing-feast/troubleshooting.md new file mode 100644 index 00000000000..bcd4c9bdf93 --- /dev/null +++ b/docs/installing-feast/troubleshooting.md @@ -0,0 +1,168 @@ +# Troubleshooting + +If at any point in time you cannot resolve a problem, please see the [Getting Help](../getting-help.md) section for reaching out to the Feast community. + +## How can I verify that all services are operational? + +### Docker Compose + +The containers should be in an `up` state: + +```text +docker ps +``` + +```text +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +d7447205bced jupyter/datascience-notebook:latest "tini -g -- start-no…" 2 minutes ago Up 2 minutes 0.0.0.0:8888->8888/tcp feast_jupyter_1 +8e49dbe81b92 gcr.io/kf-feast/feast-serving:latest "java -Xms1024m -Xmx…" 2 minutes ago Up 5 seconds 0.0.0.0:6567->6567/tcp feast_batch-serving_1 +b859494bd33a gcr.io/kf-feast/feast-serving:latest "java -jar /opt/feas…" 2 minutes ago Up About a minute 0.0.0.0:6566->6566/tcp feast_online-serving_1 +5c4962811767 gcr.io/kf-feast/feast-core:latest "java -jar /opt/feas…" 2 minutes ago Up 2 minutes 0.0.0.0:6565->6565/tcp feast_core_1 +1ba7239e0ae0 confluentinc/cp-kafka:5.2.1 "/etc/confluent/dock…" 2 minutes ago Up 2 minutes 0.0.0.0:9092->9092/tcp, 0.0.0.0:9094->9094/tcp feast_kafka_1 +e2779672735c confluentinc/cp-zookeeper:5.2.1 "/etc/confluent/dock…" 2 minutes ago Up 2 minutes 2181/tcp, 2888/tcp, 3888/tcp feast_zookeeper_1 +39ac26f5c709 postgres:12-alpine "docker-entrypoint.s…" 2 minutes ago Up 2 minutes 5432/tcp feast_db_1 +3c4ee8616096 redis:5-alpine "docker-entrypoint.s…" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp feast_redis_1 +``` + +### Google Kubernetes Engine + +All services should either be in a `running` state or `complete`state: + +```text +kubectl get pods +``` + +```text +NAME READY STATUS RESTARTS AGE 
+feast-feast-core-5ff566f946-4wlbh 1/1 Running 1 32m +feast-feast-serving-batch-848d74587b-96hq6 1/1 Running 2 32m +feast-feast-serving-online-df69755d5-fml8v 1/1 Running 2 32m +feast-kafka-0 1/1 Running 1 32m +feast-kafka-1 1/1 Running 0 30m +feast-kafka-2 1/1 Running 0 29m +feast-kafka-config-3e860262-zkzr8 0/1 Completed 0 32m +feast-postgresql-0 1/1 Running 0 32m +feast-prometheus-statsd-exporter-554db85b8d-r4hb8 1/1 Running 0 32m +feast-redis-master-0 1/1 Running 0 32m +feast-zookeeper-0 1/1 Running 0 32m +feast-zookeeper-1 1/1 Running 0 32m +feast-zookeeper-2 1/1 Running 0 31m +``` + +## How can I verify that I can connect to all services? + +First find the `IP:Port` combination of your services. + +### **Docker Compose \(from inside the docker cluster\)** + +You will probably need to connect using the hostnames of services and standard Feast ports: + +```bash +export FEAST_CORE_URL=core:6565 +export FEAST_ONLINE_SERVING_URL=online-serving:6566 +export FEAST_BATCH_SERVING_URL=batch-serving:6567 +``` + +### **Docker Compose \(from outside the docker cluster\)** + +You will probably need to connect using `localhost` and standard ports: + +```bash +export FEAST_CORE_URL=localhost:6565 +export FEAST_ONLINE_SERVING_URL=localhost:6566 +export FEAST_BATCH_SERVING_URL=localhost:6567 +``` + +### **Google Kubernetes Engine \(GKE\)** + +You will need to find the external IP of one of the nodes as well as the NodePorts. Please make sure that your firewall is open for these ports: + +```bash +export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) +export FEAST_CORE_URL=${FEAST_IP}:32090 +export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 +export FEAST_BATCH_SERVING_URL=${FEAST_IP}:32092 +``` + +`netcat`, `telnet`, or even `curl` can be used to test whether all services are available and ports are open, but `grpc_cli` is the most powerful. It can be installed from [here](https://github.com/grpc/grpc/blob/master/doc/command_line_tool.md). 
+ +### Testing Feast Core: + +```bash +grpc_cli ls ${FEAST_CORE_URL} feast.core.CoreService +``` + +```text +GetFeastCoreVersion +GetFeatureSet +ListFeatureSets +ListStores +ApplyFeatureSet +UpdateStore +CreateProject +ArchiveProject +ListProjects +``` + +### Testing Feast Batch Serving and Online Serving + +```bash +grpc_cli ls ${FEAST_BATCH_SERVING_URL} feast.serving.ServingService +``` + +```text +GetFeastServingInfo +GetOnlineFeatures +GetBatchFeatures +GetJob +``` + +```bash +grpc_cli ls ${FEAST_ONLINE_SERVING_URL} feast.serving.ServingService +``` + +```text +GetFeastServingInfo +GetOnlineFeatures +GetBatchFeatures +GetJob +``` + +## How can I print logs from the Feast Services? + +Feast will typically have three services that you need to monitor if something goes wrong. + +* Feast Core +* Feast Serving \(Online\) +* Feast Serving \(Batch\) + +In order to print the logs from these services, please run the commands below. + +### Docker Compose + +```text + docker logs -f feast_core_1 +``` + +```text +docker logs -f feast_batch-serving_1 +``` + +```text +docker logs -f feast_online-serving_1 +``` + +### Google Kubernetes Engine + +```text +kubectl logs $(kubectl get pods | grep feast-core | awk '{print $1}') +``` + +```text +kubectl logs $(kubectl get pods | grep feast-serving-batch | awk '{print $1}') +``` + +```text +kubectl logs $(kubectl get pods | grep feast-serving-online | awk '{print $1}') +``` + From 3ecb66c7d99bea23fd26398f13597bbb94d452c6 Mon Sep 17 00:00:00 2001 From: Willem Pienaar Date: Thu, 6 Feb 2020 03:59:37 +0000 Subject: [PATCH 05/15] GitBook: [master] one page modified --- docs/getting-help.md | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/docs/getting-help.md b/docs/getting-help.md index b87bd82b928..597a782d606 100644 --- a/docs/getting-help.md +++ b/docs/getting-help.md @@ -1,23 +1,36 @@ # Getting Help -## Chat +### Chat * Come and say hello in 
[\#Feast](https://join.slack.com/t/kubeflow/shared_invite/enQtNDg5MTM4NTQyNjczLTdkNTVhMjg1ZTExOWI0N2QyYTQ2MTIzNTJjMWRiOTFjOGRlZWEzODc1NzMwNTMwM2EzNjY1MTFhODczNjk4MTk) over in the Kubeflow Slack. -## GitHub +### GitHub * Feast's GitHub repo can be [found here](https://github.com/gojek/feast/). * Found a bug or need a feature? [Create an issue on GitHub](https://github.com/gojek/feast/issues/new) -## Mailing list +### Community Call -### Feast discussion +We have a community call every 2 weeks. Alternating between two times. + +* 11 am \(UTC + 8\) +* 5 pm \(UTC + 8\) + +Please join the [**feast-dev**](getting-help.md#feast-development) mailing list to receive the the calendar invitation. + +### Mailing list + +#### Feast discussion * Google Group: [https://groups.google.com/d/forum/feast-discuss](https://groups.google.com/d/forum/feast-discuss) * Mailing List: [feast-discuss@googlegroups.com](mailto:feast-discuss@googlegroups.com) -### Feast development +#### Feast development * Google Group: [https://groups.google.com/d/forum/feast-dev](https://groups.google.com/d/forum/feast-dev) * Mailing List: [feast-dev@googlegroups.com](mailto:feast-dev@googlegroups.com) +### Google Drive + +The Feast community also maintains a [Google Drive](https://drive.google.com/drive/u/0/folders/0AAe8j7ZK3sxSUk9PVA) with documents like RFCs, meeting notes, or roadmaps. Please join one of the above mailing lists \(feast-dev or feast-discuss\) to gain access to the drive. 
+ From 9111b0a1f2158d78c22ef5ce9905564786d35e3e Mon Sep 17 00:00:00 2001 From: Willem Pienaar <6728866+woop@users.noreply.github.com> Date: Mon, 3 Feb 2020 15:27:48 +0800 Subject: [PATCH 06/15] Update README.md and remove versions from Helm Charts (#457) * Update README.md and remove versions from Helm Charts * Fix type in README.md --- README.md | 26 ++++++++++++++++--- infra/charts/feast/README.md | 6 ++--- .../feast-core/templates/deployment.yaml | 2 +- .../feast/charts/feast-core/values.yaml | 1 - .../feast-serving/templates/deployment.yaml | 2 +- .../feast/charts/feast-serving/values.yaml | 1 - infra/charts/feast/requirements.yaml | 6 ++--- infra/charts/feast/values.yaml | 12 ++++----- 8 files changed, 36 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index d9b16748266..97cbcde421d 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,29 @@ my_model = ml.fit(data) prediction = my_model.predict(fs.get_online_features(customer_features, customer_entities)) ``` +## Getting Started with Docker Compose +The following commands will start Feast in online-only mode. +``` +git clone https://github.com/gojek/feast.git +cd feast/infra/docker-compose +cp .env.sample .env +docker-compose up -d +``` + +A [Jupyter Notebook](http://localhost:8888/tree/feast/examples) is now available to start using Feast. + +Please see the links below to set up Feast for batch/historical serving with BigQuery. 
+ ## Important resources - * [Why Feast?](docs/why-feast.md) - * [Concepts](docs/concepts.md) - * [Installation](docs/getting-started/installing-feast.md) - * [Getting Help](docs/community.md) + +Please refer to the official documentation at + + * [Why Feast?](https://docs.feast.dev/why-feast) + * [Concepts](https://docs.feast.dev/concepts) + * [Installation](https://docs.feast.dev/installing-feast/overview) + * [Examples](https://github.com/gojek/feast/blob/master/examples/) + * [Change Log](https://github.com/gojek/feast/blob/master/CHANGELOG.md) + * [Slack (#Feast)](https://join.slack.com/t/kubeflow/shared_invite/enQtNDg5MTM4NTQyNjczLTdkNTVhMjg1ZTExOWI0N2QyYTQ2MTIzNTJjMWRiOTFjOGRlZWEzODc1NzMwNTMwM2EzNjY1MTFhODczNjk4MTk) ## Notice diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index 0463a9a3f89..ab5321ca865 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -36,10 +36,10 @@ helm repo add feast-charts https://feast-charts.storage.googleapis.com helm repo update ``` -Install Feast release with minimal features, without batch serving and persistency: +Install Feast release with minimal features, without batch serving and persistence: ```bash RELEASE_NAME=demo -helm install feast-charts/feast --name $RELEASE_NAME --version 0.3.2 -f values-demo.yaml +helm install feast-charts/feast --name $RELEASE_NAME -f values-demo.yaml ``` Install Feast release for typical use cases, with batch and online serving: @@ -60,7 +60,7 @@ PROJECT_ID=google-cloud-project-id DATASET_ID=bigquery-dataset-id # Install the Helm release using default values.yaml -helm install feast-charts/feast --name feast --version 0.3.2 \ +helm install feast-charts/feast --name feast \ --set feast-serving-batch."application\.yaml".feast.jobs.staging-location=$STAGING_LOCATION \ --set feast-serving-batch."store\.yaml".bigquery_config.project_id=$PROJECT_ID \ --set feast-serving-batch."store\.yaml".bigquery_config.dataset_id=$DATASET_ID diff --git 
a/infra/charts/feast/charts/feast-core/templates/deployment.yaml b/infra/charts/feast/charts/feast-core/templates/deployment.yaml index 02a533c2637..0671d9574b3 100644 --- a/infra/charts/feast/charts/feast-core/templates/deployment.yaml +++ b/infra/charts/feast/charts/feast-core/templates/deployment.yaml @@ -40,7 +40,7 @@ spec: containers: - name: {{ .Chart.Name }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + image: '{{ .Values.image.repository }}:{{ required "No .image.tag found. This must be provided as input." .Values.image.tag }}' imagePullPolicy: {{ .Values.image.pullPolicy }} volumeMounts: diff --git a/infra/charts/feast/charts/feast-core/values.yaml b/infra/charts/feast/charts/feast-core/values.yaml index 321d71c844d..f746bc96ead 100644 --- a/infra/charts/feast/charts/feast-core/values.yaml +++ b/infra/charts/feast/charts/feast-core/values.yaml @@ -42,7 +42,6 @@ replicaCount: 1 # image configures the Docker image for Feast Core image: repository: gcr.io/kf-feast/feast-core - tag: 0.3.2 pullPolicy: IfNotPresent # application.yaml is the main configuration for Feast Core application. diff --git a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml index 5be636df96b..e6824a23465 100644 --- a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml @@ -47,7 +47,7 @@ spec: containers: - name: {{ .Chart.Name }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + image: '{{ .Values.image.repository }}:{{ required "No .image.tag found. This must be provided as input." 
.Values.image.tag }}' imagePullPolicy: {{ .Values.image.pullPolicy }} volumeMounts: diff --git a/infra/charts/feast/charts/feast-serving/values.yaml b/infra/charts/feast/charts/feast-serving/values.yaml index d489a48748d..d2b3c599479 100644 --- a/infra/charts/feast/charts/feast-serving/values.yaml +++ b/infra/charts/feast/charts/feast-serving/values.yaml @@ -29,7 +29,6 @@ replicaCount: 1 # image configures the Docker image for Feast Serving image: repository: gcr.io/kf-feast/feast-serving - tag: 0.3.2 pullPolicy: IfNotPresent # application.yaml is the main configuration for Feast Serving application. diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index ed280d64b6e..5416ded3fee 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feast-core - version: 0.3.2 + version: 0.4.4 condition: feast-core.enabled - name: feast-serving alias: feast-serving-batch - version: 0.3.2 + version: 0.4.4 condition: feast-serving-batch.enabled - name: feast-serving alias: feast-serving-online - version: 0.3.2 + version: 0.4.4 condition: feast-serving-online.enabled diff --git a/infra/charts/feast/values.yaml b/infra/charts/feast/values.yaml index db3ec44f330..f9a0a76dc1b 100644 --- a/infra/charts/feast/values.yaml +++ b/infra/charts/feast/values.yaml @@ -12,10 +12,6 @@ # of Feast Serving: online and batch will be deployed. Both described # using the same chart "feast-serving". # -# The following are default values for typical Feast deployment, but not -# for production setting. Refer to "values-production.yaml" for recommended -# values in production environment. 
-# # Note that the import job by default uses DirectRunner # https://beam.apache.org/documentation/runners/direct/ # in this configuration since it allows Feast to run in more environments @@ -45,16 +41,18 @@ # ============================================================ feast-core: - # enabled specifies whether to install Feast Core component. + # If enabled specifies whether to install Feast Core component. # # Normally, this is set to "false" when Feast users need access to low latency # Feast Serving, by deploying multiple instances of Feast Serving closest # to the client. These instances of Feast Serving however can still use # the same shared Feast Core. enabled: true - # Specify what image tag to use. Keep this consistent for all components + + # Specify which image tag to use. Keep this consistent for all components image: tag: "0.4.4" + # jvmOptions are options that will be passed to the Java Virtual Machine (JVM) # running Feast Core. # @@ -63,6 +61,7 @@ feast-core: jvmOptions: - -Xms1024m - -Xmx1024m + # resources that should be allocated to Feast Core. resources: requests: @@ -70,6 +69,7 @@ feast-core: memory: 1024Mi limits: memory: 2048Mi + # gcpServiceAccount is the Google service account that Feast Core will use. 
gcpServiceAccount: # useExistingSecret specifies Feast to use an existing secret containing From ee7b6e5ad78f996cd1787f42eb2e9f09c77ed784 Mon Sep 17 00:00:00 2001 From: Chen Zhiling Date: Fri, 7 Feb 2020 08:19:49 +0700 Subject: [PATCH 07/15] Fix typo in split string length check (#464) --- sdk/java/src/main/java/com/gojek/feast/RequestUtil.java | 2 +- sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java b/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java index 075c570c4e9..874196e92bd 100644 --- a/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java +++ b/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java @@ -61,7 +61,7 @@ public static List createFeatureRefs( "Feature id '%s' contains invalid version. Expected format: /:.", featureRefString)); } - } else if (projectSplit.length == 1) { + } else if (featureSplit.length == 1) { name = featureSplit[0]; } else { throw new IllegalArgumentException( diff --git a/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java b/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java index 1c58e9435c6..3b9429ad8f6 100644 --- a/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java +++ b/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java @@ -60,7 +60,7 @@ private static Stream provideValidFeatureIds() { Arrays.asList( "driver_project/driver_id:1", "driver_project/driver_name:1", - "booking_project/driver_name:1"), + "booking_project/driver_name"), Arrays.asList( FeatureReference.newBuilder() .setProject("driver_project") @@ -74,7 +74,6 @@ private static Stream provideValidFeatureIds() { .build(), FeatureReference.newBuilder() .setProject("booking_project") - .setVersion(1) .setName("driver_name") .build()))); } From 128f075428b9645850be45f4633e81d74ff6ed7c Mon Sep 17 00:00:00 2001 From: Chen Zhiling Date: Fri, 7 Feb 2020 12:11:49 +0700 Subject: [PATCH 08/15] Use 
concrete class for AvroCoder compatibility (#465) --- sdk/java/src/main/java/com/gojek/feast/Row.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/java/src/main/java/com/gojek/feast/Row.java b/sdk/java/src/main/java/com/gojek/feast/Row.java index 9366fe1bb03..ceef139aa13 100644 --- a/sdk/java/src/main/java/com/gojek/feast/Row.java +++ b/sdk/java/src/main/java/com/gojek/feast/Row.java @@ -31,7 +31,7 @@ @SuppressWarnings("UnusedReturnValue") public class Row { private Timestamp entity_timestamp; - private Map fields; + private HashMap fields; public static Row create() { Row row = new Row(); From 7015c90034fffa14b0cf974939d2d9a7a4007f8b Mon Sep 17 00:00:00 2001 From: Khor Shu Heng <32997938+khorshuheng@users.noreply.github.com> Date: Mon, 10 Feb 2020 17:59:51 +0800 Subject: [PATCH 09/15] Exclude versionb from grouping (#441) --- .../src/main/java/feast/serving/specs/CachedSpecService.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/serving/src/main/java/feast/serving/specs/CachedSpecService.java b/serving/src/main/java/feast/serving/specs/CachedSpecService.java index 040a870ffe1..1184f6da95a 100644 --- a/serving/src/main/java/feast/serving/specs/CachedSpecService.java +++ b/serving/src/main/java/feast/serving/specs/CachedSpecService.java @@ -49,7 +49,6 @@ import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; -import org.apache.commons.lang3.tuple.Triple; import org.slf4j.Logger; /** In-memory cache of specs. 
*/ @@ -199,8 +198,8 @@ private Map getFeatureToFeatureSetMapping( .collect( groupingBy( featureSet -> - Triple.of( - featureSet.getProject(), featureSet.getName(), featureSet.getVersion()))) + Pair.of( + featureSet.getProject(), featureSet.getName()))) .forEach( (group, groupedFeatureSets) -> { groupedFeatureSets = From 0af06372fd0d9125c84a6e296154ba59b5b4f014 Mon Sep 17 00:00:00 2001 From: Iain Rauch Date: Tue, 11 Feb 2020 09:07:36 +0000 Subject: [PATCH 10/15] Helm Chart Upgrades (#458) Move prometheus-statsd-exporter to toggleable core dependency (default false). Add ingresses for gRPC and HTTP for both core and serving. Refactor ConfigMaps to user Spring profiles rather than manipulating the base application.yaml. Add ability to define and enable arbitrary Spring profiles. Add toggle to enable prometheus scraping in core. Add parameters to change LOG_LEVEL and LOG_TYPE (#430). Add parameter to specify GOOGLE_CLOUD_PROJECT. Allow jar path to be specified (e.g. if using non-standard image). Add missing documentation for Helm parameters. 
--- infra/charts/feast/README.md | 80 +++++++++++++++- .../prometheus-statsd-exporter/.helmignore | 0 .../prometheus-statsd-exporter/Chart.yaml | 0 .../prometheus-statsd-exporter/README.md | 0 .../templates/NOTES.txt | 0 .../templates/_helpers.tpl | 0 .../templates/config.yaml | 0 .../templates/deployment.yaml | 0 .../templates/pvc.yaml | 0 .../templates/service.yaml | 0 .../templates/serviceaccount.yaml | 0 .../prometheus-statsd-exporter/values.yaml | 0 .../feast/charts/feast-core/requirements.yaml | 8 +- .../charts/feast-core/templates/_ingress.yaml | 68 +++++++++++++ .../feast-core/templates/configmap.yaml | 45 ++++++--- .../feast-core/templates/deployment.yaml | 42 ++++++-- .../charts/feast-core/templates/ingress.yaml | 33 ++----- .../feast/charts/feast-core/values.yaml | 95 +++++++++++++++---- .../charts/feast-serving/requirements.yaml | 3 + .../feast-serving/templates/_helpers.tpl | 7 ++ .../feast-serving/templates/_ingress.yaml | 68 +++++++++++++ .../feast-serving/templates/configmap.yaml | 36 ++++--- .../feast-serving/templates/deployment.yaml | 30 ++++-- .../feast-serving/templates/ingress.yaml | 31 +----- .../feast/charts/feast-serving/values.yaml | 77 +++++++++++---- infra/charts/feast/requirements.lock | 16 +--- infra/charts/feast/requirements.yaml | 2 +- infra/charts/feast/values-demo.yaml | 17 +++- infra/charts/feast/values.yaml | 12 ++- 29 files changed, 510 insertions(+), 160 deletions(-) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/.helmignore (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/Chart.yaml (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/README.md (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/templates/NOTES.txt (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/templates/_helpers.tpl (100%) rename infra/charts/feast/charts/{ => 
feast-core/charts}/prometheus-statsd-exporter/templates/config.yaml (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/templates/deployment.yaml (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/templates/pvc.yaml (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/templates/service.yaml (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/templates/serviceaccount.yaml (100%) rename infra/charts/feast/charts/{ => feast-core/charts}/prometheus-statsd-exporter/values.yaml (100%) create mode 100644 infra/charts/feast/charts/feast-core/templates/_ingress.yaml create mode 100644 infra/charts/feast/charts/feast-serving/templates/_ingress.yaml diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index ab5321ca865..e93b687f191 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -81,17 +81,26 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-core.kafka.topics[0].name` | Default topic name in Kafka| `feast` | `feast-core.kafka.topics[0].replicationFactor` | No of replication factor for the topic| `1` | `feast-core.kafka.topics[0].partitions` | No of partitions for the topic | `1` +| `feast-core.prometheus-statsd-exporter.enabled` | Flag to install Prometheus StatsD Exporter | `false` +| `feast-core.prometheus-statsd-exporter.*` | Refer to this [link](charts/feast-core/charts/prometheus-statsd-exporter/values.yaml | | `feast-core.replicaCount` | No of pods to create | `1` | `feast-core.image.repository` | Repository for Feast Core Docker image | `gcr.io/kf-feast/feast-core` -| `feast-core.image.tag` | Tag for Feast Core Docker image | `0.3.2` +| `feast-core.image.tag` | Tag for Feast Core Docker image | `0.4.4` | `feast-core.image.pullPolicy` | Image pull policy for Feast Core Docker image | `IfNotPresent` +| 
`feast-core.prometheus.enabled` | Add annotations to enable Prometheus scraping | `false` | `feast-core.application.yaml` | Configuration for Feast Core application | Refer to this [link](charts/feast-core/values.yaml) | `feast-core.springConfigMountPath` | Directory to mount application.yaml | `/etc/feast/feast-core` | `feast-core.gcpServiceAccount.useExistingSecret` | Flag to use existing secret for GCP service account | `false` | `feast-core.gcpServiceAccount.existingSecret.name` | Secret name for the service account | `feast-gcp-service-account` | `feast-core.gcpServiceAccount.existingSecret.key` | Secret key for the service account | `key.json` | `feast-core.gcpServiceAccount.mountPath` | Directory to mount the JSON key file | `/etc/gcloud/service-accounts` +| `feast-core.gcpProjectId` | Project ID to set `GOOGLE_CLOUD_PROJECT` to change default project used by SDKs | `""` +| `feast-core.jarPath` | Path to Jar file in the Docker image | `/opt/feast/feast-core.jar` | `feast-core.jvmOptions` | Options for the JVM | `[]` +| `feast-core.logLevel` | Application logging level | `warn` +| `feast-core.logType` | Application logging type (`JSON` or `Console`) | `JSON` +| `feast-core.springConfigProfiles` | Map of profile name to file content for additional Spring profiles | `{}` +| `feast-core.springConfigProfilesActive` | CSV of profiles to enable from `springConfigProfiles` | `""` | `feast-core.livenessProbe.enabled` | Flag to enable liveness probe | `true` | `feast-core.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `60` | `feast-core.livenessProbe.periodSeconds` | How often to perform the probe | `10` @@ -109,6 +118,7 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-core.grpc.port` | Kubernetes Service port for GRPC request| `6565` | `feast-core.grpc.targetPort` | Container port for GRPC request| `6565` | `feast-core.resources` | CPU and memory allocation for the pod | `{}` +| 
`feast-core.ingress` | See *Ingress Parameters* [below](#ingress-parameters) | `{}` | `feast-serving-online.enabled` | Flag to install Feast Online Serving | `true` | `feast-serving-online.redis.enabled` | Flag to install Redis in Feast Serving | `false` | `feast-serving-online.redis.usePassword` | Flag to use password to access Redis | `false` @@ -116,8 +126,9 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-online.core.enabled` | Flag for Feast Serving to use Feast Core in the same Helm release | `true` | `feast-serving-online.replicaCount` | No of pods to create | `1` | `feast-serving-online.image.repository` | Repository for Feast Serving Docker image | `gcr.io/kf-feast/feast-serving` -| `feast-serving-online.image.tag` | Tag for Feast Serving Docker image | `0.3.2` +| `feast-serving-online.image.tag` | Tag for Feast Serving Docker image | `0.4.4` | `feast-serving-online.image.pullPolicy` | Image pull policy for Feast Serving Docker image | `IfNotPresent` +| `feast-serving-online.prometheus.enabled` | Add annotations to enable Prometheus scraping | `true` | `feast-serving-online.application.yaml` | Application configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) | `feast-serving-online.store.yaml` | Store configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) | `feast-serving-online.springConfigMountPath` | Directory to mount application.yaml and store.yaml | `/etc/feast/feast-serving` @@ -125,7 +136,13 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-online.gcpServiceAccount.existingSecret.name` | Secret name for the service account | `feast-gcp-service-account` | `feast-serving-online.gcpServiceAccount.existingSecret.key` | Secret key for the service account | `key.json` | `feast-serving-online.gcpServiceAccount.mountPath` | Directory to mount the JSON key file | 
`/etc/gcloud/service-accounts` +| `feast-serving-online.gcpProjectId` | Project ID to set `GOOGLE_CLOUD_PROJECT` to change default project used by SDKs | `""` +| `feast-serving-online.jarPath` | Path to Jar file in the Docker image | `/opt/feast/feast-serving.jar` | `feast-serving-online.jvmOptions` | Options for the JVM | `[]` +| `feast-serving-online.logLevel` | Application logging level | `warn` +| `feast-serving-online.logType` | Application logging type (`JSON` or `Console`) | `JSON` +| `feast-serving-online.springConfigProfiles` | Map of profile name to file content for additional Spring profiles | `{}` +| `feast-serving-online.springConfigProfilesActive` | CSV of profiles to enable from `springConfigProfiles` | `""` | `feast-serving-online.livenessProbe.enabled` | Flag to enable liveness probe | `true` | `feast-serving-online.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `60` | `feast-serving-online.livenessProbe.periodSeconds` | How often to perform the probe | `10` @@ -143,6 +160,7 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-online.grpc.port` | Kubernetes Service port for GRPC request| `6566` | `feast-serving-online.grpc.targetPort` | Container port for GRPC request| `6566` | `feast-serving-online.resources` | CPU and memory allocation for the pod | `{}` +| `feast-serving-online.ingress` | See *Ingress Parameters* [below](#ingress-parameters) | `{}` | `feast-serving-batch.enabled` | Flag to install Feast Batch Serving | `true` | `feast-serving-batch.redis.enabled` | Flag to install Redis in Feast Serving | `false` | `feast-serving-batch.redis.usePassword` | Flag to use password to access Redis | `false` @@ -150,8 +168,9 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-batch.core.enabled` | Flag for Feast Serving to use Feast Core in the same Helm release | `true` | `feast-serving-batch.replicaCount` | No of pods to 
create | `1` | `feast-serving-batch.image.repository` | Repository for Feast Serving Docker image | `gcr.io/kf-feast/feast-serving` -| `feast-serving-batch.image.tag` | Tag for Feast Serving Docker image | `0.3.2` +| `feast-serving-batch.image.tag` | Tag for Feast Serving Docker image | `0.4.4` | `feast-serving-batch.image.pullPolicy` | Image pull policy for Feast Serving Docker image | `IfNotPresent` +| `feast-serving-batch.prometheus.enabled` | Add annotations to enable Prometheus scraping | `true` | `feast-serving-batch.application.yaml` | Application configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) | `feast-serving-batch.store.yaml` | Store configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) | `feast-serving-batch.springConfigMountPath` | Directory to mount application.yaml and store.yaml | `/etc/feast/feast-serving` @@ -159,7 +178,13 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-batch.gcpServiceAccount.existingSecret.name` | Secret name for the service account | `feast-gcp-service-account` | `feast-serving-batch.gcpServiceAccount.existingSecret.key` | Secret key for the service account | `key.json` | `feast-serving-batch.gcpServiceAccount.mountPath` | Directory to mount the JSON key file | `/etc/gcloud/service-accounts` +| `feast-serving-batch.gcpProjectId` | Project ID to set `GOOGLE_CLOUD_PROJECT` to change default project used by SDKs | `""` +| `feast-serving-batch.jarPath` | Path to Jar file in the Docker image | `/opt/feast/feast-serving.jar` | `feast-serving-batch.jvmOptions` | Options for the JVM | `[]` +| `feast-serving-batch.logLevel` | Application logging level | `warn` +| `feast-serving-batch.logType` | Application logging type (`JSON` or `Console`) | `JSON` +| `feast-serving-batch.springConfigProfiles` | Map of profile name to file content for additional Spring profiles | `{}` +| 
`feast-serving-batch.springConfigProfilesActive` | CSV of profiles to enable from `springConfigProfiles` | `""` | `feast-serving-batch.livenessProbe.enabled` | Flag to enable liveness probe | `true` | `feast-serving-batch.livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `60` | `feast-serving-batch.livenessProbe.periodSeconds` | How often to perform the probe | `10` @@ -176,4 +201,51 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-batch.http.targetPort` | Container port for HTTP request | `8080` | `feast-serving-batch.grpc.port` | Kubernetes Service port for GRPC request| `6566` | `feast-serving-batch.grpc.targetPort` | Container port for GRPC request| `6566` -| `feast-serving-batch.resources` | CPU and memory allocation for the pod | `{}` \ No newline at end of file +| `feast-serving-batch.resources` | CPU and memory allocation for the pod | `{}` +| `feast-serving-batch.ingress` | See *Ingress Parameters* [below](#ingress-parameters) | `{}` + +## Ingress Parameters + +The following table lists the configurable parameters of the ingress section for each Feast module. + +Note, there are two ingresses available for each module - `grpc` and `http`. + +| Parameter | Description | Default +| ----------------------------- | ----------- | ------- +| `ingress.grpc.enabled` | Enables an ingress (endpoint) for the gRPC server | `false` +| `ingress.grpc.*` | See below | +| `ingress.http.enabled` | Enables an ingress (endpoint) for the HTTP server | `false` +| `ingress.http.*` | See below | +| `ingress.*.class` | Value for `kubernetes.io/ingress.class` | `nginx` +| `ingress.*.hosts` | List of host-names for the ingress | `[]` +| `ingress.*.annotations` | Additional ingress annotations | `{}` +| `ingress.*.https.enabled` | Add a tls section to the ingress | `true` +| `ingress.*.https.secretNames` | Map of hostname to TLS secret name | `{}` If not specified, defaults to `domain-tld-tls` e.g. 
`feast.example.com` uses secret `example-com-tls` +| `ingress.*.auth.enabled` | Enable auth on the ingress (only applicable for `nginx` type) | `false` +| `ingress.*.auth.signinHost` | External hostname of the OAuth2 proxy to use | First item in `ingress.hosts`, replacing the sub-domain with 'auth' e.g. `feast.example.com` uses `auth.example.com` +| `ingress.*.auth.authUrl` | Internal URI to internal auth endpoint | `http://auth-server.auth-ns.svc.cluster.local/auth` +| `ingress.*.whitelist` | Subnet masks to whitelist (i.e. value for `nginx.ingress.kubernetes.io/whitelist-source-range`) | `""` + +To enable all the ingresses use a config like the following (while also adding the hosts etc): + +```yaml +feast-core: + ingress: + grpc: + enabled: true + http: + enabled: true +feast-serving-online: + ingress: + grpc: + enabled: true + http: + enabled: true +feast-serving-batch: + ingress: + grpc: + enabled: true + http: + enabled: true +``` + diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/.helmignore b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/.helmignore similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/.helmignore rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/.helmignore diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/Chart.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/Chart.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/Chart.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/Chart.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/README.md b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/README.md similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/README.md rename to 
infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/README.md diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/NOTES.txt b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/NOTES.txt similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/NOTES.txt rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/NOTES.txt diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/_helpers.tpl b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/_helpers.tpl similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/_helpers.tpl rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/_helpers.tpl diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/config.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/config.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/config.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/config.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/deployment.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/deployment.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/deployment.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/deployment.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/pvc.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/pvc.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/pvc.yaml rename to 
infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/pvc.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/service.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/service.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/service.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/service.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/templates/serviceaccount.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/serviceaccount.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/templates/serviceaccount.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/templates/serviceaccount.yaml diff --git a/infra/charts/feast/charts/prometheus-statsd-exporter/values.yaml b/infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/values.yaml similarity index 100% rename from infra/charts/feast/charts/prometheus-statsd-exporter/values.yaml rename to infra/charts/feast/charts/feast-core/charts/prometheus-statsd-exporter/values.yaml diff --git a/infra/charts/feast/charts/feast-core/requirements.yaml b/infra/charts/feast/charts/feast-core/requirements.yaml index efe9fec508a..ef1e39a7d0f 100644 --- a/infra/charts/feast/charts/feast-core/requirements.yaml +++ b/infra/charts/feast/charts/feast-core/requirements.yaml @@ -6,4 +6,10 @@ dependencies: - name: kafka version: 0.20.1 repository: "@incubator" - condition: kafka.enabled \ No newline at end of file + condition: kafka.enabled +- name: common + version: 0.0.5 + repository: "@incubator" +- name: prometheus-statsd-exporter + version: 0.1.2 + condition: prometheus-statsd-exporter.enabled \ No newline at end of file diff --git a/infra/charts/feast/charts/feast-core/templates/_ingress.yaml 
b/infra/charts/feast/charts/feast-core/templates/_ingress.yaml new file mode 100644 index 00000000000..5bed6df0470 --- /dev/null +++ b/infra/charts/feast/charts/feast-core/templates/_ingress.yaml @@ -0,0 +1,68 @@ +{{- /* +This takes an array of three values: +- the top context +- the feast component +- the service protocol +- the ingress context +*/ -}} +{{- define "feast.ingress" -}} +{{- $top := (index . 0) -}} +{{- $component := (index . 1) -}} +{{- $protocol := (index . 2) -}} +{{- $ingressValues := (index . 3) -}} +apiVersion: extensions/v1beta1 +kind: Ingress +{{ include "feast.ingress.metadata" . }} +spec: + rules: + {{- range $host := $ingressValues.hosts }} + - host: {{ $host }} + http: + paths: + - path: / + backend: + serviceName: {{ include (printf "feast-%s.fullname" $component) $top }} + servicePort: {{ index $top.Values "service" $protocol "port" }} + {{- end }} +{{- if $ingressValues.https.enabled }} + tls: + {{- range $host := $ingressValues.hosts }} + - secretName: {{ index $ingressValues.https.secretNames $host | default (splitList "." $host | rest | join "-" | printf "%s-tls") }} + hosts: + - {{ $host }} + {{- end }} +{{- end -}} +{{- end -}} + +{{- define "feast.ingress.metadata" -}} +{{- $commonMetadata := fromYaml (include "common.metadata" (first .)) }} +{{- $overrides := fromYaml (include "feast.ingress.metadata-overrides" .) -}} +{{- toYaml (merge $overrides $commonMetadata) -}} +{{- end -}} + +{{- define "feast.ingress.metadata-overrides" -}} +{{- $top := (index . 0) -}} +{{- $component := (index . 1) -}} +{{- $protocol := (index . 2) -}} +{{- $ingressValues := (index . 
3) -}} +{{- $commonFullname := include "common.fullname" $top }} +metadata: + name: {{ $commonFullname }}-{{ $component }}-{{ $protocol }} + annotations: + kubernetes.io/ingress.class: {{ $ingressValues.class | quote }} + {{- if (and (eq $ingressValues.class "nginx") $ingressValues.auth.enabled) }} + nginx.ingress.kubernetes.io/auth-url: {{ $ingressValues.auth.authUrl | quote }} + nginx.ingress.kubernetes.io/auth-response-headers: "x-auth-request-email, x-auth-request-user" + nginx.ingress.kubernetes.io/auth-signin: "https://{{ $ingressValues.auth.signinHost | default (splitList "." (index $ingressValues.hosts 0) | rest | join "." | printf "auth.%s")}}/oauth2/start?rd=/r/$host/$request_uri" + {{- end }} + {{- if (and (eq $ingressValues.class "nginx") $ingressValues.whitelist) }} + nginx.ingress.kubernetes.io/whitelist-source-range: {{ $ingressValues.whitelist | quote -}} + {{- end }} + {{- if (and (eq $ingressValues.class "nginx") (eq $protocol "grpc") ) }} + # TODO: Allow choice of GRPC/GRPCS + nginx.ingress.kubernetes.io/backend-protocol: "GRPC" + {{- end }} + {{- if $ingressValues.annotations -}} + {{ include "common.annote" $ingressValues.annotations | indent 4 }} + {{- end }} +{{- end -}} diff --git a/infra/charts/feast/charts/feast-core/templates/configmap.yaml b/infra/charts/feast/charts/feast-core/templates/configmap.yaml index 68dc45c0571..da45cad5bdf 100644 --- a/infra/charts/feast/charts/feast-core/templates/configmap.yaml +++ b/infra/charts/feast/charts/feast-core/templates/configmap.yaml @@ -11,22 +11,43 @@ metadata: heritage: {{ .Release.Service }} data: application.yaml: | -{{- $config := index .Values "application.yaml"}} +{{- toYaml (index .Values "application.yaml") | nindent 4 }} {{- if .Values.postgresql.enabled }} -{{- $datasource := dict "url" (printf "jdbc:postgresql://%s:%s/%s" (printf "%s-postgresql" .Release.Name) (.Values.postgresql.service.port | toString) (.Values.postgresql.postgresqlDatabase)) "driverClassName" "org.postgresql.Driver" 
}} -{{- $newConfig := dict "spring" (dict "datasource" $datasource) }} -{{- $config := mergeOverwrite $config $newConfig }} + application-bundled-postgresql.yaml: | + spring: + datasource: + url: {{ printf "jdbc:postgresql://%s:%s/%s" (printf "%s-postgresql" .Release.Name) (.Values.postgresql.service.port | toString) (.Values.postgresql.postgresqlDatabase) }} + driverClassName: org.postgresql.Driver {{- end }} -{{- if .Values.kafka.enabled }} -{{- $topic := index .Values.kafka.topics 0 }} -{{- $options := dict "topic" $topic.name "replicationFactor" $topic.replicationFactor "partitions" $topic.partitions }} -{{- if not .Values.kafka.external.enabled }} -{{- $_ := set $options "bootstrapServers" (printf "%s:9092" (printf "%s-kafka" .Release.Name)) }} +{{ if .Values.kafka.enabled }} + {{- $topic := index .Values.kafka.topics 0 }} + application-bundled-kafka.yaml: | + feast: + stream: + type: kafka + options: + topic: {{ $topic.name | quote }} + replicationFactor: {{ $topic.replicationFactor }} + partitions: {{ $topic.partitions }} + {{- if not .Values.kafka.external.enabled }} + bootstrapServers: {{ printf "%s:9092" (printf "%s-kafka" .Release.Name) }} + {{- end }} {{- end }} -{{- $newConfig := dict "feast" (dict "stream" (dict "type" "kafka" "options" $options))}} -{{- $config := mergeOverwrite $config $newConfig }} + +{{- if (index .Values "prometheus-statsd-exporter" "enabled" )}} + application-bundled-statsd.yaml: | + feast: + jobs: + metrics: + enabled: true + type: statsd + host: prometheus-statsd-exporter + port: 9125 {{- end }} -{{- toYaml $config | nindent 4 }} +{{- range $name, $content := .Values.springConfigProfiles }} + application-{{ $name }}.yaml: | +{{- toYaml $content | nindent 4 }} +{{- end }} diff --git a/infra/charts/feast/charts/feast-core/templates/deployment.yaml b/infra/charts/feast/charts/feast-core/templates/deployment.yaml index 0671d9574b3..df834b6749e 100644 --- a/infra/charts/feast/charts/feast-core/templates/deployment.yaml +++ 
b/infra/charts/feast/charts/feast-core/templates/deployment.yaml @@ -18,6 +18,13 @@ spec: release: {{ .Release.Name }} template: metadata: + {{- if .Values.prometheus.enabled }} + annotations: + {{ $config := index .Values "application.yaml" }} + prometheus.io/path: /metrics + prometheus.io/port: "{{ $config.server.port }}" + prometheus.io/scrape: "true" + {{- end }} labels: app: {{ template "feast-core.name" . }} component: core @@ -42,7 +49,7 @@ spec: - name: {{ .Chart.Name }} image: '{{ .Values.image.repository }}:{{ required "No .image.tag found. This must be provided as input." .Values.image.tag }}' imagePullPolicy: {{ .Values.image.pullPolicy }} - + volumeMounts: - name: {{ template "feast-core.fullname" . }}-config mountPath: "{{ .Values.springConfigMountPath }}" @@ -53,31 +60,48 @@ spec: {{- end }} env: + - name: LOG_TYPE + value: {{ .Values.logType | quote }} + - name: LOG_LEVEL + value: {{ .Values.logLevel | quote }} + {{- if .Values.postgresql.enabled }} - name: SPRING_DATASOURCE_USERNAME - value: {{ .Values.postgresql.postgresqlUsername }} + value: {{ .Values.postgresql.postgresqlUsername | quote }} - name: SPRING_DATASOURCE_PASSWORD - value: {{ .Values.postgresql.postgresqlPassword }} + value: {{ .Values.postgresql.postgresqlPassword | quote }} {{- end }} {{- if .Values.gcpServiceAccount.useExistingSecret }} - name: GOOGLE_APPLICATION_CREDENTIALS value: {{ .Values.gcpServiceAccount.mountPath }}/{{ .Values.gcpServiceAccount.existingSecret.key }} {{- end }} + {{- if .Values.gcpProjectId }} + - name: GOOGLE_CLOUD_PROJECT + value: {{ .Values.gcpProjectId | quote }} + {{- end }} command: - java {{- range .Values.jvmOptions }} - - {{ . }} + - {{ . 
| quote }} + {{- end }} + - -jar + - {{ .Values.jarPath | quote }} + - "--spring.config.location=file:{{ .Values.springConfigMountPath }}/" + {{- $profilesArray := splitList "," .Values.springConfigProfilesActive -}} + {{- $profilesArray = append $profilesArray (.Values.postgresql.enabled | ternary "bundled-postgresql" "") -}} + {{- $profilesArray = append $profilesArray (.Values.kafka.enabled | ternary "bundled-kafka" "") -}} + {{- $profilesArray = append $profilesArray (index .Values "prometheus-statsd-exporter" "enabled" | ternary "bundled-statsd" "") -}} + {{- $profilesArray = compact $profilesArray -}} + {{- if $profilesArray }} + - "--spring.profiles.active={{ join "," $profilesArray }}" {{- end }} - - -jar - - /opt/feast/feast-core.jar - - "--spring.config.location=file:{{ .Values.springConfigMountPath }}/application.yaml" ports: - name: http containerPort: {{ .Values.service.http.targetPort }} - - name: grpc + - name: grpc containerPort: {{ .Values.service.grpc.targetPort }} {{- if .Values.livenessProbe.enabled }} @@ -103,6 +127,6 @@ spec: timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} failureThreshold: {{ .Values.readinessProbe.failureThreshold }} {{- end }} - + resources: {{- toYaml .Values.resources | nindent 10 }} diff --git a/infra/charts/feast/charts/feast-core/templates/ingress.yaml b/infra/charts/feast/charts/feast-core/templates/ingress.yaml index 86fc2d3f175..7f453e1a75f 100644 --- a/infra/charts/feast/charts/feast-core/templates/ingress.yaml +++ b/infra/charts/feast/charts/feast-core/templates/ingress.yaml @@ -1,28 +1,7 @@ -{{- if .Values.ingress.enabled -}} -{{- $fullName := include "feast-core.fullname" . -}} -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: {{ $fullName }} - labels: - app: {{ template "feast-core.name" . 
}} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - component: core - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - annotations: -{{- with .Values.ingress.annotations }} -{{ toYaml . | indent 4 }} +{{- if .Values.ingress.http.enabled -}} +{{ template "feast.ingress" (list . "core" "http" .Values.ingress.http) }} +{{- end }} +--- +{{ if .Values.ingress.grpc.enabled -}} +{{ template "feast.ingress" (list . "core" "grpc" .Values.ingress.grpc) }} {{- end }} -spec: - rules: - {{- range .Values.ingress.hosts }} - - host: {{ .host | quote }} - http: - paths: - - path: / - backend: - serviceName: {{ $fullName }} - servicePort: {{ .port | quote }} - {{- end }} -{{- end }} \ No newline at end of file diff --git a/infra/charts/feast/charts/feast-core/values.yaml b/infra/charts/feast/charts/feast-core/values.yaml index f746bc96ead..077906dc35d 100644 --- a/infra/charts/feast/charts/feast-core/values.yaml +++ b/infra/charts/feast/charts/feast-core/values.yaml @@ -1,12 +1,15 @@ -# postgresql configures Postgresql that is installed as part of Feast Core. +# ============================================================ +# Bundled PostgreSQL +# ============================================================ + # Refer to https://github.com/helm/charts/tree/c42002a21abf8eff839ff1d2382152bde2bbe596/stable/postgresql # for additional configuration. postgresql: # enabled specifies whether Postgresql should be installed as part of Feast Core. # - # Feast Core requires a database to store data such as the created FeatureSets + # Feast Core requires a database to store data such as the created FeatureSets # and job statuses. If enabled, the database and service port specified below - # will override "spring.datasource.url" value in application.yaml. The + # will override "spring.datasource.url" value in application.yaml. The # username and password will also be set as environment variables that will # override "spring.datasource.username/password" in application.yaml. 
enabled: true @@ -20,12 +23,15 @@ postgresql: # port is the TCP port that Postgresql will listen to port: 5432 -# kafka configures Kafka that is installed as part of Feast Core. +# ============================================================ +# Bundled Kafka +# ============================================================ + # Refer to https://github.com/helm/charts/tree/c42002a21abf8eff839ff1d2382152bde2bbe596/incubator/kafka # for additional configuration. kafka: # enabled specifies whether Kafka should be installed as part of Feast Core. - # + # # Feast Core requires a Kafka instance to be set as the default source for # FeatureRows. If enabled, "feast.stream" option in application.yaml will # be overridden by this installed Kafka configuration. @@ -36,6 +42,18 @@ kafka: replicationFactor: 1 partitions: 1 + +# ============================================================ +# Bundled Prometheus StatsD Exporter +# ============================================================ + +prometheus-statsd-exporter: + enabled: false + +# ============================================================ +# Feast Core +# ============================================================ + # replicaCount is the number of pods that will be created. replicaCount: 1 @@ -44,13 +62,18 @@ image: repository: gcr.io/kf-feast/feast-core pullPolicy: IfNotPresent +# Add prometheus scraping annotations to the Pod metadata. +# If enabled, you must also ensure server.port is specified under application.yaml +prometheus: + enabled: false + # application.yaml is the main configuration for Feast Core application. -# +# # Feast Core is a Spring Boot app which uses this yaml configuration file. # Refer to https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/core/src/main/resources/application.yml # for a complete list and description of the configuration. 
# -# Note that some properties defined in application.yaml may be overriden by +# Note that some properties defined in application.yaml may be overriden by # Helm under certain conditions. For example, if postgresql and kafka dependencies # are enabled. application.yaml: @@ -96,7 +119,14 @@ application.yaml: host: localhost port: 8125 -# springConfigMountPath is the directory path where application.yaml will be +springConfigProfiles: {} +# db: | +# spring: +# datasource: +# driverClassName: org.postgresql.Driver +# url: jdbc:postgresql://${DB_HOST:127.0.0.1}:${DB_PORT:5432}/${DB_DATABASE:postgres} +springConfigProfilesActive: "" +# springConfigMountPath is the directory path where application.yaml will be # mounted in the container. springConfigMountPath: /etc/feast/feast-core @@ -107,7 +137,7 @@ gcpServiceAccount: useExistingSecret: false existingSecret: # name is the secret name of the existing secret for the service account. - name: feast-gcp-service-account + name: feast-gcp-service-account # key is the secret key of the existing secret for the service account. # key is normally derived from the file name of the JSON key file. key: key.json @@ -115,19 +145,29 @@ gcpServiceAccount: # the value of "existingSecret.key" is file name of the service account file. mountPath: /etc/gcloud/service-accounts -# jvmOptions are options that will be passed to the Java Virtual Machine (JVM) +# Project ID picked up by the Cloud SDK (e.g. BigQuery run against this project) +gcpProjectId: "" + +# Path to Jar file in the Docker image. +# If you are using gcr.io/kf-feast/feast-core this should not need to be changed +jarPath: /opt/feast/feast-core.jar + +# jvmOptions are options that will be passed to the Java Virtual Machine (JVM) # running Feast Core. -# +# # For example, it is good practice to set min and max heap size in JVM. 
# https://stackoverflow.com/questions/6902135/side-effect-for-increasing-maxpermsize-and-max-heap-size # # Refer to https://docs.oracle.com/cd/E22289_01/html/821-1274/configuring-the-default-jvm-and-java-arguments.html # to see other JVM options that can be set. # -# jvmOptions: -# - -Xms1024m +jvmOptions: [] +# - -Xms1024m # - -Xmx1024m +logType: JSON +logLevel: warn + livenessProbe: enabled: true initialDelaySeconds: 60 @@ -162,12 +202,29 @@ service: # nodePort: ingress: - enabled: false - annotations: {} - # kubernetes.io/ingress.class: nginx - hosts: - # - host: chart-example.local - # port: http + grpc: + enabled: false + class: nginx + hosts: [] + annotations: {} + https: + enabled: true + secretNames: {} + whitelist: "" + auth: + enabled: false + http: + enabled: false + class: nginx + hosts: [] + annotations: {} + https: + enabled: true + secretNames: {} + whitelist: "" + auth: + enabled: false + authUrl: http://auth-server.auth-ns.svc.cluster.local/auth resources: {} # We usually recommend not to specify default resources and to leave this as a conscious diff --git a/infra/charts/feast/charts/feast-serving/requirements.yaml b/infra/charts/feast/charts/feast-serving/requirements.yaml index fa4c1df4c10..2cee3f81494 100644 --- a/infra/charts/feast/charts/feast-serving/requirements.yaml +++ b/infra/charts/feast/charts/feast-serving/requirements.yaml @@ -3,3 +3,6 @@ dependencies: version: 9.5.0 repository: "@stable" condition: redis.enabled +- name: common + version: 0.0.5 + repository: "@incubator" diff --git a/infra/charts/feast/charts/feast-serving/templates/_helpers.tpl b/infra/charts/feast/charts/feast-serving/templates/_helpers.tpl index 49abb6b8e50..ab670cc8cc7 100644 --- a/infra/charts/feast/charts/feast-serving/templates/_helpers.tpl +++ b/infra/charts/feast/charts/feast-serving/templates/_helpers.tpl @@ -43,3 +43,10 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/managed-by: {{ .Release.Service }} {{- end -}} 
+ +{{/* +Helpers +*/}} +{{- define "bq_store_and_no_job_options" -}} +{{ and (eq (index .Values "store.yaml" "type") "BIGQUERY") (empty (index .Values "application.yaml" "feast" "jobs" "store-options")) }} +{{- end -}} diff --git a/infra/charts/feast/charts/feast-serving/templates/_ingress.yaml b/infra/charts/feast/charts/feast-serving/templates/_ingress.yaml new file mode 100644 index 00000000000..5bed6df0470 --- /dev/null +++ b/infra/charts/feast/charts/feast-serving/templates/_ingress.yaml @@ -0,0 +1,68 @@ +{{- /* +This takes an array of three values: +- the top context +- the feast component +- the service protocol +- the ingress context +*/ -}} +{{- define "feast.ingress" -}} +{{- $top := (index . 0) -}} +{{- $component := (index . 1) -}} +{{- $protocol := (index . 2) -}} +{{- $ingressValues := (index . 3) -}} +apiVersion: extensions/v1beta1 +kind: Ingress +{{ include "feast.ingress.metadata" . }} +spec: + rules: + {{- range $host := $ingressValues.hosts }} + - host: {{ $host }} + http: + paths: + - path: / + backend: + serviceName: {{ include (printf "feast-%s.fullname" $component) $top }} + servicePort: {{ index $top.Values "service" $protocol "port" }} + {{- end }} +{{- if $ingressValues.https.enabled }} + tls: + {{- range $host := $ingressValues.hosts }} + - secretName: {{ index $ingressValues.https.secretNames $host | default (splitList "." $host | rest | join "-" | printf "%s-tls") }} + hosts: + - {{ $host }} + {{- end }} +{{- end -}} +{{- end -}} + +{{- define "feast.ingress.metadata" -}} +{{- $commonMetadata := fromYaml (include "common.metadata" (first .)) }} +{{- $overrides := fromYaml (include "feast.ingress.metadata-overrides" .) -}} +{{- toYaml (merge $overrides $commonMetadata) -}} +{{- end -}} + +{{- define "feast.ingress.metadata-overrides" -}} +{{- $top := (index . 0) -}} +{{- $component := (index . 1) -}} +{{- $protocol := (index . 2) -}} +{{- $ingressValues := (index . 
3) -}} +{{- $commonFullname := include "common.fullname" $top }} +metadata: + name: {{ $commonFullname }}-{{ $component }}-{{ $protocol }} + annotations: + kubernetes.io/ingress.class: {{ $ingressValues.class | quote }} + {{- if (and (eq $ingressValues.class "nginx") $ingressValues.auth.enabled) }} + nginx.ingress.kubernetes.io/auth-url: {{ $ingressValues.auth.authUrl | quote }} + nginx.ingress.kubernetes.io/auth-response-headers: "x-auth-request-email, x-auth-request-user" + nginx.ingress.kubernetes.io/auth-signin: "https://{{ $ingressValues.auth.signinHost | default (splitList "." (index $ingressValues.hosts 0) | rest | join "." | printf "auth.%s")}}/oauth2/start?rd=/r/$host/$request_uri" + {{- end }} + {{- if (and (eq $ingressValues.class "nginx") $ingressValues.whitelist) }} + nginx.ingress.kubernetes.io/whitelist-source-range: {{ $ingressValues.whitelist | quote -}} + {{- end }} + {{- if (and (eq $ingressValues.class "nginx") (eq $protocol "grpc") ) }} + # TODO: Allow choice of GRPC/GRPCS + nginx.ingress.kubernetes.io/backend-protocol: "GRPC" + {{- end }} + {{- if $ingressValues.annotations -}} + {{ include "common.annote" $ingressValues.annotations | indent 4 }} + {{- end }} +{{- end -}} diff --git a/infra/charts/feast/charts/feast-serving/templates/configmap.yaml b/infra/charts/feast/charts/feast-serving/templates/configmap.yaml index 0ec80252c16..934216a9d5f 100644 --- a/infra/charts/feast/charts/feast-serving/templates/configmap.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/configmap.yaml @@ -11,37 +11,43 @@ metadata: heritage: {{ .Release.Service }} data: application.yaml: | -{{- $config := index .Values "application.yaml" }} +{{- toYaml (index .Values "application.yaml") | nindent 4 }} {{- if .Values.core.enabled }} -{{- $newConfig := dict "feast" (dict "core-host" (printf "%s-feast-core" .Release.Name)) }} -{{- $config := mergeOverwrite $config $newConfig }} + application-bundled-core.yaml: | + feast: + core-host: {{ printf 
"%s-feast-core" .Release.Name }} {{- end }} -{{- $store := index .Values "store.yaml" }} -{{- if and (eq $store.type "BIGQUERY") (not (hasKey $config.feast.jobs "store-options")) }} -{{- $jobStore := dict "host" (printf "%s-redis-headless" .Release.Name) "port" 6379 }} -{{- $newConfig := dict "feast" (dict "jobs" (dict "store-options" $jobStore)) }} -{{- $config := mergeOverwrite $config $newConfig }} +{{- if eq (include "bq_store_and_no_job_options" .) "true" }} + application-bundled-redis.yaml: | + feast: + jobs: + store-options: + host: {{ printf "%s-redis-headless" .Release.Name }} + port: 6379 {{- end }} -{{- toYaml $config | nindent 4 }} - store.yaml: | -{{- $config := index .Values "store.yaml"}} +{{- $store := index .Values "store.yaml"}} -{{- if and .Values.redis.enabled (eq $config.type "REDIS") }} +{{- if and .Values.redis.enabled (eq $store.type "REDIS") }} {{- if eq .Values.redis.master.service.type "ClusterIP" }} {{- $newConfig := dict "redis_config" (dict "host" (printf "%s-redis-headless" .Release.Name) "port" .Values.redis.redisPort) }} -{{- $config := mergeOverwrite $config $newConfig }} +{{- $config := mergeOverwrite $store $newConfig }} {{- end }} {{- if and (eq .Values.redis.master.service.type "LoadBalancer") (not (empty .Values.redis.master.service.loadBalancerIP)) }} {{- $newConfig := dict "redis_config" (dict "host" .Values.redis.master.service.loadBalancerIP "port" .Values.redis.redisPort) }} -{{- $config := mergeOverwrite $config $newConfig }} +{{- $config := mergeOverwrite $store $newConfig }} {{- end }} {{- end }} -{{- toYaml $config | nindent 4 }} +{{- toYaml $store | nindent 4 }} + +{{- range $name, $content := .Values.springConfigProfiles }} + application-{{ $name }}.yaml: | +{{- toYaml $content | nindent 4 }} +{{- end }} diff --git a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml index e6824a23465..64dd3955d0c 100644 --- 
a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml @@ -49,7 +49,7 @@ spec: - name: {{ .Chart.Name }} image: '{{ .Values.image.repository }}:{{ required "No .image.tag found. This must be provided as input." .Values.image.tag }}' imagePullPolicy: {{ .Values.image.pullPolicy }} - + volumeMounts: - name: {{ template "feast-serving.fullname" . }}-config mountPath: "{{ .Values.springConfigMountPath }}" @@ -60,24 +60,40 @@ spec: {{- end }} env: + - name: LOG_TYPE + value: {{ .Values.logType | quote }} + - name: LOG_LEVEL + value: {{ .Values.logLevel | quote }} + {{- if .Values.gcpServiceAccount.useExistingSecret }} - name: GOOGLE_APPLICATION_CREDENTIALS value: {{ .Values.gcpServiceAccount.mountPath }}/{{ .Values.gcpServiceAccount.existingSecret.key }} {{- end }} + {{- if .Values.gcpProjectId }} + - name: GOOGLE_CLOUD_PROJECT + value: {{ .Values.gcpProjectId | quote }} + {{- end }} command: - java {{- range .Values.jvmOptions }} - - {{ . }} + - {{ . | quote }} + {{- end }} + - -jar + - {{ .Values.jarPath | quote }} + - "--spring.config.location=file:{{ .Values.springConfigMountPath }}/" + {{- $profilesArray := splitList "," .Values.springConfigProfilesActive -}} + {{- $profilesArray = append $profilesArray (.Values.core.enabled | ternary "bundled-core" "") -}} + {{- $profilesArray = append $profilesArray (eq (include "bq_store_and_no_job_options" .) 
"true" | ternary "bundled-redis" "") -}} + {{- $profilesArray = compact $profilesArray -}} + {{- if $profilesArray }} + - "--spring.profiles.active={{ join "," $profilesArray }}" {{- end }} - - -jar - - /opt/feast/feast-serving.jar - - "--spring.config.location=file:{{ .Values.springConfigMountPath }}/application.yaml" ports: - name: http containerPort: {{ .Values.service.http.targetPort }} - - name: grpc + - name: grpc containerPort: {{ .Values.service.grpc.targetPort }} {{- if .Values.livenessProbe.enabled }} @@ -101,6 +117,6 @@ spec: timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} failureThreshold: {{ .Values.readinessProbe.failureThreshold }} {{- end }} - + resources: {{- toYaml .Values.resources | nindent 10 }} diff --git a/infra/charts/feast/charts/feast-serving/templates/ingress.yaml b/infra/charts/feast/charts/feast-serving/templates/ingress.yaml index c6b4cb07a81..1bcd176147a 100644 --- a/infra/charts/feast/charts/feast-serving/templates/ingress.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/ingress.yaml @@ -1,28 +1,7 @@ -{{- if .Values.ingress.enabled -}} -{{- $fullName := include "feast-serving.fullname" . -}} -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: {{ $fullName }} - labels: - app: {{ template "feast-serving.name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version }} - component: serving - heritage: {{ .Release.Service }} - release: {{ .Release.Name }} - annotations: -{{- with .Values.ingress.annotations }} -{{ toYaml . | indent 4 }} +{{- if .Values.ingress.http.enabled -}} +{{ template "feast.ingress" (list . "serving" "http" .Values.ingress.http) }} {{- end }} -spec: - rules: - {{- range .Values.ingress.hosts }} - - host: {{ .host | quote }} - http: - paths: - - path: / - backend: - serviceName: {{ $fullName }} - servicePort: {{ .port | quote }} - {{- end }} +--- +{{ if .Values.ingress.grpc.enabled -}} +{{ template "feast.ingress" (list . 
"serving" "grpc" .Values.ingress.grpc) }} {{- end }} diff --git a/infra/charts/feast/charts/feast-serving/values.yaml b/infra/charts/feast/charts/feast-serving/values.yaml index d2b3c599479..52d10cd7440 100644 --- a/infra/charts/feast/charts/feast-serving/values.yaml +++ b/infra/charts/feast/charts/feast-serving/values.yaml @@ -3,23 +3,23 @@ # for additional configuration redis: # enabled specifies whether Redis should be installed as part of Feast Serving. - # + # # If enabled, "redis_config" in store.yaml will be overwritten by Helm # to the configuration in this Redis installation. enabled: false # usePassword specifies if password is required to access Redis. Note that # Feast 0.3 does not support Redis with password. - usePassword: false + usePassword: false # cluster configuration for Redis. cluster: # enabled specifies if Redis should be installed in cluster mode. enabled: false -# core configures Feast Core in the same parent feast chart that this Feast +# core configures Feast Core in the same parent feast chart that this Feast # Serving connects to. core: # enabled specifies that Feast Serving will use Feast Core installed - # in the same parent feast chart. If enabled, Helm will overwrite + # in the same parent feast chart. If enabled, Helm will overwrite # "feast.core-host" in application.yaml with the correct value. enabled: true @@ -37,7 +37,7 @@ image: # Refer to https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/serving/src/main/resources/application.yml # for a complete list and description of the configuration. # -# Note that some properties defined in application.yaml may be overridden by +# Note that some properties defined in application.yaml may be overridden by # Helm under certain conditions. For example, if core is enabled, then # "feast.core-host" will be overridden. 
Also, if "type: BIGQUERY" is specified # in store.yaml, "feast.jobs.store-options" will be overridden as well with @@ -66,19 +66,19 @@ application.yaml: port: 8080 # store.yaml is the configuration for Feast Store. -# +# # Refer to this link for description: # https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/protos/feast/core/Store.proto # # Use the correct store configuration depending on whether the installed # Feast Serving is "online" or "batch", by uncommenting the correct store.yaml. # -# Note that if "redis.enabled: true" and "type: REDIS" in store.yaml, +# Note that if "redis.enabled: true" and "type: REDIS" in store.yaml, # Helm will override "redis_config" with configuration of Redis installed # in this chart. -# +# # Note that if "type: BIGQUERY" in store.yaml, Helm assumes Feast Online serving -# is also installed with Redis store. Helm will then override "feast.jobs.store-options" +# is also installed with Redis store. Helm will then override "feast.jobs.store-options" # in application.yaml with the installed Redis store configuration. This is # because in Feast 0.3, Redis job store is required. # @@ -104,7 +104,14 @@ application.yaml: # name: "*" # version: "*" -# springConfigMountPath is the directory path where application.yaml and +springConfigProfiles: {} +# db: | +# spring: +# datasource: +# driverClassName: org.postgresql.Driver +# url: jdbc:postgresql://${DB_HOST:127.0.0.1}:${DB_PORT:5432}/${DB_DATABASE:postgres} +springConfigProfilesActive: "" +# springConfigMountPath is the directory path where application.yaml and # store.yaml will be mounted in the container. springConfigMountPath: /etc/feast/feast-serving @@ -115,7 +122,7 @@ gcpServiceAccount: useExistingSecret: false existingSecret: # name is the secret name of the existing secret for the service account. - name: feast-gcp-service-account + name: feast-gcp-service-account # key is the secret key of the existing secret for the service account. 
# key is normally derived from the file name of the JSON key file. key: key.json @@ -123,19 +130,29 @@ gcpServiceAccount: # the value of "existingSecret.key" is file name of the service account file. mountPath: /etc/gcloud/service-accounts -# jvmOptions are options that will be passed to the Java Virtual Machine (JVM) +# Project ID picked up by the Cloud SDK (e.g. BigQuery run against this project) +gcpProjectId: "" + +# Path to Jar file in the Docker image. +# If using gcr.io/kf-feast/feast-serving this should not need to be changed. +jarPath: /opt/feast/feast-serving.jar + +# jvmOptions are options that will be passed to the Java Virtual Machine (JVM) # running Feast Core. -# +# # For example, it is good practice to set min and max heap size in JVM. # https://stackoverflow.com/questions/6902135/side-effect-for-increasing-maxpermsize-and-max-heap-size # # Refer to https://docs.oracle.com/cd/E22289_01/html/821-1274/configuring-the-default-jvm-and-java-arguments.html # to see other JVM options that can be set. # -# jvmOptions: -# - -Xms768m +jvmOptions: [] +# - -Xms768m # - -Xmx768m +logType: JSON +logLevel: warn + livenessProbe: enabled: false initialDelaySeconds: 60 @@ -170,12 +187,29 @@ service: # nodePort: ingress: - enabled: false - annotations: {} - # kubernetes.io/ingress.class: nginx - hosts: - # - host: chart-example.local - # port: http + grpc: + enabled: false + class: nginx + hosts: [] + annotations: {} + https: + enabled: true + secretNames: {} + whitelist: "" + auth: + enabled: false + http: + enabled: false + class: nginx + hosts: [] + annotations: {} + https: + enabled: true + secretNames: {} + whitelist: "" + auth: + enabled: false + authUrl: http://auth-server.auth-ns.svc.cluster.local/auth prometheus: enabled: true @@ -185,6 +219,7 @@ resources: {} # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. 
If you do want to specify resources, uncomment the following # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # # limits: # cpu: 100m # memory: 128Mi diff --git a/infra/charts/feast/requirements.lock b/infra/charts/feast/requirements.lock index 8afd9521573..e441790dc76 100644 --- a/infra/charts/feast/requirements.lock +++ b/infra/charts/feast/requirements.lock @@ -1,12 +1,6 @@ dependencies: -- name: feast-core - repository: "" - version: 0.3.2 -- name: feast-serving - repository: "" - version: 0.3.2 -- name: feast-serving - repository: "" - version: 0.3.2 -digest: sha256:7ee4cd271cbd4ace44817dd12ba65f490a8e3529adf199604a2c2bdad9c2fac3 -generated: "2019-11-27T13:35:41.334054+08:00" +- name: common + repository: https://kubernetes-charts-incubator.storage.googleapis.com + version: 0.0.5 +digest: sha256:935bfb09e9ed90ff800826a7df21adaabe3225511c3ad78df44e1a5a60e93f14 +generated: 2019-12-10T14:47:49.57569Z diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index 5416ded3fee..1fa1826965a 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -9,4 +9,4 @@ dependencies: - name: feast-serving alias: feast-serving-online version: 0.4.4 - condition: feast-serving-online.enabled + condition: feast-serving-online.enabled \ No newline at end of file diff --git a/infra/charts/feast/values-demo.yaml b/infra/charts/feast/values-demo.yaml index fad4bc0afb0..2cb5ccbe741 100644 --- a/infra/charts/feast/values-demo.yaml +++ b/infra/charts/feast/values-demo.yaml @@ -1,7 +1,7 @@ # The following are values for installing Feast for demonstration purpose: # - Persistence is disabled since for demo purpose data is not expected # to be durable -# - Only online serving (no batch serving) is installed to remove dependency +# - Only online serving (no batch serving) is installed to remove dependency # on Google Cloud services. Batch serving requires BigQuery dependency. 
# - Replace all occurrences of "feast.example.com" with the domain name or # external IP pointing to your cluster @@ -68,4 +68,17 @@ feast-serving-online: version: "*" feast-serving-batch: - enabled: false +# enabled: false + enabled: true + store.yaml: + name: bigquery + type: BIGQUERY + bigquery_config: + project_id: PROJECT_ID + dataset_id: DATASET_ID + subscriptions: + - project: "*" + name: "*" + version: "*" + redis: + enabled: false \ No newline at end of file diff --git a/infra/charts/feast/values.yaml b/infra/charts/feast/values.yaml index f9a0a76dc1b..fde03f9ad71 100644 --- a/infra/charts/feast/values.yaml +++ b/infra/charts/feast/values.yaml @@ -2,10 +2,12 @@ # - Feast Core # - Feast Serving Online # - Feast Serving Batch +# - Prometheus StatsD Exporter # # The configuration for different components can be referenced from: # - charts/feast-core/values.yaml # - charts/feast-serving/values.yaml +# - charts/prometheus-statsd-exporter/values.yaml # # Note that "feast-serving-online" and "feast-serving-batch" are # aliases to "feast-serving" chart since in typical scenario two instances @@ -235,11 +237,11 @@ feast-serving-batch: # enabled as well. So Feast Serving Batch will share the same # Redis instance to store job statuses. store-type: REDIS - store-options: - # Use the externally exposed redis instance deployed by Online service - # Please set EXTERNAL_IP to your cluster's external IP - host: EXTERNAL_IP - port: 32101 + # Default to use the internal hostname of the redis instance deployed by Online service, + # otherwise use externally exposed by setting EXTERNAL_IP to your cluster's external IP + # store-options: + # host: EXTERNAL_IP + # port: 32101 # store.yaml is the configuration for Feast Store. 
# # Refer to this link for more description: From 85f398ed5b4ce4bceac0bc2a3c5a792092667c64 Mon Sep 17 00:00:00 2001 From: Chen Zhiling Date: Thu, 13 Feb 2020 17:57:37 +0800 Subject: [PATCH 11/15] Make redis key creation more determinisitic (#380) (#471) * Make redis key creation more determinisitic (#380) * Add documentation to RedisKey in Redis.proto Ensure entities are sorted by the name Co-authored-by: David Heryanto --- .../redis/FeatureRowToRedisMutationDoFn.java | 16 +- .../FeatureRowToRedisMutationDoFnTest.java | 183 ++++++++++++++++++ protos/feast/storage/Redis.proto | 3 +- 3 files changed, 197 insertions(+), 5 deletions(-) create mode 100644 ingestion/src/test/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFnTest.java diff --git a/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java b/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java index 27cca2ffb2e..4b744d0fe6b 100644 --- a/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java +++ b/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java @@ -24,8 +24,9 @@ import feast.store.serving.redis.RedisCustomIO.RedisMutation; import feast.types.FeatureRowProto.FeatureRow; import feast.types.FieldProto.Field; +import java.util.HashMap; +import java.util.List; import java.util.Map; -import java.util.Set; import java.util.stream.Collectors; import org.apache.beam.sdk.transforms.DoFn; import org.slf4j.Logger; @@ -42,17 +43,24 @@ public FeatureRowToRedisMutationDoFn(Map featureSets) { private RedisKey getKey(FeatureRow featureRow) { FeatureSet featureSet = featureSets.get(featureRow.getFeatureSet()); - Set entityNames = + List entityNames = featureSet.getSpec().getEntitiesList().stream() .map(EntitySpec::getName) - .collect(Collectors.toSet()); + .sorted() + .collect(Collectors.toList()); + Map entityFields = new HashMap<>(); Builder redisKeyBuilder = 
RedisKey.newBuilder().setFeatureSet(featureRow.getFeatureSet()); for (Field field : featureRow.getFieldsList()) { if (entityNames.contains(field.getName())) { - redisKeyBuilder.addEntities(field); + entityFields.putIfAbsent( + field.getName(), + Field.newBuilder().setName(field.getName()).setValue(field.getValue()).build()); } } + for (String entityName : entityNames) { + redisKeyBuilder.addEntities(entityFields.get(entityName)); + } return redisKeyBuilder.build(); } diff --git a/ingestion/src/test/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFnTest.java b/ingestion/src/test/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFnTest.java new file mode 100644 index 00000000000..92bb6e41c38 --- /dev/null +++ b/ingestion/src/test/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFnTest.java @@ -0,0 +1,183 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.store.serving.redis; + +import static org.junit.Assert.*; + +import com.google.protobuf.Timestamp; +import feast.core.FeatureSetProto; +import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSpec; +import feast.storage.RedisProto.RedisKey; +import feast.store.serving.redis.RedisCustomIO.RedisMutation; +import feast.types.FeatureRowProto.FeatureRow; +import feast.types.FieldProto.Field; +import feast.types.ValueProto.Value; +import feast.types.ValueProto.ValueType.Enum; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.beam.sdk.extensions.protobuf.ProtoCoder; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.values.PCollection; +import org.junit.Rule; +import org.junit.Test; + +public class FeatureRowToRedisMutationDoFnTest { + + @Rule public transient TestPipeline p = TestPipeline.create(); + + private FeatureSetProto.FeatureSet fs = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder() + .setName("feature_set") + .setVersion(1) + .addEntities( + EntitySpec.newBuilder() + .setName("entity_id_primary") + .setValueType(Enum.INT32) + .build()) + .addEntities( + EntitySpec.newBuilder() + .setName("entity_id_secondary") + .setValueType(Enum.STRING) + .build()) + .addFeatures( + FeatureSpec.newBuilder() + .setName("feature_1") + .setValueType(Enum.STRING) + .build()) + .addFeatures( + FeatureSpec.newBuilder() + .setName("feature_2") + .setValueType(Enum.INT64) + .build())) + .build(); + + @Test + public void shouldConvertRowWithDuplicateEntitiesToValidKey() { + Map featureSets = new HashMap<>(); + featureSets.put("feature_set", fs); + + 
FeatureRow offendingRow = + FeatureRow.newBuilder() + .setFeatureSet("feature_set") + .setEventTimestamp(Timestamp.newBuilder().setSeconds(10)) + .addFields( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(1))) + .addFields( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(2))) + .addFields( + Field.newBuilder() + .setName("entity_id_secondary") + .setValue(Value.newBuilder().setStringVal("a"))) + .build(); + + PCollection output = + p.apply(Create.of(Collections.singletonList(offendingRow))) + .setCoder(ProtoCoder.of(FeatureRow.class)) + .apply(ParDo.of(new FeatureRowToRedisMutationDoFn(featureSets))); + + RedisKey expectedKey = + RedisKey.newBuilder() + .setFeatureSet("feature_set") + .addEntities( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(1))) + .addEntities( + Field.newBuilder() + .setName("entity_id_secondary") + .setValue(Value.newBuilder().setStringVal("a"))) + .build(); + + PAssert.that(output) + .satisfies( + (SerializableFunction, Void>) + input -> { + input.forEach( + rm -> { + assert (Arrays.equals(rm.getKey(), expectedKey.toByteArray())); + assert (Arrays.equals(rm.getValue(), offendingRow.toByteArray())); + }); + return null; + }); + p.run(); + } + + @Test + public void shouldConvertRowWithOutOfOrderEntitiesToValidKey() { + Map featureSets = new HashMap<>(); + featureSets.put("feature_set", fs); + + FeatureRow offendingRow = + FeatureRow.newBuilder() + .setFeatureSet("feature_set") + .setEventTimestamp(Timestamp.newBuilder().setSeconds(10)) + .addFields( + Field.newBuilder() + .setName("entity_id_secondary") + .setValue(Value.newBuilder().setStringVal("a"))) + .addFields( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(1))) + .build(); + + PCollection output = + p.apply(Create.of(Collections.singletonList(offendingRow))) + 
.setCoder(ProtoCoder.of(FeatureRow.class)) + .apply(ParDo.of(new FeatureRowToRedisMutationDoFn(featureSets))); + + RedisKey expectedKey = + RedisKey.newBuilder() + .setFeatureSet("feature_set") + .addEntities( + Field.newBuilder() + .setName("entity_id_primary") + .setValue(Value.newBuilder().setInt32Val(1))) + .addEntities( + Field.newBuilder() + .setName("entity_id_secondary") + .setValue(Value.newBuilder().setStringVal("a"))) + .build(); + + PAssert.that(output) + .satisfies( + (SerializableFunction, Void>) + input -> { + input.forEach( + rm -> { + assert (Arrays.equals(rm.getKey(), expectedKey.toByteArray())); + assert (Arrays.equals(rm.getValue(), offendingRow.toByteArray())); + }); + return null; + }); + p.run(); + } +} diff --git a/protos/feast/storage/Redis.proto b/protos/feast/storage/Redis.proto index ae287f4e6bf..f58b137e9c1 100644 --- a/protos/feast/storage/Redis.proto +++ b/protos/feast/storage/Redis.proto @@ -32,6 +32,7 @@ message RedisKey { string feature_set = 2; // List of fields containing entity names and their respective values - // contained within this feature row. + // contained within this feature row. The entities should be sorted + // by the entity name alphabetically in ascending order. 
repeated feast.types.Field entities = 3; } From f12f55cdba9549085d856f6f78aa94926ec708b6 Mon Sep 17 00:00:00 2001 From: Khor Shu Heng <32997938+khorshuheng@users.noreply.github.com> Date: Fri, 14 Feb 2020 13:56:37 +0800 Subject: [PATCH 12/15] Use bzip2 compressed feature set json as pipeline option (#466) * Use bzip2 compressed feature set json as pipeline option * Make decompressor and compressor more generic and extensible * Avoid code duplication in test --- .../core/job/dataflow/DataflowJobManager.java | 40 +++++---- .../job/direct/DirectRunnerJobManager.java | 21 +++-- .../option/FeatureSetJsonByteConverter.java | 47 +++++++++++ .../job/dataflow/DataflowJobManagerTest.java | 36 +++++--- .../direct/DirectRunnerJobManagerTest.java | 25 +++++- .../FeatureSetJsonByteConverterTest.java | 83 +++++++++++++++++++ .../main/java/feast/ingestion/ImportJob.java | 14 ++-- .../ingestion/options/BZip2Compressor.java | 47 +++++++++++ .../ingestion/options/BZip2Decompressor.java | 38 +++++++++ .../ingestion/options/ImportOptions.java | 6 +- .../options/InputStreamConverter.java | 31 +++++++ .../options/OptionByteConverter.java | 30 +++++++ .../ingestion/options/OptionCompressor.java | 31 +++++++ .../ingestion/options/OptionDecompressor.java | 30 +++++++ .../options/StringListStreamConverter.java | 41 +++++++++ .../java/feast/ingestion/ImportJobTest.java | 17 ++-- .../options/BZip2CompressorTest.java | 40 +++++++++ .../options/BZip2DecompressorTest.java | 48 +++++++++++ .../StringListStreamConverterTest.java | 36 ++++++++ .../{util => utils}/DateUtilTest.java | 7 +- .../{util => utils}/JsonUtilTest.java | 3 +- .../{util => utils}/StoreUtilTest.java | 18 +--- 22 files changed, 609 insertions(+), 80 deletions(-) create mode 100644 core/src/main/java/feast/core/job/option/FeatureSetJsonByteConverter.java create mode 100644 core/src/test/java/feast/core/job/option/FeatureSetJsonByteConverterTest.java create mode 100644 
ingestion/src/main/java/feast/ingestion/options/BZip2Compressor.java create mode 100644 ingestion/src/main/java/feast/ingestion/options/BZip2Decompressor.java create mode 100644 ingestion/src/main/java/feast/ingestion/options/InputStreamConverter.java create mode 100644 ingestion/src/main/java/feast/ingestion/options/OptionByteConverter.java create mode 100644 ingestion/src/main/java/feast/ingestion/options/OptionCompressor.java create mode 100644 ingestion/src/main/java/feast/ingestion/options/OptionDecompressor.java create mode 100644 ingestion/src/main/java/feast/ingestion/options/StringListStreamConverter.java create mode 100644 ingestion/src/test/java/feast/ingestion/options/BZip2CompressorTest.java create mode 100644 ingestion/src/test/java/feast/ingestion/options/BZip2DecompressorTest.java create mode 100644 ingestion/src/test/java/feast/ingestion/options/StringListStreamConverterTest.java rename ingestion/src/test/java/feast/ingestion/{util => utils}/DateUtilTest.java (92%) rename ingestion/src/test/java/feast/ingestion/{util => utils}/JsonUtilTest.java (95%) rename ingestion/src/test/java/feast/ingestion/{util => utils}/StoreUtilTest.java (91%) diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index 2de46ae1f2d..d80d6547186 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -22,7 +22,6 @@ import com.google.common.base.Strings; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; -import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto; import feast.core.SourceProto; import feast.core.StoreProto; @@ -30,15 +29,13 @@ import feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; -import feast.core.model.FeatureSet; -import 
feast.core.model.Job; -import feast.core.model.JobStatus; -import feast.core.model.Project; -import feast.core.model.Source; -import feast.core.model.Store; +import feast.core.job.option.FeatureSetJsonByteConverter; +import feast.core.model.*; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionCompressor; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -88,7 +85,12 @@ public Job startJob(Job job) { job.getStore().toProto(), false); } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(String.format("Unable to start job %s", job.getId()), e); + log.error(e.getMessage()); + throw new IllegalArgumentException( + String.format( + "DataflowJobManager failed to START job with id '%s' because the job" + + "has an invalid spec. Please check the FeatureSet, Source and Store specs. Actual error message: %s", + job.getId(), e.getMessage())); } } @@ -103,12 +105,15 @@ public Job updateJob(Job job) { try { List featureSetProtos = job.getFeatureSets().stream().map(FeatureSet::toProto).collect(Collectors.toList()); - return submitDataflowJob( job.getId(), featureSetProtos, job.getSource().toProto(), job.getStore().toProto(), true); - } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(String.format("Unable to update job %s", job.getId()), e); + log.error(e.getMessage()); + throw new IllegalArgumentException( + String.format( + "DataflowJobManager failed to UPDATE job with id '%s' because the job" + + "has an invalid spec. Please check the FeatureSet, Source and Store specs. 
Actual error message: %s", + job.getId(), e.getMessage())); } } @@ -210,13 +215,12 @@ private ImportOptions getPipelineOptions( throws IOException { String[] args = TypeConversion.convertMapToArgs(defaultOptions); ImportOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).as(ImportOptions.class); - Printer printer = JsonFormat.printer(); - List featureSetsJson = new ArrayList<>(); - for (FeatureSetProto.FeatureSet featureSet : featureSets) { - featureSetsJson.add(printer.print(featureSet.getSpec())); - } - pipelineOptions.setFeatureSetJson(featureSetsJson); - pipelineOptions.setStoreJson(Collections.singletonList(printer.print(sink))); + + OptionCompressor> featureSetJsonCompressor = + new BZip2Compressor<>(new FeatureSetJsonByteConverter()); + + pipelineOptions.setFeatureSetJson(featureSetJsonCompressor.compress(featureSets)); + pipelineOptions.setStoreJson(Collections.singletonList(JsonFormat.printer().print(sink))); pipelineOptions.setProject(projectId); pipelineOptions.setUpdate(update); pipelineOptions.setRunner(DataflowRunner.class); diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index fdf3aad9bc3..35ab45e6306 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -17,9 +17,7 @@ package feast.core.job.direct; import com.google.common.base.Strings; -import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; -import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.StoreProto; @@ -27,12 +25,15 @@ import feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; +import feast.core.job.option.FeatureSetJsonByteConverter; import feast.core.model.FeatureSet; 
import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionCompressor; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -92,17 +93,15 @@ public Job startJob(Job job) { } private ImportOptions getPipelineOptions( - List featureSets, StoreProto.Store sink) - throws InvalidProtocolBufferException { + List featureSets, StoreProto.Store sink) throws IOException { String[] args = TypeConversion.convertMapToArgs(defaultOptions); ImportOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).as(ImportOptions.class); - Printer printer = JsonFormat.printer(); - List featureSetsJson = new ArrayList<>(); - for (FeatureSetProto.FeatureSet featureSet : featureSets) { - featureSetsJson.add(printer.print(featureSet.getSpec())); - } - pipelineOptions.setFeatureSetJson(featureSetsJson); - pipelineOptions.setStoreJson(Collections.singletonList(printer.print(sink))); + + OptionCompressor> featureSetJsonCompressor = + new BZip2Compressor<>(new FeatureSetJsonByteConverter()); + + pipelineOptions.setFeatureSetJson(featureSetJsonCompressor.compress(featureSets)); + pipelineOptions.setStoreJson(Collections.singletonList(JsonFormat.printer().print(sink))); pipelineOptions.setRunner(DirectRunner.class); pipelineOptions.setProject(""); // set to default value to satisfy validation if (metrics.isEnabled()) { diff --git a/core/src/main/java/feast/core/job/option/FeatureSetJsonByteConverter.java b/core/src/main/java/feast/core/job/option/FeatureSetJsonByteConverter.java new file mode 100644 index 00000000000..dbd04d668fd --- /dev/null +++ b/core/src/main/java/feast/core/job/option/FeatureSetJsonByteConverter.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.job.option; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.JsonFormat; +import feast.core.FeatureSetProto; +import feast.ingestion.options.OptionByteConverter; +import java.util.ArrayList; +import java.util.List; + +public class FeatureSetJsonByteConverter + implements OptionByteConverter> { + + /** + * Convert list of feature sets to json strings joined by new line, represented as byte arrays + * + * @param featureSets List of feature set protobufs + * @return Byte array representation of the json strings + * @throws InvalidProtocolBufferException + */ + @Override + public byte[] toByte(List featureSets) + throws InvalidProtocolBufferException { + JsonFormat.Printer printer = + JsonFormat.printer().omittingInsignificantWhitespace().printingEnumsAsInts(); + List featureSetsJson = new ArrayList<>(); + for (FeatureSetProto.FeatureSet featureSet : featureSets) { + featureSetsJson.add(printer.print(featureSet.getSpec())); + } + return String.join("\n", featureSetsJson).getBytes(); + } +} diff --git a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java index c263515ed08..9f26c6919e4 100644 --- a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java +++ b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java @@ -19,11 +19,7 @@ import static 
org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; +import static org.mockito.Mockito.*; import static org.mockito.MockitoAnnotations.initMocks; import com.google.api.services.dataflow.Dataflow; @@ -44,14 +40,15 @@ import feast.core.config.FeastProperties.MetricsProperties; import feast.core.exception.JobExecutionException; import feast.core.job.Runner; -import feast.core.model.FeatureSet; -import feast.core.model.Job; -import feast.core.model.JobStatus; -import feast.core.model.Source; -import feast.core.model.Store; +import feast.core.job.option.FeatureSetJsonByteConverter; +import feast.core.model.*; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionCompressor; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.beam.runners.dataflow.DataflowPipelineJob; import org.apache.beam.runners.dataflow.DataflowRunner; @@ -131,8 +128,11 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { expectedPipelineOptions.setAppName("DataflowJobManager"); expectedPipelineOptions.setJobName(jobName); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); + + OptionCompressor> featureSetJsonCompressor = + new BZip2Compressor<>(new FeatureSetJsonByteConverter()); expectedPipelineOptions.setFeatureSetJson( - Lists.newArrayList(printer.print(featureSet.getSpec()))); + featureSetJsonCompressor.compress(Collections.singletonList(featureSet))); ArgumentCaptor captor = ArgumentCaptor.forClass(ImportOptions.class); @@ -170,7 +170,19 @@ public void 
shouldStartJobWithCorrectPipelineOptions() throws IOException { // Assume the files that are staged are correct expectedPipelineOptions.setFilesToStage(actualPipelineOptions.getFilesToStage()); - assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); + assertThat( + actualPipelineOptions.getFeatureSetJson(), + equalTo(expectedPipelineOptions.getFeatureSetJson())); + assertThat( + actualPipelineOptions.getDeadLetterTableSpec(), + equalTo(expectedPipelineOptions.getDeadLetterTableSpec())); + assertThat( + actualPipelineOptions.getStatsdHost(), equalTo(expectedPipelineOptions.getStatsdHost())); + assertThat( + actualPipelineOptions.getMetricsExporterType(), + equalTo(expectedPipelineOptions.getMetricsExporterType())); + assertThat( + actualPipelineOptions.getStoreJson(), equalTo(expectedPipelineOptions.getStoreJson())); assertThat(actual.getExtId(), equalTo(expectedExtJobId)); } diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index 2dd87cfc6e3..64412f4391e 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -40,14 +40,19 @@ import feast.core.StoreProto.Store.Subscription; import feast.core.config.FeastProperties.MetricsProperties; import feast.core.job.Runner; +import feast.core.job.option.FeatureSetJsonByteConverter; import feast.core.model.FeatureSet; import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.model.Source; import feast.core.model.Store; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionCompressor; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.beam.runners.direct.DirectRunner; import 
org.apache.beam.sdk.PipelineResult; @@ -121,8 +126,11 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { expectedPipelineOptions.setProject(""); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); expectedPipelineOptions.setProject(""); + + OptionCompressor> featureSetJsonCompressor = + new BZip2Compressor<>(new FeatureSetJsonByteConverter()); expectedPipelineOptions.setFeatureSetJson( - Lists.newArrayList(printer.print(featureSet.getSpec()))); + featureSetJsonCompressor.compress(Collections.singletonList(featureSet))); String expectedJobId = "feast-job-0"; ArgumentCaptor pipelineOptionsCaptor = @@ -150,7 +158,20 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { expectedPipelineOptions.setOptionsId( actualPipelineOptions.getOptionsId()); // avoid comparing this value - assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); + assertThat( + actualPipelineOptions.getFeatureSetJson(), + equalTo(expectedPipelineOptions.getFeatureSetJson())); + assertThat( + actualPipelineOptions.getDeadLetterTableSpec(), + equalTo(expectedPipelineOptions.getDeadLetterTableSpec())); + assertThat( + actualPipelineOptions.getStatsdHost(), equalTo(expectedPipelineOptions.getStatsdHost())); + assertThat( + actualPipelineOptions.getMetricsExporterType(), + equalTo(expectedPipelineOptions.getMetricsExporterType())); + assertThat( + actualPipelineOptions.getStoreJson(), equalTo(expectedPipelineOptions.getStoreJson())); + assertThat(jobStarted.getPipelineResult(), equalTo(mockPipelineResult)); assertThat(jobStarted.getJobId(), equalTo(expectedJobId)); assertThat(actual.getExtId(), equalTo(expectedJobId)); diff --git a/core/src/test/java/feast/core/job/option/FeatureSetJsonByteConverterTest.java b/core/src/test/java/feast/core/job/option/FeatureSetJsonByteConverterTest.java new file mode 100644 index 00000000000..2dfeef1d969 --- /dev/null +++ 
b/core/src/test/java/feast/core/job/option/FeatureSetJsonByteConverterTest.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.job.option; + +import static org.junit.Assert.*; + +import com.google.protobuf.InvalidProtocolBufferException; +import feast.core.FeatureSetProto; +import feast.core.SourceProto; +import feast.types.ValueProto; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.junit.Test; + +public class FeatureSetJsonByteConverterTest { + + private FeatureSetProto.FeatureSet newFeatureSet(Integer version, Integer numberOfFeatures) { + List features = + IntStream.range(1, numberOfFeatures + 1) + .mapToObj( + i -> + FeatureSetProto.FeatureSpec.newBuilder() + .setValueType(ValueProto.ValueType.Enum.FLOAT) + .setName("feature".concat(Integer.toString(i))) + .build()) + .collect(Collectors.toList()); + + return FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetProto.FeatureSetSpec.newBuilder() + .setSource( + SourceProto.Source.newBuilder() + .setType(SourceProto.SourceType.KAFKA) + .setKafkaSourceConfig( + SourceProto.KafkaSourceConfig.newBuilder() + .setBootstrapServers("somebrokers:9092") + .setTopic("sometopic"))) + .addAllFeatures(features) + .setVersion(version) + .addEntities( + FeatureSetProto.EntitySpec.newBuilder() + .setName("entity") + 
.setValueType(ValueProto.ValueType.Enum.STRING))) + .build(); + } + + @Test + public void shouldConvertFeatureSetsAsJsonStringBytes() throws InvalidProtocolBufferException { + int nrOfFeatureSet = 1; + int nrOfFeatures = 1; + List featureSets = + IntStream.range(1, nrOfFeatureSet + 1) + .mapToObj(i -> newFeatureSet(i, nrOfFeatures)) + .collect(Collectors.toList()); + + String expectedOutputString = + "{\"version\":1," + + "\"entities\":[{\"name\":\"entity\",\"valueType\":2}]," + + "\"features\":[{\"name\":\"feature1\",\"valueType\":6}]," + + "\"source\":{" + + "\"type\":1," + + "\"kafkaSourceConfig\":{" + + "\"bootstrapServers\":\"somebrokers:9092\"," + + "\"topic\":\"sometopic\"}}}"; + FeatureSetJsonByteConverter byteConverter = new FeatureSetJsonByteConverter(); + assertEquals(expectedOutputString, new String(byteConverter.toByte(featureSets))); + } +} diff --git a/ingestion/src/main/java/feast/ingestion/ImportJob.java b/ingestion/src/main/java/feast/ingestion/ImportJob.java index 41af5f9bb40..c4973ce3cae 100644 --- a/ingestion/src/main/java/feast/ingestion/ImportJob.java +++ b/ingestion/src/main/java/feast/ingestion/ImportJob.java @@ -22,7 +22,9 @@ import feast.core.FeatureSetProto.FeatureSet; import feast.core.SourceProto.Source; import feast.core.StoreProto.Store; +import feast.ingestion.options.BZip2Decompressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.StringListStreamConverter; import feast.ingestion.transform.ReadFromSource; import feast.ingestion.transform.ValidateFeatureRows; import feast.ingestion.transform.WriteFailedElementToBigQuery; @@ -33,6 +35,7 @@ import feast.ingestion.utils.StoreUtil; import feast.ingestion.values.FailedElement; import feast.types.FeatureRowProto.FeatureRow; +import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -57,15 +60,14 @@ public class ImportJob { * @param args arguments to be passed to Beam pipeline * @throws 
InvalidProtocolBufferException if options passed to the pipeline are invalid */ - public static void main(String[] args) throws InvalidProtocolBufferException { + public static void main(String[] args) throws IOException { ImportOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create().as(ImportOptions.class); runPipeline(options); } @SuppressWarnings("UnusedReturnValue") - public static PipelineResult runPipeline(ImportOptions options) - throws InvalidProtocolBufferException { + public static PipelineResult runPipeline(ImportOptions options) throws IOException { /* * Steps: * 1. Read messages from Feast Source as FeatureRow @@ -80,8 +82,10 @@ public static PipelineResult runPipeline(ImportOptions options) log.info("Starting import job with settings: \n{}", options.toString()); - List featureSets = - SpecUtil.parseFeatureSetSpecJsonList(options.getFeatureSetJson()); + BZip2Decompressor> decompressor = + new BZip2Decompressor<>(new StringListStreamConverter()); + List featureSetJson = decompressor.decompress(options.getFeatureSetJson()); + List featureSets = SpecUtil.parseFeatureSetSpecJsonList(featureSetJson); List stores = SpecUtil.parseStoreJsonList(options.getStoreJson()); for (Store store : stores) { diff --git a/ingestion/src/main/java/feast/ingestion/options/BZip2Compressor.java b/ingestion/src/main/java/feast/ingestion/options/BZip2Compressor.java new file mode 100644 index 00000000000..b7e4e6ee0af --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/BZip2Compressor.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; + +public class BZip2Compressor implements OptionCompressor { + + private final OptionByteConverter byteConverter; + + public BZip2Compressor(OptionByteConverter byteConverter) { + this.byteConverter = byteConverter; + } + /** + * Compress pipeline option using BZip2 + * + * @param option Pipeline option value + * @return BZip2 compressed option value + * @throws IOException + */ + @Override + public byte[] compress(T option) throws IOException { + ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); + try (BZip2CompressorOutputStream bzip2Output = + new BZip2CompressorOutputStream(compressedStream)) { + bzip2Output.write(byteConverter.toByte(option)); + } + + return compressedStream.toByteArray(); + } +} diff --git a/ingestion/src/main/java/feast/ingestion/options/BZip2Decompressor.java b/ingestion/src/main/java/feast/ingestion/options/BZip2Decompressor.java new file mode 100644 index 00000000000..ce49c1be6e6 --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/BZip2Decompressor.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; + +public class BZip2Decompressor implements OptionDecompressor { + + private final InputStreamConverter inputStreamConverter; + + public BZip2Decompressor(InputStreamConverter inputStreamConverter) { + this.inputStreamConverter = inputStreamConverter; + } + + @Override + public T decompress(byte[] compressed) throws IOException { + try (ByteArrayInputStream inputStream = new ByteArrayInputStream(compressed); + BZip2CompressorInputStream bzip2Input = new BZip2CompressorInputStream(inputStream)) { + return inputStreamConverter.readStream(bzip2Input); + } + } +} diff --git a/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java b/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java index b299bb47e55..6afdd80dd72 100644 --- a/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java +++ b/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java @@ -28,16 +28,16 @@ public interface ImportOptions extends PipelineOptions, DataflowPipelineOptions, DirectOptions { @Required @Description( - "JSON string representation of the FeatureSet that the import job will process." + "JSON string representation of the FeatureSet that the import job will process, in BZip2 binary format." + "FeatureSet follows the format in feast.core.FeatureSet proto." 
+ "Mutliple FeatureSetSpec can be passed by specifying '--featureSet={...}' multiple times" + "The conversion of Proto message to JSON should follow this mapping:" + "https://developers.google.com/protocol-buffers/docs/proto3#json" + "Please minify and remove all insignificant whitespace such as newline in the JSON string" + "to prevent error when parsing the options") - List getFeatureSetJson(); + byte[] getFeatureSetJson(); - void setFeatureSetJson(List featureSetJson); + void setFeatureSetJson(byte[] featureSetJson); @Required @Description( diff --git a/ingestion/src/main/java/feast/ingestion/options/InputStreamConverter.java b/ingestion/src/main/java/feast/ingestion/options/InputStreamConverter.java new file mode 100644 index 00000000000..e2fef732368 --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/InputStreamConverter.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import java.io.IOException; +import java.io.InputStream; + +public interface InputStreamConverter { + + /** + * Used in conjunction with {@link OptionDecompressor} to decompress the pipeline option + * + * @param inputStream Input byte stream in compressed format + * @return Decompressed pipeline option value + */ + T readStream(InputStream inputStream) throws IOException; +} diff --git a/ingestion/src/main/java/feast/ingestion/options/OptionByteConverter.java b/ingestion/src/main/java/feast/ingestion/options/OptionByteConverter.java new file mode 100644 index 00000000000..ff5a41a627d --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/OptionByteConverter.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.IOException; + +public interface OptionByteConverter { + + /** + * Used in conjunction with {@link OptionCompressor} to compress the pipeline option + * + * @param option Pipeline option value + * @return byte representation of the pipeline option value, without compression. 
+ */ + byte[] toByte(T option) throws IOException; +} diff --git a/ingestion/src/main/java/feast/ingestion/options/OptionCompressor.java b/ingestion/src/main/java/feast/ingestion/options/OptionCompressor.java new file mode 100644 index 00000000000..b2345fc3eb1 --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/OptionCompressor.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.IOException; + +public interface OptionCompressor { + + /** + * Compress pipeline option into bytes format. This is necessary as some Beam runner has + * limitation in terms of pipeline option size. + * + * @param option Pipeline option value + * @return Compressed values of the option, as byte array + */ + byte[] compress(T option) throws IOException; +} diff --git a/ingestion/src/main/java/feast/ingestion/options/OptionDecompressor.java b/ingestion/src/main/java/feast/ingestion/options/OptionDecompressor.java new file mode 100644 index 00000000000..affeafdaa0b --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/OptionDecompressor.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.ingestion.options; + +import java.io.IOException; + +public interface OptionDecompressor { + + /** + * Decompress pipeline option from byte array. + * + * @param compressed Compressed pipeline option value + * @return Decompressed pipeline option + */ + T decompress(byte[] compressed) throws IOException; +} diff --git a/ingestion/src/main/java/feast/ingestion/options/StringListStreamConverter.java b/ingestion/src/main/java/feast/ingestion/options/StringListStreamConverter.java new file mode 100644 index 00000000000..d7277f3c7d6 --- /dev/null +++ b/ingestion/src/main/java/feast/ingestion/options/StringListStreamConverter.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.List; +import java.util.stream.Collectors; + +public class StringListStreamConverter implements InputStreamConverter> { + + /** + * Convert Input byte stream to newline separated strings + * + * @param inputStream Input byte stream + * @return List of string + */ + @Override + public List readStream(InputStream inputStream) throws IOException { + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + List stringList = reader.lines().collect(Collectors.toList()); + reader.close(); + return stringList; + } +} diff --git a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java index 290b38dabee..58ecae8f045 100644 --- a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java +++ b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java @@ -30,13 +30,16 @@ import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; import feast.core.StoreProto.Store.Subscription; +import feast.ingestion.options.BZip2Compressor; import feast.ingestion.options.ImportOptions; +import feast.ingestion.options.OptionByteConverter; import feast.storage.RedisProto.RedisKey; import feast.test.TestUtil; import feast.test.TestUtil.LocalKafka; import feast.test.TestUtil.LocalRedis; import feast.types.FeatureRowProto.FeatureRow; import feast.types.ValueProto.ValueType.Enum; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -48,6 +51,7 @@ import org.apache.beam.sdk.PipelineResult; import org.apache.beam.sdk.PipelineResult.State; import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import 
org.apache.kafka.common.serialization.ByteArraySerializer; import org.joda.time.Duration; import org.junit.AfterClass; @@ -162,12 +166,13 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() .build(); ImportOptions options = PipelineOptionsFactory.create().as(ImportOptions.class); - options.setFeatureSetJson( - Collections.singletonList( - JsonFormat.printer().omittingInsignificantWhitespace().print(featureSet.getSpec()))); - options.setStoreJson( - Collections.singletonList( - JsonFormat.printer().omittingInsignificantWhitespace().print(redis))); + BZip2Compressor compressor = new BZip2Compressor<>(option -> { + JsonFormat.Printer printer = + JsonFormat.printer().omittingInsignificantWhitespace().printingEnumsAsInts(); + return printer.print(option).getBytes(); + }); + options.setFeatureSetJson(compressor.compress(spec)); + options.setStoreJson(Collections.singletonList(JsonFormat.printer().print(redis))); options.setProject(""); options.setBlockOnRun(false); diff --git a/ingestion/src/test/java/feast/ingestion/options/BZip2CompressorTest.java b/ingestion/src/test/java/feast/ingestion/options/BZip2CompressorTest.java new file mode 100644 index 00000000000..cd03b18c793 --- /dev/null +++ b/ingestion/src/test/java/feast/ingestion/options/BZip2CompressorTest.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.junit.Assert; +import org.junit.Test; + +public class BZip2CompressorTest { + + @Test + public void shouldHaveBZip2CompatibleOutput() throws IOException { + BZip2Compressor compressor = new BZip2Compressor<>(String::getBytes); + String origString = "somestring"; + try (ByteArrayInputStream inputStream = + new ByteArrayInputStream(compressor.compress(origString)); + BZip2CompressorInputStream bzip2Input = new BZip2CompressorInputStream(inputStream); + BufferedReader reader = new BufferedReader(new InputStreamReader(bzip2Input))) { + Assert.assertEquals(origString, reader.readLine()); + } + } +} diff --git a/ingestion/src/test/java/feast/ingestion/options/BZip2DecompressorTest.java b/ingestion/src/test/java/feast/ingestion/options/BZip2DecompressorTest.java new file mode 100644 index 00000000000..fe7cc789d86 --- /dev/null +++ b/ingestion/src/test/java/feast/ingestion/options/BZip2DecompressorTest.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import static org.junit.Assert.*; + +import java.io.*; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.junit.Test; + +public class BZip2DecompressorTest { + + @Test + public void shouldDecompressBZip2Stream() throws IOException { + BZip2Decompressor decompressor = + new BZip2Decompressor<>( + inputStream -> { + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + String output = reader.readLine(); + reader.close(); + return output; + }); + + String originalString = "abc"; + ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); + try (BZip2CompressorOutputStream bzip2Output = + new BZip2CompressorOutputStream(compressedStream)) { + bzip2Output.write(originalString.getBytes()); + } + + String decompressedString = decompressor.decompress(compressedStream.toByteArray()); + assertEquals(originalString, decompressedString); + } +} diff --git a/ingestion/src/test/java/feast/ingestion/options/StringListStreamConverterTest.java b/ingestion/src/test/java/feast/ingestion/options/StringListStreamConverterTest.java new file mode 100644 index 00000000000..5ce9f054bc9 --- /dev/null +++ b/ingestion/src/test/java/feast/ingestion/options/StringListStreamConverterTest.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.ingestion.options; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import org.junit.Test; + +public class StringListStreamConverterTest { + + @Test + public void shouldReadStreamAsNewlineSeparatedStrings() throws IOException { + StringListStreamConverter converter = new StringListStreamConverter(); + String originalString = "abc\ndef"; + InputStream stringStream = new ByteArrayInputStream(originalString.getBytes()); + assertEquals(Arrays.asList("abc", "def"), converter.readStream(stringStream)); + } +} diff --git a/ingestion/src/test/java/feast/ingestion/util/DateUtilTest.java b/ingestion/src/test/java/feast/ingestion/utils/DateUtilTest.java similarity index 92% rename from ingestion/src/test/java/feast/ingestion/util/DateUtilTest.java rename to ingestion/src/test/java/feast/ingestion/utils/DateUtilTest.java index 71d4e67beaa..151d501a596 100644 --- a/ingestion/src/test/java/feast/ingestion/util/DateUtilTest.java +++ b/ingestion/src/test/java/feast/ingestion/utils/DateUtilTest.java @@ -14,15 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package feast.ingestion.util; +package feast.ingestion.utils; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.*; import com.google.protobuf.Timestamp; -import feast.ingestion.utils.DateUtil; import junit.framework.TestCase; import org.joda.time.DateTime; diff --git a/ingestion/src/test/java/feast/ingestion/util/JsonUtilTest.java b/ingestion/src/test/java/feast/ingestion/utils/JsonUtilTest.java similarity index 95% rename from ingestion/src/test/java/feast/ingestion/util/JsonUtilTest.java rename to ingestion/src/test/java/feast/ingestion/utils/JsonUtilTest.java index 02af4d819f9..62c74dfc345 100644 --- a/ingestion/src/test/java/feast/ingestion/util/JsonUtilTest.java +++ b/ingestion/src/test/java/feast/ingestion/utils/JsonUtilTest.java @@ -14,12 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package feast.ingestion.util; +package feast.ingestion.utils; import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; -import feast.ingestion.utils.JsonUtil; import java.util.Collections; import java.util.HashMap; import java.util.Map; diff --git a/ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java b/ingestion/src/test/java/feast/ingestion/utils/StoreUtilTest.java similarity index 91% rename from ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java rename to ingestion/src/test/java/feast/ingestion/utils/StoreUtilTest.java index 4e2297e405d..82988121bc8 100644 --- a/ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java +++ b/ingestion/src/test/java/feast/ingestion/utils/StoreUtilTest.java @@ -14,22 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package feast.ingestion.util; +package feast.ingestion.utils; -import static feast.types.ValueProto.ValueType.Enum.BOOL; -import static feast.types.ValueProto.ValueType.Enum.BOOL_LIST; -import static feast.types.ValueProto.ValueType.Enum.BYTES; -import static feast.types.ValueProto.ValueType.Enum.BYTES_LIST; -import static feast.types.ValueProto.ValueType.Enum.DOUBLE; -import static feast.types.ValueProto.ValueType.Enum.DOUBLE_LIST; -import static feast.types.ValueProto.ValueType.Enum.FLOAT; -import static feast.types.ValueProto.ValueType.Enum.FLOAT_LIST; -import static feast.types.ValueProto.ValueType.Enum.INT32; -import static feast.types.ValueProto.ValueType.Enum.INT32_LIST; -import static feast.types.ValueProto.ValueType.Enum.INT64; -import static feast.types.ValueProto.ValueType.Enum.INT64_LIST; -import static feast.types.ValueProto.ValueType.Enum.STRING; -import static feast.types.ValueProto.ValueType.Enum.STRING_LIST; +import static feast.types.ValueProto.ValueType.Enum.*; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.Field; @@ -40,7 +27,6 @@ import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.FeatureSetProto.FeatureSpec; -import feast.ingestion.utils.StoreUtil; import java.util.Arrays; import org.junit.Assert; import org.junit.Test; From 8c84e6cb27c43e051f258ac0bc5d59653fd75cd7 Mon Sep 17 00:00:00 2001 From: Shu Heng Date: Fri, 14 Feb 2020 14:43:15 +0800 Subject: [PATCH 13/15] Bump chart version to 0.4.5 --- infra/charts/feast/Chart.yaml | 2 +- infra/charts/feast/README.md | 6 +++--- infra/charts/feast/charts/feast-core/Chart.yaml | 2 +- infra/charts/feast/charts/feast-serving/Chart.yaml | 2 +- infra/charts/feast/requirements.yaml | 6 +++--- infra/charts/feast/values.yaml | 6 +++--- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml index c8f328548a9..e4ca21aa62f 100644 --- 
a/infra/charts/feast/Chart.yaml +++ b/infra/charts/feast/Chart.yaml @@ -1,4 +1,4 @@ apiVersion: v1 description: A Helm chart to install Feast on kubernetes name: feast -version: 0.4.4 +version: 0.4.5 diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index e93b687f191..ca526ad0b9f 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -85,7 +85,7 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-core.prometheus-statsd-exporter.*` | Refer to this [link](charts/feast-core/charts/prometheus-statsd-exporter/values.yaml | | `feast-core.replicaCount` | No of pods to create | `1` | `feast-core.image.repository` | Repository for Feast Core Docker image | `gcr.io/kf-feast/feast-core` -| `feast-core.image.tag` | Tag for Feast Core Docker image | `0.4.4` +| `feast-core.image.tag` | Tag for Feast Core Docker image | `0.4.5` | `feast-core.image.pullPolicy` | Image pull policy for Feast Core Docker image | `IfNotPresent` | `feast-core.prometheus.enabled` | Add annotations to enable Prometheus scraping | `false` | `feast-core.application.yaml` | Configuration for Feast Core application | Refer to this [link](charts/feast-core/values.yaml) @@ -126,7 +126,7 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-online.core.enabled` | Flag for Feast Serving to use Feast Core in the same Helm release | `true` | `feast-serving-online.replicaCount` | No of pods to create | `1` | `feast-serving-online.image.repository` | Repository for Feast Serving Docker image | `gcr.io/kf-feast/feast-serving` -| `feast-serving-online.image.tag` | Tag for Feast Serving Docker image | `0.4.4` +| `feast-serving-online.image.tag` | Tag for Feast Serving Docker image | `0.4.5` | `feast-serving-online.image.pullPolicy` | Image pull policy for Feast Serving Docker image | `IfNotPresent` | `feast-serving-online.prometheus.enabled` | Add annotations to enable Prometheus 
scraping | `true` | `feast-serving-online.application.yaml` | Application configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) @@ -168,7 +168,7 @@ The following table lists the configurable parameters of the Feast chart and the | `feast-serving-batch.core.enabled` | Flag for Feast Serving to use Feast Core in the same Helm release | `true` | `feast-serving-batch.replicaCount` | No of pods to create | `1` | `feast-serving-batch.image.repository` | Repository for Feast Serving Docker image | `gcr.io/kf-feast/feast-serving` -| `feast-serving-batch.image.tag` | Tag for Feast Serving Docker image | `0.4.4` +| `feast-serving-batch.image.tag` | Tag for Feast Serving Docker image | `0.4.5` | `feast-serving-batch.image.pullPolicy` | Image pull policy for Feast Serving Docker image | `IfNotPresent` | `feast-serving-batch.prometheus.enabled` | Add annotations to enable Prometheus scraping | `true` | `feast-serving-batch.application.yaml` | Application configuration for Feast Serving | Refer to this [link](charts/feast-serving/values.yaml) diff --git a/infra/charts/feast/charts/feast-core/Chart.yaml b/infra/charts/feast/charts/feast-core/Chart.yaml index 86d0699b9ac..28b3297bba9 100644 --- a/infra/charts/feast/charts/feast-core/Chart.yaml +++ b/infra/charts/feast/charts/feast-core/Chart.yaml @@ -1,4 +1,4 @@ apiVersion: v1 description: A Helm chart for core component of Feast name: feast-core -version: 0.4.4 +version: 0.4.5 diff --git a/infra/charts/feast/charts/feast-serving/Chart.yaml b/infra/charts/feast/charts/feast-serving/Chart.yaml index 2e9cf89243d..c610474c3e5 100644 --- a/infra/charts/feast/charts/feast-serving/Chart.yaml +++ b/infra/charts/feast/charts/feast-serving/Chart.yaml @@ -1,4 +1,4 @@ apiVersion: v1 description: A Helm chart for serving component of Feast name: feast-serving -version: 0.4.4 +version: 0.4.5 diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index 1fa1826965a..b30635dcdb9 
100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feast-core - version: 0.4.4 + version: 0.4.5 condition: feast-core.enabled - name: feast-serving alias: feast-serving-batch - version: 0.4.4 + version: 0.4.5 condition: feast-serving-batch.enabled - name: feast-serving alias: feast-serving-online - version: 0.4.4 + version: 0.4.5 condition: feast-serving-online.enabled \ No newline at end of file diff --git a/infra/charts/feast/values.yaml b/infra/charts/feast/values.yaml index fde03f9ad71..dd2174ae46d 100644 --- a/infra/charts/feast/values.yaml +++ b/infra/charts/feast/values.yaml @@ -53,7 +53,7 @@ feast-core: # Specify which image tag to use. Keep this consistent for all components image: - tag: "0.4.4" + tag: "0.4.5" # jvmOptions are options that will be passed to the Java Virtual Machine (JVM) # running Feast Core. @@ -121,7 +121,7 @@ feast-serving-online: enabled: true # Specify what image tag to use. Keep this consistent for all components image: - tag: "0.4.4" + tag: "0.4.5" # redis.enabled specifies whether Redis should be installed as part of Feast Serving. # # If enabled is set to "false", Feast admin has to ensure there is an @@ -180,7 +180,7 @@ feast-serving-batch: enabled: true # Specify what image tag to use. Keep this consistent for all components image: - tag: "0.4.4" + tag: "0.4.5" # redis.enabled specifies whether Redis should be installed as part of Feast Serving. 
# # This is usually set to "false" for Feast Serving Batch because the default From 94c19cef0160efd884219c2fc340606c01bf1d86 Mon Sep 17 00:00:00 2001 From: Shu Heng Date: Fri, 14 Feb 2020 14:58:52 +0800 Subject: [PATCH 14/15] Update pom version --- datatypes/java/README.md | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datatypes/java/README.md b/datatypes/java/README.md index 535fac73d2e..d5124eabb46 100644 --- a/datatypes/java/README.md +++ b/datatypes/java/README.md @@ -16,7 +16,7 @@ Dependency Coordinates dev.feast datatypes-java - 0.4.0-SNAPSHOT + 0.4.5-SNAPSHOT ``` diff --git a/pom.xml b/pom.xml index 821d3b72321..3ba6a592cfa 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ - 0.4.2-SNAPSHOT + 0.4.5-SNAPSHOT https://github.com/gojek/feast UTF-8 From fc7f58126722d4588ff42163550d39055c0fb118 Mon Sep 17 00:00:00 2001 From: Shu Heng Date: Fri, 14 Feb 2020 14:59:36 +0800 Subject: [PATCH 15/15] Changelog for release 0.4.5 --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee545e3c4d0..401cabb90d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## [v0.4.5](https://github.com/gojek/feast/tree/v0.4.5) (2020-02-14) + +[Full Changelog](https://github.com/gojek/feast/compare/v0.4.4...v0.4.5) + +**Merged pull requests:** +- Use bzip2 compressed feature set json as pipeline option [\#466](https://github.com/gojek/feast/pull/466) ([khorshuheng](https://github.com/khorshuheng)) +- Make redis key creation more determinisitic [\#471](https://github.com/gojek/feast/pull/471) ([zhilingc](https://github.com/zhilingc)) +- Helm Chart Upgrades [\#458](https://github.com/gojek/feast/pull/458) ([Yanson](https://github.com/Yanson)) +- Exclude version from grouping [\#441](https://github.com/gojek/feast/pull/441) ([khorshuheng](https://github.com/khorshuheng)) +- Use concrete class for AvroCoder compatibility 
[\#465](https://github.com/gojek/feast/pull/465) ([zhilingc](https://github.com/zhilingc)) +- Fix typo in split string length check [\#464](https://github.com/gojek/feast/pull/464) ([zhilingc](https://github.com/zhilingc)) +- Update README.md and remove versions from Helm Charts [\#457](https://github.com/gojek/feast/pull/457) ([woop](https://github.com/woop)) +- Deduplicate example notebooks [\#456](https://github.com/gojek/feast/pull/456) ([woop](https://github.com/woop)) +- Allow users not to set max age for batch retrieval [\#446](https://github.com/gojek/feast/pull/446) ([zhilingc](https://github.com/zhilingc)) + ## [v0.4.4](https://github.com/gojek/feast/tree/v0.4.4) (2020-01-28) [Full Changelog](https://github.com/gojek/feast/compare/v0.4.3...v0.4.4)