From 89c9bfd1a514003c5710f0194c4d231cdd25851f Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 9 Sep 2022 15:11:57 -0700 Subject: [PATCH 01/18] Adding DevSkim linter to Github actions --- .github/workflows/devskim-security-linter.yml | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/devskim-security-linter.yml diff --git a/.github/workflows/devskim-security-linter.yml b/.github/workflows/devskim-security-linter.yml new file mode 100644 index 000000000..d52fa57a8 --- /dev/null +++ b/.github/workflows/devskim-security-linter.yml @@ -0,0 +1,35 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party (Microsoft) and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# For more details about Devskim, visit https://github.com/marketplace/actions/devskim + +name: DevSkim + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + schedule: + - cron: '25 4 * * 2' + +jobs: + lint: + name: DevSkim + runs-on: ubuntu-20.04 + permissions: + actions: read + contents: read + security-events: write + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Run DevSkim scanner + uses: microsoft/DevSkim-Action@v1 + + - name: Upload DevSkim scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: devskim-results.sarif From bea66184201bc1b20e470241955998ad70c01626 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 16 Sep 2022 12:52:26 -0700 Subject: [PATCH 02/18] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 6db0babf7..1dff8482c 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -6,9 +6,11 @@ name: Publish Feathr Docker image to DockerHub on: + 
workflow_dispatch: schedule: # Runs daily at 10 PM UTC, would generate nightly tag - cron: '00 22 * * *' + push: # For every push against the releases/** branch, usually would happen at release time, Tag example - releases/v0.7.0 @@ -66,7 +68,7 @@ jobs: with: app-name: 'feathr-purview-registry' publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} - images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly' + images: 'feathrfeaturestore/feathr-registry:nightly' - name: Deploy to Feathr RBAC Registry Azure Web App id: deploy-to-rbac-webapp From 0963b5e483aad8f11239fd1dd44fe0d5e8c24d38 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 16 Sep 2022 12:56:29 -0700 Subject: [PATCH 03/18] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 48 ++++++++++++++-------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 1dff8482c..4a1cadaff 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -19,33 +19,33 @@ on: jobs: - build_and_push_image_to_registry: - name: Push Docker image to Docker Hub - runs-on: ubuntu-latest - steps: - - name: Check out the repo - uses: actions/checkout@v3 +# build_and_push_image_to_registry: +# name: Push Docker image to Docker Hub +# runs-on: ubuntu-latest +# steps: +# - name: Check out the repo +# uses: actions/checkout@v3 - - name: Log in to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} +# - name: Log in to Docker Hub +# uses: docker/login-action@v2 +# with: +# username: ${{ secrets.DOCKER_USERNAME }} +# password: ${{ secrets.DOCKER_PASSWORD }} - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v4 - with: - images: feathrfeaturestore/feathr-registry +# - name: Extract metadata (tags, labels) for Docker +# id: meta 
+# uses: docker/metadata-action@v4 +# with: +# images: feathrfeaturestore/feathr-registry - - name: Build and push Docker image - uses: docker/build-push-action@v3 - with: - context: . - file: FeathrRegistry.Dockerfile - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} +# - name: Build and push Docker image +# uses: docker/build-push-action@v3 +# with: +# context: . +# file: FeathrRegistry.Dockerfile +# push: true +# tags: ${{ steps.meta.outputs.tags }} +# labels: ${{ steps.meta.outputs.labels }} # Deploy the docker container to the three test environments for feathr deploy: From 68c5490211bfa52183b727279e4ee6c21692a80c Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 16 Sep 2022 12:58:22 -0700 Subject: [PATCH 04/18] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 4a1cadaff..62d273f3a 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -19,38 +19,10 @@ on: jobs: -# build_and_push_image_to_registry: -# name: Push Docker image to Docker Hub -# runs-on: ubuntu-latest -# steps: -# - name: Check out the repo -# uses: actions/checkout@v3 - -# - name: Log in to Docker Hub -# uses: docker/login-action@v2 -# with: -# username: ${{ secrets.DOCKER_USERNAME }} -# password: ${{ secrets.DOCKER_PASSWORD }} - -# - name: Extract metadata (tags, labels) for Docker -# id: meta -# uses: docker/metadata-action@v4 -# with: -# images: feathrfeaturestore/feathr-registry - -# - name: Build and push Docker image -# uses: docker/build-push-action@v3 -# with: -# context: . 
-# file: FeathrRegistry.Dockerfile -# push: true -# tags: ${{ steps.meta.outputs.tags }} -# labels: ${{ steps.meta.outputs.labels }} # Deploy the docker container to the three test environments for feathr deploy: runs-on: ubuntu-latest - needs: build_and_push_image_to_registry steps: From 758a5215887d20a0a9240f9af22bc3bc50958f15 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 16 Sep 2022 13:00:01 -0700 Subject: [PATCH 05/18] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 62d273f3a..3d21df336 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -26,14 +26,6 @@ jobs: steps: - - name: Deploy to Feathr SQL Registry Azure Web App - id: deploy-to-sql-webapp - uses: azure/webapps-deploy@v2 - with: - app-name: 'feathr-sql-registry' - publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_SQL_REGISTRY }} - images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly' - - name: Deploy to Feathr Purview Registry Azure Web App id: deploy-to-purview-webapp uses: azure/webapps-deploy@v2 @@ -42,11 +34,3 @@ jobs: publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} images: 'feathrfeaturestore/feathr-registry:nightly' - - name: Deploy to Feathr RBAC Registry Azure Web App - id: deploy-to-rbac-webapp - uses: azure/webapps-deploy@v2 - with: - app-name: 'feathr-rbac-registry' - publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} - images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly' - From 5dad67c6f41b9bfc3e8a03602112c4e2aea28371 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 16 Sep 2022 15:18:50 -0700 Subject: [PATCH 06/18] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff 
--git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 3d21df336..d4b4776d0 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -26,11 +26,20 @@ jobs: steps: - - name: Deploy to Feathr Purview Registry Azure Web App - id: deploy-to-purview-webapp +# - name: Deploy to Feathr Purview Registry Azure Web App +# id: deploy-to-purview-webapp +# uses: azure/webapps-deploy@v2 +# with: +# app-name: 'feathr-purview-registry' +# publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} +# images: 'feathrfeaturestore/feathr-registry:nightly' + + - name: Deploy to Feathr RBAC Registry Azure Web App + id: deploy-to-rbac-webapp uses: azure/webapps-deploy@v2 with: - app-name: 'feathr-purview-registry' - publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} + app-name: 'feathr-rbac-registry' + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} images: 'feathrfeaturestore/feathr-registry:nightly' + From 101a4c2bc3baeb1a1b560c217cc3f5a01d30399c Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 16 Sep 2022 15:26:19 -0700 Subject: [PATCH 07/18] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index d4b4776d0..235ae597b 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -34,12 +34,20 @@ jobs: # publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} # images: 'feathrfeaturestore/feathr-registry:nightly' - - name: Deploy to Feathr RBAC Registry Azure Web App - id: deploy-to-rbac-webapp +# - name: Deploy to Feathr RBAC Registry Azure Web App +# id: deploy-to-rbac-webapp +# uses: azure/webapps-deploy@v2 +# with: +# app-name: 'feathr-rbac-registry' +# publish-profile: ${{ 
secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} +# images: 'feathrfeaturestore/feathr-registry:nightly' + + - name: Deploy to Feathr SQL Registry Azure Web App + id: deploy-to-sql-webapp uses: azure/webapps-deploy@v2 with: - app-name: 'feathr-rbac-registry' - publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} + app-name: 'feathr-sql-registry' + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_SQL_REGISTRY }} images: 'feathrfeaturestore/feathr-registry:nightly' From 0a8ddea2c18f0ac6f07ff394b01dd08b4fbdd873 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 16 Sep 2022 15:30:53 -0700 Subject: [PATCH 08/18] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 235ae597b..a1f5a06cd 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -26,21 +26,21 @@ jobs: steps: -# - name: Deploy to Feathr Purview Registry Azure Web App -# id: deploy-to-purview-webapp -# uses: azure/webapps-deploy@v2 -# with: -# app-name: 'feathr-purview-registry' -# publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} -# images: 'feathrfeaturestore/feathr-registry:nightly' + - name: Deploy to Feathr Purview Registry Azure Web App + id: deploy-to-purview-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: 'feathr-purview-registry' + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} + images: 'feathrfeaturestore/feathr-registry:nightly' -# - name: Deploy to Feathr RBAC Registry Azure Web App -# id: deploy-to-rbac-webapp -# uses: azure/webapps-deploy@v2 -# with: -# app-name: 'feathr-rbac-registry' -# publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} -# images: 'feathrfeaturestore/feathr-registry:nightly' + 
- name: Deploy to Feathr RBAC Registry Azure Web App + id: deploy-to-rbac-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: 'feathr-rbac-registry' + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }} + images: 'feathrfeaturestore/feathr-registry:nightly' - name: Deploy to Feathr SQL Registry Azure Web App id: deploy-to-sql-webapp From b663baf95c641b802979981c8014f55df01c9245 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Fri, 16 Sep 2022 15:37:44 -0700 Subject: [PATCH 09/18] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index a1f5a06cd..7a36fbc71 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -20,6 +20,36 @@ on: jobs: + build_and_push_image_to_registry: + name: Push Docker image to Docker Hub + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: feathrfeaturestore/feathr-registry + + - name: Build and push Docker image + uses: docker/build-push-action@v3 + with: + context: . 
+ file: FeathrRegistry.Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + + # Deploy the docker container to the three test environments for feathr deploy: runs-on: ubuntu-latest From 9143e39b57b5a3e896746031f86210625c11f48e Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 12 Oct 2022 10:36:01 -0700 Subject: [PATCH 10/18] Removing devskim file --- .github/workflows/devskim-security-linter.yml | 35 ------------------- 1 file changed, 35 deletions(-) delete mode 100644 .github/workflows/devskim-security-linter.yml diff --git a/.github/workflows/devskim-security-linter.yml b/.github/workflows/devskim-security-linter.yml deleted file mode 100644 index d52fa57a8..000000000 --- a/.github/workflows/devskim-security-linter.yml +++ /dev/null @@ -1,35 +0,0 @@ -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party (Microsoft) and are governed by -# separate terms of service, privacy policy, and support -# documentation. 
-# For more details about Devskim, visit https://github.com/marketplace/actions/devskim - -name: DevSkim - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - schedule: - - cron: '25 4 * * 2' - -jobs: - lint: - name: DevSkim - runs-on: ubuntu-20.04 - permissions: - actions: read - contents: read - security-events: write - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Run DevSkim scanner - uses: microsoft/DevSkim-Action@v1 - - - name: Upload DevSkim scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 - with: - sarif_file: devskim-results.sarif From 6b34dbe687d230ac5d3516a0ec432e3fc2f8de57 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 12 Oct 2022 16:40:12 -0700 Subject: [PATCH 11/18] Restructuring the Prod Reco sample --- .../samples/product_recommendation_demo.ipynb | 280 ++++++++---------- 1 file changed, 123 insertions(+), 157 deletions(-) diff --git a/docs/samples/product_recommendation_demo.ipynb b/docs/samples/product_recommendation_demo.ipynb index 44febb062..09139c385 100644 --- a/docs/samples/product_recommendation_demo.ipynb +++ b/docs/samples/product_recommendation_demo.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Demo Notebook: Feathr Feature Store on Azure\n", + "# Product Recommendation with Feathr\n", "\n", "This notebook demonstrates how Feathr Feature Store can simplify and empower your model training and inference. You will learn:\n", "\n", @@ -17,13 +17,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Prerequisite: Use Quick Start Template to Provision Azure Resources\n", + "## 1. Prerequisite: Use Azure Resource Manager(ARM) to Provision Azure Resources\n", "\n", "First step is to provision required cloud resources if you want to use Feathr. 
Feathr provides a python based client to interact with cloud resources.\n", "\n", - "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. Because of this, [azure_resource_provision.sh](https://github.com/feathr-ai/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script. \n", + "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. This will create a new resource group and deploy the needed Azure resources in it. \n", "\n", + "If you already have an existing resource group and only want to install few resources manually you can refer to the [CLI documentation](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html). It provides CLI commands to install the needed resources.\n", "\n", + "The below architecture diagram represents how different resources interact with each other\n", "![Architecture](https://github.com/feathr-ai/feathr/blob/main/docs/images/architecture.png?raw=true)" ] }, @@ -31,11 +33,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Prerequisite: Install Feathr and Import Dependencies\n", + "## 2. 
Prerequisite: Login to Azure and Install Feathr\n", "\n", - "Install Feathr using pip:\n", - "\n", - "`pip install -U feathr pandavro scikit-learn`" + "Login to Azure with a device code (You will see instructions in the output once you execute the cell):" ] }, { @@ -44,44 +44,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Import Dependencies\n", - "import glob\n", - "import os\n", - "import tempfile\n", - "from datetime import datetime, timedelta\n", - "from math import sqrt\n", - "\n", - "import pandas as pd\n", - "import pandavro as pdx\n", - "from feathr import FeathrClient\n", - "from feathr import BOOLEAN, FLOAT, INT32, ValueType\n", - "from feathr import Feature, DerivedFeature, FeatureAnchor\n", - "from feathr import BackfillTime, MaterializationSettings\n", - "from feathr import FeatureQuery, ObservationSettings\n", - "from feathr import RedisSink\n", - "from feathr import INPUT_CONTEXT, HdfsSource\n", - "from feathr import WindowAggTransformation\n", - "from feathr import TypedKey\n", - "from sklearn.metrics import mean_squared_error\n", - "from sklearn.model_selection import train_test_split\n", - "from azure.identity import DefaultAzureCredential\n", - "from azure.keyvault.secrets import SecretClient" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prerequisite: Configure the required environment with Feathr Quick Start Template\n", - "\n", - "In the first step (Provision cloud resources), you should have provisioned all the required cloud resources. Run the code below to install Feathr, login to Azure to get the required credentials to access more cloud resources." + "! az login --use-device-code" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**REQUIRED STEP: Fill in the resource prefix when provisioning the resources**" + "Install Feathr and dependencies to run this notebook." 
] }, { @@ -90,23 +60,14 @@ "metadata": {}, "outputs": [], "source": [ - "resource_prefix = \"feathr_resource_prefix\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! pip install feathr azure-cli pandavro scikit-learn" + "!pip install -U feathr pandavro scikit-learn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Login to Azure with a device code (You will see instructions in the output):" + "Import Dependencies to make sure everything is installed correctly" ] }, { @@ -115,7 +76,27 @@ "metadata": {}, "outputs": [], "source": [ - "! az login --use-device-code" + "import glob\n", + "import os\n", + "import tempfile\n", + "from datetime import datetime, timedelta\n", + "from math import sqrt\n", + "\n", + "import pandas as pd\n", + "import pandavro as pdx\n", + "from feathr import FeathrClient\n", + "from feathr import BOOLEAN, FLOAT, INT32, ValueType\n", + "from feathr import Feature, DerivedFeature, FeatureAnchor\n", + "from feathr import BackfillTime, MaterializationSettings\n", + "from feathr import FeatureQuery, ObservationSettings\n", + "from feathr import RedisSink\n", + "from feathr import INPUT_CONTEXT, HdfsSource\n", + "from feathr import WindowAggTransformation\n", + "from feathr import TypedKey\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from azure.identity import AzureCliCredential\n", + "from azure.keyvault.secrets import SecretClient" ] }, { @@ -123,20 +104,20 @@ "metadata": {}, "source": [ "\n", - "**Permission**\n", + "## 3. Prerequisite: Set the required permissions\n", "\n", - "To proceed with the following steps, you may need additional permission: permission to access the keyvault, permission to access the Storage Blob as a Contributor and permission to submit jobs to Synapse cluster. Skip this step if you have already given yourself the access. 
Otherwise, run the following lines of command in the Cloud Shell before running the cell below.\n", + "Before you proceed further, you may need additional permission: permission to access the keyvault, permission to access the Storage Blob as a Contributor and permission to submit jobs to Synapse cluster. Skip this step if you have already given yourself the access. Otherwise, run the following lines of command in the [Cloud Shell](https://shell.azure.com) before running the cells below.\n", "\n", "```\n", - "userId=\n", - "resource_prefix=\n", - "synapse_workspace_name=\"${resource_prefix}syws\"\n", - "keyvault_name=\"${resource_prefix}kv\"\n", - "objectId=$(az ad user show --id $userId --query id -o tsv)\n", - "az keyvault update --name $keyvault_name --enable-rbac-authorization false\n", - "az keyvault set-policy -n $keyvault_name --secret-permissions get list --object-id $objectId\n", - "az role assignment create --assignee $userId --role \"Storage Blob Data Contributor\"\n", - "az synapse role assignment create --workspace-name $synapse_workspace_name --role \"Synapse Contributor\" --assignee $userId\n", + " userId=\n", + " resource_prefix=\n", + " synapse_workspace_name=\"${resource_prefix}syws\"\n", + " keyvault_name=\"${resource_prefix}kv\"\n", + " objectId=$(az ad user show --id $userId --query id -o tsv)\n", + " az keyvault update --name $keyvault_name --enable-rbac-authorization false\n", + " az keyvault set-policy -n $keyvault_name --secret-permissions get list --object-id $objectId\n", + " az role assignment create --assignee $userId --role \"Storage Blob Data Contributor\"\n", + " az synapse role assignment create --workspace-name $synapse_workspace_name --role \"Synapse Contributor\" --assignee $userId\n", "```\n" ] }, @@ -144,7 +125,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Get all the required credentials from Azure KeyVault" + "# 4. 
Prerequisite: Feathr Configuration\n", + "\n", + "### Setting the environment variables\n", + "Set the environment variables that will be used by Feathr as configuration. Feathr supports configuration via enviroment variables and yaml, you can read more about it [here](https://feathr-ai.github.io/feathr/how-to-guides/feathr-configuration-and-env.html).\n", + "\n", + "**Fill in the `resource_prefix` that you used while provisioning the resources in Step 1 using ARM.**" ] }, { @@ -153,44 +139,49 @@ "metadata": {}, "outputs": [], "source": [ - "# Get all the required credentials from Azure Key Vault\n", - "key_vault_name=resource_prefix+\"kv\"\n", - "synapse_workspace_url=resource_prefix+\"syws\"\n", - "adls_account=resource_prefix+\"dls\"\n", - "adls_fs_name=resource_prefix+\"fs\"\n", - "purview_name=resource_prefix+\"purview\"\n", - "key_vault_uri = f\"https://{key_vault_name}.vault.azure.net\"\n", - "credential = DefaultAzureCredential(exclude_interactive_browser_credential=False)\n", - "client = SecretClient(vault_url=key_vault_uri, credential=credential)\n", - "secretName = \"FEATHR-ONLINE-STORE-CONN\"\n", - "retrieved_secret = client.get_secret(secretName).value\n", - "\n", - "# Get redis credentials; This is to parse Redis connection string.\n", - "redis_port=retrieved_secret.split(',')[0].split(\":\")[1]\n", - "redis_host=retrieved_secret.split(',')[0].split(\":\")[0]\n", - "redis_password=retrieved_secret.split(',')[1].split(\"password=\",1)[1]\n", - "redis_ssl=retrieved_secret.split(',')[2].split(\"ssl=\",1)[1]\n", - "\n", - "# Set the resource link\n", - "os.environ['spark_config__azure_synapse__dev_url'] = f'https://{synapse_workspace_url}.dev.azuresynapse.net'\n", - "os.environ['spark_config__azure_synapse__pool_name'] = 'spark31'\n", - "os.environ['spark_config__azure_synapse__workspace_dir'] = f'abfss://{adls_fs_name}@{adls_account}.dfs.core.windows.net/feathr_project'\n", - "os.environ['online_store__redis__host'] = redis_host\n", - 
"os.environ['online_store__redis__port'] = redis_port\n", - "os.environ['online_store__redis__ssl_enabled'] = redis_ssl\n", - "os.environ['REDIS_PASSWORD']=redis_password\n", - "feathr_output_path = f'abfss://{adls_fs_name}@{adls_account}.dfs.core.windows.net/feathr_output'" + "RESOURCE_PREFIX = \"YOUR_RESOURCE_PREFIX\" # from ARM deployment in Step 1\n", + "FEATHR_PROJECT_NAME=\"YOUR_PROJECT_NAME\" # provide a unique name" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Prerequisite: Configure the required environment (Skip this step if using the above Quick Start Template)\n", "\n", - "In the first step (Provision cloud resources), you should have provisioned all the required cloud resources. If you use Feathr CLI to create a workspace, you should have a folder with a file called `feathr_config.yaml` in it with all the required configurations. Otherwise, update the configuration below.\n", + "# Get name for deployed resources using the resource prefix\n", + "KEY_VAULT_NAME=f\"{RESOURCE_PREFIX}kv\"\n", + "SYNAPSE_WORKSPACE_NAME=f\"{RESOURCE_PREFIX}syws\"\n", + "ADLS_ACCOUNT=f\"{RESOURCE_PREFIX}dls\"\n", + "ADLS_FS_NAME=f\"{RESOURCE_PREFIX}fs\"\n", + "KEY_VAULT_URI = f\"https://{KEY_VAULT_NAME}.vault.azure.net\"\n", + "FEATHR_API_APP = f\"{RESOURCE_PREFIX}webapp\"\n", + "\n", + "\n", + "# Getting the credential object for Key Vault client\n", + "credential = AzureCliCredential()\n", + "client = SecretClient(vault_url=KEY_VAULT_URI, credential=credential)\n", + "\n", + "# Getting Redis store's connection string.\n", + "retrieved_secret = client.get_secret(\"FEATHR-ONLINE-STORE-CONN\").value\n", + "\n", + "# Parse Redis connection string\n", + "REDIS_PORT=retrieved_secret.split(',')[0].split(\":\")[1]\n", + "REDIS_HOST=retrieved_secret.split(',')[0].split(\":\")[0]\n", + "REDIS_PASSWORD=retrieved_secret.split(',')[1].split(\"password=\",1)[1]\n", + 
"REDIS_SSL=retrieved_secret.split(',')[2].split(\"ssl=\",1)[1]\n", + "# Set password as environment variable.\n", + "os.environ['REDIS_PASSWORD']=REDIS_PASSWORD" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Write the configuration as yaml file.\n", "\n", - "The code below will write this configuration string to a temporary location and load it to Feathr. Please still refer to [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml) and use that as the source of truth. It should also have more explanations on the meaning of each variable." + "The code below will write this configuration string to a temporary location and load it to Feathr. Please refer to [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml) for full list of configuration options and details about them." ] }, { @@ -200,68 +191,38 @@ "outputs": [], "source": [ "import tempfile\n", - "yaml_config = \"\"\"\n", - "# Please refer to https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml for explanations on the meaning of each field.\n", + "yaml_config = f\"\"\"\n", "api_version: 1\n", "project_config:\n", - " project_name: 'feathr_getting_started'\n", - " required_environment_variables:\n", - " - 'REDIS_PASSWORD'\n", + " project_name: '{FEATHR_PROJECT_NAME}'\n", "offline_store:\n", "# Please set 'enabled' flags as true (false by default) if any of items under the same paths are expected to be visited\n", " adls:\n", " adls_enabled: true\n", " wasb:\n", " wasb_enabled: true\n", - " s3:\n", - " s3_enabled: false\n", - " s3_endpoint: 's3.amazonaws.com'\n", - " jdbc:\n", - " jdbc_enabled: false\n", - " jdbc_database: 'feathrtestdb'\n", - " jdbc_table: 'feathrtesttable'\n", - " snowflake:\n", - " snowflake_enabled: false\n", - " url: 
\".snowflakecomputing.com\"\n", - " user: \"\"\n", - " role: \"\"\n", "spark_config:\n", " spark_cluster: 'azure_synapse'\n", " spark_result_output_parts: '1'\n", " azure_synapse:\n", - " dev_url: 'https://feathrazuretest3synapse.dev.azuresynapse.net'\n", - " pool_name: 'spark3'\n", - " workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_getting_started'\n", + " dev_url: 'https://{SYNAPSE_WORKSPACE_NAME}.dev.azuresynapse.net'\n", + " pool_name: 'spark31'\n", + " workspace_dir: 'abfss://{ADLS_FS_NAME}@{ADLS_ACCOUNT}.dfs.core.windows.net/feathr_project'\n", " executor_size: 'Small'\n", " executor_num: 1\n", - " databricks:\n", - " workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net'\n", - " config_template: {'run_name':'','new_cluster':{'spark_version':'9.1.x-scala2.12','node_type_id':'Standard_D3_v2','num_workers':2,'spark_conf':{}},'libraries':[{'jar':''}],'spark_jar_task':{'main_class_name':'','parameters':['']}}\n", - " work_dir: 'dbfs:/feathr_getting_started'\n", "online_store:\n", " redis:\n", - " host: 'feathrazuretest3redis.redis.cache.windows.net'\n", - " port: 6380\n", - " ssl_enabled: True\n", + " host: '{REDIS_HOST}'\n", + " port: {REDIS_PORT}\n", + " ssl_enabled: {REDIS_SSL}\n", "feature_registry:\n", - " api_endpoint: \"https://feathr-sql-registry.azurewebsites.net/api/v1\"\n", + " api_endpoint: 'https://{FEATHR_API_APP}.azurewebsites.net/api/v1'\n", "\"\"\"\n", + "\n", "tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)\n", "with open(tmp.name, \"w\") as text_file:\n", " text_file.write(yaml_config)\n", - "feathr_output_path = f'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_output'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prerequisite: Setup necessary environment variables (Skip this step if using the above Quick Start Template)\n", - "\n", - "You should setup the environment variables in order to run this 
sample. More environment variables can be set by referring to [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml) and use that as the source of truth. It also has more explanations on the meaning of each variable.\n", - "\n", - "To run this notebook, for Azure users, you need REDIS_PASSWORD.\n", - "To run this notebook, for Databricks useres, you need DATABRICKS_WORKSPACE_TOKEN_VALUE and REDIS_PASSWORD." + "feathr_output_path = f'abfss://{ADLS_FS_NAME}@{ADLS_ACCOUNT}.dfs.core.windows.net/feathr_output'" ] }, { @@ -305,6 +266,7 @@ "# Observation dataset usually comes with a event_timestamp to denote when the observation happened.\n", "# The label here is product_rating. Our model objective is to predict a user's rating for this product.\n", "import pandas as pd\n", + "# Public URL hosting mock data\n", "pd.read_csv(\"https://azurefeathrstorage.blob.core.windows.net/public/sample_data/product_recommendation_sample/user_observation_mock_data.csv\")" ] }, @@ -360,7 +322,7 @@ "1. Feature source: what source data that this feature is based on\n", "2. Transformation: what transformation is used to transform the source data into feature. Transformation can be optional when you just want to take a column out from the source data.\n", "\n", - "(For more details on feature definition, please refer to the [Feathr Feature Definition Guide](https://github.com/feathr-ai/feathr/blob/main/docs/concepts/feature-definition.md))" + "(For more details on feature definition, please refer to the [Feathr Feature Definition Guide](https://feathr-ai.github.io/feathr/concepts/feature-definition.html))" ] }, { @@ -456,9 +418,7 @@ "source": [ "### Window aggregation features\n", "\n", - "Using [window aggregations](https://en.wikipedia.org/wiki/Window_function_%28SQL%29) can help us create more powerful features. A window aggregation feature compress large amount of information into one single feature value. 
Using our raw data as an example, we have the users' purchase history data that might be quite some rows, we want to create a window aggregation feature that represents their last 90 days of average purcahse amount.\n", - "\n", - "Feathr provides a nice API to help us create such window aggregation features.\n", + "Using [window aggregations](https://en.wikipedia.org/wiki/Window_function_%28SQL%29) can help us create more powerful features. A window aggregation feature compresses large amount of information into one single feature value. Using our raw data as an example, we have the user's purchase history data that might be quite some rows, we want to create a window aggregation feature that represents their last 90 days of average purchase amount.\n", "\n", "To create this window aggregation feature via Feathr, we just need to define the following parameters with `WindowAggTransformation` API:\n", "1. `agg_expr`: the field/column you want to aggregate. It can be a ANSI SQL expression. So we just write `cast_float(purchase_amount)`(the raw data might be in string form, let's cast_float).\n", @@ -509,9 +469,7 @@ "### Derived Features Section\n", "Derived features are features that are computed from other Feathr features. They could be computed from anchored features, or other derived features.\n", "\n", - "Typical usage includes feature cross(f1 * f2), or computing cosine similarity between two features.\n", - "\n", - "The syntax works in a similar way." + "Typical usage includes feature cross(f1 * f2), or computing cosine similarity between two features. The syntax works in a similar way." ] }, { @@ -532,7 +490,7 @@ "metadata": {}, "source": [ "### Build Features\n", - "Lastly, we need to build those features so that it can be consumed later. Note that we have to build both the \"anchor\" and the \"derived\" features." + "Lastly, we need to build these features so that they can be consumed later. 
Note that we have to build both the \"anchor\" and the \"derived\" features." ] }, { @@ -550,12 +508,11 @@ "metadata": {}, "source": [ "### Optional: A Special Type of Feature: Request Feature\n", - "For advanced user cases, in some cases, features defined on top of request data(a.k.a. observation data) may have no entity key or timestamp.\n", - "It is merely a function/transformation executing against request data at runtime.\n", - "For example, the day of week of the request, which is calculated by converting the request UNIX timestamp.\n", - "In this case, the `source` section should be `INPUT_CONTEXT` to indicate the source of those defined anchors.\n", + "Sometimes features defined on top of request data(a.k.a. observation data) may have no entity key or timestamp. It is merely a function/transformation executing against request data at runtime.\n", "\n", - "We won't cover the details it in this notebook." + "For example, the day of the week of the request, which is calculated by converting the request UNIX timestamp. In this case, the `source` section should be `INPUT_CONTEXT` to indicate the source of those defined anchors.\n", + "\n", + "We won't cover the details of it in this notebook." ] }, { @@ -564,12 +521,11 @@ "source": [ "## Create training data using point-in-time correct feature join\n", "\n", - "A training dataset usually contains entity id column(s), multiple feature columns, event timestamp column and label/target column. \n", + "A training dataset usually contains `entity id` column(s), multiple `feature` columns, event timestamp column and `label/target` column. \n", "\n", - "To create a training dataset using Feathr, we need to provide a feature join settings to specify\n", - "what features and how these features should be joined to the observation data. \n", + "To create a training dataset using Feathr, we need to provide a feature join settings to specify what features and how these features should be joined to the observation data. 
\n", "\n", - "(To learn more on this topic, please refer to [Point-in-time Correctness](https://github.com/feathr-ai/feathr/blob/main/docs/concepts/point-in-time-join.md))" + "(To learn more on this topic, please refer to [Point-in-time Correctness](https://feathr-ai.github.io/feathr/concepts/point-in-time-join.html))." ] }, { @@ -694,16 +650,16 @@ "\n", "In the previous section, we demonstrated how Feathr can compute feature value to generate training dataset from feature definition on-they-fly.\n", "\n", - "Now let's talk about how we can use the trained models. We can use the trained models for offline inference as well as online inference. In both cases, we need features to be feed into the models. For offline inference, you can compute and get the features on-demand; or you can store the computed features to some offline database for later offline inference.\n", + "Now let's talk about how we can use the trained models. We can use the trained models for both online and offline inference. In both cases, we need features to be fed into the models. For offline inference, you can compute and get the features on-demand; or you can store the computed features to some offline database for later offline inference.\n", "\n", "For online inference, we can use Feathr to compute and store the features in the online database. Then use it for online inference when the request comes.\n", "\n", "![img](../images/online_inference.jpg)\n", "\n", "\n", - "In this section, we will focus on materialize features to online store. For materialization to offline store, you can check out our [user guide](https://github.com/feathr-ai/feathr/blob/main/docs/concepts/materializing-features.md#materializing-features-to-offline-store).\n", + "In this section, we will focus on materialize features to online store. 
For materialization to offline store, you can check out our [user guide](https://feathr-ai.github.io/feathr/concepts/materializing-features.html#materializing-features-to-offline-store).\n", "\n", - "We can push the computed features to the online store like below:" + "We can push the computed features to the online store(Redis) like below:" ] }, { @@ -761,7 +717,7 @@ "source": [ "### Registering and Fetching features\n", "\n", - "We can also register the features with an Apache Atlas compatible service, such as Azure Purview, and share the registered features across teams:" + "We can also register the features and share them across teams:" ] }, { @@ -771,7 +727,17 @@ "outputs": [], "source": [ "feathr_client.register_features()\n", - "feathr_client.list_registered_features(project_name=\"feathr_getting_started\")" + "feathr_client.list_registered_features(project_name=f\"{FEATHR_PROJECT_NAME}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "In this notebook you learnt how to set up Feathr and use it to create features, register features and use those features for model training and inferencing.\n", + "\n", + "We hope this example gave you a good sense of Feathr's capabilities and how you could leverage it within your organization's MLOps workflow." ] } ], From ce4ed50297bf9a183164593e94309057d778e653 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 12 Oct 2022 16:46:38 -0700 Subject: [PATCH 12/18] Adjusting headings --- docs/samples/product_recommendation_demo.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/samples/product_recommendation_demo.ipynb b/docs/samples/product_recommendation_demo.ipynb index 09139c385..f5e7e6500 100644 --- a/docs/samples/product_recommendation_demo.ipynb +++ b/docs/samples/product_recommendation_demo.ipynb @@ -234,7 +234,7 @@ "In this tutorial, we use Feathr Feature Store to help create a model that predicts users product rating. 
To make it simple, let's just predict users' rating for ONE product for an e-commerce website. (We have an [advanced demo](./product_recommendation_demo_advanced.ipynb) that predicts ratings for arbitrary products.)\n", "\n", "\n", - "## Initialize Feathr Client\n", + "### Initialize Feathr Client\n", "\n", "Let's initialize a Feathr client first. The Feathr client provides all the APIs we need to interact with Feathr Feature Store." ] @@ -252,7 +252,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Understand the Raw Datasets\n", + "### Understand the Raw Datasets\n", "We have 3 raw datasets to work with: one observation dataset(a.k.a. label dataset) and two raw datasets to generate features." ] }, @@ -308,7 +308,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## What's a Feature in Feathr\n", + "### What's a Feature in Feathr\n", "A feature is an individual measurable property or characteristic of a phenomenon which is sometimes time-sensitive. \n", "\n", "In Feathr, feature can be defined by the following characteristics:\n", @@ -562,7 +562,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Download the result and show the result\n", + "### Download the result and show the result\n", "\n", "Let's use the helper function `get_result_df` to download the result and view it:" ] @@ -595,7 +595,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Train a machine learning model\n", + "### Train a machine learning model\n", "After getting all the features, let's train a machine learning model with the converted feature by Feathr:" ] }, @@ -685,7 +685,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Fetch feature value from online store\n", + "### Fetch feature value from online store\n", "We can then get the features from the online store (Redis) via the client's `get_online_features` or `multi_get_online_features` API." 
] }, From 4aadce419a4fdb3b1b57199d936a473fa0c119a3 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 12 Oct 2022 16:50:31 -0700 Subject: [PATCH 13/18] Minor changes --- docs/samples/product_recommendation_demo.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/samples/product_recommendation_demo.ipynb b/docs/samples/product_recommendation_demo.ipynb index f5e7e6500..a35ff66bb 100644 --- a/docs/samples/product_recommendation_demo.ipynb +++ b/docs/samples/product_recommendation_demo.ipynb @@ -10,7 +10,9 @@ "\n", "1. Define sharable features using Feathr API\n", "2. Create a training dataset via point-in-time feature join with Feathr API\n", - "3. Materialize features to online store and then retrieve them with Feathr API" + "3. Materialize features to online store and then retrieve them with Feathr API\n", + "\n", + "In this tutorial, we use Feathr to create a model that predicts users' product rating. " ] }, { @@ -231,7 +233,7 @@ "source": [ "# Define sharable features using Feathr API\n", "\n", - "In this tutorial, we use Feathr Feature Store to help create a model that predicts users product rating. To make it simple, let's just predict users' rating for ONE product for an e-commerce website. (We have an [advanced demo](./product_recommendation_demo_advanced.ipynb) that predicts ratings for arbitrary products.)\n", + "In this tutorial, we use Feathr Feature Store and create a model that predicts users' product rating. To make it simple, let's just predict users' rating for ONE product for an e-commerce website. 
(We have an [advanced demo](./product_recommendation_demo_advanced.ipynb) that predicts ratings for arbitrary products.)\n", "\n", "\n", "### Initialize Feathr Client\n", From 00587c6c5a1a2dac59fcd83bf153525274f0bf52 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 12 Oct 2022 16:52:01 -0700 Subject: [PATCH 14/18] Removing changes to docker publish file --- .github/workflows/docker-publish.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index d4caa8a17..84e99b614 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -6,11 +6,9 @@ name: Publish Feathr Docker image to DockerHub on: - workflow_dispatch: schedule: # Runs daily at 10 PM UTC, would generate nightly tag - cron: '00 22 * * *' - push: # For every push against the releases/** branch, usually would happen at release time, Tag example - releases/v0.7.0 @@ -19,7 +17,6 @@ on: jobs: - build_and_push_image_to_registry: name: Push Docker image to Docker Hub runs-on: ubuntu-latest @@ -48,11 +45,10 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - - # Deploy the docker container to the three test environments for feathr deploy: runs-on: ubuntu-latest + needs: build_and_push_image_to_registry steps: @@ -71,6 +67,7 @@ jobs: app-name: 'feathr-purview-registry' publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }} images: 'feathrfeaturestore/feathr-registry:nightly' + - name: Deploy to Feathr RBAC Registry Azure Web App id: deploy-to-rbac-webapp uses: azure/webapps-deploy@v2 From 641934476ca10b7d285835fbc2322ae5980853df Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Tue, 18 Oct 2022 22:55:04 -0700 Subject: [PATCH 15/18] Addressing PR comments, moving Product recommendation notebook sample to Synapse folder since it is strongly tied to Synapse --- .../product_recommendation_demo.ipynb | 11 ++++++----- 
1 file changed, 6 insertions(+), 5 deletions(-) rename docs/samples/{ => azure_synapse}/product_recommendation_demo.ipynb (99%) diff --git a/docs/samples/product_recommendation_demo.ipynb b/docs/samples/azure_synapse/product_recommendation_demo.ipynb similarity index 99% rename from docs/samples/product_recommendation_demo.ipynb rename to docs/samples/azure_synapse/product_recommendation_demo.ipynb index a35ff66bb..3fc4c430f 100644 --- a/docs/samples/product_recommendation_demo.ipynb +++ b/docs/samples/azure_synapse/product_recommendation_demo.ipynb @@ -4,13 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Product Recommendation with Feathr\n", + "# Product Recommendation with Feathr on Azure\n", "\n", "This notebook demonstrates how Feathr Feature Store can simplify and empower your model training and inference. You will learn:\n", "\n", "1. Define sharable features using Feathr API\n", + "2. Register features with register API.\n", "2. Create a training dataset via point-in-time feature join with Feathr API\n", - "3. Materialize features to online store and then retrieve them with Feathr API\n", + "4. Materialize features to online store and then retrieve them with Feathr API\n", "\n", "In this tutorial, we use Feathr to create a model that predicts users' product rating. 
" ] @@ -745,7 +746,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.12 ('ifelse_bug_env': venv)", + "display_name": "Python 3.8.13 ('feathrtest')", "language": "python", "name": "python3" }, @@ -759,11 +760,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.8.13" }, "vscode": { "interpreter": { - "hash": "6a6c366ec8f33a88299a9f856c1a3e4312616abcb6fcf46b22c3da0a923e63af" + "hash": "96bbbb728c64ae5eda27ed1c89d74908bf0652fd45caa45cd0ade6bdc0df4d48" } } }, From 42209f17673deb2b1672bcfcf413059a649175e3 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 19 Oct 2022 14:08:28 -0700 Subject: [PATCH 16/18] Addressing PR comments --- .../azure_synapse/product_recommendation_demo.ipynb | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/docs/samples/azure_synapse/product_recommendation_demo.ipynb b/docs/samples/azure_synapse/product_recommendation_demo.ipynb index 3fc4c430f..6b3eac93d 100644 --- a/docs/samples/azure_synapse/product_recommendation_demo.ipynb +++ b/docs/samples/azure_synapse/product_recommendation_demo.ipynb @@ -234,7 +234,7 @@ "source": [ "# Define sharable features using Feathr API\n", "\n", - "In this tutorial, we use Feathr Feature Store and create a model that predicts users' product rating. To make it simple, let's just predict users' rating for ONE product for an e-commerce website. (We have an [advanced demo](./product_recommendation_demo_advanced.ipynb) that predicts ratings for arbitrary products.)\n", + "In this tutorial, we use Feathr Feature Store and create a model that predicts users' product rating. To make it simple, let's just predict users' rating for ONE product for an e-commerce website. 
(We have an [advanced demo](../product_recommendation_demo_advanced.ipynb) that predicts ratings for arbitrary products.)\n", "\n", "\n", "### Initialize Feathr Client\n", @@ -537,12 +537,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Synapse and Databricks have different output path format\n", - "if feathr_client.spark_runtime == 'databricks':\n", - " output_path = 'dbfs:/feathrazure_test.avro'\n", - "else:\n", - " output_path = feathr_output_path\n", - "\n", + "output_path = feathr_output_path\n", "# Features that we want to request\n", "feature_query = FeatureQuery(feature_list=[\"feature_user_age\", \n", " \"feature_user_tax_rate\", \n", @@ -657,7 +652,7 @@ "\n", "For online inference, we can use Feathr to compute and store the features in the online database. Then use it for online inference when the request comes.\n", "\n", - "![img](../images/online_inference.jpg)\n", + "![img](../../images/online_inference.jpg)\n", "\n", "\n", "In this section, we will focus on materialize features to online store. 
For materialization to offline store, you can check out our [user guide](https://feathr-ai.github.io/feathr/concepts/materializing-features.html#materializing-features-to-offline-store).\n", From 50770699beb49e43bab7e7f16618f4aed935749d Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 19 Oct 2022 14:34:34 -0700 Subject: [PATCH 17/18] Fixing images --- docs/samples/azure_synapse/product_recommendation_demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/samples/azure_synapse/product_recommendation_demo.ipynb b/docs/samples/azure_synapse/product_recommendation_demo.ipynb index 6b3eac93d..a0ade27f3 100644 --- a/docs/samples/azure_synapse/product_recommendation_demo.ipynb +++ b/docs/samples/azure_synapse/product_recommendation_demo.ipynb @@ -63,7 +63,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -U feathr pandavro scikit-learn" + "%pip install -U feathr pandavro scikit-learn" ] }, { From ea289529468a5c47661c45fcf5c5fb54dbc3dca8 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Thu, 20 Oct 2022 12:52:36 -0700 Subject: [PATCH 18/18] Removing the need to pass email id as we could directly compute object Id using az command, also making CLI instructions clearer that it is for advance users --- .../azure_synapse/product_recommendation_demo.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/samples/azure_synapse/product_recommendation_demo.ipynb b/docs/samples/azure_synapse/product_recommendation_demo.ipynb index a0ade27f3..e93860269 100644 --- a/docs/samples/azure_synapse/product_recommendation_demo.ipynb +++ b/docs/samples/azure_synapse/product_recommendation_demo.ipynb @@ -10,7 +10,7 @@ "\n", "1. Define sharable features using Feathr API\n", "2. Register features with register API.\n", - "2. Create a training dataset via point-in-time feature join with Feathr API\n", + "3. Create a training dataset via point-in-time feature join with Feathr API\n", "4. 
Materialize features to online store and then retrieve them with Feathr API\n",
     "\n",
     "In this tutorial, we use Feathr to create a model that predicts users' product rating. "
@@ -26,7 +26,8 @@
     "\n",
     "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. This will create a new resource group and deploy the needed Azure resources in it. \n",
     "\n",
-    "If you already have an existing resource group and only want to install few resources manually you can refer to the [CLI documentation](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html). It provides CLI commands to install the needed resources.\n",
+    "If you already have an existing resource group and only want to install a few resources manually you can refer to the CLI documentation [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html). It provides CLI commands to install the needed resources. \n",
+    "**Please Note: CLI documentation is for advanced users since there are a lot of configurations and role assignments that would have to be done manually, so it won't work out of the box and should just be used for reference. ARM template is the preferred way to deploy.**\n",
     "\n",
     "The below architecture diagram represents how different resources interact with each other\n",
     "![Architecture](https://github.com/feathr-ai/feathr/blob/main/docs/images/architecture.png?raw=true)"
@@ -109,14 +110,13 @@
     "\n",
     "## 3. Prerequisite: Set the required permissions\n",
     "\n",
-    "Before you proceed further, you may need additional permission: permission to access the keyvault, permission to access the Storage Blob as a Contributor and permission to submit jobs to Synapse cluster. Skip this step if you have already given yourself the access. 
Otherwise, run the following lines of command in the [Cloud Shell](https://shell.azure.com) before running the cells below.\n",
+    "Before you proceed further, you would need additional permissions: permission to access the keyvault, permission to access the Storage Blob as a Contributor and permission to submit jobs to Synapse cluster. Run the following lines of command in the [Cloud Shell](https://shell.azure.com) before running the cells below. Please replace the resource_prefix with the prefix you used in ARM template deployment.\n",
     "\n",
     "```\n",
-    " userId=\n",
-    " resource_prefix=\n",
+    " resource_prefix=\"YOUR_RESOURCE_PREFIX\"\n",
     " synapse_workspace_name=\"${resource_prefix}syws\"\n",
     " keyvault_name=\"${resource_prefix}kv\"\n",
-    " objectId=$(az ad user show --id $userId --query id -o tsv)\n",
+    " objectId=$(az ad signed-in-user show --query id -o tsv)\n",
     " az keyvault update --name $keyvault_name --enable-rbac-authorization false\n",
     " az keyvault set-policy -n $keyvault_name --secret-permissions get list --object-id $objectId\n",
-    " az role assignment create --assignee $userId --role \"Storage Blob Data Contributor\"\n",
+    " az role assignment create --assignee $objectId --role \"Storage Blob Data Contributor\"\n",