Google Container Registry to Artifact Registry migration (#382)
* GCR to AR migration

* GCR to AR migration

* GCR to AR migration

* Review fixes

* Changelog

* Set version to 1.9.0.dev1

* Revert "Set version to 1.9.0.dev1"

This reverts commit d7a8f09.

---------

Co-authored-by: Grzegorz Witkowski <grzegorz.witkowski@allegro.pl>
artnowo-alle and grzegorzwitkowski committed May 20, 2024
1 parent aab532e commit dd136fe
Showing 19 changed files with 133 additions and 101 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,11 @@
# BigFlow changelog

## Version 1.9.0

### Changes

* Switched from Google Container Registry to Artifact Registry. Made `-r`/`--docker-repository` common to all deploy commands. Build and deploy commands now authenticate to the Docker repository taken from `deployment_config.py` or CLI arguments, instead of the hardcoded `https://eu.gcr.io`.

## Version 1.8.0

### Changes
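To illustrate the 1.9.0 entry above: the repository that build and deploy commands authenticate to now comes from `deployment_config.py` (or the `-r`/`--docker-repository` CLI argument). A minimal sketch of such a configuration, assuming `Config` is importable from the top-level `bigflow` package as in the docs changed below (project, repository, and bucket names are placeholders):

```python
from bigflow import Config

deployment_config = Config(
    name='dev',
    properties={
        'gcp_project_id': 'my_gcp_project_id',
        # Artifact Registry naming: {LOCATION}-docker.pkg.dev/{PROJECT-ID}/{REPOSITORY}
        'docker_repository': 'europe-west1-docker.pkg.dev/{gcp_project_id}/docs-project',
        'dags_bucket': 'my_composer_dags_bucket',
    },
)
```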
1 change: 1 addition & 0 deletions bigflow/build/operate.py
@@ -92,6 +92,7 @@ def _build_docker_image(
logger.debug("Authenticate to docker registry")
bigflow.deploy.authenticate_to_registry(
auth_method=cache_params.auth_method or bigflow.deploy.AuthorizationType.LOCAL_ACCOUNT,
docker_repository=project_spec.docker_repository,
vault_endpoint=cache_params.vault_endpoint,
vault_secret=cache_params.vault_secret,
vault_endpoint_verify=cache_params.vault_endpoint_verify
Expand Down
18 changes: 10 additions & 8 deletions bigflow/cli.py
@@ -432,11 +432,17 @@ def _add_deploy_parsers_common_arguments(parser):
_add_auth_parsers_arguments(parser)
_add_parsers_common_arguments(parser)

parser.add_argument('-r', '--docker-repository',
type=str,
help='Name of a local and target Docker repository. Typically, a target repository is hosted by Google Cloud Artifact Registry.'
' If so, it follows the naming schema: {HOSTNAME}/{PROJECT-ID}/{IMAGE}.'
)


def _create_deploy_parser(subparsers):
parser = subparsers.add_parser('deploy',
description='Performs complete deployment. Uploads DAG files from local DAGs folder '
'to Composer and uploads Docker image to Container Registry.')
'to Composer and uploads Docker image to Artifact Registry.')

_add_deploy_dags_parser_arguments(parser)
_add_deploy_image_parser_arguments(parser)
@@ -445,7 +451,7 @@ def _create_deploy_parser(subparsers):

def _create_deploy_image_parser(subparsers):
parser = subparsers.add_parser('deploy-image',
description='Uploads Docker image to Container Registry.'
description='Uploads Docker image to Artifact Registry.'
)

_add_deploy_image_parser_arguments(parser)
@@ -481,11 +487,6 @@ def _add_deploy_image_parser_arguments(parser):
parser.add_argument('-i', '--image-tar-path',
type=str,
help='Path to a Docker image file. The file name must contain version number with the following naming schema: image-{version}.tar')
parser.add_argument('-r', '--docker-repository',
type=str,
help='Name of a local and target Docker repository. Typically, a target repository is hosted by Google Cloud Container Registry.'
' If so, with the following naming schema: {HOSTNAME}/{PROJECT-ID}/{IMAGE}.'
)

def _add_deploy_dags_parser_arguments(parser):
parser.add_argument('-dd', '--dags-dir',
@@ -553,7 +554,8 @@ def _cli_deploy_dags(args):
vault_endpoint=_resolve_vault_endpoint(args),
vault_endpoint_verify=_resolve_property(args, 'vault_endpoint_verify', ignore_value_error=True),
vault_secret=vault_secret,
project_id=_resolve_property(args, 'gcp_project_id')
project_id=_resolve_property(args, 'gcp_project_id'),
docker_repository=_resolve_property(args, 'docker_repository')
)


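With the diff above, `bigflow deploy-dags` forwards the resolved `docker_repository` into `bigflow.deploy.deploy_dags_folder`. A hedged sketch of the resulting call, using only names visible in this commit (all values are hypothetical; the signature also accepts the vault-related parameters shown in `bigflow/deploy.py` below):

```python
import bigflow.deploy

# Hypothetical values; in practice they are resolved from CLI arguments
# or from deployment_config.py.
bigflow.deploy.deploy_dags_folder(
    dags_dir='/tmp/my_dags',
    dags_bucket='europe-west1-my-1234-bucket',
    project_id='my_gcp_dev_project',
    docker_repository='europe-west1-docker.pkg.dev/my_gcp_dev_project/my_project',
    auth_method=bigflow.deploy.AuthorizationType.LOCAL_ACCOUNT,
)
```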
2 changes: 1 addition & 1 deletion bigflow/dagbuilder.py
@@ -52,7 +52,7 @@ def generate_dag_file(
# bigflow-workflow: \t{workflow.workflow_id}
# bigflow-build-ver: \t{build_ver}
# bigflow-startdate: \t{start_from.isoformat()}
# biglfow-imageid: \t{image_version}
# bigflow-imageid: \t{image_version}
import datetime
from airflow import DAG
17 changes: 12 additions & 5 deletions bigflow/deploy.py
@@ -131,8 +131,10 @@ def _deploy_image_loaded_to_local_registry(
docker_image_latest = docker_repository + ":latest"
tag_image(image_id, docker_repository, "latest")

logger.info("Deploying docker image tag=%s auth_method=%s", docker_image, auth_method)
authenticate_to_registry(auth_method, vault_endpoint, vault_secret, vault_endpoint_verify)
logger.info(
"Deploying docker image tag=%s auth_method=%s docker_repository=%s",
docker_image, auth_method, docker_repository)
authenticate_to_registry(auth_method, docker_repository, vault_endpoint, vault_secret, vault_endpoint_verify)
bf_commons.run_process(['docker', 'push', docker_image])
bf_commons.run_process(['docker', 'push', docker_image_latest])

@@ -141,6 +143,7 @@

def authenticate_to_registry(
auth_method: AuthorizationType,
docker_repository: str,
vault_endpoint: T.Optional[str] = None,
vault_secret: T.Optional[str] = None,
vault_endpoint_verify: str | bool | None = None,
@@ -151,8 +154,9 @@
bf_commons.run_process(['gcloud', 'auth', 'configure-docker'])
elif auth_method == AuthorizationType.VAULT:
oauthtoken = get_vault_token(vault_endpoint, vault_secret, vault_endpoint_verify)
docker_repository_host = docker_repository.split('/', 1)[0]
bf_commons.run_process(
['docker', 'login', '-u', 'oauth2accesstoken', '--password-stdin', 'https://eu.gcr.io'],
['docker', 'login', '-u', 'oauth2accesstoken', '--password-stdin', f"https://{docker_repository_host}"],
input=oauthtoken,
)
else:
@@ -162,12 +166,13 @@
def check_images_exist(
images: T.Set[str],
auth_method: AuthorizationType,
docker_repository: str,
vault_endpoint: T.Optional[str] = None,
vault_secret: T.Optional[str] = None,
vault_endpoint_verify: str | bool | None = None
):
logger.info("Checking if images used in DAGs exist in the registry")
authenticate_to_registry(auth_method, vault_endpoint, vault_secret, vault_endpoint_verify)
authenticate_to_registry(auth_method, docker_repository, vault_endpoint, vault_secret, vault_endpoint_verify)
missing_images = set()
for image in images:
found_images = bf_commons.run_process(['docker', 'manifest', 'inspect', image], check=False, verbose=False)
@@ -195,6 +200,7 @@ def deploy_dags_folder(
dags_dir: str,
dags_bucket: str,
project_id: str,
docker_repository: str,
clear_dags_folder: bool = False,
auth_method: AuthorizationType = AuthorizationType.LOCAL_ACCOUNT,
vault_endpoint: T.Optional[str] = None,
@@ -208,7 +214,8 @@
vault_endpoint=vault_endpoint,
vault_endpoint_verify=vault_endpoint_verify,
vault_secret=vault_secret,
images=images)
images=images,
docker_repository=docker_repository,)

logger.info("Deploying DAGs folder, auth_method=%s, clear_dags_folder=%s, dags_dir=%s", auth_method, clear_dags_folder, dags_dir)

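The key change above is that the registry host for `docker login` is now derived from the configured repository instead of being hardcoded to `https://eu.gcr.io`. A minimal sketch of that derivation, mirroring the `split('/', 1)[0]` line in the diff (the login command is shown as a comment):

```python
def registry_host(docker_repository: str) -> str:
    # 'europe-west1-docker.pkg.dev/my-project/my-repo' -> 'europe-west1-docker.pkg.dev'
    return docker_repository.split('/', 1)[0]

host = registry_host('europe-west1-docker.pkg.dev/my-project/my-repo')
assert host == 'europe-west1-docker.pkg.dev'
# The VAULT branch then pipes the OAuth token to:
#   docker login -u oauth2accesstoken --password-stdin https://{host}
```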
4 changes: 2 additions & 2 deletions docs/cli.md
@@ -263,7 +263,7 @@ Configuration is passed via command line arguments.
```shell
bigflow deploy-image \
--image-tar-path '/tmp/image-0.1.0-tar' \
--docker-repository 'eu.gcr.io/my_gcp_dev_project/my_project' \
--docker-repository 'europe-west1-docker.pkg.dev/my_gcp_dev_project/my_project' \
--auth-method=vault \
--vault-secret ***** \
--vault-endpoint 'https://example.com/vault'
@@ -293,7 +293,7 @@ Configuration is passed via command line arguments.
bigflow deploy \
--image-tar-path '/tmp/image-0.1.0-tar' \
--dags-dir '/tmp/my_dags' \
--docker-repository 'eu.gcr.io/my_gcp_dev_project/my_project' \
--docker-repository 'europe-west1-docker.pkg.dev/my_gcp_dev_project/my_project' \
--auth-method=vault \
--vault-secret ***** \
--vault-endpoint 'https://example.com/vault' \
20 changes: 10 additions & 10 deletions docs/configuration.md
@@ -18,7 +18,7 @@ deployment_config = Config(
properties={
'gcp_project_id': '{env}-project-id',
'docker_repository_project': 'my-shared-docker-project-id',
'docker_repository': 'eu.gcr.io/{docker_repository_project}/my-analytics',
'docker_repository': 'europe-west1-docker.pkg.dev/{docker_repository_project}/my-analytics',
'vault_endpoint': 'https://example.com/vault',
'dags_bucket': 'europe-west1-my-1234-bucket',
},
@@ -54,13 +54,13 @@ final properties:
```text
dev config:
{ 'dags_bucket': 'europe-west1-my-1234-bucket',
'docker_repository': 'eu.gcr.io/my-shared-docker-project-id/my-analytics',
'docker_repository': 'europe-west1-docker.pkg.dev/my-shared-docker-project-id/my-analytics',
'docker_repository_project': 'my-shared-docker-project-id',
'gcp_project_id': 'dev-project-id',
'vault_endpoint': 'https://example.com/vault'}
prod config:
{ 'dags_bucket': 'europe-west1-my-4321-bucket',
'docker_repository': 'eu.gcr.io/my-shared-docker-project-id/my-analytics',
'docker_repository': 'europe-west1-docker.pkg.dev/my-shared-docker-project-id/my-analytics',
'docker_repository_project': 'my-shared-docker-project-id',
'gcp_project_id': 'prod-project-id',
'vault_endpoint': 'https://example.com/vault'}
@@ -85,13 +85,13 @@ For example, the `docker_repository` property is resolved from:

```python
'docker_repository_project': 'my-shared-docker-project-id',
'docker_repository': 'eu.gcr.io/{docker_repository_project}/my-analytics'
'docker_repository': 'europe-west1-docker.pkg.dev/{docker_repository_project}/my-analytics'
```

to

```text
'docker_repository': 'eu.gcr.io/my-shared-docker-project-id/my-analytics'
'docker_repository': 'europe-west1-docker.pkg.dev/my-shared-docker-project-id/my-analytics'
```
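
Conceptually, this interpolation behaves like `str.format` applied over the property dict. An illustrative sketch (not BigFlow's actual resolution code, which also handles environment selection):

```python
properties = {
    'docker_repository_project': 'my-shared-docker-project-id',
    'docker_repository': 'europe-west1-docker.pkg.dev/{docker_repository_project}/my-analytics',
}

# Substitute {placeholders} using the other properties.
resolved = {key: value.format(**properties) for key, value in properties.items()}
print(resolved['docker_repository'])
# => europe-west1-docker.pkg.dev/my-shared-docker-project-id/my-analytics
```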


@@ -333,7 +333,7 @@ deployment_config = DeploymentConfig(

Bigflow 1.3 introduces a new class-based approach for keeping configurations.
The new API is optional and there is no need to migrate existing code from
`biglfow.Configuration`. However, it allows you to use type hints, which enables
`bigflow.Configuration`. However, it allows you to use type hints, which enables
autocompletion in IDEs and gives you more flexibility.

Each configuration is declared as a subclass of `bigflow.konfig.Konfig`.
@@ -411,15 +411,15 @@ class MyConfig(bigflow.konfig.Konfig):
docker_repository_project = "my-shared-docker-project-id"

# no 'expand()' function - string is *not* interpolated
docker_repository_raw = "eu.gcr.io/{docker_repository_project}/my-analytics"
docker_repository_raw = "europe-west1-docker.pkg.dev/{docker_repository_project}/my-analytics"

# interpolation is enabled
docker_repository = expand("eu.gcr.io/{docker_repository_project}/my-analytics")
docker_repository = expand("europe-west1-docker.pkg.dev/{docker_repository_project}/my-analytics")


config = MyConfig()
print(config.docker_repository_raw) # => eu.gcr.io/{docker_repository_project}/my-analytics
print(config.docker_repository) # => eu.gcr.io/my-shared-docker-project-id/my-analytics
print(config.docker_repository_raw) # => europe-west1-docker.pkg.dev/{docker_repository_project}/my-analytics
print(config.docker_repository) # => europe-west1-docker.pkg.dev/my-shared-docker-project-id/my-analytics
```

### Reading from environment variables
29 changes: 14 additions & 15 deletions docs/deployment.md
@@ -108,20 +108,20 @@ It is used by BigFlow to select the proper configuration from [Config](configura

[Docker Registry](https://docs.docker.com/registry/) is a repository where Docker images are stored.

We recommend using Google Cloud [Container Registry](https://cloud.google.com/container-registry)
We recommend using Google Cloud [Artifact Registry](https://cloud.google.com/artifact-registry)
because it integrates seamlessly with Composer.

### Docker repository name

One Container Registry can hosts many image repositories.
One Artifact Registry can host many image repositories.
We recommend having one image repository per BigFlow project.

You don't need to create repositories explicitly; they are merely namespaces.
All you need is to put the full repository name into the `docker_repository` property in
[`deployment_config.py`](#managing-configuration-in-deployment_configpy). For example:

```python
'docker_repository': 'eu.gcr.io/my_gcp_dev_project/my-bigflow-project'
'docker_repository': 'europe-west1-docker.pkg.dev/my_gcp_dev_project/my-repo-name/my-bigflow-project'
```

### Docker Registry permissions
@@ -130,21 +130,21 @@ Ensure that your Composers have permission to pull images from a Registry.

If a Composer's service account is a
[default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
and if it wants to pull from a Container Registry located in the same GCP project &mdash;
and if it wants to pull from an Artifact Registry located in the same GCP project &mdash;
it has the pull permission by default.

Otherwise, you have to grant read permission to a [bucket](https://cloud.google.com/storage/docs/json_api/v1/buckets),
which underlies your Registry (**Storage Object Viewer** is enough).

Read more about Container Registry [access control](https://cloud.google.com/container-registry/docs/access-control).
Read more about Artifact Registry [access control](https://cloud.google.com/artifact-registry/docs/protect-artifacts).

### How to find a bucket behind your Container Registry
#### Container Registry deprecation

Finding a bucket behind your Container Registry is not straightforward, because it's not linked anywhere.
Moreover, its naming policy is somehow confusing ([read more](https://cloud.google.com/container-registry/docs/access-control#grant-bucket) about this policy).

In the GCP project which hosts your Container Registry, go to [Storage](https://cloud.google.com/storage)
browser. There should be a bucket with the `artifacts` phrase in its name
Since BigFlow 1.9.0, Artifact Registry is used instead of Container Registry, because the latter is deprecated.
Please consult the Google Cloud docs for more information:
* [Container Registry deprecation](https://cloud.google.com/container-registry/docs/deprecations/container-registry-deprecation)
* [Prepare for Container Registry shutdown](https://cloud.google.com/artifact-registry/docs/transition/prepare-gcr-shutdown)
* [Transition from Container Registry](https://cloud.google.com/artifact-registry/docs/transition/transition-from-gcr)

## Managing configuration in deployment_config.py

@@ -172,7 +172,7 @@ deployment_config = Config(name='dev',
properties={
'gcp_project_id': 'my_gcp_dev_project',
'docker_repository_project': '{gcp_project_id}',
'docker_repository': 'eu.gcr.io/{docker_repository_project}/my-bigflow-project',
'docker_repository': 'europe-west1-docker.pkg.dev/{docker_repository_project}/my-repository-name/my-bigflow-project',
'vault_endpoint': 'https://example.com/vault',
'dags_bucket': 'europe-west1-my-first-compo-ba6e3418-bucket'
})\
@@ -236,9 +236,8 @@ on your CI/CD server.
Deployment means uploading various files to Cloud Storage
[buckets](https://cloud.google.com/storage/docs/json_api/v1/buckets):

1. Docker images are uploaded to a
[bucket behind Container Registry](#how-to-find-a-bucket-behind-your-container-registry).
write access to this bucket is required.
1. Docker images are pushed to Artifact Registry, which requires a
[predefined writer role](https://cloud.google.com/artifact-registry/docs/access-control#roles).
1. DAG files are uploaded to a bucket behind [Composer's DAGs Folder](#composers-dags-folder),
write and delete access to this bucket is required.

2 changes: 1 addition & 1 deletion docs/ml-prediction.md
@@ -235,7 +235,7 @@ There is also a handy method for downloading models to the local file system fro

## Running the prediction process

To run the process, you first need to build the image and deploy it to Google Container Registry, so Dataflow can
To run the process, you first need to build the image and deploy it to Google Artifact Registry, so Dataflow can
use it to run a job.

```shell script
2 changes: 1 addition & 1 deletion docs/project_structure_and_build.md
@@ -262,7 +262,7 @@ print_resource_job = kubernetes_pod_operator.KubernetesPodOperator(
cmds=['bf'],
arguments=['run', '--job', 'resources_workflow.print_resource_job', '--runtime', '{{ execution_date.strftime("%Y-%m-%d %H:%M:%S") }}', '--project-package', 'examples', '--config', '{{var.value.env}}'],
namespace='default',
image='eu.gcr.io/docker_repository_project/my-project:0.1.0',
image='europe-west1-docker.pkg.dev/docker_repository_project/my-project:0.1.0',
is_delete_operator_pod=True,
retries=3,
retry_delay=timedelta(seconds=60),
2 changes: 1 addition & 1 deletion docs/tutorial.md
@@ -179,7 +179,7 @@ deployment_config = Config(
name='dev',
properties={
'gcp_project_id': 'my_gcp_project_id',
'docker_repository': 'eu.gcr.io/{gcp_project_id}/docs-project',
'docker_repository': 'europe-west1-docker.pkg.dev/{gcp_project_id}/docs-project',
'dags_bucket': 'my_composer_dags_bucket',
},
)
2 changes: 1 addition & 1 deletion examples_project/deployment_config.py
@@ -4,7 +4,7 @@
name='dev',
properties={
'gcp_project_id': 'my_gcp_project_id',
'docker_repository': 'eu.gcr.io/{gcp_project_id}/docs-project',
'docker_repository': 'europe-west1-docker.pkg.dev/{gcp_project_id}/docs-project',
'dags_bucket': 'my_composer_dags_bucket'
})

