Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

refactor sdlf-ddk-lightweight #15

Merged
merged 4 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sdlf-ddk-lightweight/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ cdk.out
ddk.out/*
.ddk.out/*
.out

# type stubs
.typings
5 changes: 3 additions & 2 deletions sdlf-ddk-lightweight/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ CICD=default
CHILD=default
REGION=$(shell aws configure get region --profile ${CICD})
ENV=dev
CHILD_STACKS=$(shell cdk list --notices=false | grep "SDLF-DDK-${ENV}/" | tr '\n' ' ')


.PHONY: empty_buckets delete_sdlf delete_bootstrap delete_repositories delete_all_items
Expand All @@ -19,14 +20,14 @@ help:


delete_repositories:
./scripts/cleanup_scripts/delete_repositories.sh -s ${CICD} -t ${CHILD} -r ${REGION} -d sdlf-ddk-example
./scripts/cleanup_scripts/delete_repositories.sh -s ${CICD} -t ${CHILD} -r ${REGION} -d sdlf-ddk-streaming

empty_buckets:
pushd scripts/cleanup_scripts; python3 ./list_items_to_delete.py ${ENV} ${CHILD}; popd;
pushd scripts/cleanup_scripts; python3 ./empty_buckets.py ${CHILD}; popd;

delete_sdlf:
cdk destroy SDLF-DDK-${ENV}/sdlf-data-lake-pipeline --force --profile ${CHILD};
cdk destroy ${CHILD_STACKS} --force --profile ${CHILD}

cdk destroy sdlf-ddk-pipeline --force --profile ${CICD}

Expand Down
141 changes: 68 additions & 73 deletions sdlf-ddk-lightweight/README.md

Large diffs are not rendered by default.

98 changes: 62 additions & 36 deletions sdlf-ddk-lightweight/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,72 +13,98 @@
# limitations under the License.


from typing import Any, Dict
import wrapt
from typing import Any, Tuple, Dict

import aws_cdk as cdk
import wrapt
from aws_cdk.pipelines import CodePipeline
from aws_ddk_core.cicd import CICDPipelineStack
from data_lake.pipelines import SDLFPipelineStack
from aws_ddk_core.config import Config
from aws_cdk.pipelines import CodePipeline
from constructs import Construct

from data_lake.pipelines import SDLFBaseStack


@wrapt.patch_function_wrapper(CICDPipelineStack, 'build')
def build(wrapped, self, args, kwargs) -> "CICDPipelineStack":
@wrapt.patch_function_wrapper(CICDPipelineStack, 'build') # type: ignore
kukushking marked this conversation as resolved.
Show resolved Hide resolved
def build(
wrapped: Any, # pylint: disable=unused-argument
self: CICDPipelineStack,
args: Tuple[Any, ...], # pylint: disable=unused-argument
kwargs: Dict[str, Any]
) -> "CICDPipelineStack":
"""
This is a temporary workaround to expose the publish_assets_in_parallel parameter
from CDK into DDK so we can reduce the number of file asset jobs created.

Returns
-------
pipeline : CICDPipelineStack
CICDPipelineStack
"""

publish_assets_in_parallel = kwargs['publish_assets_in_parallel'] if 'publish_assets_in_parallel' in kwargs else True
publish_assets_in_parallel: bool = \
kwargs['publish_assets_in_parallel'] \
if 'publish_assets_in_parallel' in kwargs \
else True

# Create self-mutating CDK Pipeline
self._pipeline = CodePipeline(
self._pipeline = CodePipeline( # type: ignore pylint: disable=protected-access
self,
id="code-pipeline",
cross_account_keys=True,
synth=self._synth_action,
cli_version=self._config.get_cdk_version(),
synth=self._synth_action, # type: ignore pylint: disable=protected-access
cli_version=self._config.get_cdk_version(), # type: ignore pylint: disable=protected-access
publish_assets_in_parallel=publish_assets_in_parallel
)
return self



class DataLakeFramework(cdk.Stage):
def __init__(
self,
scope,
pipeline_params: Dict,
scope: Construct,
pipeline_params: Dict[str, Any],
environment_id: str,
**kwargs: Any,
) -> None:
super().__init__(scope, f"SDLF-DDK-{environment_id}", **kwargs)

self._environment_id = environment_id
self._resource_prefix = pipeline_params.get("resource_prefix", "ddk")
self._sdlf_params = pipeline_params.get("data_pipeline_parameters", {})

SDLFPipelineStack(self, f"{self._resource_prefix}-data-lake-pipeline", environment_id=environment_id, resource_prefix=self._resource_prefix, params=self._sdlf_params)

satellite_app = cdk.App()
config = Config()

cicd_repository_name = config.get_env_config("cicd").get("repository", "sdlf-ddk-example")

pipeline_name = "sdlf-ddk-pipeline"
pipeline = CICDPipelineStack(satellite_app, id=pipeline_name, environment_id="cicd", pipeline_name=pipeline_name)

pipeline.add_source_action(repository_name=cicd_repository_name)
pipeline.add_synth_action()
pipeline.build(publish_assets_in_parallel=False)
pipeline.add_checks()
pipeline.add_stage("dev", DataLakeFramework(satellite_app, environment_id="dev", pipeline_params=config.get_env_config("dev"), env=config.get_env("dev")))
pipeline.synth()
pipeline.add_notifications()
super().__init__(
scope,
f"SDLF-DDK-{environment_id}",
**kwargs
)

self._sdlf_params = pipeline_params.get("data_pipeline_parameters", {})

satellite_app.synth()
SDLFBaseStack(
self,
f"{self._resource_prefix}-data-lake-pipeline",
environment_id=environment_id,
resource_prefix=self._resource_prefix,
params=self._sdlf_params
)


if __name__ == "__main__":
satellite_app = cdk.App()
config = Config()
PIPELINE_NAME = "sdlf-ddk-pipeline"
cicd_repository_name = config.get_env_config("cicd").get("repository", "sdlf-ddk-example")

pipeline = CICDPipelineStack(satellite_app, id=PIPELINE_NAME, environment_id="cicd", pipeline_name=PIPELINE_NAME)
pipeline.add_source_action(repository_name=cicd_repository_name)
pipeline.add_synth_action()
pipeline.build(publish_assets_in_parallel=False) # type:ignore
pipeline.add_checks()
pipeline.add_stage(
"dev",
DataLakeFramework(
satellite_app,
environment_id="dev",
pipeline_params=config.get_env_config("dev"),
env=config.get_env("dev")
)
)
pipeline.synth()

satellite_app.synth()
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@


from .foundations_stack import FoundationsStack
from .sdlf_dataset_stack import SDLFDatasetStack, SDLFDatasetConfig

__all__ = [
"FoundationsStage",
"SDLFDatasetStage",
"SDLFDatasetConfig"
"FoundationsStack"
]
Loading