Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
maciejmacq-dev committed Oct 27, 2023
1 parent c6e1799 commit b2dde49
Show file tree
Hide file tree
Showing 30 changed files with 360 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .copier-answers.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY
# NOTE(review): the expression below uses Jinja's default double-curly-brace
# delimiters, while xxx/copier.yml in this same commit configures
# double-square-bracket delimiters and a .tmpl template suffix - confirm which
# settings apply to this file.
{{ _copier_answers|to_nice_yaml -}}
9 changes: 7 additions & 2 deletions copier.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,20 @@ project_description:
help: Short project description
default: Project for transforming data

# NOTE(review): the help text is identical to project_description's and the
# default is a placeholder - confirm this question is meant to ship.
dupa_description:
  type: str
  help: Short project description
  default: XXXXXXXXX

# GCP project id of the development environment.
gcp_dev_project_id:
  type: str
  help: Project id used in GCP as development environment
  # Single, quoted default: the value is all digits and would otherwise be
  # loaded as an integer even though the question is declared `type: str`.
  default: "123456789"

# GCP project id of the production environment.
gcp_prod_project_id:
  type: str
  help: Project id used in GCP as production environment
  # Single, quoted default (all-digit value on a `type: str` question).
  # NOTE(review): kept distinct from the development default so that accepting
  # the defaults cannot point both environments at the same project - confirm.
  default: "987654321"

pipeline_owner:
type: str
Expand Down
14 changes: 14 additions & 0 deletions xxx/.copier-answers.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY
_commit: v1.3.29
_src_path: /Users/maciejpietrzykowski/Desktop/data-pipelines-template-example
dataset: presentation
enable_data_governance: false
gcp_dev_project_id: dsda
gcp_prod_project_id: ddd
pipeline_owner: DataOps Teams
project_description: Project for transforming data
project_name: my_new_project
# Five-field cron expression (daily at 12:00); the recorded answer carried a
# stray trailing character that made it invalid.
schedule_interval: '0 12 * * *'
use_bi: false
use_databricks: false
use_ingestion: false
10 changes: 10 additions & 0 deletions xxx/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Build output, installed dbt packages and logs
target/
dbt_packages/
dbt_modules/
logs/

# Editor and per-user files
.idea
.user.yml

# data-pipelines-cli
/build/
15 changes: 15 additions & 0 deletions xxx/.gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Docker-in-Docker service attached to the CI jobs.
services:
  - name: docker:19.03.13-dind

# Shared pipeline definitions, pinned to tag v0.1.14 of the templates repository.
include:
  - https://raw.githubusercontent.com/getindata/gitlab_cicd_templates/v0.1.14/dataops/gcp/gcp_setup_template.yml
  - https://raw.githubusercontent.com/getindata/gitlab_cicd_templates/v0.1.14/dataops/cicd_template.yml

variables:
  DOCKER_REGISTRY: europe-central2-docker.pkg.dev
  BLOB_CONFIG_PATH: blob_args.json
  # NOTE(review): refers to a variable of the same name; presumably it is
  # supplied through the project's CI/CD settings - confirm.
  GCP_PROJECT: $GCP_PROJECT

stages:
  - execute-dev
  - execute-release
16 changes: 16 additions & 0 deletions xxx/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
FROM gcr.io/getindata-images-public/dbt-dataops:gcp-0.5.0

# Project sources. COPY is used rather than ADD because every source is a plain
# local path; none of ADD's extra behaviour (URL fetch, archive extraction) is
# wanted here.
COPY analyses /dbt/analyses/
COPY seeds /dbt/seeds/
COPY macros /dbt/macros/
COPY models /dbt/models/
COPY docs /dbt/docs/
COPY tests /dbt/tests/
COPY target/catalog.json /dbt/target/

# Project definition and package list.
COPY dbt_project.yml /dbt/dbt_project.yml
COPY packages.yml /dbt/packages.yml

# dbt profile rendered for the execution environment.
COPY build/profiles/env_execution/profiles.yml /root/.dbt/profiles.yml
#COPY config/base/datahub_assertions.yml /dbt/
# GCP_KEY_PATH is set to an empty string for this single command only.
RUN GCP_KEY_PATH="" dbt deps
28 changes: 28 additions & 0 deletions xxx/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# my_new_project

Project for transforming data

## Requirements

Use the package manager [pip](https://pip.pypa.io/en/stable/) to install [dp (data-pipelines-cli)](https://pypi.org/project/data-pipelines-cli/):

```bash
pip install data-pipelines-cli[docker,datahub,gcp]
```

## Using the project

```
dp run
```

```
dp test
```

### Resources:

- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
- Understand [Copier](https://copier.readthedocs.io/en/stable/)
- Try [Airflow](https://airflow.apache.org/)
Empty file added xxx/analyses/.gitkeep
Empty file.
34 changes: 34 additions & 0 deletions xxx/config/base/airflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# File generated by a template

# Arguments listed under default_args.
default_args:
  owner: DataOps Teams
  depends_on_past: false
  # NOTE(review): left unquoted as in the original, so YAML 1.1 loaders read
  # it as a timestamp rather than a string - confirm the consumer expects that.
  start_date: 2023-10-27T00:00:00
  email_on_failure: false
  email_on_retry: false
  retries: 0
  retry_delay: 5m

dag:
  dag_id: my_new_project
  description: 'Project for transforming data'
  # Five-field cron expression: every day at 12:00. The previous value had a
  # stray trailing character, which is not valid cron syntax.
  schedule_interval: '0 12 * * *'
  catchup: false
  max_active_runs: 2
  concurrency: 2

dags_path: "gs://dataops-composer-dags-gid-labs-dlz-core-dev/dags/my_new_project"

manifest_file_name: manifest.json
seed_task: false
use_task_group: true

# Optional failure notification, disabled by default:
# failure_handlers:
#   - type: slack
#     connection_id: slack_failure
#     message_template: |
#       :red_circle: Task Failed.
#       *Task*: {task}
#       *Dag*: {dag}
#       *Execution Time*: {execution_time}
#       *Log Url*: {url}
4 changes: 4 additions & 0 deletions xxx/config/base/bi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# BI integration switches; the integration as a whole is disabled here.
is_bi_enabled: false
bi_target: looker
is_bi_compile: true
is_bi_deploy: true
11 changes: 11 additions & 0 deletions xxx/config/base/bigquery.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# File generated by a template

# Authentication: service-account key file, path taken from the environment.
method: service-account
keyfile: "{{ env_var('GCP_KEY_PATH') }}"

# Target project (from the environment), dataset and location.
project: "{{ env_var('GCP_PROJECT') }}"
dataset: presentation
location: europe-central2

# Execution settings.
priority: interactive
timeout_seconds: 300
threads: 1
retries: 1
2 changes: 2 additions & 0 deletions xxx/config/base/dbt.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# dbt target name and the type of warehouse it points at.
target_type: bigquery
target: env_execution
8 changes: 8 additions & 0 deletions xxx/config/base/execution_env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# File generated by a template

# Container image that runs the project.
image:
  repository: europe-central2-docker.pkg.dev/gid-dataops-labs/composer-dags/my_new_project
  # Placeholder; presumably substituted with a real tag at build/deploy time - confirm.
  tag: <IMAGE_TAG>

# Execution backend and the script run inside the container.
type: k8s
execution_script: ./executor_with_test_reports_ingestions.sh
29 changes: 29 additions & 0 deletions xxx/config/base/k8s.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# File generated by a template

image_pull_policy: IfNotPresent
namespace: default

# The service-account secret is deployed as a volume under /var, which is
# where GCP_KEY_PATH below points.
secrets:
  - secret: service-account
    deploy_type: volume
    deploy_target: /var
    key: gc-key.json

envs:
  GCP_KEY_PATH: "/var/gc-key.json"
  GCP_PROJECT: dsda

labels:
  runner: airflow

is_delete_operator_pod: true

config_file: '/home/airflow/composer_kube_config'

# Limits and requests are set to the same values.
resources:
  limit:
    memory: 1024M
    cpu: 100m
  requests:
    memory: 1024M
    cpu: 100m
6 changes: 6 additions & 0 deletions xxx/config/base/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# File generated by a template

# Git repository and branch for publishing.
repository: git@gitlab.com:getindata/dataops/published-dbt-packages.git
branch: main

# Identity fields.
username: "DataOps Teams"
# NOTE(review): the address contains a space, so it is not a valid email -
# confirm the intended value.
email: "DataOps Teams@getindata.com"
5 changes: 5 additions & 0 deletions xxx/config/local/bigquery.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Local runs authenticate with OAuth, so the key file path is left empty.
method: oauth
keyfile: ""

# Dataset name is built from the `username` variable.
project: dsda
dataset: "{{ var('username') }}_private_working_schema"
timeout_seconds: 200
1 change: 1 addition & 0 deletions xxx/config/local/dbt.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# dbt target name for the local environment.
target: local
1 change: 1 addition & 0 deletions xxx/config/prod/airflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# DAGs path for the prod environment.
dags_path: "gs://dataops-composer-dags-dataops-prod-342817/dags/my_new_project"
3 changes: 3 additions & 0 deletions xxx/config/prod/k8s.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Environment variables for the prod environment.
envs:
  GCP_KEY_PATH: "/var/gc-key.json"
  GCP_PROJECT: ddd
109 changes: 109 additions & 0 deletions xxx/copier.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# ---------------------------------------------------------------------------
# Questions
# ---------------------------------------------------------------------------
project_name:
  type: str
  help: Name of the project (use alphanumeric characters with _)
  default: my_new_project

project_description:
  type: str
  help: Short project description
  default: Project for transforming data

gcp_dev_project_id:
  type: str
  help: Project id used in GCP as development environment

gcp_prod_project_id:
  type: str
  help: Project id used in GCP as production environment

pipeline_owner:
  type: str
  help: Owner of the pipeline in airflow
  default: DataOps Team

schedule_interval:
  type: str
  help: Cron expression
  # Quoted so the asterisks can never be read as YAML indicators.
  default: '0 12 * * *'

dataset:
  type: str
  help: Name of the dataset
  default: presentation

enable_data_governance:
  type: bool
  help: Would you like to use DataHub for collecting metadata?
  default: false

use_databricks:
  type: bool
  help: Would you like to use Databricks integration?
  default: false

# Asked only when use_databricks is true.
databricks_cluster_name:
  when: "[[ use_databricks ]]"
  type: str
  help: Name of the databricks cluster used to execute dbt tasks.

databricks_workspace_url:
  when: "[[ use_databricks ]]"
  type: str
  help: Workspace url where jobs will be deployed.

use_ingestion:
  type: bool
  help: Would you like to use ingestion framework?
  default: false

# Asked only when use_ingestion is true.
destination_id_dev:
  when: "[[ use_ingestion ]]"
  type: str
  help: Destination Id for dev instance

source_id_dev:
  when: "[[ use_ingestion ]]"
  type: str
  help: Source Id for dev instance

destination_id_prod:
  when: "[[ use_ingestion ]]"
  type: str
  help: Destination Id for prod instance

source_id_prod:
  when: "[[ use_ingestion ]]"
  type: str
  help: Source Id for prod instance

use_bi:
  type: bool
  help: Would you like to use Business Intelligence (e.g. Looker)?
  default: false

# ---------------------------------------------------------------------------
# Copier settings
# ---------------------------------------------------------------------------
_exclude:
  - .git
  - .github

_skip_if_exists:
  - models
  - tests

#_tasks:
#  - "git add -A"
#  - "git commit -m 'Initial project or upgrade'"
#  - "git push"

_min_copier_version: "7.0.0"

_templates_suffix: .tmpl

# Template delimiters are square-bracket based: [[ var ]], [% block %], [# comment #].
_envops:
  autoescape: false
  block_end_string: "%]"
  block_start_string: "[%"
  comment_end_string: "#]"
  comment_start_string: "[#"
  keep_trailing_newline: true
  variable_end_string: "]]"
  variable_start_string: "[["
6 changes: 6 additions & 0 deletions xxx/dag/dag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from os import path
from airflow.models import Variable
from dbt_airflow_factory.airflow_dag_factory import AirflowDagFactory


# Directory that holds this DAG file; passed to the factory as its first argument.
_dag_dir = path.dirname(path.abspath(__file__))
# Environment name, read from the Airflow Variable "env".
_env_name = Variable.get("env")

# Module-level DAG object produced by the factory.
dag = AirflowDagFactory(_dag_dir, _env_name).create()
37 changes: 37 additions & 0 deletions xxx/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'my_new_project'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'bigquery'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"  # directory which will store compiled SQL files
clean-targets:  # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"

# Per-folder model settings: materialization and schema for each layer.
models:
  my_new_project:
    staging:
      +materialized: view
      +schema: staging
    intermediate:
      +materialized: ephemeral
      +schema: intermediate
    presentation:
      +materialized: table
      +schema: presentation
Empty file added xxx/docs/.gitkeep
Empty file.
Empty file added xxx/macros/.gitkeep
Empty file.
3 changes: 3 additions & 0 deletions xxx/macros/generate_schema_name.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{# Forwards custom_schema_name and node unchanged to dbt_common_macros.custom_generate_schema_name. #}
{% macro generate_schema_name(custom_schema_name, node) -%}
{{ dbt_common_macros.custom_generate_schema_name(custom_schema_name, node) }}
{%- endmacro %}
Empty file added xxx/models/.gitkeep
Empty file.

0 comments on commit b2dde49

Please sign in to comment.