# profiles.yml
# Connection profile "calitp_warehouse". Every output below is a BigQuery
# connection; the later outputs reuse the `prod` settings through YAML
# anchors and merge keys and override only what differs.
calitp_warehouse:
  # Output selected when no target is named explicitly.
  target: staging
  outputs:
    # Base output: production project, authenticated with oauth.
    # Anchored as `prod` so the outputs below can merge it.
    prod: &prod
      execution_project: cal-itp-data-infra
      database: cal-itp-data-infra
      schema: staging
      fixed_retries: 1
      location: us-west2
      method: oauth
      priority: interactive
      threads: 8
      # currently slowest model is int_gtfs_rt__trip_updates_trip_day_map_grouping
      # as of 7/18/23, takes over 14400 seconds
      timeout_seconds: 16200
      type: bigquery
      # Settings for Python models submitted to serverless Dataproc.
      gcs_bucket: calitp-dbt-python-models
      dataproc_region: us-west2
      submission_method: serverless
      dataproc_batch:
        runtime_config:
          container_image: gcr.io/cal-itp-data-infra/dbt-spark:2023.3.28
          properties:
            # Values are quoted so they stay strings rather than integers.
            spark.executor.cores: "4"  # this is the default but put it here for clarity
            spark.executor.instances: "4"  # dbt defaults to 2
            spark.executor.memory: 4g
            spark.dynamicAllocation.maxExecutors: "16"

    # Same settings as `prod`, but authenticating with a service-account
    # keyfile. The path is read from BIGQUERY_KEYFILE_LOCATION, with a
    # default under /secrets/jobs-data.
    prod_service_account:
      <<: *prod
      method: service-account
      keyfile: "{{ env_var('BIGQUERY_KEYFILE_LOCATION', '/secrets/jobs-data/service-account.json') }}"

    # Staging project: starts from `prod` and overrides the project,
    # database, and Python-model bucket. Anchored as `staging`.
    staging: &staging
      <<: *prod
      execution_project: cal-itp-data-infra-staging
      database: cal-itp-data-infra-staging
      schema: staging
      gcs_bucket: test-calitp-dbt-python-models

    # Same settings as `staging`, but authenticating with a service-account
    # keyfile resolved the same way as in `prod_service_account`.
    staging_service_account:
      <<: *staging
      method: service-account
      keyfile: "{{ env_var('BIGQUERY_KEYFILE_LOCATION', '/secrets/jobs-data/service-account.json') }}"