-
Notifications
You must be signed in to change notification settings - Fork 79
/
application.yaml
162 lines (145 loc) · 8.42 KB
/
application.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#
# Copyright 2020-2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
fhirdata:
  # The base URL of the source FHIR server. If `dbConfig` is not set, resources
  # are fetched from this URL through the FHIR Search API.
  # Equivalent to the pipeline `fhirServerUrl` parameter.
  fhirServerUrl: "http://172.17.0.1:8091/fhir"
  #fhirServerUrl: "http://localhost:8099/openmrs/ws/fhir2/R4"
  #fhirServerUrl: "http://localhost:9002/fhir"
  # The following user-name/password should be set if the FHIR server supports
  # Basic Auth.
  #fhirServerUserName: "admin"
  #fhirServerPassword: "Admin123"
  # The following client credentials should be set if the FHIR server accepts
  # OAuth access tokens. Note the client credentials, e.g., the secret, are
  # sensitive, and it is probably a better practice to set these through
  # command-line arguments.
  #fhirServerOAuthTokenEndpoint: "http://localhost:9080/auth/realms/test/protocol/openid-connect/token"
  #fhirServerOAuthClientId: "THE_CLIENT_ID"
  #fhirServerOAuthClientSecret: "THE_CLIENT_SECRET"
  # The path to the file containing JDBC settings for connecting to a HAPI FHIR
  # server database. If this is set, resources are fetched directly from the
  # database and `fhirServerUrl` is ignored.
  # Equivalent to the pipeline `fhirDatabaseConfigPath` parameter.
  # dbConfig: "config/hapi-postgres-config.json"
  # The path to output Parquet files to. The last portion of the
  # path is used as a prefix for naming the directory that contains
  # per-resource directories, and a timestamp will be added;
  # for example, "config/controller_DWH_ORIG" will create something like
  # ./config/controller_DWH_ORIG_TIMESTAMP_2023-01-27T23-55-39.295824Z
  # Similar to the pipeline `outputParquetPath` parameter.
  #
  # For GCS buckets, dwhRootPrefix must be of the format
  # "gs://<bucket>/<baseDirPath>/<prefix>". <baseDirPath> is optional
  # for GCS buckets and may contain 0 or more directory names.
  #
  # For *nix file systems, dwhRootPrefix must be of the format
  # "/<baseDirPath>/<prefix>" and can be absolute or relative. <baseDirPath>
  # is required for *nix, and must contain 1 or more directory names.
  # Sample 1: "/fhir-test-analytics/dwh/controller_DWH_ORIG"
  # Sample 2: "dwh/controller_DWH_ORIG"
  #
  # For Windows file systems, dwhRootPrefix must be of the format
  # '<baseDirPath>\<prefix>' and can be absolute or relative. If absolute,
  # <baseDirPath> is optional; if relative, <baseDirPath> is mandatory and must
  # contain 1 or more directory names. Use single quotes or no quotes for the
  # value so that the backslash character is treated as a regular character and
  # not as an escape character.
  # Sample 1: 'C:\controller_DWH_ORIG'
  # Sample 2: 'C:\fhir-test-analytics\dwh\controller_DWH_ORIG'
  # Sample 3: dwh\controller_DWH_ORIG
  #
  # Note for developers: You can make a symlink of `[repo_root]/docker/dwh` here
  # such that the Thrift Server of `compose-controller-spark-sql-single.yaml`
  # config can easily be used in dev env. too. You may need to set the ACL of
  # that directory too, such that files created by the pipelines are readable by
  # the Thrift Server, e.g., `setfacl -d -m o::rx dwh/`.
  dwhRootPrefix: "dwh/controller_DEV_DWH"
  # The schedule for automatic incremental pipeline runs.
  # Uses the Spring CronExpression format, i.e.,
  # "second minute hour day-of-the-month month day-of-the-week", so:
  # "0 0 * * * *" means top of every hour;
  # "*/40 * * * * *" means every 40 seconds.
  # Scheduling very frequent runs is resource intensive.
  incrementalSchedule: "0 0 * * * *"
  # The schedule for automatic DWH snapshot purging. There is no benefit
  # to scheduling the purge job more frequently than incremental runs.
  # Uses the Spring CronExpression format.
  purgeSchedule: "0 30 * * * *"
  # The number of DWH snapshots to retain when the purge job runs.
  # This must be > 0 or the purge job will not run. If a pipeline run fails
  # for any reason, any partial output must be manually removed.
  numOfDwhSnapshotsToRetain: 2
  # The comma-separated list of FHIR resources to fetch/monitor.
  # Equivalent to the pipeline `resourceList` parameter.
  # Note there is no Questionnaire in our test FHIR server, but it is okay; see
  # https://github.com/google/fhir-data-pipes/issues/785.
  resourceList: "Patient,Encounter,Observation,Questionnaire,Condition,Practitioner,Location,Organization"
  # The parallelism to be used for a pipeline job. In the case of FlinkRunner,
  # if the value is set to -1, then in local execution mode the number of
  # threads the job uses equals the number of cores in the machine, whereas in
  # remote (cluster) mode only 1 thread is used. If set to a positive value,
  # then in both modes the pipeline uses this many threads combined across all
  # the workers.
  numThreads: -1
  # In the Flink local execution mode (which is the default currently),
  # generate the Flink configuration file `flink-conf.yaml` automatically,
  # based on the parallelism set via the `numThreads` parameter and the cores
  # available on the machine. The generated file sets the parameters to
  # optimised values necessary to run the pipelines without failures. Disable
  # this parameter to pass the configuration file manually, by pointing the
  # FLINK_CONF_DIR environment variable at the directory where flink-conf.yaml
  # is placed.
  #
  # Note that for the Flink non-local execution mode, this parameter has to be
  # disabled, and the configuration file — which has more fine-grained control
  # parameters — has to be passed manually.
  autoGenerateFlinkConfiguration: true
  # Whether resource tables should be automatically created on a
  # Hive/Spark server. Primarily meant for single-machine deployment.
  createHiveResourceTables: false
  # Path to a file with the settings used to create tables.
  # Required if createHiveResourceTables is `true`.
  thriftserverHiveConfig: "config/thriftserver-hive-config.json"
  # Path to a directory containing view definitions for each resource type.
  # If not set or set to an empty string, automatic view creation is disabled.
  # Otherwise, for each resource type, its view definition SQL queries are read
  # and applied from corresponding files, i.e., any file that starts with the
  # resource name and ends in `.sql`, e.g., `DiagnosticReport_flat.sql`.
  # Only applies when createHiveResourceTables is `true`.
  #
  # Note for developers: If you symlink `[repo_root]/docker/config/views` here
  # you can use those predefined views in your dev. env. too.
  hiveResourceViewsDir: "config/views"
  # This is the size of the Parquet Row Group (a logical horizontal
  # partitioning into rows) that the pipelines use when creating row groups in
  # Parquet files. A large value means more data for one column can fit into
  # one big column chunk, which speeds up reading of that column's data. On the
  # downside, more memory is needed to hold the data before writing to files.
  rowGroupSizeForParquetFiles: 33554432 # 32mb
  # The location from which ViewDefinition resources are read and applied to
  # the corresponding input FHIR resources. Any file in this directory that
  # ends in `.json` is assumed to be a single ViewDefinition. To output these
  # views to a relational database, the next sinkDbConfigPath should also be
  # set.
  # NOTE(review): this intentionally shares a directory with
  # hiveResourceViewsDir above — `.sql` files serve Hive views, `.json` files
  # serve ViewDefinitions; confirm before changing either path.
  viewDefinitionsDir: "config/views"
  # The configuration file for the sink database. If `viewDefinitionsDir` is
  # set then the generated views are materialized and written to this DB. If
  # not, then the raw FHIR JSON resources are written to this DB. Note enabling
  # this feature can have a noticeable impact on pipelines performance. The
  # default empty string disables this feature.
  sinkDbConfigPath: "config/hapi-postgres-config_local_views.json"
# Enable Spring Boot actuator endpoints; use "*" to expose all endpoints, or a
# comma-separated list to expose selected ones.
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus,pipeline-metrics