chore(docs): Change the default configuration tab and add comments to… (

vectordotdev#18420) chore(docs): Change the 'Configuring Vector' page to mainly use YAML
aholmberg · Sep 6, 2023 · ed1dedf · ed1dedf
1 parent 7ec5b97
commit ed1dedf
Showing 1 changed file with 156 additions and 128 deletions.
diff --git a/website/content/en/docs/reference/configuration/_index.md b/website/content/en/docs/reference/configuration/_index.md
@@ -14,7 +14,71 @@ The following is an example of a popular Vector configuration that ingests logs
 from a file and routes them to both Elasticsearch and AWS S3. Your configuration
 will differ based on your needs.
 
-{{< tabs default="vector.toml" >}}
+{{< tabs default="vector.yaml" >}}
+{{< tab title="vector.yaml" >}}
+
+```yaml
+# Set global options
+data_dir: "/var/lib/vector"
+
+# Vector's API (disabled by default)
+# Enable and try it out with the `vector top` command
+api:
+  enabled: false
+  # address: "127.0.0.1:8686"
+
+# Ingest data by tailing one or more files
+sources:
+  apache_logs:
+    type: "file"
+    include:
+      - "/var/log/apache2/*.log" # supports globbing
+    ignore_older: 86400          # 1 day
+
+# Structure and parse via Vector's Remap Language
+transforms:
+  apache_parser:
+    inputs:
+      - "apache_logs"
+    type: "remap"
+    source: ". = parse_apache_log(.message)"
+
+  # Sample the data to save on cost
+  apache_sampler:
+    inputs:
+      - "apache_parser"
+    type: "sample"
+    rate: 2 # only keep 50% (1/`rate`)
+
+# Send structured data to a short-term storage
+sinks:
+  es_cluster:
+    inputs:
+      - "apache_sampler"       # only take sampled data
+    type: "elasticsearch"
+    endpoints:
+      - "http://79.12.221.222:9200"
+    bulk:
+      index: "vector-%Y-%m-%d" # daily indices
+
+  # Send structured data to a cost-effective long-term storage
+  s3_archives:
+    inputs:
+      - "apache_parser" # don't sample for S3
+    type: "aws_s3"
+    region: "us-east-1"
+    bucket: "my-log-archives"
+    key_prefix: "date=%Y-%m-%d"   # daily partitions, hive-friendly format
+    compression: "gzip"           # compress final objects
+    framing:
+      method: "newline_delimited" # new line delimited...
+    encoding:
+      codec: "json"               # ...JSON
+    batch:
+      max_bytes: 10000000         # 10mb uncompressed
+```
+
+{{< /tab >}}
 {{< tab title="vector.toml" >}}
 
 ```toml
@@ -67,53 +131,6 @@ encoding.codec  = "json"               # ...JSON
 batch.max_bytes = 10000000             # 10mb uncompressed
 ```
 
-{{< /tab >}}
-{{< tab title="vector.yaml" >}}
-
-```yaml
-data_dir: /var/lib/vector
-sources:
-  apache_logs:
-    type: file
-    include:
-      - /var/log/apache2/*.log
-    ignore_older: 86400
-transforms:
-  apache_parser:
-    inputs:
-      - apache_logs
-    type: remap
-    source: |
-      . = parse_apache_log(.message)
-  apache_sampler:
-    inputs:
-      - apache_parser
-    type: sample
-    rate: 50
-sinks:
-  es_cluster:
-    inputs:
-      - apache_sampler
-    type: elasticsearch
-    endpoints: ['http://79.12.221.222:9200']
-    bulk:
-      index: vector-%Y-%m-%d
-  s3_archives:
-    inputs:
-      - apache_parser
-    type: aws_s3
-    region: us-east-1
-    bucket: my-log-archives
-    key_prefix: date=%Y-%m-%d
-    compression: gzip
-    framing:
-      method: newline_delimited
-    encoding:
-      codec: json
-    batch:
-      max_bytes: 10000000
-```
-
 {{< /tab >}}
 {{< tab title="vector.json" >}}
 
@@ -185,18 +202,18 @@ sinks:
 To use this configuration file, specify it with the `--config` flag when
 starting Vector:
 
-{{< tabs default="TOML" >}}
-{{< tab title="TOML" >}}
+{{< tabs default="YAML" >}}
+{{< tab title="YAML" >}}
 
 ```shell
-vector --config /etc/vector/vector.toml
+vector --config /etc/vector/vector.yaml
 ```
 
 {{< /tab >}}
-{{< tab title="YAML" >}}
+{{< tab title="TOML" >}}
 
 ```shell
-vector --config /etc/vector/vector.yaml
+vector --config /etc/vector/vector.toml
 ```
 
 {{< /tab >}}
@@ -229,37 +246,37 @@ vector --config /etc/vector/vector.json
 Vector interpolates environment variables within your configuration file with
 the following syntax:
 
-```toml
-[transforms.add_host]
-type = "remap"
-source = '''
-# Basic usage. "$HOSTNAME" also works.
-.host = "${HOSTNAME}" # or "$HOSTNAME"
+```yaml
+transforms:
+  add_host:
+    type: "remap"
+    source: |
+      # Basic usage. "$HOSTNAME" also works.
+      .host = "${HOSTNAME}" # or "$HOSTNAME"
 
-# Setting a default value when not present.
-.environment = "${ENV:-development}"
+      # Setting a default value when not present.
+      .environment = "${ENV:-development}"
 
-# Requiring an environment variable to be present.
-.tenant = "${TENANT:?tenant must be supplied}"
-'''
+      # Requiring an environment variable to be present.
+      .tenant = "${TENANT:?tenant must be supplied}"
 ```
 
 #### Default values
 
 Default values can be supplied using `:-` or `-` syntax:
 
-```toml
-option = "${ENV_VAR:-default}" # default value if variable is unset or empty
-option = "${ENV_VAR-default}" # default value only if variable is unset
+```yaml
+option: "${ENV_VAR:-default}" # default value if variable is unset or empty
+option: "${ENV_VAR-default}" # default value only if variable is unset
 ```
 
 #### Required variables
 
 Environment variables that are required can be specified using `:?` or `?` syntax:
 
-```toml
-option = "${ENV_VAR:?err}" # Vector exits with 'err' message if variable is unset or empty
-option = "${ENV_VAR?err}" # Vector exits with 'err' message only if variable is unset
+```yaml
+option: "${ENV_VAR:?err}" # Vector exits with 'err' message if variable is unset or empty
+option: "${ENV_VAR?err}" # Vector exits with 'err' message only if variable is unset
 ```
 
 #### Escaping
@@ -285,89 +302,98 @@ method. For most Linux-based systems, the file can be found at
 You can pass multiple configuration files when starting Vector:
 
 ```shell
-vector --config vector1.toml --config vector2.toml
+vector --config vector1.yaml --config vector2.yaml
 ```
 
 Or using a [globbing syntax][glob]:
 
 ```shell
-vector --config /etc/vector/*.toml
+vector --config /etc/vector/*.yaml
 ```
 
 #### Automatic namespacing
 
 You can also split your configuration by grouping the components by their type, one directory per component type, where the file name is used as the component id. For example:
 
-{{< tabs default="vector.toml" >}}
-{{< tab title="vector.toml" >}}
+{{< tabs default="vector.yaml" >}}
+{{< tab title="vector.yaml" >}}
 
-```toml
+```yaml
 # Set global options
-data_dir = "/var/lib/vector"
+data_dir: "/var/lib/vector"
 
 # Vector's API (disabled by default)
 # Enable and try it out with the `vector top` command
-[api]
-enabled = false
-# address = "127.0.0.1:8686"
+api:
+  enabled: false
+  # address: "127.0.0.1:8686"
 ```
 
 {{< /tab >}}
-{{< tab title="sources/apache_logs.toml" >}}
+{{< tab title="sources/apache_logs.yaml" >}}
 
-```toml
+```yaml
 # Ingest data by tailing one or more files
-type         = "file"
-include      = ["/var/log/apache2/*.log"]    # supports globbing
-ignore_older = 86400                         # 1 day
+type: "file"
+include: ["/var/log/apache2/*.log"]    # supports globbing
+ignore_older: 86400                    # 1 day
 ```
 
 {{< /tab >}}
-{{< tab title="transforms/apache_parser.toml" >}}
+{{< tab title="transforms/apache_parser.yaml" >}}
 
-```toml
+```yaml
 # Structure and parse via Vector Remap Language
-inputs = ["apache_logs"]
-type   = "remap"
-source = '''
-. = parse_apache_log(.message)
+inputs:
+  - "apache_logs"
+type: "remap"
+source: |
+  . = parse_apache_log(.message)
 ```
 
 {{< /tab >}}
-{{< tab title="transforms/apache_sampler.toml" >}}
+{{< tab title="transforms/apache_sampler.yaml" >}}
 
-```toml
+```yaml
 # Sample the data to save on cost
-inputs = ["apache_parser"]
-type   = "sample"
-rate   = 2                    # only keep 50% (1/`rate`)
+inputs:
+  - "apache_parser"
+type: "sample"
+rate: 2 # only keep 50% (1/`rate`)
 ```
 
 {{< /tab >}}
-{{< tab title="sinks/es_cluster.toml" >}}
+{{< tab title="sinks/es_cluster.yaml" >}}
 
-```toml
+```yaml
 # Send structured data to a short-term storage
-inputs     = ["apache_sampler"]             # only take sampled data
-type       = "elasticsearch"
-endpoints  = ["http://79.12.221.222:9200"]  # local or external host
-bulk.index = "vector-%Y-%m-%d"              # daily indices
+inputs:
+  - "apache_sampler"             # only take sampled data
+type: "elasticsearch"
+endpoints:
+  - "http://79.12.221.222:9200"  # local or external host
+bulk:
+  index: "vector-%Y-%m-%d"      # daily indices
 ```
 
 {{< /tab >}}
-{{< tab title="sinks/s3_archives.toml" >}}
+{{< tab title="sinks/s3_archives.yaml" >}}
 
-```toml
+```yaml
 # Send structured data to a cost-effective long-term storage
-inputs          = ["apache_parser"]    # don't sample for S3
-type            = "aws_s3"
-region          = "us-east-1"
-bucket          = "my-log-archives"
-key_prefix      = "date=%Y-%m-%d"      # daily partitions, hive friendly format
-compression     = "gzip"               # compress final objects
-framing.method  = "newline_delimited"  # new line delimited...
-encoding.codec  = "json"               # ...JSON
-batch.max_bytes = 10000000             # 10mb uncompressed
+inputs:
+  - "apache_parser"           # don't sample for S3
+type: "aws_s3"
+region: "us-east-1"
+bucket: "my-log-archives"
+key_prefix: "date=%Y-%m-%d"   # daily partitions, hive-friendly format
+compression: "gzip"           # compress final objects
+framing:
+  method: "newline_delimited" # new line delimited...
+encoding:
+  codec: "json"               # ...JSON
+batch:
+  max_bytes: 10000000         # 10mb uncompressed
 ```
 
 {{< /tab >}}
@@ -384,26 +410,28 @@ vector --config-dir /etc/vector
 Vector supports wildcards (`*`) in component IDs when building your topology.
 For example:
 
-```toml
-[sources.app1_logs]
-type = "file"
-includes = ["/var/log/app1.log"]
+```yaml
+sources:
+  app1_logs:
+    type: "file"
+    include: ["/var/log/app1.log"]
 
-[sources.app2_logs]
-type = "file"
-includes = ["/var/log/app.log"]
+  app2_logs:
+    type: "file"
+    include: ["/var/log/app2.log"]
 
-[sources.system_logs]
-type = "file"
-includes = ["/var/log/system.log"]
+  system_logs:
+    type: "file"
+    include: ["/var/log/system.log"]
 
-[sinks.app_logs]
-type = "datadog_logs"
-inputs = ["app*"]
+sinks:
+  app_logs:
+    type: "datadog_logs"
+    inputs: ["app*"]
 
-[sinks.archive]
-type = "aws_s3"
-inputs = ["app*", "system_logs"]
+  archive:
+    type: "aws_s3"
+    inputs: ["app*", "system_logs"]
 ```
 
 ## Sections