Skip to content

Commit

Permalink
enhancement(kubernetes_logs source): Expose oldest_first (vectordot…
Browse files Browse the repository at this point in the history
…dev#18376)

* enhancement(kubernetes_logs source): Expose `oldest_first`

In response to vectordotdev#18088 (comment)

Might close: vectordotdev#18088

Signed-off-by: Jesse Szwedko <jesse.szwedko@datadoghq.com>

* Improve descriptions

Signed-off-by: Jesse Szwedko <jesse.szwedko@datadoghq.com>

---------

Signed-off-by: Jesse Szwedko <jesse.szwedko@datadoghq.com>
  • Loading branch information
jszwedko committed Aug 26, 2023
1 parent 016890e commit dc66566
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 10 deletions.
6 changes: 5 additions & 1 deletion src/sources/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,11 @@ pub struct FileConfig {
#[serde(default)]
pub multiline: Option<MultilineConfig>,

/// An approximate limit on the amount of data read from a single file at a given time.
/// Max amount of bytes to read from a single file before switching over to the next file.
/// **Note:** This does not apply when `oldest_first` is `true`.
///
/// This allows distributing the reads more or less evenly across
/// the files.
#[serde(default = "default_max_read_bytes")]
#[configurable(metadata(docs::type_unit = "bytes"))]
pub max_read_bytes: usize,
Expand Down
22 changes: 17 additions & 5 deletions src/sources/kubernetes_logs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,18 @@ pub struct Config {
#[configurable(metadata(docs::human_name = "Ignore Files Older Than"))]
ignore_older_secs: Option<u64>,

/// Max amount of bytes to read from a single file before switching over
/// to the next file.
/// Max amount of bytes to read from a single file before switching over to the next file.
/// **Note:** This does not apply when `oldest_first` is `true`.
///
/// This allows distributing the reads more or less evenly across
/// the files.
#[configurable(metadata(docs::type_unit = "bytes"))]
max_read_bytes: usize,

/// Instead of balancing read capacity fairly across all watched files, prioritize draining the oldest files before moving on to read data from younger files.
#[serde(default = "default_oldest_first")]
pub oldest_first: bool,

/// The maximum number of bytes a line can contain before being discarded.
///
/// This protects against malformed lines or tailing incorrect files.
Expand Down Expand Up @@ -264,6 +268,7 @@ impl Default for Config {
read_from: default_read_from(),
ignore_older_secs: None,
max_read_bytes: default_max_read_bytes(),
oldest_first: default_oldest_first(),
max_line_bytes: default_max_line_bytes(),
fingerprint_lines: default_fingerprint_lines(),
glob_minimum_cooldown_ms: default_glob_minimum_cooldown_ms(),
Expand Down Expand Up @@ -516,6 +521,7 @@ struct Source {
read_from: ReadFrom,
ignore_older_secs: Option<u64>,
max_read_bytes: usize,
oldest_first: bool,
max_line_bytes: usize,
fingerprint_lines: usize,
glob_minimum_cooldown: Duration,
Expand Down Expand Up @@ -593,6 +599,7 @@ impl Source {
read_from: ReadFrom::from(config.read_from),
ignore_older_secs: config.ignore_older_secs,
max_read_bytes: config.max_read_bytes,
oldest_first: config.oldest_first,
max_line_bytes: config.max_line_bytes,
fingerprint_lines: config.fingerprint_lines,
glob_minimum_cooldown,
Expand Down Expand Up @@ -624,6 +631,7 @@ impl Source {
read_from,
ignore_older_secs,
max_read_bytes,
oldest_first,
max_line_bytes,
fingerprint_lines,
glob_minimum_cooldown,
Expand Down Expand Up @@ -763,9 +771,7 @@ impl Source {
max_line_length: max_line_bytes,
ignore_not_found: true,
},
// We'd like to consume rotated pod log files first to release our file handle and let
// the space be reclaimed
oldest_first: true,
oldest_first,
// We do not remove the log files, `kubelet` is responsible for it.
remove_after: None,
// The standard emitter.
Expand Down Expand Up @@ -944,6 +950,12 @@ const fn default_max_read_bytes() -> usize {
2048
}

/// Default value for the `oldest_first` option: enabled.
///
/// Draining the oldest files first means rotated pod log files get consumed
/// before newer ones, so our file handle on them is released and the space
/// can be reclaimed.
const fn default_oldest_first() -> bool {
    true
}

const fn default_max_line_bytes() -> usize {
// NOTE: The below comment documents an incorrect assumption, see
// https://github.com/vectordotdev/vector/issues/6967
Expand Down
10 changes: 8 additions & 2 deletions website/cue/reference/components/sources/base/file.cue
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,14 @@ base: components: sources: file: configuration: {
}
}
max_read_bytes: {
description: "An approximate limit on the amount of data read from a single file at a given time."
required: false
description: """
Max amount of bytes to read from a single file before switching over to the next file.
**Note:** This does not apply when `oldest_first` is `true`.
This allows distributing the reads more or less evenly across
the files.
"""
required: false
type: uint: {
default: 2048
unit: "bytes"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ base: components: sources: kubernetes_logs: configuration: {
}
max_read_bytes: {
description: """
Max amount of bytes to read from a single file before switching over
to the next file.
Max amount of bytes to read from a single file before switching over to the next file.
**Note:** This does not apply when `oldest_first` is `true`.
This allows distributing the reads more or less evenly across
the files.
Expand Down Expand Up @@ -209,6 +209,11 @@ base: components: sources: kubernetes_logs: configuration: {
}
}
}
oldest_first: {
description: "Instead of balancing read capacity fairly across all watched files, prioritize draining the oldest files before moving on to read data from younger files."
required: false
type: bool: default: true
}
pod_annotation_fields: {
description: "Configuration for how the events are enriched with Pod metadata."
required: false
Expand Down

0 comments on commit dc66566

Please sign in to comment.