Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/metal-yaks-rhyme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@hyperdx/api": patch
---

feat: support custom otel collector config (BETA)
5 changes: 5 additions & 0 deletions .changeset/nervous-turkeys-travel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@hyperdx/api": patch
---

fix: reduce bloat in opamp agent logs
5 changes: 4 additions & 1 deletion docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,17 @@ services:
HYPERDX_API_KEY: ${HYPERDX_API_KEY}
HYPERDX_LOG_LEVEL: ${HYPERDX_LOG_LEVEL}
OPAMP_SERVER_URL: 'http://host.docker.internal:${HYPERDX_OPAMP_PORT}'
CUSTOM_OTELCOL_CONFIG_FILE: '/etc/otelcol-contrib/custom.config.yaml'
# Uncomment to enable stdout logging for the OTel collector
# OTEL_SUPERVISOR_PASSTHROUGH_LOGS: 'true'
OTEL_SUPERVISOR_PASSTHROUGH_LOGS: 'false'
# Uncomment to enable JSON schema in ClickHouse
# Be sure to also set BETA_CH_OTEL_JSON_SCHEMA_ENABLED to 'true' in ch-server
# OTEL_AGENT_FEATURE_GATE_ARG: '--feature-gates=clickhouse.json'
volumes:
- ./docker/otel-collector/config.yaml:/etc/otelcol-contrib/config.yaml
- ./docker/otel-collector/supervisor_docker.yaml:/etc/otel/supervisor.yaml
# Add a custom config file
- ./docker/otel-collector/custom.config.yaml:/etc/otelcol-contrib/custom.config.yaml
ports:
- '13133:13133' # health_check extension
- '24225:24225' # fluentd receiver
Expand Down
28 changes: 17 additions & 11 deletions docker/otel-collector/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,44 @@ FROM otel/opentelemetry-collector-contrib:0.129.1 AS col
FROM otel/opentelemetry-collector-opampsupervisor:0.128.0 AS supervisor

# From: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/aa5c3aa4c7ec174361fcaf908de8eaca72263078/cmd/opampsupervisor/Dockerfile#L18
FROM alpine:latest@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c AS prep
RUN apk --update add ca-certificates
RUN mkdir -p /etc/otel/supervisor-data/

FROM scratch AS base
FROM alpine:latest@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c AS base

ARG USER_UID=10001
ARG USER_GID=10001

# Install certs, create user/group, and make the writable data dir
RUN apk add --no-cache ca-certificates && \
addgroup -S -g ${USER_GID} otel && \
adduser -S -u ${USER_UID} -G otel otel && \
install -d -m 0777 -o ${USER_UID} -g ${USER_GID} /etc/otel/supervisor-data

USER ${USER_UID}:${USER_GID}

COPY --from=prep /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY --from=prep --chmod=777 --chown=${USER_UID}:${USER_GID} /etc/otel/supervisor-data /etc/otel/supervisor-data
COPY --from=supervisor --chmod=755 /usr/local/bin/opampsupervisor /
COPY ./supervisor_docker.yaml /etc/otel/supervisor.yaml
COPY --from=supervisor --chmod=755 /usr/local/bin/opampsupervisor /opampsupervisor
COPY --from=col --chmod=755 /otelcol-contrib /otelcontribcol

# Copy entrypoint and log rotation scripts
COPY --chmod=755 ./entrypoint.sh /entrypoint.sh
COPY --chmod=755 ./log-rotator.sh /log-rotator.sh

## dev ##############################################################################################
FROM base AS dev

COPY ./config.yaml /etc/otelcol-contrib/config.yaml
COPY ./supervisor_docker.yaml /etc/otel/supervisor.yaml

EXPOSE 4317 4318 13133

ENTRYPOINT ["/opampsupervisor"]
ENTRYPOINT ["/entrypoint.sh", "/opampsupervisor"]
CMD ["--config", "/etc/otel/supervisor.yaml"]

## prod #############################################################################################
FROM base AS prod

COPY ./config.yaml /etc/otelcol-contrib/config.yaml
COPY ./supervisor_docker.yaml /etc/otel/supervisor.yaml

EXPOSE 4317 4318 13133

ENTRYPOINT ["/opampsupervisor"]
ENTRYPOINT ["/entrypoint.sh", "/opampsupervisor"]
CMD ["--config", "/etc/otel/supervisor.yaml"]
174 changes: 174 additions & 0 deletions docker/otel-collector/config.deprecated.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
receivers:
# Troubleshooting
prometheus:
config:
scrape_configs:
- job_name: 'otelcol'
scrape_interval: 30s
static_configs:
- targets:
- '0.0.0.0:8888'
- ${env:CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT}
# Data sources: logs
fluentforward:
endpoint: '0.0.0.0:24225'
# Configured via OpAMP w/ authentication
# Data sources: traces, metrics, logs
# otlp/hyperdx:
# protocols:
# grpc:
# include_metadata: true
# endpoint: '0.0.0.0:4317'
# http:
# cors:
# allowed_origins: ['*']
# allowed_headers: ['*']
# include_metadata: true
# endpoint: '0.0.0.0:4318'
processors:
transform:
log_statements:
- context: log
error_mode: ignore
statements:
# JSON parsing: Extends log attributes with the fields from structured log body content, either as an OTEL map or
# as a string containing JSON content.
- set(log.cache, ExtractPatterns(log.body, "(?P<0>(\\{.*\\}))")) where
IsString(log.body)
- merge_maps(log.attributes, ParseJSON(log.cache["0"]), "upsert")
where IsMap(log.cache)
- flatten(log.attributes) where IsMap(log.cache)
- merge_maps(log.attributes, log.body, "upsert") where IsMap(log.body)
- context: log
error_mode: ignore
conditions:
- severity_number == 0 and severity_text == ""
statements:
# Infer: extract the first log level keyword from the first 256 characters of the body
- set(log.cache["substr"], log.body.string) where Len(log.body.string)
< 256
- set(log.cache["substr"], Substring(log.body.string, 0, 256)) where
Len(log.body.string) >= 256
- set(log.cache, ExtractPatterns(log.cache["substr"],
"(?i)(?P<0>(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
# Infer: detect FATAL
- set(log.severity_number, SEVERITY_NUMBER_FATAL) where
IsMatch(log.cache["0"], "(?i)(alert|crit|emerg|fatal)")
- set(log.severity_text, "fatal") where log.severity_number ==
SEVERITY_NUMBER_FATAL
# Infer: detect ERROR
- set(log.severity_number, SEVERITY_NUMBER_ERROR) where
IsMatch(log.cache["0"], "(?i)(error|err)")
- set(log.severity_text, "error") where log.severity_number ==
SEVERITY_NUMBER_ERROR
# Infer: detect WARN
- set(log.severity_number, SEVERITY_NUMBER_WARN) where
IsMatch(log.cache["0"], "(?i)(warn|notice)")
- set(log.severity_text, "warn") where log.severity_number ==
SEVERITY_NUMBER_WARN
# Infer: detect DEBUG
- set(log.severity_number, SEVERITY_NUMBER_DEBUG) where
IsMatch(log.cache["0"], "(?i)(debug|dbug)")
- set(log.severity_text, "debug") where log.severity_number ==
SEVERITY_NUMBER_DEBUG
# Infer: detect TRACE
- set(log.severity_number, SEVERITY_NUMBER_TRACE) where
IsMatch(log.cache["0"], "(?i)(trace)")
- set(log.severity_text, "trace") where log.severity_number ==
SEVERITY_NUMBER_TRACE
# Infer: else
- set(log.severity_text, "info") where log.severity_number == 0
- set(log.severity_number, SEVERITY_NUMBER_INFO) where
log.severity_number == 0
- context: log
error_mode: ignore
statements:
# Normalize the severity_text case
- set(log.severity_text, ConvertCase(log.severity_text, "lower"))
resourcedetection:
detectors:
- env
- system
- docker
timeout: 5s
override: false
batch:
memory_limiter:
# 80% of maximum memory up to 2G
limit_mib: 1500
# 25% of limit up to 2G
spike_limit_mib: 512
check_interval: 5s
connectors:
routing/logs:
default_pipelines: [logs/out-default]
error_mode: ignore
table:
- context: log
statement: route() where IsMatch(attributes["rr-web.event"], ".*")
pipelines: [logs/out-rrweb]
exporters:
debug:
verbosity: detailed
sampling_initial: 5
sampling_thereafter: 200
clickhouse/rrweb:
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
endpoint: ${env:CLICKHOUSE_ENDPOINT}
password: ${env:CLICKHOUSE_PASSWORD}
username: ${env:CLICKHOUSE_USER}
ttl: 720h
logs_table_name: hyperdx_sessions
timeout: 5s
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
clickhouse:
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
endpoint: ${env:CLICKHOUSE_ENDPOINT}
password: ${env:CLICKHOUSE_PASSWORD}
username: ${env:CLICKHOUSE_USER}
ttl: 720h
timeout: 5s
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
extensions:
health_check:
endpoint: :13133
service:
telemetry:
metrics:
readers:
- pull:
exporter:
prometheus:
host: '0.0.0.0'
port: 8888
logs:
level: ${HYPERDX_LOG_LEVEL}
extensions: [health_check]
pipelines:
traces:
# receivers: [otlp/hyperdx]
processors: [memory_limiter, batch]
exporters: [clickhouse]
metrics:
# receivers: [otlp/hyperdx, prometheus]
processors: [memory_limiter, batch]
exporters: [clickhouse]
logs/in:
# receivers: [otlp/hyperdx, fluentforward]
exporters: [routing/logs]
logs/out-default:
receivers: [routing/logs]
processors: [memory_limiter, transform, batch]
exporters: [clickhouse]
logs/out-rrweb:
receivers: [routing/logs]
processors: [memory_limiter, batch]
exporters: [clickhouse/rrweb]
Loading
Loading