diff --git a/.chloggen/test.yaml b/.chloggen/test.yaml
new file mode 100644
index 0000000000..a76acf23a0
--- /dev/null
+++ b/.chloggen/test.yaml
@@ -0,0 +1,20 @@
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: new_component
+
+# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
+component: clusterObservability
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Initial implementation of the ClusterObservability CRD
+
+# One or more tracking issues related to the change
+issues: [3820]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext: |
+ This change provides an initial implementation of the Cluster Observability CRD and operator control loop.
+  Reconciliation is disabled by default.
+ Currently, the CRD is **not** included in the operator bundle and must be installed manually.
+  To enable reconciliation, pass the `--feature-gates=+operator.clusterobservability` flag to the operator.
diff --git a/Makefile b/Makefile
index 80f1a8880f..fdec490aee 100644
--- a/Makefile
+++ b/Makefile
@@ -746,7 +746,11 @@ api-docs: crdoc kustomize
for crdmanifest in $$TMP_DIR/*; do \
filename="$$(basename -s .opentelemetry.io.yaml $$crdmanifest)" ;\
filename="$${filename#apiextensions.k8s.io_v1_customresourcedefinition_}" ;\
- $(CRDOC) --resources $$crdmanifest --output docs/api/$$filename.md ;\
+ if [ "$$filename" = "clusterobservabilities" ]; then \
+ echo "Skipping API documentation generation for clusterobservabilities (internal alpha API)" ;\
+ else \
+ $(CRDOC) --resources $$crdmanifest --output docs/api/$$filename.md ;\
+ fi ;\
done;\
}
diff --git a/apis/v1alpha1/clusterobservability_types.go b/apis/v1alpha1/clusterobservability_types.go
new file mode 100644
index 0000000000..394d9a623f
--- /dev/null
+++ b/apis/v1alpha1/clusterobservability_types.go
@@ -0,0 +1,288 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package v1alpha1
+
+import (
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// ObservabilitySignal represents the type of observability signal.
+// +kubebuilder:validation:Enum=logs;traces;metrics;profiles
+type ObservabilitySignal string
+
+const (
+ ObservabilitySignalLogs ObservabilitySignal = "logs"
+ ObservabilitySignalTraces ObservabilitySignal = "traces"
+ ObservabilitySignalMetrics ObservabilitySignal = "metrics"
+ ObservabilitySignalProfiles ObservabilitySignal = "profiles"
+)
+
+// OTLPHTTPExporter defines OTLP HTTP exporter configuration.
+// This structure mirrors the official OpenTelemetry Collector otlphttpexporter configuration.
+type OTLPHTTPExporter struct {
+ // Endpoint is the target base URL to send data to (e.g., https://example.com:4318).
+ // +optional
+ Endpoint string `json:"endpoint,omitempty"`
+
+ // TracesEndpoint is the target URL to send trace data to (e.g., https://example.com:4318/v1/traces).
+ // If this setting is present the endpoint setting is ignored for traces.
+ // +optional
+ TracesEndpoint string `json:"traces_endpoint,omitempty"`
+
+ // MetricsEndpoint is the target URL to send metric data to (e.g., https://example.com:4318/v1/metrics).
+ // If this setting is present the endpoint setting is ignored for metrics.
+ // +optional
+ MetricsEndpoint string `json:"metrics_endpoint,omitempty"`
+
+ // LogsEndpoint is the target URL to send log data to (e.g., https://example.com:4318/v1/logs).
+ // If this setting is present the endpoint setting is ignored for logs.
+ // +optional
+ LogsEndpoint string `json:"logs_endpoint,omitempty"`
+
+ // ProfilesEndpoint is the target URL to send profile data to (e.g., https://example.com:4318/v1/development/profiles).
+ // If this setting is present the endpoint setting is ignored for profiles.
+ // +optional
+ ProfilesEndpoint string `json:"profiles_endpoint,omitempty"`
+
+ // TLS defines TLS configuration for the exporter.
+ // +optional
+ TLS *TLSConfig `json:"tls,omitempty"`
+
+ // Timeout is the HTTP request time limit (e.g., "30s", "1m"). Default is 30s.
+ // +optional
+ Timeout string `json:"timeout,omitempty"`
+
+ // ReadBufferSize for HTTP client. Default is 0.
+ // +optional
+ // +kubebuilder:validation:Minimum=0
+ ReadBufferSize *int `json:"read_buffer_size,omitempty"`
+
+ // WriteBufferSize for HTTP client. Default is 512 * 1024.
+ // +optional
+ // +kubebuilder:validation:Minimum=0
+ WriteBufferSize *int `json:"write_buffer_size,omitempty"`
+
+ // SendingQueue defines configuration for the sending queue.
+ // +optional
+ SendingQueue *SendingQueueConfig `json:"sending_queue,omitempty"`
+
+ // RetryOnFailure defines retry configuration for failed requests.
+ // +optional
+ RetryOnFailure *RetryConfig `json:"retry_on_failure,omitempty"`
+
+ // Encoding defines the encoding to use for the messages.
+ // Valid options: proto, json. Default is proto.
+ // +optional
+ // +kubebuilder:validation:Enum=proto;json
+ Encoding string `json:"encoding,omitempty"`
+
+ // Compression defines the compression algorithm to use.
+ // By default gzip compression is enabled. Use "none" to disable.
+ // +optional
+ // +kubebuilder:validation:Enum=gzip;none;""
+ Compression string `json:"compression,omitempty"`
+
+ // Headers defines additional headers to be sent with each request.
+ // +optional
+ Headers map[string]string `json:"headers,omitempty"`
+}
+
+// TLSConfig defines TLS configuration for the OTLP HTTP exporter.
+// This mirrors the OpenTelemetry Collector configtls settings.
+type TLSConfig struct {
+ // CAFile is the path to the CA certificate file for server verification.
+ // +optional
+ CAFile string `json:"ca_file,omitempty"`
+
+ // CertFile is the path to the client certificate file for mutual TLS.
+ // +optional
+ CertFile string `json:"cert_file,omitempty"`
+
+ // KeyFile is the path to the client private key file for mutual TLS.
+ // +optional
+ KeyFile string `json:"key_file,omitempty"`
+
+ // Insecure controls whether to use insecure transport. Default is false.
+ // +optional
+ Insecure bool `json:"insecure,omitempty"`
+
+ // ServerName for TLS handshake. If empty, uses the hostname from endpoint.
+ // +optional
+ ServerName string `json:"server_name,omitempty"`
+}
+
+// SendingQueueConfig defines configuration for the sending queue.
+type SendingQueueConfig struct {
+ // Enabled controls whether the queue is enabled. Default is true.
+ // +optional
+ Enabled *bool `json:"enabled,omitempty"`
+
+ // NumConsumers is the number of consumers that dequeue batches. Default is 10.
+ // +optional
+ // +kubebuilder:validation:Minimum=1
+ NumConsumers *int `json:"num_consumers,omitempty"`
+
+ // QueueSize is the maximum number of batches allowed in queue at a given time. Default is 1000.
+ // +optional
+ // +kubebuilder:validation:Minimum=1
+ QueueSize *int `json:"queue_size,omitempty"`
+}
+
+// RetryConfig defines retry configuration for failed requests.
+type RetryConfig struct {
+ // Enabled controls whether retry is enabled. Default is true.
+ // +optional
+ Enabled *bool `json:"enabled,omitempty"`
+
+ // InitialInterval is the initial retry interval (e.g., "5s"). Default is 5s.
+ // +optional
+ InitialInterval string `json:"initial_interval,omitempty"`
+
+ // RandomizationFactor is the randomization factor for retry intervals (e.g., "0.5"). Default is 0.5.
+ // +optional
+ RandomizationFactor string `json:"randomization_factor,omitempty"`
+
+ // Multiplier is the multiplier for retry intervals (e.g., "1.5"). Default is 1.5.
+ // +optional
+ Multiplier string `json:"multiplier,omitempty"`
+
+ // MaxInterval is the maximum retry interval (e.g., "30s"). Default is 30s.
+ // +optional
+ MaxInterval string `json:"max_interval,omitempty"`
+
+ // MaxElapsedTime is the maximum elapsed time for retries (e.g., "5m"). Default is 5m.
+ // +optional
+ MaxElapsedTime string `json:"max_elapsed_time,omitempty"`
+}
+
+// ClusterObservabilitySpec defines the desired state of ClusterObservability.
+// This follows a simplified design using a single OTLP HTTP exporter for all signals.
+// All signals (logs, traces, metrics, profiles) are enabled by default.
+type ClusterObservabilitySpec struct {
+ // Exporter defines the OTLP HTTP exporter configuration for all signals.
+ // The collector will automatically append appropriate paths for each signal type.
+ // +required
+ Exporter OTLPHTTPExporter `json:"exporter"`
+}
+
+// ClusterObservabilityConditionType represents the type of condition.
+type ClusterObservabilityConditionType string
+
+const (
+ // ClusterObservabilityConditionReady indicates whether the ClusterObservability is ready.
+ ClusterObservabilityConditionReady ClusterObservabilityConditionType = "Ready"
+ // ClusterObservabilityConditionConfigured indicates whether the ClusterObservability is configured.
+ ClusterObservabilityConditionConfigured ClusterObservabilityConditionType = "Configured"
+ // ClusterObservabilityConditionConflicted indicates that multiple ClusterObservability resources exist.
+ ClusterObservabilityConditionConflicted ClusterObservabilityConditionType = "Conflicted"
+)
+
+const (
+ // ClusterObservabilityFinalizer is the finalizer used for ClusterObservability resources.
+ ClusterObservabilityFinalizer = "clusterobservability.opentelemetry.io/finalizer"
+)
+
+// ClusterObservabilityCondition represents a condition of a ClusterObservability.
+type ClusterObservabilityCondition struct {
+ // Type of condition.
+ // +required
+ Type ClusterObservabilityConditionType `json:"type"`
+
+ // Status of the condition.
+ // +required
+ Status metav1.ConditionStatus `json:"status"`
+
+ // Last time the condition transitioned from one status to another.
+ // +optional
+ LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`
+
+ // The reason for the condition's last transition.
+ // +optional
+ Reason string `json:"reason,omitempty"`
+
+ // A human readable message indicating details about the transition.
+ // +optional
+ Message string `json:"message,omitempty"`
+
+ // ObservedGeneration represents the .metadata.generation that the condition was set based upon.
+ // +optional
+ ObservedGeneration int64 `json:"observedGeneration,omitempty"`
+}
+
+// ClusterObservabilityStatus defines the observed state of ClusterObservability.
+type ClusterObservabilityStatus struct {
+ // Conditions represent the latest available observations of the ClusterObservability state.
+ // +optional
+ // +listType=map
+ // +listMapKey=type
+ Conditions []ClusterObservabilityCondition `json:"conditions,omitempty"`
+
+ // ObservedGeneration is the most recent generation observed for this ClusterObservability.
+ // It corresponds to the ClusterObservability's generation, which is updated on mutation
+ // by the API Server.
+ // +optional
+ ObservedGeneration int64 `json:"observedGeneration,omitempty"`
+
+ // Phase represents the current phase of the ClusterObservability.
+ // +optional
+ Phase string `json:"phase,omitempty"`
+
+ // Message provides additional information about the current state.
+ // +optional
+ Message string `json:"message,omitempty"`
+
+ // ComponentsStatus provides status information about individual observability components.
+ // +optional
+ ComponentsStatus map[string]ComponentStatus `json:"componentsStatus,omitempty"`
+
+ // ConfigVersions tracks the version hashes of the configuration files used.
+ // This enables detection of config changes when operator is upgraded.
+ // +optional
+ ConfigVersions map[string]string `json:"configVersions,omitempty"`
+}
+
+// ComponentStatus represents the status of an individual component.
+type ComponentStatus struct {
+ // Ready indicates whether the component is ready.
+ // +optional
+ Ready bool `json:"ready,omitempty"`
+
+ // Message provides additional information about the component status.
+ // +optional
+ Message string `json:"message,omitempty"`
+
+ // LastUpdated is the last time this status was updated.
+ // +optional
+ LastUpdated metav1.Time `json:"lastUpdated,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:printcolumn:name="Endpoint",type="string",JSONPath=".spec.exporter.endpoint",description="OTLP exporter endpoint"
+// +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="Current phase"
+// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
+// +operator-sdk:csv:customresourcedefinitions:displayName="Cluster Observability"
+// +operator-sdk:csv:customresourcedefinitions:resources={{Pod,v1},{Deployment,apps/v1},{ConfigMap,v1},{Service,v1},{DaemonSet,apps/v1}}
+
+// ClusterObservability is the Schema for the clusterobservabilities API.
+type ClusterObservability struct {
+ metav1.TypeMeta `json:",inline"`
+ metav1.ObjectMeta `json:"metadata,omitempty"`
+
+ Spec ClusterObservabilitySpec `json:"spec,omitempty"`
+ Status ClusterObservabilityStatus `json:"status,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// ClusterObservabilityList contains a list of ClusterObservability.
+type ClusterObservabilityList struct {
+ metav1.TypeMeta `json:",inline"`
+ metav1.ListMeta `json:"metadata,omitempty"`
+ Items []ClusterObservability `json:"items"`
+}
+
+func init() {
+ SchemeBuilder.Register(&ClusterObservability{}, &ClusterObservabilityList{})
+}
diff --git a/apis/v1alpha1/zz_generated.deepcopy.go b/apis/v1alpha1/zz_generated.deepcopy.go
index 2168d86fe3..dc058b9907 100644
--- a/apis/v1alpha1/zz_generated.deepcopy.go
+++ b/apis/v1alpha1/zz_generated.deepcopy.go
@@ -122,6 +122,149 @@ func (in *AutoscalerSpec) DeepCopy() *AutoscalerSpec {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterObservability) DeepCopyInto(out *ClusterObservability) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+ in.Spec.DeepCopyInto(&out.Spec)
+ in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterObservability.
+func (in *ClusterObservability) DeepCopy() *ClusterObservability {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterObservability)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *ClusterObservability) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterObservabilityCondition) DeepCopyInto(out *ClusterObservabilityCondition) {
+ *out = *in
+ in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterObservabilityCondition.
+func (in *ClusterObservabilityCondition) DeepCopy() *ClusterObservabilityCondition {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterObservabilityCondition)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterObservabilityList) DeepCopyInto(out *ClusterObservabilityList) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ListMeta.DeepCopyInto(&out.ListMeta)
+ if in.Items != nil {
+ in, out := &in.Items, &out.Items
+ *out = make([]ClusterObservability, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterObservabilityList.
+func (in *ClusterObservabilityList) DeepCopy() *ClusterObservabilityList {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterObservabilityList)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *ClusterObservabilityList) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterObservabilitySpec) DeepCopyInto(out *ClusterObservabilitySpec) {
+ *out = *in
+ in.Exporter.DeepCopyInto(&out.Exporter)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterObservabilitySpec.
+func (in *ClusterObservabilitySpec) DeepCopy() *ClusterObservabilitySpec {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterObservabilitySpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterObservabilityStatus) DeepCopyInto(out *ClusterObservabilityStatus) {
+ *out = *in
+ if in.Conditions != nil {
+ in, out := &in.Conditions, &out.Conditions
+ *out = make([]ClusterObservabilityCondition, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+ if in.ComponentsStatus != nil {
+ in, out := &in.ComponentsStatus, &out.ComponentsStatus
+ *out = make(map[string]ComponentStatus, len(*in))
+ for key, val := range *in {
+ (*out)[key] = *val.DeepCopy()
+ }
+ }
+ if in.ConfigVersions != nil {
+ in, out := &in.ConfigVersions, &out.ConfigVersions
+ *out = make(map[string]string, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterObservabilityStatus.
+func (in *ClusterObservabilityStatus) DeepCopy() *ClusterObservabilityStatus {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterObservabilityStatus)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ComponentStatus) DeepCopyInto(out *ComponentStatus) {
+ *out = *in
+ in.LastUpdated.DeepCopyInto(&out.LastUpdated)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComponentStatus.
+func (in *ComponentStatus) DeepCopy() *ComponentStatus {
+ if in == nil {
+ return nil
+ }
+ out := new(ComponentStatus)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ConfigMapsSpec) DeepCopyInto(out *ConfigMapsSpec) {
*out = *in
@@ -526,6 +669,53 @@ func (in *NodeJS) DeepCopy() *NodeJS {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *OTLPHTTPExporter) DeepCopyInto(out *OTLPHTTPExporter) {
+ *out = *in
+ if in.TLS != nil {
+ in, out := &in.TLS, &out.TLS
+ *out = new(TLSConfig)
+ **out = **in
+ }
+ if in.ReadBufferSize != nil {
+ in, out := &in.ReadBufferSize, &out.ReadBufferSize
+ *out = new(int)
+ **out = **in
+ }
+ if in.WriteBufferSize != nil {
+ in, out := &in.WriteBufferSize, &out.WriteBufferSize
+ *out = new(int)
+ **out = **in
+ }
+ if in.SendingQueue != nil {
+ in, out := &in.SendingQueue, &out.SendingQueue
+ *out = new(SendingQueueConfig)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.RetryOnFailure != nil {
+ in, out := &in.RetryOnFailure, &out.RetryOnFailure
+ *out = new(RetryConfig)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.Headers != nil {
+ in, out := &in.Headers, &out.Headers
+ *out = make(map[string]string, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OTLPHTTPExporter.
+func (in *OTLPHTTPExporter) DeepCopy() *OTLPHTTPExporter {
+ if in == nil {
+ return nil
+ }
+ out := new(OTLPHTTPExporter)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ObservabilitySpec) DeepCopyInto(out *ObservabilitySpec) {
*out = *in
@@ -1275,6 +1465,26 @@ func (in *Resource) DeepCopy() *Resource {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RetryConfig) DeepCopyInto(out *RetryConfig) {
+ *out = *in
+ if in.Enabled != nil {
+ in, out := &in.Enabled, &out.Enabled
+ *out = new(bool)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RetryConfig.
+func (in *RetryConfig) DeepCopy() *RetryConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(RetryConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Sampler) DeepCopyInto(out *Sampler) {
*out = *in
@@ -1305,6 +1515,36 @@ func (in *ScaleSubresourceStatus) DeepCopy() *ScaleSubresourceStatus {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SendingQueueConfig) DeepCopyInto(out *SendingQueueConfig) {
+ *out = *in
+ if in.Enabled != nil {
+ in, out := &in.Enabled, &out.Enabled
+ *out = new(bool)
+ **out = **in
+ }
+ if in.NumConsumers != nil {
+ in, out := &in.NumConsumers, &out.NumConsumers
+ *out = new(int)
+ **out = **in
+ }
+ if in.QueueSize != nil {
+ in, out := &in.QueueSize, &out.QueueSize
+ *out = new(int)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SendingQueueConfig.
+func (in *SendingQueueConfig) DeepCopy() *SendingQueueConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(SendingQueueConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *TLS) DeepCopyInto(out *TLS) {
*out = *in
@@ -1320,6 +1560,21 @@ func (in *TLS) DeepCopy() *TLS {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *TLSConfig) DeepCopyInto(out *TLSConfig) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TLSConfig.
+func (in *TLSConfig) DeepCopy() *TLSConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(TLSConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *TargetAllocator) DeepCopyInto(out *TargetAllocator) {
*out = *in
diff --git a/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml b/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml
index 24c82cc9a6..22745fb6ad 100644
--- a/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml
+++ b/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml
@@ -429,18 +429,10 @@ spec:
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities
- instrumentations
- - opentelemetrycollectors
- verbs:
- - get
- - list
- - patch
- - update
- - watch
- - apiGroups:
- - opentelemetry.io
- resources:
- opampbridges
+ - opentelemetrycollectors
- targetallocators
- targetallocators/finalizers
verbs:
@@ -454,12 +446,14 @@ spec:
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities/finalizers
- opampbridges/finalizers
verbs:
- update
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities/status
- opampbridges/status
- opentelemetrycollectors/finalizers
- opentelemetrycollectors/status
@@ -493,6 +487,18 @@ spec:
- patch
- update
- watch
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
- apiGroups:
- authentication.k8s.io
resources:
diff --git a/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml b/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml
index 75598fc82c..5934b29712 100644
--- a/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml
+++ b/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml
@@ -429,18 +429,10 @@ spec:
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities
- instrumentations
- - opentelemetrycollectors
- verbs:
- - get
- - list
- - patch
- - update
- - watch
- - apiGroups:
- - opentelemetry.io
- resources:
- opampbridges
+ - opentelemetrycollectors
- targetallocators
- targetallocators/finalizers
verbs:
@@ -454,12 +446,14 @@ spec:
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities/finalizers
- opampbridges/finalizers
verbs:
- update
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities/status
- opampbridges/status
- opentelemetrycollectors/finalizers
- opentelemetrycollectors/status
@@ -493,6 +487,18 @@ spec:
- patch
- update
- watch
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
- apiGroups:
- authentication.k8s.io
resources:
diff --git a/config/crd/bases/opentelemetry.io_clusterobservabilities.yaml b/config/crd/bases/opentelemetry.io_clusterobservabilities.yaml
new file mode 100644
index 0000000000..8ac5526b54
--- /dev/null
+++ b/config/crd/bases/opentelemetry.io_clusterobservabilities.yaml
@@ -0,0 +1,174 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: clusterobservabilities.opentelemetry.io
+spec:
+ group: opentelemetry.io
+ names:
+ kind: ClusterObservability
+ listKind: ClusterObservabilityList
+ plural: clusterobservabilities
+ singular: clusterobservability
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: OTLP exporter endpoint
+ jsonPath: .spec.exporter.endpoint
+ name: Endpoint
+ type: string
+ - description: Current phase
+ jsonPath: .status.phase
+ name: Phase
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ properties:
+ apiVersion:
+ type: string
+ kind:
+ type: string
+ metadata:
+ type: object
+ spec:
+ properties:
+ exporter:
+ properties:
+ compression:
+ enum:
+ - gzip
+ - none
+ - ""
+ type: string
+ encoding:
+ enum:
+ - proto
+ - json
+ type: string
+ endpoint:
+ type: string
+ headers:
+ additionalProperties:
+ type: string
+ type: object
+ logs_endpoint:
+ type: string
+ metrics_endpoint:
+ type: string
+ profiles_endpoint:
+ type: string
+ read_buffer_size:
+ minimum: 0
+ type: integer
+ retry_on_failure:
+ properties:
+ enabled:
+ type: boolean
+ initial_interval:
+ type: string
+ max_elapsed_time:
+ type: string
+ max_interval:
+ type: string
+ multiplier:
+ type: string
+ randomization_factor:
+ type: string
+ type: object
+ sending_queue:
+ properties:
+ enabled:
+ type: boolean
+ num_consumers:
+ minimum: 1
+ type: integer
+ queue_size:
+ minimum: 1
+ type: integer
+ type: object
+ timeout:
+ type: string
+ tls:
+ properties:
+ ca_file:
+ type: string
+ cert_file:
+ type: string
+ insecure:
+ type: boolean
+ key_file:
+ type: string
+ server_name:
+ type: string
+ type: object
+ traces_endpoint:
+ type: string
+ write_buffer_size:
+ minimum: 0
+ type: integer
+ type: object
+ required:
+ - exporter
+ type: object
+ status:
+ properties:
+ componentsStatus:
+ additionalProperties:
+ properties:
+ lastUpdated:
+ format: date-time
+ type: string
+ message:
+ type: string
+ ready:
+ type: boolean
+ type: object
+ type: object
+ conditions:
+ items:
+ properties:
+ lastTransitionTime:
+ format: date-time
+ type: string
+ message:
+ type: string
+ observedGeneration:
+ format: int64
+ type: integer
+ reason:
+ type: string
+ status:
+ type: string
+ type:
+ type: string
+ required:
+ - status
+ - type
+ type: object
+ type: array
+ x-kubernetes-list-map-keys:
+ - type
+ x-kubernetes-list-type: map
+ configVersions:
+ additionalProperties:
+ type: string
+ type: object
+ message:
+ type: string
+ observedGeneration:
+ format: int64
+ type: integer
+ phase:
+ type: string
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml
index 7c7d535ff4..47448f099b 100644
--- a/config/crd/kustomization.yaml
+++ b/config/crd/kustomization.yaml
@@ -6,6 +6,8 @@ resources:
- bases/opentelemetry.io_instrumentations.yaml
- bases/opentelemetry.io_opampbridges.yaml
- bases/opentelemetry.io_targetallocators.yaml
+# NOTE: We don't include the ClusterObservability CRD for now.
+# - bases/opentelemetry.io_clusterobservabilities.yaml
# +kubebuilder:scaffold:crdkustomizeresource
# patches here are for enabling the conversion webhook for each CRD
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index df0e9aa6de..ecddc921ac 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -137,18 +137,10 @@ rules:
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities
- instrumentations
- - opentelemetrycollectors
- verbs:
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - opentelemetry.io
- resources:
- opampbridges
+ - opentelemetrycollectors
- targetallocators
- targetallocators/finalizers
verbs:
@@ -162,12 +154,14 @@ rules:
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities/finalizers
- opampbridges/finalizers
verbs:
- update
- apiGroups:
- opentelemetry.io
resources:
+ - clusterobservabilities/status
- opampbridges/status
- opentelemetrycollectors/finalizers
- opentelemetrycollectors/status
@@ -201,3 +195,15 @@ rules:
- patch
- update
- watch
+- apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
diff --git a/config/samples/clusterobservability_v1alpha1_clusterobservability.yaml b/config/samples/clusterobservability_v1alpha1_clusterobservability.yaml
new file mode 100644
index 0000000000..6e241f2deb
--- /dev/null
+++ b/config/samples/clusterobservability_v1alpha1_clusterobservability.yaml
@@ -0,0 +1,49 @@
+# Copyright The OpenTelemetry Authors
+# SPDX-License-Identifier: Apache-2.0
+
+# Example ClusterObservability configuration
+# This resource manages a complete observability stack for your Kubernetes cluster
+apiVersion: opentelemetry.io/v1alpha1
+kind: ClusterObservability
+metadata:
+ name: cluster-observability
+ namespace: opentelemetry-operator-system
+spec:
+ # OTLP HTTP exporter configuration
+ exporter:
+ # Base endpoint for all signals (e.g., https://otel-backend.example.com:4318)
+ endpoint: "https://otel-backend.example.com:4318"
+
+ # Optional: Override endpoint for specific signals
+ # traces_endpoint: "https://trace-backend.example.com/v2/trace/otlp"
+ # metrics_endpoint: "https://metrics-backend.example.com/v2/datapoint/otlp"
+ # logs_endpoint: "https://logs-backend.example.com/v1/logs"
+
+ # Optional: Add headers (e.g., for authentication)
+ headers:
+ "Authorization": "Bearer your-token-here"
+
+ # Optional: Compression (gzip or none)
+ compression: "gzip"
+
+ # Optional: Request timeout
+ timeout: "30s"
+
+ # Optional: TLS configuration
+ # Note: File mounting for ca_file, cert_file, key_file is not yet supported
+ # tls:
+ # insecure: false
+ # server_name: "otel-backend.example.com"
+
+ # Optional: Retry configuration
+ # retry_on_failure:
+ # enabled: true
+ # initial_interval: "5s"
+ # max_interval: "30s"
+ # max_elapsed_time: "5m"
+
+ # Optional: Sending queue configuration
+ # sending_queue:
+ # enabled: true
+ # num_consumers: 10
+ # queue_size: 1000
diff --git a/docs/cluster-observability.md b/docs/cluster-observability.md
new file mode 100644
index 0000000000..f5e87cf8fa
--- /dev/null
+++ b/docs/cluster-observability.md
@@ -0,0 +1,338 @@
+# ClusterObservability Controller
+
+ClusterObservability provides a streamlined way to deploy and manage OpenTelemetry observability components across an entire Kubernetes cluster with a single Custom Resource.
+
+## Overview
+
+ClusterObservability automatically creates and manages:
+- **Agent Collector**: DaemonSet for node-level metrics, logs, and a host-network OTLP receiver
+- **Cluster Collector**: Deployment for cluster-level k8s metrics and events
+- **Auto-Instrumentation**: Single Instrumentation CR for application instrumentation (points to local agent)
+
+The controller uses a **controller-of-controllers pattern**, creating `OpenTelemetryCollector` and `Instrumentation` CRs that are managed by their respective controllers.
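+
+A minimal sketch of that pattern, assuming controller-runtime helpers and simplified fields; `co`, `scheme`, and `k8sClient` are assumed to be in scope, and all names are illustrative rather than the operator's exact code:
+
+```go
+// Create the agent collector as a child CR; the OpenTelemetryCollector
+// controller then manages the underlying DaemonSet.
+agent := &v1beta1.OpenTelemetryCollector{
+	ObjectMeta: metav1.ObjectMeta{
+		Name:      "cluster-observability-agent", // illustrative name
+		Namespace: co.Namespace,
+	},
+	Spec: v1beta1.OpenTelemetryCollectorSpec{Mode: v1beta1.ModeDaemonSet},
+}
+// The owner reference lets deletion of the ClusterObservability cascade to the
+// child CR, whose own controller reconciles the DaemonSet it describes.
+if err := controllerutil.SetControllerReference(co, agent, scheme); err != nil {
+	return err
+}
+if err := k8sClient.Create(ctx, agent); err != nil && !apierrors.IsAlreadyExists(err) {
+	return err
+}
+```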
+
+## Quick Start
+
+```yaml
+apiVersion: opentelemetry.io/v1alpha1
+kind: ClusterObservability
+metadata:
+ name: cluster-observability
+ namespace: opentelemetry-operator-system
+spec:
+ # OTLP HTTP exporter only
+ exporter:
+ endpoint: "https://otel-backend.example.com:4318"
+ # Optional: override endpoint for specific signals
+ traces_endpoint: "https://trace-backend.example.com/v2/trace/otlp"
+ metrics_endpoint: "https://metrics-backend.example.com/v2/datapoint/otlp"
+ logs_endpoint: "https://logs-backend.example.com/v1/logs"
+ headers:
+ "Authorization": "Bearer your-token"
+ compression: "gzip"
+ timeout: "30s"
+```
+
+## Architecture
+
+```mermaid
+graph TB
+ CO[ClusterObservability CR] --> Controller[ClusterObservability Controller]
+
+    Controller --> OTLC1[OpenTelemetryCollector CR<br/>Agent DaemonSet]
+    Controller --> OTLC2[OpenTelemetryCollector CR<br/>Cluster Deployment]
+    Controller --> INSTR[Instrumentation CR<br/>Single Instance<br/>Points to local agent]
+
+ OTLC1 --> OTC1[OpenTelemetryCollector Controller]
+ OTLC2 --> OTC2[OpenTelemetryCollector Controller]
+ INSTR --> IC[Instrumentation Controller]
+
+ OTC1 --> DS[Agent DaemonSet]
+ OTC2 --> DEP[Cluster Deployment]
+ IC --> POD[Instrumented Pods]
+
+ subgraph "OpenShift Integration"
+ Controller --> SCC[Security Context Constraints]
+ DS --> KubeletCA[Kubelet CA Certificate]
+ end
+
+ subgraph "Configuration System"
+ Controller --> ConfigLoader[Config Loader]
+ ConfigLoader --> BaseConfig[Base Configs]
+ ConfigLoader --> DistroConfig[Distro Overrides]
+ end
+```
+
+## Controller Interaction Flow
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant K8s as Kubernetes API
+ participant Controller as ClusterObservability Controller
+ participant ConfigLoader as Config Loader
+ participant Status as Status Handler
+
+ User->>K8s: Create ClusterObservability CR
+ K8s->>Controller: Watch Event (Create)
+
+ Controller->>Controller: Add Finalizer
+ Controller->>Controller: Validate Singleton
+
+ alt Multiple ClusterObservability CRs
+ Controller->>Status: Mark as Conflicted
+ Status->>K8s: Update Status (Conflicted)
+ else Single Active CR
+ Controller->>ConfigLoader: Load Collector Configs
+ ConfigLoader-->>Controller: Agent & Cluster Configs
+
+ Controller->>K8s: Create Agent OpenTelemetryCollector CR
+ Controller->>K8s: Create Cluster OpenTelemetryCollector CR
+ Controller->>K8s: Create Instrumentation CR
+
+ opt OpenShift Environment (cached detection)
+ Controller->>K8s: Create Security Context Constraints
+ end
+
+ Controller->>Status: Check Component Health
+ Status->>K8s: Query Component Status
+ Status-->>Controller: Health Status
+
+ Controller->>Status: Update Status (Ready/NotReady)
+ Status->>K8s: Update ClusterObservability Status
+ end
+
+ Note over Controller: Continuous Reconciliation
+ K8s->>Controller: Watch Event (Update/Delete)
+ Controller->>Controller: Reconcile Changes
+```
+
+## Feature Gate
+
+ClusterObservability is controlled by the `operator.clusterobservability` feature gate:
+
+```bash
+# Enable ClusterObservability
+./manager --feature-gates=+operator.clusterobservability
+
+# Verify the gate is listed in the help output
+./manager --help | grep clusterobservability
+```
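+
+The gate itself is registered with the collector's `featuregate` package, which the operator already depends on. A minimal sketch of such a registration follows; the variable name, stage, and description are assumptions for illustration:
+
+```go
+import "go.opentelemetry.io/collector/featuregate"
+
+// ClusterObservabilityGate guards reconciliation of ClusterObservability resources.
+var ClusterObservabilityGate = featuregate.GlobalRegistry().MustRegister(
+	"operator.clusterobservability",
+	featuregate.StageAlpha,
+	featuregate.WithRegisterDescription("enables the ClusterObservability controller"),
+)
+```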
+
+## CRD Configuration
+
+ClusterObservability has a simple spec with one field at present:
+
+```go
+type ClusterObservabilitySpec struct {
+ Exporter OTLPHTTPExporter // OTLP HTTP exporter configuration
+}
+```
+
+The `exporter` field uses the `otlphttp` exporter from OpenTelemetry Collector.
+
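+A minimal sketch of the per-signal endpoint precedence implied by the field docs: an explicit per-signal endpoint wins, otherwise the signal path is appended to the base `endpoint`. The helper below is illustrative, not the operator's exact code:
+
+```go
+import (
+	"strings"
+
+	"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
+)
+
+// tracesEndpoint resolves the effective traces URL for an exporter spec.
+func tracesEndpoint(exp v1alpha1.OTLPHTTPExporter) string {
+	if exp.TracesEndpoint != "" {
+		return exp.TracesEndpoint // explicit per-signal endpoint takes precedence
+	}
+	return strings.TrimSuffix(exp.Endpoint, "/") + "/v1/traces"
+}
+```
+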
+### Basic Example
+```yaml
+apiVersion: opentelemetry.io/v1alpha1
+kind: ClusterObservability
+metadata:
+ name: cluster-observability
+ namespace: opentelemetry-operator-system
+spec:
+ exporter:
+ endpoint: "https://otel.example.com:4318"
+ headers:
+ "Authorization": "Bearer your-token"
+ timeout: "30s"
+```
+
+**Note**: TLS certificate file mounting (`ca_file`, `cert_file`, `key_file`) is not yet supported, even though the fields are accepted by the CRD.
+
+## Conflict Detection Example
+
+The controller only allows one active ClusterObservability resource in the cluster. When multiple resources are detected, the **oldest resource** (by creation timestamp) remains active while others are marked as conflicted. If resources have identical creation timestamps, the resource with the lexicographically smaller namespace/name becomes active.
+
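+A minimal sketch of that selection rule (the controller's actual implementation may differ):
+
+```go
+import "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
+
+// isActive reports whether instance wins the oldest-first selection,
+// with a lexicographic namespace/name tie-break on equal timestamps.
+func isActive(instance v1alpha1.ClusterObservability, all []v1alpha1.ClusterObservability) bool {
+	for _, other := range all {
+		if other.UID == instance.UID {
+			continue
+		}
+		if other.CreationTimestamp.Before(&instance.CreationTimestamp) {
+			return false
+		}
+		if other.CreationTimestamp.Equal(&instance.CreationTimestamp) &&
+			other.Namespace+"/"+other.Name < instance.Namespace+"/"+instance.Name {
+			return false
+		}
+	}
+	return true
+}
+```
+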
+When a second ClusterObservability resource is created, it gets marked with `Conflicted` status:
+
+```yaml
+Name: cluster-observability-2
+Namespace: opentelemetry-operator-system
+Labels:
+Annotations:
+API Version: opentelemetry.io/v1alpha1
+Kind: ClusterObservability
+Metadata:
+ Creation Timestamp: 2025-09-06T03:30:28Z
+ Generation: 1
+ Resource Version: 7935997
+ UID: 969895d1-ab6b-429e-b740-f8381ab3ce32
+Spec:
+ Exporter:
+ Endpoint: http://otlp-collector.opentelemetry-demo.svc.cluster.local:4317
+ Headers:
+ X - Deployment: clusterobservability-test
+Status:
+ Conditions:
+ Last Transition Time: 2025-09-06T03:30:28Z
+ Message: Multiple ClusterObservability resources exist in cluster
+ Reason: Configured
+ Status: True
+ Type: Conflicted
+ Message: Multiple ClusterObservability resources detected. Only the oldest resource is active.
+ Observed Generation: 1
+ Phase: Conflicted
+Events:
+ Type Reason Age From Message
+ ---- ------ ---- ---- -------
+ Normal Info 6s (x2 over 6s) cluster-observability status updated - resource is conflicted
+ Warning Conflicted 4s (x25 over 6s) cluster-observability Multiple ClusterObservability resources detected. Only opentelemetry-operator-system/cluster-observability (oldest) is active
+```
+
+### Agent Collector Configuration
+
+Agent collectors run as a DaemonSet with `hostNetwork: true` and currently collect the following:
+- **OTLP Receiver**: Receives traces/metrics from auto-instrumented apps (gRPC:4317, HTTP:4318)
+- **Kubelet Stats**: Pod and container metrics via `kubeletstats` receiver
+- **Container Logs**: Application logs via `filelog` receiver
+
+The agent collector exposes OTLP ports on the host network, allowing instrumented applications to send telemetry to their local node's collector using `$(OTEL_NODE_IP):4317` or `$(OTEL_NODE_IP):4318`.
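+
+A minimal sketch of how that node-local endpoint is typically wired into a pod through the downward API; this fragment is illustrative, not the operator's exact injection code:
+
+```go
+import corev1 "k8s.io/api/core/v1"
+
+env := []corev1.EnvVar{
+	{
+		// Resolve the node's IP at runtime via the downward API.
+		Name: "OTEL_NODE_IP",
+		ValueFrom: &corev1.EnvVarSource{
+			FieldRef: &corev1.ObjectFieldSelector{FieldPath: "status.hostIP"},
+		},
+	},
+	{
+		// Point OTLP exports at the agent collector on the local node.
+		Name:  "OTEL_EXPORTER_OTLP_ENDPOINT",
+		Value: "http://$(OTEL_NODE_IP):4318",
+	},
+}
+```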
+
+### Cluster Collector Configuration
+
+Cluster collectors run as a Deployment and collect:
+- **Cluster Metrics**: Via `k8s_cluster` receiver
+
+## Auto-Instrumentation
+
+ClusterObservability creates a **single Instrumentation CR** in the same namespace as the ClusterObservability resource. Users reference this CR from their application annotations. The instrumentation configuration is controlled by operator settings.
+
+### Example ClusterObservability Object
+
+```yaml
+Name: cluster-observability
+Namespace: opentelemetry-operator-system
+Labels:
+Annotations:
+API Version: opentelemetry.io/v1alpha1
+Kind: ClusterObservability
+Metadata:
+ Creation Timestamp: 2025-09-06T03:36:05Z
+ Finalizers:
+ clusterobservability.opentelemetry.io/finalizer
+ Generation: 1
+ Resource Version: 7948019
+ UID: b3dc8d25-b345-4cb4-8bbc-516e5e7b1dc7
+Spec:
+ Exporter:
+ Compression: gzip
+ Headers:
+ Content - Type: application/x-protobuf
+ X - SF - TOKEN: fake-token
+ metrics_endpoint: https://ingest.us0.signalfx.com/v2/datapoint/otlp
+ Timeout: 30s
+ traces_endpoint: https://ingest.us0.signalfx.com/v2/trace/otlp
+Status:
+ Components Status:
+ Agent:
+ Last Updated: 2025-09-06T03:40:24Z
+ Message: Agent collector DaemonSet not ready: 0/3 pods ready
+ Cluster:
+ Last Updated: 2025-09-06T03:40:24Z
+ Message: Cluster collector Deployment ready: 1/1 replicas ready
+ Ready: true
+ Instrumentation:
+ Last Updated: 2025-09-06T03:40:24Z
+ Message: Instrumentation CR ready: opentelemetry-operator-system/default-instrumentation
+ Ready: true
+ Conditions:
+ Last Transition Time: 2025-09-06T03:36:05Z
+ Message: ClusterObservability configuration applied successfully
+ Reason: Configured
+ Status: True
+ Type: Configured
+ Last Transition Time: 2025-09-06T03:36:05Z
+ Message: Collector configuration has been updated - managed collectors will be reconciled
+ Reason: ConfigChanged
+ Status: True
+ Type: ConfigurationUpdated
+ Config Versions:
+ Agent - Collector - Openshift: d3945a86e3b61a9bb578b8340cf9679a486b4cde13332b7f216b6d85874ea6ee
+ Cluster - Collector - Openshift: 4ac402eda083f315297e410b2dccb1698cb5ae10ebedc8ad5eb860a5aeda66a1
+ Message: Some components are not ready
+ Observed Generation: 1
+ Phase: Pending
+Events:
+ Type Reason Age From Message
+ ---- ------ ---- ---- -------
+ Normal ConfigChanged 4m19s (x2 over 4m19s) cluster-observability Collector configuration has changed, updating managed resources
+ Normal Info 4m17s (x23 over 4m19s) cluster-observability applied status changes
+```
+
+### How Users Apply Auto-Instrumentation
+
+Users trigger auto-instrumentation by adding annotations that reference the single Instrumentation CR:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: my-app
+ namespace: my-apps
+spec:
+ template:
+ metadata:
+ annotations:
+ # Reference the single Instrumentation CR using namespace/name format
+        instrumentation.opentelemetry.io/inject-java: "opentelemetry-operator-system/default-instrumentation"
+ spec:
+ containers:
+ - name: app
+ image: my-java-app:latest
+```
+
+**Pattern**: `namespace/instrumentation-name`, where `namespace` is the namespace in which the ClusterObservability resource (and therefore its Instrumentation CR) is deployed.
+
+## Troubleshooting
+
+### Check Controller Status
+```bash
+kubectl logs deployment/opentelemetry-operator-controller-manager -n opentelemetry-operator-system
+```
+
+### Manual RBAC Workaround
+If automatic RBAC creation (`--create-rbac-permissions=true`) isn't working, you may need to apply manual RBAC permissions. This is a known issue being investigated.
+
+```bash
+# Apply manual ClusterRole and ClusterRoleBinding for collectors
+kubectl apply -f deploy-test/cluster-observability-manual-rbac.yaml
+```
+
+### Check ClusterObservability Status
+```bash
+kubectl get clusterobservabilities -n opentelemetry-operator-system
+kubectl describe clusterobservability cluster-observability -n opentelemetry-operator-system
+```
+
+### Check Component Health
+```bash
+# OpenTelemetryCollector CRs
+kubectl get opentelemetrycollector -l app.kubernetes.io/managed-by=opentelemetry-operator -n opentelemetry-operator-system
+
+# Agent collectors
+kubectl get daemonsets -l app.kubernetes.io/managed-by=opentelemetry-operator -n opentelemetry-operator-system
+
+# Cluster collectors
+kubectl get deployments -l app.kubernetes.io/managed-by=opentelemetry-operator -n opentelemetry-operator-system
+
+# Auto-instrumentation
+kubectl get instrumentations -n opentelemetry-operator-system
+```
+
+### Check Events
+```bash
+kubectl get events --field-selector reason=Conflicted
+kubectl get events --field-selector involvedObject.kind=ClusterObservability
+```
diff --git a/go.sum b/go.sum
index 7ccbf510e1..416f3e7b82 100644
--- a/go.sum
+++ b/go.sum
@@ -543,67 +543,42 @@ go.opentelemetry.io/collector/featuregate v1.37.0 h1:CjsHzjktiqq/dxid4Xkhuf3yD6o
go.opentelemetry.io/collector/featuregate v1.37.0/go.mod h1:Y/KsHbvREENKvvN9RlpiWk/IGBK+CATBYzIIpU7nccc=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 h1:Hf9xI/XLML9ElpiHVDNwvqI0hIFlzV8dgIr35kV1kRU=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0/go.mod h1:NfchwuyNoMcZ5MLHwPrODwUF1HWCXWrL31s8gSAdIKY=
-go.opentelemetry.io/contrib/otelconf v0.17.0 h1:Yh9uifPSe8yiksLshMbeAXGm/ZRmo7LD7Di+/yd1L5w=
-go.opentelemetry.io/contrib/otelconf v0.17.0/go.mod h1:8dHKS6uMiZlvmrA7MGUtb4HwnX+ukdF5iS3p2UPKvLE=
go.opentelemetry.io/contrib/otelconf v0.18.0 h1:ciF2Gf00BWs0DnexKFZXcxg9kJ8r3SUW1LOzW3CsKA8=
go.opentelemetry.io/contrib/otelconf v0.18.0/go.mod h1:FcP7k+JLwBLdOxS6qY6VQ/4b5VBntI6L6o80IMwhAeI=
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
-go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.13.0 h1:z6lNIajgEBVtQZHjfw2hAccPEBDs+nx58VemmXWa2ec=
-go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.13.0/go.mod h1:+kyc3bRx/Qkq05P6OCu3mTEIOxYRYzoIg+JsUp5X+PM=
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.14.0 h1:OMqPldHt79PqWKOMYIAQs3CxAi7RLgPxwfFSwr4ZxtM=
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.14.0/go.mod h1:1biG4qiqTxKiUCtoWDPpL3fB3KxVwCiGw81j3nKMuHE=
-go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.13.0 h1:zUfYw8cscHHLwaY8Xz3fiJu+R59xBnkgq2Zr1lwmK/0=
-go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.13.0/go.mod h1:514JLMCcFLQFS8cnTepOk6I09cKWJ5nGHBxHrMJ8Yfg=
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 h1:QQqYw3lkrzwVsoEX0w//EhH/TCnpRdEenKBOOEIMjWc=
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0/go.mod h1:gSVQcr17jk2ig4jqJ2DX30IdWH251JcNAecvrqTxH1s=
-go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0 h1:zG8GlgXCJQd5BU98C0hZnBbElszTmUgCNCfYneaDL0A=
-go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0/go.mod h1:hOfBCz8kv/wuq73Mx2H2QnWokh/kHZxkh6SNF2bdKtw=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 h1:vl9obrcoWVKp/lwl8tRE33853I8Xru9HFbw/skNeLs8=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0/go.mod h1:GAXRxmLJcVM3u22IjTg74zWBrRCKq8BnOqUVLodpcpw=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.38.0 h1:Oe2z/BCg5q7k4iXC3cqJxKYg0ieRiOqF0cecFYdPTwk=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.38.0/go.mod h1:ZQM5lAJpOsKnYagGg/zV2krVqTtaVdYdDkhMoX6Oalg=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0 h1:Ahq7pZmv87yiyn3jeFz/LekZmPLLdKejuO3NcK9MssM=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0/go.mod h1:MJTqhM0im3mRLw1i8uGHnCvUEeS7VwRyxlLC78PA18M=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0 h1:EtFWSnwW9hGObjkIdmlnWSydO+Qs8OwzfzXLUPg4xOc=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0/go.mod h1:QjUEoiGCPkvFZ/MjK6ZZfNOS6mfVEVKYE99dFhuN2LI=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.37.0 h1:bDMKF3RUSxshZ5OjOTi8rsHGaPKsAt76FaqgvIUySLc=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.37.0/go.mod h1:dDT67G/IkA46Mr2l9Uj7HsQVwsjASyV9SjGofsiUZDA=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 h1:aTL7F04bJHUlztTsNGJ2l+6he8c+y/b//eR0jjjemT4=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4=
go.opentelemetry.io/otel/exporters/prometheus v0.60.0 h1:cGtQxGvZbnrWdC2GyjZi0PDKVSLWP/Jocix3QWfXtbo=
go.opentelemetry.io/otel/exporters/prometheus v0.60.0/go.mod h1:hkd1EekxNo69PTV4OWFGZcKQiIqg0RfuWExcPKFvepk=
-go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.13.0 h1:yEX3aC9KDgvYPhuKECHbOlr5GLwH6KTjLJ1sBSkkxkc=
-go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.13.0/go.mod h1:/GXR0tBmmkxDaCUGahvksvp66mx4yh5+cFXgSlhg0vQ=
go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.14.0 h1:B/g+qde6Mkzxbry5ZZag0l7QrQBCtVm7lVjaLgmpje8=
go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.14.0/go.mod h1:mOJK8eMmgW6ocDJn6Bn11CcZ05gi3P8GylBXEkZtbgA=
-go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.37.0 h1:6VjV6Et+1Hd2iLZEPtdV7vie80Yyqf7oikJLjQ/myi0=
-go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.37.0/go.mod h1:u8hcp8ji5gaM/RfcOo8z9NMnf1pVLfVY7lBY2VOGuUU=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.38.0 h1:wm/Q0GAAykXv83wzcKzGGqAnnfLFyFe7RslekZuv+VI=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.38.0/go.mod h1:ra3Pa40+oKjvYh+ZD3EdxFZZB0xdMfuileHAm4nNN7w=
-go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.37.0 h1:SNhVp/9q4Go/XHBkQ1/d5u9P/U+L1yaGPoi0x+mStaI=
-go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.37.0/go.mod h1:tx8OOlGH6R4kLV67YaYO44GFXloEjGPZuMjEkaaqIp4=
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 h1:kJxSDN4SgWWTjG/hPp3O7LCGLcHXFlvS2/FFOrwL+SE=
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0/go.mod h1:mgIOzS7iZeKJdeB8/NYHrJ48fdGc71Llo5bJ1J4DWUE=
-go.opentelemetry.io/otel/log v0.13.0 h1:yoxRoIZcohB6Xf0lNv9QIyCzQvrtGZklVbdCoyb7dls=
-go.opentelemetry.io/otel/log v0.13.0/go.mod h1:INKfG4k1O9CL25BaM1qLe0zIedOpvlS5Z7XgSbmN83E=
go.opentelemetry.io/otel/log v0.14.0 h1:2rzJ+pOAZ8qmZ3DDHg73NEKzSZkhkGIua9gXtxNGgrM=
go.opentelemetry.io/otel/log v0.14.0/go.mod h1:5jRG92fEAgx0SU/vFPxmJvhIuDU9E1SUnEQrMlJpOno=
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
-go.opentelemetry.io/otel/sdk/log v0.13.0 h1:I3CGUszjM926OphK8ZdzF+kLqFvfRY/IIoFq/TjwfaQ=
-go.opentelemetry.io/otel/sdk/log v0.13.0/go.mod h1:lOrQyCCXmpZdN7NchXb6DOZZa1N5G1R2tm5GMMTpDBw=
go.opentelemetry.io/otel/sdk/log v0.14.0 h1:JU/U3O7N6fsAXj0+CXz21Czg532dW2V4gG1HE/e8Zrg=
go.opentelemetry.io/otel/sdk/log v0.14.0/go.mod h1:imQvII+0ZylXfKU7/wtOND8Hn4OpT3YUoIgqJVksUkM=
-go.opentelemetry.io/otel/sdk/log/logtest v0.13.0 h1:9yio6AFZ3QD9j9oqshV1Ibm9gPLlHNxurno5BreMtIA=
-go.opentelemetry.io/otel/sdk/log/logtest v0.13.0/go.mod h1:QOGiAJHl+fob8Nu85ifXfuQYmJTFAvcrxL6w5/tu168=
go.opentelemetry.io/otel/sdk/log/logtest v0.14.0 h1:Ijbtz+JKXl8T2MngiwqBlPaHqc4YCaP/i13Qrow6gAM=
+go.opentelemetry.io/otel/sdk/log/logtest v0.14.0/go.mod h1:dCU8aEL6q+L9cYTqcVOk8rM9Tp8WdnHOPLiBgp0SGOA=
go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
diff --git a/internal/config/cli.go b/internal/config/cli.go
index 81ebf77cee..57c0175684 100644
--- a/internal/config/cli.go
+++ b/internal/config/cli.go
@@ -145,6 +145,13 @@ func ApplyCLI(cfg *Config) error {
cfg.Zap.LevelFormat, _ = f.GetString("zap-level-format")
case "enable-webhooks":
cfg.EnableWebhooks, _ = f.GetBool("enable-webhooks")
+ case "create-rbac-permissions":
+ val, _ := f.GetBool("create-rbac-permissions")
+ if val {
+ cfg.CreateRBACPermissions = autoRBAC.Available
+ } else {
+ cfg.CreateRBACPermissions = autoRBAC.NotAvailable
+ }
}
}
})
diff --git a/internal/controllers/clusterobservability_controller.go b/internal/controllers/clusterobservability_controller.go
new file mode 100644
index 0000000000..72127229fd
--- /dev/null
+++ b/internal/controllers/clusterobservability_controller.go
@@ -0,0 +1,562 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package controllers
+
+import (
+ "context"
+ "fmt"
+ "time"
+
+ "github.com/go-logr/logr"
+ corev1 "k8s.io/api/core/v1"
+ apiequality "k8s.io/apimachinery/pkg/api/equality"
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/client-go/tools/record"
+ "k8s.io/client-go/util/retry"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
+
+ "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
+ "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1"
+ "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/openshift"
+ "github.com/open-telemetry/opentelemetry-operator/internal/config"
+ "github.com/open-telemetry/opentelemetry-operator/internal/manifests"
+ "github.com/open-telemetry/opentelemetry-operator/internal/manifests/clusterobservability"
+ coStatus "github.com/open-telemetry/opentelemetry-operator/internal/status/clusterobservability"
+)
+
+// ClusterObservabilityReconciler reconciles a ClusterObservability object.
+type ClusterObservabilityReconciler struct {
+ client.Client
+ recorder record.EventRecorder
+ scheme *runtime.Scheme
+ log logr.Logger
+ config config.Config
+}
+
+// ClusterObservabilityReconcilerParams is the set of options to build a new ClusterObservabilityReconciler.
+type ClusterObservabilityReconcilerParams struct {
+ client.Client
+ Recorder record.EventRecorder
+ Scheme *runtime.Scheme
+ Log logr.Logger
+ Config config.Config
+}
+
+func (r *ClusterObservabilityReconciler) getParams(instance v1alpha1.ClusterObservability) manifests.Params {
+ return manifests.Params{
+ Config: r.config,
+ Client: r.Client,
+ ClusterObservability: instance,
+ Log: r.log,
+ Scheme: r.scheme,
+ Recorder: r.recorder,
+ }
+}
+
+func NewClusterObservabilityReconciler(params ClusterObservabilityReconcilerParams) *ClusterObservabilityReconciler {
+ reconciler := &ClusterObservabilityReconciler{
+ Client: params.Client,
+ scheme: params.Scheme,
+ log: params.Log,
+ recorder: params.Recorder,
+ config: params.Config,
+ }
+ return reconciler
+}
+
+//+kubebuilder:rbac:groups=opentelemetry.io,resources=clusterobservabilities,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=opentelemetry.io,resources=clusterobservabilities/status,verbs=get;update;patch
+//+kubebuilder:rbac:groups=opentelemetry.io,resources=clusterobservabilities/finalizers,verbs=update
+//+kubebuilder:rbac:groups=opentelemetry.io,resources=opentelemetrycollectors,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=opentelemetry.io,resources=instrumentations,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch
+//+kubebuilder:rbac:groups="",resources=events,verbs=create;patch
+//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch
+//+kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=get;list;watch
+//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch
+//+kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=get;list;watch;create;update;patch;delete
+
+// Reconcile is part of the main kubernetes reconciliation loop which aims to
+// move the current state of the cluster closer to the desired state.
+func (r *ClusterObservabilityReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ log := r.log.WithValues("clusterobservability", req.NamespacedName)
+
+ var instance v1alpha1.ClusterObservability
+ if err := r.Client.Get(ctx, req.NamespacedName, &instance); err != nil {
+ if !apierrors.IsNotFound(err) {
+ log.Error(err, "unable to fetch ClusterObservability")
+ }
+ return ctrl.Result{}, client.IgnoreNotFound(err)
+ }
+
+ // Handle deletion
+ if deletionTimestamp := instance.GetDeletionTimestamp(); deletionTimestamp != nil {
+ return r.handleDeletion(ctx, log, &instance)
+ }
+
+ // Validate singleton constraint
+ isActive, conflictErr := r.validateSingleton(ctx, log, &instance)
+ if conflictErr != nil {
+ return ctrl.Result{}, conflictErr
+ }
+
+ if !isActive {
+ // This instance is conflicted, update status and skip reconciliation
+ params := r.getParams(instance)
+ return coStatus.HandleReconcileStatus(ctx, log, params, fmt.Errorf("multiple ClusterObservability resources detected"))
+ }
+
+ // TODO: Add upgrade support
+ // TODO: Support management state like OpenTelemetryCollector
+
+ configChanged, configErr := coStatus.DetectConfigChanges(&instance)
+ if configErr != nil {
+ log.Error(configErr, "failed to detect config changes")
+ }
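+ // Config-change detection is best-effort: a detection failure is logged
+ // but does not block reconciliation.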
+
+ if configChanged {
+ log.Info("Configuration changes detected - triggering full reconciliation")
+ r.recorder.Event(&instance, corev1.EventTypeNormal, "ConfigChanged",
+ "Collector configuration has changed, updating managed resources")
+ }
+
+ // Add finalizer to ensure proper resource cleanup
+ // AddFinalizer reports whether it changed the object, so no separate
+ // ContainsFinalizer check is needed.
+ if controllerutil.AddFinalizer(&instance, v1alpha1.ClusterObservabilityFinalizer) {
+ if err := r.Update(ctx, &instance); err != nil {
+ return ctrl.Result{}, err
+ }
+ }
+
+ log.V(2).Info("Reconciling ClusterObservability managed resources")
+
+ params := r.getParams(instance)
+
+ desiredObjects, buildErr := clusterobservability.Build(params)
+ if buildErr != nil {
+ return ctrl.Result{}, buildErr
+ }
+
+ var openTelemetryCRs []client.Object
+ var unstructuredObjects []client.Object
+ var regularObjects []client.Object
+
+ for _, obj := range desiredObjects {
+ switch obj.(type) {
+ case *v1beta1.OpenTelemetryCollector, *v1alpha1.Instrumentation:
+ openTelemetryCRs = append(openTelemetryCRs, obj)
+ case *unstructured.Unstructured:
+ unstructuredObjects = append(unstructuredObjects, obj)
+ default:
+ regularObjects = append(regularObjects, obj)
+ }
+ }
+
+ // Handle OpenTelemetry CRs - their controllers manage the underlying resources
+ for _, crObj := range openTelemetryCRs {
+ if err := r.reconcileOpenTelemetryResource(ctx, log, crObj); err != nil {
+ return ctrl.Result{}, fmt.Errorf("failed to reconcile OpenTelemetry CR %s: %w", crObj.GetObjectKind(), err)
+ }
+ }
+
+ // Handle Unstructured objects (like OpenShift SCC) separately to avoid deep copy issues
+ for _, unstructuredObj := range unstructuredObjects {
+ if err := r.reconcileUnstructuredResource(ctx, log, unstructuredObj); err != nil {
+ return ctrl.Result{}, fmt.Errorf("failed to reconcile unstructured resource %s: %w", unstructuredObj.GetName(), err)
+ }
+ }
+ // Handle regular Kubernetes resources (currently none - OpenTelemetry CRs handle their own resources)
+ if len(regularObjects) > 0 {
+ ownedObjects, err := r.findClusterObservabilityOwnedObjects(ctx, params)
+ if err != nil {
+ return ctrl.Result{}, err
+ }
+ err = reconcileDesiredObjects(ctx, r.Client, log, &params.ClusterObservability, params.Scheme, regularObjects, ownedObjects)
+ if err != nil {
+ return ctrl.Result{}, err
+ }
+ }
+ return coStatus.HandleReconcileStatus(ctx, log, params, nil)
+}
+
+// reconcileOpenTelemetryResource creates/updates OpenTelemetry CRs.
+// Their respective controllers handle the underlying Kubernetes resources.
+// TODO: fix issue with resourceVersion becoming stale due to updates from OpenTelemetryCollector/Instrumentation controllers.
+func (r *ClusterObservabilityReconciler) reconcileOpenTelemetryResource(ctx context.Context, log logr.Logger, desired client.Object) error {
+ key := client.ObjectKeyFromObject(desired)
+
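+ // Instantiate the matching concrete type so the typed client can decode
+ // the live object into it.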
+ var existing client.Object
+ switch desired.(type) {
+ case *v1beta1.OpenTelemetryCollector:
+ existing = &v1beta1.OpenTelemetryCollector{}
+ case *v1alpha1.Instrumentation:
+ existing = &v1alpha1.Instrumentation{}
+ default:
+ return fmt.Errorf("unsupported CRD type: %T", desired)
+ }
+
+ getErr := r.Get(ctx, key, existing)
+
+ if getErr != nil {
+ if apierrors.IsNotFound(getErr) {
+ if createErr := r.Create(ctx, desired); createErr != nil {
+ return fmt.Errorf("failed to create %s %s: %w", desired.GetObjectKind().GroupVersionKind().Kind, key, createErr)
+ }
+ log.Info("Created CR", "kind", desired.GetObjectKind().GroupVersionKind().Kind, "name", key.Name, "namespace", key.Namespace)
+ return nil
+ }
+ return fmt.Errorf("failed to get %s %s: %w", desired.GetObjectKind().GroupVersionKind().Kind, key, getErr)
+ }
+ switch existingCRD := existing.(type) {
+ case *v1beta1.OpenTelemetryCollector:
+ desiredCRD := desired.(*v1beta1.OpenTelemetryCollector)
+ if !apiequality.Semantic.DeepEqual(existingCRD.Spec, desiredCRD.Spec) {
+ err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+ latest := &v1beta1.OpenTelemetryCollector{}
+ if err := r.Get(ctx, key, latest); err != nil {
+ return err
+ }
+
+ // Only update if still different (another controller might have updated it)
+ if apiequality.Semantic.DeepEqual(latest.Spec, desiredCRD.Spec) {
+ log.Info("OpenTelemetryCollector already matches desired state", "name", key.Name, "namespace", key.Namespace)
+ return nil
+ }
+
+ // Update the latest version with our desired changes
+ latest.Spec = desiredCRD.Spec
+ latest.Labels = desiredCRD.Labels
+ latest.Annotations = desiredCRD.Annotations
+
+ return r.Update(ctx, latest)
+ })
+
+ if err != nil {
+ return fmt.Errorf("failed to update OpenTelemetryCollector %s: %w", key, err)
+ }
+
+ log.Info("Updated OpenTelemetryCollector", "name", key.Name, "namespace", key.Namespace)
+ }
+
+ case *v1alpha1.Instrumentation:
+ desiredCRD := desired.(*v1alpha1.Instrumentation)
+ if !apiequality.Semantic.DeepEqual(existingCRD.Spec, desiredCRD.Spec) {
+ err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+ latest := &v1alpha1.Instrumentation{}
+ if err := r.Get(ctx, key, latest); err != nil {
+ return err
+ }
+
+ // Only update if still different (another controller might have updated it)
+ if apiequality.Semantic.DeepEqual(latest.Spec, desiredCRD.Spec) {
+ log.Info("Instrumentation already matches desired state", "name", key.Name, "namespace", key.Namespace)
+ return nil
+ }
+
+ // Update the latest version with our desired changes
+ latest.Spec = desiredCRD.Spec
+ latest.Labels = desiredCRD.Labels
+ latest.Annotations = desiredCRD.Annotations
+
+ return r.Update(ctx, latest)
+ })
+
+ if err != nil {
+ return fmt.Errorf("failed to update Instrumentation %s: %w", key, err)
+ }
+
+ log.Info("Updated Instrumentation", "name", key.Name, "namespace", key.Namespace)
+ }
+
+ default:
+ return fmt.Errorf("unsupported CRD type: %T", existing)
+ }
+
+ return nil
+}
+
+// reconcileUnstructuredResource handles Unstructured objects (like OpenShift SCCs)
+// without deep copy issues that occur with complex nested data.
+func (r *ClusterObservabilityReconciler) reconcileUnstructuredResource(ctx context.Context, log logr.Logger, obj client.Object) error {
+ unstructuredObj := obj.(*unstructured.Unstructured)
+
+ // Create a new Unstructured object for fetching existing resource
+ // This avoids deep copy issues with the desired object
+ existing := &unstructured.Unstructured{}
+ existing.SetGroupVersionKind(unstructuredObj.GroupVersionKind())
+
+ key := client.ObjectKeyFromObject(unstructuredObj)
+ getErr := r.Client.Get(ctx, key, existing)
+ if getErr != nil && !apierrors.IsNotFound(getErr) {
+ return fmt.Errorf("failed to get existing unstructured resource %s: %w", unstructuredObj.GetName(), getErr)
+ }
+
+ if apierrors.IsNotFound(getErr) {
+ // Create new resource
+ if createErr := r.Client.Create(ctx, unstructuredObj); createErr != nil {
+ return fmt.Errorf("failed to create unstructured resource %s: %w", unstructuredObj.GetName(), createErr)
+ }
+ log.Info("Created unstructured resource",
+ "kind", unstructuredObj.GetKind(),
+ "name", unstructuredObj.GetName())
+ } else {
+ // Compare the full objects. Server-populated metadata (resourceVersion,
+ // managedFields, status) can make this report spurious differences.
+ if !apiequality.Semantic.DeepEqual(existing.Object, unstructuredObj.Object) {
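+ // Carry over the live object's resourceVersion so the update passes the
+ // API server's optimistic-concurrency check.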
+ unstructuredObj.SetResourceVersion(existing.GetResourceVersion())
+ if updateErr := r.Client.Update(ctx, unstructuredObj); updateErr != nil {
+ return fmt.Errorf("failed to update unstructured resource %s: %w", unstructuredObj.GetName(), updateErr)
+ }
+ log.Info("Updated unstructured resource",
+ "kind", unstructuredObj.GetKind(),
+ "name", unstructuredObj.GetName())
+ }
+ }
+
+ return nil
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *ClusterObservabilityReconciler) SetupWithManager(mgr ctrl.Manager) error {
+ err := r.SetupCaches(mgr)
+ if err != nil {
+ return err
+ }
+
+ ownedResources := r.GetOwnedResourceTypes()
+ builder := ctrl.NewControllerManagedBy(mgr).
+ For(&v1alpha1.ClusterObservability{}).
+ Watches(
+ &corev1.Namespace{},
+ handler.EnqueueRequestsFromMapFunc(r.findClusterObservabilityForNamespace),
+ )
+
+ for _, resource := range ownedResources {
+ builder.Owns(resource)
+ }
+
+ return builder.Complete(r)
+}
+
+// SetupCaches sets up field indexing for efficient owned object queries.
+func (r *ClusterObservabilityReconciler) SetupCaches(mgr ctrl.Manager) error {
+ const clusterObservabilityResourceOwnerKey = ".metadata.owner"
+
+ ownedResources := r.GetOwnedResourceTypes()
+ for _, resource := range ownedResources {
+ if err := mgr.GetCache().IndexField(context.Background(), resource, clusterObservabilityResourceOwnerKey, func(rawObj client.Object) []string {
+ owner := metav1.GetControllerOf(rawObj)
+ if owner == nil {
+ return nil
+ }
+ // Make sure it's a ClusterObservability
+ if owner.APIVersion != v1alpha1.GroupVersion.String() || owner.Kind != "ClusterObservability" {
+ return nil
+ }
+ return []string{owner.Name}
+ }); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// findClusterObservabilityForNamespace finds ClusterObservability instances when namespaces change.
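+// Every namespace event re-enqueues all ClusterObservability resources; this
+// fan-out is acceptable while the CR is enforced as a singleton.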
+func (r *ClusterObservabilityReconciler) findClusterObservabilityForNamespace(ctx context.Context, _ client.Object) []ctrl.Request {
+ var clusterObservabilityList v1alpha1.ClusterObservabilityList
+ if err := r.List(ctx, &clusterObservabilityList); err != nil {
+ r.log.Error(err, "failed to list ClusterObservability resources")
+ return nil
+ }
+
+ var requests []ctrl.Request
+ for _, co := range clusterObservabilityList.Items {
+ requests = append(requests, ctrl.Request{
+ NamespacedName: client.ObjectKeyFromObject(&co),
+ })
+ }
+ return requests
+}
+
+// validateSingleton ensures only one ClusterObservability resource is active in the cluster.
+// Returns true if this instance is the active one, false if conflicted.
+func (r *ClusterObservabilityReconciler) validateSingleton(ctx context.Context, log logr.Logger, instance *v1alpha1.ClusterObservability) (bool, error) {
+ var clusterObservabilityList v1alpha1.ClusterObservabilityList
+ if err := r.List(ctx, &clusterObservabilityList); err != nil {
+ log.Error(err, "failed to list ClusterObservability resources for singleton validation")
+ return false, err
+ }
+
+ // Filter out deleted resources and find the oldest active resource
+ var activeResources []v1alpha1.ClusterObservability
+ for _, co := range clusterObservabilityList.Items {
+ if co.DeletionTimestamp == nil {
+ activeResources = append(activeResources, co)
+ }
+ }
+
+ if len(activeResources) <= 1 {
+ // No conflict, this is the only active resource
+ return true, nil
+ }
+
+ // Multiple resources exist, determine which one should be active
+ // Use oldest by creation timestamp as the winner
+ // If timestamps are equal, use lexicographical name comparison as tie-breaker
+ oldestResource := &activeResources[0]
+ for i := 1; i < len(activeResources); i++ {
+ candidate := &activeResources[i]
+
+ if candidate.CreationTimestamp.Before(&oldestResource.CreationTimestamp) {
+ oldestResource = candidate
+ } else if candidate.CreationTimestamp.Equal(&oldestResource.CreationTimestamp) {
+ candidateKey := candidate.Namespace + "/" + candidate.Name
+ oldestKey := oldestResource.Namespace + "/" + oldestResource.Name
+ if candidateKey < oldestKey {
+ oldestResource = candidate
+ }
+ }
+ }
+
+ isWinner := oldestResource.UID == instance.UID
+
+ if !isWinner {
+ // This resource is conflicted, emit an event and update status
+ r.recorder.Event(instance, corev1.EventTypeWarning, "Conflicted",
+ fmt.Sprintf("Multiple ClusterObservability resources detected. Only %s/%s (oldest) is active",
+ oldestResource.Namespace, oldestResource.Name))
+ log.Info("ClusterObservability resource is conflicted",
+ "active", fmt.Sprintf("%s/%s", oldestResource.Namespace, oldestResource.Name),
+ "conflicted", fmt.Sprintf("%s/%s", instance.Namespace, instance.Name))
+ } else {
+ // This resource is the winner, emit events for conflicted ones
+ for _, conflicted := range activeResources {
+ if conflicted.UID != instance.UID {
+ r.recorder.Event(&conflicted, corev1.EventTypeWarning, "Conflicted",
+ fmt.Sprintf("Multiple ClusterObservability resources detected. Only %s/%s (oldest) is active",
+ instance.Namespace, instance.Name))
+ }
+ }
+ log.Info("ClusterObservability resource is active", "conflicted-count", len(activeResources)-1)
+ }
+
+ return isWinner, nil
+}
+
+// handleDeletion handles the cleanup of ClusterObservability resources and managed objects.
+func (r *ClusterObservabilityReconciler) handleDeletion(ctx context.Context, log logr.Logger, instance *v1alpha1.ClusterObservability) (ctrl.Result, error) {
+ log.Info("Handling ClusterObservability deletion")
+
+ if !controllerutil.ContainsFinalizer(instance, v1alpha1.ClusterObservabilityFinalizer) {
+ // Finalizer already removed, nothing to do
+ return ctrl.Result{}, nil
+ }
+
+ // Clean up all managed resources
+ if err := r.cleanupManagedResources(ctx, log, instance); err != nil {
+ log.Error(err, "failed to cleanup managed resources")
+ r.recorder.Event(instance, corev1.EventTypeWarning, "CleanupFailed",
+ fmt.Sprintf("Failed to cleanup managed resources: %v", err))
+ // Returning the error triggers a rate-limited requeue; a RequeueAfter set
+ // alongside a non-nil error would be ignored by controller-runtime.
+ return ctrl.Result{}, err
+ }
+
+ // Remove finalizer to allow deletion
+ latest := &v1alpha1.ClusterObservability{}
+ if err := r.Get(ctx, client.ObjectKeyFromObject(instance), latest); err != nil {
+ log.Error(err, "failed to get latest ClusterObservability for finalizer removal")
+ return ctrl.Result{}, err
+ }
+
+ controllerutil.RemoveFinalizer(latest, v1alpha1.ClusterObservabilityFinalizer)
+ if err := r.Update(ctx, latest); err != nil {
+ log.Error(err, "failed to remove finalizer")
+ return ctrl.Result{}, err
+ }
+
+ log.Info("Successfully cleaned up ClusterObservability resources")
+ r.recorder.Event(instance, corev1.EventTypeNormal, "Deleted", "ClusterObservability and all managed resources cleaned up")
+
+ return ctrl.Result{}, nil
+}
+
+// cleanupManagedResources deletes cluster-scoped resources managed by ClusterObservability.
+// Namespace-scoped resources (OpenTelemetryCollector and Instrumentation CRs) are automatically
+// cleaned up by Kubernetes garbage collection via owner references.
+func (r *ClusterObservabilityReconciler) cleanupManagedResources(ctx context.Context, log logr.Logger, instance *v1alpha1.ClusterObservability) error {
+ // Only clean up cluster-scoped resources that cannot use owner references
+ if err := r.cleanupClusterScopedResources(ctx, log, instance); err != nil {
+ return fmt.Errorf("failed to cleanup cluster-scoped resources: %w", err)
+ }
+
+ log.Info("Cluster-scoped resources cleaned up successfully")
+ return nil
+}
+
+// cleanupClusterScopedResources removes cluster-scoped resources that can't use owner references.
+func (r *ClusterObservabilityReconciler) cleanupClusterScopedResources(ctx context.Context, log logr.Logger, instance *v1alpha1.ClusterObservability) error {
+ if r.config.OpenShiftRoutesAvailability == openshift.RoutesAvailable {
+ agentCollectorName := fmt.Sprintf("%s-%s", instance.Name, clusterobservability.AgentCollectorSuffix)
+ sccName := fmt.Sprintf("%s-hostaccess", agentCollectorName)
+
+ scc := &unstructured.Unstructured{}
+ scc.SetGroupVersionKind(schema.GroupVersionKind{
+ Group: "security.openshift.io",
+ Version: "v1",
+ Kind: "SecurityContextConstraints",
+ })
+ scc.SetName(sccName)
+
+ if err := r.Delete(ctx, scc); err != nil {
+ if !apierrors.IsNotFound(err) {
+ return fmt.Errorf("failed to delete SecurityContextConstraints %s: %w", sccName, err)
+ }
+ } else {
+ log.Info("Deleted SecurityContextConstraints", "name", sccName)
+ }
+ }
+
+ return nil
+}
+
+// GetOwnedResourceTypes returns CRs directly created by ClusterObservability.
+// Note: We only track OpenTelemetry CRs we create, not the underlying K8s resources
+// (those are managed by OpenTelemetryCollector controller).
+func (r *ClusterObservabilityReconciler) GetOwnedResourceTypes() []client.Object {
+ return []client.Object{
+ &v1beta1.OpenTelemetryCollector{},
+ &v1alpha1.Instrumentation{},
+ }
+}
+
+// findClusterObservabilityOwnedObjects finds OpenTelemetry CRs owned by ClusterObservability for cleanup.
+func (r *ClusterObservabilityReconciler) findClusterObservabilityOwnedObjects(ctx context.Context, params manifests.Params) (map[types.UID]client.Object, error) {
+ const clusterObservabilityResourceOwnerKey = ".metadata.owner"
+ ownedObjects := map[types.UID]client.Object{}
+
+ listOpts := []client.ListOption{
+ client.InNamespace(params.ClusterObservability.Namespace),
+ client.MatchingFields{clusterObservabilityResourceOwnerKey: params.ClusterObservability.Name},
+ }
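+ // MatchingFields relies on the ".metadata.owner" index registered in SetupCaches.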
+
+ ownedObjectTypes := r.GetOwnedResourceTypes()
+ for _, objectType := range ownedObjectTypes {
+ objs, err := getList(ctx, r.Client, objectType, listOpts...)
+ if err != nil {
+ return nil, err
+ }
+ for uid, object := range objs {
+ ownedObjects[uid] = object
+ }
+ }
+
+ return ownedObjects, nil
+}
diff --git a/internal/controllers/common.go b/internal/controllers/common.go
index 7e70127fca..166096833a 100644
--- a/internal/controllers/common.go
+++ b/internal/controllers/common.go
@@ -31,6 +31,10 @@ func isNamespaceScoped(obj client.Object) bool {
case *rbacv1.ClusterRole, *rbacv1.ClusterRoleBinding:
return false
default:
+ // Check for OpenShift SecurityContextConstraints (unstructured)
+ if obj.GetObjectKind().GroupVersionKind().Kind == "SecurityContextConstraints" {
+ return false
+ }
return true
}
}
diff --git a/internal/manifests/clusterobservability/clusterobservability.go b/internal/manifests/clusterobservability/clusterobservability.go
new file mode 100644
index 0000000000..878b7d2cea
--- /dev/null
+++ b/internal/manifests/clusterobservability/clusterobservability.go
@@ -0,0 +1,456 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package clusterobservability
+
+import (
+ "fmt"
+
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+
+ "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
+ "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1"
+ "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/openshift"
+ "github.com/open-telemetry/opentelemetry-operator/internal/manifests"
+ "github.com/open-telemetry/opentelemetry-operator/internal/manifests/clusterobservability/config"
+ "github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
+)
+
+const (
+ ComponentClusterObservability = "cluster-observability"
+
+ // Collector name suffixes.
+ AgentCollectorSuffix = "agent"
+ ClusterCollectorSuffix = "cluster"
+)
+
+// getCollectorImage returns a sensible default collector image when build-time version is not set.
+func getCollectorImage(configuredImage string) string {
+ // A 0.0.0 tag is the fallback produced by development builds; swap it for
+ // the contrib distribution tagged "latest".
+ if configuredImage == "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector:0.0.0" {
+ return "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:latest"
+ }
+ return configuredImage
+}
+
+// Build creates the manifest for the ClusterObservability resource.
+func Build(params manifests.Params) ([]client.Object, error) {
+ var resourceManifests []client.Object
+
+ // Build agent-level collector (DaemonSet)
+ agentCollector, err := buildAgentCollector(params)
+ if err != nil {
+ return nil, fmt.Errorf("failed to build agent collector: %w", err)
+ }
+ if agentCollector != nil {
+ resourceManifests = append(resourceManifests, agentCollector)
+ }
+
+ // Build cluster-level collector (Deployment)
+ clusterCollector, err := buildClusterCollector(params)
+ if err != nil {
+ return nil, fmt.Errorf("failed to build cluster collector: %w", err)
+ }
+ if clusterCollector != nil {
+ resourceManifests = append(resourceManifests, clusterCollector)
+ }
+
+ // Build Instrumentation CRs for all namespaces
+ instrumentations, err := buildInstrumentations(params)
+ if err != nil {
+ return nil, fmt.Errorf("failed to build instrumentation CRs: %w", err)
+ }
+ resourceManifests = append(resourceManifests, instrumentations...)
+
+ // Build OpenShift Security Context Constraints if on OpenShift
+ if isOpenShiftEnvironment(params) {
+ sccResources := buildOpenShiftSCC(params)
+ resourceManifests = append(resourceManifests, sccResources...)
+ }
+
+ return resourceManifests, nil
+}
+
+// buildAgentCollector creates an OpenTelemetryCollector CR for agent-level collection.
+func buildAgentCollector(params manifests.Params) (*v1beta1.OpenTelemetryCollector, error) {
+ co := params.ClusterObservability
+
+ // Load configuration using the config loader
+ configLoader := config.NewConfigLoader()
+
+ // Detect Kubernetes distribution
+ distroProvider := configLoader.DetectDistroProvider(params.Config)
+
+ // Load the configuration
+ collectorConfig, err := configLoader.LoadCollectorConfig(
+ config.AgentCollectorType,
+ distroProvider,
+ co.Spec,
+ )
+ if err != nil {
+ return nil, fmt.Errorf("failed to load agent collector config: %w", err)
+ }
+
+ // Validate the configuration
+ if err := configLoader.ValidateConfig(collectorConfig); err != nil {
+ return nil, fmt.Errorf("agent collector config validation failed: %w", err)
+ }
+
+ agentCollectorName := fmt.Sprintf("%s-%s", co.Name, AgentCollectorSuffix)
+ labels := manifestutils.Labels(co.ObjectMeta, agentCollectorName, params.Config.CollectorImage, ComponentClusterObservability, params.Config.LabelsFilter)
+ labels["app.kubernetes.io/managed-by"] = "opentelemetry-operator"
+ labels["app.kubernetes.io/component"] = ComponentClusterObservability
+
+ agentCollector := &v1beta1.OpenTelemetryCollector{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: agentCollectorName,
+ Namespace: co.Namespace,
+ Labels: labels,
+ OwnerReferences: []metav1.OwnerReference{
+ {
+ APIVersion: co.APIVersion,
+ Kind: co.Kind,
+ Name: co.Name,
+ UID: co.UID,
+ Controller: &[]bool{true}[0],
+ BlockOwnerDeletion: &[]bool{true}[0],
+ },
+ },
+ },
+ Spec: v1beta1.OpenTelemetryCollectorSpec{
+ Mode: v1beta1.ModeDaemonSet,
+ Config: collectorConfig,
+ OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{
+ Image: getCollectorImage(params.Config.CollectorImage),
+ SecurityContext: &corev1.SecurityContext{
+ AllowPrivilegeEscalation: &[]bool{false}[0],
+ Capabilities: &corev1.Capabilities{
+ Drop: []corev1.Capability{"ALL"},
+ },
+ RunAsNonRoot: &[]bool{true}[0],
+ SeccompProfile: &corev1.SeccompProfile{
+ Type: corev1.SeccompProfileTypeRuntimeDefault,
+ },
+ },
+ PodSecurityContext: &corev1.PodSecurityContext{
+ RunAsNonRoot: &[]bool{true}[0],
+ SeccompProfile: &corev1.SeccompProfile{
+ Type: corev1.SeccompProfileTypeRuntimeDefault,
+ },
+ },
+ // Enable host networking for DaemonSet to allow direct port access
+ HostNetwork: true,
+ VolumeMounts: []corev1.VolumeMount{
+ {
+ Name: "host-dev",
+ MountPath: "/hostfs/dev",
+ ReadOnly: true,
+ },
+ {
+ Name: "host-etc",
+ MountPath: "/hostfs/etc",
+ ReadOnly: true,
+ },
+ {
+ Name: "host-proc",
+ MountPath: "/hostfs/proc",
+ ReadOnly: true,
+ },
+ {
+ Name: "host-run-udev-data",
+ MountPath: "/hostfs/run/udev/data",
+ ReadOnly: true,
+ },
+ {
+ Name: "host-sys",
+ MountPath: "/hostfs/sys",
+ ReadOnly: true,
+ },
+ {
+ Name: "host-var-run-utmp",
+ MountPath: "/hostfs/var/run/utmp",
+ ReadOnly: true,
+ },
+ {
+ Name: "host-usr-lib-osrelease",
+ MountPath: "/hostfs/usr/lib/os-release",
+ ReadOnly: true,
+ },
+ {
+ Name: "var-log-pods",
+ MountPath: "/var/log/pods",
+ ReadOnly: true,
+ },
+ {
+ Name: "var-lib-docker-containers",
+ MountPath: "/var/lib/docker/containers",
+ ReadOnly: true,
+ },
+ // OpenShift kubelet CA certificate mount (direct file)
+ {
+ Name: "kubelet-serving-ca",
+ MountPath: "/etc/kubelet-serving-ca/ca-bundle.crt",
+ ReadOnly: true,
+ },
+ },
+ Volumes: []corev1.Volume{
+ {
+ Name: "host-dev",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/dev",
+ },
+ },
+ },
+ {
+ Name: "host-etc",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/etc",
+ },
+ },
+ },
+ {
+ Name: "host-proc",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/proc",
+ },
+ },
+ },
+ {
+ Name: "host-run-udev-data",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/run/udev/data",
+ },
+ },
+ },
+ {
+ Name: "host-sys",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/sys",
+ },
+ },
+ },
+ {
+ Name: "host-var-run-utmp",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/var/run/utmp",
+ },
+ },
+ },
+ {
+ Name: "host-usr-lib-osrelease",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/usr/lib/os-release",
+ },
+ },
+ },
+ {
+ Name: "var-log-pods",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/var/log/pods",
+ },
+ },
+ },
+ {
+ Name: "var-lib-docker-containers",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/var/lib/docker/containers",
+ },
+ },
+ },
+ // OpenShift kubelet CA certificate volume via hostPath
+ {
+ Name: "kubelet-serving-ca",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/etc/kubernetes/kubelet-ca.crt",
+ Type: &[]corev1.HostPathType{corev1.HostPathFile}[0],
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+
+ return agentCollector, nil
+}
+
+// buildClusterCollector creates an OpenTelemetryCollector CR for cluster-level collection.
+func buildClusterCollector(params manifests.Params) (*v1beta1.OpenTelemetryCollector, error) {
+ co := params.ClusterObservability
+
+ // Load configuration using the config loader
+ configLoader := config.NewConfigLoader()
+
+ // Detect Kubernetes distribution
+ distroProvider := configLoader.DetectDistroProvider(params.Config)
+
+ // Load the configuration
+ collectorConfig, err := configLoader.LoadCollectorConfig(
+ config.ClusterCollectorType,
+ distroProvider,
+ co.Spec,
+ )
+ if err != nil {
+ return nil, fmt.Errorf("failed to load cluster collector config: %w", err)
+ }
+
+ // Validate the configuration
+ if err := configLoader.ValidateConfig(collectorConfig); err != nil {
+ return nil, fmt.Errorf("cluster collector config validation failed: %w", err)
+ }
+
+ replicas := int32(1)
+ clusterCollectorName := fmt.Sprintf("%s-%s", co.Name, ClusterCollectorSuffix)
+ clusterLabels := manifestutils.Labels(co.ObjectMeta, clusterCollectorName, params.Config.CollectorImage, ComponentClusterObservability, params.Config.LabelsFilter)
+ clusterLabels["app.kubernetes.io/managed-by"] = "opentelemetry-operator"
+ clusterLabels["app.kubernetes.io/component"] = ComponentClusterObservability
+
+ clusterCollector := &v1beta1.OpenTelemetryCollector{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: clusterCollectorName,
+ Namespace: co.Namespace,
+ Labels: clusterLabels,
+ OwnerReferences: []metav1.OwnerReference{
+ {
+ APIVersion: co.APIVersion,
+ Kind: co.Kind,
+ Name: co.Name,
+ UID: co.UID,
+ Controller: &[]bool{true}[0],
+ BlockOwnerDeletion: &[]bool{true}[0],
+ },
+ },
+ },
+ Spec: v1beta1.OpenTelemetryCollectorSpec{
+ Mode: v1beta1.ModeDeployment,
+ Config: collectorConfig,
+ OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{
+ Image: getCollectorImage(params.Config.CollectorImage),
+ Replicas: &replicas,
+ SecurityContext: &corev1.SecurityContext{
+ AllowPrivilegeEscalation: &[]bool{false}[0],
+ Capabilities: &corev1.Capabilities{
+ Drop: []corev1.Capability{"ALL"},
+ },
+ RunAsNonRoot: &[]bool{true}[0],
+ SeccompProfile: &corev1.SeccompProfile{
+ Type: corev1.SeccompProfileTypeRuntimeDefault,
+ },
+ },
+ PodSecurityContext: &corev1.PodSecurityContext{
+ RunAsNonRoot: &[]bool{true}[0],
+ SeccompProfile: &corev1.SeccompProfile{
+ Type: corev1.SeccompProfileTypeRuntimeDefault,
+ },
+ },
+ },
+ },
+ }
+
+ return clusterCollector, nil
+}
+
+// buildInstrumentations creates a single Instrumentation CR in the namespace of
+// the ClusterObservability resource. Workloads in other namespaces can reference
+// it from the operator's inject annotations using the "<namespace>/<name>" form.
+func buildInstrumentations(params manifests.Params) ([]client.Object, error) {
+ co := params.ClusterObservability
+
+ // Build OTLP exporter endpoint for instrumentation
+ endpoint, err := buildInstrumentationEndpoint(co.Spec)
+ if err != nil {
+ return nil, fmt.Errorf("failed to build instrumentation endpoint: %w", err)
+ }
+
+ // Create a single Instrumentation in the same namespace as the ClusterObservability resource
+ instrumentationLabels := manifestutils.Labels(co.ObjectMeta, co.Name, "", ComponentClusterObservability, params.Config.LabelsFilter)
+ instrumentationLabels["app.kubernetes.io/managed-by"] = "opentelemetry-operator"
+ instrumentationLabels["app.kubernetes.io/component"] = ComponentClusterObservability
+
+ instrumentation := &v1alpha1.Instrumentation{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: co.Name,
+ Namespace: co.Namespace,
+ Labels: instrumentationLabels,
+ OwnerReferences: []metav1.OwnerReference{
+ {
+ APIVersion: co.APIVersion,
+ Kind: co.Kind,
+ Name: co.Name,
+ UID: co.UID,
+ Controller: &[]bool{true}[0],
+ BlockOwnerDeletion: &[]bool{true}[0],
+ },
+ },
+ },
+ Spec: v1alpha1.InstrumentationSpec{
+ Exporter: v1alpha1.Exporter{
+ Endpoint: endpoint,
+ },
+ Propagators: []v1alpha1.Propagator{
+ v1alpha1.TraceContext,
+ v1alpha1.Baggage,
+ v1alpha1.B3,
+ v1alpha1.Jaeger,
+ },
+ Sampler: v1alpha1.Sampler{
+ Type: v1alpha1.ParentBasedTraceIDRatio,
+ Argument: "1.0",
+ },
+ },
+ }
+
+ // Enable instrumentation based on operator configuration
+ if params.Config.EnableJavaAutoInstrumentation {
+ instrumentation.Spec.Java = v1alpha1.Java{
+ Image: params.Config.AutoInstrumentationJavaImage,
+ }
+ }
+ if params.Config.EnableNodeJSAutoInstrumentation {
+ instrumentation.Spec.NodeJS = v1alpha1.NodeJS{
+ Image: params.Config.AutoInstrumentationNodeJSImage,
+ }
+ }
+ if params.Config.EnablePythonAutoInstrumentation {
+ instrumentation.Spec.Python = v1alpha1.Python{
+ Image: params.Config.AutoInstrumentationPythonImage,
+ }
+ }
+ if params.Config.EnableDotNetAutoInstrumentation {
+ instrumentation.Spec.DotNet = v1alpha1.DotNet{
+ Image: params.Config.AutoInstrumentationDotNetImage,
+ }
+ }
+ if params.Config.EnableGoAutoInstrumentation {
+ instrumentation.Spec.Go = v1alpha1.Go{
+ Image: params.Config.AutoInstrumentationGoImage,
+ }
+ }
+
+ return []client.Object{instrumentation}, nil
+}
+
+// buildInstrumentationEndpoint builds the OTLP endpoint for instrumentation.
+// The spec parameter is currently unused; the endpoint always targets the
+// node-local agent collector.
+func buildInstrumentationEndpoint(_ v1alpha1.ClusterObservabilitySpec) (string, error) {
+ // Point to the local node's agent collector.
+ endpoint := "http://$(OTEL_NODE_IP):4317"
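+ // $(OTEL_NODE_IP) is expanded via Kubernetes env-var substitution in the
+ // instrumented container; since the agent DaemonSet runs with hostNetwork,
+ // OTLP traffic stays on the local node.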
+
+ return endpoint, nil
+}
+
+// isOpenShiftEnvironment detects if we're running in an OpenShift environment using cached config.
+func isOpenShiftEnvironment(params manifests.Params) bool {
+ return params.Config.OpenShiftRoutesAvailability == openshift.RoutesAvailable
+}
diff --git a/internal/manifests/clusterobservability/config/configs/agent-collector-base.yaml b/internal/manifests/clusterobservability/config/configs/agent-collector-base.yaml
new file mode 100644
index 0000000000..48a82f33bc
--- /dev/null
+++ b/internal/manifests/clusterobservability/config/configs/agent-collector-base.yaml
@@ -0,0 +1,73 @@
+# Base configuration for agent collectors (DaemonSet)
+# Collects kubelet stats, container logs, and OTLP data from auto-instrumentation
+receivers:
+ otlp:
+ protocols:
+ grpc:
+ endpoint: "0.0.0.0:4317"
+ http:
+ endpoint: "0.0.0.0:4318"
+ kubeletstats:
+ collection_interval: 30s
+ auth_type: serviceAccount
+ endpoint: "https://${env:K8S_NODE_NAME}:10250"
+ metric_groups:
+ - container
+ - pod
+ - node
+ - volume
+ filelog:
+ include:
+ - "/var/log/pods/*/*/*.log"
+ exclude:
+ - "/var/log/pods/*/otc-container/*.log"
+ start_at: end
+ include_file_path: true
+ include_file_name: false
+ operators:
+ - type: router
+ id: get-format
+ routes:
+ - output: parser-docker
+ expr: 'body matches "^\\{"'
+ - output: parser-crio
+ expr: 'body matches "^[^ Z]+ "'
+ - output: parser-containerd
+ expr: 'body matches "^[^ Z]+Z"'
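+ # NOTE: these routes reference parser operators (parser-docker, parser-crio,
+ # parser-containerd) that must be present in the final merged configuration.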
+
+processors:
+ batch: {}
+ resourcedetection:
+ detectors: ["env", "system", "k8snode"]
+ timeout: 2s
+ k8sattributes:
+ auth_type: serviceAccount
+ passthrough: false
+ filter:
+ node_from_env_var: K8S_NODE_NAME
+ extract:
+ metadata:
+ - "k8s.namespace.name"
+ - "k8s.deployment.name"
+ - "k8s.statefulset.name"
+ - "k8s.daemonset.name"
+ - "k8s.cronjob.name"
+ - "k8s.job.name"
+ - "k8s.node.name"
+ - "k8s.pod.name"
+ - "k8s.pod.uid"
+ - "k8s.pod.start_time"
+ pod_association:
+ - sources:
+ - from: resource_attribute
+ name: "k8s.pod.ip"
+ - sources:
+ - from: resource_attribute
+ name: "k8s.pod.uid"
+ - sources:
+ - from: connection
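+ # Association rules are tried in order: pod IP, then pod UID, then the
+ # peer address of the incoming connection.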
+
+exporters: {} # Will be populated by controller
+
+service:
+ pipelines: {} # Will be populated by controller based on enabled signals
\ No newline at end of file
diff --git a/internal/manifests/clusterobservability/config/configs/cluster-collector-base.yaml b/internal/manifests/clusterobservability/config/configs/cluster-collector-base.yaml
new file mode 100644
index 0000000000..e78fe4712e
--- /dev/null
+++ b/internal/manifests/clusterobservability/config/configs/cluster-collector-base.yaml
@@ -0,0 +1,22 @@
+# Base configuration for cluster collectors (Deployment)
+# Collects cluster-level metrics and events
+receivers:
+ k8s_cluster:
+ auth_type: serviceAccount
+ collection_interval: 30s
+ node_conditions_to_report: ["Ready", "MemoryPressure", "DiskPressure", "PIDPressure"]
+ allocatable_types_to_report: ["cpu", "memory", "storage", "pods"]
+
+ k8s_events:
+ auth_type: serviceAccount
+
+processors:
+ batch: {}
+ resourcedetection:
+ detectors: ["env", "system"]
+ timeout: 2s
+
+exporters: {} # Will be populated by controller
+
+service:
+ pipelines: {} # Will be populated by controller based on enabled signals
\ No newline at end of file
diff --git a/internal/manifests/clusterobservability/config/configs/distros/openshift/agent-collector-overrides.yaml b/internal/manifests/clusterobservability/config/configs/distros/openshift/agent-collector-overrides.yaml
new file mode 100644
index 0000000000..5c2fb04828
--- /dev/null
+++ b/internal/manifests/clusterobservability/config/configs/distros/openshift/agent-collector-overrides.yaml
@@ -0,0 +1,80 @@
+# OpenShift specific overrides for agent collector
+# These settings will be merged with agent-collector-base.yaml
+
+receivers:
+ kubeletstats:
+ collection_interval: 10s
+ auth_type: serviceAccount
+ endpoint: "https://${env:K8S_NODE_NAME}:10250"
+ ca_file: /etc/kubelet-serving-ca/ca-bundle.crt
+ metric_groups:
+ - container
+ - pod
+ - node
+ - volume
+ extra_metadata_labels:
+ - container.id
+
+ # OpenShift uses CRI-O container runtime
+ filelog:
+ include:
+ - "/var/log/pods/*/*/*.log"
+ - "/var/log/containers/*.log"
+ exclude:
+ - "/var/log/pods/openshift-*/*/*.log" # Skip OpenShift system logs
+ - "/var/log/pods/kube-*/*/*.log" # Skip kube-system logs
+ - "/var/log/pods/*/otc-container/*.log"
+ start_at: end
+ include_file_path: true
+ include_file_name: false
+ operators:
+ - type: router
+ id: get-format
+ routes:
+ - output: parser-crio
+ expr: 'body matches "^[^ Z]+ "'
+ - output: parser-docker
+ expr: 'body matches "^\\{"'
+ - type: json_parser
+ id: parser-docker
+ if: 'attributes["log_type"] == "docker"'
+ - type: regex_parser
+ id: parser-crio
+ if: 'attributes["log_type"] == "crio"'
+ regex: '^(?P