@@ -26,20 +26,31 @@
 _meter = None
 _token_counter = None
 _request_histogram = None
+_tool_counter = None
+_tool_duration_histogram = None
+_active_conversations_gauge = None
+_llm_request_counter = None
 
 TELEMETRY_AVAILABLE = False
 TELEMETRY_IMPORT_ERROR = None
 
 try:
     from opentelemetry import metrics, trace  # fmt: skip
-    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter  # fmt: skip
+    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
+        OTLPSpanExporter,  # fmt: skip
+    )
     from opentelemetry.exporter.prometheus import PrometheusMetricReader  # fmt: skip
+    from opentelemetry.instrumentation.anthropic import (
+        AnthropicInstrumentor,  # fmt: skip
+    )
     from opentelemetry.instrumentation.flask import FlaskInstrumentor  # fmt: skip
+    from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor  # fmt: skip
     from opentelemetry.instrumentation.requests import RequestsInstrumentor  # fmt: skip
     from opentelemetry.sdk.metrics import MeterProvider  # fmt: skip
     from opentelemetry.sdk.resources import Resource  # fmt: skip
     from opentelemetry.sdk.trace import TracerProvider  # fmt: skip
     from opentelemetry.sdk.trace.export import BatchSpanProcessor  # fmt: skip
+    from prometheus_client import start_http_server  # fmt: skip
 
     TELEMETRY_AVAILABLE = True
 except ImportError as e:
@@ -56,9 +67,17 @@ def init_telemetry(
     service_name: str = "gptme",
     enable_flask_instrumentation: bool = True,
     enable_requests_instrumentation: bool = True,
+    enable_openai_instrumentation: bool = True,
+    enable_anthropic_instrumentation: bool = True,
+    prometheus_port: int = 8000,
 ) -> None:
     """Initialize OpenTelemetry tracing and metrics."""
     global _telemetry_enabled, _tracer, _meter, _token_counter, _request_histogram
+    global \
+        _tool_counter, \
+        _tool_duration_histogram, \
+        _active_conversations_gauge, \
+        _llm_request_counter
 
     # Check if telemetry is enabled via environment variable
     if os.getenv("GPTME_TELEMETRY_ENABLED", "").lower() not in ("true", "1", "yes"):
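
The signature change above keeps telemetry opt-in: nothing happens unless `GPTME_TELEMETRY_ENABLED` is set, and the new keyword arguments only take effect then. A minimal opt-in sketch, assuming the helpers are importable as `gptme.telemetry` (the import path is an assumption, adjust to the actual module):

```python
# Opt-in sketch: telemetry stays off unless GPTME_TELEMETRY_ENABLED is truthy.
import os

os.environ["GPTME_TELEMETRY_ENABLED"] = "true"

from gptme.telemetry import init_telemetry  # assumed module path

# The new keyword arguments allow skipping per-provider instrumentation and
# moving the Prometheus exporter off the default port 8000.
init_telemetry(
    service_name="gptme",
    enable_anthropic_instrumentation=False,
    prometheus_port=9464,
)
```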
@@ -89,7 +108,15 @@ def init_telemetry(
         if hasattr(tracer_provider, "add_span_processor"):
             tracer_provider.add_span_processor(span_processor)  # type: ignore
 
-        # Initialize metrics
+        # Initialize metrics with Prometheus reader
+        prometheus_port = int(os.getenv("PROMETHEUS_PORT", prometheus_port))
+        prometheus_addr = os.getenv("PROMETHEUS_ADDR", "localhost")
+
+        # Start Prometheus HTTP server to expose metrics
+        start_http_server(port=prometheus_port, addr=prometheus_addr)
+
+        # Initialize PrometheusMetricReader which pulls metrics from the SDK
+        # on-demand to respond to scrape requests
         prometheus_reader = PrometheusMetricReader()
         metrics.set_meter_provider(MeterProvider(metric_readers=[prometheus_reader]))
         _meter = metrics.get_meter(service_name)
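
This hunk both starts the `prometheus_client` HTTP server (respecting the `PROMETHEUS_PORT`/`PROMETHEUS_ADDR` overrides) and registers a `PrometheusMetricReader`, so scrapes pull current values straight from the SDK. A quick local check, assuming the default `localhost:8000` and that `init_telemetry()` has already run in the same process:

```python
# Sketch: fetch the exposition endpoint started by start_http_server();
# assumes init_telemetry() ran with the default localhost:8000.
from urllib.request import urlopen

body = urlopen("http://localhost:8000/metrics").read().decode()
# Metric names may carry unit/_total suffixes in the Prometheus format.
print("\n".join(line for line in body.splitlines() if "gptme_" in line))
```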
@@ -107,13 +134,43 @@ def init_telemetry(
             unit="seconds",
         )
 
+        _tool_counter = _meter.create_counter(
+            name="gptme_tool_calls",
+            description="Number of tool calls made",
+            unit="calls",
+        )
+
+        _tool_duration_histogram = _meter.create_histogram(
+            name="gptme_tool_duration_seconds",
+            description="Tool execution duration in seconds",
+            unit="seconds",
+        )
+
+        _active_conversations_gauge = _meter.create_up_down_counter(
+            name="gptme_active_conversations",
+            description="Number of active conversations",
+            unit="conversations",
+        )
+
+        _llm_request_counter = _meter.create_counter(
+            name="gptme_llm_requests",
+            description="Number of LLM API requests made",
+            unit="requests",
+        )
+
         # Auto-instrument Flask and requests if enabled
         if enable_flask_instrumentation:
             FlaskInstrumentor().instrument()
 
         if enable_requests_instrumentation:
             RequestsInstrumentor().instrument()
 
+        if enable_openai_instrumentation:
+            OpenAIInstrumentor().instrument()
+
+        if enable_anthropic_instrumentation:
+            AnthropicInstrumentor().instrument()
+
         _telemetry_enabled = True
 
         # Import console for user-visible messages
@@ -122,6 +179,9 @@ def init_telemetry(
         # Log to console so users know telemetry is active
         console.log("📊 Telemetry enabled - performance metrics will be collected")
         console.log(f"🔍 Traces will be sent via OTLP to {otlp_endpoint}")
+        console.log(
+            f"📈 Prometheus metrics available at http://{prometheus_addr}:{prometheus_port}/metrics"
+        )
 
     except Exception as e:
         logger.error(f"Failed to initialize telemetry: {e}")
@@ -181,6 +241,49 @@ def record_request_duration(
     _request_histogram.record(duration, {"endpoint": endpoint, "method": method})
 
 
+def record_tool_call(
+    tool_name: str,
+    duration: float | None = None,
+    success: bool = True,
+    error_type: str | None = None,
+    error_message: str | None = None,
+) -> None:
+    """Record tool call metrics."""
+    if not is_telemetry_enabled() or _tool_counter is None:
+        return
+
+    attributes = {"tool_name": tool_name, "success": str(success).lower()}
+
+    if error_type:
+        attributes["error_type"] = error_type
+    if error_message:
+        # Truncate long error messages
+        attributes["error_message"] = error_message[:200]
+
+    _tool_counter.add(1, attributes)
+
+    if duration is not None and _tool_duration_histogram is not None:
+        _tool_duration_histogram.record(duration, attributes)
+
+
+def record_conversation_change(delta: int) -> None:
+    """Record change in active conversations (+1 for new, -1 for ended)."""
+    if not is_telemetry_enabled() or _active_conversations_gauge is None:
+        return
+
+    _active_conversations_gauge.add(delta)
+
+
+def record_llm_request(provider: str, model: str, success: bool = True) -> None:
+    """Record LLM API request metrics."""
+    if not is_telemetry_enabled() or _llm_request_counter is None:
+        return
+
+    _llm_request_counter.add(
+        1, {"provider": provider, "model": model, "success": str(success).lower()}
+    )
+
+
 def measure_tokens_per_second(func: F) -> F:
     """Decorator to measure tokens per second for LLM operations."""
 
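
A sketch of how the new helpers might be called from a tool-execution path; `run_tool()` is a stand-in for the real tool execution and the import path is assumed, so this shows intent rather than gptme's actual call sites:

```python
# Hypothetical call site for the new helpers; run_tool() is a placeholder
# and the import path is an assumption.
import time

from gptme.telemetry import record_llm_request, record_tool_call


def run_tool() -> str:
    """Placeholder for actual tool execution."""
    return "ok"


start = time.monotonic()
try:
    run_tool()
    record_tool_call("shell", duration=time.monotonic() - start)
except Exception as e:
    record_tool_call(
        "shell",
        duration=time.monotonic() - start,
        success=False,
        error_type=type(e).__name__,
        error_message=str(e),
    )
    raise

# LLM requests are counted per provider/model label pair.
record_llm_request(provider="openai", model="gpt-4o", success=True)
```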