Skip to content

Commit ab7a269

Browse files
authored
Add ruby collector (#1587)
<!-- ELLIPSIS_HIDDEN --> > [!IMPORTANT] > Add Ruby collector to BAML for enhanced tracing and logging, updating FFI, client templates, and tests. > > - **Behavior**: > - Adds `Collector` class to `log_collector.rs` for managing function logs and usage. > - Updates `BamlRuntimeFfi` in `lib.rs` to handle collectors in `call_function` and `stream_function`. > - Modifies `client.rb.j2` to support `collector` in `baml_options`. > - **Models**: > - Introduces `FunctionLog`, `Usage`, `Timing`, `StreamTiming`, `LLMCall`, and `LLMStreamCall` in `log_collector.rs`. > - Implements `lang_wrapper!` macro in `lang_wrapper.rs` for wrapping types. > - **Dependencies**: > - Updates `Cargo.toml` to use `serde_magnus` from the registry. > - **Tests**: > - Adds `test_collector.rb` for testing collector functionality. > - Updates `README.md` in `integ-tests/ruby` with test instructions. > - **Misc**: > - Registers new classes in `init` function in `lib.rs`. > - Adds `define_all_in_ruby` in `log_collector.rs` to register Ruby classes. > > <sup>This description was created by </sup>[<img alt="Ellipsis" src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=BoundaryML%2Fbaml&utm_source=github&utm_medium=referral)<sup> for dbfa014. It will automatically update as commits are pushed.</sup> <!-- ELLIPSIS_HIDDEN -->
1 parent 223432b commit ab7a269

13 files changed

Lines changed: 4423 additions & 1163 deletions

File tree

engine/Cargo.lock

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

engine/baml-runtime/src/tracingv2/storage/storage.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,19 @@
55
//! for a single FunctionId, even if multiple Collectors or FunctionLogs want it.
66
//! It uses manual reference counting (`inc_ref` / `dec_ref`) to free memory for
77
//! a FunctionId as soon as there are no more "owners."
8+
use baml_types::tracing::events::{
9+
FunctionEnd, FunctionId, FunctionStart, HTTPRequest, HTTPResponse, LoggedLLMRequest,
10+
LoggedLLMResponse, TraceData, TraceEvent,
11+
};
812
use indexmap::{IndexMap, IndexSet};
913
use once_cell::sync::Lazy;
14+
use serde::Serialize;
1015
use std::collections::{HashMap, HashSet};
1116
use std::fmt;
1217
use std::hash::Hash;
1318
use std::sync::{Arc, Mutex};
1419
use uuid::Uuid;
1520

16-
use baml_types::tracing::events::{
17-
FunctionEnd, FunctionId, FunctionStart, HTTPRequest, HTTPResponse, LoggedLLMRequest,
18-
LoggedLLMResponse, TraceData, TraceEvent,
19-
};
20-
2121
use crate::tracingv2::publisher::publisher::PublisherMessage;
2222

2323
use super::super::publisher::PUBLISHING_CHANNEL;
@@ -470,7 +470,7 @@ impl Drop for FunctionLog {
470470
/// Represents the "inner" data for a single function call
471471
/// (the real set of usage/calls/timing, etc.).
472472
///
473-
#[derive(Debug, Clone)]
473+
#[derive(Debug, Clone, Serialize)]
474474
pub struct FunctionLogInner {
475475
pub id: String,
476476
pub function_name: String,
@@ -489,28 +489,29 @@ impl FunctionLogInner {
489489
}
490490
}
491491

492-
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq)]
492+
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Serialize)]
493493
pub struct Usage {
494494
pub input_tokens: Option<i64>,
495495
pub output_tokens: Option<i64>,
496496
}
497497

498-
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq)]
498+
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Serialize)]
499499
pub struct Timing {
500500
pub start_time_utc_ms: i64,
501501
pub duration_ms: Option<i64>,
502502
pub time_to_first_parsed_ms: Option<i64>,
503503
}
504504

505-
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq)]
505+
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Serialize)]
506506
pub struct StreamTiming {
507507
pub start_time_utc_ms: i64,
508508
pub duration_ms: Option<i64>,
509509
pub time_to_first_parsed_ms: Option<i64>,
510510
pub time_to_first_token_ms: Option<i64>,
511511
}
512512

513-
#[derive(Debug, Clone)]
513+
#[derive(Debug, Clone, Serialize)]
514+
#[serde(untagged)]
514515
pub enum LLMCallKind {
515516
Basic(LLMCall),
516517
Stream(LLMStreamCall),
@@ -526,7 +527,7 @@ impl LLMCallKind {
526527
}
527528
}
528529

529-
#[derive(Debug, Default, Clone)]
530+
#[derive(Debug, Default, Clone, Serialize)]
530531
pub struct LLMCall {
531532
pub client_name: String,
532533
pub provider: String,
@@ -537,7 +538,7 @@ pub struct LLMCall {
537538
pub selected: bool,
538539
}
539540

540-
#[derive(Debug, Default, Clone)]
541+
#[derive(Debug, Default, Clone, Serialize)]
541542
pub struct LLMStreamCall {
542543
pub client_name: String,
543544
pub provider: String,

engine/language_client_codegen/src/ruby/templates/client.rb.j2

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ module Baml
5151
{% for (name, type) in fn.args -%}
5252
{{name}}: {{type}},
5353
{%- endfor %}
54-
baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)]
54+
baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]))]
5555
).returns({{ fn.return_type }})
5656
}
5757
def {{fn.name}}(
@@ -65,8 +65,14 @@ module Baml
6565
{# We rely on sorbet-runtime to give errors about the list of allowed kwargs #}
6666
raise ArgumentError.new("{{fn.name}} may only be called with keyword arguments")
6767
end
68-
if (baml_options.keys - [:client_registry, :tb]).any?
69-
raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}")
68+
if (baml_options.keys - [:client_registry, :tb, :collector]).any?
69+
raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb, :collector): #{baml_options.keys - [:client_registry, :tb, :collector]}")
70+
end
71+
72+
collector = if baml_options[:collector]
73+
baml_options[:collector].is_a?(Array) ? baml_options[:collector] : [baml_options[:collector]]
74+
else
75+
[]
7076
end
7177

7278
raw = @runtime.call_function(
@@ -79,6 +85,7 @@ module Baml
7985
@ctx_manager,
8086
baml_options[:tb]&.instance_variable_get(:@registry),
8187
baml_options[:client_registry],
88+
collector,
8289
)
8390
(raw.parsed_using_types(Baml::Types, Baml::PartialTypes, false))
8491
end
@@ -103,7 +110,7 @@ module Baml
103110
{% for (name, type) in fn.args -%}
104111
{{name}}: {{type}},
105112
{%- endfor %}
106-
baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)]
113+
baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]))]
107114
).returns(Baml::BamlStream[{{ fn.return_type }}])
108115
}
109116
def {{fn.name}}(
@@ -117,8 +124,14 @@ module Baml
117124
{# We rely on sorbet-runtime to give errors about the list of allowed kwargs #}
118125
raise ArgumentError.new("{{fn.name}} may only be called with keyword arguments")
119126
end
120-
if (baml_options.keys - [:client_registry, :tb]).any?
121-
raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}")
127+
if (baml_options.keys - [:client_registry, :tb, :collector]).any?
128+
raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb, :collector): #{baml_options.keys - [:client_registry, :tb, :collector]}")
129+
end
130+
131+
collector = if baml_options[:collector]
132+
baml_options[:collector].is_a?(Array) ? baml_options[:collector] : [baml_options[:collector]]
133+
else
134+
[]
122135
end
123136

124137
raw = @runtime.stream_function(
@@ -131,6 +144,7 @@ module Baml
131144
@ctx_manager,
132145
baml_options[:tb]&.instance_variable_get(:@registry),
133146
baml_options[:client_registry],
147+
collector,
134148
)
135149
Baml::BamlStream[{{fn.partial_return_type}}, {{fn.return_type}}].new(
136150
ffi_stream: raw,

engine/language_client_ruby/Gemfile.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
PATH
22
remote: .
33
specs:
4-
baml (0.76.2)
4+
baml (0.78.0)
55

66
GEM
77
remote: https://rubygems.org/

engine/language_client_ruby/ext/ruby_ffi/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ rb-sys = { version = "0.9.99", features = [
3030
] }
3131
serde.workspace = true
3232
serde_json.workspace = true
33-
serde_magnus = { git = "https://github.com/BoundaryML/serde-magnus.git", branch = "sam/magnus-0.7.1" }
33+
serde_magnus = { version = "0.9.0" }
3434
tokio = { version = "1", features = ["full"] }
3535
tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter","valuable"] }
3636

engine/language_client_ruby/ext/ruby_ffi/src/lib.rs

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use baml_runtime::BamlRuntime;
22
use baml_types::BamlValue;
3-
use magnus::{class, function, method, prelude::*, Error, RHash, Ruby};
3+
use magnus::{class, function, method, prelude::*, Error, RArray, RHash, Ruby};
44
use std::collections::HashMap;
55
use std::path::PathBuf;
66
use std::sync::Arc;
77
use tracing_subscriber::EnvFilter;
8+
use types::log_collector::Collector;
89

910
use function_result::FunctionResult;
1011
use function_result_stream::FunctionResultStream;
@@ -112,6 +113,7 @@ impl BamlRuntimeFfi {
112113
ctx: &RuntimeContextManager,
113114
type_registry: Option<&types::type_builder::TypeBuilder>,
114115
client_registry: Option<&types::client_registry::ClientRegistry>,
116+
collector: RArray,
115117
) -> Result<FunctionResult> {
116118
let args = match ruby_to_json::RubyToJson::convert_hash_to_json(args) {
117119
Ok(args) => args.into_iter().collect(),
@@ -123,14 +125,18 @@ impl BamlRuntimeFfi {
123125
}
124126
};
125127

128+
let mut collectors = Vec::new();
129+
for i in collector.into_iter() {
130+
collectors.push(<&Collector>::try_convert(i)?.inner.clone());
131+
}
132+
126133
let retval = match rb_self.t.block_on(rb_self.inner.call_function(
127134
function_name.clone(),
128135
&args,
129136
&ctx.inner,
130137
type_registry.map(|t| &t.inner),
131138
client_registry.map(|c| c.inner.borrow_mut()).as_deref(),
132-
// TODO: wire this
133-
Some(vec![]),
139+
Some(collectors),
134140
)) {
135141
(Ok(res), _) => Ok(FunctionResult::new(res)),
136142
(Err(e), _) => Err(Error::new(
@@ -153,6 +159,7 @@ impl BamlRuntimeFfi {
153159
ctx: &RuntimeContextManager,
154160
type_registry: Option<&types::type_builder::TypeBuilder>,
155161
client_registry: Option<&types::client_registry::ClientRegistry>,
162+
collector: RArray,
156163
) -> Result<FunctionResultStream> {
157164
let args = match ruby_to_json::RubyToJson::convert_hash_to_json(args) {
158165
Ok(args) => args.into_iter().collect(),
@@ -164,6 +171,11 @@ impl BamlRuntimeFfi {
164171
}
165172
};
166173

174+
let mut collectors = Vec::new();
175+
for i in collector.into_iter() {
176+
collectors.push(<&Collector>::try_convert(i)?.inner.clone());
177+
}
178+
167179
log::debug!("Streaming {function_name} with:\nargs: {args:#?}\nctx ???");
168180

169181
let retval = match rb_self.inner.stream_function(
@@ -172,8 +184,7 @@ impl BamlRuntimeFfi {
172184
&ctx.inner,
173185
type_registry.map(|t| &t.inner),
174186
client_registry.map(|c| c.inner.borrow_mut()).as_deref(),
175-
// TODO: wire this
176-
Some(vec![]),
187+
Some(collectors),
177188
) {
178189
Ok(res) => Ok(FunctionResultStream::new(res, rb_self.t.clone())),
179190
Err(e) => Err(Error::new(
@@ -255,10 +266,10 @@ fn init(ruby: &Ruby) -> Result<()> {
255266
"create_context_manager",
256267
method!(BamlRuntimeFfi::create_context_manager, 0),
257268
)?;
258-
runtime_class.define_method("call_function", method!(BamlRuntimeFfi::call_function, 5))?;
269+
runtime_class.define_method("call_function", method!(BamlRuntimeFfi::call_function, 6))?;
259270
runtime_class.define_method(
260271
"stream_function",
261-
method!(BamlRuntimeFfi::stream_function, 5),
272+
method!(BamlRuntimeFfi::stream_function, 6),
262273
)?;
263274

264275
FunctionResult::define_in_ruby(&module)?;
@@ -277,6 +288,9 @@ fn init(ruby: &Ruby) -> Result<()> {
277288
types::media::Audio::define_in_ruby(&module)?;
278289
types::media::Image::define_in_ruby(&module)?;
279290

291+
// Register the new log collector classes
292+
types::log_collector::define_all_in_ruby(&module)?;
293+
280294
// everything below this is for our own testing purposes
281295
module.define_module_function(
282296
"roundtrip",

engine/language_client_ruby/ext/ruby_ffi/src/types/lang_wrapper.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,33 @@ macro_rules! lang_wrapper {
2828
}
2929
};
3030

31+
32+
($name:ident, $wrap_name:expr, $type:ty, clone_safe $(, $attr_name:ident : $attr_type:ty)*) => {
33+
#[magnus::wrap(class = $wrap_name, free_immediately, size)]
34+
pub struct $name {
35+
pub(crate) inner: std::sync::Arc<$type>,
36+
$($attr_name: $attr_type),*
37+
}
38+
39+
impl From<$type> for $name {
40+
fn from(inner: $type) -> Self {
41+
Self {
42+
inner: std::sync::Arc::new(inner),
43+
$($attr_name: Default::default()),*
44+
}
45+
}
46+
}
47+
48+
49+
impl From<std::sync::Arc<$type>> for $name {
50+
fn from(inner: std::sync::Arc<$type>) -> Self {
51+
Self {
52+
inner,
53+
$($attr_name: Default::default()),*
54+
}
55+
}
56+
}
57+
};
58+
59+
3160
}

0 commit comments

Comments
 (0)