diff --git a/CHANGES.md b/CHANGES.md index 8d2623d..dbe8a9a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,7 +1,15 @@ +## v3.2.0 (04/111/2023) + +* Fix potential GC corruption when serialising large Yaml + buffers (#75 @hhugo) + +* Add missing `?len` argument to `yaml_of_string` to specify + buffer size (#74 @hhugo) + ## v3.1.0 (27/03/2022) * Support MSVC with .obj and .lib extensions, and defining - -DDYAML_DECLARE_EXPORT (@jonahbeckford #53) + `-DDYAML_DECLARE_EXPORT` (@jonahbeckford #53) * Upgrade to dune 2 (@TheLortex, #54) diff --git a/ffi/bindings/yaml_bindings.ml b/ffi/bindings/yaml_bindings.ml index 9f83ff0..e99b01c 100644 --- a/ffi/bindings/yaml_bindings.ml +++ b/ffi/bindings/yaml_bindings.ml @@ -57,7 +57,7 @@ module M (F : Ctypes.FOREIGN) = struct foreign "yaml_emitter_set_output_string" C.( ptr T.Emitter.t - @-> ocaml_bytes + @-> ptr char @-> size_t @-> ptr size_t @-> returning void) diff --git a/lib/stream.ml b/lib/stream.ml index 7606a71..189831a 100644 --- a/lib/stream.ml +++ b/lib/stream.ml @@ -261,7 +261,7 @@ let do_parse { p; event } = type emitter = { e : T.Emitter.t Ctypes.structure Ctypes.ptr; event : T.Event.t Ctypes.structure Ctypes.ptr; - buf : Bytes.t; + buf : char Ctypes.ptr; written : Unsigned.size_t Ctypes.ptr; } @@ -273,15 +273,16 @@ let emitter ?(len = 65535 * 4) () = let event = Ctypes.(allocate_n T.Event.t ~count:1) in let written = Ctypes.allocate_n Ctypes.size_t ~count:1 in let r = B.emitter_init e in - let buf = Bytes.create len in - let len = Bytes.length buf |> Unsigned.Size_t.of_int in - B.emitter_set_output_string e (Ctypes.ocaml_bytes_start buf) len written; + let buf = Ctypes.(allocate_n Ctypes.char ~count:len) in + let len = Unsigned.Size_t.of_int len in + B.emitter_set_output_string e buf len written; match r with | 1 -> Ok { e; event; written; buf } | n -> Error (`Msg ("error initialising emitter: " ^ string_of_int n)) let emitter_buf { buf; written } = - Ctypes.(!@written) |> Unsigned.Size_t.to_int |> Bytes.sub buf 0 + let length = Ctypes.(!@written) |> Unsigned.Size_t.to_int in + Ctypes.string_from_ptr buf ~length let check l a = match a with diff --git a/lib/yaml.ml b/lib/yaml.ml index f3eabba..6479fa0 100644 --- a/lib/yaml.ml +++ b/lib/yaml.ml @@ -115,15 +115,15 @@ let to_string ?len ?(encoding = `Utf8) ?scalar_style ?layout_style (v : value) = document_end t >>= fun () -> stream_end t >>= fun () -> let r = Stream.emitter_buf t in - Ok (Bytes.to_string r) + Ok r let to_string_exn ?len ?encoding ?scalar_style ?layout_style s = match to_string ?len ?encoding ?scalar_style ?layout_style s with | Ok s -> s | Error (`Msg m) -> raise (Invalid_argument m) -let yaml_to_string ?(encoding = `Utf8) ?scalar_style ?layout_style v = - emitter () >>= fun t -> +let yaml_to_string ?len ?(encoding = `Utf8) ?scalar_style ?layout_style v = + emitter ?len () >>= fun t -> stream_start t encoding >>= fun () -> document_start t >>= fun () -> let rec iter = function @@ -151,7 +151,7 @@ let yaml_to_string ?(encoding = `Utf8) ?scalar_style ?layout_style v = document_end t >>= fun () -> stream_end t >>= fun () -> let r = Stream.emitter_buf t in - Ok (Bytes.to_string r) + Ok r let yaml_of_string s = let open Event in diff --git a/lib/yaml.mli b/lib/yaml.mli index 285f107..bd244ab 100644 --- a/lib/yaml.mli +++ b/lib/yaml.mli @@ -144,7 +144,7 @@ val to_string : (** [to_string v] converts the JSON value to a Yaml string representation. The [encoding], [scalar_style] and [layout_style] control the various output parameters. The current implementation uses a non-resizable internal string - buffer of 64KB, which can be increased via [len]. *) + buffer of 256KB, which can be increased via [len]. *) val to_string_exn : ?len:int -> @@ -169,6 +169,7 @@ val yaml_of_string : string -> yaml res Yaml-specific information such as anchors. *) val yaml_to_string : + ?len:int -> ?encoding:encoding -> ?scalar_style:scalar_style -> ?layout_style:layout_style -> @@ -177,7 +178,7 @@ val yaml_to_string : (** [yaml_to_string v] converts the Yaml value to a string representation. The [encoding], [scalar_style] and [layout_style] control the various output parameters. The current implementation uses a non-resizable internal string - buffer of 16KB, which can be increased via [len]. *) + buffer of 256KB, which can be increased via [len]. *) (** {2 JSON/Yaml conversion functions} *) @@ -277,7 +278,7 @@ module Stream : sig buffer that the output is written into is. In the future, [len] will be redundant as the buffer will be dynamically allocated. *) - val emitter_buf : emitter -> Bytes.t + val emitter_buf : emitter -> string val emit : emitter -> Event.t -> unit res val document_start : ?version:version -> ?implicit:bool -> emitter -> unit res val document_end : ?implicit:bool -> emitter -> unit res diff --git a/tests/test_emit.ml b/tests/test_emit.ml index a49fc8e..814c269 100644 --- a/tests/test_emit.ml +++ b/tests/test_emit.ml @@ -43,5 +43,5 @@ let v () = S.stream_end t >>= fun () -> Printf.printf "written: %d\n%!" (S.emitter_written t); let r = S.emitter_buf t in - print_endline (Bytes.to_string r); + print_endline r; Ok () diff --git a/tests/test_reflect.ml b/tests/test_reflect.ml index ca60e2c..eec0e25 100644 --- a/tests/test_reflect.ml +++ b/tests/test_reflect.ml @@ -31,5 +31,5 @@ let v file = iter_until_done (reflect e) >>= fun () -> let r = Yaml.Stream.emitter_buf e in print_endline buf; - print_endline (Bytes.to_string r); + print_endline r; Ok ()