Skip to content

Commit 586c9d4

Browse files
committed
Aggressive debugging, especially of GC
1 parent 7d3eeba commit 586c9d4

File tree

10 files changed

+34
-24
lines changed

10 files changed

+34
-24
lines changed

arrayjit/lib/assignments.ml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ let sequence l =
107107
{ asgns = sts; embedded_nodes = embs } { asgns = another_st; embedded_nodes = emb } ->
108108
{ asgns = Seq (sts, another_st); embedded_nodes = Set.union embs emb })
109109

110-
let%diagn1_sexp to_low_level code =
110+
let%diagn2_sexp to_low_level code =
111111
let open Indexing in
112112
let get buffer idcs =
113113
let tn = match buffer with Node tn -> tn | Merge_buffer tn -> tn in
@@ -362,9 +362,9 @@ let fprint_hum ?name ?static_indices () ppf c =
362362
loop c;
363363
fprintf ppf "@]"
364364

365-
let lower ~unoptim_ll_source ~ll_source ~cd_source ~name static_indices (proc : t) :
365+
let%track6_sexp lower ~unoptim_ll_source ~ll_source ~cd_source ~name static_indices (proc : t) :
366366
Low_level.optimized =
367-
let llc = to_low_level proc in
367+
let llc: Low_level.t = to_low_level proc in
368368
(* Generate the low-level code before outputting the assignments, to force projections. *)
369369
(match cd_source with
370370
| None -> ()

arrayjit/lib/backend_impl.ml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ module No_device_buffer_and_copying () :
4141
let get_used_memory () = Atomic.get used_memory
4242

4343
let alloc_impl ~size_in_bytes =
44-
let finalize _ptr = ignore (Atomic.fetch_and_add used_memory ~-size_in_bytes : int) in
44+
let%track7_l_sexp finalize (_ptr : buffer_ptr) : unit =
45+
ignore (Atomic.fetch_and_add used_memory ~-size_in_bytes : int)
46+
in
4547
let ptr = Ctypes.(to_voidp @@ allocate_n int8_t ~count:size_in_bytes) in
4648
let _ : int = Atomic.fetch_and_add used_memory size_in_bytes in
4749
Stdlib.Gc.finalise finalize ptr;

arrayjit/lib/backends.ml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,10 @@ module Add_buffer_retrieval_and_syncing (Backend : No_buffer_retrieval_or_syncin
156156
Hashtbl.clear s.updating_for)
157157
end
158158

159-
let lower_assignments ?name bindings asgns =
160-
let name = Option.value_or_thunk name ~default:(fun () -> Assignments.get_name_exn asgns) in
159+
let%track6_sexp lower_assignments ?name bindings asgns =
160+
let name : string =
161+
Option.value_or_thunk name ~default:(fun () -> Assignments.get_name_exn asgns)
162+
in
161163
let unoptim_ll_source = Utils.get_debug_formatter ~fname:(name ^ "-unoptimized.ll") in
162164
let ll_source = Utils.get_debug_formatter ~fname:(name ^ ".ll") in
163165
let cd_source = Utils.get_debug_formatter ~fname:(name ^ ".cd") in
@@ -319,9 +321,11 @@ module Raise_backend (Device : Lowered_backend) : Backend = struct
319321
[@@deriving sexp_of]
320322

321323
let%debug3_sexp compile ?name bindings (comp : Assignments.comp) : code =
322-
let name, lowered = lower_assignments ?name bindings comp.Assignments.asgns in
323-
let code = compile ~name bindings lowered in
324-
let from_prior_context =
324+
let (name : string), (lowered : Low_level.optimized) =
325+
lower_assignments ?name bindings comp.Assignments.asgns
326+
in
327+
let code : Device.code = compile ~name bindings lowered in
328+
let from_prior_context : Tn.t_set =
325329
Set.diff (Assignments.context_nodes ~use_host_memory comp.asgns) comp.embedded_nodes
326330
in
327331
{ from_prior_context; name; lowered; code; expected_merge_node = lowered.Low_level.merge_node }
@@ -500,7 +504,7 @@ let finalize (type buffer_ptr dev runner event)
500504
&& not (Hashtbl.mem ctx.stream.device.cross_stream_candidates key)
501505
then mem_free ctx.stream data)))
502506

503-
let fresh_backend ?backend_name ?(config = Only_devices_parallel) () =
507+
let%track5_sexp fresh_backend ?backend_name ?(config = Only_devices_parallel) () =
504508
let backend =
505509
match
506510
Option.value_or_thunk backend_name ~default:(fun () ->

arrayjit/lib/cc_backend.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ let c_compile_and_load ~f_name =
6666
invalid_arg errors);
6767
(* Note: RTLD_DEEPBIND not available on MacOS. *)
6868
let result = { lib = Dl.dlopen ~filename:libname ~flags:[ RTLD_NOW ]; libname } in
69-
Stdlib.Gc.finalise (fun lib -> Dl.dlclose ~handle:lib.lib) result;
69+
let%track7_l_sexp finalize (lib : library) : unit = Dl.dlclose ~handle:lib.lib in
70+
Stdlib.Gc.finalise finalize result;
7071
result
7172

7273
module C_syntax_config (Input : sig

arrayjit/lib/cuda_backend.cudajit.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ let%track3_sexp cleanup_device (device : device) =
120120
Hashtbl.iter device.cross_stream_candidates ~f:(fun buffer_ptr ->
121121
Cu.Deviceptr.mem_free buffer_ptr)
122122

123-
let%track5_sexp finalize_device device =
123+
let%track5_l_sexp finalize_device (device : device) =
124124
if Atomic.compare_and_set device.released false true then cleanup_device device
125125

126126
let%track3_sexp get_device ~(ordinal : int) : device =

arrayjit/lib/gcc_backend.gccjit.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,8 +601,9 @@ let compile ~(name : string) bindings (lowered : Low_level.optimized) =
601601
(if Utils.settings.output_debug_files_in_build_directory then
602602
let f_name = Utils.build_file @@ name ^ "-gccjit-debug.c" in
603603
Context.dump_to_file ctx ~update_locs:true f_name);
604+
let%track7_l_sexp finalize result = Result.release result in
604605
let result = Context.compile ctx in
605-
Stdlib.Gc.finalise Result.release result;
606+
Stdlib.Gc.finalise finalize result;
606607
Context.release ctx;
607608
{ info; result; bindings; name; params = List.map ~f:snd params }
608609

arrayjit/lib/low_level.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ let visit_llc traced_store ~merge_node_id reverse_node_map ~max_visits llc =
239239
Tn.update_memory_mode tn (Hosted (Changed_on_devices Unset)) 38
240240
else Tn.update_memory_mode tn Materialized 36))
241241

242-
let%diagn_sexp check_and_store_virtual traced static_indices top_llc =
242+
let%diagn2_sexp check_and_store_virtual traced static_indices top_llc =
243243
let exception Non_virtual of int in
244244
let static_indices =
245245
Set.of_list (module Indexing.Symbol)

arrayjit/lib/ndarray.ml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -386,15 +386,15 @@ let create_array ~debug:_debug prec ~dims init_op =
386386
let size_in_bytes =
387387
(if Array.length dims = 0 then 0 else Array.reduce_exn dims ~f:( * )) * Ops.prec_in_bytes prec
388388
in
389-
let%diagn2_sexp finalizer _result =
389+
let%track7_l_sexp finalizer (_result : t) =
390390
let _ : int = Atomic.fetch_and_add used_memory size_in_bytes in
391-
[%log "Deleting", _debug, ptr_to_string_hum _result]
391+
[%log3 "Deleting", _debug, ptr_to_string_hum _result]
392392
in
393393
let f prec = as_array prec @@ create_bigarray prec ~dims init_op in
394394
let result = Ops.map_prec { f } prec in
395395
Stdlib.Gc.finalise finalizer result;
396396
let _ : int = Atomic.fetch_and_add used_memory size_in_bytes in
397-
[%debug2_sexp
397+
[%debug3_l_sexp
398398
[%log_block
399399
"create_array";
400400
[%log _debug, ptr_to_string_hum result]]];

arrayjit/lib/schedulers.ml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -191,25 +191,27 @@ module Multicore (Backend : For_add_scheduler) :
191191
reader_streams = Hashtbl.create (module Tnode);
192192
})
193193

194-
module Dynarr = Stdlib.Dynarray
195-
196194
let num_devices () = 1
197195
let suggested_num_streams _device = Domain.recommended_domain_count () - 1
198196

199-
let cleanup_stream (stream : stream) =
200-
assert (Domain.is_main_domain ());
197+
let%track7_l_sexp cleanup_stream (stream : stream) : unit =
198+
(* Allow running in parallel. *)
199+
(* assert (Domain.is_main_domain ()); *)
200+
[%log "cleanup_stream: await stream"];
201201
await stream;
202-
let r = stream.runner in
202+
let r : runner = stream.runner in
203203
r.state.keep_spinning <- false;
204+
[%log "cleanup_stream: broadcasting r.state.dev_wait_for_work to wake up the worker"];
204205
Stdlib.Condition.broadcast r.state.dev_wait_for_work;
206+
[%log "cleanup_stream: joining the domain"];
205207
Domain.join r.domain
206208

207209
let get_device ~ordinal =
208210
if ordinal <> 0 then
209211
invalid_arg [%string "Multicore_scheduler.get_device %{ordinal#Int}: only device 0 exists"];
210212
device
211213

212-
let new_stream _device =
214+
let%track5_sexp new_stream _device =
213215
assert (Domain.is_main_domain ());
214216
let stream = spinup_stream () in
215217
Stdlib.Gc.finalise cleanup_stream stream;

lib/tensor.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ type session_state = {
7272
let session_state =
7373
{ next_id = 0; forward_roots = Map.empty (module Int); backprop_roots = Map.empty (module Int) }
7474

75-
let unsafe_reinitialize () =
75+
let%track5_sexp unsafe_reinitialize () =
7676
session_state.next_id <- 0;
7777
session_state.forward_roots <- Map.empty (module Int);
7878
session_state.backprop_roots <- Map.empty (module Int);

0 commit comments

Comments
 (0)