This repository has been archived by the owner on Sep 19, 2018. It is now read-only.
/
jenga-talk.tex
1056 lines (882 loc) · 33.7 KB
/
jenga-talk.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass{beamer}
\usepackage{helvet}
\usepackage{textpos}
\usepackage{setspace}
\useinnertheme{circles}
\definecolor{jswhite}{rgb}{0.9,0.9,0.9}
\definecolor{jsyellow}{rgb}{0.9,0.7,0.1}
\definecolor{jsblue}{rgb}{0,0,0.3}
\definecolor{jslightblue}{rgb}{0,0.3,0.3}
\setbeamercolor{normal text}{fg=black,bg=white}
\setbeamercolor{structure}{fg=orange}
\setbeamercolor*{separation line}{fg=jslightblue}
\setbeamercolor*{fine separation line}{fg=jslightblue}
\setbeamercolor*{frametitle}{fg=orange}
\setbeamerfont{frametitle}{series=\bfseries}
\begin{document}
\title{\Huge{\bf Jenga}\vskip24pt\large\textcolor{black}{The design of an expressive and scalable build system}}
\date{
\includegraphics[width=4cm]{new-jane-street-logo.pdf}
}
\addtobeamertemplate{frametitle}{}{%
\begin{textblock*}{3cm}(.77\textwidth,5.4cm)
\vskip26pt\hskip20pt
\includegraphics[width=2cm]{new-jane-street-logo.pdf}
\end{textblock*}\bf}
\setbeamertemplate{navigation symbols}{}
\let\oldframetitle\frametitle
\renewcommand{\frametitle}[1]{%
\oldframetitle{\vskip4pt\hskip4pt#1}\setstretch{1.4}}
\newcommand\gdb{{\tt gdb}}
\newcommand\gap{\vskip18pt}
\newcommand\highlight[1]{\colorbox{orange}{\textcolor{black}{#1}}}
%----------------------------------------------------------------------
\begin{frame}
\titlepage
\end{frame}
% This talk is about jenga.
% - a new build tool developed a janestreet -
% and various aspects of it design and implementation
% Two parts: design (user persepective); implementation
%- SHORT TALK - part 2 significantly reduced.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{What is a build system?}
\begin{center}
\highlight{Build tool + build rules}
\end{center}
\begin{itemize}
\item General purpose build tool
\item Rules are provided by the user
\begin{itemize}
\item targets; actions; {\em dependencies}
\item shared framework (e.g. for a whole company)
\item per-project config
\end{itemize}
\item Build process is demand driven
\end{itemize}
\end{frame}
% Tool & rules:
% - Rules provided by user; interpreted by the tool
% The build tool is responsible to orchestrate the minimal sequence of
% actions necessary to bring a set of generated targets up to date.
% General purpose tool
% - no build in knowledge of specific languages or compilation tools
% - no magic handling
% - just a consistent general purpose build model
% The overall build process is demand driven.
% We request the system to build some top level target, perhaps main.exe
% Rules (provided by user) decribe actions to run to create generated targets.
% And any dependencies (other files) required for the action.
% Example: a rule for linking
% - linking stage might describes how some .o are linked into a .exe
% - the .o's are the dependencies.
% - if these are also generated targets (not sources), as expected, there will other rules for each .o
% - then before the link is run the .os must be generated, or brought up to date
% - by running whatever action is responsible for creating them
% - and finally the link command can be run (or not, if we discover it is not required)
% Commonly, rules are split. framework + per-project config
% Dont want every developer to have to be a build system expert
% They should say: build all .ml in this dir into library ``foo'', making use of lib ``bar''
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Why a new build system?}
\begin{itemize}
\item Already so many to choose from:
\begin{itemize}
\item make, omake, ocamlbuild, ninja, tup, redo, shake...
\end{itemize}
\item Jane Street workflow
\begin{itemize}
\item One big tree! (almost)
\begin{itemize}
\item 3500 dirs; 33k files; 2.2m lines OCaml
\end{itemize}
\item Two workflows
\end{itemize}
\item Issues: correctness \& scalability
\end{itemize}
\end{frame}
% So, the first question is probably: why on earth did we feel the
% need to develop yet another build tool. Does the world not have enough?
% - no other existing tool has the full set of features we deem important.
% - omake (what we used before), comes closest
%
% Overview of JS workflow: One big tree; two workflows...
% One tree; big - main advantage of a single tree. Can modify
% interfaces to heavily shared common library (improve names/types)
% and fix up all callers in one consistent change-set
% Two flows...
% Continuous integration
% - remote box; full tree; guard bad commits; incremental but restarted
%
% Individual development
% - local box; own project subtree; incremental, polling
% Before jenga, janestreet used omake - pretty good!
% Has lots of features we view as important.
% And solves many issues newer tools fail on.
% Brief list''
% - one build instance for whole tree
% - quick
% - polling
% - dynamic dependencies
% - programmable rule generation
%
% Problems, increasing over time...
% - at JS we have stretched it scalability to the limit
% - more importantly: hard to program
%
% Thing we dislike the most about omake, is the omake language itself.
% - dynamic typing; dynamic scoping; scripting language not a programming language
% Correctness of build rules.
% Golden test: Incremental = From-clean
%
% - want performance of incremenal builds, but with semantics of form-clean builds
% - requires accurate spec of dependencies.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\huge
\begin{center}
{\bf \textcolor{orange}{Design}}
\end{center}
\end{frame}
% First part of talk: design; focusing on user persepective
% How jenga is used: as a tool, and as system to describe build rules
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Necessities}
\begin{itemize}
\item Programmable
\begin{itemize}
\item rule generation in a {\em real} programming language
\item {\tt jengaroot.ml} dynamically compiled and loaded
\end{itemize}
\item Incremental
\begin{itemize}
\item {\em the} point of a build system
\end{itemize}
\item Polling (inotify)
\begin{itemize}
\item for individual development
\end{itemize}
\item Parallel
\begin{itemize}
\item run compilation actions in parallel
\end{itemize}
\end{itemize}
\end{frame}
% Programable
% Rules are coded in OCaml, against an API. No DSL here.
% {\tt jengaroot.ml} loaded using {\tt ocaml\_plugin}
% omake had DSL. Ok in the small, but for a large engineering project it becomes unmanageable
% Question. why wouldn't you want to use a real programming language?
% and so an early design decision of jenga was for the rules to be described in ocaml, using an API
% Incremental
% If not incremental. Your not a build system, but a script.
% We want to be able to rely on our incremental builds being correct.
% Requires that rules declare correct deps... any file wich is read by action
% Golden test: Incremental = From-clean
% How many times have you heard a broken build excesed with ``oh, just clean and build again''
% clear inditement that the build system is broken
% Not acceptable when a full tree build takes 1--2 hours
% Would be nice to avoid this explicit statement of deps. Dream system wold avoid it.
% Instead we focus on giving user means to make accurate specification of deps
% We need our build tool to to offer a language rich enough to express
% complex (& dynamic) dependencies for rules and rule generation,
% which exists in the real world
% Persistant (imp detail of incremental build)
% Use persistent store to record record of every action run.
% - digest of deps & targets + the exact command executed.
% so we can tell when we dont have to run again.
% Polling
% for interactive use; had in oamke. lovely feature
% rerun compilation ASAP, even while still building other parts of the tree
% Parallel build:
% jenga extract maximum parallel as expressed in the rules
% throttled by -j argument
% Commonly choose to set this to number of cores. Or higher and let OS slice.
% Most relevant when building a full tree ``from clean''
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Correctness}
\begin{itemize}
\item Rules are hard
\begin{itemize}
\item Requires detailed knowledge of toolchain
\item Accurate dependencies are required for consistent builds
\end{itemize}
\item Dependencies are not always static
\begin{itemize}
\item ``scanner dependencies''
\item glob dependencies ({\tt ls *.ml})
\end{itemize}
\item Rule generation
\begin{itemize}
\item not a distinct phase (unlike {\tt ninja})
\item may also have dependencies
\end{itemize}
\end{itemize}
\end{frame}
% The most important thing is dependencies.
% Without accurate deps, there can be no hope of a correct incremental builds.
% Developing \& maintaining a build system is a major engineering project.
% Real world deps are not trivial, and very often dynamic.
% - dont know what the deps are before the build starts
% - they are discovered during the build process
% Often we dont even know what targets have rules before we start
% - i.e. setting up a compile rule for all .ml in a directory
% Dynamic dependencies
% generalisation of what some build tools call scanner dependencies.
% Example: gcc -M to detect which header files are required
% (if headers are generated - need fixpointing)
% for ocaml - ocamldep.. also incomplete soltion
%
% In jenga, arbitrary conditional deps can be expressed
% example: ocaml compilation rule setup depends on existance/or not of .mli
%
% Jenga introduces ``glob dependencies'' i.e. *.ml
% Most commonly used during rule configuration
% And this also works properly in polling mode where we can be
% triggered as new files appear or exiting files disappear
% in omake, *.ml did not wrk in polling mode, requiring restart of omake
% rule generation.
% Not a distinct phase. Integrated with rest of build system.
% For example, rules can be written to have a per-directory config file (flags etc) which is consulted for rule-gen.
% This config file could itself be a generated file!
% We did in fact use generatted config for during switch over from omake
% For a while (some months) we could build with both build systems. jenga config was 99% generated automattically from omake config
% jbuild.gen / jbuild
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Rules {\tt make} style}
\begin{itemize}
\item Triple of targets, dependencies and action
{\footnotesize
\begin{verbatim}
val rule : path list -> dep list -> action -> rule
\end{verbatim}}
\item Not expressive enough!
\begin{itemize}
\item Can't represent {\em dynamic dependencies}
\item Action is fixed
\end{itemize}
\end{itemize}
\end{frame}
% The concept of a rule in classic make is a static triple.
% Nice and simple.
% Often is sufficient
% But in general - it is not expressive enough
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Encoding dependencies}
Introduce a notion of {\it a value and its dependencies}. \par
%Represented by the type:
\begin{center}\colorbox{orange}{$\alpha$\ {\tt dep}}\end{center}
{\bf Constant value:}
{\footnotesize
\begin{verbatim}
val need : path -> unit dep
\end{verbatim}}
{\bf Varying value:}
{\footnotesize
\begin{verbatim}
val glob : dir:path -> string -> path list dep
val contents : path -> string dep
\end{verbatim}}
\end{frame}
% So ``dep'' is a parametrised type.
% The simpler dep of the make rules triple, now becomes: unit dep.
% What do values of this type mean?
% Semantically, a value of type {\tt t dep} can be thought of
% as taking different values of type {\tt t} at different times.
%
% These times might be:
% - each time the build tool is restart (we re-evaluate the rules)
% - we re-evaluate as necessary in response to being triggered by external events (inotify)
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Composing dependencies}
{\footnotesize
\begin{verbatim}
val return : 'a -> 'a dep
val ( *>>= ) : 'a dep -> ('a -> 'b dep) -> 'b dep
val ( *>>| ) : 'a dep -> ('a -> 'b ) -> 'b dep
\end{verbatim}}
Concurrency expressed using {\tt all}:
{\footnotesize
\begin{verbatim}
val all : 'a dep list -> 'a list dep
val all_unit : unit dep list -> unit dep
\end{verbatim}}
\end{frame}
% This is also the approach takes by shake
%
% Early version attempted to avoid exposing the monad to the rules
% author - but this was a mistake.
%
% Made the rule setup more tricky & much more fragile.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Rules {\tt jenga} style}
\begin{itemize}
\item Action is carried by the dependency
{\footnotesize
\begin{verbatim}
val rule : path list -> action dep -> rule
\end{verbatim}}
\item Rule generation
{\footnotesize
\begin{verbatim}
val generate : (dir:path -> rule list dep) -> scheme
\end{verbatim}}
\item Recover simple rules
{\footnotesize
\begin{verbatim}
let simple_rule targets deps action =
rule targets (
all_unit deps *>>= fun () ->
return act)
\end{verbatim}}
\end{itemize}
\end{frame}
% Action carried by the parametrised dependency type.
% Rule generation fits in the scheme.
% Simple rule creation can be recovered.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Example 1: OCaml compilation}
{\footnotesize
\begin{verbatim}
val compile_ml: dir:path -> name:string -> rule
let compile_ml ~dir ~name =
let p x = relative ~dir (name ^ x) in
rule [p".cmi"; p".cmx"; p".o"] (
\end{verbatim}
\vskip-16pt\hskip18pt
\colorbox{orange}{\parbox[t][2.3cm][t]{8.7cm}{\ }}\vspace*{-2.5cm}
\begin{verbatim}
let static = [p".ml"] in
deps_from_file ~dir (p".ml.d") *>>= fun dynamic ->
needs (static @ dynamic) >>| fun () ->
bash ~dir (sprintf "ocamlopt -c %s.ml" name)
\end{verbatim}
\begin{verbatim}
)
\end{verbatim}}
\end{frame}
% All leading up to this big example. I'll take a little time here.
% Sorry about the rather visually disturbing orange rectangle!
%
% This is used to highlight the 2nd argument to ``rule'', having type ``action dep''
% composed using the map/bind combinators
% determines dependencies: static & dynamic
% Ends in a call to ``bash''
% In this case the cbash command string is static, but it need not be.
% - In the case of linking the bash action would not be static, for example
% - if the objects being linked have to be ordered w.r.t inter-module dep
%
% But this example does how dynamic deps are represented.
% - the call to ``needs'' makes use of ``dynamic'', coming from deps_from_file
% - but on RHS of the bind operator.
% - (the .ml.d file will presuamble be generated by some other rule, perhaps running ocamldep)
% - if the .ml.d changes, this rule will reconfig itself to be dependant on the new names listed.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Example 2: OCaml rule generation}
{\footnotesize
\begin{verbatim}
val generate : (dir:path -> rule list dep) -> scheme
\end{verbatim}}
Rules for a directory of ocaml
{\footnotesize
\begin{verbatim}
generate (fun ~dir ->
glob ~dir "*.ml" *>>= fun mls ->
glob ~dir "*.mli" *>>| fun mlis ->
let exists_mli x = List.mem mlis (relative ~dir (x ^ ".mli")) in
List.map mls ~f:(fun ml ->
let name = chop_suffix (basename ml) ".ml" in
if (exists_mli name)
then compile_ml_mli ~dir ~name
else compile_ml ~dir ~name)
)
\end{verbatim}}
\end{frame}
% Example shows:
% - rule generation, based on *.ml existing in a dir
% - conditional rule setup
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
{\bf \textcolor{orange}{Implementation}}
\end{center}
\end{frame}
% Switch away from descibing jenga from user (rule author) perspecitive
% And to the implementation of jenga
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Tower of monads}
\begin{itemize}
\item {\tt Depends}
\begin{itemize}
\item {\em incremental}\/ builds
\end{itemize}
\item {\tt Tenacious}
\begin{itemize}
\item { \em polling}\/ builds
\end{itemize}
\item {\tt Deferred}
\begin{itemize}
\item {\em parallel}\/ builds
\end{itemize}
\end{itemize}
\end{frame}
% Clearly, we like the features described.
% As a means for user to describe declaratively the build rules.
% And we want the parallel, polling, incremental builds!
% So how does jenga achieve this... Tower of monads.
% Differnet layers reposnsible for different aspects of the design.
%
% Depends
% Already seem Depends. from user POV
% API offerred to user of jenga
% main purpose is supports incremental (recompilation) feature of jenga
%
% Tenacious
% part of implementation of jenga; not exposed to user
% supports polling feature
%
% Deferred
% monad at heart of JS asyn library
% support concurrent/parallel jenga builds
% "Co-operative multi threading"
%
% All monadic. return; bind; map.
% Support parallel/concurrentcy via [all] combinator
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
\highlight{\tt Deferred}
\end{center}
\end{frame}
% Start at bottom of tower & work up...
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Deferred: part of {\tt async}}
\begin{center}
\highlight{\tt 'a Deferred.t}\vskip16pt
holds the {\em result of a computation},\\\vskip-4pt which might {\em not yet be available}.
\end{center}
\begin{itemize}
\item From Jane Street's {\tt async} library
\begin{itemize}
\item {\em ``co-operative multi threading''}
\end{itemize}
\item Enables {\em parallel}\/ builds in jenga
\end{itemize}
\end{frame}
% Supports concurrent apps; read/write multi files/sockets; server apps.
% Computations started from when the deferred is created.
% Dont want blocking computation in one (async) thread to block entire app.
%
% What is a value of deferred type?
% Cell to hold the result of computation. Filled in asynchronously when finished.
% When the result is ready, the deferred is said to become determined.
% A deferred is determined (at most) once; and retains its value.
%
% User code can wait for the result of a deferred value to be determined
% When a deferred becomes determined, waiting computations are resumed.
%
% Abstraction for asynchronous computations. Core type of JS async library.
% "Co-operative multi threading"
% Lightweight.
% Some nice atomicity guarentees
% User code `can' block entire application. (downside)
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Deferred: composition}
{\bf Monadic:}
{\footnotesize
\begin{verbatim}
val (>>=) : 'a def -> ('a -> 'b def) -> 'b def
val all : 'a def list -> 'a list def
val in_thread : (unit -> 'a) -> 'a def
\end{verbatim}}
{\bf Example primitives:}
{\footnotesize
\begin{verbatim}
val file_contents : path -> string def
val system : string -> string list -> exit_code def
\end{verbatim}}
\end{frame}
% {\bf Monadic (more or less!):}
% val next_elem : 'a socket -> 'a def
% val system : string -> string list -> (int * string) def
% Monadic composition: sequential (bind); concurrent (all)
% [all] synchronises when a list of computation are all determined.
% [in_thread] wraps blocking primitive computations (run in separate thread.)
% so cant block entire application.
%
% example primitives
% - and operation which takes signiifcant time (i.e. may block app)
% - system call or bigger.
% - not really primitives!
% written in user async code, from real primitives - system calls lifted in to async monad
% Most uses of in_thread within async library where system calls are lifted into async
%
% User code which composes deferred computations runs in a single thread.
% Scheduler is the single point of mediation
% Queue of pending deferreds; Pool of threads; Central select loop for IO
% When leaf in_thread computation are finished, or IO becomes available
% scheduler fills in the deferred; waiting computations can proceed
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
\highlight{\tt Tenacious}
\end{center}
\end{frame}
% So use of async is pervasive across 90% of JS's code base
%
% From POV of apps like jenga, deferred gives us the basic support for writing concurrent apps.
% i.e. run multiple compilation commands in parallel
% also needed in jenga...
% read files / digest files (run external md5sum to digest file)
% async save to persist store
% support query access via socket (jenga is a server)
% receive inotify events from OS indicating indicating file mod event. i.e source file edited
%
% So what is tenacious for?
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Tenacious: {\tt 'a ten}}
\begin{itemize}
\item Tenacious computations may be:
\begin{itemize}
\item {\em invalidated}
\item {\em restarted}
\item {\em cancelled}
\end{itemize}
\item Support {\em polling}\/ builds in jenga
\begin{itemize}
\item Notifier subsystem invalidates file-system ops
\end{itemize}
\item {\em ``self-adjusting computation''}
\end{itemize}
\end{frame}
% Tenacious computation is a: invalidatable, restartable, cancelable computation
% [invalidatable]
% Each (sub) computations has associated with it, a certificate. Indicating the basis for the computation is still valid.
% What "basis for computation" means is up to user.
%
% [restartable]
% An invalid computation will get re-played.
% RHS of bind re-applied to get new result
% to supersede old RSH computation
%
% [cancelable]
% A computation which is dep on an invalid computation (RHS of bind)
% will be cancelled - stop unwinding & discontinued
% (choose that leaf computations will run to completion)
% Purpose of tenacious is to support implementation OS jenga 'polling' mode.
%
% Describe from jenga user POV.
% - start build.. compilation commands run.. assuming succ build.. everything done.. waiting
% - get inotify event.. some .ml file is edited. need to redo part of the build effected.
% - first the compilation of that file.. then any dep files.. linking libraries.. linking exe
% - only want to check the effected part of the tree (important when tree big)
%
% tenacious does not have knowledge of FS notification built in
% although that is a very important application
% But, building is a long running process. make take an hour say. don't
% want to wait until the whole build is finished before we restart compiles which need to be redone.
% Want to start immediately!
% Must discard any thing done previously which make use of now invalidated data
% Mustn't start anything new which was dependent on the pre-event state of the world.
% Different form Acar's self-adjusting computation,
% which has two-phase semantics: {change-inputs; stabilize} repeat
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Tenacious: interface}
{\tt Tenacious}
{\footnotesize
\begin{verbatim}
type 'a ten
val (>>=) : 'a ten -> ('a -> 'b ten) -> 'b ten
val lift : (unit -> ('a * heart) def) -> 'a ten
\end{verbatim}}
% val exec : 'a ten -> ('a * heart) def
{\tt Heart}
{\footnotesize
\begin{verbatim}
type heart
val create_heart : unit -> heart
val break_heart : heart -> unit
\end{verbatim}}
\end{frame}
% Like deferred, Monadic composition, with ``all'' for concurrent computations
% Primitive computations are lifted into the tenacious type, making use of heart abstraction.
%
% [Heart] is a certificate of validity. break-once mutable triggers. (heart is broken); light weight
% hearts compose internally by tenacious
% invalidity trigger flows instantly to all computations which are deemed broken.
% As tenacious computations are unwound, relevant hearts are checked before started new leaf lifted computations
% when hearts are broken, computations are re-played. RHS of binds re-applied.. maybe creating different computations
% [lift]
% allows arb deferred computation, which may be async invalidated to be liftend in to tenacious
% tenacious does not have knowledge of FS notification built in
% although that is a very important application - next example
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
\highlight{\tt Depends}
\end{center}
\end{frame}
% So we have parallel builds.
% And builds which are retriggered in response to event from the FS notifier.
% What does the depends layer give us?
%
% Support for checking builds are up-to-date,
% but without running any actions which are not necessary
% because we determine the actions have been run before & nothing (pertinent) has changed.
% Running the action again now should(!) have no effect.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Depends: {\tt 'a dep}}
\begin{itemize}
\item Provide API for build rules with filesystem dependencies
\begin{itemize}
\item {\tt val need} : {\tt path -> unit dep}
\item {\tt type rule = path list * action dep}
\end{itemize}
\vspace*{.2cm}
\item Support {\em incremental}\/ builds in jenga
\begin{itemize}
\item Avoid running {\em unnecessary} (compilation) actions
\begin{itemize}
\item Because {\em nothing} has changed since last run
\end{itemize}
\end{itemize}
\end{itemize}
\end{frame}
% API for describing build rules.
% rule is pair of target paths & action carried by depends monad
% (think of action as the command line string which we will run)
% Supports "incremental builds!"
% Avoid running unnecessary (compilation) actions
%
% An action is unnecessary when
% - we ran it before
% - all deps are up-to-date & are unchanged (from when we last ran the action)
% - and the targets are still there & are untampered with
%
% by unchanged we consider a digest of the file
% for glob, the list of files is unchanged - but not in this example!
% Presumption that user compilation actions will always operate the same
% generating identical targets from same dependent files
% Not always true! e.g. ar - timestamp
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Depends: the build algorithm}
\begin{itemize}
\item Run the action for a rule iff:
\begin{itemize}
\item we have no record of running it before
\item dependencies have changed
\item action has changed
\item a targets is missing of different from expected
\end{itemize}
\item Record successful run in the persistent state.
\end{itemize}
\end{frame}
% Explanation of depends abstraction is inter-twined with operation of he build algorithm
% stated here. read!
% Particular example - the null build...
% - build whole tree.. everything is up to date
% - stop jenga (dont touch an files)
% - restart: everything is checked - NO actions are run
%======================================================================
\begin{frame}[fragile]
\frametitle{Summary of Jenga}
\begin{itemize}
\item Key features
\begin{itemize}
\item Rule development in OCaml
\item Expressive API for dynamic dependencies
\item {\em Incremental}, {\em polling}, {\em parallel}\/ builds
\end{itemize}
\item Developed and used at Jane Street
\item Open source
\begin{center}
\highlight{\tt opam install jenga}
\end{center}
\vskip12pt
\end{itemize}
\end{frame}
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
{\bf \textcolor{orange}{Additional slides}}
\end{center}
\end{frame}
% Extra slides cut from SHORT talk...
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Deferred: identity semantics}
\begin{itemize}
\item A ``set-once reference cell''
\end{itemize}
{\footnotesize
\begin{verbatim}
let next = next_element my_socket in
next >>= fun x1 ->
next >>= fun x2 ->
assert (x1 = x2);
\end{verbatim}}
\hspace{1cm}versus:
{\footnotesize
\begin{verbatim}
let next() = next_element my_socket in
next() >>= fun x1 ->
next() >>= fun x2 ->
...
\end{verbatim}}
\end{frame}
% deferred is a specific running computation
% not a recipe for a computation
% binding on it does not force it - make it start - duplicate it etc
% it is already running
% (>>=) is nothing to do with the running of the computation!
% the deferred is place-holder for result
% a cell which can flip state from running to determined
% implelemtation terms: a runing deferred provides place to 'hang' waiting computations
% Shall we be pure or impure?
% - sometimes impure seems to fit better with ocaml
% - we live with call by value & so we take advantage
% Top - shows identity
% Bottom - shows how we can avoid, putting behing thunk
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Deferred: no preemption}
\begin{itemize}
\item Guaranteed atomicity between bind points
\end{itemize}
{\footnotesize
\begin{verbatim}
let memoized_file_contents =
let ht = Hashtbl.create () in
fun path ->
match Hashtbl.find ht path with
| Some d -> d
| None
let d = file_contents path in
Hashtbl.add ht ~path ~value:d;
d
\end{verbatim}}
\end{frame}
% Saw earlier files_contents example.
% Here we want to share file loads & avoid loading same file twice
% file load takes arb amount of time. not enough to share the result
% want to share the computation (the cell which will receive the result of the comp)
% Use simple memoization approach, with standard hash tables
%
% Normally, in code like this, coded using threads. we need some way to ensure atomicity
% so that the find/add are automatically run in atomic block.
% With async we get that guarantee automatically (for free)
% or cost is the same one we paid for using co-operative multi threading (one async thread can block entire app)
%
% With async (deferred) there is no preemption
% Context switching only at bind points;easier to reason about.
% User code guaranteed to run atomically.
% Avoid need monitors; locks etc to protect local state.
%
% We know that memoized_file_contents cant be re-entrant
% Whatever file_contents does!
% We know that nothing else can access the HT between the find & the add
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Tenacious: example}
%{\tt tenacious\_contents: path -> string ten}
{\footnotesize
\begin{verbatim}
let tenacious_contents path =
Tenacious.lift (fun () ->
let heart = Notifier.watch the_notifier path in
Deferred.file_contents path >>| fun x ->
(x, heart)
)
\end{verbatim}}
\end{frame}
% This example, we imagine an asyncronous ``Notifier'' subsystem
% which we can ask for a heart corresponding to a given path
% which is sure to unbroken at the point we ask for it - but may break at any following moment
% the notifier will break the heart when there is an event on the file
% which we use as the trigger for the invalidity of the tenacioud returned by ``contents''
% The tenacious system will rerun the lifted computation as/when necessary
% In this example we make use of the basic deferred file_contents op
% But we might well go on to memoize this tenacious version
% Can do this because, like deferred - TENACIOUS HAVE IDENTITY
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Depends: persistent record of build action}
{\footnotesize
\begin{verbatim}
type digest = string (* md5sum *)
type fs_proxy = (path, digest) Map.t
type rule_proxy = {
targets : fs_proxy;
deps : fs_proxy;
action : act
}
\end{verbatim}}
\end{frame}
% a build record is a triple