Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
1057 lines (882 sloc) 33.7 KB
\documentclass{beamer}
\usepackage{helvet}
\usepackage{textpos}
\usepackage{setspace}
\useinnertheme{circles}
\definecolor{jswhite}{rgb}{0.9,0.9,0.9}
\definecolor{jsyellow}{rgb}{0.9,0.7,0.1}
\definecolor{jsblue}{rgb}{0,0,0.3}
\definecolor{jslightblue}{rgb}{0,0.3,0.3}
\setbeamercolor{normal text}{fg=black,bg=white}
\setbeamercolor{structure}{fg=orange}
\setbeamercolor*{separation line}{fg=jslightblue}
\setbeamercolor*{fine separation line}{fg=jslightblue}
\setbeamercolor*{frametitle}{fg=orange}
\setbeamerfont{frametitle}{series=\bfseries}
\begin{document}
\title{\Huge{\bf Jenga}\vskip24pt\large\textcolor{black}{The design of an expressive and scalable build system}}
\date{
\includegraphics[width=4cm]{new-jane-street-logo.pdf}
}
\addtobeamertemplate{frametitle}{}{%
\begin{textblock*}{3cm}(.77\textwidth,5.4cm)
\vskip26pt\hskip20pt
\includegraphics[width=2cm]{new-jane-street-logo.pdf}
\end{textblock*}\bf}
\setbeamertemplate{navigation symbols}{}
\let\oldframetitle\frametitle
\renewcommand{\frametitle}[1]{%
\oldframetitle{\vskip4pt\hskip4pt#1}\setstretch{1.4}}
\newcommand\gdb{{\tt gdb}}
\newcommand\gap{\vskip18pt}
\newcommand\highlight[1]{\colorbox{orange}{\textcolor{black}{#1}}}
%----------------------------------------------------------------------
\begin{frame}
\titlepage
\end{frame}
% This talk is about jenga.
% - a new build tool developed a janestreet -
% and various aspects of it design and implementation
% Two parts: design (user persepective); implementation
%- SHORT TALK - part 2 significantly reduced.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{What is a build system?}
\begin{center}
\highlight{Build tool + build rules}
\end{center}
\begin{itemize}
\item General purpose build tool
\item Rules are provided by the user
\begin{itemize}
\item targets; actions; {\em dependencies}
\item shared framework (e.g. for a whole company)
\item per-project config
\end{itemize}
\item Build process is demand driven
\end{itemize}
\end{frame}
% Tool & rules:
% - Rules provided by user; interpreted by the tool
% The build tool is responsible to orchestrate the minimal sequence of
% actions necessary to bring a set of generated targets up to date.
% General purpose tool
% - no build in knowledge of specific languages or compilation tools
% - no magic handling
% - just a consistent general purpose build model
% The overall build process is demand driven.
% We request the system to build some top level target, perhaps main.exe
% Rules (provided by user) decribe actions to run to create generated targets.
% And any dependencies (other files) required for the action.
% Example: a rule for linking
% - linking stage might describes how some .o are linked into a .exe
% - the .o's are the dependencies.
% - if these are also generated targets (not sources), as expected, there will other rules for each .o
% - then before the link is run the .os must be generated, or brought up to date
% - by running whatever action is responsible for creating them
% - and finally the link command can be run (or not, if we discover it is not required)
% Commonly, rules are split. framework + per-project config
% Dont want every developer to have to be a build system expert
% They should say: build all .ml in this dir into library ``foo'', making use of lib ``bar''
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Why a new build system?}
\begin{itemize}
\item Already so many to choose from:
\begin{itemize}
\item make, omake, ocamlbuild, ninja, tup, redo, shake...
\end{itemize}
\item Jane Street workflow
\begin{itemize}
\item One big tree! (almost)
\begin{itemize}
\item 3500 dirs; 33k files; 2.2m lines OCaml
\end{itemize}
\item Two workflows
\end{itemize}
\item Issues: correctness \& scalability
\end{itemize}
\end{frame}
% So, the first question is probably: why on earth did we feel the
% need to develop yet another build tool. Does the world not have enough?
% - no other existing tool has the full set of features we deem important.
% - omake (what we used before), comes closest
%
% Overview of JS workflow: One big tree; two workflows...
% One tree; big - main advantage of a single tree. Can modify
% interfaces to heavily shared common library (improve names/types)
% and fix up all callers in one consistent change-set
% Two flows...
% Continuous integration
% - remote box; full tree; guard bad commits; incremental but restarted
%
% Individual development
% - local box; own project subtree; incremental, polling
% Before jenga, janestreet used omake - pretty good!
% Has lots of features we view as important.
% And solves many issues newer tools fail on.
% Brief list''
% - one build instance for whole tree
% - quick
% - polling
% - dynamic dependencies
% - programmable rule generation
%
% Problems, increasing over time...
% - at JS we have stretched it scalability to the limit
% - more importantly: hard to program
%
% Thing we dislike the most about omake, is the omake language itself.
% - dynamic typing; dynamic scoping; scripting language not a programming language
% Correctness of build rules.
% Golden test: Incremental = From-clean
%
% - want performance of incremenal builds, but with semantics of form-clean builds
% - requires accurate spec of dependencies.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\huge
\begin{center}
{\bf \textcolor{orange}{Design}}
\end{center}
\end{frame}
% First part of talk: design; focusing on user persepective
% How jenga is used: as a tool, and as system to describe build rules
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Necessities}
\begin{itemize}
\item Programmable
\begin{itemize}
\item rule generation in a {\em real} programming language
\item {\tt jengaroot.ml} dynamically compiled and loaded
\end{itemize}
\item Incremental
\begin{itemize}
\item {\em the} point of a build system
\end{itemize}
\item Polling (inotify)
\begin{itemize}
\item for individual development
\end{itemize}
\item Parallel
\begin{itemize}
\item run compilation actions in parallel
\end{itemize}
\end{itemize}
\end{frame}
% Programable
% Rules are coded in OCaml, against an API. No DSL here.
% {\tt jengaroot.ml} loaded using {\tt ocaml\_plugin}
% omake had DSL. Ok in the small, but for a large engineering project it becomes unmanageable
% Question. why wouldn't you want to use a real programming language?
% and so an early design decision of jenga was for the rules to be described in ocaml, using an API
% Incremental
% If not incremental. Your not a build system, but a script.
% We want to be able to rely on our incremental builds being correct.
% Requires that rules declare correct deps... any file wich is read by action
% Golden test: Incremental = From-clean
% How many times have you heard a broken build excesed with ``oh, just clean and build again''
% clear inditement that the build system is broken
% Not acceptable when a full tree build takes 1--2 hours
% Would be nice to avoid this explicit statement of deps. Dream system wold avoid it.
% Instead we focus on giving user means to make accurate specification of deps
% We need our build tool to to offer a language rich enough to express
% complex (& dynamic) dependencies for rules and rule generation,
% which exists in the real world
% Persistant (imp detail of incremental build)
% Use persistent store to record record of every action run.
% - digest of deps & targets + the exact command executed.
% so we can tell when we dont have to run again.
% Polling
% for interactive use; had in oamke. lovely feature
% rerun compilation ASAP, even while still building other parts of the tree
% Parallel build:
% jenga extract maximum parallel as expressed in the rules
% throttled by -j argument
% Commonly choose to set this to number of cores. Or higher and let OS slice.
% Most relevant when building a full tree ``from clean''
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Correctness}
\begin{itemize}
\item Rules are hard
\begin{itemize}
\item Requires detailed knowledge of toolchain
\item Accurate dependencies are required for consistent builds
\end{itemize}
\item Dependencies are not always static
\begin{itemize}
\item ``scanner dependencies''
\item glob dependencies ({\tt ls *.ml})
\end{itemize}
\item Rule generation
\begin{itemize}
\item not a distinct phase (unlike {\tt ninja})
\item may also have dependencies
\end{itemize}
\end{itemize}
\end{frame}
% The most important thing is dependencies.
% Without accurate deps, there can be no hope of a correct incremental builds.
% Developing \& maintaining a build system is a major engineering project.
% Real world deps are not trivial, and very often dynamic.
% - dont know what the deps are before the build starts
% - they are discovered during the build process
% Often we dont even know what targets have rules before we start
% - i.e. setting up a compile rule for all .ml in a directory
% Dynamic dependencies
% generalisation of what some build tools call scanner dependencies.
% Example: gcc -M to detect which header files are required
% (if headers are generated - need fixpointing)
% for ocaml - ocamldep.. also incomplete soltion
%
% In jenga, arbitrary conditional deps can be expressed
% example: ocaml compilation rule setup depends on existance/or not of .mli
%
% Jenga introduces ``glob dependencies'' i.e. *.ml
% Most commonly used during rule configuration
% And this also works properly in polling mode where we can be
% triggered as new files appear or exiting files disappear
% in omake, *.ml did not wrk in polling mode, requiring restart of omake
% rule generation.
% Not a distinct phase. Integrated with rest of build system.
% For example, rules can be written to have a per-directory config file (flags etc) which is consulted for rule-gen.
% This config file could itself be a generated file!
% We did in fact use generatted config for during switch over from omake
% For a while (some months) we could build with both build systems. jenga config was 99% generated automattically from omake config
% jbuild.gen / jbuild
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Rules {\tt make} style}
\begin{itemize}
\item Triple of targets, dependencies and action
{\footnotesize
\begin{verbatim}
val rule : path list -> dep list -> action -> rule
\end{verbatim}}
\item Not expressive enough!
\begin{itemize}
\item Can't represent {\em dynamic dependencies}
\item Action is fixed
\end{itemize}
\end{itemize}
\end{frame}
% The concept of a rule in classic make is a static triple.
% Nice and simple.
% Often is sufficient
% But in general - it is not expressive enough
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Encoding dependencies}
Introduce a notion of {\it a value and its dependencies}. \par
%Represented by the type:
\begin{center}\colorbox{orange}{$\alpha$\ {\tt dep}}\end{center}
{\bf Constant value:}
{\footnotesize
\begin{verbatim}
val need : path -> unit dep
\end{verbatim}}
{\bf Varying value:}
{\footnotesize
\begin{verbatim}
val glob : dir:path -> string -> path list dep
val contents : path -> string dep
\end{verbatim}}
\end{frame}
% So ``dep'' is a parametrised type.
% The simpler dep of the make rules triple, now becomes: unit dep.
% What do values of this type mean?
% Semantically, a value of type {\tt t dep} can be thought of
% as taking different values of type {\tt t} at different times.
%
% These times might be:
% - each time the build tool is restart (we re-evaluate the rules)
% - we re-evaluate as necessary in response to being triggered by external events (inotify)
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Composing dependencies}
{\footnotesize
\begin{verbatim}
val return : 'a -> 'a dep
val ( *>>= ) : 'a dep -> ('a -> 'b dep) -> 'b dep
val ( *>>| ) : 'a dep -> ('a -> 'b ) -> 'b dep
\end{verbatim}}
Concurrency expressed using {\tt all}:
{\footnotesize
\begin{verbatim}
val all : 'a dep list -> 'a list dep
val all_unit : unit dep list -> unit dep
\end{verbatim}}
\end{frame}
% This is also the approach takes by shake
%
% Early version attempted to avoid exposing the monad to the rules
% author - but this was a mistake.
%
% Made the rule setup more tricky & much more fragile.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Rules {\tt jenga} style}
\begin{itemize}
\item Action is carried by the dependency
{\footnotesize
\begin{verbatim}
val rule : path list -> action dep -> rule
\end{verbatim}}
\item Rule generation
{\footnotesize
\begin{verbatim}
val generate : (dir:path -> rule list dep) -> scheme
\end{verbatim}}
\item Recover simple rules
{\footnotesize
\begin{verbatim}
let simple_rule targets deps action =
rule targets (
all_unit deps *>>= fun () ->
return act)
\end{verbatim}}
\end{itemize}
\end{frame}
% Action carried by the parametrised dependency type.
% Rule generation fits in the scheme.
% Simple rule creation can be recovered.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Example 1: OCaml compilation}
{\footnotesize
\begin{verbatim}
val compile_ml: dir:path -> name:string -> rule
let compile_ml ~dir ~name =
let p x = relative ~dir (name ^ x) in
rule [p".cmi"; p".cmx"; p".o"] (
\end{verbatim}
\vskip-16pt\hskip18pt
\colorbox{orange}{\parbox[t][2.3cm][t]{8.7cm}{\ }}\vspace*{-2.5cm}
\begin{verbatim}
let static = [p".ml"] in
deps_from_file ~dir (p".ml.d") *>>= fun dynamic ->
needs (static @ dynamic) >>| fun () ->
bash ~dir (sprintf "ocamlopt -c %s.ml" name)
\end{verbatim}
\begin{verbatim}
)
\end{verbatim}}
\end{frame}
% All leading up to this big example. I'll take a little time here.
% Sorry about the rather visually disturbing orange rectangle!
%
% This is used to highlight the 2nd argument to ``rule'', having type ``action dep''
% composed using the map/bind combinators
% determines dependencies: static & dynamic
% Ends in a call to ``bash''
% In this case the cbash command string is static, but it need not be.
% - In the case of linking the bash action would not be static, for example
% - if the objects being linked have to be ordered w.r.t inter-module dep
%
% But this example does how dynamic deps are represented.
% - the call to ``needs'' makes use of ``dynamic'', coming from deps_from_file
% - but on RHS of the bind operator.
% - (the .ml.d file will presuamble be generated by some other rule, perhaps running ocamldep)
% - if the .ml.d changes, this rule will reconfig itself to be dependant on the new names listed.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Example 2: OCaml rule generation}
{\footnotesize
\begin{verbatim}
val generate : (dir:path -> rule list dep) -> scheme
\end{verbatim}}
Rules for a directory of ocaml
{\footnotesize
\begin{verbatim}
generate (fun ~dir ->
glob ~dir "*.ml" *>>= fun mls ->
glob ~dir "*.mli" *>>| fun mlis ->
let exists_mli x = List.mem mlis (relative ~dir (x ^ ".mli")) in
List.map mls ~f:(fun ml ->
let name = chop_suffix (basename ml) ".ml" in
if (exists_mli name)
then compile_ml_mli ~dir ~name
else compile_ml ~dir ~name)
)
\end{verbatim}}
\end{frame}
% Example shows:
% - rule generation, based on *.ml existing in a dir
% - conditional rule setup
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
{\bf \textcolor{orange}{Implementation}}
\end{center}
\end{frame}
% Switch away from descibing jenga from user (rule author) perspecitive
% And to the implementation of jenga
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Tower of monads}
\begin{itemize}
\item {\tt Depends}
\begin{itemize}
\item {\em incremental}\/ builds
\end{itemize}
\item {\tt Tenacious}
\begin{itemize}
\item { \em polling}\/ builds
\end{itemize}
\item {\tt Deferred}
\begin{itemize}
\item {\em parallel}\/ builds
\end{itemize}
\end{itemize}
\end{frame}
% Clearly, we like the features described.
% As a means for user to describe declaratively the build rules.
% And we want the parallel, polling, incremental builds!
% So how does jenga achieve this... Tower of monads.
% Differnet layers reposnsible for different aspects of the design.
%
% Depends
% Already seem Depends. from user POV
% API offerred to user of jenga
% main purpose is supports incremental (recompilation) feature of jenga
%
% Tenacious
% part of implementation of jenga; not exposed to user
% supports polling feature
%
% Deferred
% monad at heart of JS asyn library
% support concurrent/parallel jenga builds
% "Co-operative multi threading"
%
% All monadic. return; bind; map.
% Support parallel/concurrentcy via [all] combinator
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
\highlight{\tt Deferred}
\end{center}
\end{frame}
% Start at bottom of tower & work up...
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Deferred: part of {\tt async}}
\begin{center}
\highlight{\tt 'a Deferred.t}\vskip16pt
holds the {\em result of a computation},\\\vskip-4pt which might {\em not yet be available}.
\end{center}
\begin{itemize}
\item From Jane Street's {\tt async} library
\begin{itemize}
\item {\em ``co-operative multi threading''}
\end{itemize}
\item Enables {\em parallel}\/ builds in jenga
\end{itemize}
\end{frame}
% Supports concurrent apps; read/write multi files/sockets; server apps.
% Computations started from when the deferred is created.
% Dont want blocking computation in one (async) thread to block entire app.
%
% What is a value of deferred type?
% Cell to hold the result of computation. Filled in asynchronously when finished.
% When the result is ready, the deferred is said to become determined.
% A deferred is determined (at most) once; and retains its value.
%
% User code can wait for the result of a deferred value to be determined
% When a deferred becomes determined, waiting computations are resumed.
%
% Abstraction for asynchronous computations. Core type of JS async library.
% "Co-operative multi threading"
% Lightweight.
% Some nice atomicity guarentees
% User code `can' block entire application. (downside)
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Deferred: composition}
{\bf Monadic:}
{\footnotesize
\begin{verbatim}
val (>>=) : 'a def -> ('a -> 'b def) -> 'b def
val all : 'a def list -> 'a list def
val in_thread : (unit -> 'a) -> 'a def
\end{verbatim}}
{\bf Example primitives:}
{\footnotesize
\begin{verbatim}
val file_contents : path -> string def
val system : string -> string list -> exit_code def
\end{verbatim}}
\end{frame}
% {\bf Monadic (more or less!):}
% val next_elem : 'a socket -> 'a def
% val system : string -> string list -> (int * string) def
% Monadic composition: sequential (bind); concurrent (all)
% [all] synchronises when a list of computation are all determined.
% [in_thread] wraps blocking primitive computations (run in separate thread.)
% so cant block entire application.
%
% example primitives
% - and operation which takes signiifcant time (i.e. may block app)
% - system call or bigger.
% - not really primitives!
% written in user async code, from real primitives - system calls lifted in to async monad
% Most uses of in_thread within async library where system calls are lifted into async
%
% User code which composes deferred computations runs in a single thread.
% Scheduler is the single point of mediation
% Queue of pending deferreds; Pool of threads; Central select loop for IO
% When leaf in_thread computation are finished, or IO becomes available
% scheduler fills in the deferred; waiting computations can proceed
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
\highlight{\tt Tenacious}
\end{center}
\end{frame}
% So use of async is pervasive across 90% of JS's code base
%
% From POV of apps like jenga, deferred gives us the basic support for writing concurrent apps.
% i.e. run multiple compilation commands in parallel
% also needed in jenga...
% read files / digest files (run external md5sum to digest file)
% async save to persist store
% support query access via socket (jenga is a server)
% receive inotify events from OS indicating indicating file mod event. i.e source file edited
%
% So what is tenacious for?
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Tenacious: {\tt 'a ten}}
\begin{itemize}
\item Tenacious computations may be:
\begin{itemize}
\item {\em invalidated}
\item {\em restarted}
\item {\em cancelled}
\end{itemize}
\item Support {\em polling}\/ builds in jenga
\begin{itemize}
\item Notifier subsystem invalidates file-system ops
\end{itemize}
\item {\em ``self-adjusting computation''}
\end{itemize}
\end{frame}
% Tenacious computation is a: invalidatable, restartable, cancelable computation
% [invalidatable]
% Each (sub) computations has associated with it, a certificate. Indicating the basis for the computation is still valid.
% What "basis for computation" means is up to user.
%
% [restartable]
% An invalid computation will get re-played.
% RHS of bind re-applied to get new result
% to supersede old RSH computation
%
% [cancelable]
% A computation which is dep on an invalid computation (RHS of bind)
% will be cancelled - stop unwinding & discontinued
% (choose that leaf computations will run to completion)
% Purpose of tenacious is to support implementation OS jenga 'polling' mode.
%
% Describe from jenga user POV.
% - start build.. compilation commands run.. assuming succ build.. everything done.. waiting
% - get inotify event.. some .ml file is edited. need to redo part of the build effected.
% - first the compilation of that file.. then any dep files.. linking libraries.. linking exe
% - only want to check the effected part of the tree (important when tree big)
%
% tenacious does not have knowledge of FS notification built in
% although that is a very important application
% But, building is a long running process. make take an hour say. don't
% want to wait until the whole build is finished before we restart compiles which need to be redone.
% Want to start immediately!
% Must discard any thing done previously which make use of now invalidated data
% Mustn't start anything new which was dependent on the pre-event state of the world.
% Different form Acar's self-adjusting computation,
% which has two-phase semantics: {change-inputs; stabilize} repeat
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Tenacious: interface}
{\tt Tenacious}
{\footnotesize
\begin{verbatim}
type 'a ten
val (>>=) : 'a ten -> ('a -> 'b ten) -> 'b ten
val lift : (unit -> ('a * heart) def) -> 'a ten
\end{verbatim}}
% val exec : 'a ten -> ('a * heart) def
{\tt Heart}
{\footnotesize
\begin{verbatim}
type heart
val create_heart : unit -> heart
val break_heart : heart -> unit
\end{verbatim}}
\end{frame}
% Like deferred, Monadic composition, with ``all'' for concurrent computations
% Primitive computations are lifted into the tenacious type, making use of heart abstraction.
%
% [Heart] is a certificate of validity. break-once mutable triggers. (heart is broken); light weight
% hearts compose internally by tenacious
% invalidity trigger flows instantly to all computations which are deemed broken.
% As tenacious computations are unwound, relevant hearts are checked before started new leaf lifted computations
% when hearts are broken, computations are re-played. RHS of binds re-applied.. maybe creating different computations
% [lift]
% allows arb deferred computation, which may be async invalidated to be liftend in to tenacious
% tenacious does not have knowledge of FS notification built in
% although that is a very important application - next example
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
\highlight{\tt Depends}
\end{center}
\end{frame}
% So we have parallel builds.
% And builds which are retriggered in response to event from the FS notifier.
% What does the depends layer give us?
%
% Support for checking builds are up-to-date,
% but without running any actions which are not necessary
% because we determine the actions have been run before & nothing (pertinent) has changed.
% Running the action again now should(!) have no effect.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Depends: {\tt 'a dep}}
\begin{itemize}
\item Provide API for build rules with filesystem dependencies
\begin{itemize}
\item {\tt val need} : {\tt path -> unit dep}
\item {\tt type rule = path list * action dep}
\end{itemize}
\vspace*{.2cm}
\item Support {\em incremental}\/ builds in jenga
\begin{itemize}
\item Avoid running {\em unnecessary} (compilation) actions
\begin{itemize}
\item Because {\em nothing} has changed since last run
\end{itemize}
\end{itemize}
\end{itemize}
\end{frame}
% API for describing build rules.
% rule is pair of target paths & action carried by depends monad
% (think of action as the command line string which we will run)
% Supports "incremental builds!"
% Avoid running unnecessary (compilation) actions
%
% An action is unnecessary when
% - we ran it before
% - all deps are up-to-date & are unchanged (from when we last ran the action)
% - and the targets are still there & are untampered with
%
% by unchanged we consider a digest of the file
% for glob, the list of files is unchanged - but not in this example!
% Presumption that user compilation actions will always operate the same
% generating identical targets from same dependent files
% Not always true! e.g. ar - timestamp
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Depends: the build algorithm}
\begin{itemize}
\item Run the action for a rule iff:
\begin{itemize}
\item we have no record of running it before
\item dependencies have changed
\item action has changed
\item a targets is missing of different from expected
\end{itemize}
\item Record successful run in the persistent state.
\end{itemize}
\end{frame}
% Explanation of depends abstraction is inter-twined with operation of he build algorithm
% stated here. read!
% Particular example - the null build...
% - build whole tree.. everything is up to date
% - stop jenga (dont touch an files)
% - restart: everything is checked - NO actions are run
%======================================================================
\begin{frame}[fragile]
\frametitle{Summary of Jenga}
\begin{itemize}
\item Key features
\begin{itemize}
\item Rule development in OCaml
\item Expressive API for dynamic dependencies
\item {\em Incremental}, {\em polling}, {\em parallel}\/ builds
\end{itemize}
\item Developed and used at Jane Street
\item Open source
\begin{center}
\highlight{\tt opam install jenga}
\end{center}
\vskip12pt
\end{itemize}
\end{frame}
%======================================================================
\begin{frame}[fragile]
\huge
\begin{center}
{\bf \textcolor{orange}{Additional slides}}
\end{center}
\end{frame}
% Extra slides cut from SHORT talk...
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Deferred: identity semantics}
\begin{itemize}
\item A ``set-once reference cell''
\end{itemize}
{\footnotesize
\begin{verbatim}
let next = next_element my_socket in
next >>= fun x1 ->
next >>= fun x2 ->
assert (x1 = x2);
\end{verbatim}}
\hspace{1cm}versus:
{\footnotesize
\begin{verbatim}
let next() = next_element my_socket in
next() >>= fun x1 ->
next() >>= fun x2 ->
...
\end{verbatim}}
\end{frame}
% deferred is a specific running computation
% not a recipe for a computation
% binding on it does not force it - make it start - duplicate it etc
% it is already running
% (>>=) is nothing to do with the running of the computation!
% the deferred is place-holder for result
% a cell which can flip state from running to determined
% implelemtation terms: a runing deferred provides place to 'hang' waiting computations
% Shall we be pure or impure?
% - sometimes impure seems to fit better with ocaml
% - we live with call by value & so we take advantage
% Top - shows identity
% Bottom - shows how we can avoid, putting behing thunk
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Deferred: no preemption}
\begin{itemize}
\item Guaranteed atomicity between bind points
\end{itemize}
{\footnotesize
\begin{verbatim}
let memoized_file_contents =
let ht = Hashtbl.create () in
fun path ->
match Hashtbl.find ht path with
| Some d -> d
| None
let d = file_contents path in
Hashtbl.add ht ~path ~value:d;
d
\end{verbatim}}
\end{frame}
% Saw earlier files_contents example.
% Here we want to share file loads & avoid loading same file twice
% file load takes arb amount of time. not enough to share the result
% want to share the computation (the cell which will receive the result of the comp)
% Use simple memoization approach, with standard hash tables
%
% Normally, in code like this, coded using threads. we need some way to ensure atomicity
% so that the find/add are automatically run in atomic block.
% With async we get that guarantee automatically (for free)
% or cost is the same one we paid for using co-operative multi threading (one async thread can block entire app)
%
% With async (deferred) there is no preemption
% Context switching only at bind points;easier to reason about.
% User code guaranteed to run atomically.
% Avoid need monitors; locks etc to protect local state.
%
% We know that memoized_file_contents cant be re-entrant
% Whatever file_contents does!
% We know that nothing else can access the HT between the find & the add
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Tenacious: example}
%{\tt tenacious\_contents: path -> string ten}
{\footnotesize
\begin{verbatim}
let tenacious_contents path =
Tenacious.lift (fun () ->
let heart = Notifier.watch the_notifier path in
Deferred.file_contents path >>| fun x ->
(x, heart)
)
\end{verbatim}}
\end{frame}
% This example, we imagine an asyncronous ``Notifier'' subsystem
% which we can ask for a heart corresponding to a given path
% which is sure to unbroken at the point we ask for it - but may break at any following moment
% the notifier will break the heart when there is an event on the file
% which we use as the trigger for the invalidity of the tenacioud returned by ``contents''
% The tenacious system will rerun the lifted computation as/when necessary
% In this example we make use of the basic deferred file_contents op
% But we might well go on to memoize this tenacious version
% Can do this because, like deferred - TENACIOUS HAVE IDENTITY
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Depends: persistent record of build action}
{\footnotesize
\begin{verbatim}
type digest = string (* md5sum *)
type fs_proxy = (path, digest) Map.t
type rule_proxy = {
targets : fs_proxy;
deps : fs_proxy;
action : act
}
\end{verbatim}}
\end{frame}
% a build record is a triple
% first 2 elems - file-system proxy: finite map from path to digest
% have fs_proxy for deps & targets
% third element - the actual action that was run.
% fs-proxy for deps allow us to detect when something relavant has changed
% which require us to re-run the build action
%
% If a dep is not stated. then we may not realise that we must rerun.
% and so we can get an incorrect incremental build.
% Notice, fs_proxy for targets means we will undo changes made to files which the build
% system believe are under its control (targets - build artifacts)
% we record this triple in a persistent DB which exists between jenga runs.
% If this file is deleted. Every actions must be rerun
% Even if we have all the build-artifacts in the FS
% As jenga has no record of them - what there deps & action are.
%----------------------------------------------------------------------
\begin{frame}[fragile]
\frametitle{Depends: build code fragment}
{\footnotesize
\begin{verbatim}
type action = path * string
type rule = path list * action dep
val build_dep : 'a dep -> ('a * fs_proxy) Or_error.t Tenacious.t
val need_path : path -> digest Tenacious.t
\end{verbatim}}
\end{frame}
% action is: command line string (for bash say) & path in whcih is run
% rule is pair of target paths & action carried by depends monad
% Can think of the build algorithm as evaluation of expression of type dep
% Into values proxy_map
% ..embedded in an error monad
% ..embedded in the tenacious monad
%
% evaluate action dep -> action
% causes recursive checking that deps are up-to-date
% when we "need" a path, (or get contents) we take a snapshot of the digest
% and build up the fs_proxy
%
% And so get the action value (string)
% before running action, take a snap of the target files
% So we have the triple which represents what we think if should be now.
% If the record matches the saved triple, we elide the build action.
% If not: run action
% after an action has run, we snapshot the generated files (checking they have been created)
% to get the new rule_proxy triple which we record in the DB.
\end{document}