Skip to content
Browse files
Option to restart server if a big job followed a detected rebase
Summary: As in title.

Reviewed By: ljw1004

Differential Revision: D10100451

fbshipit-source-id: 3c6f08bb7ec8c0b71c89915d325d7df2de76f266
  • Loading branch information
dabek authored and hhvm-bot committed Sep 28, 2018
1 parent 386af68 commit 91ae1676c973fe02ee9b13d40478d8b02a01c245
@@ -489,6 +489,9 @@ module Make_monitor (SC : ServerMonitorUtils.Server_config)
c = Exit_status.(exit_code Sql_corrupt) ||
c = Exit_status.(exit_code Sql_misuse) -> true
| _ -> false in
let is_big_rebase = match exit_status with
| Some c when c = Exit_status.(exit_code Big_rebase_detected) -> true
| _ -> false in
let max_watchman_retries = 3 in
let max_sql_retries = 3 in
if (is_watchman_failed || is_watchman_fresh_instance)
@@ -510,6 +513,15 @@ module Make_monitor (SC : ServerMonitorUtils.Server_config)
"Several large rebases caused FileHeap to be stale. Restarting";
kill_and_maybe_restart_server env exit_status
end else if is_big_rebase then begin
(* Server detected rebase sooner than monitor. If we keep going,
* monitor will eventually discover the same rebase and restart the
* server again for no reason. Reinitializing informant to bring it to
* the same understanding of current revision as server. *)
Informant.reinit env.informant;
"Server exited because of big rebase. Restarting";
kill_and_maybe_restart_server env exit_status
end else if is_sql_assertion_failure
&& (env.retries < max_sql_retries) then begin
@@ -119,6 +119,7 @@ let make_genv options config local_config handle =
ServerRevisionTracker.on_state_leave root name metadata;
Notifier_state_leave (name, metadata)
| Watchman.Files_changed changes ->
ServerRevisionTracker.files_changed local_config (SSet.cardinal changes);
Notifier_async_changes changes
let concat_changes_list = List.fold_left begin fun acc changes ->
@@ -500,6 +500,8 @@ let serve_one_iteration genv env client_provider =
(fun () -> SharedMem.collect `aggressive);
let t = Unix.gettimeofday () in
if t -. env.last_idle_job_time > 0.5 then begin
(* Check on pending queries periodically *)
ServerRevisionTracker.check_non_blocking env;
ServerIdle.go ();
{ env with last_idle_job_time = t }
end else env
@@ -32,7 +32,7 @@ let intersect_with_master_deps ~deps ~dirty_master_deps ~rechecked_files genv en

let size = Relative_path.Set.cardinal needs_recheck in
let env = if size = 0 then env else begin
ignore genv; (* TODO: use this to compare size to restart thresholds *)
ServerRevisionTracker.typing_changed genv.local_config size;
Hh_logger.log "Adding %d files to recheck" size;
let needs_recheck =
Relative_path.Set.union env.needs_recheck needs_recheck in
@@ -7,21 +7,35 @@

(** Note: the tracking in this module is for approximate logging purposes only;
(** Note: the tracking in this module is best effort only;
* it's not guaranteed to always reflect accurate merge base transitions:
* - in some init types, initial merge base is not known so we will only notice
* the second transition
* - to avoid blocking rest of the system, mergebase queries time out after 30
* seconds and are not retried in case of errors
* - we only record "new" mergebases as we see them, not detecting transitions
* between already visited revisions
* In other words: do not depend on anything in this module for things more
* critical than logging (like HackEventLogger.set_changed_mergebase ())

(* This will be None after init in case of canaries and Precomputed loads *)
let current_mergebase : Hg.svn_rev option ref = ref None

(* Do we think that this server have processed a mergebase change? If we are
* in this state and get notified about changes to a huge number of files (or
* even small number of files that fan-out to a huge amount of work), we might
* decide that restarting the server is a better option than going through with
* incremental processing (See ServerLocalConfig.hg_aware_*_restart_threshold).
* It is likely to be faster because:
* - a better saved state might be available
* - even with same saved state, during init we can treat all those changes
* (if they were indeed due to non-local commits ) as prechecked (see ServerPrecheckedFiles)
* and avoid processing them.
* There is some room for false positives, when small inconsequential rebase is immediately
* followed by a big local change, but that seems unlikely to happen often compared to
* the hours we waste processing incremental rebases.
let did_change_mergebase = ref false

let mergebase_queries : (Hg.hg_rev, (Hg.svn_rev Future.t)) Hashtbl.t = Hashtbl.create 200
(* Keys from mergebase_queries that contain futures that were not resolved yet *)
let pending_queries : Hg.hg_rev Queue.t = Queue.create ()
@@ -61,6 +75,7 @@ let check_query future ~timeout ~current_t =
match !current_mergebase with
| Some svn_rev when svn_rev <> new_svn_rev ->
current_mergebase := Some new_svn_rev;
did_change_mergebase := true;
HackEventLogger.set_changed_mergebase true;
Hh_logger.log "ServerRevisionTracker: Changing mergebase from r%d to r%d"
svn_rev new_svn_rev;
@@ -85,3 +100,45 @@ let check_blocking () =
let _ : float = Hh_logger.log_duration "Finished querying Mercurial" start_t in

let rec check_non_blocking env =
if Queue.is_empty pending_queries then begin
if ServerEnv.(env.full_check = Full_check_done) && !did_change_mergebase then begin
Hh_logger.log "ServerRevisionTracker: Full check completed despite mergebase changes";
(* Clearing this flag because we somehow managed to get through this rebase,
* so no need to restart anymore *)
did_change_mergebase := false;
HackEventLogger.set_changed_mergebase false;
end else
let hg_rev = Queue.peek pending_queries in
let future = Hashtbl.find mergebase_queries hg_rev in
if Future.is_ready future then begin
let _ : Hg.hg_rev = Queue.pop pending_queries in
check_query future ~timeout:30 ~current_t:(Unix.gettimeofday ());
check_non_blocking env

let make_decision threshold count name =
if threshold = 0 || count < threshold then () else begin
(* Enough files / declarations / typings have changed to possibly warrant
* a restart. Let's wait for Mercurial to decide if we want to before
* proceeding. *)
check_blocking ();
if !did_change_mergebase then begin
Hh_logger.log "Changed %d %s due to rebase. Restarting!" count name;
Exit_status.(exit Big_rebase_detected)

let files_changed local_config count =
make_decision (local_config.ServerLocalConfig.hg_aware_parsing_restart_threshold)
count "files"

let decl_changed local_config count =
make_decision (local_config.ServerLocalConfig.hg_aware_redecl_restart_threshold)
count "declarations"

let typing_changed local_config count =
make_decision (local_config.ServerLocalConfig.hg_aware_recheck_restart_threshold)
count "file typings"
@@ -15,3 +15,29 @@ val on_state_leave :

val check_blocking : unit -> unit
val check_non_blocking : ServerEnv.env -> unit

(* This module tracks changes to mergebase, and is also informed (by functions below)
* about the sizes of jobs that are being processed. If we are in "mergebase changed"
* state AND a big job have occurred, it can infer that this job was DUE to rebase,
* and that restarting will be faster than going forward (due to possibility of better
* saved state being available, or possibility to treat some files as prechecked
* (see
* If there is no better saved state available and the only reason we restart is due
* to handling of prechecked files, we could theoretically avoid the restart.
* But initialization and incremental mode are so divergent that this would require
* implementing entire thing twice (and was not done - we can only treat files
* as prechecked during init).
* Moreover:
* - attributing incremental file changes to rebases is based on timing and
* there is a risk that we would precheck a real local change
* - even if we just want to treat files as prechecked, without fully processing
* them, it can require quiet a lot of work in incrmental mode to invalidate all
* things that need to be invalidated (which are absent during init)
val files_changed : ServerLocalConfig.t -> int -> unit
val decl_changed : ServerLocalConfig.t -> int -> unit
val typing_changed : ServerLocalConfig.t -> int -> unit
@@ -724,6 +724,8 @@ end = functor(CheckKind:CheckKindType) -> struct
let hs = SharedMem.heap_size () in
Hh_logger.log "Heap size: %d" hs;
HackEventLogger.first_redecl_end t hs;
ServerRevisionTracker.decl_changed genv.ServerEnv.local_config
(Relative_path.Set.cardinal to_redecl_phase2);
let t = Hh_logger.log_duration "Determining changes" t in

@@ -775,6 +777,8 @@ end = functor(CheckKind:CheckKindType) -> struct
let hs = SharedMem.heap_size () in
Hh_logger.log "Heap size: %d" hs;
HackEventLogger.second_redecl_end t hs;
ServerRevisionTracker.typing_changed genv.local_config
(Relative_path.Set.cardinal to_recheck);
let t = Hh_logger.log_duration logstring t in
let env = CheckKind.get_env_after_decl
~old_env:env ~files_info ~failed_naming in
@@ -786,8 +790,9 @@ end = functor(CheckKind:CheckKindType) -> struct
deptable_unlocked in

(* Checking this before starting typechecking because we want to attribtue
* big rechecks to rebases. *)
ServerRevisionTracker.check_blocking ();
* big rechecks to rebases, even when restarting is disabled *)
if genv.local_config.ServerLocalConfig.hg_aware_recheck_restart_threshold = 0 then
ServerRevisionTracker.check_blocking ();

let fast, lazy_check_later = CheckKind.get_defs_to_recheck
@@ -875,6 +880,9 @@ end = functor(CheckKind:CheckKindType) -> struct
let deptable_unlocked = Typing_deps.allow_dependency_table_reads true in
let new_env = ServerPrecheckedFiles.update_after_recheck genv new_env fast in
(* We might have completed a full check, which might mean that a rebase was
* successfully processed. *)
ServerRevisionTracker.check_non_blocking new_env;
let _ : bool = Typing_deps.allow_dependency_table_reads deptable_unlocked in

new_env, {reparse_count; total_rechecked_count;}

0 comments on commit 91ae167

Please sign in to comment.