Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add poll watcher support to spec watcher for M1 #8449

Merged
merged 29 commits into from Mar 18, 2022
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 13 additions & 1 deletion components/hab/src/command/studio/enter.rs
Expand Up @@ -8,7 +8,8 @@ use crate::{common::ui::{UIWriter,
BLDR_URL_ENVVAR,
ORIGIN_ENVVAR};
use habitat_common::cli_config::CliConfig;
use habitat_core::AUTH_TOKEN_ENVVAR;
use habitat_core::{package::target::PackageTarget,
AUTH_TOKEN_ENVVAR};
use same_file::is_same_file;
use std::{env,
ffi::OsString,
Expand All @@ -19,6 +20,7 @@ use std::{env,
pub const ARTIFACT_PATH_ENVVAR: &str = "ARTIFACT_PATH";
pub const CERT_PATH_ENVVAR: &str = "CERT_PATH";
pub const SSL_CERT_FILE_ENVVAR: &str = "SSL_CERT_FILE";
pub const STUDIO_HOST_ARCH_ENVVAR: &str = "HAB_STUDIO_SECRET_HAB_STUDIO_HOST_ARCH";
atrniv marked this conversation as resolved.
Show resolved Hide resolved

const STUDIO_PACKAGE_IDENT: &str = "core/hab-studio";

Expand All @@ -42,6 +44,14 @@ fn set_env_var_from_config(env_var: &str, config_val: Option<String>, sensitive:
}
}

// Set the environment variable for the host architecture to be used in
// hab studio. It must be set outside of studio and passed in through
// the environment variable defined in STUDIO_HOST_ARCH_ENVVAR.
fn set_arch_env_var() {
env::set_var(STUDIO_HOST_ARCH_ENVVAR,
format!("{}", PackageTarget::active_target()));
}

fn cache_ssl_cert_file(cert_file: &str, cert_cache_dir: &Path) -> Result<()> {
let cert_path = Path::new(&cert_file);

Expand Down Expand Up @@ -74,6 +84,8 @@ pub async fn start(ui: &mut UI, args: &[OsString]) -> Result<()> {
config.origin.map(|o| o.to_string()),
Sensitivity::PrintValue);

set_arch_env_var();

if config.ctl_secret.is_some() {
ui.warn("Your Supervisor CtlGateway secret is not being copied to the Studio \
environment because the Studio's Supervisor is local. If you wish to contact a \
Expand Down
2 changes: 2 additions & 0 deletions components/sup/src/manager.rs
Expand Up @@ -9,6 +9,7 @@ mod self_updater;
mod service_updater;
mod spec_dir;
mod spec_watcher;
mod sup_watcher;
pub(crate) mod sys;
mod user_config_watcher;

Expand Down Expand Up @@ -691,6 +692,7 @@ impl Manager {
spec_dir.migrate_specs();

let spec_watcher = SpecWatcher::run(&spec_dir)?;
trace!("Created SpecWatcher");

if let Some(config) = cfg.event_stream_config {
// Collect the FQDN of the running machine
Expand Down
192 changes: 124 additions & 68 deletions components/sup/src/manager/file_watcher.rs
@@ -1,11 +1,11 @@
use super::sup_watcher::SupWatcher;
use crate::{error::{Error,
Result},
manager::debug::{IndentedStructFormatter,
IndentedToString}};
use habitat_common::liveliness_checker;
use notify::{self,
DebouncedEvent,
RecommendedWatcher,
RecursiveMode,
Watcher};
use std::{collections::{hash_map::Entry,
Expand Down Expand Up @@ -1141,56 +1141,30 @@ pub struct FileWatcher<C: Callbacks, W: Watcher> {

/// Convenience function for returning a new file watcher that matches
/// the platform.
pub fn default_file_watcher<P, C>(path: P,
callbacks: C)
-> Result<FileWatcher<C, RecommendedWatcher>>
pub fn create_file_watcher<P, C>(path: P,
callbacks: C,
ignore_initial: bool)
-> Result<FileWatcher<C, SupWatcher>>
where P: Into<PathBuf>,
C: Callbacks
{
FileWatcher::<C, RecommendedWatcher>::create(path, callbacks)
}

pub fn default_file_watcher_with_no_initial_event<P, C>(
path: P,
callbacks: C)
-> Result<FileWatcher<C, RecommendedWatcher>>
where P: Into<PathBuf>,
C: Callbacks
{
FileWatcher::<C, RecommendedWatcher>::create_with_no_initial_event(path, callbacks)
FileWatcher::<C, SupWatcher>::create(path, callbacks, ignore_initial)
}

impl<C: Callbacks, W: Watcher> FileWatcher<C, W> {
/// Creates a new `FileWatcher`.
///
/// This will create an instance of `W` and start watching the
/// paths. When looping the file watcher, it will emit an initial
/// "file appeared" event if the watched file existed when the
/// file watcher was created.
///
/// "file appeared" event if ignore_initial is Some(false) and the
/// watched file existed when the file watcher was created.
/// Will return `Error::NotifyCreateError` if creating the watcher
/// fails. In case of watching errors, it returns
/// `Error::NotifyError`.
pub fn create<P>(path: P, callbacks: C) -> Result<Self>
pub fn create<P>(path: P, callbacks: C, ignore_initial: bool) -> Result<Self>
where P: Into<PathBuf>
{
Self::create_instance(path, callbacks, true)
}

/// Creates a new `FileWatcher`.
///
/// This will create an instance of `W` and start watching the
/// paths. When looping the file watcher, it will not emit any
/// initial "file appeared" event even if the watched file existed
/// when the file watcher was created.
///
/// Will return `Error::NotifyCreateError` if creating the watcher
/// fails. In case of watching errors, it returns
/// `Error::NotifyError`.
pub fn create_with_no_initial_event<P>(path: P, callbacks: C) -> Result<Self>
where P: Into<PathBuf>
{
Self::create_instance(path, callbacks, false)
Self::create_instance(path, callbacks, ignore_initial)
}

// Creates an instance of the FileWatcher.
Expand Down Expand Up @@ -1600,6 +1574,8 @@ impl<C: Callbacks, W: Watcher> FileWatcher<C, W> {
// scenario, that involves symlinks.
#[cfg(all(unix, test))]
mod tests {
use crate::manager::sup_watcher::SupWatcher;
use habitat_core::locked_env_var;
use std::{collections::{HashMap,
HashSet,
VecDeque},
Expand All @@ -1621,7 +1597,6 @@ mod tests {
use notify::{self,
DebouncedEvent,
RawEvent,
RecommendedWatcher,
RecursiveMode,
Watcher};

Expand Down Expand Up @@ -1664,6 +1639,8 @@ mod tests {
};
);

locked_env_var!(HAB_STUDIO_HOST_ARCH, lock_env_var);

// Add new test cases here.
fn get_test_cases() -> Vec<TestCase> {
vec![TestCase { name: "Basic add/remove directories/files",
Expand Down Expand Up @@ -2402,15 +2379,39 @@ mod tests {
events: vec![NotifyEvent::disappeared(pb!("/a/b/c"))], },], },]
}

fn run_test_case(tc: &TestCase) {
let mut runner = TestCaseRunner::new();
runner.debug_info.add(format!("test case: {}", tc.name));
runner.run_init_commands(&tc.init.commands);
let setup = runner.prepare_watcher(&tc.init.path);
runner.run_steps(setup, &tc.init.initial_file, &tc.steps);
}

#[test]
fn file_watcher() {
let lock = lock_env_var();
lock.unset();

for tc in get_test_cases() {
let mut runner = TestCaseRunner::new();
runner.debug_info.add(format!("test case: {}", tc.name));
runner.run_init_commands(&tc.init.commands);
let setup = runner.prepare_watcher(&tc.init.path);
runner.run_steps(setup, &tc.init.initial_file, &tc.steps);
run_test_case(&tc);
}
}

#[test]
fn polling_file_watcher() {
let lock = lock_env_var();
lock.set("aarch64-darwin");

// When using the PollWatcher variant of SupWatcher, the
// behavior is different than the NotifyWatcher. The
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you briefly describe the difference and how that affects those cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Description updated in the code:
// When using the PollWatcher variant of SupWatcher, the
// behavior is different than the NotifyWatcher. The NotifyWatcher
// receives event callbacks in response to a file or directory change,
// while the PollWatcher must poll and check for changes.
// As such, there were observed differences in the timing and number
// of events that the PollWatcher will handle. It was determined
// through analyzing the output that the poll watcher as written is not handling
// test cases correctly after the second test case. It does not receive
// the required events to pass the test case regardless of timing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you briefly describe the difference and how that affects the tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

// first two test cases will pass if the timing is adjusted
// as well as the expected number of iterations.
let cases = get_test_cases();
let polling_cases = &cases[0..2];
for tc in polling_cases {
run_test_case(tc);
}
lock.unset();
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets extract the common code from these 2 tests into a sharable function.


// Commands that can be executed at the test case init.
Expand Down Expand Up @@ -2587,7 +2588,7 @@ mod tests {
// purposes.
struct TestWatcher {
// The real watcher that does the grunt work.
real_watcher: RecommendedWatcher,
real_watcher: SupWatcher,
// A set of watched dirs. We will use these to correctly
// compute the number of iterations to perform after executing
// the step action.
Expand All @@ -2596,12 +2597,12 @@ mod tests {

impl Watcher for TestWatcher {
fn new_raw(tx: Sender<RawEvent>) -> notify::Result<Self> {
Ok(TestWatcher { real_watcher: RecommendedWatcher::new_raw(tx)?,
Ok(TestWatcher { real_watcher: SupWatcher::new_raw(tx)?,
watched_dirs: HashSet::new(), })
}

fn new(tx: Sender<DebouncedEvent>, d: Duration) -> notify::Result<Self> {
Ok(TestWatcher { real_watcher: RecommendedWatcher::new(tx, d)?,
Ok(TestWatcher { real_watcher: SupWatcher::new(tx, d)?,
watched_dirs: HashSet::new(), })
}

Expand Down Expand Up @@ -2904,7 +2905,7 @@ mod tests {
}

fn run_init_commands(&mut self, commands: &[InitCommand]) {
let fs_ops = self.get_fs_ops();
let fs_ops = self.get_fs_ops_init();
for command in commands {
match command {
InitCommand::MkdirP(ref path) => {
Expand All @@ -2925,9 +2926,13 @@ mod tests {
let additional_dirs = self.get_additional_directories_from_root();
let callbacks = TestCallbacks::new(&additional_dirs);
let watcher = FileWatcher::<_, TestWatcher>::create(self.prepend_root(&init_path),
callbacks).unwrap_or_else(|_| {
panic!("failed to create watcher, debug info:\n{}", self.debug_info,)
});
callbacks,
true).unwrap_or_else(|_| {
panic!("failed to create \
watcher, debug \
info:\n{}",
self.debug_info,)
});
WatcherSetup { init_path, watcher }
}

Expand All @@ -2937,29 +2942,49 @@ mod tests {
steps: &[Step]) {
let mut initial_file = tc_initial_file.clone();
let mut actual_initial_file = setup.watcher.initial_real_file.clone();

let is_poll_watcher = match &setup.watcher.get_mut_underlying_watcher().real_watcher {
SupWatcher::Native(_watcher) => false,
SupWatcher::Fallback(_watcher) => true,
};

for (step_idx, step) in steps.iter().enumerate() {
self.debug_info.push_level();
self.debug_info
.add(format!("step {}:\n{}", step_idx, dits!(step)));
let iterations = self.execute_step_action(&mut setup, &step.action);
self.spin_watcher(&mut setup, iterations);
let expected_event_count = self.execute_step_action(&mut setup, &step.action);
self.spin_watcher(&mut setup, expected_event_count);

if is_poll_watcher {
thread::sleep(Duration::from_secs(5));
};
self.test_dirs(&step.dirs, &setup.watcher.paths.dirs);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

based on my buildkite and local tests, we can remove this sleep.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

based on my buildkite and local tests, we can remove this sleep.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

self.test_paths(&step.paths, &setup.init_path, &setup.watcher.paths.paths);
let real_initial_file =
self.test_initial_file(iterations, &mut initial_file, &mut actual_initial_file);
self.test_events(real_initial_file,
&step.events,
&mut setup.watcher.get_mut_callbacks().events);
let real_initial_file = self.test_initial_file(expected_event_count,
&mut initial_file,
&mut actual_initial_file);

if is_poll_watcher {
self.test_events_polling(real_initial_file,
&step.events,
&mut setup.watcher.get_mut_callbacks().events);
} else {
self.test_events(real_initial_file,
&step.events,
&mut setup.watcher.get_mut_callbacks().events);
}
self.debug_info.pop_level();
debug!("\n\n\n++++++++++++++++\n++++STEP+END++++\n++++++++++++++++\n\n\n");
}

debug!("\n\n\n================\n=TEST=CASE=ENDS=\n================\n\n\n");
}

fn execute_step_action(&mut self, setup: &mut WatcherSetup, action: &StepAction) -> u32 {
let tw = setup.watcher.get_mut_underlying_watcher();
let iterations = {
let tw = setup.watcher.get_mut_underlying_watcher();
let mut fs_ops = self.get_fs_ops_with_dirs(&tw.watched_dirs);

match action {
StepAction::LnS(ref target, ref path) => fs_ops.ln_s(target, path),
StepAction::MkdirP(ref path) => fs_ops.mkdir_p(path),
Expand All @@ -2974,12 +2999,21 @@ mod tests {
iterations
}

fn spin_watcher(&self, setup: &mut WatcherSetup, iterations: u32) {
fn spin_watcher(&self, setup: &mut WatcherSetup, expected_event_count: u32) {
let mut iteration = 0;

// After switching single_iteration() from recv() to try_recv(), this sleep is required
// for these tests to pass.
thread::sleep(Duration::from_secs(3));
let is_poll_watcher = match &setup.watcher.get_mut_underlying_watcher().real_watcher {
SupWatcher::Native(_watcher) => false,
SupWatcher::Fallback(_watcher) => true,
};

let mut iterations = expected_event_count;
if is_poll_watcher {
thread::sleep(Duration::from_secs(15));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

15 seconds seems really high. Did we play much with smaller delays?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally got things working with a 30 second delay and then reduced it to 15 seconds and did not find it to be a problem. I can try. Will reduce if it will continue to pass the tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update - 5 second sleep failed. 10 second sleep failed. Reverting to 15 second delay.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should be able to bring this to 5

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should be able to bring this to 5

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

15 seconds seems really high. Did we play much with smaller delays?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally got things working with a 30 second delay and then reduced it to 15 seconds and did not find it to be a problem. I can try. Will reduce if it will continue to pass the tests.

iterations *= 5;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you explain why the iteration count needs to be bumped?

Copy link
Contributor Author

@pozsgaic pozsgaic Mar 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Through experimentation it was determined that the PollWatcher is less responsive
and emits more events than the NotifyWatcher. The initial sleep used in NotifyWatcher
was not adequate to pass the tests and was increased as a result. Also the number of iterations
required is larger for the PollWatcher case as there were intermediate events observed that would lead to
test case failure with the original iteration count used. The test cases will fail if the desired events are not emitted, so the iteration count was increased to account for the increased number of events in the PollWatcher."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should be able to make this 3 iterations

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should be able to make this 3 iterations

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you explain why the iteration count needs to be bumped?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Through experimentation it was determined that the PollWatcher is less responsive
and emits more events than the NotifyWatcher. The initial sleep used in NotifyWatcher
was not adequate to pass the tests and was increased as a result. Also the number of iterations
required is larger for the PollWatcher case as there were intermediate events observed that would lead to
test case failure with the original iteration count used. The test cases will fail if the desired events are not emitted, so the iteration count was increased to account for the increased number of events in the PollWatcher."

} else {
thread::sleep(Duration::from_secs(3));
}

while iteration < iterations {
setup.watcher
Expand Down Expand Up @@ -3038,6 +3072,24 @@ mod tests {
}
}

// For the poll watcher tehre are more than one Debounced event received
// and this test will fail. Instead we can look at the last event and
// ensure that it is correct, as it will be the last entry that will
// determine the final state of the watched.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we have both tests just look at the last one if that is the only one that matters?

Copy link
Contributor Author

@pozsgaic pozsgaic Mar 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note the reason for testing only the last element in the PollWatcher tests as it can't test the entire set of events in order as the NotifyWatcher can. That would be a degradation of the NotifyWatcher test. It does work though if you think we should add it as a test for NotifyWatcher.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we have both tests just look at the last one if that is the only one that matters?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate question

fn test_events_polling(&mut self,
real_initial_file: Option<PathBuf>,
step_events: &[NotifyEvent],
actual_events: &mut Vec<NotifyEvent>) {
let expected_events = self.fixup_expected_events(step_events, real_initial_file);
self.debug_info
.add(format!("fixed up expected events: {:?}", expected_events));
assert_eq!(expected_events.last(),
actual_events.last(),
"comparing expected events, debug info:\n{}",
self.debug_info,);
actual_events.clear();
}

fn test_events(&mut self,
real_initial_file: Option<PathBuf>,
step_events: &[NotifyEvent],
Expand All @@ -3051,18 +3103,22 @@ mod tests {
actual_events.clear();
}

fn get_fs_ops_with_dirs<'a>(&'a mut self, watched_dirs: &'a HashSet<PathBuf>) -> FsOps<'a> {
let mut fs_ops = self.get_fs_ops();
fs_ops.watched_dirs = Some(watched_dirs);
fs_ops
}

fn get_fs_ops(&mut self) -> FsOps<'_> {
// For performing initial setup of watcher directories. This occurs
// before creation of the watcher.
fn get_fs_ops_init(&mut self) -> FsOps<'_> {
FsOps { debug_info: &mut self.debug_info,
root: &self.root,
watched_dirs: None, }
}

// For running watcher tests, this requires a WatcherType so we can
// delineate between the NotifyWatcher and PollWatcher specific behaviors.
fn get_fs_ops_with_dirs<'a>(&'a mut self, watched_dirs: &'a HashSet<PathBuf>) -> FsOps<'a> {
let mut fs_ops = self.get_fs_ops_init();
fs_ops.watched_dirs = Some(watched_dirs);
fs_ops
}

fn fixup_expected_dirs(&self, dirs: &HashMap<PathBuf, u32>) -> HashMap<PathBuf, u32> {
let mut expected_dirs = dirs.iter()
.map(|(p, c)| (self.prepend_root(p), *c))
Expand Down