diff --git a/Justfile b/Justfile
index a844c8e..4b1b1ec 100644
--- a/Justfile
+++ b/Justfile
@@ -95,6 +95,7 @@ examples-ci target=default-target features="": (build-rust-wasm-examples target)
     cargo run {{ if features =="" {''} else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example helloworld
     cargo run {{ if features =="" {''} else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example hostfuncs
     cargo run {{ if features =="" {''} else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example rust_wasm_examples
+    cargo run {{ if features =="" {''} else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example interruption
     cargo run {{ if features =="" {''} else {"--no-default-features -F function_call_metrics," + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example metrics
     cargo run {{ if features =="" {"--no-default-features --features kvm,mshv3"} else {"--no-default-features -F function_call_metrics," + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example metrics

diff --git a/src/hyperlight_wasm/Cargo.toml b/src/hyperlight_wasm/Cargo.toml
index 11bc6b4..3d8eca5 100644
--- a/src/hyperlight_wasm/Cargo.toml
+++ b/src/hyperlight_wasm/Cargo.toml
@@ -54,6 +54,11 @@ name = "tracing-otlp"
 path = "examples/tracing-otlp/main.rs"
 test = true

+[[example]]
+name = "interruption"
+path = "examples/interruption/main.rs"
+test = true
+
 [dependencies]
 hyperlight-host = { workspace = true }
 libc = { version = "0.2.178" }
diff --git a/src/hyperlight_wasm/examples/interruption/main.rs b/src/hyperlight_wasm/examples/interruption/main.rs
new file mode 100644
index 0000000..2d14d0d
--- /dev/null
+++ b/src/hyperlight_wasm/examples/interruption/main.rs
@@ -0,0 +1,143 @@
+/*
+Copyright 2024 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//! This example demonstrates how to:
+//! 1. Get an interrupt handle for a sandbox
+//! 2. Interrupt long-running guest code from another thread
+//! 3. Detect when a sandbox is poisoned
+//! 4. Recover a poisoned sandbox using `restore()` or `unload_module()`
+
+use std::thread;
+use std::time::Duration;
+
+use examples_common::get_wasm_module_path;
+use hyperlight_wasm::{HyperlightError, Result, SandboxBuilder};
+
+fn get_time_since_boot_microsecond() -> Result<i64> {
+    let res = std::time::SystemTime::now()
+        .duration_since(std::time::SystemTime::UNIX_EPOCH)?
+        .as_micros();
+    i64::try_from(res).map_err(HyperlightError::IntConversionFailure)
+}
+
+fn main() -> Result<()> {
+    println!("=== Hyperlight-Wasm Interruption Example ===\n");
+
+    // Build a sandbox and register host functions
+    let mut sandbox = SandboxBuilder::new().build()?;
+    sandbox.register(
+        "GetTimeSinceBootMicrosecond",
+        get_time_since_boot_microsecond,
+    )?;
+
+    let wasm_sandbox = sandbox.load_runtime()?;
+    let mod_path = get_wasm_module_path("RunWasm.aot")?;
+    let mut loaded = wasm_sandbox.load_module(mod_path)?;
+
+    println!("1. Sandbox created and module loaded");
+    assert!(!loaded.is_poisoned()?);
+    println!(" is_poisoned: {}", loaded.is_poisoned()?);
+
+    // Take a snapshot before we do anything
+    let snapshot = loaded.snapshot()?;
+    println!("2. Snapshot taken for later recovery\n");
+
+    // Get an interrupt handle - this can be sent to another thread
+    let interrupt = loaded.interrupt_handle()?;
+    println!("3. Interrupt handle obtained\n");
+
+    // Spawn a thread that will interrupt the guest after 1 second
+    println!("4. Starting long-running guest function...");
+    println!(" (A background thread will interrupt it after 1 second)\n");
+
+    thread::spawn(move || {
+        thread::sleep(Duration::from_secs(1));
+        println!(" [Background thread] Calling interrupt.kill()...");
+        interrupt.kill();
+    });
+
+    // Call a long-running guest function that will be interrupted
+    let result = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
+
+    match result {
+        Ok(_) => panic!(" Guest function completed (unexpected!)"),
+        Err(HyperlightError::ExecutionCanceledByHost()) => {
+            println!(" Guest function was interrupted (ExecutionCanceledByHost)");
+        }
+        Err(e) => panic!(" Unexpected error: {:?}", e),
+    }
+
+    println!("\n5. Checking sandbox state after interruption:");
+    println!(" is_poisoned: {}", loaded.is_poisoned()?);
+
+    // Demonstrate that calling a poisoned sandbox fails
+    println!("\n6. Attempting to call guest function on poisoned sandbox...");
+    let result = loaded.call_guest_function::<i32>("CalcFib", 10i32);
+
+    match result {
+        Ok(_) => panic!(" Call succeeded (unexpected!)"),
+        Err(HyperlightError::PoisonedSandbox) => {
+            println!(" Call failed with PoisonedSandbox error (expected)");
+        }
+        Err(e) => panic!(" Unexpected error: {:?}", e),
+    }
+
+    // Recovery option 1: Use restore() to recover the sandbox
+    println!("\n7. Recovering sandbox using restore()...");
+    loaded.restore(&snapshot)?;
+    assert!(!loaded.is_poisoned()?);
+    println!(" is_poisoned after restore: {}", loaded.is_poisoned()?);
+
+    // Now we can call guest functions again
+    println!("\n8. Calling guest function after recovery...");
+    let result: i32 = loaded.call_guest_function("CalcFib", 10i32)?;
+    println!(" CalcFib(10) returned: {} (expected 55)", result);
+
+    // Demonstrate recovery option 2: unload_module
+    println!("\n9. Demonstrating unload_module recovery...");
+
+    // First, poison the sandbox again
+    let interrupt = loaded.interrupt_handle()?;
+    thread::spawn(move || {
+        thread::sleep(Duration::from_millis(500));
+        interrupt.kill();
+    });
+    let _ = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
+
+    assert!(loaded.is_poisoned()?);
+    println!(" Sandbox poisoned again {}", loaded.is_poisoned()?);
+
+    // unload_module() will recover the sandbox
+    let wasm_sandbox = loaded.unload_module()?;
+    println!(" Module unloaded (this calls restore internally)");
+
+    // Load a different module and continue
+    let hello_path = get_wasm_module_path("HelloWorld.aot")?;
+    let mut new_loaded = wasm_sandbox.load_module(hello_path)?;
+    assert!(!new_loaded.is_poisoned()?);
+    println!(
+        " New module loaded, is_poisoned: {}",
+        new_loaded.is_poisoned()?
+    );
+
+    let result: i32 =
+        new_loaded.call_guest_function("HelloWorld", "Recovery successful!".to_string())?;
+
+    println!(" HelloWorld returned: {}", result);
+
+    println!("\n=== Example Complete ===");
+    Ok(())
+}
diff --git a/src/hyperlight_wasm/src/sandbox/loaded_wasm_sandbox.rs b/src/hyperlight_wasm/src/sandbox/loaded_wasm_sandbox.rs
index e69c867..c926b98 100644
--- a/src/hyperlight_wasm/src/sandbox/loaded_wasm_sandbox.rs
+++ b/src/hyperlight_wasm/src/sandbox/loaded_wasm_sandbox.rs
@@ -52,6 +52,19 @@ impl LoadedWasmSandbox {
     /// On success, return an `Ok` with the return
     /// value and a new copy of `Self` suitable for further use. On failure,
     /// return an appropriate `Err`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err(HyperlightError::PoisonedSandbox)` if the sandbox is in a
+    /// poisoned state. Use [`restore()`](Self::restore) to recover a poisoned
+    /// sandbox before calling this method again.
+    ///
+    /// Note: A sandbox becomes poisoned when a *previous* call fails due to
+    /// abnormal guest execution. That call returns the original error (e.g.,
+    /// `ExecutionCanceledByHost` from `interrupt_handle().kill()`, or errors
+    /// from guest panics, memory violations, etc.), and the sandbox is marked
+    /// as poisoned. This method then returns `PoisonedSandbox` on subsequent
+    /// calls until the sandbox is recovered.
     pub fn call_guest_function<Output: SupportedReturnType>(
         &mut self,
         fn_name: &str,
@@ -64,6 +77,15 @@ impl LoadedWasmSandbox {
     }

     /// Take a snapshot of the current state of the sandbox.
+    ///
+    /// The snapshot can later be used with [`restore()`](Self::restore) to
+    /// return the sandbox to this state.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err(HyperlightError::PoisonedSandbox)` if the sandbox is in a
+    /// poisoned state. Use [`restore()`](Self::restore) with a previously
+    /// taken snapshot to recover before taking a new snapshot.
     pub fn snapshot(&mut self) -> Result<Snapshot> {
         match &mut self.inner {
             Some(inner) => inner.snapshot(),
@@ -72,6 +94,17 @@ impl LoadedWasmSandbox {
     }

     /// Restore the state of the sandbox to the state captured in the given snapshot.
+    ///
+    /// This method clears the poisoned state if the sandbox was poisoned, making
+    /// it usable again for guest function calls.
+    ///
+    /// # Recovery from poisoned state
+    ///
+    /// If a sandbox becomes poisoned (e.g., after `interrupt_handle().kill()`),
+    /// calling `restore()` with a valid snapshot will:
+    /// 1. Clear the poisoned state
+    /// 2. Reset memory to the snapshot state
+    /// 3. Allow subsequent [`call_guest_function()`](Self::call_guest_function) calls to succeed
     pub fn restore(&mut self, snapshot: &Snapshot) -> Result<()> {
         match &mut self.inner {
             Some(inner) => inner.restore(snapshot),
@@ -79,7 +112,11 @@ impl LoadedWasmSandbox {
         }
     }

-    /// unload the wasm module and return a `WasmSandbox` that can be used to load another module
+    /// Unload the wasm module and return a `WasmSandbox` that can be used to load another module.
+    ///
+    /// This method internally calls [`restore()`](Self::restore) to reset the sandbox to its
+    /// pre-module state, which also clears any poisoned state. This means `unload_module()`
+    /// can be called on a poisoned sandbox to recover it.
     pub fn unload_module(mut self) -> Result<WasmSandbox> {
         let sandbox = self
             .inner
@@ -119,6 +156,36 @@ impl LoadedWasmSandbox {
             ))
         }
     }
+
+    /// Check if the sandbox is in a poisoned state.
+    ///
+    /// A sandbox becomes poisoned when guest execution does not complete normally,
+    /// such as after:
+    /// - Forced termination via `interrupt_handle().kill()`
+    /// - Guest panic or abort
+    /// - Memory violation
+    /// - Stack or heap exhaustion
+    ///
+    /// Note: The call that causes poisoning returns the original error (e.g.,
+    /// `ExecutionCanceledByHost`), not `PoisonedSandbox`. The sandbox is marked
+    /// as poisoned after that error, and subsequent calls to
+    /// [`call_guest_function()`](Self::call_guest_function) will return
+    /// `Err(HyperlightError::PoisonedSandbox)`.
+    ///
+    /// A poisoned sandbox cannot execute guest functions until recovered via
+    /// [`restore()`](Self::restore). Calling [`unload_module()`](Self::unload_module)
+    /// will also recover a poisoned sandbox since it performs a restore internally.
+    ///
+    /// # Returns
+    /// - `Ok(true)` if the sandbox is poisoned and needs recovery
+    /// - `Ok(false)` if the sandbox is healthy and can execute guest functions
+    /// - `Err` if the sandbox is in an invalid state
+    pub fn is_poisoned(&self) -> Result<bool> {
+        match &self.inner {
+            Some(inner) => Ok(inner.poisoned()),
+            None => log_then_return!("No inner MultiUseSandbox to check poisoned state"),
+        }
+    }
 }

 impl Callable for LoadedWasmSandbox {
diff --git a/src/hyperlight_wasm/src/sandbox/wasm_sandbox.rs b/src/hyperlight_wasm/src/sandbox/wasm_sandbox.rs
index 2629e34..074e575 100644
--- a/src/hyperlight_wasm/src/sandbox/wasm_sandbox.rs
+++ b/src/hyperlight_wasm/src/sandbox/wasm_sandbox.rs
@@ -240,6 +240,210 @@ mod tests {
             },
         }

+        // Verify sandbox is poisoned after interruption
+        assert!(
+            loaded.is_poisoned()?,
+            "Sandbox should be poisoned after interruption"
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_sandbox_is_poisoned_after_interruption() -> Result<()> {
+        let mut sandbox = SandboxBuilder::new().build()?;
+
+        sandbox.register(
+            "GetTimeSinceBootMicrosecond",
+            get_time_since_boot_microsecond,
+        )?;
+
+        let loaded = sandbox.load_runtime()?;
+        let run_wasm = get_test_file_path("RunWasm.aot")?;
+        let mut loaded = loaded.load_module(run_wasm)?;
+
+        // Verify sandbox is not poisoned initially
+        assert!(
+            !loaded.is_poisoned()?,
+            "Sandbox should not be poisoned initially"
+        );
+
+        let interrupt = loaded.interrupt_handle()?;
+
+        std::thread::spawn(move || {
+            std::thread::sleep(std::time::Duration::from_millis(500));
+            interrupt.kill();
+        });
+
+        // This call will be interrupted
+        let _ = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
+
+        // Verify sandbox is now poisoned
+        assert!(
+            loaded.is_poisoned()?,
+            "Sandbox should be poisoned after interruption"
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_call_guest_function_fails_when_poisoned() -> Result<()> {
+        let mut sandbox = SandboxBuilder::new().build()?;
+
+        sandbox.register(
+            "GetTimeSinceBootMicrosecond",
+            get_time_since_boot_microsecond,
+        )?;
+
+        let loaded = sandbox.load_runtime()?;
+        let run_wasm = get_test_file_path("RunWasm.aot")?;
+        let mut loaded = loaded.load_module(run_wasm)?;
+
+        let interrupt = loaded.interrupt_handle()?;
+
+        std::thread::spawn(move || {
+            std::thread::sleep(std::time::Duration::from_millis(500));
+            interrupt.kill();
+        });
+
+        // First call will be interrupted
+        let _ = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
+
+        // Second call should fail with PoisonedSandbox
+        let result = loaded.call_guest_function::<i32>("PrintOutput", 42i32);
+
+        match result {
+            Ok(_) => panic!("Expected PoisonedSandbox error"),
+            Err(HyperlightError::PoisonedSandbox) => {
+                // Expected error
+            }
+            Err(e) => panic!("Unexpected error: {:?}", e),
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_snapshot_fails_when_poisoned() -> Result<()> {
+        let mut sandbox = SandboxBuilder::new().build()?;
+
+        sandbox.register(
+            "GetTimeSinceBootMicrosecond",
+            get_time_since_boot_microsecond,
+        )?;
+
+        let loaded = sandbox.load_runtime()?;
+        let run_wasm = get_test_file_path("RunWasm.aot")?;
+        let mut loaded = loaded.load_module(run_wasm)?;
+
+        let interrupt = loaded.interrupt_handle()?;
+
+        std::thread::spawn(move || {
+            std::thread::sleep(std::time::Duration::from_millis(500));
+            interrupt.kill();
+        });
+
+        // Call will be interrupted, poisoning the sandbox
+        let _ = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
+
+        // Snapshot should fail on poisoned sandbox
+        let result = loaded.snapshot();
+
+        match result {
+            Ok(_) => panic!("Expected PoisonedSandbox error"),
+            Err(HyperlightError::PoisonedSandbox) => {
+                // Expected error
+            }
+            Err(e) => panic!("Unexpected error: {:?}", e),
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_restore_recovers_poisoned_sandbox() -> Result<()> {
+        let mut sandbox = SandboxBuilder::new().build()?;
+
+        sandbox.register(
+            "GetTimeSinceBootMicrosecond",
+            get_time_since_boot_microsecond,
+        )?;
+
+        let loaded = sandbox.load_runtime()?;
+        let run_wasm = get_test_file_path("RunWasm.aot")?;
+        let mut loaded = loaded.load_module(run_wasm)?;
+
+        // Take a snapshot before poisoning
+        let snapshot = loaded.snapshot()?;
+
+        let interrupt = loaded.interrupt_handle()?;
+
+        std::thread::spawn(move || {
+            std::thread::sleep(std::time::Duration::from_millis(500));
+            interrupt.kill();
+        });
+
+        // Call will be interrupted, poisoning the sandbox
+        let _ = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
+
+        assert!(loaded.is_poisoned()?, "Sandbox should be poisoned");
+
+        // Restore should recover the sandbox
+        loaded.restore(&snapshot)?;
+
+        assert!(
+            !loaded.is_poisoned()?,
+            "Sandbox should not be poisoned after restore"
+        );
+
+        // Should be able to call guest functions again
+        let result: i32 = loaded.call_guest_function("CalcFib", 10i32)?;
+        assert_eq!(result, 55);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_unload_module_recovers_poisoned_sandbox() -> Result<()> {
+        let mut sandbox = SandboxBuilder::new().build()?;
+
+        sandbox.register(
+            "GetTimeSinceBootMicrosecond",
+            get_time_since_boot_microsecond,
+        )?;
+
+        let loaded = sandbox.load_runtime()?;
+        let run_wasm = get_test_file_path("RunWasm.aot")?;
+        let mut loaded = loaded.load_module(run_wasm)?;
+
+        let interrupt = loaded.interrupt_handle()?;
+
+        std::thread::spawn(move || {
+            std::thread::sleep(std::time::Duration::from_millis(500));
+            interrupt.kill();
+        });
+
+        // Call will be interrupted, poisoning the sandbox
+        let _ = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
+
+        assert!(loaded.is_poisoned()?, "Sandbox should be poisoned");
+
+        // unload_module should recover the sandbox (it calls restore internally)
+        let wasm_sandbox = loaded.unload_module()?;
+
+        // Should be able to load a new module and call functions
+        let helloworld_wasm = get_test_file_path("HelloWorld.aot")?;
+        let mut new_loaded = wasm_sandbox.load_module(helloworld_wasm)?;
+
+        assert!(
+            !new_loaded.is_poisoned()?,
+            "New sandbox should not be poisoned"
+        );
+
+        let result: i32 = new_loaded.call_guest_function("HelloWorld", "Test".to_string())?;
+        assert_eq!(result, 0);
+
         Ok(())
     }