diff --git a/.gitignore b/.gitignore
index 155e4cbd8a8..1044507cef7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ test_results/*
 /resources/linux
 /resources/x86_64
 /resources/aarch64
+aws/
\ No newline at end of file
diff --git a/docs/logger.md b/docs/logger.md
index 4585c4f3f67..3b0f9a082ae 100644
--- a/docs/logger.md
+++ b/docs/logger.md
@@ -50,7 +50,7 @@ process:
 ```
 
 The other Logger fields have, in this case, the default values:
-`Level -> Warning`, `show_level -> false`, `show_log_origin -> false`. For
+`Level -> Debug`, `show_level -> false`, `show_log_origin -> false`. For
 configuring these too, you can also pass the following optional parameters:
 `--level <log_level>`, `--show-level`, `--show-log-origin`:
 
diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml
index 5f16fc1df40..6bdc63f7792 100644
--- a/src/firecracker/swagger/firecracker.yaml
+++ b/src/firecracker/swagger/firecracker.yaml
@@ -85,12 +85,12 @@ paths:
         Will fail if update is not possible.
       operationId: putBalloon
       parameters:
-      - name: body
-        in: body
-        description: Balloon properties
-        required: true
-        schema:
-          $ref: "#/definitions/Balloon"
+        - name: body
+          in: body
+          description: Balloon properties
+          required: true
+          schema:
+            $ref: "#/definitions/Balloon"
       responses:
         204:
           description: Balloon device created/updated
@@ -109,12 +109,12 @@ paths:
         Will fail if update is not possible.
       operationId: patchBalloon
       parameters:
-      - name: body
-        in: body
-        description: Balloon properties
-        required: true
-        schema:
-          $ref: "#/definitions/BalloonUpdate"
+        - name: body
+          in: body
+          description: Balloon properties
+          required: true
+          schema:
+            $ref: "#/definitions/BalloonUpdate"
       responses:
         204:
           description: Balloon device updated
@@ -151,12 +151,12 @@ paths:
         Will fail if update is not possible.
       operationId: patchBalloonStatsInterval
       parameters:
-      - name: body
-        in: body
-        description: Balloon properties
-        required: true
-        schema:
-          $ref: "#/definitions/BalloonStatsUpdate"
+        - name: body
+          in: body
+          description: Balloon properties
+          required: true
+          schema:
+            $ref: "#/definitions/BalloonStatsUpdate"
       responses:
         204:
           description: Balloon statistics interval updated
@@ -220,7 +220,6 @@ paths:
           schema:
             $ref: "#/definitions/Error"
 
-
   /drives/{drive_id}:
     put:
       summary: Creates or updates a drive. Pre-boot only.
@@ -488,8 +487,7 @@ paths:
   /entropy:
     put:
       summary: Creates an entropy device. Pre-boot only.
-      description:
-        Enables an entropy device that provides high-quality random data to the guest.
+      description: Enables an entropy device that provides high-quality random data to the guest.
       operationId: putEntropyDevice
       parameters:
         - name: body
@@ -506,12 +504,10 @@ paths:
           schema:
             $ref: "#/definitions/Error"
 
-
   /network-interfaces/{iface_id}:
     put:
       summary: Creates a network interface. Pre-boot only.
-      description:
-        Creates new network interface with ID specified by iface_id path parameter.
+      description: Creates new network interface with ID specified by iface_id path parameter.
       operationId: putGuestNetworkInterfaceByID
       parameters:
         - name: iface_id
@@ -538,8 +534,7 @@ paths:
             $ref: "#/definitions/Error"
     patch:
       summary: Updates the rate limiters applied to a network interface. Post-boot only.
-      description:
-        Updates the rate limiters applied to a network interface.
+      description: Updates the rate limiters applied to a network interface.
       operationId: patchGuestNetworkInterfaceByID
       parameters:
         - name: iface_id
@@ -594,8 +589,7 @@ paths:
   /snapshot/load:
     put:
       summary: Loads a snapshot. Pre-boot only.
-      description:
-        Loads the microVM state from a snapshot.
+      description: Loads the microVM state from a snapshot.
         Only accepted on a fresh Firecracker process (before configuring
         any resource other than the Logger and Metrics).
       operationId: loadSnapshot
@@ -635,8 +629,7 @@ paths:
   /vm:
     patch:
       summary: Updates the microVM state.
-      description:
-        Sets the desired state (Paused or Resumed) for the microVM.
+      description: Sets the desired state (Paused or Resumed) for the microVM.
       operationId: patchVm
       parameters:
         - name: body
@@ -707,8 +700,7 @@ definitions:
     required:
       - amount_mib
       - deflate_on_oom
-    description:
-      Balloon device descriptor.
+    description: Balloon device descriptor.
     properties:
       amount_mib:
         type: integer
@@ -724,8 +716,7 @@ definitions:
     type: object
     required:
       - amount_mib
-    description:
-      Balloon device descriptor.
+    description: Balloon device descriptor.
     properties:
       amount_mib:
         type: integer
@@ -733,8 +724,7 @@ definitions:
 
   BalloonStats:
     type: object
-    description:
-      Describes the balloon device statistics.
+    description: Describes the balloon device statistics.
     required:
       - target_pages
       - actual_pages
@@ -798,8 +788,7 @@ definitions:
     type: object
     required:
       - stats_polling_interval_s
-    description:
-      Update the statistics polling interval, with the first statistics update scheduled immediately. Statistics cannot be turned on/off after boot.
+    description: Update the statistics polling interval, with the first statistics update scheduled immediately. Statistics cannot be turned on/off after boot.
     properties:
       stats_polling_interval_s:
         type: integer
@@ -809,8 +798,7 @@ definitions:
     type: object
     required:
       - kernel_image_path
-    description:
-      Boot source descriptor.
+    description: Boot source descriptor.
     properties:
       boot_args:
         type: string
@@ -879,21 +867,18 @@ definitions:
         type: boolean
       cache_type:
         type: string
-        description:
-          Represents the caching strategy for the block device.
+        description: Represents the caching strategy for the block device.
         enum: ["Unsafe", "Writeback"]
         default: "Unsafe"
 
       # VirtioBlock specific parameters
       is_read_only:
         type: boolean
-        description:
-          Is block read only.
+        description: Is block read only.
           This field is required for virtio-block config and should be omitted for vhost-user-block configuration.
       path_on_host:
         type: string
-        description:
-          Host level path for the guest drive.
+        description: Host level path for the guest drive.
           This field is required for virtio-block config and should be omitted for vhost-user-block configuration.
       rate_limiter:
         $ref: "#/definitions/RateLimiter"
@@ -909,8 +894,7 @@ definitions:
       # VhostUserBlock specific parameters
       socket:
         type: string
-        description:
-          Path to the socket of vhost-user-block backend.
+        description: Path to the socket of vhost-user-block backend.
           This field is required for vhost-user-block config should be omitted for virtio-block configuration.
 
   Error:
@@ -955,8 +939,7 @@ definitions:
 
   InstanceActionInfo:
     type: object
-    description:
-      Variant wrapper containing the real action.
+    description: Variant wrapper containing the real action.
     required:
       - action_type
     properties:
@@ -970,8 +953,7 @@ definitions:
 
   InstanceInfo:
     type: object
-    description:
-      Describes MicroVM instance information.
+    description: Describes MicroVM instance information.
     required:
       - app_name
       - id
@@ -999,14 +981,13 @@ definitions:
 
   Logger:
     type: object
-    description:
-      Describes the configuration option for the logging capability.
+    description: Describes the configuration option for the logging capability.
     properties:
       level:
         type: string
         description: Set the level. The possible values are case-insensitive.
         enum: [Error, Warning, Info, Debug, Trace, Off]
-        default: Info
+        default: Debug
       log_path:
         type: string
         description: Path to the named pipe or file for the human readable log output.
@@ -1085,8 +1066,7 @@ definitions:
 
   Metrics:
     type: object
-    description:
-      Describes the configuration option for the metrics capability.
+    description: Describes the configuration option for the metrics capability.
     required:
       - metrics_path
     properties:
@@ -1096,8 +1076,7 @@ definitions:
 
   MmdsConfig:
     type: object
-    description:
-      Defines the MMDS configuration.
+    description: Defines the MMDS configuration.
     required:
       - network_interfaces
     properties:
@@ -1134,13 +1113,11 @@ definitions:
 
   MmdsContentsObject:
     type: object
-    description:
-      Describes the contents of MMDS in JSON format.
+    description: Describes the contents of MMDS in JSON format.
 
   NetworkInterface:
     type: object
-    description:
-      Defines a network interface.
+    description: Defines a network interface.
     required:
       - host_dev_name
       - iface_id
@@ -1166,8 +1143,7 @@ definitions:
         type: string
       path_on_host:
         type: string
-        description:
-          Host level path for the guest drive.
+        description: Host level path for the guest drive.
           This field is optional for virtio-block config and should be omitted for vhost-user-block configuration.
       rate_limiter:
         $ref: "#/definitions/RateLimiter"
@@ -1233,12 +1209,10 @@ definitions:
     properties:
       iface_id:
         type: string
-        description:
-          The name of the interface to modify
+        description: The name of the interface to modify
       host_dev_name:
         type: string
-        description:
-          The new host device of the interface
+        description: The new host device of the interface
 
   SnapshotLoadParams:
     type: object
@@ -1250,12 +1224,10 @@ definitions:
     properties:
       enable_diff_snapshots:
         type: boolean
-        description:
-          (Deprecated) Enable dirty page tracking to improve space efficiency of diff snapshots
+        description: (Deprecated) Enable dirty page tracking to improve space efficiency of diff snapshots
       track_dirty_pages:
         type: boolean
-        description:
-          Enable dirty page tracking to improve space efficiency of diff snapshots
+        description: Enable dirty page tracking to improve space efficiency of diff snapshots
       mem_file_path:
         type: string
         description:
@@ -1273,15 +1245,13 @@ definitions:
         description: Path to the file that contains the microVM state to be loaded.
       resume_vm:
         type: boolean
-        description:
-          When set to true, the vm is also resumed if the snapshot load is successful.
+        description: When set to true, the vm is also resumed if the snapshot load is successful.
       network_overrides:
         type: array
         description: Network host device names to override
         items:
           $ref: "#/definitions/NetworkOverride"
 
-
   TokenBucket:
     type: object
     description:
@@ -1315,8 +1285,7 @@ definitions:
 
   Vm:
     type: object
-    description:
-      Defines the microVM running state. It is especially useful in the snapshotting context.
+    description: Defines the microVM running state. It is especially useful in the snapshotting context.
     required:
       - state
     properties:
@@ -1328,16 +1297,14 @@ definitions:
 
   EntropyDevice:
     type: object
-    description:
-      Defines an entropy device.
+    description: Defines an entropy device.
     properties:
       rate_limiter:
         $ref: "#/definitions/RateLimiter"
 
   FirecrackerVersion:
     type: object
-    description:
-      Describes the Firecracker version.
+    description: Describes the Firecracker version.
     required:
       - firecracker_version
     properties:
diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs
index 8ebf3250581..e3554c60470 100644
--- a/src/vmm/src/device_manager/pci_mngr.rs
+++ b/src/vmm/src/device_manager/pci_mngr.rs
@@ -311,7 +311,9 @@ impl<'a> Persist<'a> for PciDevices {
                              snapshotting yet"
                         );
                     } else {
-                        block_dev.prepare_save();
+                        if let Err(err) = block_dev.prepare_save() {
+                            warn!("Failed to prepare block device for save: {:?}", err);
+                        }
                         let device_state = block_dev.save();
                         state.block_devices.push(VirtioDeviceState {
                             device_id: block_dev.id().to_string(),
diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs
index 0a074dc9fa1..9cbab81fd79 100644
--- a/src/vmm/src/device_manager/persist.rs
+++ b/src/vmm/src/device_manager/persist.rs
@@ -265,7 +265,9 @@ impl<'a> Persist<'a> for MMIODeviceManager {
                              snapshotting yet"
                         );
                     } else {
-                        block.prepare_save();
+                        if let Err(err) = block.prepare_save() {
+                            warn!("Failed to prepare block device for save: {:?}", err);
+                        }
                         let device_state = block.save();
                         states.block_devices.push(VirtioDeviceState {
                             device_id,
diff --git a/src/vmm/src/devices/virtio/block/device.rs b/src/vmm/src/devices/virtio/block/device.rs
index 93eb56e7760..1c1175f93c4 100644
--- a/src/vmm/src/devices/virtio/block/device.rs
+++ b/src/vmm/src/devices/virtio/block/device.rs
@@ -82,10 +82,10 @@ impl Block {
         }
     }
 
-    pub fn prepare_save(&mut self) {
+    pub fn prepare_save(&mut self) -> Result<(), BlockError> {
         match self {
-            Self::Virtio(b) => b.prepare_save(),
-            Self::VhostUser(b) => b.prepare_save(),
+            Self::Virtio(b) => b.prepare_save().map_err(BlockError::VirtioBackend),
+            Self::VhostUser(_) => Err(BlockError::InvalidBlockBackend),
         }
     }
 
diff --git a/src/vmm/src/devices/virtio/block/virtio/device.rs b/src/vmm/src/devices/virtio/block/virtio/device.rs
index 62ebdbbf5fd..b7e26ea4d40 100644
--- a/src/vmm/src/devices/virtio/block/virtio/device.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/device.rs
@@ -35,7 +35,7 @@ use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
 use crate::devices::virtio::queue::{InvalidAvailIdx, Queue};
 use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
 use crate::impl_device_type;
-use crate::logger::{IncMetric, error, warn};
+use crate::logger::{IncMetric, debug, error, warn};
 use crate::rate_limiter::{BucketUpdate, RateLimiter};
 use crate::utils::u64_to_usize;
 use crate::vmm_config::RateLimiterConfig;
@@ -474,6 +474,12 @@ impl VirtioBlock {
         // This is safe since we checked in the event handler that the device is activated.
         let active_state = self.device_state.active_state().unwrap();
         let queue = &mut self.queues[0];
+        let mut processed = 0_u32;
+        let pending_before = engine.pending_ops();
+        debug!(
+            "virtio-block '{}' starting async completion drain: pending_ops={}",
+            self.id, pending_before
+        );
 
         loop {
             match engine.pop(&active_state.mem) {
@@ -504,6 +510,7 @@ impl VirtioBlock {
                                 finished.desc_idx, err
                             )
                         });
+                    processed = processed.saturating_add(1);
                 }
             }
         }
@@ -517,6 +524,12 @@ impl VirtioBlock {
                     self.metrics.event_fails.inc();
                 });
         }
+        debug!(
+            "virtio-block '{}' processed {} io_uring completions; pending_ops={} after drain",
+            self.id,
+            processed,
+            engine.pending_ops()
+        );
     }
 
     pub fn process_async_completion_event(&mut self) {
@@ -563,22 +576,48 @@ impl VirtioBlock {
         }
     }
 
-    fn drain_and_flush(&mut self, discard: bool) {
-        if let Err(err) = self.disk.file_engine.drain_and_flush(discard) {
-            error!("Failed to drain ops and flush block data: {:?}", err);
-        }
+    fn drain_and_flush(&mut self, discard: bool) -> Result<(), VirtioBlockError> {
+        self.disk
+            .file_engine
+            .drain_and_flush(discard)
+            .map_err(VirtioBlockError::FileEngine)
     }
 
     /// Prepare device for being snapshotted.
-    pub fn prepare_save(&mut self) {
+    pub fn prepare_save(&mut self) -> Result<(), VirtioBlockError> {
         if !self.is_activated() {
-            return;
+            debug!(
+                "virtio-block '{}' prepare_save skipped (device inactive)",
+                self.id
+            );
+            return Ok(());
         }
 
-        self.drain_and_flush(false);
-        if let FileEngine::Async(ref _engine) = self.disk.file_engine {
+        debug!(
+            "virtio-block '{}' draining outstanding IO before snapshot",
+            self.id
+        );
+        self.drain_and_flush(false)?;
+        let is_async = matches!(self.disk.file_engine, FileEngine::Async(_));
+        if is_async {
             self.process_async_completion_queue();
+            if let FileEngine::Async(ref engine) = self.disk.file_engine {
+                let pending = engine.pending_ops();
+                debug!(
+                    "virtio-block '{}' prepare_save async pending_ops after drain: {}",
+                    self.id, pending
+                );
+                if pending != 0 {
+                    return Err(VirtioBlockError::PendingAsyncOperations(pending));
+                }
+            }
         }
+
+        debug!(
+            "virtio-block '{}' prepare_save finished; async backend drained",
+            self.id
+        );
+        Ok(())
     }
 }
 
@@ -682,7 +721,9 @@ impl Drop for VirtioBlock {
                 }
             }
             CacheType::Writeback => {
-                self.drain_and_flush(true);
+                if let Err(err) = self.drain_and_flush(true) {
+                    error!("Failed to drain ops and flush block data: {:?}", err);
+                }
             }
         };
     }
@@ -1673,7 +1714,7 @@ mod tests {
             // Add a batch of flush requests.
             add_flush_requests_batch(&mut block, &vq, 5);
             simulate_queue_event(&mut block, None);
-            block.prepare_save();
+            block.prepare_save().unwrap();
 
             // Check that all the pending flush requests were processed during `prepare_save()`.
             check_flush_requests_batch(5, &vq);
diff --git a/src/vmm/src/devices/virtio/block/virtio/io/async_io.rs b/src/vmm/src/devices/virtio/block/virtio/io/async_io.rs
index f83d9dea1df..4b7e4517d4a 100644
--- a/src/vmm/src/devices/virtio/block/virtio/io/async_io.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/io/async_io.rs
@@ -14,7 +14,7 @@ use crate::devices::virtio::block::virtio::{IO_URING_NUM_ENTRIES, PendingRequest
 use crate::io_uring::operation::{Cqe, OpCode, Operation};
 use crate::io_uring::restriction::Restriction;
 use crate::io_uring::{IoUring, IoUringError};
-use crate::logger::log_dev_preview_warning;
+use crate::logger::{debug, log_dev_preview_warning};
 use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryExtension, GuestMemoryMmap};
 
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -119,6 +119,10 @@ impl AsyncFileEngine {
         &self.completion_evt
     }
 
+    pub fn pending_ops(&self) -> u32 {
+        self.ring.num_ops()
+    }
+
     pub fn push_read(
         &mut self,
         offset: u64,
@@ -127,6 +131,8 @@ impl AsyncFileEngine {
         count: u32,
         req: PendingRequest,
     ) -> Result<(), RequestError<AsyncIoError>> {
+        let req_type = req.request_type();
+        let desc_idx = req.desc_idx();
         let buf = match mem.get_slice(addr, count as usize) {
             Ok(slice) => slice.ptr_guard_mut().as_ptr(),
             Err(err) => {
@@ -150,7 +156,17 @@ impl AsyncFileEngine {
             .map_err(|(io_uring_error, data)| RequestError {
                 req: data.req,
                 error: AsyncIoError::IoUring(io_uring_error),
-            })
+            })?;
+
+        debug!(
+            "AsyncFileEngine queued {:?} request desc_idx={} offset={} len={} pending_ops={}",
+            req_type,
+            desc_idx,
+            offset,
+            count,
+            self.pending_ops()
+        );
+        Ok(())
     }
 
     pub fn push_write(
@@ -161,6 +177,8 @@ impl AsyncFileEngine {
         count: u32,
         req: PendingRequest,
     ) -> Result<(), RequestError<AsyncIoError>> {
+        let req_type = req.request_type();
+        let desc_idx = req.desc_idx();
         let buf = match mem.get_slice(addr, count as usize) {
             Ok(slice) => slice.ptr_guard_mut().as_ptr(),
             Err(err) => {
@@ -184,10 +202,22 @@ impl AsyncFileEngine {
             .map_err(|(io_uring_error, data)| RequestError {
                 req: data.req,
                 error: AsyncIoError::IoUring(io_uring_error),
-            })
+            })?;
+
+        debug!(
+            "AsyncFileEngine queued {:?} request desc_idx={} offset={} len={} pending_ops={}",
+            req_type,
+            desc_idx,
+            offset,
+            count,
+            self.pending_ops()
+        );
+        Ok(())
     }
 
     pub fn push_flush(&mut self, req: PendingRequest) -> Result<(), RequestError<AsyncIoError>> {
+        let req_type = req.request_type();
+        let desc_idx = req.desc_idx();
         let wrapped_user_data = WrappedRequest::new(req);
 
         self.ring
@@ -195,7 +225,15 @@ impl AsyncFileEngine {
             .map_err(|(io_uring_error, data)| RequestError {
                 req: data.req,
                 error: AsyncIoError::IoUring(io_uring_error),
-            })
+            })?;
+
+        debug!(
+            "AsyncFileEngine queued {:?} request desc_idx={} pending_ops={}",
+            req_type,
+            desc_idx,
+            self.pending_ops()
+        );
+        Ok(())
     }
 
     pub fn kick_submission_queue(&mut self) -> Result<(), AsyncIoError> {
@@ -220,12 +258,26 @@ impl AsyncFileEngine {
     }
 
     pub fn drain_and_flush(&mut self, discard_cqes: bool) -> Result<(), AsyncIoError> {
+        let pending_before = self.pending_ops();
+        debug!(
+            "AsyncFileEngine draining: pending_ops={} discard_cqes={}",
+            pending_before, discard_cqes
+        );
         self.drain(discard_cqes)?;
+        debug!(
+            "AsyncFileEngine drain complete: pending_ops={} (discard_cqes={})",
+            self.pending_ops(),
+            discard_cqes
+        );
 
         // Sync data out to physical media on host.
         // We don't need to call flush first since all the ops are performed through io_uring
         // and Rust shouldn't manage any data in its internal buffers.
         self.file.sync_all().map_err(AsyncIoError::SyncAll)?;
+        debug!(
+            "AsyncFileEngine sync complete: pending_ops={}",
+            self.pending_ops()
+        );
 
         Ok(())
     }
diff --git a/src/vmm/src/devices/virtio/block/virtio/io/mod.rs b/src/vmm/src/devices/virtio/block/virtio/io/mod.rs
index 09cc7c4e31d..05a7982b8e0 100644
--- a/src/vmm/src/devices/virtio/block/virtio/io/mod.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/io/mod.rs
@@ -172,6 +172,13 @@ impl FileEngine {
             FileEngine::Sync(engine) => engine.flush().map_err(BlockIoError::Sync),
         }
     }
+
+    pub fn pending_async_ops(&self) -> u32 {
+        match self {
+            FileEngine::Async(engine) => engine.pending_ops(),
+            FileEngine::Sync(_) => 0,
+        }
+    }
 }
 
 #[cfg(test)]
diff --git a/src/vmm/src/devices/virtio/block/virtio/mod.rs b/src/vmm/src/devices/virtio/block/virtio/mod.rs
index 9e97d6d3897..24fc574cc5f 100644
--- a/src/vmm/src/devices/virtio/block/virtio/mod.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/mod.rs
@@ -63,4 +63,6 @@ pub enum VirtioBlockError {
     RateLimiter(std::io::Error),
     /// Persistence error: {0}
     Persist(crate::devices::virtio::persist::PersistError),
+    /// Async file engine still has {0} pending operations.
+    PendingAsyncOperations(u32),
 }
diff --git a/src/vmm/src/devices/virtio/block/virtio/persist.rs b/src/vmm/src/devices/virtio/block/virtio/persist.rs
index 5b544a12c19..99804b634d2 100644
--- a/src/vmm/src/devices/virtio/block/virtio/persist.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/persist.rs
@@ -16,6 +16,7 @@ use crate::devices::virtio::device::{ActiveState, DeviceState};
 use crate::devices::virtio::generated::virtio_blk::VIRTIO_BLK_F_RO;
 use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_BLOCK;
 use crate::devices::virtio::persist::VirtioDeviceState;
+use crate::logger::debug;
 use crate::rate_limiter::RateLimiter;
 use crate::rate_limiter::persist::RateLimiterState;
 use crate::snapshot::Persist;
@@ -69,6 +70,11 @@ impl Persist<'_> for VirtioBlock {
     type Error = VirtioBlockError;
 
     fn save(&self) -> Self::State {
+        let pending_ops = self.disk.file_engine.pending_async_ops();
+        debug!(
+            "virtio-block '{}' save invoked with pending_async_ops={}",
+            self.id, pending_ops
+        );
         // Save device state.
         VirtioBlockState {
             id: self.id.clone(),
diff --git a/src/vmm/src/devices/virtio/block/virtio/request.rs b/src/vmm/src/devices/virtio/block/virtio/request.rs
index 8fc83cf43da..97119083720 100644
--- a/src/vmm/src/devices/virtio/block/virtio/request.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/request.rs
@@ -98,6 +98,18 @@ pub struct PendingRequest {
 }
 
 impl PendingRequest {
+    pub fn request_type(&self) -> RequestType {
+        self.r#type
+    }
+
+    pub fn desc_idx(&self) -> u16 {
+        self.desc_idx
+    }
+
+    pub fn data_len(&self) -> u32 {
+        self.data_len
+    }
+
     fn write_status_and_finish(
         self,
         status: &Status,
diff --git a/src/vmm/src/logger/logging.rs b/src/vmm/src/logger/logging.rs
index e5cdd8a33d3..7ab240dd3c7 100644
--- a/src/vmm/src/logger/logging.rs
+++ b/src/vmm/src/logger/logging.rs
@@ -17,7 +17,7 @@ use super::metrics::{IncMetric, METRICS};
 
 /// Default level filter for logger matching the swagger specification
 /// (`src/firecracker/swagger/firecracker.yaml`).
-pub const DEFAULT_LEVEL: log::LevelFilter = log::LevelFilter::Info;
+pub const DEFAULT_LEVEL: log::LevelFilter = log::LevelFilter::Debug;
 /// Default instance id.
 pub const DEFAULT_INSTANCE_ID: &str = "anonymous-instance";
 /// Instance id.
diff --git a/tests/README.md b/tests/README.md
index e8ad62d0792..b08ab5746c2 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -81,7 +81,7 @@ exercise its programmatic API without the HTTP integration. `Cargo`
 automatically picks up these tests when `cargo test` is issued. They also count
 towards code coverage.
 
-To run *only* the Rust integration tests:
+To run _only_ the Rust integration tests:
 
 ```bash
 cargo test --test integration_tests --all
@@ -92,7 +92,7 @@ Unlike unit tests, Rust integration tests are each run in a separate process.
 
 1. Only the `pub` functions can be called. This is fine, as it allows the VMM to
    be consumed as a programmatic user would. If any function is necessary but
-   not `pub`, please consider carefully whether it conceptually *needs* to be in
+   not `pub`, please consider carefully whether it conceptually _needs_ to be in
    the public interface before making it so.
 
 1. The correct functioning scenario of the `vmm` implies that it `exit`s with
@@ -413,33 +413,33 @@ setting to achieve consistent performance. Please see the `test` section of
 
 ## FAQ
 
-`Q1:` *I have a shell script that runs my tests and I don't want to rewrite
-it.*\
+`Q1:` _I have a shell script that runs my tests and I don't want to rewrite
+it._\
 `A1:` Insofar as it makes sense, you should write it as a python test function.
 However, you can always call the script from a shim python test function. You
 can also add it as a microvm image resource in the s3 bucket (and it will be
 made available under `microvm.slot.path`) or copy it over to a guest filesystem
 as part of your test.
 
-`Q2:` *I want to add more tests that I don't want to commit to the Firecracker
-repository.*\
+`Q2:` _I want to add more tests that I don't want to commit to the Firecracker
+repository._\
 `A2:` Before a testrun or test session, just add your test directory under
 `tests/`. `pytest` will discover all tests in this tree.
 
-`Q3:` *I want to have my own test fixtures, and not commit them in the repo.*\
+`Q3:` _I want to have my own test fixtures, and not commit them in the repo._\
 `A3:` Add a `conftest.py` file in your test directory, and place your fixtures
 there. `pytest` will bring them into scope for all your tests.
 
-`Q4:` *I want to use more/other microvm test images, but I don't want to add
-them to the common s3 bucket.*\
+`Q4:` _I want to use more/other microvm test images, but I don't want to add
+them to the common s3 bucket._\
 `A4:` Add your custom images to the `build/img` subdirectory in the Firecracker
 source tree. This directory is bind-mounted in the container and used as a local
 image cache.
 
-`Q5:` *How can I get live logger output from the tests?*\
+`Q5:` _How can I get live logger output from the tests?_\
 `A5:` Accessing **pytest.ini** will allow you to modify logger settings.
 
-`Q6:` *Is there a way to speed up integration tests execution time?*\
+`Q6:` _Is there a way to speed up integration tests execution time?_\
 `A6:` You can narrow down the test selection as described in the **Running**
 section. For example:
 
@@ -658,3 +658,9 @@ sudo env PYTHONPATH=tests HOME=$HOME ~/.local/bin/ipython3 -i tools/sandbox.py -
 > [!WARNING]
 >
 > **Notice this runs as root!**
+
+## Repro the freeze by
+
+```sh
+tools/devtool -y test -- -s integration_tests/functional/test_snapshot_basic.py::test_snapshot_with_heavy_async_io
+```
diff --git a/tests/conftest.py b/tests/conftest.py
index 96ee285d192..87ded8df2cc 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -26,6 +26,7 @@
 import shutil
 import sys
 import tempfile
+import uuid
 from pathlib import Path
 
 import pytest
@@ -502,10 +503,37 @@ def rootfs():
     return disks("ubuntu-24.04.squashfs")[0]
 
 
-@pytest.fixture
+@pytest.fixture(scope="function")
 def rootfs_rw():
-    """Return an Ubuntu 24.04 ext4 rootfs"""
-    return disks("ubuntu-24.04.ext4")[0]
+    """Return an Ubuntu 24.04 ext4 rootfs, resized to 20GB for each test"""
+    original_rootfs = disks("ubuntu-24.04.ext4")[0]
+    original_ssh_key = original_rootfs.with_suffix(".id_rsa")
+    
+    # Create a unique resized copy for this test
+    unique_id = str(uuid.uuid4())[:8]
+    resized_rootfs_path = Path(tempfile.gettempdir()) / f"rootfs_rw_{unique_id}.ext4"
+    resized_ssh_key_path = resized_rootfs_path.with_suffix(".id_rsa")
+    
+    # Copy the original rootfs
+    shutil.copyfile(original_rootfs, resized_rootfs_path)
+    
+    # Copy the SSH key
+    if original_ssh_key.exists():
+        shutil.copyfile(original_ssh_key, resized_ssh_key_path)
+    
+    # Resize to 20GB
+    new_size_bytes = 20 * 1024 * 1024 * 1024
+    os.truncate(resized_rootfs_path, new_size_bytes)
+    utils.check_output(f"e2fsck -f -y {resized_rootfs_path}")
+    utils.check_output(f"resize2fs {resized_rootfs_path}")
+    
+    yield resized_rootfs_path
+    
+    # Cleanup: remove the resized rootfs file and SSH key after the test
+    if resized_rootfs_path.exists():
+        resized_rootfs_path.unlink()
+    if resized_ssh_key_path.exists():
+        resized_ssh_key_path.unlink()
 
 
 @pytest.fixture
diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py
index c4eac866028..f5ebd5e0ad7 100644
--- a/tests/integration_tests/functional/test_snapshot_basic.py
+++ b/tests/integration_tests/functional/test_snapshot_basic.py
@@ -19,8 +19,9 @@
 import host_tools.drive as drive_tools
 import host_tools.network as net_tools
 from framework import utils
+from framework.microvm import SnapshotType
 from framework.properties import global_props
-from framework.utils import check_filesystem, check_output
+from framework.utils import Timeout, check_filesystem, check_output
 from framework.utils_vsock import (
     ECHO_SERVER_PORT,
     VSOCK_UDS_PATH,
@@ -586,3 +587,129 @@ def test_snapshot_rename_interface(uvm_nano, microvm_factory):
         rename_interfaces={iface_override.dev_name: iface_override.tap_name},
         resume=True,
     )
+
+
+@pytest.mark.parametrize("snapshot_type", [SnapshotType.FULL])
+@pytest.mark.parametrize("pci_enabled", [False])
+@pytest.mark.parametrize("iteration", range(100))
+def test_snapshot_with_heavy_async_io(
+    microvm_factory, guest_kernel_linux_6_1, rootfs_rw, snapshot_type, pci_enabled, iteration
+):
+    print("=" * 80)
+    print(f"Starting iteration {iteration + 1}/100 - Testing for non-zero async I/O drain")
+    print("=" * 80)
+
+    # CHECK: if we set `rootfs` to just "rootfs" (readonly) it also freezes on async.
+    # By default though, even the rw rootfs has only like 37MB of free space.
+    vm = microvm_factory.build(guest_kernel_linux_6_1, rootfs_rw, pci=pci_enabled)
+
+    vm.spawn(log_level="Trace", log_show_level=True, log_show_origin=True)
+    vm.basic_config(
+        vcpu_count=2,
+        mem_size_mib=1024,
+        # CHECK: If we set this to "Sync", it will not freeze.
+        rootfs_io_engine="Async",
+    )
+    vm.add_net_iface()
+    vm.start()
+
+    # Check free space on sandbox start
+    _, free_space_start, _ = vm.ssh.check_output("df -h / | awk 'NR==2 {{print $4}}'")
+    print(f"Free space on sandbox start: {free_space_start.strip()}")
+
+    write_io_script = """
+    # Create a test directory
+    mkdir -p /root/io_test
+    cd /root/io_test
+
+    for i in $(seq 1 2000); do
+        dd if=/dev/urandom of=/root/io_test/test_file_$i bs=8k count=1 oflag=direct 2>/dev/null &
+    done
+    
+    for i in $(seq 1 1000); do
+        dd if=/dev/urandom of=/root/io_test/medium_file_$i bs=16k count=1 oflag=direct 2>/dev/null &
+    done
+    
+    for i in $(seq 1 500); do
+        dd if=/dev/urandom of=/root/io_test/large_file_$i bs=32k count=1 oflag=direct 2>/dev/null &
+    done
+    
+    for i in $(seq 1 200); do
+        dd if=/dev/urandom of=/root/io_test/xlarge_file_$i bs=64k count=1 oflag=direct 2>/dev/null &
+    done
+
+    if command -v fio >/dev/null 2>&1; then
+        fio --name=heavy_write --filename=/root/io_test/fio_write_test \
+            --rw=randwrite --bs=4k --size=2G --ioengine=libaio \
+            --iodepth=512 --direct=1 --runtime=30 --time_based \
+            --numjobs=2 --group_reporting \
+            --output=/root/fio_write.log >/dev/null 2>&1 &
+        
+        fio --name=heavy_write2 --filename=/root/io_test/fio_write_test2 \
+            --rw=randwrite --bs=8k --size=2G --ioengine=libaio \
+            --iodepth=512 --direct=1 --runtime=30 --time_based \
+            --numjobs=2 --group_reporting \
+            --output=/root/fio_write2.log >/dev/null 2>&1 &
+            
+        fio --name=heavy_write3 --filename=/root/io_test/fio_write_test3 \
+            --rw=randwrite --bs=16k --size=2G --ioengine=libaio \
+            --iodepth=512 --direct=1 --runtime=30 --time_based \
+            --numjobs=2 --group_reporting \
+            --output=/root/fio_write3.log >/dev/null 2>&1 &
+            
+        fio --name=heavy_write4 --filename=/root/io_test/fio_write_test4 \
+            --rw=randwrite --bs=4k --size=2G --ioengine=libaio \
+            --iodepth=512 --direct=1 --runtime=30 --time_based \
+            --numjobs=2 --group_reporting \
+            --output=/root/fio_write4.log >/dev/null 2>&1 &
+    fi
+    """
+
+    vm.ssh.run(write_io_script)
+
+    # if set to cca 2 seconds it stops freezing for Async.
+    time.sleep(0.01)
+    
+    snapshot = vm.make_snapshot(snapshot_type)
+    
+    time.sleep(0.05)
+    
+    if vm.log_file and vm.log_file.exists():
+        try:
+            log_data = vm.log_data
+            log_lines = log_data.splitlines()
+            
+            for line in log_lines:
+                if "AsyncFileEngine draining:" in line:
+                    match = re.search(r'pending_ops=(\d+)', line)
+                    if match:
+                        pending_ops_during_drain = int(match.group(1))
+                        print(f"DRAIN: pending_ops={pending_ops_during_drain}")
+
+        except Exception as e:
+            print(f"ERROR: Failed to parse log file: {e}")
+    
+    vm.kill()
+    
+    print(f"Restoring from snapshot...")
+    with Timeout(30):
+        restored_vm = microvm_factory.build_from_snapshot(snapshot)
+    
+    # Check free space after resume
+    _, free_space_after_resume, _ = restored_vm.ssh.check_output("df -h / | awk 'NR==2 {{print $4}}'")
+    print(f"Free space after resume: {free_space_after_resume.strip()}")
+    
+    # Delete all files/dirs created during the test
+    cleanup_script = """
+    rm -rf /root/io_test
+    rm -f /root/fio_write*.log
+    """
+    restored_vm.ssh.run(cleanup_script)
+    
+    # Check free space after cleanup
+    _, free_space_after_cleanup, _ = restored_vm.ssh.check_output("df -h / | awk 'NR==2 {{print $4}}'")
+    print(f"Free space after cleanup: {free_space_after_cleanup.strip()}")
+    
+    print(f"VM resumed - no freeze detected")
+    
+    restored_vm.kill()
diff --git a/tools/test.sh b/tools/test.sh
index 0bf67a65666..5b5c80159d3 100755
--- a/tools/test.sh
+++ b/tools/test.sh
@@ -47,7 +47,7 @@ export PYTEST_ADDOPTS="${PYTEST_ADDOPTS:-} --pdbcls=IPython.terminal.debugger:Te
 
 # if the tests failed and we are running in CI, print some disk usage stats
 # to help troubleshooting
-if [ $ret != 0 ] && [ "$BUILDKITE" == "true" ]; then
+if [ $ret != 0 ] && [ "${BUILDKITE:-false}" == "true" ]; then
     df -ih
     df -h
     du -h / 2>/dev/null |sort -h |tail -32