-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: keep dirty memory bitmaps until snapshot is made #4383
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,6 +98,9 @@ where | |
writer: &mut T, | ||
dirty_bitmap: &DirtyBitmap, | ||
) -> Result<(), MemoryError>; | ||
|
||
/// Erases the firecracker internal bitmap of dirty pages | ||
fn erase_dirty(&self); | ||
} | ||
|
||
/// State of a guest memory region saved to file/buffer. | ||
|
@@ -274,6 +277,15 @@ impl GuestMemoryExtension for GuestMemoryMmap { | |
.map_err(MemoryError::WriteMemory) | ||
} | ||
|
||
/// Erases the firecracker internal bitmap of dirty pages | ||
fn erase_dirty(&self) { | ||
for region in self.iter() { | ||
if let Some(bitmap) = region.bitmap() { | ||
bitmap.reset(); | ||
} | ||
} | ||
} | ||
|
||
/// Dumps all pages of GuestMemoryMmap present in `dirty_bitmap` to a writer. | ||
fn dump_dirty<T: WriteVolatile + std::io::Seek>( | ||
&self, | ||
|
@@ -283,20 +295,49 @@ impl GuestMemoryExtension for GuestMemoryMmap { | |
let mut writer_offset = 0; | ||
let page_size = get_page_size().map_err(MemoryError::PageSize)?; | ||
|
||
// Merging KVM dirty pages bitmap into internal firecracker bitmap. Needed to keep track of | ||
// KVM dirty pages in case we need to use them again if current dump operation fails. | ||
self.iter() | ||
.enumerate() | ||
.try_for_each(|(slot, region)| { | ||
let firecracker_bitmap = region.bitmap(); | ||
let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); | ||
for (i, v) in kvm_bitmap.iter().enumerate() { | ||
// FIXME: looks like this loop can be replaced with some | ||
// `(kvm_dirty_pages | fc_dirty_pages)` | ||
for j in 0..64 { | ||
let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64; | ||
let page_offset = ((i * 64) + j) * page_size; | ||
|
||
if is_kvm_page_dirty { | ||
// FIXME: not sure about passing page_size as len | ||
firecracker_bitmap.mark_dirty(page_offset, page_size) | ||
} | ||
} | ||
} | ||
|
||
Ok(()) | ||
}) | ||
.map_err(MemoryError::WriteMemory); | ||
|
||
self.iter() | ||
.enumerate() | ||
.try_for_each(|(slot, region)| { | ||
let firecracker_bitmap = region.bitmap(); | ||
let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need this at all? can't we just iterate over firecracker_bitmap? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, we don't need KVM bitmap after we merged it into the FC one. |
||
let mut write_size = 0; | ||
let mut dirty_batch_start: u64 = 0; | ||
|
||
for (i, v) in kvm_bitmap.iter().enumerate() { | ||
// FIXME: we can iterate over firecracker_bitmap now when the kvm bitmap is merged | ||
// there | ||
// FIXME: unused `v`. Can we avoid having it at all instead of marking it with | ||
// underscore? | ||
for (i, _v) in kvm_bitmap.iter().enumerate() { | ||
for j in 0..64 { | ||
let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64; | ||
let page_offset = ((i * 64) + j) * page_size; | ||
let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset); | ||
if is_kvm_page_dirty || is_firecracker_page_dirty { | ||
|
||
if is_firecracker_page_dirty { | ||
// We are at the start of a new batch of dirty pages. | ||
if write_size == 0 { | ||
// Seek forward over the unmodified pages. | ||
|
@@ -326,13 +367,12 @@ impl GuestMemoryExtension for GuestMemoryMmap { | |
)?; | ||
} | ||
writer_offset += region.len(); | ||
if let Some(bitmap) = firecracker_bitmap { | ||
bitmap.reset(); | ||
} | ||
|
||
Ok(()) | ||
}) | ||
.map_err(MemoryError::WriteMemory) | ||
.map_err(MemoryError::WriteMemory); | ||
|
||
self.erase_dirty(); | ||
Ok(()) | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
"""Test that the no dirty pages lost in case of error during snapshot creation.""" | ||
|
||
import subprocess | ||
from pathlib import Path | ||
|
||
|
||
def test_not_loosing_dirty_pages_on_snapshot_failure(uvm_plain, microvm_factory): | ||
""" | ||
Test that in case of error during snapshot creation no dirty pages were lost. | ||
""" | ||
vm_mem_size = 128 | ||
uvm = uvm_plain | ||
uvm.spawn() | ||
uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True) | ||
uvm.add_net_iface() | ||
uvm.start() | ||
uvm.ssh.run("true") | ||
|
||
chroot = Path(uvm.chroot()) | ||
|
||
# Create a large file, so we run out of space (ENOSPC) during the snapshot | ||
# Assumes a Docker /srv tmpfs of 1G, derived by trial and error | ||
fudge = chroot / "fudge" | ||
subprocess.check_call(f"fallocate -l 550M {fudge}", shell=True) | ||
|
||
try: | ||
uvm.snapshot_diff() | ||
except RuntimeError: | ||
msg = "No space left on device" | ||
uvm.check_log_message(msg) | ||
else: | ||
assert False, "This should fail" | ||
|
||
fudge.unlink() | ||
|
||
# Now there is enough space for it to work | ||
snap2 = uvm.snapshot_diff() | ||
|
||
vm2 = microvm_factory.build() | ||
vm2.spawn() | ||
vm2.restore_from_snapshot(snap2, resume=True) | ||
vm2.ssh.run("true") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -309,7 +309,7 @@ run_devctr() { | |
--rm \ | ||
--volume /dev:/dev \ | ||
--volume "$FC_ROOT_DIR:$CTR_FC_ROOT_DIR:z" \ | ||
--tmpfs /srv:exec,dev,size=32G \ | ||
--tmpfs /srv:exec,dev,size=1G \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm I will need to rethink this bit before it gets merged as it will affect other tests. This will probably cause a lot of other tests to fail. You can drop this commit for now and we can run the tests. |
||
-v /boot:/boot \ | ||
--env PYTHONDONTWRITEBYTECODE=1 \ | ||
"$DEVCTR_IMAGE" "${ctr_args[@]}" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you mean
fc_dirty = fc_dirty | kvm_dirty
? Yeah, I think that would be simpler than iterating over each bitThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, something like that. Not sure yet what are the types of the
fc_dirty
andkvm_dirty
.