Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Firecracker Snapshots Support #448

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ demo-network: install-cni-bins $(FCNET_CONFIG)
# Firecracker submodule
##########################
.PHONY: firecracker
firecracker: $(FIRECRACKER_BIN) $(JAILER_BIN)

firecracker:
_submodules/firecracker/tools/devtool build --release
.PHONY: install-firecracker
install-firecracker: firecracker
install -D -o root -g root -m755 -t $(INSTALLROOT)/bin $(FIRECRACKER_BIN)
Expand Down
2 changes: 1 addition & 1 deletion _submodules/firecracker
Submodule firecracker updated 93 files
+0 −1 Cargo.lock
+1 −16 src/api_server/src/lib.rs
+1 −0 src/api_server/src/parsed_request.rs
+2 −0 src/api_server/src/request/actions.rs
+0 −8 src/arch/src/lib.rs
+0 −2 src/arch/src/x86_64/interrupts.rs
+22 −38 src/cpuid/src/bit_helper.rs
+0 −8 src/cpuid/src/lib.rs
+1 −1 src/devices/src/legacy/i8042.rs
+1 −4 src/devices/src/legacy/rtc_pl031.rs
+1 −1 src/devices/src/legacy/serial.rs
+1 −17 src/devices/src/lib.rs
+2 −2 src/devices/src/pseudo/boot_timer.rs
+125 −78 src/devices/src/virtio/block/device.rs
+1 −0 src/devices/src/virtio/block/event_handler.rs
+2 −2 src/devices/src/virtio/block/persist.rs
+12 −10 src/devices/src/virtio/block/request.rs
+1 −0 src/devices/src/virtio/device.rs
+1 −0 src/devices/src/virtio/mmio.rs
+3 −3 src/devices/src/virtio/net/device.rs
+1 −1 src/devices/src/virtio/net/event_handler.rs
+1 −0 src/devices/src/virtio/net/tap.rs
+1 −1 src/devices/src/virtio/persist.rs
+1 −2 src/devices/src/virtio/queue.rs
+1 −1 src/devices/src/virtio/vsock/csm/connection.rs
+1 −1 src/devices/src/virtio/vsock/device.rs
+1 −1 src/devices/src/virtio/vsock/event_handler.rs
+2 −2 src/devices/src/virtio/vsock/unix/muxer.rs
+0 −9 src/dumbo/src/lib.rs
+0 −10 src/dumbo/src/mac.rs
+2 −0 src/dumbo/src/pdu/tcp.rs
+2 −0 src/dumbo/src/tcp/connection.rs
+1 −1 src/dumbo/src/tcp/endpoint.rs
+1 −1 src/dumbo/src/tcp/handler.rs
+18 −11 src/firecracker/src/api_server_adapter.rs
+1 −13 src/firecracker/src/main.rs
+1 −1 src/firecracker/src/metrics.rs
+1 −1 src/jailer/src/env.rs
+0 −6 src/jailer/src/main.rs
+0 −3 src/kernel/src/lib.rs
+0 −9 src/logger/src/lib.rs
+69 −92 src/logger/src/logger.rs
+5 −6 src/logger/src/metrics.rs
+0 −4 src/micro_http/src/common/headers.rs
+0 −2 src/micro_http/src/common/mod.rs
+0 −6 src/micro_http/src/lib.rs
+0 −1 src/micro_http/src/request.rs
+0 −2 src/micro_http/src/server.rs
+1 −14 src/mmds/src/lib.rs
+0 −1 src/mmds/src/persist.rs
+0 −3 src/polly/src/lib.rs
+133 −47 src/rate_limiter/src/lib.rs
+10 −9 src/rate_limiter/src/persist.rs
+34 −49 src/seccomp/src/lib.rs
+0 −5 src/snapshot/benches/main.rs
+0 −5 src/snapshot/benches/version_map.rs
+0 −6 src/snapshot/src/lib.rs
+0 −5 src/snapshot/tests/test.rs
+0 −2 src/utils/src/lib.rs
+0 −3 src/utils/src/signal.rs
+1 −2 src/vmm/Cargo.toml
+1 −0 src/vmm/src/builder.rs
+0 −4 src/vmm/src/default_syscalls/filters.rs
+0 −4 src/vmm/src/device_manager/mmio.rs
+1 −29 src/vmm/src/lib.rs
+2 −0 src/vmm/src/resources.rs
+8 −31 src/vmm/src/rpc_interface.rs
+1 −1 src/vmm/src/signal_handler.rs
+2 −0 src/vmm/src/vmm_config/boot_source.rs
+8 −6 src/vmm/src/vmm_config/drive.rs
+1 −0 src/vmm/src/vmm_config/instance_info.rs
+3 −5 src/vmm/src/vmm_config/logger.rs
+1 −1 src/vmm/src/vmm_config/machine_config.rs
+3 −4 src/vmm/src/vmm_config/metrics.rs
+1 −1 src/vmm/src/vmm_config/mmds.rs
+1 −0 src/vmm/src/vmm_config/mod.rs
+2 −0 src/vmm/src/vmm_config/net.rs
+2 −0 src/vmm/src/vmm_config/snapshot.rs
+2 −0 src/vmm/src/vmm_config/vsock.rs
+1 −2 src/vmm/src/vstate.rs
+0 −10 src/vmm/tests/integration_tests.rs
+27 −12 tests/framework/builder.py
+34 −1 tests/framework/microvm.py
+1 −1 tests/integration_tests/build/test_coverage.py
+1 −1 tests/integration_tests/functional/test_api.py
+150 −16 tests/integration_tests/functional/test_snapshot_basic.py
+265 −0 tests/integration_tests/performance/test_snapshot_perf.py
+10 −6 tests/integration_tests/security/demo_seccomp/src/bin/demo_advanced_jailer.rs
+0 −4 tests/integration_tests/security/demo_seccomp/src/bin/demo_basic_jailer.rs
+0 −3 tests/integration_tests/security/demo_seccomp/src/bin/demo_harmless.rs
+0 −3 tests/integration_tests/security/demo_seccomp/src/bin/demo_malicious.rs
+0 −3 tests/integration_tests/security/demo_seccomp/src/bin/seccomp_rules/mod.rs
+1 −1 tests/pytest.ini
293 changes: 287 additions & 6 deletions firecracker-control/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ type local struct {

processesMu sync.Mutex
processes map[string]int32

fcControlSocket *net.UnixListener
}

func newLocal(ic *plugin.InitContext) (*local, error) {
Expand Down Expand Up @@ -243,7 +245,7 @@ func (s *local) StopVM(requestCtx context.Context, req *proto.StopVMRequest) (*e
defer client.Close()

resp, shimErr := client.StopVM(requestCtx, req)
waitErr := s.waitForShimToExit(requestCtx, req.VMID)
waitErr := s.waitForShimToExit(requestCtx, req.VMID, false)

// Assuming the shim is returning containerd's error code, return the error as is if possible.
if waitErr == nil {
Expand All @@ -252,7 +254,7 @@ func (s *local) StopVM(requestCtx context.Context, req *proto.StopVMRequest) (*e
return resp, multierror.Append(shimErr, waitErr).ErrorOrNil()
}

func (s *local) waitForShimToExit(ctx context.Context, vmID string) error {
func (s *local) waitForShimToExit(ctx context.Context, vmID string, killShim bool) error {
socketAddr, err := fcShim.SocketAddress(ctx, vmID)
if err != nil {
return err
Expand All @@ -267,6 +269,17 @@ func (s *local) waitForShimToExit(ctx context.Context, vmID string) error {
}
defer delete(s.processes, socketAddr)

if killShim {
s.logger.Debug("Killing shim")

if err := syscall.Kill(int(pid), syscall.SIGKILL); err != nil {
s.logger.WithError(err).Error("Failed to kill shim process")
return err
}

return nil
}

return internal.WaitForPidToExit(ctx, stopVMInterval, pid)
}

Expand Down Expand Up @@ -429,10 +442,6 @@ func (s *local) newShim(ns, vmID, containerdAddress string, shimSocket *net.Unix
if err := fcSocketFile.Close(); err != nil {
logger.WithError(err).Errorf("failed to close %q", fcSocketFile.Name())
}

if err := os.RemoveAll(shimDir.RootPath()); err != nil {
logger.WithError(err).Errorf("failed to remove %q", shimDir.RootPath())
}
}()

err = setShimOOMScore(cmd.Process.Pid)
Expand Down Expand Up @@ -463,3 +472,275 @@ func setShimOOMScore(shimPid int) error {

return nil
}

// loadShim starts a new shim process for vmID in namespace ns and registers it
// with s.addShim under the shim socket address.
//
// It creates two listening sockets (the shim socket and the fccontrol socket),
// passes both to the child via ExtraFiles, and tells the child which file
// descriptor is the fccontrol socket through internal.FCSocketFDEnvKey.
//
// It returns a codes.AlreadyExists status error when the shim socket address is
// already in use, and the underlying (wrapped) error for any other failure.
//
// NOTE(review): s.fcControlSocket is a single field on local, so every
// successful call overwrites the listener recorded by the previous one, and the
// field is written without holding a lock — confirm whether more than one
// loaded VM is expected to be supported at a time.
func (s *local) loadShim(ctx context.Context, ns, vmID, containerdAddress string) (*exec.Cmd, error) {
	logger := s.logger.WithField("vmID", vmID)
	logger.Debug("Loading shim")

	shimSocketAddress, err := fcShim.SocketAddress(ctx, vmID)
	if err != nil {
		err = errors.Wrap(err, "failed to obtain shim socket address")
		s.logger.WithError(err).Error()
		return nil, err
	}

	shimSocket, err := shim.NewSocket(shimSocketAddress)
	if isEADDRINUSE(err) {
		return nil, status.Errorf(codes.AlreadyExists, "VM with ID %q already exists (socket: %q)", vmID, shimSocketAddress)
	} else if err != nil {
		err = errors.Wrapf(err, "failed to open shim socket at address %q", shimSocketAddress)
		s.logger.WithError(err).Error()
		return nil, err
	}

	// The child receives its own duplicate of the socket (see File() below),
	// so this process's listener is always released on return.
	defer shimSocket.Close()

	// If we're here, there is no pre-existing shim for this VMID, so we spawn a new one
	if _, err := os.Stat(s.config.ShimBaseDir); os.IsNotExist(err) {
		return nil, errors.Wrapf(err, "shim base dir does not exist: %s", s.config.ShimBaseDir)
	}

	shimDir, err := vm.ShimDir(s.config.ShimBaseDir, ns, vmID)
	if err != nil {
		err = errors.Wrapf(err, "failed to build shim path")
		s.logger.WithError(err).Error()
		return nil, err
	}

	fcSocketAddress, err := fcShim.FCControlSocketAddress(ctx, vmID)
	if err != nil {
		err = errors.Wrap(err, "failed to obtain fccontrol socket address")
		s.logger.WithError(err).Error()
		return nil, err
	}

	fcSocket, err := shim.NewSocket(fcSocketAddress)
	if err != nil {
		err = errors.Wrapf(err, "failed to open fccontrol socket at address %q", fcSocketAddress)
		s.logger.WithError(err).Error()
		return nil, err
	}

	// Until the shim child has been started this function owns the fccontrol
	// listener. Release it on every failure path so a failed load does not
	// leave the address occupied for the next attempt.
	handedOff := false
	defer func() {
		if handedOff {
			return
		}
		if err := fcSocket.Close(); err != nil {
			logger.WithError(err).Error("failed to close fccontrol socket")
		}
	}()

	args := []string{
		"-namespace", ns,
		"-address", containerdAddress,
	}

	cmd := exec.Command(internal.ShimBinaryName, args...)

	// note: The working dir of the shim has an effect on the length of the path
	// needed to specify various unix sockets that the shim uses to communicate
	// with the firecracker VMM and guest agent within. The length of that path
	// has a relatively low limit (usually 108 chars), so modifying the working
	// dir should be done with caution. See internal/vm/dir.go for the path
	// definitions.
	cmd.Dir = shimDir.RootPath()

	// closeFile releases one of the duplicated socket files, logging (but not
	// returning) any failure.
	closeFile := func(f *os.File) {
		if err := f.Close(); err != nil {
			logger.WithError(err).Errorf("failed to close %q", f.Name())
		}
	}

	shimSocketFile, err := shimSocket.File()
	if err != nil {
		err = errors.Wrap(err, "failed to get shim socket fd")
		logger.WithError(err).Error()
		return nil, err
	}

	fcSocketFile, err := fcSocket.File()
	if err != nil {
		closeFile(shimSocketFile)
		err = errors.Wrap(err, "failed to get shim fccontrol socket fd")
		logger.WithError(err).Error()
		return nil, err
	}

	cmd.ExtraFiles = append(cmd.ExtraFiles, shimSocketFile, fcSocketFile)
	fcSocketFDNum := 2 + len(cmd.ExtraFiles) // "2 +" because ExtraFiles come after stderr (fd #2)

	ttrpc := containerdAddress + ".ttrpc"
	cmd.Env = append(os.Environ(),
		fmt.Sprintf("%s=%s", ttrpcAddressEnv, ttrpc),
		fmt.Sprintf("%s=%s", internal.VMIDEnvVarKey, vmID),
		fmt.Sprintf("%s=%s", internal.FCSocketFDEnvKey, strconv.Itoa(fcSocketFDNum))) // TODO remove after containerd is updated to expose ttrpc server to shim

	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid: true,
	}

	// shim stderr is just raw text, so pass it through our logrus formatter first
	cmd.Stderr = logger.WithField("shim_stream", "stderr").WriterLevel(logrus.ErrorLevel)
	// shim stdout on the other hand is already formatted by logrus, so pass that transparently through to containerd logs
	cmd.Stdout = logger.Logger.Out

	logger.Debugf("starting %s", internal.ShimBinaryName)

	err = cmd.Start()
	if err != nil {
		// No child exists, so nothing else will ever close these files.
		closeFile(shimSocketFile)
		closeFile(fcSocketFile)
		err = errors.Wrap(err, "failed to start shim child process")
		logger.WithError(err).Error()
		return nil, err
	}

	// The shim is running: keep the fccontrol listener open and remember it so
	// that Offload can close it later.
	s.fcControlSocket = fcSocket
	handedOff = true

	// make sure to wait after start
	go func() {
		if err := cmd.Wait(); err != nil {
			if exitErr, ok := err.(*exec.ExitError); ok {
				// shim is usually terminated by cancelling the context
				logger.WithError(exitErr).Debug("shim has been terminated")
			} else {
				logger.WithError(err).Error("shim has been unexpectedly terminated")
			}
		}

		// Close all Unix abstract sockets.
		closeFile(shimSocketFile)
		closeFile(fcSocketFile)
	}()

	err = setShimOOMScore(cmd.Process.Pid)
	if err != nil {
		logger.WithError(err).Error()
		return nil, err
	}

	s.addShim(shimSocketAddress, cmd)

	return cmd, nil
}

// PauseVM obtains a shim client for req.VMID and forwards the pause request to
// it. A failure to obtain the client is returned as is; a failure of the
// forwarded call is logged and then returned.
func (s *local) PauseVM(ctx context.Context, req *proto.PauseVMRequest) (*empty.Empty, error) {
	shimClient, dialErr := s.shimFirecrackerClient(ctx, req.VMID)
	if dialErr != nil {
		return nil, dialErr
	}
	defer shimClient.Close()

	out, callErr := shimClient.PauseVM(ctx, req)
	if callErr == nil {
		return out, nil
	}

	s.logger.WithError(callErr).Error()
	return nil, callErr
}

// ResumeVM obtains a shim client for req.VMID and forwards the resume request
// to it. A failure to obtain the client is returned as is; a failure of the
// forwarded call is logged and then returned.
func (s *local) ResumeVM(ctx context.Context, req *proto.ResumeVMRequest) (*empty.Empty, error) {
	shimClient, dialErr := s.shimFirecrackerClient(ctx, req.VMID)
	if dialErr != nil {
		return nil, dialErr
	}
	defer shimClient.Close()

	out, callErr := shimClient.ResumeVM(ctx, req)
	if callErr == nil {
		return out, nil
	}

	s.logger.WithError(callErr).Error()
	return nil, callErr
}

// CreateSnapshot obtains a shim client for req.VMID and forwards the
// create-snapshot request to it. A failure to obtain the client is returned as
// is; a failure of the forwarded call is logged and then returned.
func (s *local) CreateSnapshot(ctx context.Context, req *proto.CreateSnapshotRequest) (*empty.Empty, error) {
	shimClient, dialErr := s.shimFirecrackerClient(ctx, req.VMID)
	if dialErr != nil {
		return nil, dialErr
	}
	defer shimClient.Close()

	out, callErr := shimClient.CreateSnapshot(ctx, req)
	if callErr == nil {
		return out, nil
	}

	s.logger.WithError(callErr).Error()
	return nil, callErr
}

// LoadSnapshot starts a shim for req.VMID through loadShim, using the namespace
// carried by ctx, and then forwards the load-snapshot request to that shim.
//
// An error is returned when ctx carries no namespace, when the shim cannot be
// started, when no client for the shim can be obtained, or when the forwarded
// call fails.
//
// NOTE(review): when a step after loadShim fails, nothing here stops the shim
// that was just started — confirm whether callers are expected to clean it up.
func (s *local) LoadSnapshot(ctx context.Context, req *proto.LoadSnapshotRequest) (*empty.Empty, error) {
	namespace, nsErr := namespaces.NamespaceRequired(ctx)
	if nsErr != nil {
		wrapped := errors.Wrap(nsErr, "error retrieving namespace of request")
		s.logger.WithError(wrapped).Error()
		return nil, wrapped
	}

	if _, shimErr := s.loadShim(ctx, namespace, req.VMID, s.containerdAddress); shimErr != nil {
		return nil, shimErr
	}

	shimClient, dialErr := s.shimFirecrackerClient(ctx, req.VMID)
	if dialErr != nil {
		return nil, dialErr
	}
	defer shimClient.Close()

	out, callErr := shimClient.LoadSnapshot(ctx, req)
	if callErr == nil {
		return out, nil
	}

	s.logger.WithError(callErr).Error()
	return nil, callErr
}

// Offload forwards the offload request to the shim for req.VMID and then tears
// down the host-side resources for that VM: it closes the fccontrol listener
// recorded in s.fcControlSocket, removes the shim socket and fccontrol socket
// paths, and calls waitForShimToExit with killShim set so the shim process is
// killed instead of being waited on.
//
// The shim's response is returned only when every teardown step succeeds;
// otherwise the error of the first failing step is returned.
//
// NOTE(review): what the shim itself does on Offload (presumably shutting down
// the Firecracker process) is not visible here — confirm against the shim.
func (s *local) Offload(ctx context.Context, req *proto.OffloadRequest) (*empty.Empty, error) {
	client, err := s.shimFirecrackerClient(ctx, req.VMID)
	if err != nil {
		return nil, err
	}

	defer client.Close()

	resp, err := client.Offload(ctx, req)
	if err != nil {
		s.logger.WithError(err).Error()
		return nil, err
	}

	// The listener is only set by loadShim, so it may be absent for this VM.
	// Closing is best effort: a failure is recorded but does not abort the
	// rest of the teardown.
	if s.fcControlSocket != nil {
		if closeErr := s.fcControlSocket.Close(); closeErr != nil {
			s.logger.WithError(closeErr).Debug("failed to close fccontrol socket")
		}
	}

	shimSocketAddress, err := fcShim.SocketAddress(ctx, req.VMID)
	if err != nil {
		err = errors.Wrap(err, "failed to obtain shim socket address")
		s.logger.WithError(err).Error()
		return nil, err
	}
	if removeErr := os.RemoveAll(shimSocketAddress); removeErr != nil {
		s.logger.Errorf("failed to remove shim socket addr file: %v", removeErr)
		// Return the removal error itself; err is nil at this point.
		return nil, errors.Wrap(removeErr, "failed to remove shim socket addr file")
	}

	fcSocketAddress, err := fcShim.FCControlSocketAddress(ctx, req.VMID)
	if err != nil {
		s.logger.WithError(err).Error("failed to get FC socket address")
		return nil, err
	}
	if removeErr := os.RemoveAll(fcSocketAddress); removeErr != nil {
		s.logger.Errorf("failed to remove fc socket addr file: %v", removeErr)
		// Return the removal error itself; err is nil at this point.
		return nil, errors.Wrap(removeErr, "failed to remove fc socket addr file")
	}

	if waitErr := s.waitForShimToExit(ctx, req.VMID, true); waitErr != nil {
		s.logger.WithError(waitErr).Error("failed to wait for shim to exit on offload")
		return nil, waitErr
	}

	return resp, nil
}
25 changes: 25 additions & 0 deletions firecracker-control/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,28 @@ func (s *service) GetVMMetadata(ctx context.Context, req *proto.GetVMMetadataReq
log.G(ctx).Debug("Getting vm metadata")
return s.local.GetVMMetadata(ctx, req)
}

// PauseVM logs the request at debug level and delegates to the local
// implementation.
func (s *service) PauseVM(ctx context.Context, req *proto.PauseVMRequest) (*empty.Empty, error) {
	logger := log.G(ctx)
	logger.Debugf("pause VM request: %+v", req)

	resp, err := s.local.PauseVM(ctx, req)
	return resp, err
}

// ResumeVM logs the request at debug level and delegates to the local
// implementation.
func (s *service) ResumeVM(ctx context.Context, req *proto.ResumeVMRequest) (*empty.Empty, error) {
	logger := log.G(ctx)
	logger.Debugf("resume VM request: %+v", req)

	resp, err := s.local.ResumeVM(ctx, req)
	return resp, err
}

// LoadSnapshot logs the request at debug level and delegates to the local
// implementation.
func (s *service) LoadSnapshot(ctx context.Context, req *proto.LoadSnapshotRequest) (*empty.Empty, error) {
	logger := log.G(ctx)
	logger.Debugf("load snapshot request: %+v", req)

	resp, err := s.local.LoadSnapshot(ctx, req)
	return resp, err
}

// CreateSnapshot logs the request at debug level and delegates to the local
// implementation.
func (s *service) CreateSnapshot(ctx context.Context, req *proto.CreateSnapshotRequest) (*empty.Empty, error) {
	logger := log.G(ctx)
	logger.Debugf("create snapshot request: %+v", req)

	resp, err := s.local.CreateSnapshot(ctx, req)
	return resp, err
}

// Offload logs the request at debug level and delegates to the local
// implementation.
func (s *service) Offload(ctx context.Context, req *proto.OffloadRequest) (*empty.Empty, error) {
	logger := log.G(ctx)
	logger.Debugf("offload request: %+v", req)

	resp, err := s.local.Offload(ctx, req)
	return resp, err
}
Loading