merge nested warden into master #27

Merged
merged 20 commits into master on Sep 12, 2013
+405 −64
2 warden/config/linux.yml
@@ -37,6 +37,8 @@ server:
quota:
disk_quota_enabled: true
+ allow_nested_warden: false
+
health_check_server:
port: 2345
15 warden/lib/warden/config.rb
@@ -14,6 +14,7 @@ def self.server_defaults
"quota" => {
"disk_quota_enabled" => true,
},
+ "allow_nested_warden" => false,
}
end
@@ -59,6 +60,8 @@ def self.server_schema
"quota" => {
optional("disk_quota_enabled") => bool,
},
+
+ "allow_nested_warden" => bool,
}
end
end
@@ -97,6 +100,7 @@ def self.network_schema
# Present for Backwards compatibility
optional("pool_start_address") => String,
optional("pool_size") => Integer,
+ optional("release_delay") => Integer,
optional("mtu") => Integer,
"deny_networks" => [String],
@@ -106,7 +110,12 @@ def self.network_schema
end
def self.ip_local_port_range
- File.read("/proc/sys/net/ipv4/ip_local_port_range").split.map(&:to_i)
+ # If ip_local_port_range is not available, fall back to default values
+ if File.exist?("/proc/sys/net/ipv4/ip_local_port_range")
+ File.read("/proc/sys/net/ipv4/ip_local_port_range").split.map(&:to_i)
+ else
+ return 32768, 61000
ryantang commented on Aug 23, 2013

Why is this required for nested warden functionality?

@mariash @ryantang

Kaixiang commented on Aug 27, 2013

In newer kernels, /proc/sys/net/ipv4/ip_local_port_range is no longer exported inside the container, so we should check whether it exists before providing default values.
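A minimal shell sketch of the same guard, for verifying the behavior by hand inside a container (the fallback values mirror the diff above):

    # Prefer the /proc file; fall back to the conventional Linux defaults
    # when the kernel does not export it inside the container.
    if [ -f /proc/sys/net/ipv4/ip_local_port_range ]; then
      read lower upper < /proc/sys/net/ipv4/ip_local_port_range
    else
      lower=32768 upper=61000
    fi
    echo "$lower $upper"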

+ end
end
def self.port_defaults
@@ -205,6 +214,10 @@ def rlimits
@server["container_rlimits"] || {}
end
+ def allow_nested_warden?
+ !!@server["allow_nested_warden"]
+ end
+
def to_hash
{
"server" => server,
64 warden/lib/warden/container/linux.rb
@@ -82,6 +82,7 @@ def env
"network_netmask" => self.class.network_pool.pooled_netmask.to_human,
"user_uid" => uid,
"rootfs_path" => container_rootfs_path,
+ "allow_nested_warden" => Server.config.allow_nested_warden?.to_s,
"container_iface_mtu" => container_iface_mtu,
}
env
@@ -194,38 +195,53 @@ def perform_rsync(src_path, dst_path)
sh *args
end
- def write_bind_mount_commands(request)
- return if request.bind_mounts.nil? || request.bind_mounts.empty?
+ def add_bind_mount(file, src_path, dst_path, mode)
cf-frameworks commented on Sep 11, 2013

Will this blow up if src doesn't exist? Previously we used to File.stat it and raise a Warden error if it didn't exist.

@vito & @mariash

+ dst_path = File.join(container_path, "mnt", dst_path[1..-1])
+
+ file.puts "mkdir -p #{dst_path}"
+ file.puts "mount -n --bind #{src_path} #{dst_path}"
+ file.puts "mount -n --bind -o remount,#{mode} #{src_path} #{dst_path}"
+ end
- File.open(File.join(container_path, "lib", "hook-parent-before-clone.sh"), "a") do |file|
+ def write_bind_mount_commands(request)
+ File.open(File.join(container_path, "lib", "hook-child-before-pivot.sh"), "a") do |file|
file.puts
file.puts
- request.bind_mounts.each do |bind_mount|
- src_path = bind_mount.src_path
- dst_path = bind_mount.dst_path
-
- # Check that the source path exists
- stat = File.stat(src_path) rescue nil
- if stat.nil?
- raise WardenError.new("Source path for bind mount does not exist: #{src_path}")
+ if request.bind_mounts.respond_to?(:each)
+ request.bind_mounts.each do |bind_mount|
+ src_path = bind_mount.src_path
+ dst_path = bind_mount.dst_path
+
+ # Check that the source path exists
+ stat = File.stat(src_path) rescue nil
+ raise WardenError.new("Source path for bind mount does not exist: #{src_path}") if stat.nil?
+
+ mode = case bind_mount.mode
+ when Protocol::CreateRequest::BindMount::Mode::RO
+ "ro"
+ when Protocol::CreateRequest::BindMount::Mode::RW
+ "rw"
+ else
+ raise "Unknown mode"
+ end
+
+ add_bind_mount(file, src_path, dst_path, mode)
end
+ end
- # Fix up destination path to be an absolute path inside the union
- dst_path = File.join(container_path, "mnt", dst_path[1..-1])
+ # For nested warden, we share the host's cgroup fs with containers using bind mounts
+ if Server.config.allow_nested_warden?
+ tmp_warden_cgroup = File.join(container_path, "tmp", "warden", "cgroup")
+ FileUtils.mkdir_p(tmp_warden_cgroup)
- mode = case bind_mount.mode
- when Protocol::CreateRequest::BindMount::Mode::RO
- "ro"
- when Protocol::CreateRequest::BindMount::Mode::RW
- "rw"
- else
- raise "Unknown mode"
- end
+ # Bind-mount cgroups
+ add_bind_mount(file, tmp_warden_cgroup, "/tmp/warden/cgroup", "rw")
- file.puts "mkdir -p #{dst_path}"
- file.puts "mount -n --bind #{src_path} #{dst_path}"
- file.puts "mount -n --bind -o remount,#{mode} #{src_path} #{dst_path}"
+ # For each subsystem, bind-mount only the current container's group instead of all groups, so that the nested warden server sets up its groups directly under the parent container's hierarchy
+ %w(cpu cpuacct devices memory).each do |subsystem|
+ add_bind_mount(file, cgroup_path(subsystem), "/tmp/warden/cgroup/#{subsystem}", "rw")
+ end
end
end
end
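To make the generated hook concrete: with allow_nested_warden enabled, the lines add_bind_mount appends to lib/hook-child-before-pivot.sh would look roughly like this sketch. The container path is a hypothetical example, and the per-subsystem sources assume cgroup_path returns this container's own group.

    # Hypothetical excerpt for a container at /opt/warden/containers/abc
    mkdir -p /opt/warden/containers/abc/mnt/tmp/warden/cgroup
    mount -n --bind /opt/warden/containers/abc/tmp/warden/cgroup /opt/warden/containers/abc/mnt/tmp/warden/cgroup
    mount -n --bind -o remount,rw /opt/warden/containers/abc/tmp/warden/cgroup /opt/warden/containers/abc/mnt/tmp/warden/cgroup
    # ...followed by the same mkdir/mount/remount trio for each of
    # cpu, cpuacct, devices and memory under /tmp/warden/cgroup/<subsystem>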
2 warden/lib/warden/server.rb
@@ -189,7 +189,7 @@ def self.setup_logging
end
def self.setup_network
- network_pool = Pool::Network.new(config.network["pool_network"])
+ network_pool = Pool::Network.new(config.network["pool_network"], :release_delay => config.network["release_delay"])
container_klass.network_pool = network_pool
end
5 warden/root/linux/net.sh
@@ -103,7 +103,12 @@ function setup_filter() {
iptables -A ${filter_default_chain} --destination "$n" --jump DROP
done
+ # Forward outbound traffic via ${filter_forward_chain}
iptables -A FORWARD -i w-+ --jump ${filter_forward_chain}
+
+ # Forward inbound traffic immediately
+ default_interface=$(ip route show | grep default | cut -d' ' -f5 | head -1)
+ iptables -I ${filter_forward_chain} -i $default_interface --jump ACCEPT
}
function teardown_nat() {
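The interface detection takes the fifth space-separated field of the first default-route line; a quick sanity check on any host (sketch):

    # e.g. "default via 10.0.2.2 dev eth0" -> field 5 is "eth0"
    ip route show | grep default | head -1
    ip route show | grep default | cut -d' ' -f5 | head -1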
37 warden/root/linux/setup.sh
@@ -20,30 +20,27 @@ fi
cgroup_path=/tmp/warden/cgroup
-mkdir -p $cgroup_path
-
-if grep "${cgroup_path} " /proc/mounts | cut -d' ' -f3 | grep -q cgroup
+if [ ! -d $cgroup_path ]
then
- find $cgroup_path -mindepth 1 -type d | sort | tac | xargs rmdir
- umount $cgroup_path
-fi
-
-# Mount tmpfs
-if ! grep "${cgroup_path} " /proc/mounts | cut -d' ' -f3 | grep -q tmpfs
-then
- mount -t tmpfs none $cgroup_path
-fi
+ mkdir -p $cgroup_path
-# Mount cgroup subsystems individually
-for subsystem in cpu cpuacct devices memory
-do
- mkdir -p $cgroup_path/$subsystem
-
- if ! grep -q "${cgroup_path}/$subsystem " /proc/mounts
+ # Mount tmpfs
+ if ! grep "${cgroup_path} " /proc/mounts | cut -d' ' -f3 | grep -q tmpfs
then
- mount -t cgroup -o $subsystem none $cgroup_path/$subsystem
+ mount -t tmpfs none $cgroup_path
fi
-done
+
+ # Mount cgroup subsystems individually
+ for subsystem in cpu cpuacct devices memory
+ do
+ mkdir -p $cgroup_path/$subsystem
+
+ if ! grep -q "${cgroup_path}/$subsystem " /proc/mounts
+ then
+ mount -t cgroup -o $subsystem none $cgroup_path/$subsystem
+ fi
+ done
+fi
./net.sh setup
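The restructured guard matters for nesting: inside a child container, /tmp/warden/cgroup already exists because the parent bind-mounted its cgroup hierarchy there, so setup.sh must leave it alone rather than mount a fresh tmpfs over it. A sketch of checking which case applies:

    # On a fresh host the directory is absent; in a nested container it
    # is pre-populated by the parent's bind mounts.
    if [ -d /tmp/warden/cgroup ]; then
      grep /tmp/warden/cgroup /proc/mounts
    else
      echo "not present yet; setup.sh will create and mount it"
    fi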
1 warden/root/linux/skeleton/lib/common.sh
@@ -44,6 +44,7 @@ function setup_fs_other() {
overlay_directory_in_rootfs /etc rw
overlay_directory_in_rootfs /home rw
overlay_directory_in_rootfs /sbin rw
+ overlay_directory_in_rootfs /var rw
mkdir -p tmp/rootfs/tmp
chmod 777 tmp/rootfs/tmp
38 warden/root/linux/skeleton/lib/hook-parent-after-clone.sh
@@ -24,22 +24,28 @@ do
if [ $(basename $system_path) == "devices" ]
then
- # disallow everything, allow explicitly
- echo a > $instance_path/devices.deny
- # /dev/null
- echo "c 1:3 rw" > $instance_path/devices.allow
- # /dev/zero
- echo "c 1:5 rw" > $instance_path/devices.allow
- # /dev/random
- echo "c 1:8 rw" > $instance_path/devices.allow
- # /dev/urandom
- echo "c 1:9 rw" > $instance_path/devices.allow
- # /dev/tty
- echo "c 5:0 rw" > $instance_path/devices.allow
- # /dev/ptmx
- echo "c 5:2 rw" > $instance_path/devices.allow
- # /dev/pts/*
- echo "c 136:* rw" > $instance_path/devices.allow
+ if [ $allow_nested_warden == "true" ]
+ then
+ # Allow everything
+ echo "a *:* rw" > $instance_path/devices.allow
ryantang commented on Aug 23, 2013

Can you explain why you need everything? It seems like the right thing to do is to just add the specific devices you need.

@mariash @ryantang

Kaixiang commented on Aug 27, 2013

At least one thing fails, as I observed: when untarring the rootfs package for the dea job, a lot of device nodes need mknod, and otherwise we fail with "Operation not permitted". (And we want to share the same package from cf-release master.)

And since warden-cpi is the only consumer of nested warden, we see no requirement to restrict device access for it right now.
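For reference, entries written to devices.deny and devices.allow use the devices-cgroup format "type major:minor access", where type is a (all), b (block) or c (char), and access combines r (read), w (write) and m (mknod). A short illustration of both branches (run from the container's devices cgroup directory, i.e. $instance_path):

    # Whitelist branch: deny everything, then allow specific char devices.
    echo a > devices.deny
    echo "c 1:3 rw" > devices.allow   # /dev/null, read/write
    # Nested branch: a single rule covering all devices.
    echo "a *:* rw" > devices.allow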

+ else
+ # Deny everything, allow explicitly
+ echo a > $instance_path/devices.deny
+ # /dev/null
+ echo "c 1:3 rw" > $instance_path/devices.allow
+ # /dev/zero
+ echo "c 1:5 rw" > $instance_path/devices.allow
+ # /dev/random
+ echo "c 1:8 rw" > $instance_path/devices.allow
+ # /dev/urandom
+ echo "c 1:9 rw" > $instance_path/devices.allow
+ # /dev/tty
+ echo "c 5:0 rw" > $instance_path/devices.allow
+ # /dev/ptmx
+ echo "c 5:2 rw" > $instance_path/devices.allow
+ # /dev/pts/*
+ echo "c 136:* rw" > $instance_path/devices.allow
+ fi
fi
echo $PID > $instance_path/tasks
2 warden/root/linux/skeleton/net.sh
@@ -38,7 +38,7 @@ function setup_filter() {
--goto ${filter_default_chain}
# Bind instance chain to forward chain
- iptables -I ${filter_forward_chain} \
+ iptables -I ${filter_forward_chain} 2 \
ryantang commented on Aug 23, 2013

Why do you need this change? Can you add a test to showcase the desired behavior?

@mariash @ryantang

cf-frameworks commented on Sep 11, 2013

Was this added by you guys or Pieter & David?

@vito & @mariash

--in-interface ${network_host_iface} \
--goto ${filter_instance_chain}
}
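Reading this together with the host-side net.sh change above suggests the motivation: host setup now inserts an ACCEPT for the default interface at position 1 of the forward chain, so each instance chain is inserted at position 2 to keep it behind that rule. The resulting order, sketched:

    # ${filter_forward_chain} after both changes:
    #   1. -i eth0   -j ACCEPT                # inbound via default interface
    #   2. -i w-...  --goto <instance chain>  # newest instance
    #   3. ...                                # older instances
    iptables -S   # then locate ${filter_forward_chain} in the output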
2 warden/root/linux/skeleton/setup.sh
@@ -18,6 +18,7 @@ network_container_ip=${network_container_ip:-10.0.0.2}
network_container_iface="w-${id}-1"
user_uid=${user_uid:-10000}
rootfs_path=$(readlink -f $rootfs_path)
+allow_nested_warden=${allow_nested_warden:-false}
# Write configuration
cat > etc/config <<-EOS
@@ -29,6 +30,7 @@ network_container_ip=$network_container_ip
network_container_iface=$network_container_iface
user_uid=$user_uid
rootfs_path=$rootfs_path
+allow_nested_warden=$allow_nested_warden
EOS
setup_fs
62 warden/spec/assets/config/child-linux.yml
@@ -0,0 +1,62 @@
+---
+server:
+ container_klass: Warden::Container::Linux
+
+ # Wait this long before destroying a container, after the last client
+ # referencing it disconnected. The timer is cancelled when, during this
+ # period, another client references the container.
+ #
+ # Clients can be forced to specify this setting by setting the
+ # server-wide variable to an invalid value:
+ # container_grace_time: invalid
+ #
+ # The grace time can be disabled by setting it to nil:
+ # container_grace_time: ~
+ #
+ container_grace_time: 30
+
+ unix_domain_permissions: 0777
+
+ # Specifies the path to the base chroot used as the read-only root
+ # filesystem
+ container_rootfs_path: /tmp/warden/rootfs
+
+ # Specifies the path to the parent directory under which all containers
+ # will live.
+ container_depot_path: /tmp/warden/containers
+
+ # See getrlimit(2) for details. Integer values are passed verbatim.
+ container_rlimits:
+ as: 4294967296
+ nofile: 8192
+ nproc: 512
+
+ # Specifies the output limit of a job (stdout/stderr combined).
+ job_output_limit: 10485760
+
+ quota:
+ disk_quota_enabled: false
+
+ allow_nested_warden: false
+
+health_check_server:
+ port: 2345
+
+logging:
+ level: debug2
+ file: /tmp/warden.log
+
+network:
+ # Use this /30 network as offset for the network pool.
+ pool_start_address: 10.254.0.0
+
+ # Pool this many /30 networks.
+ pool_size: 256
+
+ # Interface MTU size
+ # (for OpenStack use 1454 to avoid problems with rubygems with GRE tunneling)
+ mtu: 1500
+
+user:
+ pool_start_uid: 20000
+ pool_size: 25
234 warden/spec/container/linux_nested_spec.rb
@@ -0,0 +1,234 @@
+# coding: UTF-8
+
+# This file contains the tests for the nested warden feature.
+# These tests don't fit into linux_spec.rb, because there all before/after hooks
+# are :each, while we need :all, since setting up a nested warden container is
+# very expensive.
+# TODO: This file has some duplication with linux_spec.rb; refactor later.
+
+require "spec_helper"
+
+require "warden/server"
+require "warden/client"
+require "warden/network"
+require "warden/util"
+
+require "warden/container/linux"
+
+describe "linux", :platform => "linux", :needs_root => true do
+ let(:work_path) { File.join(Dir.tmpdir, "warden", "spec") }
+ let(:unix_domain_path) { File.join(work_path, "warden.sock") }
+ let(:container_klass) { "Warden::Container::Linux" }
+ let(:container_rootfs_path) { File.join(work_path, "..", "rootfs") }
+ let(:container_depot_path) { File.join(work_path, "containers") }
+ let(:container_depot_file) { container_depot_path + ".img" }
+ let(:have_uid_support) { true }
+ let(:netmask) { Warden::Network::Netmask.new(255, 255, 255, 252) }
+ let(:allow_networks) { [] }
+ let(:deny_networks) { [] }
+ let(:mtu) { 1500 }
+ let(:nested_linux_config) {
+
+ { "server" => {
+ "unix_domain_path" => unix_domain_path,
+ "container_klass" => container_klass,
+ "container_rootfs_path" => container_rootfs_path,
+ "container_depot_path" => container_depot_path,
+ "container_grace_time" => nil,
+ "job_output_limit" => 100 * 1024,
+ "allow_nested_warden" => true },
+ "network" => {
+ "pool_start_address" => "10.244.0.0",
+ "pool_size" => 64,
+ "mtu" => mtu,
+ "allow_networks" => allow_networks,
+ "deny_networks" => deny_networks },
+ "port" => {
+ "pool_start_port" => 64000,
+ "pool_size" => 1000 },
+ "logging" => {
+ "level" => "debug",
+ "file" => File.join(work_path, "warden.log") }
+ }
+ }
+
+ before :all do
+ FileUtils.mkdir_p(work_path)
+
+ unless File.directory?(container_rootfs_path)
+ raise "%s does not exist" % container_rootfs_path
+ end
+
+ FileUtils.mkdir_p(container_depot_path)
+
+ start_warden
+ end
+
+ after :all do
+
+ stop_warden
+
+ # Destroy all artifacts
+ Dir[File.join(Warden::Util.path("root"), "*", "clear.sh")].each do |clear|
+ execute("#{clear} #{container_depot_path} > /dev/null")
+ end
+
+ execute("rm -rf #{container_depot_path}")
+
+ end
+
+ def execute(command)
+ `#{command}`.tap do
+ $?.should be_success
+ end
+ end
+
+ def create_client
+ client = ::Warden::Client.new(unix_domain_path)
+ client.connect
+ client
+ end
+
+ def start_warden(config=nil)
+ FileUtils.rm_f(unix_domain_path)
+
+ # Grab new network for every test to avoid resource contention
+ @start_address = next_class_c.to_human
+ config ||= nested_linux_config
+ @pid = fork do
+ Process.setsid
+ Signal.trap("TERM") { exit }
+
+ Warden::Server.setup config
+ Warden::Server.run!
+ end
+
+ # Wait for the socket to come up
+ loop do
+ begin
+ UNIXSocket.new(unix_domain_path)
+ break
+ rescue Errno::ENOENT, Errno::ECONNREFUSED
+ end
+
+ if Process.waitpid(@pid, Process::WNOHANG)
+ STDERR.puts "Warden exited early, aborting spec suite"
+ exit 1
+ end
+
+ sleep 0.01
+ end
+ end
+
+ def stop_warden(signal = "USR2")
+ Process.kill(signal, -@pid) rescue Errno::ECHILD
+ Process.waitpid(@pid) rescue Errno::ECHILD
+ end
+
+ def restart_warden(signal = "USR2")
+ stop_warden(signal)
+ start_warden
+ end
+
+ def client
+ @client ||= create_client
+ end
+
+ def reset_client
+ @client = nil
+ end
+
+
+ describe "nested" do
+
+ attr_reader :handle
+
+ def run_as_root(script)
+ response = client.run(:handle => handle, :script => script, :privileged => true)
+ puts response.stdout, response.stderr unless response.exit_status == 0
+ response.exit_status.should == 0
+ end
+
+ def create
+ response = client.call(@create_request)
+ response.should be_ok
+
+ @handle = response.handle
+ end
+
+ before :all do
+
+ warden_repo = File.expand_path('../../../..', __FILE__)
+
+ # whiteout rootfs/dev beforehand to avoid 'overlayfs: operation not permitted' error
+ `rm -rf /tmp/warden/rootfs/dev/*`
+
+ @bind_mount_warden = Warden::Protocol::CreateRequest::BindMount.new
+ @bind_mount_warden.src_path = File.join(warden_repo, 'warden')
+ @bind_mount_warden.dst_path = "/warden"
+ @bind_mount_warden.mode = Warden::Protocol::CreateRequest::BindMount::Mode::RO
+
+ @bind_mount_rootfs = Warden::Protocol::CreateRequest::BindMount.new
+ @bind_mount_rootfs.src_path = "/tmp/warden/rootfs"
+ @bind_mount_rootfs.dst_path = "/tmp/warden/rootfs"
+ @bind_mount_rootfs.mode = Warden::Protocol::CreateRequest::BindMount::Mode::RO
+
+ @create_request = Warden::Protocol::CreateRequest.new
+ @create_request.bind_mounts = [@bind_mount_warden, @bind_mount_rootfs]
+
+ create
+
+ run_as_root 'apt-get -qq -y install iptables'
+ run_as_root 'sed -i s/lucid/precise/ /etc/lsb-release'
+ run_as_root 'curl -L https://get.rvm.io | bash -s stable'
+ ruby_version = File.read(File.join(warden_repo, '.ruby-version')).chomp
+ run_as_root "source /etc/profile.d/rvm.sh; rvm install #{ruby_version}"
+ run_as_root 'source /etc/profile.d/rvm.sh; gem install bundler --no-rdoc --no-ri'
+ run_as_root 'source /etc/profile.d/rvm.sh; cd /warden && bundle install --quiet'
+ run_as_root 'rm /tmp/warden.sock || true'
+ run_as_root 'source /etc/profile.d/rvm.sh; cd /warden && bundle exec rake warden:start[spec/assets/config/child-linux.yml] &'
+
+ sleep 5 # wait for the warden server to start up
+
+ run_as_root 'ls /tmp/warden.sock'
+ end
+
+ after :all do
+
+ # destroy nested containers, if any
+ run_as_root '/warden/root/linux/clear.sh /tmp/warden/containers > /dev/null'
+
+ # stop the child warden server
+ run_as_root "ps -ef|grep [r]ake |awk '{print $2}'|xargs kill"
+ end
+
+ it 'should run nested containers' do
+
+ run_as_root 'source /etc/profile.d/rvm.sh; /warden/bin/warden -- create'
+
+ end
+
+ it 'should setup nested cgroup' do
+
+ run_as_root 'source /etc/profile.d/rvm.sh; /warden/bin/warden -- create'
+ Dir.glob("/tmp/warden/cgroup/cpu/instance-#{handle}/instance-*").should_not be_empty
+
+ end
+
+ it 'should allow inbound traffic to nested containers' do
+
+ # ping the nested container from the host
+ execute "route add -net 10.254.0.0/22 gw 10.244.0.2"
+ run_as_root 'source /etc/profile.d/rvm.sh; /warden/bin/warden -- create --network 10.254.0.126'
+ execute 'ping -c3 10.254.0.126'
+ execute "route del -net 10.254.0.0/22 gw 10.244.0.2"
+ end
+
+ it 'should allow outbound traffic from nested containers' do
+
+ # create a nested container and have it download something
+ run_as_root 'source /etc/profile.d/rvm.sh; handle=`/warden/bin/warden -- create | cut -d" " -f3`;
+ /warden/bin/warden -- run --handle $handle --script "curl http://rvm.io"'
+
+ end
+ end
+end
5 warden/spec/container/linux_spec.rb
@@ -656,9 +656,12 @@ def create
@bind_mount.mode = Warden::Protocol::CreateRequest::BindMount::Mode::RO
@bind_mount.src_path = tmpdir + ".doesnt.exist"
+ # This will fail from the hook-child-before-pivot hook. It is not
+ # possible to check that a bind mount source exists before create is
+ # executed, because it may be created _during_ create.
expect do
create
- end.to raise_error(Warden::Client::ServerError, /\bdoes not exist\b/i)
+ end.to raise_error(Warden::Client::ServerError)
end
end