Skip to content

Commit

Permalink
Support for user namespaces
Browse files Browse the repository at this point in the history
This exposes UID namespace support.  A new command line option (--uidmap)
maps a set of virtual UIDs to which the application within the container
is confined.  The application could potentially be the root in the
container but unprivileged on the host.

Addresses issue moby#2918

Docker-DCO-1.1-Signed-off-by: Dinesh Subhraveti <dineshs@altiscale.com> (github: dineshs-altiscale)
  • Loading branch information
dineshs-altiscale committed Mar 20, 2014
1 parent 9bbc11b commit 104725d
Show file tree
Hide file tree
Showing 10 changed files with 256 additions and 6 deletions.
15 changes: 15 additions & 0 deletions docs/sources/reference/commandline/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,7 @@ image is removed.
-t, --tty=false: Allocate a pseudo-tty
-u, --user="": Username or UID
--dns=[]: Set custom dns servers for the container
--uidmap=[]: Map host UID range into the container: <host UID>:<container UID>:<size> (e.g. --uidmap="100000:0:10000")
-v, --volume=[]: Create a bind mount to a directory or file with: [host-path]:[container-path]:[rw|ro]. If a directory "container-path" is missing, then docker creates a new volume.
--volumes-from="": Mount all volumes from the given container(s)
--entrypoint="": Overwrite the default entrypoint set by the image
Expand Down Expand Up @@ -1280,6 +1281,20 @@ ID may be optionally suffixed with ``:ro`` or ``:rw`` to mount the volumes in
read-only or read-write mode, respectively. By default, the volumes are mounted
in the same mode (read write or read only) as the reference container.

.. code-block:: bash
$ touch /tmp/uid100000 && chown 100000:100000 /tmp/uid100000 && sudo docker run --uidmap="100000:0:10000" -v="/tmp:/mnt:rw" -i -t ubuntu ls -lh /mnt/uid100000
-rw-r--r--. 1 root root 0 Mar 10 19:16 /mnt/uid100000
Using UID namespaces, processes in the container can run with a virtual set of
UIDs that map to a set of real UIDs on the host. For instance, a process
running as UID 10000 on the host could be UID 0 (root) in the container, or a
file with UID 11000 on the host could appear as UID 1000 within the container.
The ``--uidmap`` flag specifies the range of host UIDs that is mapped into the
container. In the example above, 10000 host UIDs starting from 100000 are
mapped to 10000 virtual UIDs starting from 0 in the container. UID 100000 on
the host therefore becomes root (UID 0) in the container.

A complete example
..................

Expand Down
8 changes: 7 additions & 1 deletion engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,13 @@ func New(root string) (*Engine, error) {
}
}

if err := os.MkdirAll(root, 0700); err != nil && !os.IsExist(err) {
if err := os.MkdirAll(root, 0711); err != nil && !os.IsExist(err) {
return nil, err
}

// Change to 0711 in case it was already created 0700 by an earlier
// Docker version. MkdirAll doesn't fail if the dir already exists.
if err := os.Chmod(root, 0711); err != nil {
return nil, err
}

Expand Down
7 changes: 7 additions & 0 deletions opts/opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ func ValidateLink(val string) (string, error) {
return val, nil
}

// ValidateUidMap reports whether val is accepted by utils.ParseUidMap.
// The input string is handed back unchanged in every case; the error is
// whatever ParseUidMap returned, or nil when parsing succeeded.
func ValidateUidMap(val string) (string, error) {
	_, _, _, err := utils.ParseUidMap(val)
	return val, err
}

func ValidatePath(val string) (string, error) {
var containerPath string

Expand Down
6 changes: 6 additions & 0 deletions runconfig/hostconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ type HostConfig struct {
PortBindings nat.PortMap
Links []string
PublishAllPorts bool
UidMaps []string
ContainerRoot int64
}

type KeyValuePair struct {
Expand All @@ -25,6 +27,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
ContainerIDFile: job.Getenv("ContainerIDFile"),
Privileged: job.GetenvBool("Privileged"),
PublishAllPorts: job.GetenvBool("PublishAllPorts"),
ContainerRoot: job.GetenvInt64("ContainerRoot"),
}
job.GetenvJson("LxcConf", &hostConfig.LxcConf)
job.GetenvJson("PortBindings", &hostConfig.PortBindings)
Expand All @@ -34,6 +37,9 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
if Links := job.GetenvList("Links"); Links != nil {
hostConfig.Links = Links
}
if uidMaps := job.GetenvList("UidMaps"); uidMaps != nil {
hostConfig.UidMaps = uidMaps
}

return hostConfig
}
56 changes: 56 additions & 0 deletions runconfig/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/dotcloud/docker/utils"
"io/ioutil"
"path"
"strconv"
"strings"
)

Expand Down Expand Up @@ -44,6 +45,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
flDns opts.ListOpts
flVolumesFrom opts.ListOpts
flLxcOpts opts.ListOpts
flUidMaps opts.ListOpts

flAutoRemove = cmd.Bool([]string{"#rm", "-rm"}, false, "Automatically remove the container when it exits (incompatible with -d)")
flDetach = cmd.Bool([]string{"d", "-detach"}, false, "Detached mode: Run container in the background, print new container id")
Expand Down Expand Up @@ -75,6 +77,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
cmd.Var(&flDns, []string{"#dns", "-dns"}, "Set custom dns servers")
cmd.Var(&flVolumesFrom, []string{"#volumes-from", "-volumes-from"}, "Mount volumes from the specified container(s)")
cmd.Var(&flLxcOpts, []string{"#lxc-conf", "-lxc-conf"}, "Add custom lxc options --lxc-conf=\"lxc.cgroup.cpuset.cpus = 0,1\"")
cmd.Var(&flUidMaps, []string{"#uidmap", "-uidmap"}, "Map host UID range into the container: <host UID>:<container UID>:<size> (e.g. --uidmap=\"100000:0:10000\")")

if err := cmd.Parse(args); err != nil {
return nil, nil, cmd, err
Expand Down Expand Up @@ -148,6 +151,11 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
entrypoint = []string{*flEntrypoint}
}

uidMaps, containerRoot, err := parseUidMapOpts(flUidMaps, *flUser)
if err != nil {
return nil, nil, cmd, err
}

lxcConf, err := parseLxcConfOpts(flLxcOpts)
if err != nil {
return nil, nil, cmd, err
Expand Down Expand Up @@ -211,6 +219,8 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
PortBindings: portBindings,
Links: flLinks.GetAll(),
PublishAllPorts: *flPublishAll,
UidMaps: uidMaps,
ContainerRoot: containerRoot,
}

if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit {
Expand All @@ -225,6 +235,52 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
return config, hostConfig, cmd, nil
}

// parseUidMapOpts validates the --uidmap values together with the -u value
// and works out which host UID backs root inside the container.
//
// It returns the raw map strings unchanged, the host UID corresponding to
// container UID 0, and an error. When no maps were given it returns
// (maps, -1, nil) without performing any checks. Otherwise an error is
// returned when:
//   - userid is non-empty but is not a decimal integer,
//   - any map is rejected by utils.ParseUidMap,
//   - no map covers container UID 0, or
//   - a user was requested and no map covers it.
//
// NOTE(review): the three values returned by utils.ParseUidMap are taken to
// be (container start, host start, size), as the local names suggest; confirm
// against its definition.
func parseUidMapOpts(opts opts.ListOpts, userid string) ([]string, int64, error) {
	uMaps := opts.GetAll()

	// No UID mappings specified, no checks to enforce
	if len(uMaps) == 0 {
		return uMaps, -1, nil
	}

	// Track "was a user requested" separately from the parsed value so that
	// no numeric input (for instance "-u -1") can be mistaken for "no user"
	// and silently skip the membership check below.
	userGiven := userid != ""
	containerUser := int64(-1)
	if userGiven {
		cUser, err := strconv.ParseInt(userid, 10, 64)
		if err != nil {
			return nil, -1, fmt.Errorf("Invalid user: %s (-u has to be specified as a valid container UID rather than username when --uidmap is used)", userid)
		}
		containerUser = cUser
	}

	containerRoot := int64(-1)
	cRootFound := false
	cUserFound := !userGiven
	for _, uMap := range uMaps {
		cUid, hUid, size, err := utils.ParseUidMap(uMap)
		if err != nil {
			return nil, -1, err
		}
		// The first map whose container range contains UID 0 decides which
		// host UID acts as root for the container.
		if !cRootFound && cUid <= 0 && 0 < cUid+size {
			cRootFound = true
			containerRoot = 0 - cUid + hUid
		}
		if !cUserFound && cUid <= containerUser && containerUser < cUid+size {
			cUserFound = true
		}
	}
	if !cRootFound {
		return nil, -1, fmt.Errorf("Container UID 0 must be a part of the UID map")
	}
	if !cUserFound {
		return nil, -1, fmt.Errorf("User '%s' must be a part of the UID map",
			userid)
	}
	return uMaps, containerRoot, nil
}

func parseLxcConfOpts(opts opts.ListOpts) ([]KeyValuePair, error) {
out := make([]KeyValuePair, opts.Len())
for i, o := range opts.GetAll() {
Expand Down
9 changes: 9 additions & 0 deletions runtime/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,15 @@ func populateCommand(c *Container) {
driverConfig = append(driverConfig, fmt.Sprintf("%s = %s", pair.Key, pair.Value))
}
}

if uidMaps := c.hostConfig.UidMaps; uidMaps != nil {
for _, uidMap := range uidMaps {
cUid, hUid, size, _ := utils.ParseUidMap(uidMap)
driverConfig = append(driverConfig, fmt.Sprintf("lxc.id_map = u %d %d %d", cUid, hUid, size))
driverConfig = append(driverConfig, fmt.Sprintf("lxc.id_map = g %d %d %d", cUid, hUid, size))
}
}

resources := &execdriver.Resources{
Memory: c.Config.Memory,
MemorySwap: c.Config.MemorySwap,
Expand Down
18 changes: 16 additions & 2 deletions runtime/graphdriver/vfs/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ func Init(home string) (graphdriver.Driver, error) {
d := &Driver{
home: home,
}
d.fixPermissions()
return d, nil
}

Expand All @@ -35,6 +36,19 @@ func (d *Driver) Cleanup() error {
return nil
}

// fixPermissions relaxes two directories to mode 0711 in case an earlier
// Docker version created them as 0700: the parent of the driver's per-id
// directories and the driver home itself. A directory that does not exist
// yet is skipped; any other chmod failure is returned.
func (d *Driver) fixPermissions() error {
	// "id" is only a placeholder: the path is built solely to obtain the
	// directory that contains the per-id directories.
	targets := []string{path.Dir(d.dir("id")), d.home}
	for _, target := range targets {
		err := os.Chmod(target, 0711)
		if err == nil || os.IsNotExist(err) {
			continue
		}
		return err
	}
	return nil
}

func copyDir(src, dst string) error {
if output, err := exec.Command("cp", "-aT", "--reflink=auto", src, dst).CombinedOutput(); err != nil {
return fmt.Errorf("Error VFS copying directory: %s (%s)", err, output)
Expand All @@ -44,10 +58,10 @@ func copyDir(src, dst string) error {

func (d *Driver) Create(id string, parent string) error {
dir := d.dir(id)
if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
if err := os.MkdirAll(path.Dir(dir), 0711); err != nil {
return err
}
if err := os.Mkdir(dir, 0700); err != nil {
if err := os.Mkdir(dir, 0711); err != nil {
return err
}
if parent == "" {
Expand Down
19 changes: 16 additions & 3 deletions runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,13 @@ func NewRuntimeFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (*

runtimeRepo := path.Join(config.Root, "containers")

if err := os.MkdirAll(runtimeRepo, 0700); err != nil && !os.IsExist(err) {
if err := os.MkdirAll(runtimeRepo, 0711); err != nil && !os.IsExist(err) {
return nil, err
}

// Change to 0711 in case it was already created 0700 by an earlier
// Docker version. MkdirAll doesn't fail if the dir already exists.
if err := os.Chmod(runtimeRepo, 0711); err != nil {
return nil, err
}

Expand All @@ -666,6 +672,9 @@ func NewRuntimeFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (*
if err != nil {
return nil, err
}
if err != nil {
return nil, err
}
utils.Debugf("Creating volumes graph")
volumes, err := graph.NewGraph(path.Join(config.Root, "volumes"), volumesDriver)
if err != nil {
Expand Down Expand Up @@ -706,13 +715,17 @@ func NewRuntimeFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (*

if sysInitPath != localCopy {
// When we find a suitable dockerinit binary (even if it's our local binary), we copy it into config.Root at localCopy for future use (so that the original can go away without that being a problem, for example during a package upgrade).
if err := os.Mkdir(path.Dir(localCopy), 0700); err != nil && !os.IsExist(err) {
if err := os.Mkdir(path.Dir(localCopy), 0711); err != nil && !os.IsExist(err) {
return nil, err
}
if _, err := utils.CopyFile(sysInitPath, localCopy); err != nil {
return nil, err
}
if err := os.Chmod(localCopy, 0700); err != nil {
// Change to 0711 in case it was already created 0700 by an earlier Docker version
if err := os.Chmod(path.Dir(localCopy), 0711); err != nil {
return nil, err
}
if err := os.Chmod(localCopy, 0711); err != nil {
return nil, err
}
sysInitPath = localCopy
Expand Down
89 changes: 89 additions & 0 deletions runtime/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,96 @@ func prepareVolumesForContainer(container *Container) error {
return nil
}

// xlateOneFile rewrites the ownership of one file for a single ID mapping.
// IDs in [cUid, cUid+size) are shifted to start at hUid instead; the same
// range is applied to the file's UID and to its GID, each independently, and
// an ID outside the range is left untouched.
//
// finfo must describe path itself; its Sys() value has to be a
// *syscall.Stat_t, otherwise an error is returned. Failures of the chown or
// chmod calls are deliberately not fatal: they are reported on stderr and the
// function still returns nil so the caller can keep walking the tree.
func xlateOneFile(path string, finfo os.FileInfo, cUid, hUid, size int64) error {
	st, ok := finfo.Sys().(*syscall.Stat_t)
	if !ok {
		return fmt.Errorf("Cannot read ownership of %s", path)
	}

	// Lchown leaves an ID unchanged when it is passed as -1.
	newUid, newGid := -1, -1
	changed := false
	if uid := int64(st.Uid); uid >= cUid && uid < cUid+size {
		newUid = int(uid - cUid + hUid)
		changed = true
	}
	if gid := int64(st.Gid); gid >= cUid && gid < cUid+size {
		newGid = int(gid - cUid + hUid)
		changed = true
	}
	if !changed {
		return nil
	}

	if err := os.Lchown(path, newUid, newGid); err != nil {
		// Let's keep going
		fmt.Fprintf(os.Stderr, "Cannot chown %s: %s\n", path, err)
	}

	// Re-apply the recorded mode after the ownership change (presumably to
	// restore setuid/setgid bits, which chown may clear). Symlinks are
	// skipped: Chmod follows the link, so the link's own mode bits would
	// otherwise be written onto whatever it points at.
	if finfo.Mode()&os.ModeSymlink == 0 {
		if err := os.Chmod(path, finfo.Mode()); err != nil {
			// Let's keep going
			fmt.Fprintf(os.Stderr, "Cannot chmod %s: %s\n", path, err)
		}
	}

	return nil
}

// xlateUidsRecursive applies xlateOneFile to every entry below base for the
// mapping (cUid, hUid, size). A subdirectory's contents are processed before
// the subdirectory entry itself. base is not translated here; only its
// children are.
//
// Any error from opening or listing a directory, from a nested directory, or
// from xlateOneFile stops the walk and is returned.
func xlateUidsRecursive(base string, cUid, hUid, size int64) error {
	f, err := os.Open(base)
	if err != nil {
		return err
	}

	// Close right after listing rather than deferring, so that a deep tree
	// does not keep one descriptor open per directory level.
	list, err := f.Readdir(-1)
	f.Close()
	if err != nil {
		return err
	}

	for _, finfo := range list {
		child := filepath.Join(base, finfo.Name())
		if finfo.IsDir() {
			if err := xlateUidsRecursive(child, cUid, hUid, size); err != nil {
				return err
			}
		}
		if err := xlateOneFile(child, finfo, cUid, hUid, size); err != nil {
			return err
		}
	}

	return nil
}

// xlateUids translates the UIDs and GIDs of root and of everything below it
// to what should be their 'real' values on the host, once for each entry in
// the container's hostConfig.UidMaps. With no maps configured it does
// nothing and returns nil.
//
// A map that utils.ParseUidMap rejects aborts the translation with that
// error instead of being applied with whatever values the parser returned.
//
// NOTE(review): the maps are applied one after another over the same tree, so
// with several maps a file translated by one map could fall inside the
// container range of a later map and be shifted again; confirm whether
// overlapping host/container ranges are possible here.
func xlateUids(container *Container, root string) error {
	for _, uidMap := range container.hostConfig.UidMaps {
		cUid, hUid, size, err := utils.ParseUidMap(uidMap)
		if err != nil {
			return err
		}
		if err := xlateUidsRecursive(root, cUid, hUid, size); err != nil {
			return err
		}
		// The recursive walk only covers the children; handle root itself.
		finfo, err := os.Stat(root)
		if err != nil {
			return err
		}
		if err := xlateOneFile(root, finfo, cUid, hUid, size); err != nil {
			return err
		}
	}
	return nil
}

func setupMountsForContainer(container *Container, envPath string) error {
mounts := []execdriver.Mount{
{container.runtime.sysInitPath, "/.dockerinit", false, true},
{envPath, "/.dockerenv", false, true},
{container.ResolvConfPath, "/etc/resolv.conf", false, true},
}

// Let root in the container own container.root and container.basefs
cRootUid := container.hostConfig.ContainerRoot
if cRootUid != -1 {
if err := os.Chown(container.root, int(cRootUid), int(cRootUid)); err != nil {
return err
}
// Even if -x flag is not set, container rootfs directory needs to be chowned to container root to be able to setup pivot root
// TODO: Warn if the UIDs in the image don't match the mappings
if err := os.Chown(container.RootfsPath(), int(cRootUid), int(cRootUid)); err != nil {
return err
}
}
if err := xlateUids(container, container.RootfsPath()); err != nil {
return err
}

if container.HostnamePath != "" && container.HostsPath != "" {
mounts = append(mounts, execdriver.Mount{container.HostnamePath, "/etc/hostname", false, true})
mounts = append(mounts, execdriver.Mount{container.HostsPath, "/etc/hosts", false, true})
Expand Down Expand Up @@ -262,6 +345,12 @@ func createVolumes(container *Container) error {
}
}
}
// Translate UIDs/GIDs of the empty new volumes and volumes copied from the image but not
// volumes imported from other containers or the host.

if err := xlateUids(container, srcPath); err != nil {
return err
}
}
}
return nil
Expand Down
Loading

0 comments on commit 104725d

Please sign in to comment.