@@ -10,19 +10,29 @@ import (
10
10
"unsafe"
11
11
)
12
12
13
// SysProcIDMap describes one contiguous range of IDs mapped between a
// container (a Linux user namespace) and the host.
// See user_namespaces(7).
type SysProcIDMap struct {
	ContainerID int // First ID of the range as seen inside the container.
	HostID      int // First ID of the range as seen on the host.
	Size        int // Number of consecutive IDs covered by the range.
}
20
+
13
21
type SysProcAttr struct {
14
- Chroot string // Chroot.
15
- Credential * Credential // Credential.
16
- Ptrace bool // Enable tracing.
17
- Setsid bool // Create session.
18
- Setpgid bool // Set process group ID to new pid (SYSV setpgrp)
19
- Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set)
20
- Noctty bool // Detach fd 0 from controlling terminal
21
- Ctty int // Controlling TTY fd (Linux only)
22
- Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only)
23
- Cloneflags uintptr // Flags for clone calls (Linux only)
24
- Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY)
25
- Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set)
22
+ Chroot string // Chroot.
23
+ Credential * Credential // Credential.
24
+ Ptrace bool // Enable tracing.
25
+ Setsid bool // Create session.
26
+ Setpgid bool // Set process group ID to new pid (SYSV setpgrp)
27
+ Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set)
28
+ Noctty bool // Detach fd 0 from controlling terminal
29
+ Ctty int // Controlling TTY fd (Linux only)
30
+ Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only)
31
+ Cloneflags uintptr // Flags for clone calls (Linux only)
32
+ Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY)
33
+ Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set)
34
+ UidMappings []SysProcIDMap // User ID mappings for user namespaces.
35
+ GidMappings []SysProcIDMap // Group ID mappings for user namespaces.
26
36
}
27
37
28
38
// Implemented in runtime package.
@@ -44,8 +54,10 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
44
54
var (
45
55
r1 uintptr
46
56
err1 Errno
57
+ err2 Errno
47
58
nextfd int
48
59
i int
60
+ p [2 ]int
49
61
)
50
62
51
63
// Guard against side effects of shuffling fds below.
@@ -61,6 +73,14 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
61
73
}
62
74
nextfd ++
63
75
76
+ // Allocate another pipe for parent to child communication for
77
+ // synchronizing writing of User ID/Group ID mappings.
78
+ if sys .UidMappings != nil || sys .GidMappings != nil {
79
+ if err := forkExecPipe (p [:]); err != nil {
80
+ return 0 , err .(Errno )
81
+ }
82
+ }
83
+
64
84
// About to call fork.
65
85
// No more allocation or calls of non-assembly functions.
66
86
runtime_BeforeFork ()
@@ -75,6 +95,16 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
75
95
runtime_AfterFork ()
76
96
pid = int (r1 )
77
97
98
+ if sys .UidMappings != nil || sys .GidMappings != nil {
99
+ Close (p [0 ])
100
+ err := writeUidGidMappings (pid , sys )
101
+ if err != nil {
102
+ err2 = err .(Errno )
103
+ }
104
+ RawSyscall (SYS_WRITE , uintptr (p [1 ]), uintptr (unsafe .Pointer (& err2 )), unsafe .Sizeof (err2 ))
105
+ Close (p [1 ])
106
+ }
107
+
78
108
if sys .Joinpgrp != 0 {
79
109
// Place the child in the specified process group.
80
110
RawSyscall (SYS_SETPGID , r1 , uintptr (sys .Joinpgrp ), 0 )
@@ -93,6 +123,25 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
93
123
94
124
// Fork succeeded, now in child.
95
125
126
+ // Wait for User ID/Group ID mappings to be written.
127
+ if sys .UidMappings != nil || sys .GidMappings != nil {
128
+ if _ , _ , err1 = RawSyscall (SYS_CLOSE , uintptr (p [1 ]), 0 , 0 ); err1 != 0 {
129
+ goto childerror
130
+ }
131
+ r1 , _ , err1 = RawSyscall (SYS_READ , uintptr (p [0 ]), uintptr (unsafe .Pointer (& err2 )), unsafe .Sizeof (err2 ))
132
+ if err1 != 0 {
133
+ goto childerror
134
+ }
135
+ if r1 != unsafe .Sizeof (err2 ) {
136
+ err1 = EINVAL
137
+ goto childerror
138
+ }
139
+ if err2 != 0 {
140
+ err1 = err2
141
+ goto childerror
142
+ }
143
+ }
144
+
96
145
// Parent death signal
97
146
if sys .Pdeathsig != 0 {
98
147
_ , _ , err1 = RawSyscall6 (SYS_PRCTL , PR_SET_PDEATHSIG , uintptr (sys .Pdeathsig ), 0 , 0 , 0 , 0 )
@@ -296,3 +345,53 @@ func forkExecPipe(p []int) (err error) {
296
345
}
297
346
return
298
347
}
348
+
349
+ // writeIDMappings writes the user namespace User ID or Group ID mappings to the specified path.
350
+ func writeIDMappings (path string , idMap []SysProcIDMap ) error {
351
+ fd , err := Open (path , O_RDWR , 0 )
352
+ if err != nil {
353
+ return err
354
+ }
355
+
356
+ data := ""
357
+ for _ , im := range idMap {
358
+ data = data + itoa (im .ContainerID ) + " " + itoa (im .HostID ) + " " + itoa (im .Size ) + "\n "
359
+ }
360
+
361
+ bytes , err := ByteSliceFromString (data )
362
+ if err != nil {
363
+ Close (fd )
364
+ return err
365
+ }
366
+
367
+ if _ , err := Write (fd , bytes ); err != nil {
368
+ Close (fd )
369
+ return err
370
+ }
371
+
372
+ if err := Close (fd ); err != nil {
373
+ return err
374
+ }
375
+
376
+ return nil
377
+ }
378
+
379
+ // writeUidGidMappings writes User ID and Group ID mappings for user namespaces
380
+ // for a process and it is called from the parent process.
381
+ func writeUidGidMappings (pid int , sys * SysProcAttr ) error {
382
+ if sys .UidMappings != nil {
383
+ uidf := "/proc/" + itoa (pid ) + "/uid_map"
384
+ if err := writeIDMappings (uidf , sys .UidMappings ); err != nil {
385
+ return err
386
+ }
387
+ }
388
+
389
+ if sys .GidMappings != nil {
390
+ gidf := "/proc/" + itoa (pid ) + "/gid_map"
391
+ if err := writeIDMappings (gidf , sys .GidMappings ); err != nil {
392
+ return err
393
+ }
394
+ }
395
+
396
+ return nil
397
+ }
0 commit comments