// Source file: seccomp.go (202 lines, 184 loc, 5.98 KB).
// Note: the upstream repository was archived by its owner on Feb 24, 2020 and is now read-only.
// Copyright 2016 The rkt Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//+build linux
package common
import (
"errors"
"fmt"
"strings"
stage1commontypes "github.com/rkt/rkt/stage1/common/types"
"github.com/appc/spec/schema/types"
"github.com/coreos/go-systemd/unit"
)
// ErrTooManySeccompIsolators is returned when an app specifies more than one
// seccomp isolator.
var ErrTooManySeccompIsolators = errors.New("too many seccomp isolators specified")
// Prefixes selecting the systemd SystemCallFilter= mode, see
// https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=
const (
	// sdBlacklistPrefix is prepended to filter entries in blacklist mode.
	sdBlacklistPrefix = "~"

	// sdWhitelistPrefix is prepended to filter entries in whitelist mode;
	// it is empty, so whitelist entries are written unprefixed.
	sdWhitelistPrefix = ""
)
// filterType tells whether a seccomp filter's syscall list is a blacklist
// or a whitelist.
type filterType int

const (
	// ModeBlacklist marks the syscall list as a blacklist. It is the zero
	// value of filterType.
	ModeBlacklist filterType = 0
	// ModeWhitelist marks the syscall list as a whitelist.
	ModeWhitelist filterType = 1
)
// seccompFilter is the internal, resolved form of the seccomp filtering
// requested through an app's isolators.
type seccompFilter struct {
	// syscalls is the list of syscalls to filter.
	syscalls []string
	// mode says whether syscalls is a whitelist or a blacklist.
	mode filterType
	// errno is optional; the empty string means "use the default".
	errno string
	// forceNoNewPrivileges, when true, overrides the NoNewPrivileges isolator.
	forceNoNewPrivileges bool
}
// generateSeccompFilter resolves the seccomp isolator of the prepared app
// into a concrete seccompFilter.
//
// At most one seccomp isolator is accepted; a second one yields
// ErrTooManySeccompIsolators. When the app carries no seccomp isolator the
// rkt default whitelist is used. A retain-set resolving to "all" disables
// seccomp filtering entirely, which is reported as a nil filter and nil error.
func generateSeccompFilter(p *stage1commontypes.Pod, pa *preparedApp) (*seccompFilter, error) {
	var (
		filter seccompFilter
		found  bool
	)

	for _, isolator := range pa.app.App.Isolators {
		set, isSeccomp := isolator.Value().(types.LinuxSeccompSet)
		if !isSeccomp {
			continue
		}

		// By appc spec, only one seccomp isolator per app is allowed.
		if found {
			return nil, ErrTooManySeccompIsolators
		}
		found = true

		switch isolator.Name {
		case types.LinuxSeccompRemoveSetName:
			syscalls, flag, err := parseLinuxSeccompSet(p, set)
			if err != nil {
				return nil, err
			}
			filter.mode, filter.syscalls = ModeBlacklist, syscalls
			if flag == "empty" {
				// "remove @empty" is interpreted as "default whitelist".
				filter.mode, filter.syscalls = ModeWhitelist, RktDefaultSeccompWhitelist
			}

		case types.LinuxSeccompRetainSetName:
			syscalls, flag, err := parseLinuxSeccompSet(p, set)
			if err != nil {
				return nil, err
			}
			if flag == "all" {
				// Opt out of seccomp filtering altogether.
				return nil, nil
			}
			filter.mode, filter.syscalls = ModeWhitelist, syscalls
		}

		filter.errno = string(set.Errno())
	}

	// No seccomp isolator at all: fall back to the rkt default whitelist.
	if !found {
		filter.mode = ModeWhitelist
		filter.syscalls = RktDefaultSeccompWhitelist
	}

	// Non-priv apps *must* have NoNewPrivileges set if they have seccomp.
	filter.forceNoNewPrivileges = pa.uid != 0

	return &filter, nil
}
// seccompUnitOptions appends to opts the systemd unit options that implement
// the given seccomp filter and returns the extended list.
//
// A nil filter leaves opts untouched. A non-empty errno is emitted as
// SystemCallErrorNumber. An unrecognized filter mode results in an error and
// a nil option list.
func seccompUnitOptions(opts []*unit.UnitOption, sf *seccompFilter) ([]*unit.UnitOption, error) {
	if sf == nil {
		return opts, nil
	}

	if len(sf.errno) > 0 {
		errnoOpt := unit.NewUnitOption("Service", "SystemCallErrorNumber", sf.errno)
		opts = append(opts, errnoOpt)
	}

	// Pick the systemd prefix matching the filter mode.
	var prefix string
	switch sf.mode {
	case ModeBlacklist:
		prefix = sdBlacklistPrefix
	case ModeWhitelist:
		prefix = sdWhitelistPrefix
	default:
		return nil, fmt.Errorf("unknown filter mode %v", sf.mode)
	}

	// SystemCallFilter options are written down one entry per line, because
	// filtering sets may be quite large and overlong lines break unit serialization.
	return appendOptionsList(opts, "Service", "SystemCallFilter", prefix, sf.syscalls...), nil
}
// parseLinuxSeccompSet translates an appc LinuxSeccompSet into the list of
// values to write into systemd's SystemCallFilter option.
//
// Entries that do not start with "@" are taken as plain syscall names and
// passed through unchanged. Entries of the form "@scope/name" are wildcards:
//
//   - "@appc.io/all" and "@appc.io/empty" stop parsing immediately; the
//     function returns a nil filter and sets flag to "all" or "empty".
//   - "@docker/..." and "@rkt/..." expand to the corresponding default
//     blacklist/whitelist.
//   - "@systemd/..." maps to a systemd syscall group and requires
//     systemd >= 231 as reported by GetFlavor(p).
//
// Wildcards lacking a "/" separator, and wildcards with an unrecognized scope
// or name, are silently ignored.
//
// An error is returned for an empty entry, or when a "@systemd" wildcard is
// used with an unsupported or unknown systemd version.
func parseLinuxSeccompSet(p *stage1commontypes.Pod, s types.LinuxSeccompSet) (syscallFilter []string, flag string, err error) {
	for _, item := range s.Set() {
		entry := string(item)

		// Indexing the first byte of an empty entry would panic; reject it
		// explicitly, as an empty syscall name is never meaningful.
		if entry == "" {
			return nil, "", errors.New("empty entry in seccomp syscall set")
		}

		if !strings.HasPrefix(entry, "@") {
			// Plain syscall name
			syscallFilter = append(syscallFilter, entry)
			continue
		}

		// Wildcards
		wildcard := strings.SplitN(entry, "/", 2)
		if len(wildcard) != 2 {
			continue
		}
		scope, name := wildcard[0], wildcard[1]

		switch scope {
		case "@appc.io":
			// appc-reserved wildcards
			switch name {
			case "all":
				return nil, "all", nil
			case "empty":
				return nil, "empty", nil
			}
		case "@docker":
			// Docker-originated wildcards
			switch name {
			case "default-blacklist":
				syscallFilter = append(syscallFilter, DockerDefaultSeccompBlacklist...)
			case "default-whitelist":
				syscallFilter = append(syscallFilter, DockerDefaultSeccompWhitelist...)
			}
		case "@rkt":
			// Custom rkt wildcards
			switch name {
			case "default-blacklist":
				syscallFilter = append(syscallFilter, RktDefaultSeccompBlacklist...)
			case "default-whitelist":
				syscallFilter = append(syscallFilter, RktDefaultSeccompWhitelist...)
			}
		case "@systemd":
			// Custom systemd wildcards (systemd >= 231). The version is
			// checked for any "@systemd" entry, even an unrecognized one.
			_, systemdVersion, flavorErr := GetFlavor(p)
			if flavorErr != nil || systemdVersion < 231 {
				return nil, "", errors.New("Unsupported or unknown systemd version, seccomp groups need systemd >= v231")
			}
			switch name {
			case "clock", "mount", "network-io", "obsolete", "privileged", "process", "raw-io":
				// These groups carry the same name on the systemd side.
				syscallFilter = append(syscallFilter, "@"+name)
			case "default-whitelist":
				syscallFilter = append(syscallFilter, "@default")
			}
		}
	}

	return syscallFilter, "", nil
}