Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SGX: set EPC limits via NRI annotations #1582

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/lib-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ jobs:
- sgx-aesmd-demo
- dlb-dpdk-demo
- dlb-libdlb-demo
- stress-ng-gramine
builder: [buildah, docker]
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ e2e-dlb:
@$(GO) test -v ./test/e2e/... -ginkgo.v -ginkgo.show-node-events -ginkgo.focus "Device:dlb.*$(ADDITIONAL_FOCUS_REGEX)" $(GENERATED_SKIP_OPT) -delete-namespace-on-failure=false

e2e-spr:
@$(GO) test -v ./test/e2e/... -ginkgo.v -ginkgo.show-node-events -ginkgo.focus "Device:(iaa|dsa)|Device:qat.*Mode:dpdk.*Resource:(cy|dc).*" -ginkgo.focus "Device:sgx.*|(SGX Admission)" -ginkgo.focus "Device:gpu.*Resource:i915" $(GENERATED_SKIP_OPT) -delete-namespace-on-failure=false
@$(GO) test -v ./test/e2e/... -ginkgo.v -ginkgo.show-node-events -ginkgo.focus "Device:(iaa|dsa)|Device:qat.*Mode:dpdk.*Resource:(cy|dc).*" -ginkgo.focus "Device:sgx.*|(SGX Admission)" -ginkgo.focus "Device:gpu.*Resource:i915" -ginkgo.skip "App:sgx-epc-cgroup" $(GENERATED_SKIP_OPT) -delete-namespace-on-failure=false

pre-pull:
ifeq ($(TAG),devel)
Expand Down
13 changes: 13 additions & 0 deletions demo/stress-ng-gramine/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Demo image: stress-ng packaged to run inside an SGX enclave under Gramine.
FROM gramineproject/gramine:1.6-jammy

# stress-ng is the workload; make drives the manifest build further below.
# The apt package lists are removed in the same layer so they do not end up
# in the final image.
RUN apt-get update \
    && env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        stress-ng \
        make \
    && rm -rf /var/lib/apt/lists/*

# Bring the build inputs (Makefile, manifest template) into the image.
COPY * /stress-ng/
WORKDIR /stress-ng

# Generate an enclave signing key and run the Makefile with SGX=1.
# NOTE(review): the signing key is created at image build time and stays in
# the image - presumably acceptable for a demo; confirm.
RUN gramine-sgx-gen-private-key && \
    make SGX=1
ENTRYPOINT ["/usr/bin/gramine-sgx"]
52 changes: 52 additions & 0 deletions demo/stress-ng-gramine/build/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Builds the Gramine manifests used to run stress-ng inside an SGX enclave.
#
# Two variants are rendered from the same template:
#   stress-ng       - EDMM disabled, 128M enclave
#   stress-ng-edmm  - EDMM enabled,  128G enclave
#
# Knobs:
#   SGX=1    additionally sign both manifests
#   DEBUG=1  render the manifests with log level "debug" instead of "error"

ARCH_LIBDIR ?= /lib/x86_64-linux-gnu

# Directory holding the stress-ng binary. Deliberately a recursive (lazy)
# variable: the lookup only runs when a manifest recipe is expanded, so
# targets such as `clean` never invoke `which`.
EXECDIR = $(shell dirname $(shell which stress-ng))

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

# The manifest recipes redirect into $@; without this a failed
# gramine-manifest run would leave an empty file that looks up to date.
.DELETE_ON_ERROR:

.PHONY: all
all: stress-ng.manifest stress-ng-edmm.manifest
ifeq ($(SGX),1)
all: stress-ng.manifest.sgx stress-ng.sig stress-ng-edmm.manifest.sgx stress-ng-edmm.sig
endif

stress-ng.manifest: stress-ng.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Dedmm='false' \
		-Denclave_size=128M \
		-Dexecdir=$(EXECDIR) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< >$@

stress-ng.manifest.sgx: stress-ng.manifest
	gramine-sgx-sign \
		--manifest $< \
		--output $@

# No recipe: the .sig is expected to be written by gramine-sgx-sign as a side
# product of signing the manifest (NOTE(review): confirm against Gramine docs).
stress-ng.sig: stress-ng.manifest.sgx

stress-ng-edmm.manifest: stress-ng.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Dedmm='true' \
		-Denclave_size=128G \
		-Dexecdir=$(EXECDIR) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< >$@

# Must depend on the EDMM manifest it signs (not on stress-ng.manifest),
# otherwise the rebuild trigger is wrong and `make -j` can sign a manifest
# that has not been generated yet.
stress-ng-edmm.manifest.sgx: stress-ng-edmm.manifest
	gramine-sgx-sign \
		--manifest $< \
		--output $@

# No recipe: see stress-ng.sig.
stress-ng-edmm.sig: stress-ng-edmm.manifest.sgx

.PHONY: clean
clean:
	$(RM) *.manifest *.manifest.sgx *.token *.sig OUTPUT

.PHONY: distclean
distclean: clean
29 changes: 29 additions & 0 deletions demo/stress-ng-gramine/build/stress-ng.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Gramine manifest template for the stress-ng demo.
# The log_level, edmm, enclave_size, execdir and arch_libdir placeholders are
# supplied through the -D options the Makefile passes to gramine-manifest.

# Gramine's LibOS is loaded first; it then starts the stress-ng binary.
loader.entrypoint = "file:{{ gramine.libos }}"
libos.entrypoint = "{{ execdir }}/stress-ng"

loader.log_level = "{{ log_level }}"
# NOTE(review): the key is explicitly prefixed "insecure__"; presumably this
# lets stress-ng options be passed on the container command line - confirm
# this is acceptable outside a demo.
loader.insecure__use_cmdline_argv = true

# Environment seen by the application inside the enclave.
loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}:/usr{{ arch_libdir }}"
loader.env.PATH = "{{ execdir }}"

# Host directories made visible inside the enclave's filesystem view.
fs.mounts = [
{ path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
{ path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },
{ path = "/usr/lib", uri = "file:/usr/lib" },
{ path = "/stress-ng", uri = "file:/stress-ng" },
{ path = "{{ execdir }}", uri = "file:{{ execdir }}" },
]

# Enclave parameters; EDMM and the enclave size differ between the two
# manifests rendered from this template.
sgx.debug = false
sgx.edmm_enable = {{ edmm }}
sgx.enclave_size = "{{ enclave_size }}"
sgx.max_threads = 6

# Files and directories listed as trusted for the enclave.
sgx.trusted_files = [
"file:{{ gramine.libos }}",
"file:{{ execdir }}/",
"file:{{ gramine.runtimedir() }}/",
"file:{{ arch_libdir }}/",
"file:/usr/{{ arch_libdir }}/",
]
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ spec:
description: NodeSelector provides a simple way to constrain device
plugin pods to nodes with particular labels.
type: object
nriImage:
description: |-
NRIImage is a container image with SGX Node Resource Interface (NRI) plugin executable. Set
this value if SGX EPC cgroups limits enforcement is wanted.
TODO: is this a good name?
type: string
provisionLimit:
description: ProvisionLimit is a number of containers that can share
the same SGX provision device.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
name: sgxdeviceplugin-sample
spec:
image: intel/intel-sgx-plugin:0.30.0
nriImage: ghcr.io/containers/nri-plugins/nri-sgx-epc:v0.3.2
enclaveLimit: 110
provisionLimit: 110
logLevel: 4
Expand Down
5 changes: 5 additions & 0 deletions deployments/sgx_epc_metrics/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Deploys cAdvisor from its upstream kustomize base together with the local
# Service, and applies misc-metrics.yaml as a patch on top.
# NOTE(review): the remote base is referenced at ref=master (not pinned) -
# confirm this is intended.
resources:
- "https://github.com/google/cadvisor/deploy/kubernetes/base?ref=master"
- service.yaml
patches:
- path: misc-metrics.yaml
18 changes: 18 additions & 0 deletions deployments/sgx_epc_metrics/misc-metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Patch for the cadvisor DaemonSet (namespace cadvisor): schedules it only on
# nodes labelled intel.feature.node.kubernetes.io/sgx=true and starts cadvisor
# with the "misc" metrics enabled.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cadvisor
namespace: cadvisor
spec:
template:
spec:
nodeSelector:
intel.feature.node.kubernetes.io/sgx: 'true'
containers:
- name: cadvisor
# NOTE(review): docker.io/library/cadvisor:4af2b9b9 looks like a locally
# built image reference - confirm a published image/tag before merging.
image: docker.io/library/cadvisor:4af2b9b9
command: [
"/usr/bin/cadvisor",
"-enable_metrics", "misc",
"-logtostderr"
]
14 changes: 14 additions & 0 deletions deployments/sgx_epc_metrics/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Service in the cadvisor namespace that selects pods labelled app=cadvisor
# and exposes port 8080, forwarding to the container port named "http".
apiVersion: v1
kind: Service
metadata:
labels:
app: cadvisor
name: cadvisor
namespace: cadvisor
spec:
ports:
- name: http
port: 8080
targetPort: http
selector:
app: cadvisor
1 change: 1 addition & 0 deletions deployments/sgx_plugin/base/intel-sgx-plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ spec:
labels:
app: intel-sgx-plugin
spec:
priorityClassName: system-node-critical
automountServiceAccountToken: false
containers:
- name: intel-sgx-plugin
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Overlay on top of ../../base: applies nri_plugin_patch.yaml to the resource
# named intel-sgx-plugin.
resources:
- ../../base

patches:
- path: nri_plugin_patch.yaml
target:
name: intel-sgx-plugin
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Patch for the intel-sgx-plugin DaemonSet: adds the nri-sgx-epc container
# and mounts the host's /var/run/nri directory into it.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: intel-sgx-plugin
spec:
template:
spec:
containers:
- name: nri-sgx-epc
# NOTE(review): ":unstable" is a floating tag, while the sample
# SgxDevicePlugin CR references v0.3.2 - confirm which one is intended.
image: ghcr.io/containers/nri-plugins/nri-sgx-epc:unstable
securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
imagePullPolicy: IfNotPresent
volumeMounts:
- name: nrisockets
mountPath: /var/run/nri
volumes:
# Host path shared with the container above through the nrisockets mount.
- name: nrisockets
hostPath:
path: /var/run/nri
5 changes: 5 additions & 0 deletions pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ type SgxDevicePluginSpec struct {
// Recommendation is to leave this unset and prefer the SGX NodeFeatureRule instead.
InitImage string `json:"initImage,omitempty"`

// NRIImage is a container image with SGX Node Resource Interface (NRI) plugin executable. Set
// this value if SGX EPC cgroups limits enforcement is wanted.
// TODO: is this a good name?
NRIImage string `json:"nriImage,omitempty"`

// Specialized nodes (e.g., with accelerators) can be Tainted to make sure unwanted pods are not scheduled on them. Tolerations can be set for the plugin pod to neutralize the Taint.
Tolerations []v1.Toleration `json:"tolerations,omitempty"`

Expand Down
45 changes: 45 additions & 0 deletions pkg/controllers/sgx/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,27 @@ func setInitContainer(spec *v1.PodSpec, imageName string) {
addVolumeIfMissing(spec, "nfd-features", "/etc/kubernetes/node-feature-discovery/source.d/", v1.HostPathDirectoryOrCreate)
}

// setNRIContainer appends the "nri-sgx-epc" container, running imageName, to
// the containers of spec. The container gets a read-only root filesystem,
// no privilege escalation and a mount of the "nrisockets" volume at
// /var/run/nri; addVolumeIfMissing is then called for that host path volume.
func setNRIContainer(spec *v1.PodSpec, imageName string) {
	readOnlyRootFS, allowEscalation := true, false

	nriSocketMount := v1.VolumeMount{
		Name:      "nrisockets",
		MountPath: "/var/run/nri",
	}

	nriContainer := v1.Container{
		Name:            "nri-sgx-epc",
		Image:           imageName,
		ImagePullPolicy: "IfNotPresent",
		SecurityContext: &v1.SecurityContext{
			ReadOnlyRootFilesystem:   &readOnlyRootFS,
			AllowPrivilegeEscalation: &allowEscalation,
		},
		VolumeMounts: []v1.VolumeMount{nriSocketMount},
	}

	spec.Containers = append(spec.Containers, nriContainer)

	addVolumeIfMissing(spec, "nrisockets", "/var/run/nri", v1.HostPathDirectoryOrCreate)
}

func (c *controller) NewDaemonSet(rawObj client.Object) *apps.DaemonSet {
devicePlugin := rawObj.(*devicepluginv1.SgxDevicePlugin)

Expand All @@ -135,6 +156,10 @@ func (c *controller) NewDaemonSet(rawObj client.Object) *apps.DaemonSet {
if devicePlugin.Spec.InitImage != "" {
setInitContainer(&daemonSet.Spec.Template.Spec, devicePlugin.Spec.InitImage)
}
// add the optional NRI plugin container
if devicePlugin.Spec.NRIImage != "" {
setNRIContainer(&daemonSet.Spec.Template.Spec, devicePlugin.Spec.NRIImage)
}

return daemonSet
}
Expand Down Expand Up @@ -171,6 +196,26 @@ func (c *controller) UpdateDaemonSet(rawObj client.Object, ds *apps.DaemonSet) (
updated = true
}

// remove NRI plugin
if len(ds.Spec.Template.Spec.Containers) > 1 && dp.Spec.NRIImage == "" {
ds.Spec.Template.Spec.Containers = []v1.Container{ds.Spec.Template.Spec.Containers[0]}
ds.Spec.Template.Spec.Volumes = removeVolume(ds.Spec.Template.Spec.Volumes, "nrisockets")
updated = true
}

// update NRI plugin image
if len(ds.Spec.Template.Spec.Containers) > 1 && ds.Spec.Template.Spec.Containers[1].Image != dp.Spec.NRIImage {
ds.Spec.Template.Spec.Containers[1].Image = dp.Spec.NRIImage
updated = true
}

// add NRI plugin image
if len(ds.Spec.Template.Spec.Containers) == 1 && dp.Spec.NRIImage != "" {
setNRIContainer(&ds.Spec.Template.Spec, dp.Spec.NRIImage)

updated = true
}

if len(dp.Spec.NodeSelector) > 0 {
if !reflect.DeepEqual(ds.Spec.Template.Spec.NodeSelector, dp.Spec.NodeSelector) {
ds.Spec.Template.Spec.NodeSelector = dp.Spec.NodeSelector
Expand Down
1 change: 1 addition & 0 deletions pkg/controllers/sgx/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet
},
},
Spec: v1.PodSpec{
PriorityClassName: "system-node-critical",
AutomountServiceAccountToken: &no,
Containers: []v1.Container{
{
Expand Down
3 changes: 3 additions & 0 deletions pkg/webhooks/sgx/sgx.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func (s *Mutator) SetupWebhookWithManager(mgr ctrl.Manager) error {
}

const (
epcLimitKey = "epc-limit.nri.io/container"
namespace = "sgx.intel.com"
encl = namespace + "/enclave"
epc = namespace + "/epc"
Expand Down Expand Up @@ -156,6 +157,8 @@ func (s *Mutator) Default(ctx context.Context, obj runtime.Object) error {
continue
}

pod.Annotations[fmt.Sprintf("%s.%s", epcLimitKey, container.Name)] = fmt.Sprintf("%d", epcSize)

totalEpc += epcSize

// Quote Generation Modes:
Expand Down
4 changes: 2 additions & 2 deletions scripts/set-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ if [ $# != 1 ] || [ "$1" = "?" ] || [ "$1" = "--help" ]; then
exit 1
fi

files=$(git grep -l '^TAG?*=\|intel/accel-config-demo:\|intel/crypto-perf:\|intel/opae-nlb-demo:\|intel/openssl-qat-engine:\|intel/dlb-libdlb-demo:\|intel/sgx-sdk-demo:\|intel/intel-[^ ]*:\|version=\|appVersion:\|tag:' Makefile deployments demo/*accel-config*.yaml demo/*fpga*.yaml demo/*openssl*.yaml demo/dlb-libdlb*.yaml pkg/controllers/*/*_test.go build/docker/*.Dockerfile test/e2e/*/*.go)
files=$(git grep -l '^TAG?*=\|intel/accel-config-demo:\|intel/crypto-perf:\|intel/opae-nlb-demo:\|intel/openssl-qat-engine:\|intel/dlb-libdlb-demo:\|intel/stress-ng-gramine:\|intel/sgx-sdk-demo:\|intel/intel-[^ ]*:\|version=\|appVersion:\|tag:' Makefile deployments demo/*accel-config*.yaml demo/*fpga*.yaml demo/*openssl*.yaml demo/dlb-libdlb*.yaml pkg/controllers/*/*_test.go build/docker/*.Dockerfile test/e2e/*/*.go)

for file in $files; do
sed -i -e "s;\(^TAG?*=\|intel/accel-config-demo:\|intel/crypto-perf:\|intel/opae-nlb-demo:\|intel/openssl-qat-engine:\|intel/dlb-libdlb-demo:\|intel/sgx-sdk-demo:\|intel/intel-[^ ]*:\|version=\|appVersion: [^ ]\|tag: [^ ]\)[^ \"]*;\1$1;g" "$file";
sed -i -e "s;\(^TAG?*=\|intel/accel-config-demo:\|intel/crypto-perf:\|intel/opae-nlb-demo:\|intel/openssl-qat-engine:\|intel/dlb-libdlb-demo:\|intel/stress-ng-gramine:\|intel/sgx-sdk-demo:\|intel/intel-[^ ]*:\|version=\|appVersion: [^ ]\|tag: [^ ]\)[^ \"]*;\1$1;g" "$file";
done
Loading
Loading