-
Notifications
You must be signed in to change notification settings - Fork 2.8k
/
netlink.go
321 lines (273 loc) · 9.13 KB
/
netlink.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium
package loader
import (
"context"
"errors"
"fmt"
"net"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
"github.com/cilium/ebpf"
"github.com/cilium/cilium/pkg/bpf"
"github.com/cilium/cilium/pkg/defaults"
"github.com/cilium/cilium/pkg/mac"
"github.com/cilium/cilium/pkg/option"
"github.com/cilium/cilium/pkg/sysctl"
)
// baseDeviceMode is a string-backed enumeration. The only values declared in
// this file are directMode and tunnelMode.
type baseDeviceMode string

const (
	// directMode is the baseDeviceMode whose string form is "direct".
	directMode baseDeviceMode = "direct"
	// tunnelMode is the baseDeviceMode whose string form is "tunnel".
	tunnelMode baseDeviceMode = "tunnel"
)
// directionToParent translates a direction string into a tc parent handle:
// dirIngress yields netlink.HANDLE_MIN_INGRESS and dirEgress yields
// netlink.HANDLE_MIN_EGRESS. Any other value yields 0.
func directionToParent(dir string) uint32 {
	if dir == dirIngress {
		return netlink.HANDLE_MIN_INGRESS
	}
	if dir == dirEgress {
		return netlink.HANDLE_MIN_EGRESS
	}
	return 0
}
// replaceQdisc issues a netlink qdisc replace for a "clsact" qdisc on link,
// using handle ffff:0 and parent netlink.HANDLE_CLSACT. It returns whatever
// error netlink.QdiscReplace reports.
func replaceQdisc(link netlink.Link) error {
	return netlink.QdiscReplace(&netlink.GenericQdisc{
		QdiscAttrs: netlink.QdiscAttrs{
			LinkIndex: link.Attrs().Index,
			Handle:    netlink.MakeHandle(0xffff, 0),
			Parent:    netlink.HANDLE_CLSACT,
		},
		QdiscType: "clsact",
	})
}
// replaceDatapath replaces the qdisc and BPF program for an endpoint or XDP program.
//
// It looks up ifName, loads the ELF at objPath, loads the resulting collection
// into the kernel (picking up pinned maps from bpffs) and attaches progName to
// the interface. direction selects the tc parent via directionToParent and
// xdpMode is converted to XDP flags via xdpModeToFlag; see attachProgram for
// how the two are used.
//
// When successful, returns a finalizer to allow the map cleanup operation to be
// deferred by the caller. On error, any maps pending migration are immediately
// re-pinned to their original paths and a finalizer is not returned. This holds
// for every failure after a migration was started: the retried collection load,
// a cancelled context, and a failed attach.
//
// When replacing multiple programs from the same ELF in a loop, the finalizer
// should only be run when all the interface's programs have been replaced
// since they might share one or more tail call maps.
//
// For example, this is the case with from-netdev and to-netdev. If eth0:to-netdev
// gets its program and maps replaced and unpinned, its eth0:from-netdev counterpart
// will miss tail calls (and drop packets) until it has been replaced as well.
func replaceDatapath(ctx context.Context, ifName, objPath, progName, direction string, xdpMode string) (func(), error) {
	// Avoid unnecessarily loading a prog.
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	link, err := netlink.LinkByName(ifName)
	if err != nil {
		return nil, fmt.Errorf("getting interface %s by name: %w", ifName, err)
	}

	l := log.WithField("device", ifName).WithField("objPath", objPath).
		WithField("progName", progName).WithField("direction", direction).
		WithField("ifindex", link.Attrs().Index)

	// Load the ELF from disk.
	l.Debug("Loading CollectionSpec from ELF")
	spec, err := bpf.LoadCollectionSpec(objPath)
	if err != nil {
		return nil, fmt.Errorf("loading eBPF ELF: %w", err)
	}
	if spec.Programs[progName] == nil {
		return nil, fmt.Errorf("no program %s found in eBPF ELF", progName)
	}

	// revert undoes a bpffs map migration. Failures are logged rather than
	// returned so that the error which triggered the revert is the one
	// reported to the caller.
	revert := func() {
		l.Debug("Reverting bpffs map migration")
		if err := bpf.FinalizeBPFFSMigration(bpf.MapPrefixPath(), spec, true); err != nil {
			l.WithError(err).Error("Failed to revert bpffs map migration")
		}
	}

	// Load the CollectionSpec into the kernel, picking up any pinned maps from
	// bpffs in the process.
	finalize := func() {}
	// migrating records whether StartBPFFSMigration succeeded, i.e. whether
	// there may be pins that need reverting on a later failure.
	migrating := false
	opts := ebpf.CollectionOptions{
		Maps: ebpf.MapOptions{PinPath: bpf.MapPrefixPath()},
	}
	l.Debug("Loading Collection into kernel")
	coll, err := bpf.LoadCollection(spec, opts)
	if errors.Is(err, ebpf.ErrMapIncompatible) {
		// Temporarily rename bpffs pins of maps whose definitions have changed in
		// a new version of a datapath ELF.
		l.Debug("Starting bpffs map migration")
		if err := bpf.StartBPFFSMigration(bpf.MapPrefixPath(), spec); err != nil {
			return nil, fmt.Errorf("Failed to start bpffs map migration: %w", err)
		}
		migrating = true

		finalize = func() {
			l.Debug("Finalizing bpffs map migration")
			if err := bpf.FinalizeBPFFSMigration(bpf.MapPrefixPath(), spec, false); err != nil {
				l.WithError(err).Error("Could not finalize bpffs map migration")
			}
		}

		// Retry loading the Collection after starting map migration.
		l.Debug("Retrying loading Collection into kernel after map migration")
		coll, err = bpf.LoadCollection(spec, opts)
	}

	var ve *ebpf.VerifierError
	if errors.As(err, &ve) {
		//TODO: Write this to a file in endpoint directory instead.
		l.Debugf("Got verifier error: %+v", ve)
	}
	if err != nil {
		// The (re)load failed. If a migration was started above, put the
		// original pins back so no finalizer-less pending state is left behind.
		if migrating {
			revert()
		}
		return nil, fmt.Errorf("error loading eBPF collection into the kernel: %w", err)
	}
	defer coll.Close()

	// Avoid attaching a prog to a stale interface.
	if err := ctx.Err(); err != nil {
		// No finalizer is handed out on this path either, so undo a started
		// migration before bailing out.
		if migrating {
			revert()
		}
		return nil, err
	}

	l.Debug("Attaching program to interface")
	if err := attachProgram(link, coll.Programs[progName], directionToParent(direction), xdpModeToFlag(xdpMode)); err != nil {
		// Program replacement unsuccessful, revert bpffs migration.
		revert()
		return nil, fmt.Errorf("program %s: %w", progName, err)
	}
	l.Debugf("Successfully attached program to interface")

	return finalize, nil
}
// attachProgram attaches prog to link.
//
// A nil prog is rejected with an error. When xdpFlags is non-zero the program
// is attached as an XDP program using those flags and qdiscParent is ignored.
// Otherwise the link's clsact qdisc is replaced and prog is installed as a
// direct-action tc BPF filter under qdiscParent.
func attachProgram(link netlink.Link, prog *ebpf.Program, qdiscParent uint32, xdpFlags uint32) error {
	if prog == nil {
		return errors.New("cannot attach a nil program")
	}

	if xdpFlags != 0 {
		// Omitting XDP_FLAGS_UPDATE_IF_NOEXIST equals running 'ip' with -force,
		// and will clobber any existing XDP attachment to the interface.
		err := netlink.LinkSetXdpFdWithFlags(link, prog.FD(), int(xdpFlags))
		if err == nil {
			return nil
		}
		return fmt.Errorf("attaching XDP program to interface %s: %w", link.Attrs().Name, err)
	}

	// tc path: make sure the clsact qdisc is in place before adding the filter.
	if err := replaceQdisc(link); err != nil {
		return fmt.Errorf("replacing clsact qdisc for interface %s: %w", link.Attrs().Name, err)
	}

	tcAttrs := netlink.FilterAttrs{
		LinkIndex: link.Attrs().Index,
		Parent:    qdiscParent,
		Handle:    1,
		Protocol:  unix.ETH_P_ALL,
		Priority:  option.Config.TCFilterPriority,
	}
	bpfFilter := &netlink.BpfFilter{
		FilterAttrs:  tcAttrs,
		Fd:           prog.FD(),
		Name:         fmt.Sprintf("cilium-%s", link.Attrs().Name),
		DirectAction: true,
	}

	if err := netlink.FilterReplace(bpfFilter); err != nil {
		return fmt.Errorf("replacing tc filter: %w", err)
	}
	return nil
}
// RemoveTCFilters deletes every tc filter that netlink.FilterList reports for
// interface ifName under the parent tcDir. Direction is passed as
// netlink.HANDLE_MIN_{INGRESS,EGRESS} via tcDir.
//
// Deletion stops at the first failure. The error from the failing netlink
// call is returned unmodified.
func RemoveTCFilters(ifName string, tcDir uint32) error {
	var (
		link    netlink.Link
		filters []netlink.Filter
		err     error
	)

	if link, err = netlink.LinkByName(ifName); err != nil {
		return err
	}
	if filters, err = netlink.FilterList(link, tcDir); err != nil {
		return err
	}

	for idx := range filters {
		if err = netlink.FilterDel(filters[idx]); err != nil {
			return err
		}
	}
	return nil
}
// setupDev brings link up and applies the per-interface sysctl settings
// required by the enabled address families:
//
//   - IPv6 enabled: forwarding=1
//   - IPv4 enabled: forwarding=1, rp_filter=0, accept_local=1, send_redirects=0
//
// A failure to bring the link up is logged as a warning and returned. Every
// sysctl is registered with IgnoreErr set to false, and the result of
// sysctl.ApplySettings is returned as-is.
func setupDev(link netlink.Link) error {
	dev := link.Attrs().Name

	if err := netlink.LinkSetUp(link); err != nil {
		log.WithError(err).WithField("device", dev).Warn("Could not set up the link")
		return err
	}

	// At most one IPv6 and four IPv4 settings are queued.
	settings := make([]sysctl.Setting, 0, 5)
	queue := func(name, val string) {
		settings = append(settings, sysctl.Setting{Name: name, Val: val, IgnoreErr: false})
	}

	if option.Config.EnableIPv6 {
		queue(fmt.Sprintf("net.ipv6.conf.%s.forwarding", dev), "1")
	}
	if option.Config.EnableIPv4 {
		queue(fmt.Sprintf("net.ipv4.conf.%s.forwarding", dev), "1")
		queue(fmt.Sprintf("net.ipv4.conf.%s.rp_filter", dev), "0")
		queue(fmt.Sprintf("net.ipv4.conf.%s.accept_local", dev), "1")
		queue(fmt.Sprintf("net.ipv4.conf.%s.send_redirects", dev), "0")
	}

	return sysctl.ApplySettings(settings)
}
// setupVethPair makes sure a veth pair name/peerName is present and runs
// setupDev on both ends, name first and peerName second.
//
// If looking up name fails for any reason, a new pair is created with randomly
// generated MAC addresses for both ends and a TxQLen of 1000 on the name side.
// The first error encountered is returned unmodified.
func setupVethPair(name, peerName string) error {
	// Create the veth pair if it doesn't exist.
	if _, lookupErr := netlink.LinkByName(name); lookupErr != nil {
		hostMAC, err := mac.GenerateRandMAC()
		if err != nil {
			return err
		}
		peerMAC, err := mac.GenerateRandMAC()
		if err != nil {
			return err
		}

		pair := &netlink.Veth{
			LinkAttrs: netlink.LinkAttrs{
				Name:         name,
				HardwareAddr: net.HardwareAddr(hostMAC),
				TxQLen:       1000,
			},
			PeerName:         peerName,
			PeerHardwareAddr: net.HardwareAddr(peerMAC),
		}
		if err := netlink.LinkAdd(pair); err != nil {
			return err
		}
	}

	// Look up and configure each end in turn.
	for _, ifName := range [...]string{name, peerName} {
		end, err := netlink.LinkByName(ifName)
		if err != nil {
			return err
		}
		if err := setupDev(end); err != nil {
			return err
		}
	}
	return nil
}
// SetupBaseDevice performs the first step of datapath initialization: it sets
// up (creating it if needed) the veth pair defaults.HostDevice /
// defaults.SecondHostDevice - cilium_host and cilium_net by default - then
// turns ARP off on both links and sets their MTU to mtu.
//
// It returns the host link, the second host link and an error. On error both
// links are nil.
func SetupBaseDevice(mtu int) (netlink.Link, netlink.Link, error) {
	if err := setupVethPair(defaults.HostDevice, defaults.SecondHostDevice); err != nil {
		return nil, nil, err
	}

	hostLink, err := netlink.LinkByName(defaults.HostDevice)
	if err != nil {
		return nil, nil, err
	}
	netLink, err := netlink.LinkByName(defaults.SecondHostDevice)
	if err != nil {
		return nil, nil, err
	}

	ends := [...]netlink.Link{hostLink, netLink}

	// Disable ARP on both ends before touching the MTU.
	for _, end := range ends {
		if err := netlink.LinkSetARPOff(end); err != nil {
			return nil, nil, err
		}
	}
	for _, end := range ends {
		if err := netlink.LinkSetMTU(end, mtu); err != nil {
			return nil, nil, err
		}
	}

	return hostLink, netLink, nil
}