/
route_linux.go
465 lines (390 loc) · 11.8 KB
/
route_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
// Copyright 2016-2018 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// +build linux
package route
import (
"fmt"
"net"
"time"
"github.com/cilium/cilium/pkg/option"
"github.com/vishvananda/netlink"
)
const (
// RouteReplaceMaxTries is the maximum number of times Upsert attempts to
// add or update a route when the kernel returns an error
RouteReplaceMaxTries = 10
// RouteReplaceRetryInterval is the pause between two consecutive
// attempts of the RouteReplaceMaxTries attempts made by Upsert
RouteReplaceRetryInterval = 100 * time.Millisecond
// RTN_LOCAL is a route type used to indicate that a packet should be
// "routed" locally and passed up the stack. It is used by IPSec to force
// encrypted packets to pass through the XFRM layer.
RTN_LOCAL = 0x2
// MainTable is Linux's default routing table
MainTable = 254
// EncryptRouteProtocol is the route protocol value for encryption
// specific routes
EncryptRouteProtocol = 192
)
// getNetlinkRoute translates the route into its netlink.Route representation.
//
// The returned route's Dst points at r.Prefix. The gateway is only set when a
// nexthop is configured. A non-universe scope is copied as is; a universe
// scoped route of type RTN_LOCAL is given host scope instead.
func (r *Route) getNetlinkRoute() netlink.Route {
	var nlRoute netlink.Route

	nlRoute.Dst = &r.Prefix
	nlRoute.Src = r.Local
	nlRoute.MTU = r.MTU
	nlRoute.Protocol = r.Proto
	nlRoute.Table = r.Table
	nlRoute.Type = r.Type

	if r.Nexthop != nil {
		nlRoute.Gw = *r.Nexthop
	}

	switch {
	case r.Scope != netlink.SCOPE_UNIVERSE:
		nlRoute.Scope = r.Scope
	case r.Type == RTN_LOCAL:
		// Scope is universe here, local routes are host scoped
		nlRoute.Scope = netlink.SCOPE_HOST
	}

	return nlRoute
}
// getNexthopAsIPNet wraps the nexthop of the route into a single-address
// network (/32 for IPv4, /128 otherwise). nil is returned if the route has
// no nexthop.
func (r *Route) getNexthopAsIPNet() *net.IPNet {
	if r.Nexthop == nil {
		return nil
	}

	bits := 128
	if r.Nexthop.To4() != nil {
		bits = 32
	}

	return &net.IPNet{IP: *r.Nexthop, Mask: net.CIDRMask(bits, bits)}
}
// ipFamily returns the netlink address family of ip. Any address which
// cannot be represented as IPv4, including a nil address, is reported as
// netlink.FAMILY_V6.
func ipFamily(ip net.IP) int {
	if ip.To4() != nil {
		return netlink.FAMILY_V4
	}
	return netlink.FAMILY_V6
}
// Lookup attempts to find the Linux route matching the route specification on
// the device named by route.Device.
//
// An error is returned if the device cannot be found. If the device exists
// but no matching route is found, both the returned route and the returned
// error are nil. A returned route always carries a non-nil Nexthop pointer,
// which refers to the gateway reported by netlink.
func Lookup(route Route) (*Route, error) {
	link, err := netlink.LinkByName(route.Device)
	if err != nil {
		return nil, fmt.Errorf("unable to find interface '%s' of route: %s", route.Device, err)
	}
	attrs := link.Attrs()

	filter := route.getNetlinkRoute()
	filter.LinkIndex = attrs.Index

	found := lookup(&filter)
	if found == nil {
		return nil, nil
	}

	result := &Route{
		Device:  attrs.Name,
		Local:   found.Src,
		Nexthop: &found.Gw,
		MTU:     found.MTU,
		Scope:   found.Scope,
	}
	if dst := found.Dst; dst != nil {
		result.Prefix = *dst
	}

	return result, nil
}
// lookup finds a particular route as specified by the filter which points
// to the specified device. The filter route can have the following fields set:
//   - Dst
//   - LinkIndex
//   - Scope
//   - Gw
//   - Table
//
// Fields left at their zero value are not added to the netlink filter mask.
// The address family is derived from Dst, falling back to Gw and finally to
// all families, so that a filter without Dst does not dereference a nil
// destination. A filter without Dst only matches routes without Dst.
//
// nil is returned if the routes cannot be listed or if no route matches.
func lookup(route *netlink.Route) *netlink.Route {
	var filter uint64
	if route.Dst != nil {
		filter |= netlink.RT_FILTER_DST
	}
	if route.Table != 0 {
		filter |= netlink.RT_FILTER_TABLE
	}
	if route.Scope != 0 {
		filter |= netlink.RT_FILTER_SCOPE
	}
	if route.Gw != nil {
		filter |= netlink.RT_FILTER_GW
	}
	if route.LinkIndex != 0 {
		filter |= netlink.RT_FILTER_OIF
	}

	family := netlink.FAMILY_ALL
	switch {
	case route.Dst != nil:
		family = ipFamily(route.Dst.IP)
	case route.Gw != nil:
		family = ipFamily(route.Gw)
	}

	routes, err := netlink.RouteListFiltered(family, route, filter)
	if err != nil {
		return nil
	}

	// Iterate by index so the returned pointer refers to the slice element
	// and not to a reused loop variable.
	for i := range routes {
		r := &routes[i]

		// Either both or neither of the routes must have a destination
		if (r.Dst == nil) != (route.Dst == nil) {
			continue
		}

		if route.Table != 0 && route.Table != r.Table {
			continue
		}

		if r.Scope != route.Scope || !r.Gw.Equal(route.Gw) {
			continue
		}

		// Neither route has a destination, there is no prefix to compare
		if route.Dst == nil {
			return r
		}

		aMaskLen, aMaskBits := r.Dst.Mask.Size()
		bMaskLen, bMaskBits := route.Dst.Mask.Size()
		if aMaskLen == bMaskLen && aMaskBits == bMaskBits &&
			r.Dst.IP.Equal(route.Dst.IP) {
			return r
		}
	}

	return nil
}
// createNexthopRoute builds the L2 route which makes the router IP
// (routerNet) available behind the interface link, using the routing table
// of route.
func createNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) *netlink.Route {
	nexthop := netlink.Route{
		LinkIndex: link.Attrs().Index,
		Table:     route.Table,
		Dst:       routerNet,
	}

	// Known issue: scope for IPv6 routes is not propagated correctly. If
	// we set the scope here, lookup() will be unable to identify the route
	// again and we will continuously re-add the route. Therefore the link
	// scope is only set for IPv4.
	if isIPv4 := routerNet.IP.To4() != nil; isIPv4 {
		nexthop.Scope = netlink.SCOPE_LINK
	}

	return &nexthop
}
// replaceNexthopRoute installs the L2 route for the router IP which is used
// as nexthop for node routes. The route is always written with a netlink
// route replace, regardless of whether it already exists.
//
// It returns true on success and false together with an error otherwise.
func replaceNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) (bool, error) {
	nexthopRoute := createNexthopRoute(route, link, routerNet)

	err := netlink.RouteReplace(nexthopRoute)
	if err != nil {
		return false, fmt.Errorf("unable to add L2 nexthop route: %s", err)
	}

	return true, nil
}
// deleteNexthopRoute deletes the L2 route for the router IP routerNet on the
// interface link, as built by createNexthopRoute. An error is returned if
// the route could not be deleted.
func deleteNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) error {
	nexthopRoute := createNexthopRoute(route, link, routerNet)

	err := netlink.RouteDel(nexthopRoute)
	if err != nil {
		return fmt.Errorf("unable to delete L2 nexthop route: %s", err)
	}

	return nil
}
// Upsert adds or updates a Linux kernel route. The route described can be in
// the following two forms:
//
// direct:
//
//	prefix dev foo
//
// nexthop:
//
//	prefix via nexthop dev foo
//
// If a nexthop route is specified, this function first installs a direct
// route to the nexthop. This means that the following two routes will exist
// afterwards:
//
//	nexthop dev foo
//	prefix via nexthop dev foo
//
// Due to a bug in the Linux kernel, installing the prefix route is attempted
// up to RouteReplaceMaxTries times, pausing for RouteReplaceRetryInterval
// between two attempts. This is a workaround for a race condition in which
// the direct route to the nexthop is not available immediately and the prefix
// route can fail with EINVAL if the Netlink calls are issued in short order.
//
// The boolean result is true if the route was installed and false whenever an
// error is returned. If all attempts fail, the direct route to the nexthop
// installed by this call is removed again on a best-effort basis and the
// error of the last attempt is returned.
func Upsert(route Route) (bool, error) {
	link, err := netlink.LinkByName(route.Device)
	if err != nil {
		return false, fmt.Errorf("unable to lookup interface %s: %s", route.Device, err)
	}

	routerNet := route.getNexthopAsIPNet()
	if routerNet != nil {
		if _, err := replaceNexthopRoute(route, link, routerNet); err != nil {
			return false, fmt.Errorf("unable to add nexthop route: %s", err)
		}
	}

	routeSpec := route.getNetlinkRoute()
	routeSpec.LinkIndex = link.Attrs().Index

	// Workaround: See description of this function
	for attempt := 1; ; attempt++ {
		if err = netlink.RouteReplace(&routeSpec); err == nil {
			return true, nil
		}

		// Do not delay the error report by sleeping after the last attempt
		if attempt >= RouteReplaceMaxTries {
			break
		}

		time.Sleep(RouteReplaceRetryInterval)
	}

	if routerNet != nil {
		// Best-effort cleanup of the nexthop route installed above. The
		// failure to install the prefix route is the error worth
		// reporting, so a cleanup failure is deliberately ignored.
		_ = deleteNexthopRoute(route, link, routerNet)
	}

	return false, err
}
// Delete removes the Linux route for route.Prefix on the device named by
// route.Device in the table route.Table. An error is returned if the device
// cannot be found or if the kernel refuses to delete the route.
func Delete(route Route) error {
	link, err := netlink.LinkByName(route.Device)
	if err != nil {
		return fmt.Errorf("unable to lookup interface %s: %s", route.Device, err)
	}

	// Deletion of routes with Nexthop or Local set fails for IPv6.
	// Therefore the route is assembled by hand instead of relying on
	// getNetlinkRoute().
	spec := netlink.Route{
		Dst:       &route.Prefix,
		LinkIndex: link.Attrs().Index,
		Table:     route.Table,
	}

	// Scope can only be specified for IPv4
	isIPv4 := route.Prefix.IP.To4() != nil
	if isIPv4 {
		spec.Scope = route.Scope
	}

	return netlink.RouteDel(&spec)
}
// Rule is the specification of an IP routing rule. When an existing rule is
// looked up, a zero Priority, a zero Mark, a nil From and a nil To are not
// compared against the installed rules.
type Rule struct {
// Priority is the routing rule priority
Priority int
// Mark is the skb mark that needs to match
Mark int
// Mask is the mask to apply to the skb mark before matching the Mark
// field
Mask int
// From is the source address selector
From *net.IPNet
// To is the destination address selector
To *net.IPNet
// Table is the routing table to look up if the rule matches
Table int
}
// lookupRule reports whether a routing rule matching spec is installed for
// the given address family.
//
// Priority and Mark are only compared when non-zero in spec, From and To
// only when non-nil. The table must always be identical. The Mask of spec is
// not taken into account. An error is returned if the rules cannot be
// listed.
func lookupRule(spec Rule, family int) (bool, error) {
	installed, err := netlink.RuleList(family)
	if err != nil {
		return false, err
	}

	for _, r := range installed {
		priorityOK := spec.Priority == 0 || spec.Priority == r.Priority
		fromOK := spec.From == nil || (r.Src != nil && r.Src.String() == spec.From.String())
		toOK := spec.To == nil || (r.Dst != nil && r.Dst.String() == spec.To.String())
		markOK := spec.Mark == 0 || r.Mark == spec.Mark

		if priorityOK && fromOK && toOK && markOK && r.Table == spec.Table {
			return true, nil
		}
	}

	return false, nil
}
// ReplaceRule adds the IPv4 routing rule described by spec unless a matching
// rule is already installed, in which case nothing is changed. Used with BPF
// datapath to set mark and direct packets to route table.
func ReplaceRule(spec Rule) error {
return replaceRule(spec, netlink.FAMILY_V4)
}
// ReplaceRuleIPv6 adds the IPv6 routing rule described by spec unless a
// matching rule is already installed, in which case nothing is changed.
func ReplaceRuleIPv6(spec Rule) error {
return replaceRule(spec, netlink.FAMILY_V6)
}
// replaceRule makes sure a routing rule matching spec is installed for the
// given address family. If lookupRule already finds a matching rule, nothing
// is changed; otherwise a new rule is added. An error is returned if the
// lookup or the addition fails.
func replaceRule(spec Rule, family int) error {
	switch exists, err := lookupRule(spec, family); {
	case err != nil:
		return err
	case exists:
		return nil
	}

	rule := netlink.NewRule()
	rule.Family = family
	rule.Priority = spec.Priority
	rule.Table = spec.Table
	rule.Mark = spec.Mark
	rule.Mask = spec.Mask
	rule.Src = spec.From
	rule.Dst = spec.To

	return netlink.RuleAdd(rule)
}
// DeleteRule deletes the IPv4 routing rule described by spec from the
// routing rules.
func DeleteRule(spec Rule) error {
return deleteRule(spec, netlink.FAMILY_V4)
}
// DeleteRuleIPv6 deletes the IPv6 routing rule described by spec from the
// routing rules.
func DeleteRuleIPv6(spec Rule) error {
return deleteRule(spec, netlink.FAMILY_V6)
}
// deleteRule removes the routing rule described by spec for the given
// address family. All fields of spec are copied into the rule handed to
// netlink; the error of the deletion is returned unchanged.
func deleteRule(spec Rule, family int) error {
	target := netlink.NewRule()
	target.Family = family
	target.Priority = spec.Priority
	target.Table = spec.Table
	target.Mark = spec.Mark
	target.Mask = spec.Mask
	target.Src = spec.From
	target.Dst = spec.To

	return netlink.RuleDel(target)
}
// lookupDefaultRoute returns the first route without destination (default
// route) of the given address family.
//
// An error is returned if the routes cannot be listed, if no default route
// exists, or if two consecutive default routes with a non-zero link index
// point to different network devices.
func lookupDefaultRoute(family int) (netlink.Route, error) {
	defaultFilter := &netlink.Route{Dst: nil}
	routes, err := netlink.RouteListFiltered(family, defaultFilter, netlink.RT_FILTER_DST)
	switch {
	case err != nil:
		return netlink.Route{}, fmt.Errorf("Unable to list direct routes: %s", err)
	case len(routes) == 0:
		return netlink.Route{}, fmt.Errorf("Default route not found for family %d", family)
	}

	// All default routes are expected to use the same network device
	prevIndex := 0
	for i := range routes {
		curIndex := routes[i].LinkIndex
		if prevIndex != 0 && prevIndex != curIndex {
			return netlink.Route{}, fmt.Errorf("Found default routes with different netdev ifindices: %v vs %v",
				prevIndex, curIndex)
		}
		prevIndex = curIndex
	}

	log.Debugf("Found default route on node %v", routes[0])
	return routes[0], nil
}
// NodeDeviceWithDefaultRoute returns the node's device which handles the
// default route in the current namespace.
//
// The default route is looked up for each address family enabled in
// option.Config. An error is returned if a lookup fails, if the IPv4 and
// IPv6 default routes use different devices, or if the resulting link index
// cannot be resolved to a device.
func NodeDeviceWithDefaultRoute() (netlink.Link, error) {
	ifindex := 0

	if option.Config.EnableIPv4 {
		v4Route, err := lookupDefaultRoute(netlink.FAMILY_V4)
		if err != nil {
			return nil, err
		}
		ifindex = v4Route.LinkIndex
	}

	if option.Config.EnableIPv6 {
		v6Route, err := lookupDefaultRoute(netlink.FAMILY_V6)
		if err != nil {
			return nil, err
		}
		// Both families must agree on the device if both are enabled
		if mismatch := ifindex != 0 && ifindex != v6Route.LinkIndex; mismatch {
			return nil, fmt.Errorf("IPv4/IPv6 have different link indices")
		}
		ifindex = v6Route.LinkIndex
	}

	device, err := netlink.LinkByIndex(ifindex)
	if err != nil {
		return nil, err
	}

	return device, nil
}