diff --git a/pkg/config/config.go b/pkg/config/config.go index add9db694..97d4fe6be 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -85,8 +85,9 @@ type CniConfig struct { NetworkPluginMaxConfNum int `toml:"max_conf_num" json:"maxConfNum"` // NetworkPluginConfTemplate is the file path of golang template used to generate // cni config. - // When it is set, containerd will get cidr from kubelet to replace {{.PodCIDR}} in - // the template, and write the config into NetworkPluginConfDir. + // When it is set, containerd will get cidr(s) from kubelet to replace {{.PodCIDR}}, + // {{.PodCIDRRanges}} or {{.Routes}} in the template, and write the config into + // NetworkPluginConfDir. // Ideally the cni config should be placed by system admin or cni daemon like calico, // weaveworks etc. However, there are still users using kubenet // (https://kubernetes.io/docs/concepts/cluster-administration/network-plugins/#kubenet) diff --git a/pkg/server/sandbox_run.go b/pkg/server/sandbox_run.go index 39e03a116..edad4e5c8 100644 --- a/pkg/server/sandbox_run.go +++ b/pkg/server/sandbox_run.go @@ -139,14 +139,14 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox // In this case however caching the IP will add a subtle performance enhancement by avoiding // calls to network namespace of the pod to query the IP of the veth interface on every // SandboxStatus request. - sandbox.IP, sandbox.CNIResult, err = c.setupPod(ctx, id, sandbox.NetNSPath, config) + sandbox.IP, sandbox.AdditionalIPs, sandbox.CNIResult, err = c.setupPodNetwork(ctx, id, sandbox.NetNSPath, config) if err != nil { return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id) } defer func() { if retErr != nil { // Teardown network if an error is returned. 
- if err := c.teardownPod(ctx, id, sandbox.NetNSPath, config); err != nil { + if err := c.teardownPodNetwork(ctx, id, sandbox.NetNSPath, config); err != nil { log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id) } } @@ -544,10 +544,10 @@ func (c *criService) unmountSandboxFiles(id string, config *runtime.PodSandboxCo return nil } -// setupPod setups up the network for a pod -func (c *criService) setupPod(ctx context.Context, id string, path string, config *runtime.PodSandboxConfig) (string, *cni.CNIResult, error) { +// setupPodNetwork sets up the network for a pod +func (c *criService) setupPodNetwork(ctx context.Context, id string, path string, config *runtime.PodSandboxConfig) (string, []string, *cni.CNIResult, error) { if c.netPlugin == nil { - return "", nil, errors.New("cni config not initialized") + return "", nil, nil, errors.New("cni config not initialized") } labels := getPodCNILabels(id, config) @@ -556,7 +556,7 @@ func (c *criService) setupPod(ctx context.Context, id string, path string, confi // or an unreasonable valure see validateBandwidthIsReasonable() bandWidth, err := toCNIBandWidth(config.Annotations) if err != nil { - return "", nil, errors.Wrap(err, "failed to get bandwidth info from annotations") + return "", nil, nil, errors.Wrap(err, "failed to get bandwidth info from annotations") } result, err := c.netPlugin.Setup(ctx, id, @@ -567,18 +567,19 @@ func (c *criService) setupPod(ctx context.Context, id string, path string, confi ) if err != nil { - return "", nil, err + return "", nil, nil, err } logDebugCNIResult(ctx, id, result) // Check if the default interface has IP config if configs, ok := result.Interfaces[defaultIfName]; ok && len(configs.IPConfigs) > 0 { - return selectPodIP(configs.IPConfigs), result, nil + ip, additionalIPs := selectPodIPs(configs.IPConfigs) + return ip, additionalIPs, result, nil } // If it comes here then the result was invalid so destroy the pod network and return error - if err := 
c.teardownPod(ctx, id, path, config); err != nil { + if err := c.teardownPodNetwork(ctx, id, path, config); err != nil { log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id) } - return "", result, errors.Errorf("failed to find network info for sandbox %q", id) + return "", nil, result, errors.Errorf("failed to find network info for sandbox %q", id) } // toCNIBandWidth converts CRI annotations to CNI bandwidth. @@ -623,14 +624,28 @@ func toCNIPortMappings(criPortMappings []*runtime.PortMapping) []cni.PortMapping return portMappings } -// selectPodIP select an ip from the ip list. It prefers ipv4 more than ipv6. -func selectPodIP(ipConfigs []*cni.IPConfig) string { +// selectPodIPs selects the pod ip from the ip list, preferring the first ipv4 and falling +// back to the first ip, and returns the remaining ips in order as the additional ips. +// TODO(random-liu): Revisit the ip order in the ipv6 beta stage. (cri#1278) +func selectPodIPs(ipConfigs []*cni.IPConfig) (string, []string) { + var ( + additionalIPs []string + ip string + ) for _, c := range ipConfigs { - if c.IP.To4() != nil { - return c.IP.String() + if c.IP.To4() != nil && ip == "" { + ip = c.IP.String() + } else { + additionalIPs = append(additionalIPs, c.IP.String()) } } - return ipConfigs[0].IP.String() + if ip != "" { + return ip, additionalIPs + } + if len(ipConfigs) == 1 { + return additionalIPs[0], nil + } + return additionalIPs[0], additionalIPs[1:] } // untrustedWorkload returns true if the sandbox contains untrusted workload. 
diff --git a/pkg/server/sandbox_run_test.go b/pkg/server/sandbox_run_test.go index 7398a2d0d..98a575f16 100644 --- a/pkg/server/sandbox_run_test.go +++ b/pkg/server/sandbox_run_test.go @@ -552,16 +552,29 @@ func TestToCNIPortMappings(t *testing.T) { func TestSelectPodIP(t *testing.T) { for desc, test := range map[string]struct { - ips []string - expected string + ips []string + expectedIP string + expectedAdditionalIPs []string }{ "ipv4 should be picked even if ipv6 comes first": { - ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43"}, - expected: "192.168.17.43", + ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43"}, + expectedIP: "192.168.17.43", + expectedAdditionalIPs: []string{"2001:db8:85a3::8a2e:370:7334"}, + }, + "ipv4 should be picked when there is only ipv4": { + ips: []string{"192.168.17.43"}, + expectedIP: "192.168.17.43", + expectedAdditionalIPs: nil, }, "ipv6 should be picked when there is no ipv4": { - ips: []string{"2001:db8:85a3::8a2e:370:7334"}, - expected: "2001:db8:85a3::8a2e:370:7334", + ips: []string{"2001:db8:85a3::8a2e:370:7334"}, + expectedIP: "2001:db8:85a3::8a2e:370:7334", + expectedAdditionalIPs: nil, + }, + "the first ipv4 should be picked when there are multiple ipv4": { // unlikely to happen + ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43", "2001:db8:85a3::8a2e:370:7335", "192.168.17.45"}, + expectedIP: "192.168.17.43", + expectedAdditionalIPs: []string{"2001:db8:85a3::8a2e:370:7334", "2001:db8:85a3::8a2e:370:7335", "192.168.17.45"}, }, } { t.Logf("TestCase %q", desc) @@ -571,7 +584,9 @@ func TestSelectPodIP(t *testing.T) { IP: net.ParseIP(ip), }) } - assert.Equal(t, test.expected, selectPodIP(ipConfigs)) + ip, additionalIPs := selectPodIPs(ipConfigs) + assert.Equal(t, test.expectedIP, ip) + assert.Equal(t, test.expectedAdditionalIPs, additionalIPs) } } diff --git a/pkg/server/sandbox_status.go b/pkg/server/sandbox_status.go index 977ef83c8..774259d2f 100644 --- a/pkg/server/sandbox_status.go +++ 
b/pkg/server/sandbox_status.go @@ -37,11 +37,11 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox return nil, errors.Wrap(err, "an error occurred when try to find sandbox") } - ip, err := c.getIP(sandbox) + ip, additionalIPs, err := c.getIPs(sandbox) if err != nil { return nil, errors.Wrap(err, "failed to get sandbox ip") } - status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip) + status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip, additionalIPs) if status.GetCreatedAt() == 0 { // CRI doesn't allow CreatedAt == 0. info, err := sandbox.Container.Info(ctx) @@ -66,38 +66,45 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox }, nil } -func (c *criService) getIP(sandbox sandboxstore.Sandbox) (string, error) { +func (c *criService) getIPs(sandbox sandboxstore.Sandbox) (string, []string, error) { config := sandbox.Config if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { // For sandboxes using the node network we are not // responsible for reporting the IP. - return "", nil + return "", nil, nil } if closed, err := sandbox.NetNS.Closed(); err != nil { - return "", errors.Wrap(err, "check network namespace closed") + return "", nil, errors.Wrap(err, "check network namespace closed") } else if closed { - return "", nil + return "", nil, nil } - return sandbox.IP, nil + return sandbox.IP, sandbox.AdditionalIPs, nil } // toCRISandboxStatus converts sandbox metadata into CRI pod sandbox status. -func toCRISandboxStatus(meta sandboxstore.Metadata, status sandboxstore.Status, ip string) *runtime.PodSandboxStatus { +func toCRISandboxStatus(meta sandboxstore.Metadata, status sandboxstore.Status, ip string, additionalIPs []string) *runtime.PodSandboxStatus { // Set sandbox state to NOTREADY by default. 
state := runtime.PodSandboxState_SANDBOX_NOTREADY if status.State == sandboxstore.StateReady { state = runtime.PodSandboxState_SANDBOX_READY } nsOpts := meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions() + var ips []*runtime.PodIP + for _, additionalIP := range additionalIPs { + ips = append(ips, &runtime.PodIP{Ip: additionalIP}) + } return &runtime.PodSandboxStatus{ Id: meta.ID, Metadata: meta.Config.GetMetadata(), State: state, CreatedAt: status.CreatedAt.UnixNano(), - Network: &runtime.PodSandboxNetworkStatus{Ip: ip}, + Network: &runtime.PodSandboxNetworkStatus{ + Ip: ip, + AdditionalIps: ips, + }, Linux: &runtime.LinuxPodSandboxStatus{ Namespaces: &runtime.Namespace{ Options: &runtime.NamespaceOption{ diff --git a/pkg/server/sandbox_status_test.go b/pkg/server/sandbox_status_test.go index 89abf22d0..f0c190f49 100644 --- a/pkg/server/sandbox_status_test.go +++ b/pkg/server/sandbox_status_test.go @@ -31,6 +31,7 @@ func TestPodSandboxStatus(t *testing.T) { id = "test-id" ip = "10.10.10.10" ) + additionalIPs := []string{"8.8.8.8", "2001:db8:85a3::8a2e:370:7334"} createdAt := time.Now() config := &runtime.PodSandboxConfig{ Metadata: &runtime.PodSandboxMetadata{ @@ -62,7 +63,17 @@ func TestPodSandboxStatus(t *testing.T) { Id: id, Metadata: config.GetMetadata(), CreatedAt: createdAt.UnixNano(), - Network: &runtime.PodSandboxNetworkStatus{Ip: ip}, + Network: &runtime.PodSandboxNetworkStatus{ + Ip: ip, + AdditionalIps: []*runtime.PodIP{ + { + Ip: additionalIPs[0], + }, + { + Ip: additionalIPs[1], + }, + }, + }, Linux: &runtime.LinuxPodSandboxStatus{ Namespaces: &runtime.Namespace{ Options: &runtime.NamespaceOption{ @@ -99,7 +110,7 @@ func TestPodSandboxStatus(t *testing.T) { State: test.state, } expected.State = test.expectedState - got := toCRISandboxStatus(metadata, status, ip) + got := toCRISandboxStatus(metadata, status, ip, additionalIPs) assert.Equal(t, expected, got) } } diff --git a/pkg/server/sandbox_stop.go b/pkg/server/sandbox_stop.go index 
0796f5cfd..df080ad4d 100644 --- a/pkg/server/sandbox_stop.go +++ b/pkg/server/sandbox_stop.go @@ -80,7 +80,7 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb } else if closed { netNSPath = "" } - if err := c.teardownPod(ctx, id, netNSPath, sandbox.Config); err != nil { + if err := c.teardownPodNetwork(ctx, id, netNSPath, sandbox.Config); err != nil { return nil, errors.Wrapf(err, "failed to destroy network for sandbox %q", id) } if err = sandbox.NetNS.Remove(); err != nil { @@ -156,8 +156,8 @@ func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.S } } -// teardownPod removes the network from the pod -func (c *criService) teardownPod(ctx context.Context, id string, path string, config *runtime.PodSandboxConfig) error { +// teardownPodNetwork removes the network from the pod +func (c *criService) teardownPodNetwork(ctx context.Context, id string, path string, config *runtime.PodSandboxConfig) error { if c.netPlugin == nil { return errors.New("cni config not initialized") } diff --git a/pkg/server/update_runtime_config.go b/pkg/server/update_runtime_config.go index ca2684f3b..e359582aa 100644 --- a/pkg/server/update_runtime_config.go +++ b/pkg/server/update_runtime_config.go @@ -17,8 +17,10 @@ limitations under the License. package server import ( + "net" "os" "path/filepath" + "strings" "text/template" "github.com/containerd/containerd/log" @@ -33,17 +35,36 @@ import ( type cniConfigTemplate struct { // PodCIDR is the cidr for pods on the node. PodCIDR string + // PodCIDRRanges is the cidr ranges for pods on the node. + PodCIDRRanges []string + // Routes is a list of routes configured. + Routes []string } -// cniConfigFileName is the name of cni config file generated by containerd. -const cniConfigFileName = "10-containerd-net.conflist" +const ( + // cniConfigFileName is the name of cni config file generated by containerd. 
+ cniConfigFileName = "10-containerd-net.conflist" + // zeroCIDRv6 is the default route destination for IPv6. + zeroCIDRv6 = "::/0" + // zeroCIDRv4 is the default route destination for IPv4. + zeroCIDRv4 = "0.0.0.0/0" +) // UpdateRuntimeConfig updates the runtime config. Currently only handles podCIDR updates. func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateRuntimeConfigRequest) (*runtime.UpdateRuntimeConfigResponse, error) { - podCIDR := r.GetRuntimeConfig().GetNetworkConfig().GetPodCidr() - if podCIDR == "" { + podCIDRs := r.GetRuntimeConfig().GetNetworkConfig().GetPodCidr() + if podCIDRs == "" { return &runtime.UpdateRuntimeConfigResponse{}, nil } + cidrs := strings.Split(podCIDRs, ",") + for i := range cidrs { + cidrs[i] = strings.TrimSpace(cidrs[i]) + } + routes, err := getRoutes(cidrs) + if err != nil { + return nil, errors.Wrap(err, "get routes") + } + confTemplate := c.config.NetworkPluginConfTemplate if confTemplate == "" { log.G(ctx).Info("No cni config template is specified, wait for other system components to drop the config.") @@ -71,8 +92,38 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR return nil, errors.Wrapf(err, "failed to open cni config file %q", confFile) } defer f.Close() - if err := t.Execute(f, cniConfigTemplate{PodCIDR: podCIDR}); err != nil { + if err := t.Execute(f, cniConfigTemplate{ + PodCIDR: cidrs[0], + PodCIDRRanges: cidrs, + Routes: routes, + }); err != nil { return nil, errors.Wrapf(err, "failed to generate cni config file %q", confFile) } return &runtime.UpdateRuntimeConfigResponse{}, nil } + +// getRoutes returns the default route for each IP family (v4, v6) present in the passed in cidrs. 
+func getRoutes(cidrs []string) ([]string, error) { + var ( + routes []string + hasV4, hasV6 bool + ) + for _, c := range cidrs { + _, cidr, err := net.ParseCIDR(c) + if err != nil { + return nil, err + } + if cidr.IP.To4() != nil { + hasV4 = true + } else { + hasV6 = true + } + } + if hasV4 { + routes = append(routes, zeroCIDRv4) + } + if hasV6 { + routes = append(routes, zeroCIDRv6) + } + return routes, nil +} diff --git a/pkg/server/update_runtime_config_test.go b/pkg/server/update_runtime_config_test.go index df7918562..79113ced0 100644 --- a/pkg/server/update_runtime_config_test.go +++ b/pkg/server/update_runtime_config_test.go @@ -45,14 +45,13 @@ func TestUpdateRuntimeConfig(t *testing.T) { "ipam": { "type": "host-local", "subnet": "{{.PodCIDR}}", - "routes": [ - {"dst": "0.0.0.0/0"} - ] + "ranges": [{{range $i, $range := .PodCIDRRanges}}{{if $i}}, {{end}}[{"subnet": "{{$range}}"}]{{end}}], + "routes": [{{range $i, $route := .Routes}}{{if $i}}, {{end}}{"dst": "{{$route}}"}{{end}}] } }, ] }` - testCIDR = "10.0.0.0/24" + testCIDR = "10.0.0.0/24, 2001:4860:4860::8888/32" expected = ` { "name": "test-pod-network", @@ -64,9 +63,8 @@ func TestUpdateRuntimeConfig(t *testing.T) { "ipam": { "type": "host-local", "subnet": "10.0.0.0/24", - "routes": [ - {"dst": "0.0.0.0/0"} - ] + "ranges": [[{"subnet": "10.0.0.0/24"}], [{"subnet": "2001:4860:4860::8888/32"}]], + "routes": [{"dst": "0.0.0.0/0"}, {"dst": "::/0"}] } }, ] diff --git a/pkg/store/sandbox/metadata.go b/pkg/store/sandbox/metadata.go index 3c4ee589b..68f5fd881 100644 --- a/pkg/store/sandbox/metadata.go +++ b/pkg/store/sandbox/metadata.go @@ -55,6 +55,8 @@ type Metadata struct { NetNSPath string // IP of Pod if it is attached to non host network IP string + // AdditionalIPs of the Pod if it is attached to non host network + AdditionalIPs []string // RuntimeHandler is the runtime handler name of the pod. RuntimeHandler string // CNIresult resulting configuration for attached network namespace interfaces