diff --git a/docs/plugins.md b/docs/plugins.md index 377848b027..62b6b23f44 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -15,6 +15,7 @@ plugins/bgp.originate.md plugins/check.config.md plugins/fabric.md + plugins/mlag.vtep.md plugins/multilab.md plugins/node.clone.md plugins/vrrp.version.md diff --git a/docs/plugins/mlag.vtep.md b/docs/plugins/mlag.vtep.md new file mode 100644 index 0000000000..441d7fa9af --- /dev/null +++ b/docs/plugins/mlag.vtep.md @@ -0,0 +1,61 @@ +(plugin-mlag-vtep)= +# Combining MLAG redundancy with VXLAN VTEP functionality + +VXLAN enables L3 forwarding of L2 network traffic, and MLAG provides network state synchronization to support redundant active-active connectivity. Some designs require the combination of both, i.e. VXLAN endpoints with redundancy such that single device failures do not lead to broken network overlay paths. + +Conceptually, VXLAN redundancy requires the allocation of a shared logical VTEP IP address that is common across both MLAG peers. This enables both peers to send and receive VXLAN packets transparently, and if one fails the other can take over. This plugin coordinates the allocation of this logical anycast IP for MLAG peers, across multiple vendors. + +## Supported platforms + +This plugin supports the following devices (that also support both VXLAN and MLAG) + +| Operating system | Supports MLAG and VXLAN | Supported by mlag.vtep plugin | +| ------------------ | ----------------------- | ----------------------------- | +| Arista EOS | ✅ | ✅ | +| Aruba AOS-CX | ✅ | ❌ | +| Cumulus Linux 4.x | ❌ | ❌ | +| Cumulus 5.x (NVUE) | ✅ | ✅ | +| Dell OS10 | ✅ | ✅ | +| FRR | ❌ | ❌ | + +```eval_rst +.. contents:: Table of Contents + :depth: 2 + :local: + :backlinks: none +``` + +## Using the Plugin + +The plugin is enabled by simply including it in the topology: +``` +plugin: [ ...., mlag.vtep ] +``` + +At that point, anycast MLAG VTEPs are automatically enabled for any MLAG pair of devices in the topology. 
If this is not desired, the plugin can be disabled on a per-node level: +``` +nodes: + node_without_mlag_vtep: + lag.mlag.vtep: False +``` + +The MLAG VTEP works for both static VXLAN and EVPN signalled topologies. + +### Customizing the address allocation pool + +By default, the plugin configures a pool for `10.101.101.0/24` to allocate its /32 IPs from (1 per MLAG pair). If desired, this configuration can be changed: +``` +defaults.mlag.vtep.address_pool: 10.99.99.0/24 +``` + +The regular loopback pool is passed as a secondary source to allocate from, should the first pool run out. + +### Elaborate example + +The integration tests contain an example topology, under `integration/mlag.vtep/01-vxlan-bridging.yml`. This topology presents a combination of *Netlab* features, showcasing multi-vendor: +* VLAN bridging +* OSPFv2 +* static VXLAN +* MLAG to Linux hosts with LACP signalled port-channels +* MLAG orphan hosts (single connected) +* this new `mlag.vtep` plugin for logical VTEP redundancy \ No newline at end of file diff --git a/netsim/ansible/templates/vlan/cumulus_nvue.j2 b/netsim/ansible/templates/vlan/cumulus_nvue.j2 index 5f1ee06ad5..fbb7515437 100644 --- a/netsim/ansible/templates/vlan/cumulus_nvue.j2 +++ b/netsim/ansible/templates/vlan/cumulus_nvue.j2 @@ -17,17 +17,17 @@ {% if loop.first %} interface: {% endif %} - {{ i.ifname }}: - bridge: - domain: - br_default: + {{ i.ifname }}: + bridge: + domain: + br_default: {% if i.vlan.trunk_id is defined +%} - vlan: + vlan: {% for v in i.vlan.trunk_id|sort %} - '{{ v }}': {} + '{{ v }}': {} {% endfor %} - untagged: {{ i.vlan.access_id if 'native' in i.vlan else 'none' }} + untagged: {{ i.vlan.access_id if 'native' in i.vlan else 'none' }} {% elif i.vlan.access_id is defined %} - access: {{ i.vlan.access_id }} + access: {{ i.vlan.access_id }} {% endif %} {% endfor %} diff --git a/netsim/extra/mlag.vtep/cumulus_nvue.j2 b/netsim/extra/mlag.vtep/cumulus_nvue.j2 new file mode 100644 index 0000000000..21ffe2c9d4 --- 
/dev/null +++ b/netsim/extra/mlag.vtep/cumulus_nvue.j2 @@ -0,0 +1,8 @@ +# nv set nve vxlan mlag shared-address +{% if lag.mlag.vtep is defined %} +- set: + nve: + vxlan: + mlag: + shared-address: {{ lag.mlag.vtep }} +{% endif %} \ No newline at end of file diff --git a/netsim/extra/mlag.vtep/defaults.yml b/netsim/extra/mlag.vtep/defaults.yml new file mode 100644 index 0000000000..83cc0bf4de --- /dev/null +++ b/netsim/extra/mlag.vtep/defaults.yml @@ -0,0 +1,22 @@ +# mlag.vtep attributes +# +--- +devices: # Only devices that support MLAG can be supported by this plugin + dellos10: + features.lag.mlag_vtep: True + cumulus_nvue: + features.lag: + mlag_vtep: True + mlag_vtep_needs_script: True # Cumulus NVUE requires a specific configuration + eos: + features.lag.mlag_vtep: True # EOS also has a more complex scheme using a secondary IP on loopback, not used here + none: + features.lag.mlag_vtep: True + +mlag.vtep: + address_pool: 10.101.101.0/24 # Address pool to allocate anycast VTEP loopback IPs from + +lag: + attributes: + node: + mlag.vtep: bool # Provide a way to selectively disable the plugin on nodes diff --git a/netsim/extra/mlag.vtep/plugin.py b/netsim/extra/mlag.vtep/plugin.py new file mode 100644 index 0000000000..fef4c3f358 --- /dev/null +++ b/netsim/extra/mlag.vtep/plugin.py @@ -0,0 +1,83 @@ +# +# Modified from https://github.com/ssasso/netsim-topologies/blob/main/multivendor-evpn/_plugins/vxlan_anycast_plugin.py +# + +import os + +from netsim.utils import log +from netsim.augment import addressing, devices, links +from netsim import api, data +from box import Box +import netaddr + +_config_name = 'mlag.vtep' +_requires = [ 'vxlan', 'lag' ] + +POOL_NAME = "mlag_vtep" + +def pre_link_transform(topology: Box) -> None: + global _config_name + # Error if vxlan/lag module is not loaded + if 'vxlan' not in topology.module or 'lag' not in topology.module: + log.error( + 'vxlan and/or lag module is not loaded.', + log.IncorrectValue, + _config_name) + +def 
topology_expand(topology: Box) -> None: + # Create address pool to check for overlap with other address ranges + topology.addressing[POOL_NAME] = { 'ipv4' : topology.defaults.mlag.vtep.address_pool, + 'prefix' : 32 } + +def post_transform(topology: Box) -> None: + # Allocate ANYCAST mlag VTEP Address for the loopbacks + change_ip = {} + for link in topology.get('links', []): + # Only for mlag peer links, XXX assuming at most 1 peergroup per node + if not link.get('lag.mlag.peergroup', False): + continue + peers = link.get('interfaces',[]) + if peers: + # vtep address - Replace currently allocated VTEP with a new anycast VTEP generated for each mlag pair + vtep_a = addressing.get(topology.pools, [POOL_NAME, 'loopback'])['ipv4'] + for i in peers: + node = topology.nodes[i.node] + features = devices.get_device_features(node,topology.defaults) + if not features.get('lag.mlag_vtep',None): + log.error(f'Node {node.name}({node.device}) is not supported by the mlag.vtep plugin', + log.IncorrectValue,_config_name) + continue + if 'vtep' in node.vxlan and node.get('lag.mlag.vtep',None) is not False: + node.lag.mlag.vtep = change_ip[ node.vxlan.vtep ] = str(vtep_a.network) + + if 'mlag_vtep_needs_script' in features.lag: + api.node_config(node,_config_name) # Remember that we have to do extra configuration + + # On Cumulus, the source interface remains the unicast IP + else: + # Add an extra loopback interface with the allocated VTEP IP + vtep_loopback = data.get_empty_box() + vtep_loopback.type = 'loopback' + vtep_loopback.name = f"MLAG VTEP VXLAN interface shared between {' - '.join([i.node for i in peers])}" + vtep_loopback.ipv4 = node.lag.mlag.vtep + "/32" + vtep_loopback.vxlan.vtep = True + links.create_virtual_interface(node, vtep_loopback, topology.defaults) + if 'ospf' in node.get('module',[]): # Add it to OSPF when used, TODO ISIS + vtep_loopback.ospf = { 'area': "0.0.0.0", 'passive': True } + node.interfaces.append( vtep_loopback ) + + # Update VXLAN VTEP + 
node.vxlan.vtep_interface = vtep_loopback.ifname + node.vxlan.vtep = node.lag.mlag.vtep + + for n, ndata in topology.nodes.items(): + # Update remote vtep list in case of static flooding + if ndata.vxlan.get('flooding', '') == 'static': + + def replace(ip: str) -> str: + return change_ip[ip] if ip in change_ip else ip + mlag_vtep = ndata.get('lag.mlag.vtep',None) + ndata.vxlan.vtep_list = list({ replace(v) for v in ndata.vxlan.vtep_list if replace(v)!=mlag_vtep }) + for vl, vdata in ndata.get('vlans', {}).items(): + if 'vtep_list' in vdata: + vdata.vtep_list = list({ replace(v) for v in vdata.vtep_list if replace(v)!=mlag_vtep }) diff --git a/tests/integration/mlag.vtep/01-vxlan-bridging.yml b/tests/integration/mlag.vtep/01-vxlan-bridging.yml new file mode 100644 index 0000000000..84451b6486 --- /dev/null +++ b/tests/integration/mlag.vtep/01-vxlan-bridging.yml @@ -0,0 +1,98 @@ +--- +message: | + The devices under test are an mlag pair of VLAN-to-VXLAN bridges providing two access VLANs + and two VXLAN VNIs, using static VXLAN tunnels. Both VLANs are using the same IP prefix to identify + potential inter-VLAN leaking. 
+ + The pair uses the mlag.vtep plugin to provision an anycast MLAG VTEP across both, such that + both devices are equivalent from the perspective of the 3rd VTEP (FRR) + + * h1,h2 and h3,h4 should be able to ping each other + * h1 should not be able to reach h3 or h4 + + Please note it might take a while for the lab to work due to + STP learning phase and/or OSPF peering delays + +plugin: [ mlag.vtep ] + +defaults.device: eos + +groups: + _auto_create: True + lag_hosts: + members: [ h1, h3 ] + module: [ lag ] + device: linux + # provider: clab, use Same provider as MLAG devices for LACP to work + + hosts: + members: [ h2, h4, h5 ] + device: linux + provider: clab + + switches: + members: [ dut_a, dut_b ] + module: [ vlan, vxlan, ospf, lag ] + + probes: + members: [ xs ] + module: [ vlan, vxlan, ospf ] + device: frr + provider: clab + +vlans: + red: + mode: bridge + prefix: + ipv4: 172.31.1.0/24 + links: [ xs-h2, dut_a-h5 ] # h5 is single connected + blue: + mode: bridge + prefix: + ipv4: 172.31.1.0/24 + links: [ xs-h4 ] + +vxlan.vlans: [ red, blue ] + +links: +- lag: + members: [ dut_a-dut_b ] + mlag.peergroup: True + +- lag: + members: [ h1-dut_a, h1-dut_b ] + vlan.access: red +- lag: + members: [ h3-dut_a, h3-dut_b ] + vlan.access: blue + +- xs-dut_a +- xs-dut_b + +validate: + no_dut_a_route: + description: Remove route via VTEP A + nodes: [ xs ] + devices: [ frr ] + exec: > + ip route replace {{ vxlan.vtep_list[0] }}/32 via + {{ interfaces[1].neighbors[0].ipv4|ipaddr('address') }} dev {{ interfaces[1].ifname }} + adj: + description: Check OSPF adjacency with DUT_B + wait_msg: Waiting for OSPF adjacency process to complete + wait: 50 + nodes: [ xs ] + plugin: ospf_neighbor(nodes.dut_b.ospf.router_id) + ping_red: + description: Ping-based reachability test in VLAN red + nodes: [ h1, h5 ] + plugin: ping('h2') + ping_blue: + description: Ping-based reachability test in VLAN blue + nodes: [ h3 ] + plugin: ping('h4') + inter_vlan: + description: Ping-based 
reachability test between blue and red VLANs + nodes: [ h1 ] + devices: [ linux ] + plugin: ping('h4',expect='fail') diff --git a/tests/integration/mlag.vtep/02-evpn-vxlan-bridging.yml b/tests/integration/mlag.vtep/02-evpn-vxlan-bridging.yml new file mode 100644 index 0000000000..80a6a4ab81 --- /dev/null +++ b/tests/integration/mlag.vtep/02-evpn-vxlan-bridging.yml @@ -0,0 +1,102 @@ +--- +message: | + The devices under test are an mlag pair of VLAN-to-VXLAN bridges between two access VLANs + and two VXLAN VNIs. Both VLANs are using the same IP prefix to identify + potential inter-VLAN leaking. + + The pair uses the mlag.vtep plugin to provision an anycast MLAG VTEP across both, such that + both devices are equivalent from the perspective of the 3rd VTEP (FRR) + + This example uses BGP EVPN as the control plane for VXLAN + + * h1,h2 and h3,h4 should be able to ping each other + * h1 should not be able to reach h3 or h4 + + Please note it might take a while for the lab to work due to + STP learning phase + +plugin: [ mlag.vtep ] + +defaults.device: eos + +bgp.as: 65000 + +groups: + _auto_create: True + lag_hosts: + members: [ h1, h3 ] + module: [ lag ] + device: linux + # provider: clab, use Same provider as MLAG devices for LACP to work + + hosts: + members: [ h2, h4, h5 ] + device: linux + provider: clab + + switches: + members: [ dut_a, dut_b ] + module: [ vlan, vxlan, ospf, lag, bgp, evpn ] + + probes: + members: [ xs ] + module: [ vlan, vxlan, ospf, bgp, evpn ] + device: frr + provider: clab + +vlans: + red: + mode: bridge + prefix: + ipv4: 172.31.1.0/24 + links: [ xs-h2, dut_a-h5 ] # h5 is single connected + blue: + mode: bridge + prefix: + ipv4: 172.31.1.0/24 + links: [ xs-h4 ] + +vxlan.vlans: [ red, blue ] + +links: +- lag: + members: [ dut_a-dut_b ] + mlag.peergroup: True + +- lag: + members: [ h1-dut_a, h1-dut_b ] + vlan.access: red +- lag: + members: [ h3-dut_a, h3-dut_b ] + vlan.access: blue + +- xs-dut_a +- xs-dut_b + +validate: + no_dut_a_route: + 
description: Remove route via VTEP A + nodes: [ xs ] + devices: [ frr ] + exec: > + ip route replace 10.101.101.1/32 via + {{ interfaces[1].neighbors[0].ipv4|ipaddr('address') }} dev {{ interfaces[1].ifname }} + adj: + description: Check OSPF adjacency with DUT_B + wait_msg: Waiting for OSPF adjacency process to complete + wait: 50 + nodes: [ xs ] + plugin: ospf_neighbor(nodes.dut_b.ospf.router_id) + ping_red: + description: Ping-based reachability test in VLAN red + nodes: [ h1, h5 ] + plugin: ping('h2') + ping_blue: + description: Ping-based reachability test in VLAN blue + nodes: [ h3 ] + plugin: ping('h4') + inter_vlan: + description: Ping-based reachability test between blue and red VLANs + nodes: [ h1 ] + devices: [ linux ] + plugin: ping('h4',expect='fail')