From 93af19f4f0d2861cba82793066fdbfcb3d092eec Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 2 Feb 2025 12:33:34 -0600 Subject: [PATCH 01/12] New MLAG VTEP plugin with support for Cumulus NVUE, Dell OS10 and EOS --- docs/plugins.md | 1 + docs/plugins/mlag.vtep.md | 61 +++++++++++++ netsim/extra/mlag.vtep/cumulus_nvue.j2 | 8 ++ netsim/extra/mlag.vtep/defaults.yml | 20 ++++ netsim/extra/mlag.vtep/plugin.py | 81 +++++++++++++++++ .../mlag.vtep/01-vxlan-bridging.yml | 91 +++++++++++++++++++ 6 files changed, 262 insertions(+) create mode 100644 docs/plugins/mlag.vtep.md create mode 100644 netsim/extra/mlag.vtep/cumulus_nvue.j2 create mode 100644 netsim/extra/mlag.vtep/defaults.yml create mode 100644 netsim/extra/mlag.vtep/plugin.py create mode 100644 tests/integration/mlag.vtep/01-vxlan-bridging.yml diff --git a/docs/plugins.md b/docs/plugins.md index 377848b027..62b6b23f44 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -15,6 +15,7 @@ plugins/bgp.originate.md plugins/check.config.md plugins/fabric.md + plugins/mlag.vtep.md plugins/multilab.md plugins/node.clone.md plugins/vrrp.version.md diff --git a/docs/plugins/mlag.vtep.md b/docs/plugins/mlag.vtep.md new file mode 100644 index 0000000000..c8c66d025c --- /dev/null +++ b/docs/plugins/mlag.vtep.md @@ -0,0 +1,61 @@ +(plugin-mlag-vtep)= +# Combining MLAG redundancy with VXLAN VTEP functionality + +VXLAN enables L3 forwarding of L2 network traffic, and MLAG provides network state synchronization to support redundant active-active connectivity. Some designs require the combination of both, i.e. VXLAN endpoints with redundancy such that single device failures do not lead to broken network overlay paths. + +Conceptually, VXLAN redundancy requires the allocation of a shared logical VTEP IP address that is common across both MLAG peers. This enables both peers to send and receive VXLAN packets transparently, and if one fails the other can take over. 
This plugin coordinates the allocation of this logical anycast IP for MLAG peers, across multiple vendors. + +## Supported platforms + +This plugin supports the following devices (that also support both VXLAN and MLAG) + +| Operating system | Supports MLAG and VXLAN | Supported by mlag.vtep plugin | +| ------------------ | ----------------------- | ----------------------------- | +| Arista EOS | ✅ | ✅ | +| Aruba AOS-CX | ✅ | ❌ | +| Cumulus Linux 4.x | ❌ | ❌ | +| Cumulus 5.x (NVUE) | ✅ | ✅ | +| Dell OS10 | ✅ | ✅ | +| FRR | ❌ | ❌ | + +```eval_rst +.. contents:: Table of Contents + :depth: 2 + :local: + :backlinks: none +``` + +## Using the Plugin + +The plugin is enabled by simply including it in the topology: +``` +plugin: [ ...., mlag.vtep ] +``` + +At that point, anycast MLAG VTEPs are automatically enabled for any MLAG pair of devices in the topology. If this is not desired, the plugin can be disabled on a per-node level: +``` +nodes: + node_without_mlag_vtep: + lag.mlag.vtep: False +``` + +The MLAG VTEP should work for both static VXLAN and EVPN signalled VXLAN topologies, though to date it has only been tested with static VXLAN configs. + +### Customizing the address allocation pool + +By default, the plugin configures a pool for `10.101.101.0/24` to allocate its /32 IPs from (1 per MLAG pair). If desired, this configuration can be changed: +``` +defaults.mlag.vtep.address_pool: 10.99.99.0/24 +``` + +The regular loopback pool is passed as a secondary source to allocate from, should the first pool run out. + +### Elaborate example + +The integration tests contain an elaborate multi-vendor example topology, under `integration/mlag.vtep/01-vxlan-bridging.yml`. 
This topology presents a culmination of *Netlab* features, showcasing multi-vendor: +* VLANs (mixed trunk with regular native VLAN and overlay VLANs) +* OSPFv2 +* static VXLAN +* MLAG (2:2) with LACP signalled port-channels +* STP with PVRST (to prevent loops on the native VLAN) +* this new `mlag.vtep` plugin \ No newline at end of file diff --git a/netsim/extra/mlag.vtep/cumulus_nvue.j2 b/netsim/extra/mlag.vtep/cumulus_nvue.j2 new file mode 100644 index 0000000000..21ffe2c9d4 --- /dev/null +++ b/netsim/extra/mlag.vtep/cumulus_nvue.j2 @@ -0,0 +1,8 @@ +# nv set nve vxlan mlag shared-address +{% if lag.mlag.vtep is defined %} +- set: + nve: + vxlan: + mlag: + shared-address: {{ lag.mlag.vtep }} +{% endif %} \ No newline at end of file diff --git a/netsim/extra/mlag.vtep/defaults.yml b/netsim/extra/mlag.vtep/defaults.yml new file mode 100644 index 0000000000..19aecddbbb --- /dev/null +++ b/netsim/extra/mlag.vtep/defaults.yml @@ -0,0 +1,20 @@ +# mlag.vtep attributes +# +--- +devices: # Only devices that support MLAG can be supported by this plugin + dellos10: + features.lag.mlag.vtep: True + cumulus_nvue: + features.lag.mlag: + vtep: True + vtep_needs_script: True # Cumulus NVUE requires a specific configuration + eos: + features.lag.mlag.vtep: True # EOS also supports a more complex scheme using a secondary IP on loopback, not used here + +mlag.vtep: + address_pool: 10.101.101.0/24 # Address pool to allocate anycast VTEP loopback IPs from + +lag: + attributes: + node: + mlag.vtep: bool # Provide a way to selectively disable the plugin on nodes diff --git a/netsim/extra/mlag.vtep/plugin.py b/netsim/extra/mlag.vtep/plugin.py new file mode 100644 index 0000000000..b3fb333db9 --- /dev/null +++ b/netsim/extra/mlag.vtep/plugin.py @@ -0,0 +1,81 @@ +# +# Modified from https://github.com/ssasso/netsim-topologies/blob/main/multivendor-evpn/_plugins/vxlan_anycast_plugin.py +# + +import os + +from netsim.utils import log +from netsim.augment import addressing, devices, links 
+from netsim import api, data +from box import Box +import netaddr + +_config_name = 'mlag.vtep' +_requires = [ 'vxlan', 'lag' ] + +POOL_NAME = "mlag_vtep" + +def pre_link_transform(topology: Box) -> None: + global _config_name + # Error if vxlan/lag module is not loaded + if 'vxlan' not in topology.module or 'lag' not in topology.module: + log.error( + 'vxlan and/or lag module is not loaded.', + log.IncorrectValue, + _config_name) + +def topology_expand(topology: Box) -> None: + # Create address pool to check for overlap with other address ranges + topology.addressing[POOL_NAME] = { 'ipv4' : topology.defaults.mlag.vtep.address_pool, + 'prefix' : 32 } + +def post_transform(topology: Box) -> None: + # Allocate ANYCAST mlag VTEP Address for the loopbacks + change_ip = {} + for link in topology.get('links', []): + # Only for mlag peer links, XXX assuming at most 1 peergroup per node + if not link.get('lag.mlag.peergroup', False): + continue + peers = link.get('interfaces',[]) + if peers: + # vtep address - Replace currently allocated VTEP with a new anycast VTEP generated for each mlag pair + vtep_a = addressing.get(topology.pools, [POOL_NAME, 'loopback'])['ipv4'] + for i in peers: + node = topology.nodes[i.node] + features = devices.get_device_features(node,topology.defaults) + if 'lag.mlag.vtep' not in features: + log.error(f'Node {node.name} does not support the mlag.vtep plugin',log.IncorrectValue,_config_name) + if 'vtep' in node.vxlan and node.get('lag.mlag.vtep',None) is not False: + node.lag.mlag.vtep = change_ip[ node.vxlan.vtep ] = str(vtep_a.network) + + if 'vtep_needs_script' in features.lag.mlag: + api.node_config(node,_config_name) # Remember that we have to do extra configuration + + # On Cumulus, the source interface remains the unicast IP + else: + # Add an extra loopback interface with the allocated VTEP IP + vtep_loopback = data.get_empty_box() + vtep_loopback.type = 'loopback' + vtep_loopback.name = f"MLAG VTEP VXLAN interface shared between {' - 
'.join([i.node for i in peers])}" + vtep_loopback.ipv4 = node.lag.mlag.vtep + "/32" + vtep_loopback.vxlan.vtep = True + links.create_virtual_interface(node, vtep_loopback, topology.defaults) + if 'ospf' in node.get('module',[]): # Add it to OSPF when used, TODO ISIS + vtep_loopback.ospf = { 'area': "0.0.0.0", 'passive': True } + node.interfaces.append( vtep_loopback ) + + # Update VXLAN VTEP + node.vxlan.vtep_interface = vtep_loopback.ifname + node.vxlan.vtep = node.lag.mlag.vtep + + for n, ndata in topology.nodes.items(): + # Update remote vtep list in case of static flooding + if ndata.vxlan.get('flooding', '') == 'static': + + def replace(ip: str) -> str: + return change_ip[ip] if ip in change_ip else ip + mlag_vtep = ndata.get('lag.mlag.vtep',None) + ndata.vxlan.vtep_list = list({ replace(v) for v in ndata.vxlan.vtep_list if replace(v)!=mlag_vtep }) + for vl, vdata in ndata.get('vlans', {}).items(): + if 'vtep_list' in vdata: + vdata.vtep_list = list({ replace(v) for v in vdata.vtep_list if replace(v)!=mlag_vtep }) diff --git a/tests/integration/mlag.vtep/01-vxlan-bridging.yml b/tests/integration/mlag.vtep/01-vxlan-bridging.yml new file mode 100644 index 0000000000..914e21a7ab --- /dev/null +++ b/tests/integration/mlag.vtep/01-vxlan-bridging.yml @@ -0,0 +1,91 @@ +--- +message: | + The devices under test are 2 mlag pairs of VLAN-to-VXLAN bridges between two access VLANs + and two VXLAN VNIs. Both VLANs are using the same IP prefix to identify + potential inter-VLAN leaking. + + The first pair uses the mlag.vtep plugin to provision an anycast MLAG VTEP across both. 
+ + * h1,h2 and h3,h4 should be able to ping each other + * h1 should not be able to reach h3 or h4 + + Please note it might take a while for the lab to work due to + STP learning phase + +defaults.devices.dellos10: + warnings.svi_ospf: False + features.vlan.svi_interface_name: vlan{vlan} + +plugin: [ mlag.vtep ] + +stp.protocol: pvrst + +groups: + _auto_create: True + hosts: + members: [ h1, h2, h3, h4 ] + device: linux + provider: clab + switches: + members: [ s1, s2, s3, s4 ] + module: [ vlan, vxlan, ospf, lag, stp ] # Requires STP to block massive loop + mtu: 1600 + +nodes: + s1: + device: cumulus_nvue + #lag.mlag.vtep: False + s2: + device: cumulus_nvue + #lag.mlag.vtep: False + +vlans: + ospf: + mode: irb # 'route' would be better, but Dell OS10 templates don't support it yet + id: 1 + red: + mode: bridge + prefix: + ipv4: 172.31.1.0/24 + links: [ s1-h1, s4-h2 ] + blue: + mode: bridge + prefix: + ipv4: 172.31.1.0/24 + links: [ s2-h3, s3-h4 ] + +vxlan.vlans: [ red, blue ] + +links: +- lag: + members: [ s1-s2 ] + mlag.peergroup: True +- lag: + members: [ s3-s4 ] + mlag.peergroup: True +- lag: + members: + - s1-s3 + - s1-s4 + - s2-s3 + - s2-s4 + vlan.trunk: [ ospf, red, blue ] + +validate: + ping_red: + description: Ping-based reachability test in VLAN red + wait_msg: Waiting for OSFP and STP to wake up + wait: 50 + nodes: [ h1 ] + plugin: ping('h2') + ping_blue: + description: Ping-based reachability test in VLAN blue + wait_msg: We might have to wait a bit longer + wait: 10 + nodes: [ h3 ] + plugin: ping('h4') + inter_vlan: + description: Ping-based reachability test between blue and red VLANs + nodes: [ h1 ] + devices: [ linux ] + plugin: ping('h4',expect='fail') From 54a59c67e60db7275f21337baac8122f9b6a2e34 Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 2 Feb 2025 13:01:17 -0600 Subject: [PATCH 02/12] Fix whitespace errors and indentation --- netsim/ansible/templates/vlan/cumulus_nvue.j2 | 16 ++++++++-------- netsim/extra/mlag.vtep/defaults.yml 
| 2 +- netsim/extra/mlag.vtep/plugin.py | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/netsim/ansible/templates/vlan/cumulus_nvue.j2 b/netsim/ansible/templates/vlan/cumulus_nvue.j2 index 5f1ee06ad5..fbb7515437 100644 --- a/netsim/ansible/templates/vlan/cumulus_nvue.j2 +++ b/netsim/ansible/templates/vlan/cumulus_nvue.j2 @@ -17,17 +17,17 @@ {% if loop.first %} interface: {% endif %} - {{ i.ifname }}: - bridge: - domain: - br_default: + {{ i.ifname }}: + bridge: + domain: + br_default: {% if i.vlan.trunk_id is defined +%} - vlan: + vlan: {% for v in i.vlan.trunk_id|sort %} - '{{ v }}': {} + '{{ v }}': {} {% endfor %} - untagged: {{ i.vlan.access_id if 'native' in i.vlan else 'none' }} + untagged: {{ i.vlan.access_id if 'native' in i.vlan else 'none' }} {% elif i.vlan.access_id is defined %} - access: {{ i.vlan.access_id }} + access: {{ i.vlan.access_id }} {% endif %} {% endfor %} diff --git a/netsim/extra/mlag.vtep/defaults.yml b/netsim/extra/mlag.vtep/defaults.yml index 19aecddbbb..3382ff053c 100644 --- a/netsim/extra/mlag.vtep/defaults.yml +++ b/netsim/extra/mlag.vtep/defaults.yml @@ -9,7 +9,7 @@ devices: # Only devices that support MLAG can be support vtep: True vtep_needs_script: True # Cumulus NVUE requires a specific configuration eos: - features.lag.mlag.vtep: True # EOS also supports a more complex scheme using a secondary IP on loopback, not used here + features.lag.mlag.vtep: True # EOS also has a more complex scheme using a secondary IP on loopback, not used here mlag.vtep: address_pool: 10.101.101.0/24 # Address pool to allocate anycast VTEP loopback IPs from diff --git a/netsim/extra/mlag.vtep/plugin.py b/netsim/extra/mlag.vtep/plugin.py index b3fb333db9..673fa588b0 100644 --- a/netsim/extra/mlag.vtep/plugin.py +++ b/netsim/extra/mlag.vtep/plugin.py @@ -44,7 +44,7 @@ def post_transform(topology: Box) -> None: node = topology.nodes[i.node] features = devices.get_device_features(node,topology.defaults) if 'lag.mlag.vtep' not in 
features: - log.error(f'Node {node.name} does not support the mlag.vtep plugin',log.IncorrectValue,_config_name) + log.error(f'Node {node.name} does not support the mlag.vtep plugin',log.IncorrectValue,_config_name) if 'vtep' in node.vxlan and node.get('lag.mlag.vtep',None) is not False: node.lag.mlag.vtep = change_ip[ node.vxlan.vtep ] = str(vtep_a.network) @@ -71,7 +71,7 @@ def post_transform(topology: Box) -> None: for n, ndata in topology.nodes.items(): # Update remote vtep list in case of static flooding if ndata.vxlan.get('flooding', '') == 'static': - + def replace(ip: str) -> str: return change_ip[ip] if ip in change_ip else ip mlag_vtep = ndata.get('lag.mlag.vtep',None) From af22276368fc0ecfb1442c19b7e92eebc7e9c7b4 Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 2 Feb 2025 14:22:18 -0600 Subject: [PATCH 03/12] Don't use VLAN 1 --- tests/integration/mlag.vtep/01-vxlan-bridging.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/integration/mlag.vtep/01-vxlan-bridging.yml b/tests/integration/mlag.vtep/01-vxlan-bridging.yml index 914e21a7ab..a84587ff63 100644 --- a/tests/integration/mlag.vtep/01-vxlan-bridging.yml +++ b/tests/integration/mlag.vtep/01-vxlan-bridging.yml @@ -40,9 +40,8 @@ nodes: #lag.mlag.vtep: False vlans: - ospf: + ospf_vxlan: mode: irb # 'route' would be better, but Dell OS10 templates don't support it yet - id: 1 red: mode: bridge prefix: @@ -69,7 +68,7 @@ links: - s1-s4 - s2-s3 - s2-s4 - vlan.trunk: [ ospf, red, blue ] + vlan.trunk: [ ospf_vxlan ] # 'red' and 'blue' are passed as VXLAN (vlan.access doesn't work) validate: ping_red: From a4daa709cc49cfbded6ee5363406c8490a09dcc3 Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 9 Feb 2025 18:44:56 -0600 Subject: [PATCH 04/12] Updated to a more sensible scenario without OSPF over MLAG --- .../mlag.vtep/01-vxlan-bridging.yml | 78 ++++++++++--------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git 
a/tests/integration/mlag.vtep/01-vxlan-bridging.yml b/tests/integration/mlag.vtep/01-vxlan-bridging.yml index a84587ff63..532c97bda6 100644 --- a/tests/integration/mlag.vtep/01-vxlan-bridging.yml +++ b/tests/integration/mlag.vtep/01-vxlan-bridging.yml @@ -1,10 +1,11 @@ --- message: | - The devices under test are 2 mlag pairs of VLAN-to-VXLAN bridges between two access VLANs + The devices under test are an mlag pair of VLAN-to-VXLAN bridges between two access VLANs and two VXLAN VNIs. Both VLANs are using the same IP prefix to identify potential inter-VLAN leaking. - The first pair uses the mlag.vtep plugin to provision an anycast MLAG VTEP across both. + The pair uses the mlag.vtep plugin to provision an anycast MLAG VTEP across both, such that + both devices are equivalent from the perspective of the 3rd VTEP (FRR) * h1,h2 and h3,h4 should be able to ping each other * h1 should not be able to reach h3 or h4 @@ -12,75 +13,78 @@ message: | Please note it might take a while for the lab to work due to STP learning phase -defaults.devices.dellos10: - warnings.svi_ospf: False - features.vlan.svi_interface_name: vlan{vlan} - plugin: [ mlag.vtep ] -stp.protocol: pvrst - groups: _auto_create: True + lag_hosts: + members: [ h1, h3 ] + module: [ lag ] + device: linux + # provider: clab, use Same provider as MLAG devices for LACP to work + hosts: - members: [ h1, h2, h3, h4 ] + members: [ h2, h4, h5 ] device: linux provider: clab + switches: - members: [ s1, s2, s3, s4 ] - module: [ vlan, vxlan, ospf, lag, stp ] # Requires STP to block massive loop - mtu: 1600 + members: [ dut_a, dut_b ] + module: [ vlan, vxlan, ospf, lag ] -nodes: - s1: - device: cumulus_nvue - #lag.mlag.vtep: False - s2: - device: cumulus_nvue - #lag.mlag.vtep: False + probes: + members: [ xs ] + module: [ vlan, vxlan, ospf ] + device: frr + provider: clab vlans: - ospf_vxlan: - mode: irb # 'route' would be better, but Dell OS10 templates don't support it yet red: mode: bridge prefix: ipv4: 172.31.1.0/24 - 
links: [ s1-h1, s4-h2 ] + links: [ xs-h2, dut_a-h5 ] # h5 is single connected blue: mode: bridge prefix: ipv4: 172.31.1.0/24 - links: [ s2-h3, s3-h4 ] + links: [ xs-h4 ] vxlan.vlans: [ red, blue ] links: - lag: - members: [ s1-s2 ] + members: [ dut_a-dut_b ] mlag.peergroup: True + - lag: - members: [ s3-s4 ] - mlag.peergroup: True + members: [ h1-dut_a, h1-dut_b ] + vlan.access: red - lag: - members: - - s1-s3 - - s1-s4 - - s2-s3 - - s2-s4 - vlan.trunk: [ ospf_vxlan ] # 'red' and 'blue' are passed as VXLAN (vlan.access doesn't work) + members: [ h3-dut_a, h3-dut_b ] + vlan.access: blue + +- xs-dut_a +- xs-dut_b validate: + no_dut_a_route: + description: Remove route via VTEP A + nodes: [ xs ] + devices: [ frr ] + exec: ip route replace {{ vxlan.vtep_list[0] }}/32 via {{ interfaces[1].neighbors[0].ipv4|ipaddr('address') }} dev {{ interfaces[1].ifname }} + adj: + description: Check OSPF adjacency with DUT_B + wait_msg: Waiting for OSPF adjacency process to complete + wait: 50 + nodes: [ xs ] + plugin: ospf_neighbor(nodes.dut_b.ospf.router_id) ping_red: description: Ping-based reachability test in VLAN red - wait_msg: Waiting for OSFP and STP to wake up - wait: 50 - nodes: [ h1 ] + nodes: [ h1, h5 ] plugin: ping('h2') ping_blue: description: Ping-based reachability test in VLAN blue - wait_msg: We might have to wait a bit longer - wait: 10 nodes: [ h3 ] plugin: ping('h4') inter_vlan: From a6d3a404b1ec491c5a31065d91bbee7db8346c98 Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 9 Feb 2025 19:04:33 -0600 Subject: [PATCH 05/12] Update test description to match reality --- docs/plugins/mlag.vtep.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/plugins/mlag.vtep.md b/docs/plugins/mlag.vtep.md index c8c66d025c..852ab14000 100644 --- a/docs/plugins/mlag.vtep.md +++ b/docs/plugins/mlag.vtep.md @@ -52,10 +52,10 @@ The regular loopback pool is passed as a secondary source to allocate from, shou ### Elaborate example -The 
integration tests contain an elaborate multi-vendor example topology, under `integration/mlag.vtep/01-vxlan-bridging.yml`. This topology presents a culmination of *Netlab* features, showcasing multi-vendor: -* VLANs (mixed trunk with regular native VLAN and overlay VLANs) +The integration tests contain an example topology, under `integration/mlag.vtep/01-vxlan-bridging.yml`. This topology presents a combination of *Netlab* features, showcasing multi-vendor: +* VLAN bridging * OSPFv2 * static VXLAN -* MLAG (2:2) with LACP signalled port-channels -* STP with PVRST (to prevent loops on the native VLAN) -* this new `mlag.vtep` plugin \ No newline at end of file +* MLAG to Linux hosts with LACP signalled port-channels +* MLAG orphan hosts (single connected) +* this new `mlag.vtep` plugin for logical VTEP redundancy \ No newline at end of file From 7c91ecca77aecda6c22e7d28cdedb3bbd8f281dd Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 9 Feb 2025 19:18:52 -0600 Subject: [PATCH 06/12] Fix line length of exec command --- tests/integration/mlag.vtep/01-vxlan-bridging.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/mlag.vtep/01-vxlan-bridging.yml b/tests/integration/mlag.vtep/01-vxlan-bridging.yml index 532c97bda6..3f381e0276 100644 --- a/tests/integration/mlag.vtep/01-vxlan-bridging.yml +++ b/tests/integration/mlag.vtep/01-vxlan-bridging.yml @@ -72,7 +72,9 @@ validate: description: Remove route via VTEP A nodes: [ xs ] devices: [ frr ] - exec: ip route replace {{ vxlan.vtep_list[0] }}/32 via {{ interfaces[1].neighbors[0].ipv4|ipaddr('address') }} dev {{ interfaces[1].ifname }} + exec: > + ip route replace {{ vxlan.vtep_list[0] }}/32 via + {{ interfaces[1].neighbors[0].ipv4|ipaddr('address') }} dev {{ interfaces[1].ifname }} adj: description: Check OSPF adjacency with DUT_B wait_msg: Waiting for OSPF adjacency process to complete From 64ffaabf7b733cfc1d2460926a444517fb7276f8 Mon Sep 17 00:00:00 2001 From: Jeroen van 
Bemmel Date: Sun, 9 Feb 2025 19:28:39 -0600 Subject: [PATCH 07/12] Set a default device to pass CI/CD integration testing --- tests/integration/mlag.vtep/01-vxlan-bridging.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/mlag.vtep/01-vxlan-bridging.yml b/tests/integration/mlag.vtep/01-vxlan-bridging.yml index 3f381e0276..52d36e6eb7 100644 --- a/tests/integration/mlag.vtep/01-vxlan-bridging.yml +++ b/tests/integration/mlag.vtep/01-vxlan-bridging.yml @@ -15,6 +15,8 @@ message: | plugin: [ mlag.vtep ] +defaults.device: eos + groups: _auto_create: True lag_hosts: From 36c4adadd2850a473a312eb9571ef4544c2a9e0e Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 9 Feb 2025 19:30:42 -0600 Subject: [PATCH 08/12] Fix check for plugin support --- netsim/extra/mlag.vtep/plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netsim/extra/mlag.vtep/plugin.py b/netsim/extra/mlag.vtep/plugin.py index 673fa588b0..f52f0d97be 100644 --- a/netsim/extra/mlag.vtep/plugin.py +++ b/netsim/extra/mlag.vtep/plugin.py @@ -43,8 +43,8 @@ def post_transform(topology: Box) -> None: for i in peers: node = topology.nodes[i.node] features = devices.get_device_features(node,topology.defaults) - if 'lag.mlag.vtep' not in features: - log.error(f'Node {node.name} does not support the mlag.vtep plugin',log.IncorrectValue,_config_name) + if not features.get('lag.mlag.vtep',None): + log.error(f'Node {node.name}({node.device}) is not supported by the mlag.vtep plugin',log.IncorrectValue,_config_name) if 'vtep' in node.vxlan and node.get('lag.mlag.vtep',None) is not False: node.lag.mlag.vtep = change_ip[ node.vxlan.vtep ] = str(vtep_a.network) From 7d3c685dbd80260e3d811e583da415ceb08b91a7 Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 9 Feb 2025 19:45:23 -0600 Subject: [PATCH 09/12] Abort loop when plugin not supported --- netsim/extra/mlag.vtep/plugin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/netsim/extra/mlag.vtep/plugin.py b/netsim/extra/mlag.vtep/plugin.py index f52f0d97be..e1270faa92 100644 --- a/netsim/extra/mlag.vtep/plugin.py +++ b/netsim/extra/mlag.vtep/plugin.py @@ -44,7 +44,9 @@ def post_transform(topology: Box) -> None: node = topology.nodes[i.node] features = devices.get_device_features(node,topology.defaults) if not features.get('lag.mlag.vtep',None): - log.error(f'Node {node.name}({node.device}) is not supported by the mlag.vtep plugin',log.IncorrectValue,_config_name) + log.error(f'Node {node.name}({node.device}) is not supported by the mlag.vtep plugin', + log.IncorrectValue,_config_name) + continue if 'vtep' in node.vxlan and node.get('lag.mlag.vtep',None) is not False: node.lag.mlag.vtep = change_ip[ node.vxlan.vtep ] = str(vtep_a.network) From 070e92383cea0a45c6c96ecb03c68b80f44d8649 Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 9 Feb 2025 19:47:28 -0600 Subject: [PATCH 10/12] Add support for the 'none' device --- netsim/extra/mlag.vtep/defaults.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/netsim/extra/mlag.vtep/defaults.yml b/netsim/extra/mlag.vtep/defaults.yml index 3382ff053c..3a3e477b8b 100644 --- a/netsim/extra/mlag.vtep/defaults.yml +++ b/netsim/extra/mlag.vtep/defaults.yml @@ -10,6 +10,8 @@ devices: # Only devices that support MLAG can be support vtep_needs_script: True # Cumulus NVUE requires a specific configuration eos: features.lag.mlag.vtep: True # EOS also has a more complex scheme using a secondary IP on loopback, not used here + none: + features.lag.mlag.vtep: True mlag.vtep: address_pool: 10.101.101.0/24 # Address pool to allocate anycast VTEP loopback IPs from From 9266ea2b5ae28ebb038b8fb5aa6cc2f110887d2c Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 9 Feb 2025 19:53:07 -0600 Subject: [PATCH 11/12] Use attribute names that don't conflict with bool/dict boundaries in the base code features.lag.mlag = True exists, so the plugin cannot use features.lag.mlag.vtep --- 
netsim/extra/mlag.vtep/defaults.yml | 18 +++++++++--------- netsim/extra/mlag.vtep/plugin.py | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/netsim/extra/mlag.vtep/defaults.yml b/netsim/extra/mlag.vtep/defaults.yml index 3a3e477b8b..83cc0bf4de 100644 --- a/netsim/extra/mlag.vtep/defaults.yml +++ b/netsim/extra/mlag.vtep/defaults.yml @@ -1,22 +1,22 @@ # mlag.vtep attributes # --- -devices: # Only devices that support MLAG can be supported by this plugin +devices: # Only devices that support MLAG can be supported by this plugin dellos10: - features.lag.mlag.vtep: True + features.lag.mlag_vtep: True cumulus_nvue: - features.lag.mlag: - vtep: True - vtep_needs_script: True # Cumulus NVUE requires a specific configuration + features.lag: + mlag_vtep: True + mlag_vtep_needs_script: True # Cumulus NVUE requires a specific configuration eos: - features.lag.mlag.vtep: True # EOS also has a more complex scheme using a secondary IP on loopback, not used here + features.lag.mlag_vtep: True # EOS also has a more complex scheme using a secondary IP on loopback, not used here none: - features.lag.mlag.vtep: True + features.lag.mlag_vtep: True mlag.vtep: - address_pool: 10.101.101.0/24 # Address pool to allocate anycast VTEP loopback IPs from + address_pool: 10.101.101.0/24 # Address pool to allocate anycast VTEP loopback IPs from lag: attributes: node: - mlag.vtep: bool # Provide a way to selectively disable the plugin on nodes + mlag.vtep: bool # Provide a way to selectively disable the plugin on nodes diff --git a/netsim/extra/mlag.vtep/plugin.py b/netsim/extra/mlag.vtep/plugin.py index e1270faa92..fef4c3f358 100644 --- a/netsim/extra/mlag.vtep/plugin.py +++ b/netsim/extra/mlag.vtep/plugin.py @@ -43,14 +43,14 @@ def post_transform(topology: Box) -> None: for i in peers: node = topology.nodes[i.node] features = devices.get_device_features(node,topology.defaults) - if not features.get('lag.mlag.vtep',None): + if not features.get('lag.mlag_vtep',None): 
log.error(f'Node {node.name}({node.device}) is not supported by the mlag.vtep plugin', log.IncorrectValue,_config_name) continue if 'vtep' in node.vxlan and node.get('lag.mlag.vtep',None) is not False: node.lag.mlag.vtep = change_ip[ node.vxlan.vtep ] = str(vtep_a.network) - if 'vtep_needs_script' in features.lag.mlag: + if 'mlag_vtep_needs_script' in features.lag: api.node_config(node,_config_name) # Remember that we have to do extra configuration # On Cumulus, the source interface remains the unicast IP From 932766d45b9f82f886c59527bba8d9b313b7aac9 Mon Sep 17 00:00:00 2001 From: Jeroen van Bemmel Date: Sun, 9 Feb 2025 20:45:29 -0600 Subject: [PATCH 12/12] Add EVPN test case --- docs/plugins/mlag.vtep.md | 2 +- .../mlag.vtep/01-vxlan-bridging.yml | 6 +- .../mlag.vtep/02-evpn-vxlan-bridging.yml | 102 ++++++++++++++++++ 3 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 tests/integration/mlag.vtep/02-evpn-vxlan-bridging.yml diff --git a/docs/plugins/mlag.vtep.md b/docs/plugins/mlag.vtep.md index 852ab14000..441d7fa9af 100644 --- a/docs/plugins/mlag.vtep.md +++ b/docs/plugins/mlag.vtep.md @@ -39,7 +39,7 @@ nodes: lag.mlag.vtep: False ``` -The MLAG VTEP should work for both static VXLAN and EVPN signalled VXLAN topologies, though to date it has only been tested with static VXLAN configs. +The MLAG VTEP works for both static VXLAN and EVPN signalled topologies. ### Customizing the address allocation pool diff --git a/tests/integration/mlag.vtep/01-vxlan-bridging.yml b/tests/integration/mlag.vtep/01-vxlan-bridging.yml index 52d36e6eb7..84451b6486 100644 --- a/tests/integration/mlag.vtep/01-vxlan-bridging.yml +++ b/tests/integration/mlag.vtep/01-vxlan-bridging.yml @@ -1,7 +1,7 @@ --- message: | - The devices under test are an mlag pair of VLAN-to-VXLAN bridges between two access VLANs - and two VXLAN VNIs. 
Both VLANs are using the same IP prefix to identify + The devices under test are an mlag pair of VLAN-to-VXLAN bridges providing two access VLANs + and two VXLAN VNIs, using static VXLAN tunnels. Both VLANs are using the same IP prefix to identify potential inter-VLAN leaking. The pair uses the mlag.vtep plugin to provision an anycast MLAG VTEP across both, such that @@ -11,7 +11,7 @@ message: | * h1 should not be able to reach h3 or h4 Please note it might take a while for the lab to work due to - STP learning phase + STP learning phase and/or OSPF peering delays plugin: [ mlag.vtep ] diff --git a/tests/integration/mlag.vtep/02-evpn-vxlan-bridging.yml b/tests/integration/mlag.vtep/02-evpn-vxlan-bridging.yml new file mode 100644 index 0000000000..80a6a4ab81 --- /dev/null +++ b/tests/integration/mlag.vtep/02-evpn-vxlan-bridging.yml @@ -0,0 +1,102 @@ +--- +message: | + The devices under test are an mlag pair of VLAN-to-VXLAN bridges between two access VLANs + and two VXLAN VNIs. Both VLANs are using the same IP prefix to identify + potential inter-VLAN leaking. 
+ + The pair uses the mlag.vtep plugin to provision an anycast MLAG VTEP across both, such that + both devices are equivalent from the perspective of the 3rd VTEP (FRR) + + This example uses BGP EVPN as the control plane for VXLAN + + * h1,h2 and h3,h4 should be able to ping each other + * h1 should not be able to reach h3 or h4 + + Please note it might take a while for the lab to work due to + STP learning phase + +plugin: [ mlag.vtep ] + +defaults.device: eos + +bgp.as: 65000 + +groups: + _auto_create: True + lag_hosts: + members: [ h1, h3 ] + module: [ lag ] + device: linux + # provider: clab, use Same provider as MLAG devices for LACP to work + + hosts: + members: [ h2, h4, h5 ] + device: linux + provider: clab + + switches: + members: [ dut_a, dut_b ] + module: [ vlan, vxlan, ospf, lag, bgp, evpn ] + + probes: + members: [ xs ] + module: [ vlan, vxlan, ospf, bgp, evpn ] + device: frr + provider: clab + +vlans: + red: + mode: bridge + prefix: + ipv4: 172.31.1.0/24 + links: [ xs-h2, dut_a-h5 ] # h5 is single connected + blue: + mode: bridge + prefix: + ipv4: 172.31.1.0/24 + links: [ xs-h4 ] + +vxlan.vlans: [ red, blue ] + +links: +- lag: + members: [ dut_a-dut_b ] + mlag.peergroup: True + +- lag: + members: [ h1-dut_a, h1-dut_b ] + vlan.access: red +- lag: + members: [ h3-dut_a, h3-dut_b ] + vlan.access: blue + +- xs-dut_a +- xs-dut_b + +validate: + no_dut_a_route: + description: Remove route via VTEP A + nodes: [ xs ] + devices: [ frr ] + exec: > + ip route replace 10.101.101.1/32 via + {{ interfaces[1].neighbors[0].ipv4|ipaddr('address') }} dev {{ interfaces[1].ifname }} + adj: + description: Check OSPF adjacency with DUT_B + wait_msg: Waiting for OSPF adjacency process to complete + wait: 50 + nodes: [ xs ] + plugin: ospf_neighbor(nodes.dut_b.ospf.router_id) + ping_red: + description: Ping-based reachability test in VLAN red + nodes: [ h1, h5 ] + plugin: ping('h2') + ping_blue: + description: Ping-based reachability test in VLAN blue + nodes: [ h3 ] + plugin: 
ping('h4') + inter_vlan: + description: Ping-based reachability test between blue and red VLANs + nodes: [ h1 ] + devices: [ linux ] + plugin: ping('h4',expect='fail')