Merge remote-tracking branch 'upstream/master' into default_route
abdosi committed Dec 22, 2022
2 parents 2adc7d0 + ac40fda commit b67f104
Showing 58 changed files with 2,710 additions and 1,106 deletions.
8 changes: 4 additions & 4 deletions .azure-pipelines/run-test-scheduler-template.yml
@@ -73,7 +73,7 @@ steps:
echo "TestbedV2 is just online and might not be stable enough, for any issue, please send email to sonictestbedtools@microsoft.com"
echo "Runtime detailed progress at https://www.testbed-tools.org/scheduler/testplan/$TEST_PLAN_ID"
# When "LOCK_TESTBED" finish, it changes into "PREPARE_TESTBED"
python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-states PREPARE_TESTBED EXECUTING KVMDUMP FINISHED CANCELLED FAILED
python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-state LOCK_TESTBED
env:
TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL)
displayName: Lock testbed
@@ -86,7 +86,7 @@ steps:
echo "TestbedV2 is just online and might not be stable enough, for any issue, please send email to sonictestbedtools@microsoft.com"
echo "Runtime detailed progress at https://www.testbed-tools.org/scheduler/testplan/$TEST_PLAN_ID"
# When "PREPARE_TESTBED" finish, it changes into "EXECUTING"
python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-states EXECUTING KVMDUMP FINISHED CANCELLED FAILED
python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-state PREPARE_TESTBED
env:
TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL)
displayName: Prepare testbed
@@ -97,7 +97,7 @@ steps:
echo "TestbedV2 is just online and might not be stable enough, for any issue, please send email to sonictestbedtools@microsoft.com"
echo "Runtime detailed progress at https://www.testbed-tools.org/scheduler/testplan/$TEST_PLAN_ID"
# When "EXECUTING" finish, it changes into "KVMDUMP", "FAILED", "CANCELLED" or "FINISHED"
python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-states KVMDUMP FINISHED CANCELLED FAILED
python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-state EXECUTING
env:
TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL)
displayName: Run test
@@ -108,7 +108,7 @@ steps:
echo "TestbedV2 is just online and might not be stable enough, for any issue, please send email to sonictestbedtools@microsoft.com"
echo "Runtime detailed progress at https://www.testbed-tools.org/scheduler/testplan/$TEST_PLAN_ID"
# When "KVMDUMP" finish, it changes into "FAILED", "CANCELLED" or "FINISHED"
python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-states FINISHED CANCELLED FAILED
python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-state KVMDUMP
condition: succeededOrFailed()
env:
TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL)
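
For context, each of the four polling steps above now waits only for its own stage by passing a single --expected-state, instead of listing every later state. A minimal Python sketch (not part of the commit; tp and test_plan_id are assumed to be a TestPlanManager instance and a plan id) of what the template drives:

    # Sketch only: one poll call per pipeline step, mirroring the four steps above.
    for stage in ("LOCK_TESTBED", "PREPARE_TESTBED", "EXECUTING", "KVMDUMP"):
        # CLI equivalent: python ./.azure-pipelines/test_plan.py poll -i "$TEST_PLAN_ID" --expected-state <stage>
        tp.poll(test_plan_id, interval=60, timeout=-1, expected_state=stage)
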
189 changes: 163 additions & 26 deletions .azure-pipelines/test_plan.py
@@ -8,11 +8,23 @@

import requests
import yaml
from enum import Enum

PR_TEST_SCRIPTS_FILE = "pr_test_scripts.yaml"
TOLERATE_HTTP_EXCEPTION_TIMES = 20


class TestPlanStatus(Enum):
INIT = 10
LOCK_TESTBED = 20
PREPARE_TESTBED = 30
EXECUTING = 40
KVMDUMP = 50
FAILED = 60
CANCELLED = 70
FINISHED = 80
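
The numeric values encode the lifecycle order of a test plan, so two states can be compared by value; a minimal sketch (illustration only, using the enum above):

    current = TestPlanStatus["EXECUTING"]         # e.g. parsed from a scheduler response
    expected = TestPlanStatus["PREPARE_TESTBED"]
    if expected.value < current.value:
        print("test plan has already moved past PREPARE_TESTBED")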


def get_test_scripts(test_set):
_self_path = os.path.abspath(__file__)
pr_test_scripts_file = os.path.join(os.path.dirname(_self_path), PR_TEST_SCRIPTS_FILE)
@@ -21,6 +33,88 @@ def get_test_scripts(test_set):
return pr_test_scripts.get(test_set, [])


def test_plan_status_factory(status):
if status == "INIT":
return InitStatus()
elif status == "LOCK_TESTBED":
return LockStatus()
elif status == "PREPARE_TESTBED":
return PrePareStatus()
elif status == "EXECUTING":
return ExecutingStatus()
elif status == "KVMDUMP":
return KvmDumpStatus()
elif status == "FAILED":
return FailedStatus()
elif status == "CANCELLED":
return CancelledStatus()
elif status == "FINISHED":
return FinishStatus()

raise Exception("The status is not correct.")


class AbstractStatus():
def __init__(self, status):
self.status = status

def get_status(self):
return self.status.value

def print_logs(self, test_plan_id, resp_data, start_time):
status = resp_data.get("status", None)
current_status = test_plan_status_factory(status).get_status()

if(current_status == self.get_status()):
print("Test plan id: {}, status: {}, elapsed: {:.0f} seconds"
.format(test_plan_id, resp_data.get("status", None), time.time() - start_time))


class InitStatus(AbstractStatus):
def __init__(self):
super().__init__(TestPlanStatus.INIT)


class LockStatus(AbstractStatus):
def __init__(self):
super().__init__(TestPlanStatus.LOCK_TESTBED)


class PrePareStatus(AbstractStatus):
def __init__(self):
super().__init__(TestPlanStatus.PREPARE_TESTBED)


class ExecutingStatus(AbstractStatus):
def __init__(self):
super().__init__(TestPlanStatus.EXECUTING)

def print_logs(self, test_plan_id, resp_data, start_time):
print("Test plan id: {}, status: {}, progress: {}%, elapsed: {:.0f} seconds"
.format(test_plan_id, resp_data.get("status", None),
resp_data.get("progress", 0) * 100, time.time() - start_time))


class KvmDumpStatus(AbstractStatus):
def __init__(self):
super().__init__(TestPlanStatus.KVMDUMP)


class FailedStatus(AbstractStatus):
def __init__(self):
super().__init__(TestPlanStatus.FAILED)


class CancelledStatus(AbstractStatus):
def __init__(self):
super().__init__(TestPlanStatus.CANCELLED)


class FinishStatus(AbstractStatus):
def __init__(self):
super().__init__(TestPlanStatus.FINISHED)
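
The subclasses differ only in the wrapped enum member, except ExecutingStatus, which overrides print_logs to include progress. A small sketch with an assumed response payload (field names follow the code above, values are invented):

    import time

    resp_data = {"status": "EXECUTING", "progress": 0.42}
    ExecutingStatus().print_logs(test_plan_id=1234, resp_data=resp_data, start_time=time.time() - 30)
    # -> Test plan id: 1234, status: EXECUTING, progress: 42.0%, elapsed: 30 seconds
    LockStatus().print_logs(1234, {"status": "LOCK_TESTBED"}, time.time() - 5)
    # AbstractStatus.print_logs prints only when the response status matches the wrapped state.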


class TestPlanManager(object):

def __init__(self, url, tenant_id=None, client_id=None, client_secret=None):
@@ -153,14 +247,7 @@ def cancel(self, test_plan_id):
print("Result of cancelling test plan at {}:".format(tp_url))
print(str(resp["data"]))

def poll(self, test_plan_id, interval=60, timeout=-1, expected_states=""):
'''
The states of testplan can be described as below:
                                                        |-- FAILED
INIT -- LOCK_TESTBED -- PREPARE_TESTBED -- EXECUTING -- |-- CANCELLED
                                                        |-- FINISHED
'''

def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expected_states=""):
print("Polling progress and status of test plan at https://www.testbed-tools.org/scheduler/testplan/{}"
.format(test_plan_id))
print("Polling interval: {} seconds".format(interval))
@@ -194,23 +281,57 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_states=""):
status = resp_data.get("status", None)
result = resp_data.get("result", None)

if status in ["FINISHED", "CANCELLED", "FAILED"]:
if result == "SUCCESS":
print("Test plan is successfully {}. Elapsed {:.0f} seconds"
.format(status, time.time() - start_time))
if expected_state:
current_status = test_plan_status_factory(status)
expected_status = test_plan_status_factory(expected_state)

if expected_status.get_status() == current_status.get_status():
current_status.print_logs(test_plan_id, resp_data, start_time)
time.sleep(interval)
elif expected_status.get_status() < current_status.get_status():
steps = None
step_status = None
extra_params = resp_data.get("extra_params", None)

if extra_params:
steps = extra_params.get("steps", None)
if steps:
for step in steps:
if step.get("step") == expected_state:
step_status = step.get("status")
break
# We fail the step only if the step_status is "FAILED".
# Other statuses such as "SKIPPED" or "CANCELED" are considered successful.
if step_status == "FAILED":
raise Exception("Test plan id: {}, status: {}, result: {}, Elapsed {:.0f} seconds. "
"Check https://www.testbed-tools.org/scheduler/testplan/{} for test plan status"
.format(test_plan_id, step_status, result, time.time() - start_time,
test_plan_id))
else:
print("Current status is {}".format(step_status))
return
else:
print("Current state is {}, waiting for the state {}".format(status, expected_state))

# compatible with sonic-buildimage (legacy --expected-states handling)
elif expected_states:
if status in ["FINISHED", "CANCELLED", "FAILED"]:
if result == "SUCCESS":
print("Test plan is successfully {}. Elapsed {:.0f} seconds"
.format(status, time.time() - start_time))
return
else:
raise Exception("Test plan id: {}, status: {}, result: {}, Elapsed {:.0f} seconds"
.format(test_plan_id, status, result, time.time() - start_time))
elif status in expected_states:
if status == "KVMDUMP":
raise Exception("Test plan id: {}, status: {}, result: {}, Elapsed {:.0f} seconds"
.format(test_plan_id, status, result, time.time() - start_time))
return
else:
raise Exception("Test plan id: {}, status: {}, result: {}, Elapsed {:.0f} seconds"
.format(test_plan_id, status, result, time.time() - start_time))
elif status in expected_states:
if status == "KVMDUMP":
raise Exception("Test plan id: {}, status: {}, result: {}, Elapsed {:.0f} seconds"
.format(test_plan_id, status, result, time.time() - start_time))
return
else:
print("Test plan id: {}, status: {}, progress: {}%, elapsed: {:.0f} seconds"
.format(test_plan_id, status, resp_data.get("progress", 0) * 100, time.time() - start_time))
time.sleep(interval)
print("Test plan id: {}, status: {}, progress: {}%, elapsed: {:.0f} seconds"
.format(test_plan_id, status, resp_data.get("progress", 0) * 100, time.time() - start_time))
time.sleep(interval)

else:
raise Exception("Max polling time reached, test plan at {} is not successfully finished or cancelled"
Expand Down Expand Up @@ -346,6 +467,14 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_states=""):
required=False,
help="Token to download the repo from Azure DevOps"
)
parser_create.add_argument(
"--azp-pr-id",
type=str,
dest="azp_pr_id",
default="",
required=False,
help="Pullrequest ID from Azure Pipelines"
)

parser_poll = subparsers.add_parser("poll", help="Poll test plan status.")
parser_cancel = subparsers.add_parser("cancel", help="Cancel running test plan.")
@@ -365,9 +494,17 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_states=""):
dest="expected_states",
required=False,
nargs='*',
help="Expected state.",
help="Expected states.",
default="FINISHED"
)
parser_poll.add_argument(
"--expected-state",
type=str,
dest="expected_state",
required=False,
help="Expected state.",
default=""
)
parser_poll.add_argument(
"--interval",
type=int,
Expand Down Expand Up @@ -422,7 +559,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_states=""):
env["client_secret"])

if args.action == "create":
pr_id = os.environ.get("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER")
pr_id = args.azp_pr_id if args.azp_pr_id else os.environ.get("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER")
repo = os.environ.get("BUILD_REPOSITORY_PROVIDER")
reason = os.environ.get("BUILD_REASON")
build_id = os.environ.get("BUILD_BUILDID")
@@ -459,7 +596,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_states=""):
azp_repo_access_token=args.azp_repo_access_token
)
elif args.action == "poll":
tp.poll(args.test_plan_id, args.interval, args.timeout, args.expected_states)
tp.poll(args.test_plan_id, args.interval, args.timeout, args.expected_state, args.expected_states)
elif args.action == "cancel":
tp.cancel(args.test_plan_id)
sys.exit(0)
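
Taken together, poll() now has two entry points; a hedged sketch of the resulting calls (the test plan id is illustrative):

    import os

    tp = TestPlanManager(os.environ.get("TESTBED_TOOLS_URL"))
    # New per-stage polling, as used by run-test-scheduler-template.yml:
    tp.poll(1234, interval=60, timeout=-1, expected_state="LOCK_TESTBED")
    # Legacy multi-state polling retained for sonic-buildimage callers:
    tp.poll(1234, interval=60, timeout=-1, expected_states=["KVMDUMP", "FINISHED", "CANCELLED", "FAILED"])
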
2 changes: 1 addition & 1 deletion ansible/roles/fanout/tasks/rootfanout_connect.yml
@@ -30,7 +30,7 @@
- name: Collect DUTs vlans
set_fact:
dev_vlans: "{{ dev_vlans|default([]) + item.value }}"
loop: "{{ devinfo['ansible_facts']['device_vlan_range'] | dict2items }}"
loop: "{{ devinfo['ansible_facts']['device_vlan_range'] | default ({}) | dict2items }}"

- name: Find the root fanout switch
set_fact:
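
The added default filter lets the loop tolerate hosts whose facts have no device_vlan_range key instead of failing the task. A standalone sketch of the same idea (dict2items is an Ansible filter, so plain Jinja2 with dictsort is used here to make the point):

    from jinja2 import Environment

    tmpl = Environment().from_string("{{ (device_vlan_range | default({})) | dictsort | length }}")
    print(tmpl.render())                                              # "0": missing variable falls back to {}
    print(tmpl.render(device_vlan_range={"VlanRange": ["100-150"]}))  # "1"
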
26 changes: 20 additions & 6 deletions ansible/roles/test/files/ptftests/advanced-reboot.py
@@ -769,7 +769,7 @@ def generate_ping_dut_lo(self):
ip_src=self.from_server_src_addr,
ip_dst=dut_lo_ipv4)

self.ping_dut_exp_packet = Mask(exp_packet)
self.ping_dut_exp_packet = Mask(exp_packet)
self.ping_dut_exp_packet.set_do_not_care_scapy(scapy.Ether, "dst")
self.ping_dut_exp_packet.set_do_not_care_scapy(scapy.IP, "dst")
self.ping_dut_exp_packet.set_do_not_care_scapy(scapy.IP, "id")
@@ -1559,12 +1559,15 @@ def no_flood(self, packet):
"""
This method filters packets which are unique (i.e. no floods).
"""
if (not int(str(packet[scapyall.TCP].payload)) in self.unique_id) and (packet[scapyall.Ether].src == self.dut_mac):
if (not int(str(packet[scapyall.TCP].payload)) in self.unique_id) and \
(packet[scapyall.Ether].src == self.dut_mac or packet[scapyall.Ether].src == self.vlan_mac):
# This is a unique (no flooded) received packet.
# for dualtor, t1->server rcvd pkt will have src MAC as vlan_mac, and server->t1 rcvd pkt will have src MAC as dut_mac
self.unique_id.append(int(str(packet[scapyall.TCP].payload)))
return True
elif packet[scapyall.Ether].dst == self.dut_mac:
elif packet[scapyall.Ether].dst == self.dut_mac or packet[scapyall.Ether].dst == self.vlan_mac:
# This is a sent packet.
# for dualtor, t1->server sent pkt will have dst MAC as dut_mac, and server->t1 sent pkt will have dst MAC as vlan_mac
return True
else:
return False
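
For dual ToR, frames now have to be matched against both DUT-side MACs; a compact sketch (illustration only) of the classification no_flood and examine_flow apply:

    def classify(pkt_src_mac, pkt_dst_mac, dut_mac, vlan_mac):
        if pkt_dst_mac in (dut_mac, vlan_mac):
            return "sent"      # t1->server sent with dst dut_mac, server->t1 sent with dst vlan_mac
        if pkt_src_mac in (dut_mac, vlan_mac):
            return "received"  # t1->server received with src vlan_mac, server->t1 received with src dut_mac
        return "other"         # neither sent nor received by the DUT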
@@ -1630,14 +1633,18 @@ def examine_flow(self, filename = None):
missed_t1_to_vlan = 0
self.disruption_start, self.disruption_stop = None, None
for packet in packets:
if packet[scapyall.Ether].dst == self.dut_mac:
if packet[scapyall.Ether].dst == self.dut_mac or packet[scapyall.Ether].dst == self.vlan_mac:
# This is a sent packet - keep track of it as payload_id:timestamp.
# for dualtor both MACs are needed:
# t1->server sent pkt will have dst MAC as dut_mac, and server->t1 sent pkt will have dst MAC as vlan_mac
sent_payload = int(str(packet[scapyall.TCP].payload))
sent_packets[sent_payload] = packet.time
sent_counter += 1
continue
if packet[scapyall.Ether].src == self.dut_mac:
if packet[scapyall.Ether].src == self.dut_mac or packet[scapyall.Ether].src == self.vlan_mac:
# This is a received packet.
# for dualtor both MACs are needed:
# t1->server rcvd pkt will have src MAC as vlan_mac, and server->t1 rcvd pkt will have src MAC as dut_mac
received_time = packet.time
received_payload = int(str(packet[scapyall.TCP].payload))
if (received_payload % 5) == 0 : # From vlan to T1.
@@ -1652,6 +1659,8 @@ def examine_flow(self, filename = None):
continue
if received_payload - prev_payload > 1:
# Packets in a row are missing, a disruption.
self.log("received_payload: {}, prev_payload: {}, sent_counter: {}, received_counter: {}".format(
received_payload, prev_payload, sent_counter, received_counter))
lost_id = (received_payload -1) - prev_payload # How many packets lost in a row.
disrupt = (sent_packets[received_payload] - sent_packets[prev_payload + 1]) # How long disrupt lasted.
# Add disrupt to the dict:
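
A worked example (numbers invented) of the lost_id / disrupt bookkeeping above: payloads 8 and 9 were sent but never captured back, while 7 and 10 were.

    sent_packets = {7: 100.00, 8: 100.01, 9: 100.02, 10: 100.03}   # payload id -> send timestamp
    prev_payload, received_payload = 7, 10
    lost_id = (received_payload - 1) - prev_payload                # 2 packets lost in a row
    disrupt = sent_packets[received_payload] - sent_packets[prev_payload + 1]   # 0.02 s disruption window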
@@ -1762,7 +1771,7 @@ def wait_dut_to_warm_up(self):
up_time = None

if elapsed > warm_up_timeout_secs:
raise Exception("Control plane didn't come up within warm up timeout")
raise Exception("IO didn't come up within warm up timeout. Control plane: {}, Data plane: {}".format(ctrlplane, dataplane))
time.sleep(1)

# check until flooding is over. Flooding happens when FDB entry of
@@ -1974,6 +1983,11 @@ def pingDut(self):

total_rcv_pkt_cnt = testutils.count_matched_packets_all_ports(self, self.ping_dut_exp_packet, self.vlan_ports, timeout=self.PKT_TOUT)

if self.vlan_mac != self.dut_mac:
# handle two-for-one icmp reply for dual tor (when vlan and dut mac are diff):
# icmp_responder will also generate a response for this ICMP req, ignore that reply
total_rcv_pkt_cnt = total_rcv_pkt_cnt - self.ping_dut_pkts

self.log("Send %5d Received %5d ping DUT" % (self.ping_dut_pkts, total_rcv_pkt_cnt), True)

return total_rcv_pkt_cnt
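
The subtraction above compensates for the two-for-one replies on dual ToR: each request is answered both by the DUT and by icmp_responder, so the extra replies equal the number of requests. A trivial numeric sketch:

    ping_dut_pkts = 10
    total_rcv_pkt_cnt = 20                     # two replies captured per request
    total_rcv_pkt_cnt -= ping_dut_pkts         # drop the icmp_responder duplicates
    assert total_rcv_pkt_cnt == ping_dut_pkts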
(Diffs for the remaining 54 changed files are not shown here.)