In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import seaborn as sns
import swifter
import json
from collections import namedtuple
import re
import copy

import itertools
from pprint import pprint

# Functions

In [None]:
# Event vocabulary shared by every analysis case.
# Each event type is paired with the regular expression that parses its
# transition string; EVENTS and REs are the two parallel lists derived from it.
_EVENT_PATTERNS = [
    ('LTE_HO',       r'\((\d+), (\d+)\) -> \((\d+), (\d+)\) \| O'),
    ('MN_HO',        r'\((\d+), (\d+)\) -> \((\d+), (\d+)\) \| (\d+)'),
    ('MN_HO_to_eNB', r'\((\d+), (\d+)\) -> \((\d+), (\d+)\) \| (\d+) -> O'),
    ('SN_setup',     r'\((\d+), (\d+)\) \| O -> (\d+)'),
    ('SN_Rel',       r'\((\d+), (\d+)\) \| (\d+) -> O'),
    ('SN_HO',        r'\((\d+), (\d+)\) \| (\d+) -> (\d+)'),
    ('RLF_II',       r'\((\d+), (\d+)\) -> \((\d+), (\d+)\) \| (\d+) -> O'),
    ('RLF_III',      r'\((\d+), (\d+)\) -> \((\d+), (\d+)\) \| (\d+) -> O'),
    ('SCG_RLF',      r'\((\d+), (\d+)\) \| (\d+) -> O'),
]

EVENTS = [event for event, _ in _EVENT_PATTERNS]
REs = [pattern for _, pattern in _EVENT_PATTERNS]


def get_re_from_type(event):
    """Return the transition-string regex registered for ``event``.

    The lookup is positional: the pattern sits at the same index in ``REs``
    as the event name does in ``EVENTS``. Unknown events yield ``None``.
    """
    try:
        position = EVENTS.index(event)
    except ValueError:
        return None
    return REs[position]


# Handover events for which the cell-level (pci / earfcn) cases are evaluated.
EVENTS1 = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB']

# Name of every counter produced by the analysis. The order is significant:
# it is also the field order of the ANALYSIS namedtuple below.
CASES = [
    'all', 'two_event', 'one_event',
    'two_RLF', 'two_scg_failure', 'one_RLF_one_scg', 'one_RLF', 'one_scg_failure',
    'two_exact_identicle_HO',
    *(f'two_exact_identicle_{event}' for event in EVENTS),
    'two_identicle_RLF_SN_setup',
    'pci_earfcn_identicle_HO_eNB',
    *(f'pci_earfcn_identicle_{event}' for event in EVENTS1),
    'pci_identicle_HO_eNB',
    *(f'pci_identicle_{event}' for event in EVENTS1),
    'pci_identicle_HO_gNB',
    'pci_earfcn_identicle_RLF', 'pci_identicle_RLF', 'pci_identicle_sRLF',
    'pci_identicle_RLF_setup_cause_pci', 'pci_identicle_RLF_setup_cause_pci_earfcn',
    'SN_setup_of_RLF_MN_HO_to_eNB', 'SN_setup_of_RLF_SN_Rel', 'SN_setup_of_MN_HO_to_eNB_SN_Rel',
    'SN_setup_same_cause_rlf_pci', 'SN_setup_same_cause_rlf_pci_earfcn',
    'SN_setup_of_same_cause_MN_HO_to_eNB', 'SN_setup_of_same_cause_SN_Rel', 'SN_setup_same_cause_rlf',
]

# One integer field per case, every field defaulting to 0.
ANALYSIS = namedtuple('ANALYSIS', CASES, defaults=(0,) * len(CASES))

def Analyze(pkgs):
    """Count how often each special case listed in ``CASES`` occurs in ``pkgs``.

    Parameters
    ----------
    pkgs : sized iterable
        Dual-radio packet records. Every record must expose ``cause1``,
        ``trans1``, ``others1`` (first radio) and ``cause2``, ``trans2``,
        ``others2`` (second radio). Each attribute is a sequence of strings and
        the three sequences of one radio are read in parallel:

        * ``cause``  -- the last space-separated token is the event type
          (e.g. ``'RLF_II'``); anything before it is a timing prefix.
        * ``trans``  -- transition string parsed with the regex registered for
          the event type (see ``get_re_from_type``).
        * ``others`` -- free-text note; for SN_setup it is searched for
          ``' Near after <event>.'`` and for the first ``(number, number)`` pair
          (presumably the (pci, earfcn) of the cell that triggered the setup --
          TODO confirm against the code that produces the note).

    Returns
    -------
    ANALYSIS
        Named tuple with one counter per entry of ``CASES``. Counters that this
        function never updates (the SCG-failure ones and the per-type
        ``pci_*_identicle_<type>`` ones) stay at 0.

    Notes
    -----
    Differences from the previous implementation:

    * The RLF / SN_setup finders no longer require both sides to have the same
      event type. One side is filtered to RLF events and the other to SN_setup,
      so that condition could never hold and the two
      ``pci_identicle_RLF_setup_cause_*`` counters were always 0.
    * ``pci_identicle_RLF_setup_cause_pci_earfcn`` is now driven by the
      pci+earfcn comparison instead of re-testing the pci-only result.
    * A transition string / note that cannot be parsed is never considered
      identical to another unparsable one.
    * An exactly-identical event whose type is not in ``EVENTS`` is skipped for
      the per-type counter instead of raising ``KeyError``.
    """

    # First "(number, number)" pair inside an ``others`` note.
    cell_re = r'\((\d+), (\d+)\)'
    # Event that an SN_setup note says it happened "Near after".
    near_after_re = r' Near after (.*?)\.'

    rlf_types = ['RLF_II', 'RLF_III']

    # ------------------------------------------------------------------
    # Shared primitives.
    # ------------------------------------------------------------------
    def event_type(cause):
        """Drop the before/during/after prefix and keep only the event type."""
        return cause.split(' ')[-1]

    def trans_key(group_ids):
        """Build a key function that reads ``group_ids`` from a transition string."""
        def key_of(text, event):
            found = re.match(get_re_from_type(event), text)
            return tuple(found.group(g) for g in group_ids) if found else None
        return key_of

    def note_key(group_ids):
        """Build a key function that reads ``group_ids`` from the first cell pair of a note."""
        def key_of(text, event):
            found = re.search(cell_re, text)
            return tuple(found.group(g) for g in group_ids) if found else None
        return key_of

    def select(causes, texts, allowed, key_of):
        """Return ``(event type, key)`` for every entry whose type is in ``allowed``."""
        picked = []
        for cause, text in zip(causes, texts):
            event = event_type(cause)
            if event in allowed:
                picked.append((event, key_of(text, event)))
        return picked

    def matching_pairs(side1, side2, same_type=True):
        """Return index pairs (into the filtered sides) whose keys are equal.

        A key of ``None`` means "could not be parsed" and never matches.
        """
        return [(i, j)
                for i, (type1, key1) in enumerate(side1)
                for j, (type2, key2) in enumerate(side2)
                if key1 is not None and key1 == key2
                and (type1 == type2 or not same_type)]

    def preceding_event(note):
        """Return the event named after ' Near after ', or 'Unknown' when absent."""
        found = re.search(near_after_re, note)
        return found.group(1) if found else 'Unknown'

    # ------------------------------------------------------------------
    # Case finders. A* are cause lists, B* transition lists, C* note lists.
    # ------------------------------------------------------------------
    def find_exact_identicle(A1, B1, A2, B2):
        """Pairs with the same event type and the very same transition string.

        Nothing is filtered out, so the returned indices are positions in the
        original ``A1`` / ``A2`` lists.
        """
        side1 = [(event_type(a), b) for a, b in zip(A1, B1)]
        side2 = [(event_type(a), b) for a, b in zip(A2, B2)]
        return [(i, j)
                for i, (a1, b1) in enumerate(side1)
                for j, (a2, b2) in enumerate(side2)
                if a1 == a2 and b1 == b2]

    def find_pci_earfcn_identicle(A1, B1, A2, B2):
        """eNB handovers (EVENTS1) whose first four captured numbers agree.

        The event types themselves are deliberately not compared.
        """
        key = trans_key((1, 2, 3, 4))
        return matching_pairs(select(A1, B1, EVENTS1, key),
                              select(A2, B2, EVENTS1, key), same_type=False)

    def find_pci_identicle(A1, B1, A2, B2):
        """eNB handovers (EVENTS1) whose captured groups 1 and 3 agree.

        The event types themselves are deliberately not compared.
        """
        key = trans_key((1, 3))
        return matching_pairs(select(A1, B1, EVENTS1, key),
                              select(A2, B2, EVENTS1, key), same_type=False)

    def find_pci_identicle_gNB(A1, B1, A2, B2):
        """SN_HO events whose captured groups 3 and 4 agree."""
        key = trans_key((3, 4))
        return matching_pairs(select(A1, B1, ['SN_HO'], key),
                              select(A2, B2, ['SN_HO'], key))

    def find_pci_earfcn_identicle_RLF(A1, B1, A2, B2):
        """RLF_II events whose captured groups 1 and 2 agree."""
        key = trans_key((1, 2))
        return matching_pairs(select(A1, B1, ['RLF_II'], key),
                              select(A2, B2, ['RLF_II'], key))

    def find_pci_identicle_RLF(A1, B1, A2, B2):
        """RLF_II events whose captured group 1 agrees."""
        key = trans_key((1,))
        return matching_pairs(select(A1, B1, ['RLF_II'], key),
                              select(A2, B2, ['RLF_II'], key))

    def find_pci_identicle_sRLF(A1, B1, A2, B2):
        """SCG_RLF events whose captured group 3 agrees."""
        key = trans_key((3,))
        return matching_pairs(select(A1, B1, ['SCG_RLF'], key),
                              select(A2, B2, ['SCG_RLF'], key))

    def find_RLF_SN_Setup_Same_Cause_pci(A1, B1, A2, C2):
        """RLF on side 1 vs SN_setup on side 2: first number of the RLF
        transition equals the first number of the setup note's cell pair."""
        return matching_pairs(select(A1, B1, rlf_types, trans_key((1,))),
                              select(A2, C2, ['SN_setup'], note_key((1,))),
                              same_type=False)

    def find_RLF_SN_Setup_Same_Cause_pci_earfcn(A1, B1, A2, C2):
        """Like the pci variant, but both numbers of the pair must agree."""
        return matching_pairs(select(A1, B1, rlf_types, trans_key((1, 2))),
                              select(A2, C2, ['SN_setup'], note_key((1, 2))),
                              same_type=False)

    def find_SN_Setup_Same_Cause_pci(A1, C1, A2, C2):
        """SN_setup on both sides: first number of the notes' cell pairs agrees."""
        key = note_key((1,))
        return matching_pairs(select(A1, C1, ['SN_setup'], key),
                              select(A2, C2, ['SN_setup'], key))

    def find_SN_Setup_Same_Cause_pci_earfcn(A1, C1, A2, C2):
        """SN_setup on both sides: both numbers of the notes' cell pairs agree."""
        key = note_key((1, 2))
        return matching_pairs(select(A1, C1, ['SN_setup'], key),
                              select(A2, C2, ['SN_setup'], key))

    # ------------------------------------------------------------------
    # Count the number of every case.
    # ------------------------------------------------------------------
    nums = {case: 0 for case in CASES}
    nums['all'] = len(pkgs)

    for pkg in pkgs:

        cause1_string = "".join(pkg.cause1)
        cause2_string = "".join(pkg.cause2)
        others1_string = "".join(pkg.others1)
        others2_string = "".join(pkg.others2)

        # Event related check.
        if cause1_string and cause2_string:
            nums['two_event'] += 1
        elif cause1_string or cause2_string:
            nums['one_event'] += 1

        # RLF related cases. 'RLF_' matches RLF_II / RLF_III but not SCG_RLF.
        rlf_in_1 = 'RLF_' in cause1_string
        rlf_in_2 = 'RLF_' in cause2_string
        setup_after_rlf_1 = ('SN_setup' in cause1_string) and ('Near after RLF' in others1_string)
        setup_after_rlf_2 = ('SN_setup' in cause2_string) and ('Near after RLF' in others2_string)

        if rlf_in_1 and rlf_in_2:
            nums['two_RLF'] += 1

            # The pci+earfcn case is only examined once the pci-only case holds.
            if find_pci_identicle_RLF(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2):
                nums['pci_identicle_RLF'] += 1

                if find_pci_earfcn_identicle_RLF(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2):
                    nums['pci_earfcn_identicle_RLF'] += 1

        elif rlf_in_1 and setup_after_rlf_2:

            if find_RLF_SN_Setup_Same_Cause_pci(pkg.cause1, pkg.trans1, pkg.cause2, pkg.others2):
                nums['pci_identicle_RLF_setup_cause_pci'] += 1

                if find_RLF_SN_Setup_Same_Cause_pci_earfcn(pkg.cause1, pkg.trans1, pkg.cause2, pkg.others2):
                    nums['pci_identicle_RLF_setup_cause_pci_earfcn'] += 1

        elif setup_after_rlf_1 and rlf_in_2:

            # Same check with the radios swapped: the RLF is on radio 2.
            if find_RLF_SN_Setup_Same_Cause_pci(pkg.cause2, pkg.trans2, pkg.cause1, pkg.others1):
                nums['pci_identicle_RLF_setup_cause_pci'] += 1

                if find_RLF_SN_Setup_Same_Cause_pci_earfcn(pkg.cause2, pkg.trans2, pkg.cause1, pkg.others1):
                    nums['pci_identicle_RLF_setup_cause_pci_earfcn'] += 1

        elif rlf_in_1 or rlf_in_2:
            nums['one_RLF'] += 1

        # NOTE: 'two_scg_failure', 'one_RLF_one_scg' and 'one_scg_failure' are
        # declared in CASES but are intentionally not counted here.

        # SN setup case observation.
        if ('SN_setup' in cause1_string) and ('SN_setup' in cause2_string):

            s1 = preceding_event(others1_string)
            s2 = preceding_event(others2_string)

            if ('RLF' in s1) and ('RLF' in s2):

                nums['SN_setup_same_cause_rlf'] += 1

                if find_SN_Setup_Same_Cause_pci(pkg.cause1, pkg.others1, pkg.cause2, pkg.others2):
                    nums['SN_setup_same_cause_rlf_pci'] += 1

                    if find_SN_Setup_Same_Cause_pci_earfcn(pkg.cause1, pkg.others1, pkg.cause2, pkg.others2):
                        nums['SN_setup_same_cause_rlf_pci_earfcn'] += 1

            elif ('MN_HO_to_eNB' in s1) and ('MN_HO_to_eNB' in s2):
                nums['SN_setup_of_same_cause_MN_HO_to_eNB'] += 1
            elif ('SN_Rel' in s1) and ('SN_Rel' in s2):
                nums['SN_setup_of_same_cause_SN_Rel'] += 1
            elif ('RLF' in s1 or 'RLF' in s2) and ('MN_HO_to_eNB' in s1 or 'MN_HO_to_eNB' in s2):
                nums['SN_setup_of_RLF_MN_HO_to_eNB'] += 1
            elif ('RLF' in s1 or 'RLF' in s2) and ('SN_Rel' in s1 or 'SN_Rel' in s2):
                nums['SN_setup_of_RLF_SN_Rel'] += 1
            elif ('MN_HO_to_eNB' in s1 or 'MN_HO_to_eNB' in s2) and ('SN_Rel' in s1 or 'SN_Rel' in s2):
                nums['SN_setup_of_MN_HO_to_eNB_SN_Rel'] += 1

        # Identical event related cases.
        # Case exact identical: one count per packet, plus one per matching pair and type.
        L1 = find_exact_identicle(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2)
        if L1:
            nums['two_exact_identicle_HO'] += 1

        identicle_types = []
        for i, _ in L1:
            ho_type = event_type(pkg.cause1[i])
            identicle_types.append(ho_type)
            # Only event types listed in EVENTS own a per-type counter.
            if ho_type in EVENTS:
                nums[f'two_exact_identicle_{ho_type}'] += 1

        if ('RLF_II' in identicle_types or 'RLF_III' in identicle_types) and ('SN_setup' in identicle_types):
            nums['two_identicle_RLF_SN_setup'] += 1

        # eNB HO related.
        if find_pci_identicle(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2):
            nums['pci_identicle_HO_eNB'] += 1

            if find_pci_earfcn_identicle(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2):
                nums['pci_earfcn_identicle_HO_eNB'] += 1

        # gNB related.
        if find_pci_identicle_gNB(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2):
            nums['pci_identicle_HO_gNB'] += 1

        # SCG RLF related cases.
        if find_pci_identicle_sRLF(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2):
            nums['pci_identicle_sRLF'] += 1

    # nums was created from CASES, so its keys are exactly the ANALYSIS fields.
    return ANALYSIS(**nums)

# Main

In [None]:
# Build the list of experiments to analyse.
# Every XXXX-XX-XX.md file sits in its date directory and is read two lines at a
# time: an experiment directory name followed by its band settings. Reading
# stops at a '#endif' line or as soon as the settings line is empty (which is
# also what happens at end of file). Each pair becomes one EXPERIMENT instance.
md_files = [
    # '/home/wmnlab/D/database/2023-08-29/2023-08-29.md', 
    # '/home/wmnlab/D/database/2023-09-12_1/2023-09-12.md',
    # '/home/wmnlab/D/database/2023-09-22/2023-09-22.md',
    # '/home/wmnlab/D/database/2023-10-24/2023-10-24.md',
    '/home/wmnlab/D/database/2023-11-21/2023-11-21.md',
    # '/home/wmnlab/D/database/2023-12-26/2023-12-26.md'
    ]
EXPs = []

for md_file_path in md_files:

    date_dir_path = os.path.dirname(md_file_path)

    with open(md_file_path) as f:

        while True:
            # rstrip('\n') removes only the line terminator, so a final line
            # without a trailing newline keeps its last character (slicing
            # with [:-1] silently chopped it off).
            exp = f.readline().rstrip('\n')
            settings = f.readline().rstrip('\n')

            if exp == '#endif' or not settings:
                break

            E = EXPERIMENT(os.path.join(date_dir_path, exp), settings)
            EXPs.append(E)

pprint(EXPs)

# Single Radio Analysis

In [None]:
# For every single-radio trace, tally which before/during/after events are
# attached to each lost packet and each excessive-latency packet.

# Record
keys = ['dl_loss', 'dl_excl', 'ul_loss', 'ul_excl']
cases = ANALYSIS._fields
analysis_dict_all = {k: dict.fromkeys(cases, 0) for k in keys}

# Some settings
count_events = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 
          'SN_setup', 'SN_Rel', 'SN_HO', 
          'RLF_II', 'RLF_III', 'SCG_RLF', 'Conn_Req']
bdr = ['Before', 'During', 'After']

# Every "<timing> <event>" label, e.g. 'Before LTE_HO'.
comb = itertools.product(bdr, count_events)
count_bdr_events = [' '.join(pair) for pair in comb]

# One entry per trace; the two lists are index-aligned.
analysis_dicts = []
corresponding_list = []

# Record how many total packet.
total_Modem_Action_Test = dict.fromkeys(keys, 0)
total_Modem_Action_Test_v2_1 = dict.fromkeys(keys, 0)
toral_Modem_Action_Test_v2_2 = dict.fromkeys(keys, 0)
total_Modem_Control_Group = dict.fromkeys(keys, 0)
total_Modem_Control_Group2 = dict.fromkeys(keys, 0)
total_Modem_Control_Group3 = dict.fromkeys(keys, 0)

# Experiment directory name -> running packet totals of that experiment group.
# Experiments with any other name are not added to a total.
totals_by_experiment = {
    'Modem_Action_Test': total_Modem_Action_Test,
    'Modem_Action_Test_v2_1': total_Modem_Action_Test_v2_1,
    'Modem_Action_Test_v2_2': toral_Modem_Action_Test_v2_2,
    'Modem_Control_Group': total_Modem_Control_Group,
    'Modem_Control_Group2': total_Modem_Control_Group2,
    'Modem_Control_Group3': total_Modem_Control_Group3,
}

for EXP in EXPs:

    exp_dir_path = EXP.path
    settings = EXP.settings 

    dev_dir_list = find_device_under_exp(exp_dir_path)
    path_parts = exp_dir_path.split('/')
    date = path_parts[-2]
    name = path_parts[-1]

    # Below can be used to filtering.
    # if name != 'Modem_Action_Test':
    #     continue

    for dev_dir_path in dev_dir_list:

        dev = dev_dir_path.split('/')[-1]
        trace_dir_list = find_trace_under_device(dev_dir_path)

        for trace_dir_path in trace_dir_list:

            trace = trace_dir_path.split('/')[-1]
            print(date, name, dev, trace)
            corresponding_list.append((date, name, dev, trace))

            # Input files of this trace.
            data_dir_path = os.path.join(trace_dir_path, 'data')
            rrc_file = [p for p in os.listdir(data_dir_path) if p.endswith('_rrc.csv')][0]
            rrc_file_path = os.path.join(data_dir_path, rrc_file)
            dl_file_path = os.path.join(data_dir_path, 'udp_dnlk_loss_latency.csv')
            ul_file_path = os.path.join(data_dir_path, 'udp_uplk_loss_latency.csv')

            dl_loss_pkgs, dl_excl_pkgs  = loss_excl_cause(dl_file_path, rrc_file_path)
            ul_loss_pkgs, ul_excl_pkgs = loss_excl_cause(ul_file_path, rrc_file_path)

            # Same order as `keys`.
            pkg_lists = [dl_loss_pkgs, dl_excl_pkgs, ul_loss_pkgs, ul_excl_pkgs]

            # Count total number
            counts = [len(pkg_list) for pkg_list in pkg_lists]
            running_total = totals_by_experiment.get(name)
            if running_total is not None:
                for k, num in zip(keys, counts):
                    running_total[k] += num

            # Count event caused number
            analysis_dict = {}

            for pkgs, k in zip(pkg_lists, keys):

                d = dict.fromkeys(count_bdr_events, 0)

                for pkg in pkgs:
                    for possible_cause, other in zip(pkg.cause, pkg.others):

                        # Only the known "<timing> <event>" labels are counted.
                        if possible_cause not in d:
                            continue

                        # This Conn_Req should be count in RLF III end.
                        if ('Conn_Req' in possible_cause) and ('After RLF III.' in other):
                            continue

                        d[possible_cause] += 1

                analysis_dict[k] = d

            analysis_dicts.append(analysis_dict)

# corresponding_list

In [None]:
# Sum the per-trace event counts into one table per experiment group.
# Every table maps packet category (see `keys`) -> "<timing> <event>" -> count.
action_group = {}
action_group_v2_1 = {}
action_group_v2_2 = {}
control_group = {} # All + All
control_group2 = {} # All + LTE
control_group3 = {} # Lock Band+Lock Band

for k in keys:
    action_group[k] = {case: 0 for case in count_bdr_events}
    action_group_v2_1[k] = {case: 0 for case in count_bdr_events}
    action_group_v2_2[k] = {case: 0 for case in count_bdr_events}
    control_group[k] = {case: 0 for case in count_bdr_events}
    control_group2[k] = {case: 0 for case in count_bdr_events}
    control_group3[k] = {case: 0 for case in count_bdr_events}

# Experiment directory name -> table that accumulates its traces.
groups_by_experiment = {
    'Modem_Action_Test': action_group,
    'Modem_Action_Test_v2_1': action_group_v2_1,
    'Modem_Action_Test_v2_2': action_group_v2_2,
    'Modem_Control_Group': control_group,
    'Modem_Control_Group2': control_group2,
    'Modem_Control_Group3': control_group3,
}

for analysis_dict, info in zip(analysis_dicts, corresponding_list):

    # info is (date, name, dev, trace); the device is element 2, not 3.
    name, dev = info[1], info[2]

    group = groups_by_experiment.get(name)
    if group is None:
        # Traces of experiments outside the six known groups are not aggregated.
        continue

    for k, d in analysis_dict.items():
        for kk, v in d.items():
            group[k][kk] += v
        

In [None]:
# corresponding_list

In [None]:
# analysis_dicts

In [None]:
# action_group

In [None]:
# Bar chart, one panel per packet category: for each event type, the number of
# before/during/after occurrences divided by the total number of packets of
# that category in the selected experiment group.

# Change Here
group = action_group_v2_1
total = total_Modem_Action_Test_v2_1

fig, axes = plt.subplots(2,2, figsize=(12, 8))

metric_categories = []
metric_values = []

for k in keys:
    
    target = group[k]
    total_num = total[k]

    categories = []
    values = []

    for e in count_events:
        num = sum(target[t+' '+e] for t in bdr)
        categories.append(e)
        # A group without any packet of this category plots as 0 instead of
        # raising ZeroDivisionError.
        values.append(num/total_num if total_num else 0.0)

    metric_categories.append(categories)
    metric_values.append(values)

# Change Conn_req to Change_Band
for categories in metric_categories:
    idx = categories.index('Conn_Req')
    categories[idx] = 'Change_Band'

titles = ['DL Loss', 'DL Excessive Latency', 'Ul Loss', 'UL Excessive Latency']
for i, ax in enumerate(axes.flatten()):

    x = np.arange(len(metric_categories[i]))
    ax.bar(x, metric_values[i], width=0.6)
    ax.set_xticks(x)
    ax.set_xticklabels(metric_categories[i], rotation=45)
    ax.set_title(titles[i])

    # For not see change band: the right limit stops just before the last
    # category (derived from the category count rather than a literal 9).
    ax.set_xlim([0-0.6, (len(metric_categories[i])-1)-0.6])


# plt.xticks(fontsize=8)

plt.tight_layout()
plt.show()

In [None]:
# Grouped bar chart, one panel per packet category: for each event type, three
# bars (before / during / after), each being the occurrence count divided by
# the total number of packets of that category in the selected experiment group.

# Change Here
group = action_group_v2_1
total = total_Modem_Action_Test_v2_1

fig, axes = plt.subplots(2,2, figsize=(12, 8))

metric_categories = []
metric_values = []

for k in keys:
    
    target = group[k]
    total_num = total[k]
    
    categories = []
    values = []

    for e in count_events:
        
        # One ratio per timing, in the order of `bdr`. A group without any
        # packet of this category plots as 0 instead of raising
        # ZeroDivisionError.
        v_split = [(target[t+' '+e]/total_num) if total_num else 0.0 for t in bdr]
        categories.append(e)
        values.append(v_split)

    metric_categories.append(categories)
    metric_values.append(values)

# Change Conn_req to Change_Band
for categories in metric_categories:
    idx = categories.index('Conn_Req')
    categories[idx] = 'Change_Band'

titles = ['DL Loss', 'DL Excessive Latency', 'Ul Loss', 'UL Excessive Latency']
for i, ax in enumerate(axes.flatten()):
    
    # The three bars of a category sit side by side around its tick.
    x1 = [j-0.25 for j in range(len(metric_categories[i]))]
    x2 = [j for j in range(len(metric_categories[i]))]
    x3 = [j+0.25 for j in range(len(metric_categories[i]))]

    v1 = [x[0] for x in metric_values[i]]
    v2 = [x[1] for x in metric_values[i]]
    v3 = [x[2] for x in metric_values[i]]

    ax.bar(x1, v1, width=0.25)
    ax.bar(x2, v2, width=0.25)
    ax.bar(x3, v3, width=0.25)
    ax.set_xticks(x2)
    ax.set_xticklabels(metric_categories[i], rotation=45)
    ax.set_title(titles[i])
    ax.legend(bdr)

    # For not see change band: the right limit stops just before the last
    # category (derived from the category count rather than a literal 9).
    ax.set_xlim([0-0.6, (len(metric_categories[i])-1)-0.6])


# plt.xticks(fontsize=8)

plt.tight_layout()
plt.show()

# Dual Radio Analysis

In [None]:
# Dual-radio analysis: initialise the containers used by the loop below.
# (Original author's note: this cell still needs to be revised.)

# Record
# Packet categories: downlink/uplink x loss/excessive latency.
keys = ['dl_loss', 'dl_excl', 'ul_loss', 'ul_excl']
# One counter name per field of the ANALYSIS namedtuple.
cases = ANALYSIS._fields
# Packet category -> case name -> count, all starting at 0.
analysis_dict_all = {}
for k in keys:
    analysis_dict_all[k] = {case: 0 for case in cases}

# # Some settings
# count_events = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 
#           'SN_setup', 'SN_Rel', 'SN_HO', 
#           'RLF_II', 'RLF_III', 'SCG_RLF', 'Conn_Req']
# bdr = ['Before', 'During', 'After']

# comb = itertools.product(bdr, count_events)
# count_events = [t+' '+ho for t, ho in comb]

# Result containers. These rebind analysis_dicts and corresponding_list, so
# whatever the single-radio cell stored under those names is discarded here.
analysis_dicts = []
analysis_dicts2 = []
corresponding_list = []

# Record how many total packets each experiment group has, per packet category.
# NOTE(review): 'toral_Modem_Action_Test_v2_2' is spelled this way (sic) in the
# single-radio cell as well; rename every occurrence together or not at all.
total_Modem_Action_Test = {k:0 for k in keys}
total_Modem_Action_Test_v2_1 = {k:0 for k in keys}
toral_Modem_Action_Test_v2_2 = {k:0 for k in keys}
total_Modem_Control_Group = {k:0 for k in keys}
total_Modem_Control_Group2 = {k:0 for k in keys}
total_Modem_Control_Group3 = {k:0 for k in keys}


for EXP in EXPs:

    exp_dir_path = EXP.path
    settings = EXP.settings 

    dev_dir_list = find_device_under_exp(exp_dir_path)
    comb = itertools.combinations(dev_dir_list, 2)
    date = exp_dir_path.split('/')[-2]
    name = exp_dir_path.split('/')[-1]
    
    for dev_dir_path1, dev_dir_path2 in comb:
        
        dev1 = dev_dir_path1.split('/')[-1]
        dev2 = dev_dir_path2.split('/')[-1]
        
        trace_dir_list1 = find_trace_under_device(dev_dir_path1)
        trace_dir_list2 = find_trace_under_device(dev_dir_path2)

        for trace_dir_path1, trace_dir_path2 in zip(trace_dir_list1, trace_dir_list2):

            trace = trace_dir_path1.split('/')[-1]
            print(date, name, trace, dev1, dev2)

            data_dir_path1 = os.path.join(trace_dir_path1, 'data')
            rrc_file1 = [p for p in os.listdir(data_dir_path1) if p.endswith('_rrc.csv')][0]
            rrc_file_path1 = os.path.join(data_dir_path1, rrc_file1)
            dl_file_path1 = os.path.join(data_dir_path1, 'udp_dnlk_loss_latency.csv')
            ul_file_path1 = os.path.join(data_dir_path1, 'udp_uplk_loss_latency.csv')

            data_dir_path2 = os.path.join(trace_dir_path2, 'data')
            rrc_file2 = [p for p in os.listdir(data_dir_path2) if p.endswith('_rrc.csv')][0]
            rrc_file_path2 = os.path.join(data_dir_path2, rrc_file2)
            dl_file_path2 = os.path.join(data_dir_path2, 'udp_dnlk_loss_latency.csv')
            ul_file_path2 = os.path.join(data_dir_path2, 'udp_uplk_loss_latency.csv')

            dl_loss_pkgs, dl_excl_pkgs  = loss_excl_cause_dual(dl_file_path1, dl_file_path2, rrc_file_path1, rrc_file_path2)
            ul_loss_pkgs, ul_excl_pkgs = loss_excl_cause_dual(ul_file_path1, ul_file_path2, rrc_file_path1, rrc_file_path2)

            # Special case analysis.
            values = [Analyze(dl_loss_pkgs), Analyze(dl_excl_pkgs), Analyze(ul_loss_pkgs), Analyze(ul_excl_pkgs)]
            analysis_dict = {k: v for k, v in zip(keys, values)}
            
            # Count total number
            counts = [len(dl_loss_pkgs), len(dl_excl_pkgs), len(ul_loss_pkgs), len(ul_excl_pkgs)]

            for k, num in zip(keys, counts):
                if name == 'Modem_Action_Test':
                    total_Modem_Action_Test[k] += num
                elif name == 'Modem_Action_Test_v2_1':
                    total_Modem_Action_Test_v2_1[k] += num
                elif name == 'Modem_Action_Test_v2_2':
                    toral_Modem_Action_Test_v2_2[k] += num
                elif name == 'Modem_Control_Group':
                    total_Modem_Control_Group[k] += num
                elif name == 'Modem_Control_Group2':
                    total_Modem_Control_Group2[k] += num
                elif name == 'Modem_Control_Group3':
                    total_Modem_Control_Group3[k] += num

            # Count caused event combinations.
            analysis_dict2 = {}

            for pkgs, k in zip([dl_loss_pkgs, dl_excl_pkgs, ul_loss_pkgs, ul_excl_pkgs], keys):
                
                d = {}
                
                for pkg in pkgs:

                    cause1 = [c.split(' ')[-1] for c in pkg.cause1]
                    cause2 = [c.split(' ')[-1] for c in pkg.cause2]

                    def remove_duplicates(lst):
                        return list(set(lst))

                    cause1 = remove_duplicates(cause1)
                    cause2 = remove_duplicates(cause2)
                    counted = []

                    for c1, o1 in zip(cause1, pkg.others1):
                        if (c1 == 'Conn_Req') and ('After RLF III.' in o1):
                            print('working')
                            continue
                        
                        for c2, o2 in zip(cause2, pkg.others2):
                            if (c2 == 'Conn_Req') and ('After RLF III.' in o2):
                                print('working')
                                continue
                            
                            new_k = ' | '.join(sorted([c1,c2]))

                            if new_k  in d.keys() and new_k not in counted:
                                d[new_k] += 1
                                counted.append(new_k)
                            else:
                                d[new_k] = 1
                                counted.append(new_k)
            
                analysis_dict2[k] = d
            
            analysis_dicts2.append(analysis_dict2)

            for k in keys:
                for i, case in enumerate(cases):
                    analysis_dict_all[k][case] += analysis_dict[k][i]
            
            analysis_dicts.append(analysis_dict)
            corresponding_list.append((date, name, trace, dev1, dev2))

In [None]:
keys = ['dl_loss', 'dl_excl', 'ul_loss', 'ul_excl']

# One accumulator per experiment, shaped {metric: {event_comb: summed count}}.
action_group = {k: {} for k in keys}
action_group_v2_1 = {k: {} for k in keys}
action_group_v2_2 = {k: {} for k in keys}
control_group = {k: {} for k in keys}
control_group2 = {k: {} for k in keys}
control_group3 = {k: {} for k in keys}

# Experiment name (second field of each corresponding_list entry) -> its accumulator.
group_by_experiment = {
    'Modem_Action_Test': action_group,
    'Modem_Action_Test_v2_1': action_group_v2_1,
    'Modem_Action_Test_v2_2': action_group_v2_2,
    'Modem_Control_Group': control_group,
    'Modem_Control_Group2': control_group2,
    'Modem_Control_Group3': control_group3,
}

for d2, info in zip(analysis_dicts2, corresponding_list):

    name = info[1]
    group = group_by_experiment.get(name)
    if group is None:
        # Trace pairs from experiments not listed above are skipped.
        continue

    for k in keys:
        target = d2[k]
        for event_comb, v in target.items():
            # Sum the per-trace counts of every event combination.
            group[k][event_comb] = group[k].get(event_comb, 0) + v

#  test, c1, c2
# 9, 10, 6

In [None]:
# Spot-check the per-metric cause-pair counts of the fifth and sixth processed trace pairs.
analysis_dicts2[4], analysis_dicts2[5]

In [None]:
# Inspect the aggregated cause-pair counts of the two Modem_Action_Test_v2 experiments.
action_group_v2_1, action_group_v2_2

In [None]:
# Choose which experiment's aggregated cause pairs to rank, together with its packet totals.
target_group = control_group3
total = total_Modem_Control_Group3

# Only the first metric in `keys` is ranked; the cells below read `sorted_items`.
for k in keys[:1]:

    data = target_group[k]
    num = total[k]

    # Most frequent event combination first, each expressed as a share of all
    # packets of this metric (rounded to two decimals).
    sorted_items = [
        (event_comb, round(count / num, 2))
        for event_comb, count in sorted(data.items(), key=lambda item: -item[1])
    ]

In [None]:
sorted_items # DL

In [None]:
# control_group, total_Modem_Control_Group

In [None]:
# control_group2, total_Modem_Control_Group2

In [None]:
# action_group, total_Modem_Action_Test

In [None]:
# Special cases that are dropped from the overall tally before plotting.
ho_events = ('LTE_HO', 'MN_HO', 'MN_HO_to_eNB')
keys_to_remove = ['two_scg_failure', 'one_RLF_one_scg', 'one_scg_failure']
keys_to_remove += [f'pci_earfcn_identicle_{event}' for event in ho_events]
keys_to_remove += [f'pci_identicle_{event}' for event in ho_events]

# pop() with a default makes the removal a no-op for keys that are already gone,
# so this cell can be run more than once.
for inner_dict, key in itertools.product(analysis_dict_all.values(), keys_to_remove):
    inner_dict.pop(key, None)

# Some Plots

In [None]:
# Data
T = 'Dual radio loss of B1B3+B7B8' # Title
# Show 'A | B' event pairs as 'A+B' on the axis.
categories = [label.replace(' | ', '+') for label, _ in sorted_items]
values = [share for _, share in sorted_items]

# Draw a horizontal bar chart with Seaborn
fig, ax = plt.subplots(figsize=(4, 3))
sns.barplot(x=values, y=categories, palette='viridis', ax=ax)  # horizontal bars, 'viridis' colour theme
ax.set_xlabel('Proportion')  # x-axis label
ax.set_title(T)  # chart title
plt.show()