In [None]:
import pandas as pd
import datetime as dt
import time
import numpy as np
import os
import swifter
from collections import namedtuple
from pathlib import Path
import copy
import fnmatch
import json

# Functions

## MeasureReport

In [None]:
class REPORTCONFIG:
    """A report configuration: an event name plus its parsed parameters.

    Attributes:
        name: First space-separated word of the ``name`` argument
            (e.g. ``'eventA3 (2)'`` becomes ``'eventA3'``).
        parameters: Dict built by :meth:`parse_parameter`.
    """

    # Characters kept when cleaning a parameter value; everything else is dropped.
    _VALUE_CHARS = '+-0123456789[]()&'

    def __init__(self, name, parameter):
        self.name = name.split(' ')[0]
        self.parameters = self.parse_parameter(parameter)

    def parse_parameter(self, parameter):
        """Turn a string of single-quoted tokens into a ``{key: value}`` dict.

        Every substring enclosed in single quotes is a token; tokens are read
        pairwise as key, value. A value is reduced to the characters in
        ``_VALUE_CHARS`` and converted to ``int`` when possible, otherwise the
        reduced string is stored as-is.

        Args:
            parameter: Iterable of characters (normally a ``str``). An empty
                container yields an empty dict.

        Returns:
            dict mapping each key token to an ``int`` or a cleaned ``str``.
            A trailing key with no matching value token is ignored instead of
            raising ``IndexError``.
        """
        tokens = []
        current = None  # None while outside quotes, else the characters read so far
        for ch in parameter:
            if ch == "'":
                if current is None:
                    current = []
                else:
                    tokens.append(''.join(current))
                    current = None
            elif current is not None:
                current.append(ch)

        parsed = {}
        # zip() stops at the shorter slice, so an unpaired last key is dropped.
        for key, raw in zip(tokens[0::2], tokens[1::2]):
            cleaned = ''.join(c for c in raw if c in self._VALUE_CHARS)
            try:
                parsed[key] = int(cleaned)
            except ValueError:
                # Not a plain integer (e.g. '[05]' or ''): keep the cleaned text.
                parsed[key] = cleaned
        return parsed

    def __str__(self):
        return self.name

    def __repr__(self):
        return self.name

class MEASOBJ:
    """A measurement object: its name paired with its carrier frequency."""

    def __init__(self, obj, freq):
        self.name, self.freq = obj, freq

    def __repr__(self):
        # Rendered as "(name, freq)".
        return '({}, {})'.format(self.name, self.freq)

    # str() and repr() give the same text.
    __str__ = __repr__

def parse_measIdToAddMod(s):
    """Split an ``'(a&b&c)'`` style string into its first three fields.

    All parentheses are removed, the rest is split on ``'&'``, and the first
    three pieces are returned as a tuple. Raises ``IndexError`` when fewer than
    three pieces are present.
    """
    fields = s.replace('(', '').replace(')', '').split('&')
    first, second, third = fields[0], fields[1], fields[2]
    return (first, second, third)

In [None]:
def MeasureReport(file):
    """Parse an RRC CSV log and bucket every measurement report by event type.

    The file is read with pandas, a fixed set of unused columns is dropped,
    rows containing any NaN are removed, and the rows are then walked in order
    while tracking the measurement configuration (measurement objects, report
    configurations and measId mappings, separately for LTE and NR). Each
    measurement report is resolved to its REPORTCONFIG through
    measId -> reportConfigId; unresolved reports get a copy of an 'Unknown'
    REPORTCONFIG.

    Args:
        file: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        dict mapping bucket name ('eventA1', 'eventA2', 'E-UTRAN-eventA3',
        'eventA5', 'eventA6', 'NR-eventA3', 'eventB1-NR-r15', 'reportCGI',
        'reportStrongestCells', 'others') to a list of
        ``MR(time, event, others)`` namedtuples. ``others`` is 'E-UTRAN' for
        LTE reports and 'NR' for NR reports.
    """

    mi_rrc_df = pd.read_csv(file)
    # Timestamps are shifted by +8 hours (presumably UTC -> local time; confirm).
    mi_rrc_df["Timestamp"] = mi_rrc_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
    unused = ['DL frequency','UL frequency', 'DL bandwidth', 'UL bandwidth', 'Cell Identity', 'TAC','Band ID', 'MCC', 'MNC']
    mi_rrc_df = mi_rrc_df.drop(columns=unused)
    mi_rrc_df = mi_rrc_df.dropna()
    cols_to_convert = ['measObjectId', 'carrierFreq', 'carrierFreq-r15', 'lte-reportConfigId', 'nr-reportConfigId', 'lte-measIdToRemoveList', 'measId', 'ssbFrequency', 'lte-MeasIdToAddMod']
    mi_rrc_df[cols_to_convert] = mi_rrc_df[cols_to_convert].astype('str')

    measobj_dict, report_config_dict, measId_dict = {}, {}, {}
    nr_measobj_dict, nr_report_config_dict, nr_measId_dict = {}, {}, {}

    def reset():
        """Forget every tracked configuration (currently not called, see loop)."""
        # nonlocal, not global: the dictionaries live in MeasureReport's scope.
        nonlocal measobj_dict, report_config_dict, measId_dict
        nonlocal nr_measobj_dict, nr_report_config_dict, nr_measId_dict
        measobj_dict, report_config_dict, measId_dict = {}, {}, {}
        nr_measobj_dict, nr_report_config_dict, nr_measId_dict = {}, {}, {}

    MR = namedtuple('MR',['time', 'event', 'others'], defaults=[None,None,''])
    reports = []

    Unknown = REPORTCONFIG('Unknown', {})

    def lookup_event(id_map, config_map, meas_id):
        """Resolve measId -> (objectId, reportConfigId) -> REPORTCONFIG."""
        try:
            return config_map[id_map[meas_id][1]]
        except KeyError:
            # measId or its report configuration was never seen in this log.
            return copy.deepcopy(Unknown)

    for i in range(len(mi_rrc_df)):

        type_id = mi_rrc_df['type_id'].iloc[i]
        if type_id == "5G_NR_RRC_OTA_Packet" or type_id == "LTE_RRC_Serv_Cell_Info":
            continue

        timestamp = mi_rrc_df['Timestamp'].iloc[i]

        # if mi_rrc_df["rrcConnectionRelease"].iloc[i] == 1:
        #     reset()

        # --- measId removal (LTE) ---
        to_remove = mi_rrc_df["lte-measIdToRemoveList"].iloc[i]
        if to_remove != '0':
            measIdToRemove_list = to_remove.split('@')
            if len(measIdToRemove_list) == 32:
                # A list of 32 ids is handled as "remove everything".
                measId_dict = {}
            else:
                for meas_id in measIdToRemove_list:
                    measId_dict.pop(meas_id, None)

        # --- measurement reports ---
        # `others` is set per report: an NR report on a row that also carries
        # an LTE report must not inherit the 'E-UTRAN' tag, otherwise the
        # substring tests below would file it under an E-UTRAN bucket.
        if mi_rrc_df["lte-measurementReport"].iloc[i] == 1:
            meas_id = str(int(float(mi_rrc_df['measId'].iloc[i])))
            event = lookup_event(measId_dict, report_config_dict, meas_id)
            reports.append(MR(time = timestamp, event = event, others = 'E-UTRAN'))

        if mi_rrc_df["nr-measurementReport"].iloc[i] == 1:
            if not (mi_rrc_df['measId'].iloc[i] == 'none'):
                meas_id = str(int(float(mi_rrc_df['measId'].iloc[i])))
                event = lookup_event(nr_measId_dict, nr_report_config_dict, meas_id)
                reports.append(MR(time = timestamp, event = event, others = 'NR'))

        # --- measurement objects ---
        if mi_rrc_df["lte-MeasObjectToAddMod"].iloc[i] == 1:

            Id_list = mi_rrc_df["measObjectId"].iloc[i].split('@')
            measobj_list = mi_rrc_df["measObject"].iloc[i].split('@')
            carrierFreq_list = mi_rrc_df["carrierFreq"].iloc[i].split('@')
            carrierFreq_r15_list = mi_rrc_df["carrierFreq-r15"].iloc[i].split('@')

            # Frequencies are consumed in order, one per object of the matching kind.
            for a in range(len(Id_list)):
                if measobj_list[a] == "measObjectEUTRA (0)":
                    measobj_dict[Id_list[a]] = MEASOBJ(measobj_list[a], carrierFreq_list.pop(0))
                elif measobj_list[a] == "measObjectNR-r15 (5)":
                    measobj_dict[Id_list[a]] = MEASOBJ(measobj_list[a], carrierFreq_r15_list.pop(0))

        if mi_rrc_df["nr-MeasObjectToAddMod"].iloc[i] == 1:

            Id_list = mi_rrc_df["measObjectId"].iloc[i].split('@')
            measobj_list = mi_rrc_df["measObject"].iloc[i].split('@')
            ssbFrequency_list = mi_rrc_df["ssbFrequency"].iloc[i].split('@')

            for a in range(len(Id_list)):
                if measobj_list[a] == "measObjectNR (0)":
                    nr_measobj_dict[Id_list[a]] = MEASOBJ(measobj_list[a], ssbFrequency_list.pop(0))

        # --- report configurations ---
        if mi_rrc_df["lte-ReportConfigToAddMod"].iloc[i] == 1:

            reportConfigId_list = mi_rrc_df["lte-reportConfigId"].iloc[i].split('@')
            eventId_list = mi_rrc_df["lte-eventId"].iloc[i].split('@')
            parameter_list = mi_rrc_df["lte-parameter"].iloc[i].split('@')
            for a in range(len(reportConfigId_list)):
                report_config_dict[reportConfigId_list[a]] = REPORTCONFIG(eventId_list[a], parameter_list[a])

        if mi_rrc_df["nr-ReportConfigToAddMod"].iloc[i] == 1:

            reportConfigId_list = mi_rrc_df["nr-reportConfigId"].iloc[i].split('@')
            eventId_list = mi_rrc_df["nr-eventId"].iloc[i].split('@')
            parameter_list = mi_rrc_df["nr-parameter"].iloc[i].split('@')
            for a in range(len(reportConfigId_list)):
                nr_report_config_dict[reportConfigId_list[a]] = REPORTCONFIG(eventId_list[a], parameter_list[a])

        # --- measId -> (measObjectId, reportConfigId) mappings ---
        if mi_rrc_df["lte-MeasIdToAddMod"].iloc[i] not in  ['0', '0.0']:

            for item in mi_rrc_df["lte-MeasIdToAddMod"].iloc[i].split('@'):
                x = parse_measIdToAddMod(item)
                measId_dict[x[0]] = (x[1],x[2])

        if mi_rrc_df["nr-MeasIdToAddMod"].iloc[i] != '0' and mi_rrc_df["nr-MeasIdToAddMod"].iloc[i] != 0:

            for item in mi_rrc_df["nr-MeasIdToAddMod"].iloc[i].split('@'):
                x = parse_measIdToAddMod(item)
                nr_measId_dict[x[0]] = (x[1],x[2])

    # Sort to Dict
    types = ['eventA1','eventA2','E-UTRAN-eventA3', 'eventA5', 'eventA6','NR-eventA3', 'eventB1-NR-r15','reportCGI', 'reportStrongestCells', 'others']
    D = {k: [] for k in types}

    # (substring searched in the event name, bucket) pairs, tested in this order.
    lte_buckets = [
        ('eventA1', 'eventA1'),
        ('eventA2', 'eventA2'),
        ('eventA3', 'E-UTRAN-eventA3'),
        ('eventA5', 'eventA5'),
        ('eventA6', 'eventA6'),
        ('eventB1-NR-r15', 'eventB1-NR-r15'),
        ('reportCGI', 'reportCGI'),
        ('reportStrongestCells', 'reportStrongestCells'),
    ]

    for mr in reports:
        bucket = 'others'
        if 'E-UTRAN' in mr.others:
            for needle, key in lte_buckets:
                if needle in mr.event.name:
                    bucket = key
                    break
        elif 'NR' in mr.others and 'eventA3' in mr.event.name:
            bucket = 'NR-eventA3'
        D[bucket].append(mr)

    return D

In [None]:
# Correct MR with HO
def correct_MR_with_HO(MRs, HOs, window=0.3):
    """Reclassify unknown measurement reports that are soon followed by a handover.

    Every report in ``MRs['others']`` is inspected. An 'E-UTRAN' report is
    re-filed under 'E-UTRAN-eventA3' if an LTE_HO or MN_HO starts strictly
    after it and less than ``window`` seconds later; an 'NR' report is re-filed
    under 'NR-eventA3' using SN_HO in the same way. A report is added at most
    once, even when several handovers fall inside the window.

    Args:
        MRs: dict of bucket name -> list of MR tuples; must contain 'others'.
        HOs: dict of handover type -> list of objects with a ``start`` attribute.
        window: Maximum delay in seconds between report and handover start.

    Returns:
        A deep copy of ``MRs`` without the 'others' bucket, with the
        reclassified reports appended. ``MRs`` itself is left untouched.
        Note that re-filed reports carry the plain string 'eventA3' as
        ``event``, not a REPORTCONFIG instance.
    """
    MR = namedtuple('MR',['time', 'event', 'others'], defaults=[None,None,''])
    # The 'others' bucket is dropped from the result, so do not copy it.
    new_MRs = copy.deepcopy({key: val for key, val in MRs.items() if key != 'others'})

    def followed_by_ho(mr, hos):
        return any(window > (ho.start - mr.time).total_seconds() > 0 for ho in hos)

    for mr in MRs['others']:
        if 'E-UTRAN' in mr.others:
            if followed_by_ho(mr, HOs['LTE_HO'] + HOs['MN_HO']):
                new_MRs['E-UTRAN-eventA3'].append(MR(time = mr.time, event = 'eventA3', others = mr.others))

        elif 'NR' in mr.others:
            if followed_by_ho(mr, HOs['SN_HO']):
                new_MRs['NR-eventA3'].append(MR(time = mr.time, event = 'eventA3', others = mr.others))

    return new_MRs

## HO

In [None]:
def parse_mi_ho(f):
    """Extract handover / connection / failure events from an RRC CSV log.

    The CSV is read with pandas and walked row by row (rows whose ``type_id``
    is "5G_NR_RRC_OTA_Packet" or "LTE_RRC_Serv_Cell_Info" are skipped). While
    walking, the function tracks the current LTE cell (``lte_pci``,
    ``lte_earfcn``), the current NR cell (``nr_pci``, the string 'O' meaning
    none) and the current SCell list, and emits one
    ``HO(start, end, others, trans)`` namedtuple per detected event.

    Args:
        f: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        dict with keys 'Conn_Rel', 'Conn_Req', 'LTE_HO', 'MN_HO',
        'MN_HO_to_eNB', 'SN_setup', 'SN_Rel', 'SN_HO', 'RLF_II', 'RLF_III',
        'SCG_RLF', 'Add_SCell', each mapping to a list of HO tuples in log
        order. ``end`` is None when no completing message was found in the
        look-ahead window; ``others`` is a free-text annotation and ``trans``
        a textual description of the cell transition.
    """

    df = pd.read_csv(f)
    # Timestamps are shifted by +8 hours (presumably UTC -> local time; confirm).
    df["Timestamp"] = df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
    nr_pci = 'O'
    scells = []
    # Filled from the first processed row, then updated on every LTE cell change.
    lte_pci, lte_earfcn = None, None

    def NR_OTA(idx):
        return bool(df["type_id"].iloc[idx] == "5G_NR_RRC_OTA_Packet")

    def LTE_SERV_INFO(idx):
        return bool(df["type_id"].iloc[idx] == "LTE_RRC_Serv_Cell_Info")

    def is_set(value):
        # pd.isna (unlike np.isnan) also accepts string cells without raising.
        return value not in [0,'0'] and not pd.isna(value)

    # NOTE: the search helpers below read `t`, the timestamp of the row the
    # main loop is currently processing, from the enclosing scope.

    def find_1st_after(start_idx, target, look_after=1):
        """First row at/after start_idx, within look_after s of `t`, where column `target` is set."""
        for j in range(start_idx, len(df)):
            t_ = df["Timestamp"].iloc[j]
            if NR_OTA(j) or LTE_SERV_INFO(j):
                continue
            if (t_ - t).total_seconds() > look_after:
                return None, None
            if is_set(df[target].iloc[j]):
                return t_, j
        return None, None

    def find_1st_before(start_idx, target, look_before=1):
        """First row at/before start_idx, within look_before s of `t`, where column `target` is set."""
        for j in range(start_idx, -1, -1):
            t_ = df["Timestamp"].iloc[j]
            if NR_OTA(j) or LTE_SERV_INFO(j):
                continue
            if (t - t_).total_seconds() > look_before:
                return None, None
            if is_set(df[target].iloc[j]):
                return t_, j
        return None, None

    def find_1st_before_with_special_value(start_idx, target, target_value, look_before=1):
        """Like find_1st_before, but column `target` must equal `target_value`."""
        for j in range(start_idx, -1, -1):
            t_ = df["Timestamp"].iloc[j]
            if NR_OTA(j) or LTE_SERV_INFO(j):
                continue
            if (t - t_).total_seconds() > look_before:
                return None, None
            if df[target].iloc[j] in [target_value] and not pd.isna(df[target].iloc[j]):
                return t_, j
        return None, None

    def freq_at(idx):
        """'Freq' of row idx as int, or 'O' when the row is unknown or not numeric."""
        if idx is None:
            return 'O'
        try:
            return int(float(df['Freq'].iloc[idx]))
        except (TypeError, ValueError):
            return 'O'

    def find_in_D_exact(targets):
        """Name of the first event type in `targets` having an event that starts exactly at `t`."""
        for target in targets:
            for ho in D[target]:
                if (t - ho.start).total_seconds() == 0:
                    return target
        return None

    def find_in_D_first_before(targets, look_before=1):
        """Closest already-recorded event of the given types finishing before `t`.

        The distance is measured from the event's end (or its start when it has
        no end). Returns (event type, HO) when the closest strictly-earlier
        event is less than look_before seconds away, else (None, None).
        """
        candidates = []
        for target in targets:
            for ho in D[target]:
                ref = ho.start if ho.end is None else ho.end
                gap = (t - ref).total_seconds()
                if gap > 0:
                    candidates.append((gap, target, ho))

        # Guard: min() on an empty sequence would raise ValueError.
        if candidates:
            closest = min(candidates, key=lambda x: x[0])
            if closest[0] < look_before:
                return closest[1], closest[2]

        return None, None

    HO = namedtuple('HO',['start', 'end', 'others', 'trans'], defaults=[None,None,'',''])

    D = {
        'Conn_Rel':[],
        'Conn_Req':[], # Setup
        'LTE_HO': [], # LTE -> newLTE
        'MN_HO': [], # LTE + NR -> newLTE + NR
        'MN_HO_to_eNB': [], # LTE + NR -> newLTE
        'SN_setup': [], # LTE -> LTE + NR => NR setup
        'SN_Rel': [], # LTE + NR -> LTE
        'SN_HO': [], # LTE + NR -> LTE + newNR
        'RLF_II': [],
        'RLF_III': [],
        'SCG_RLF': [],
        'Add_SCell': [],
        }

    for i in range(len(df)):

        # Pass NR RRC packet. In NSA mode, LTE RRC packet include NR packet message.
        if NR_OTA(i) or LTE_SERV_INFO(i):
            continue

        if lte_pci is None:
            # First processed row: take the initial serving cell from it.
            lte_pci = df["PCI"].iloc[i]
            lte_earfcn = int(df["Freq"].iloc[i])

        others = ''
        t = df["Timestamp"].iloc[i]

        if df["rrcConnectionRelease"].iloc[i] == 1:
            D['Conn_Rel'].append(HO(start=t))
            nr_pci = 'O'

        if df["rrcConnectionRequest"].iloc[i] == 1:

            # Define end of rrcConnectionRequest to be rrcConnectionReconfigurationComplete or securityModeComplete.
            a = find_1st_after(i, 'rrcConnectionReconfigurationComplete',look_after=2)[0]
            b = find_1st_after(i, 'securityModeComplete',look_after=2)[0]

            # The later of the two completions is the end; None when neither was found.
            if a is None and b is None: end = None
            elif a is None and b is not None: end = b
            elif a is not None and b is None: end = a
            else: end = a if a > b else b

            _, idx = find_1st_after(i, 'ueCapabilityInformation',look_after=1)
            if idx is not None:
                sup_band = df['bandEUTRA'].iloc[idx]
                others += f' supported band: {sup_band}.'

            serv_cell, serv_freq = df["PCI"].iloc[i], int(df["Freq"].iloc[i])
            trans = f'({lte_pci}, {lte_earfcn}) -> ({serv_cell}, {serv_freq})'

            # Check if caused by RLF III.
            a, idx = find_1st_before(i, 'rrcConnectionReestablishmentReject', look_before=1)
            if a is not None:
                others += ' After RLF III.'

            D['Conn_Req'].append(HO(start=t,end=end,trans=trans, others=others))

            nr_pci = 'O'
            lte_pci = serv_cell
            lte_earfcn = serv_freq

        if df["lte-rrc.t304"].iloc[i] == 1:

            end, _ = find_1st_after(i, 'rrcConnectionReconfigurationComplete')
            serv_cell, target_cell = df["PCI"].iloc[i], int(df['lte_targetPhysCellId'].iloc[i])
            serv_freq, target_freq = int(df["Freq"].iloc[i]), int(df['dl-CarrierFreq'].iloc[i])

            lte_pci = target_cell
            lte_earfcn = target_freq

            if df["SCellToAddMod-r10"].iloc[i] == 1:
                n =len(str(df["SCellIndex-r10.1"].iloc[i]).split('@'))
                others += f' Set up {n} SCell.'
            else:
                scells = []

            if serv_freq != target_freq:
                a, _ = find_1st_before(i, "rrcConnectionReestablishmentRequest", 1)
                others += " Inter frequency HO."
                if a is not None:
                    others += " Near after RLF."

            if df["nr-rrc.t304"].iloc[i] == 1 and df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:

                if serv_cell == target_cell and serv_freq == target_freq:

                    a, _ = find_1st_before(i, "rrcConnectionReestablishmentRequest", 2)

                    if a is not None:

                        ho_type, ho = find_in_D_first_before(['RLF_II', 'RLF_III'], 2)
                        if ho is not None:
                            others += f' Near after RLF of trans: {ho.trans}.'
                        else:
                            # A re-establishment request was seen but no matching
                            # RLF event is recorded within the window.
                            others += ' Near after RLF.'

                    else:

                        ho_type, _ = find_in_D_first_before(['MN_HO_to_eNB', 'SN_Rel'], 2)
                        if ho_type is not None:
                            others += f' Near after {ho_type}.'

                    ori_serv = nr_pci
                    nr_pci = int(df['nr_physCellId'].iloc[i])
                    trans = f'({serv_cell}, {serv_freq}) | {ori_serv} -> {nr_pci}'
                    D['SN_setup'].append(HO(start=t, end=end, others=others, trans=trans))

                else:

                    nr_pci = int(df['nr_physCellId'].iloc[i])
                    trans = f'({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {nr_pci}'
                    D['MN_HO'].append(HO(start=t, end=end, others=others, trans=trans))

            else:

                if serv_cell == target_cell and serv_freq == target_freq:

                    a, _ = find_1st_before(i, "scgFailureInformationNR-r15")
                    if a is not None:
                        others += " Caused by scg-failure."

                    orig_serv = nr_pci
                    nr_pci = 'O'
                    trans = f'({serv_cell}, {serv_freq}) | {orig_serv} -> {nr_pci}'
                    D['SN_Rel'].append(HO(start=t, end=end, others=others, trans=trans))

                else:

                    a, _ = find_1st_before(i,"rrcConnectionSetup",3)
                    if a is not None:
                        others += ' Near After connection setup.'
                    if nr_pci == 'O':
                        trans = f'({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {nr_pci}'
                        D['LTE_HO'].append(HO(start=t, end=end, others=others, trans=trans))
                    else:
                        orig_serv = nr_pci
                        nr_pci = 'O'
                        trans = f'({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {orig_serv} -> {nr_pci}'
                        D['MN_HO_to_eNB'].append(HO(start=t, end=end, others=others, trans=trans))


        if df["nr-rrc.t304"].iloc[i] == 1 and not df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:

            end, _ = find_1st_after(i,'rrcConnectionReconfigurationComplete')

            serv_cell, serv_freq = df["PCI"].iloc[i], int(df["Freq"].iloc[i])
            orig_serv = nr_pci
            nr_pci = int(df['nr_physCellId'].iloc[i])
            trans = f'({serv_cell}, {serv_freq}) | {orig_serv} -> {nr_pci}'
            D['SN_HO'].append(HO(start=t,end=end,trans=trans))


        if df["rrcConnectionReestablishmentRequest"].iloc[i] == 1:

            end1, _ = find_1st_after(i, 'rrcConnectionReestablishmentComplete', look_after=1)
            b, _ = find_1st_after(i, 'rrcConnectionReestablishmentReject', look_after=1)
            end2, _ = find_1st_after(i, 'securityModeComplete',look_after=3)

            others += ' ' + df["reestablishmentCause"].iloc[i] + '.'
            scells = []

            c, _ = find_1st_before(i, 'scgFailureInformationNR-r15', 1)
            if c is not None:
                others  += ' caused by scgfailure.'

            serv_cell, rlf_cell = df["PCI"].iloc[i], int(df['physCellId.3'].iloc[i])
            serv_freq = int(df['Freq'].iloc[i])

            # Type II & Type III
            if end1 is not None:

                orig_serv = nr_pci
                nr_pci = 'O'

                # Frequency of the failed cell = Freq of its most recent row, 'O' if unknown.
                _, idx = find_1st_before_with_special_value(i, 'PCI', rlf_cell, look_before=30)
                rlf_freq = freq_at(idx)
                trans = f'({rlf_cell}, {rlf_freq}) -> ({serv_cell}, {serv_freq}) | {orig_serv} -> {nr_pci}'
                D['RLF_II'].append(HO(start=t,end=end1,others=others,trans=trans))

                lte_pci = serv_cell
                lte_earfcn = serv_freq

            elif b is not None and end2 is not None:

                orig_serv = nr_pci
                nr_pci = 'O'
                # Same 'O' fallback as the RLF_II branch instead of failing on a missing row.
                _, idx = find_1st_before_with_special_value(i, 'PCI', rlf_cell, look_before=10)
                rlf_freq = freq_at(idx)

                _, idx = find_1st_after(i, "rrcConnectionRequest", 2)
                recon_cell = df['PCI'].iloc[idx] if idx is not None else 'O'
                recon_freq = freq_at(idx)

                trans = f'({rlf_cell}, {rlf_freq}) -> ({recon_cell}, {recon_freq}) | {orig_serv} -> {nr_pci}'
                D['RLF_III'].append(HO(start=t,end=end2,others=others,trans=trans))

                # lte_pci, lte_earfcn will be updated in rrcConnectionRequest.

            else:

                others+=' No end.'
                D['RLF_II'].append(HO(start=t,others=others))
                print('No end for RLF')

        if df["scgFailureInformationNR-r15"].iloc[i] == 1:

            others += ' ' + df["failureType-r15"].iloc[i] + '.'
            a, idx1 = find_1st_after(i, "rrcConnectionReestablishmentRequest", look_after=1)
            b, idx2 = find_1st_after(i, "lte-rrc.t304", look_after=10)

            if a is not None:

                end1, _ = find_1st_after(idx1, 'rrcConnectionReestablishmentComplete', look_after=1)
                b, _ = find_1st_after(idx1, 'rrcConnectionReestablishmentReject', look_after=1)
                end2 = find_1st_after(idx1, 'securityModeComplete',look_after=3)[0]

                others += ' Result in rrcReestablishment.'

                # Type II & Type III Result
                if end1 is not None:
                    D['SCG_RLF'].append(HO(start=t,end=end1,others=others))
                elif b is not None and end2 is not None:
                    D['SCG_RLF'].append(HO(start=t,end=end2,others=others))
                else:
                    others += ' No end.'
                    D['SCG_RLF'].append(HO(start=t,others=others))
                    print('No end for scg failure result in rrcReestablishment.')

            elif b is not None:

                end, _ = find_1st_after(idx2, 'rrcConnectionReconfigurationComplete')
                serv_cell, target_cell = df["PCI"].iloc[idx2], df['lte_targetPhysCellId'].iloc[idx2]
                serv_freq, target_freq = int(df["Freq"].iloc[idx2]), df['dl-CarrierFreq'].iloc[idx2]
                # We do not change nr_pci here. Instead, we will change it at gNB_Rel event.
                trans = f'({serv_cell}, {serv_freq}) | {nr_pci} -> O'

                if serv_cell == target_cell and serv_freq == target_freq:
                    others += ' Result in gNB release.'
                else:
                    others += ' Result in MN HO to eNB.'
                D['SCG_RLF'].append(HO(start=t,end=end,others=others,trans=trans))

            else:

                print('No end for scg failure.')
                others += ' No end.'
                D['SCG_RLF'].append(HO(start=t,others=others))

        if df['SCellToAddMod-r10'].iloc[i] == 1 and df['physCellId-r10'].iloc[i] != 'nr or cqi report':

            # Annotations collected for other events on this row are not reused here.
            others = ''
            pcis = str(df["physCellId-r10"].iloc[i]).split('@')
            freqs = str(df["dl-CarrierFreq-r10"].iloc[i]).split('@')
            orig_scells = scells
            scells = [(int(float(pci)), int(float(freq))) for pci, freq in zip(pcis, freqs)]

            others += f' Set up {len(scells)} SCell.'
            trans = f'{orig_scells} -> {scells}'

            end, _ = find_1st_after(i,'rrcConnectionReconfigurationComplete')

            a, _ = find_1st_before(i, "rrcConnectionReestablishmentRequest", 3)
            if a is not None:
                others += ' Near after RLF.'

            a = find_in_D_exact(['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 'SN_setup', 'SN_Rel'])
            if a is not None:
                others += f' With {a}.'

            D['Add_SCell'].append(HO(start=t,end=end,others=others, trans=trans))

    return D


In [None]:
def pop_dict(band, d):
    """Return a shallow copy of ``d`` keeping only keys that end with ``' ' + band``."""
    suffix = ' ' + band
    kept = d.copy()
    for unwanted in [key for key in d if not key.endswith(suffix)]:
        del kept[unwanted]
    return kept

class ss_dict:
    """Signal-strength samples per cell, built from one measurement row.

    ``self.dict`` maps a cell key to ``[rsrp_list, rsrq_list, time_list]``.
    The key is ``'PCell'`` for the primary serving cell and ``'<pci> <earfcn>'``
    for every other serving or neighbour cell.

    Args:
        pd_data: One row (a label-indexed ``pandas.Series``, e.g.
            ``df.iloc[index]``) with the labels "EARFCN", "Serving Cell Index",
            "PCI", "RSRP(dBm)", "RSRQ(dB)" and "Timestamp". Neighbour cells are
            read positionally from position 9 onward in groups of three
            (cell id, RSRP, RSRQ). String values are expected for PCI/EARFCN
            (they are concatenated) — assumes the CSV was read with dtype=str.
        d: Optional ready-made dict; when given it replaces the built one.
    """

    def __init__(self,pd_data=None,d=None): ## Input pd_df.iloc[index]
        self.dict = {'PCell':[[],[],[]]}
        if pd_data is not None:
            self.nei_cell(pd_data)
            self.serv_cell(pd_data)
        if d is not None:
            self.dict = d

    def serv_cell(self, pd_data):
        """Record the serving-cell sample of the row."""
        earfcn = pd_data["EARFCN"]
        serv_cell_id = pd_data["Serving Cell Index"]
        pci = pd_data["PCI"]
        rsrp = float(pd_data["RSRP(dBm)"])
        rsrq = float(pd_data["RSRQ(dB)"])
        t = pd_data["Timestamp"]
        if serv_cell_id == "PCell":
            self.dict['PCell'][0].append(rsrp)
            self.dict['PCell'][1].append(rsrq)
            self.dict['PCell'][2].append(t)
        else:
            # Non-primary serving cell: stored under its own key (overwrites a
            # neighbour entry with the same key written by nei_cell).
            self.dict[pci+' '+earfcn] = [[rsrp], [rsrq], [t]]

    def nei_cell(self, pd_data):
        """Record neighbour-cell samples; stops at the first '-' or NaN cell id."""
        earfcn = pd_data["EARFCN"]
        t = pd_data["Timestamp"]
        for i in range(9, len(pd_data), 3):
            # .iloc makes the positional access explicit; plain pd_data[i] on a
            # label-indexed Series is deprecated in pandas.
            cell = pd_data.iloc[i]
            if cell == '-' or np.isnan(float(cell)):
                break
            rsrp = float(pd_data.iloc[i+1])
            rsrq = float(pd_data.iloc[i+2])
            self.dict[str(cell)+' '+earfcn] = [[rsrp], [rsrq], [t]]

    def __add__(self, sd2):
        """Append the samples of ``sd2`` to this object's samples.

        The merge is done in place on ``self.dict`` (so accumulating with
        ``total = total + row`` stays cheap) and the returned ss_dict wraps
        that same dict. ``sd2`` is never modified: lists for cells new to
        ``self`` are copied instead of shared.
        """
        d1 = self.dict
        d2 = sd2.dict
        for key, columns in d2.items():
            if key in d1:
                d1[key][0].extend(columns[0])
                d1[key][1].extend(columns[1])
                d1[key][2].extend(columns[2])
            else:
                d1[key] = [list(column) for column in columns]
        return ss_dict(d=d1)

    def __repr__(self):
        return str(self.dict)

    def sort_dict_by_time(self):
        """Reorder every cell's samples by timestamp (stable sort), in place of self.dict."""
        def sort_element(element):
            rows = sorted(zip(element[0], element[1], element[2]), key=lambda row: row[2])
            return [[r[0] for r in rows], [r[1] for r in rows], [r[2] for r in rows]]
        self.dict = {k: sort_element(v) for k, v in self.dict.items()}


class nr_ss_dict:
    """NR signal-strength samples per cell, built from one measurement row.

    ``self.dict`` maps a cell key to ``[rsrp_list, rsrq_list, time_list]``.
    Measured cells are keyed by their raw cell id; the entry whose id equals
    the row's "Serving Cell PCI" is moved under the key ``'PSCell'``.

    Args:
        pd_data: One row (a label-indexed ``pandas.Series``) with the labels
            "Timestamp" and "Serving Cell PCI". Cells are read positionally
            from position 6 onward in groups of three (cell id, RSRP, RSRQ).
        d: Optional ready-made dict; when given it replaces the built one.
    """

    def __init__(self, pd_data=None, d=None):
        self.dict = {'PSCell':[[],[],[]]}
        if pd_data is not None:
            self.nei_cell(pd_data)
            self.serv_cell(pd_data)
        if d is not None:
            self.dict = d

    def serv_cell(self, pd_data):
        """Move the serving cell's samples (if measured) under 'PSCell'."""
        self.pscell = pd_data["Serving Cell PCI"]
        for cell in self.dict:
            if self.pscell == cell:
                self.dict["PSCell"][0] += self.dict[cell][0]
                self.dict["PSCell"][1] += self.dict[cell][1]
                self.dict["PSCell"][2] += self.dict[cell][2]
                # Safe to drop the key here: the loop ends immediately after.
                self.dict.pop(cell)
                break

    def nei_cell(self, pd_data):
        """Record every measured cell; stops at the first '-' or NaN cell id."""
        t = pd_data["Timestamp"]
        for i in range(6, len(pd_data), 3):
            # .iloc makes the positional access explicit; plain pd_data[i] on a
            # label-indexed Series is deprecated in pandas.
            cell = pd_data.iloc[i]
            if cell == '-' or np.isnan(float(cell)):
                break
            rsrp = float(pd_data.iloc[i+1])
            rsrq = float(pd_data.iloc[i+2])
            self.dict[cell] = [[rsrp], [rsrq], [t]]

    def __repr__(self):
        return str(self.dict)

    def __add__(self, sd2):
        """Append the samples of ``sd2`` to this object's samples.

        The merge is done in place on ``self.dict`` and the returned
        nr_ss_dict wraps that same dict. ``sd2`` is never modified: lists for
        cells new to ``self`` are copied instead of shared.
        """
        d1 = self.dict
        d2 = sd2.dict
        for key, columns in d2.items():
            if key in d1:
                d1[key][0] += columns[0]
                d1[key][1] += columns[1]
                d1[key][2] += columns[2]
            else:
                d1[key] = [list(column) for column in columns]
        return nr_ss_dict(d=d1)

# Single Radio

In [None]:
# Single Radio
def _pop_strongest_cell(cells):
    """Remove the cell with the highest mean RSRP from ``cells`` and return its means.

    ``cells`` maps a cell key to ``[rsrp_samples, rsrq_samples, timestamps]``.
    Returns ``(mean_rsrp, mean_rsrq)`` of the removed cell, or ``(0, 0)`` when
    ``cells`` is empty (no sample value, assign 0).
    """
    if len(cells) == 0:
        return 0, 0
    strongest = max(cells, key=lambda x: sum(cells[x][0])/len(cells[x][0]))
    samples = cells.pop(strongest)
    n_samples = len(samples[0])
    return sum(samples[0])/n_samples, sum(samples[1])/n_samples


def _collect_window_packets(df, cursor, window_start, window_end, latency_threshold,
                            loss_col="lost", latency_col="latency"):
    """Scan ``df`` from row ``cursor`` and summarise packets with window_start < t <= window_end.

    Returns ``(cursor, latencies, n_excessive, n_lost)``: ``latencies`` lists
    the latency of every received packet in the window, ``n_excessive`` counts
    those above ``latency_threshold`` and ``n_lost`` counts rows whose
    ``loss_col`` is truthy.  ``cursor`` is moved to the first row later than
    ``window_end`` (unchanged if no such row is reached) so the next window
    does not rescan earlier rows.
    """
    latencies, n_excessive, n_lost = [], 0, 0
    for i in range(cursor, len(df)):
        t = df['Timestamp'].iloc[i]
        if window_start < t <= window_end:
            if df[loss_col].iloc[i]:
                n_lost += 1
            else:
                latency = float(df[latency_col].iloc[i])
                latencies.append(latency)
                if latency > latency_threshold:
                    n_excessive += 1
        elif t > window_end:
            cursor = i
            break
    return cursor, latencies, n_excessive, n_lost


def data_create(dir, outfile, ul_df, dl_df):
    """Build the per-time-point feature CSV of one single-radio trace.

    Parameters
    ----------
    dir : str
        Device trace directory; the input CSVs are read from ``<dir>/data``
        (files ending in ``ml1.csv`` and ``rrc.csv``).
    outfile : str
        Path of the CSV to write (overwritten).
    ul_df, dl_df : pandas.DataFrame
        Uplink / downlink packet tables with ``Timestamp``, ``lost`` and
        ``latency`` columns.

    Notes
    -----
    * The time grid comes from notebook globals, not from arguments: ``start``
      (first time point), ``N`` (span in seconds), ``TS`` (step in seconds) and
      ``tp_range`` (window length in seconds).  They must be set before the
      call.  Each row aggregates the window ``(time_point - tp_range, time_point]``.
    * ``parse_mi_ho``, ``MeasureReport``, ``correct_MR_with_HO`` and
      ``ss_dict`` are defined elsewhere in this notebook.
    * The scans break on the first row later than the window, i.e. all inputs
      are expected to be in ascending time order.
    * The output file is opened only after all inputs were loaded and parsed,
      and it is always closed, even if a window raises.
    """
    d = os.path.join(dir, "data")

    excessive_latency_value = 0.1

    # The first ml1 file in sorted order is read as the LTE table, the second as the NR table.
    matches = filter(lambda x: x.endswith('ml1.csv'), os.listdir(d))
    ml1_filenames = sorted(list(matches))
    mi_ml1_file = os.path.join(d, ml1_filenames[0])
    mi_ml1_df = pd.read_csv(mi_ml1_file, dtype=str)
    mi_ml1_df["Timestamp"] = mi_ml1_df["Timestamp"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))

    nr_mi_ml1_file = os.path.join(d, ml1_filenames[1])
    nr_mi_ml1_df = pd.read_csv(nr_mi_ml1_file, dtype=str)
    nr_mi_ml1_df["Timestamp"] = nr_mi_ml1_df["Timestamp"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))

    # Collect HO, MR information
    matches = filter(lambda x: x.endswith('rrc.csv'), os.listdir(d))
    mi_rrc_filename = list(matches)[0]
    mi_rrc_file = os.path.join(d, mi_rrc_filename)
    mi_rrc_df = pd.read_csv(mi_rrc_file)
    mi_rrc_df["Timestamp"] = mi_rrc_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))

    HO_events = parse_mi_ho(mi_rrc_file)
    MRs = MeasureReport(mi_rrc_file)
    MRs = correct_MR_with_HO(MRs, HO_events)

    columns = [
        "Timestamp", "PCI", "EARFCN", "NR-PCI",
        "num_of_neis","RSRP","RSRQ","RSRP1","RSRQ1","RSRP2","RSRQ2", # LTE RSRP, RSRQ
        "nr-RSRP","nr-RSRQ","nr-RSRP1","nr-RSRQ1","nr-RSRP2","nr-RSRQ2" # NR ss-RSRP, ss-RSRQ
    ] + list(MRs.keys()) + list(HO_events.keys()) + ['RLF_cause'] + ["dl-loss", "ul-loss", "dl-exc-lat", "ul-exc-lat","dl-latency", "ul-latency"]

    # Row cursors: each table is scanned forward only, so a window never
    # revisits rows that are already known to be later than a previous window.
    i_pcap = [0, 0] # For pcap (dl, ul)
    i_ = [0, 0] # For ml1 nr_ml1
    i_rrc = 0 # For rrc
    data_buffers = {'rsrp':0, 'rsrq':0}
    pci, earfcn, nr_pci = None, None, None
    # Performance metrics carry over from the previous window when a window has
    # no packets; start them at 0 so an empty first window does not fail.
    dl_loss, ul_loss, dl_exc, ul_exc, dl_avg_lat, ul_avg_lat = 0, 0, 0, 0, 0, 0

    window = dt.timedelta(seconds=tp_range)

    with open(outfile, 'w') as f:
        f.write(",".join(columns)+"\n")

        for time_point in [start + dt.timedelta(seconds=i) for i in np.arange(0, N+TS, TS, dtype='float64')]:
            window_start = time_point - window

            # Get pci, earfcn, nr_pci (the last value seen is kept across windows)
            for i in range(i_rrc, len(mi_rrc_df)):
                t = mi_rrc_df['Timestamp'].iloc[i]
                if window_start < t <= time_point:
                    type_id = mi_rrc_df['type_id'].iloc[i]
                    if type_id == 'LTE_RRC_OTA_Packet':
                        pci = mi_rrc_df['PCI'].iloc[i]
                        earfcn = int(mi_rrc_df['Freq'].iloc[i])
                    elif type_id == 'LTE_RRC_Serv_Cell_Info':
                        pci = mi_rrc_df['PCI'].iloc[i]
                        earfcn = int(mi_rrc_df['DL frequency'].iloc[i])
                    elif type_id == '5G_NR_RRC_OTA_Packet':
                        nr_pci = mi_rrc_df['PCI'].iloc[i]
                elif t > time_point:
                    i_rrc = i
                    break

            cell_related = [str(feature) for feature in [pci, earfcn, nr_pci]]

            # Get signal strength informations
            ss_related = []

            # LTE: accumulate the PCell rows of this window
            SS_DICT = ss_dict()
            for i in range(i_[0], len(mi_ml1_df)):
                t = mi_ml1_df['Timestamp'].iloc[i]
                serv_cell_idx = mi_ml1_df['Serving Cell Index'].iloc[i]

                if (window_start < t <= time_point) and serv_cell_idx=='PCell':
                    SS_DICT += ss_dict(mi_ml1_df.iloc[i])
                elif t > time_point:
                    i_[0] = i
                    break

            num_of_nei = len(SS_DICT.dict) - 1 # every key except "PCell"

            # Get primary serv cell rsrp, rsrq
            pcell_samples = SS_DICT.dict["PCell"]
            if len(pcell_samples[0]) != 0:
                pcell_rsrp = sum(pcell_samples[0])/len(pcell_samples[0])
                pcell_rsrq = sum(pcell_samples[1])/len(pcell_samples[0])
                data_buffers['rsrp'], data_buffers['rsrq'] = pcell_rsrp, pcell_rsrq
            else:
                pcell_rsrp, pcell_rsrq = data_buffers['rsrp'], data_buffers['rsrq'] # No sample value, use the previous one
            SS_DICT.dict.pop("PCell")

            # Get 1st, 2nd neighbor cell rsrp, rsrq (0 when there is no such neighbor)
            cell1_rsrp, cell1_rsrq = _pop_strongest_cell(SS_DICT.dict)
            cell2_rsrp, cell2_rsrq = _pop_strongest_cell(SS_DICT.dict)

            ss_related += [num_of_nei, pcell_rsrp, pcell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]

            # NR: accumulate every row of this window
            NR_SS_DICT = nr_ss_dict()
            for i in range(i_[1], len(nr_mi_ml1_df)):
                t = nr_mi_ml1_df['Timestamp'].iloc[i]

                if window_start < t <= time_point:
                    NR_SS_DICT += nr_ss_dict(nr_mi_ml1_df.iloc[i])
                elif t > time_point:
                    i_[1] = i
                    break

            # Get primary secondary serv cell rsrp, rsrq
            pscell_samples = NR_SS_DICT.dict["PSCell"]
            if len(pscell_samples[0]) != 0:
                pscell_rsrp = sum(pscell_samples[0])/len(pscell_samples[0])
                pscell_rsrq = sum(pscell_samples[1])/len(pscell_samples[0])
            else:
                pscell_rsrp, pscell_rsrq = 0,0 # No nr serving or no sample value assign 0
            NR_SS_DICT.dict.pop("PSCell")

            # Get 1st, 2nd neighbor cell rsrp, rsrq (0 when there is no such neighbor)
            cell1_rsrp, cell1_rsrq = _pop_strongest_cell(NR_SS_DICT.dict)
            cell2_rsrp, cell2_rsrq = _pop_strongest_cell(NR_SS_DICT.dict)

            ss_related += [pscell_rsrp, pscell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]
            ss_related = [str(feature) for feature in ss_related]
            # ================================================================================
            # Get HO informations: offset (s) of the first event of each type inside the window, 0 if none
            HO_related = [0] * len(HO_events.keys())
            rlf_cause = ''
            for i, ho_type in enumerate(list(HO_events.keys())):
                for ho in HO_events[ho_type]:
                    t = ho.start
                    if window_start < t <= time_point:
                        # Only the first HO of a type in the window is recorded.
                        if HO_related[i] == 0:
                            HO_related[i] = (t - window_start).total_seconds()
                            # Record Type of RLF
                            if ho_type in ['RLF_II', 'RLF_III']:
                                for key_word in ['reconfigurationFailure', 'handoverFailure', 'otherFailure']:
                                    if key_word in ho.others:
                                        rlf_cause = key_word
                    elif t > time_point:
                        break

            HO_related = [str(feature) for feature in HO_related] + [rlf_cause]

            # Get Event informations: same encoding as the HO columns
            MR_related = [0] * len(MRs.keys())
            for i, mr_type in enumerate(list(MRs.keys())):
                for mr in MRs[mr_type]:
                    t = mr.time
                    if window_start < t <= time_point:
                        # Only the first report of a type in the window is recorded.
                        if MR_related[i] == 0:
                            MR_related[i] = (t - window_start).total_seconds()
                    elif t > time_point:
                        break

            MR_related = [str(feature) for feature in MR_related]
            # ========================================================================
            # Get DL/UL latency, loss...
            i_pcap[0], dl_lats, dl_n_exc, dl_n_lost = _collect_window_packets(
                dl_df, i_pcap[0], window_start, time_point, excessive_latency_value)
            if len(dl_lats) != 0:
                dl_avg_lat = sum(dl_lats)/len(dl_lats)
            # else: no packet arrived; keep the previous value
            dl_total = len(dl_lats) + dl_n_lost
            if dl_total != 0:
                dl_exc = dl_n_exc/dl_total
                dl_loss = dl_n_lost/dl_total
            # else: no packet arrived or lost; keep the previous values

            i_pcap[1], ul_lats, ul_n_exc, ul_n_lost = _collect_window_packets(
                ul_df, i_pcap[1], window_start, time_point, excessive_latency_value)
            if len(ul_lats) != 0:
                ul_avg_lat = sum(ul_lats)/len(ul_lats)
            # else: no packet arrived; keep the previous value
            ul_total = len(ul_lats) + ul_n_lost
            if ul_total != 0:
                ul_exc = ul_n_exc/ul_total
                ul_loss = ul_n_lost/ul_total
            # else: no packet arrived or lost; keep the previous values

            performance_related = [dl_loss, ul_loss, dl_exc, ul_exc, dl_avg_lat, ul_avg_lat]
            performance_related = [str(feature) for feature in performance_related]
            f.write(",".join([str(time_point)] + cell_related + ss_related + MR_related + HO_related + performance_related)+"\n")

In [None]:
# Automation
# Walks every date directory of the database, reads the experiment list from
# the date's .md record and writes one feature CSV per (device, trace).
## Data type
front_cut, back_cut = 1, 1   # seconds trimmed from the start / end of each trace
TS, tp_range = 0.2, 0.2      # time-point step and window length in seconds (read by data_create)

## outpath
out_path = '/home/wmnlab/sheng-ru/ml_data/200ms'
database = '/home/wmnlab/D/database'
unwanted = ['2023-05-04','2023-05-07','2023-12-16']

# Resume support: every (date, trace, dev, band) before stop_point is skipped.
stop_point = ('2023-10-19', '#02', 'sm07', 'B7B8')
stop = True

pattern = '20*'  # date directories and record files both start with the year

for date in sorted(os.listdir(database)):
    if not fnmatch.fnmatch(date, pattern) or date in unwanted:
        continue
    date_dir = os.path.join(database, date)
    record = [os.path.join(date_dir, item) for item in os.listdir(date_dir) if fnmatch.fnmatch(item, pattern) and item.endswith('.md')][0]
    with open(record, 'r') as file:
        while True:
            # The record lists experiments as line pairs:
            # experiment directory name, then a JSON object keyed by device.
            line1 = file.readline()
            line2 = file.readline()
            if (not line1 and not line2) or ('#endif' in line1 or '#endif' in line2):
                break
            if 'Modem_Action' in line1: # Let's not take the action test data in it now.
                continue
            exp_dir = os.path.join(date_dir, line1.strip())
            Setting = json.loads(line2.strip())
            # readline() keeps the trailing newline; strip it so it does not
            # end up inside the output file name.
            exp_name = f'_{line1.strip()}'

            devs = [x for x in os.listdir(exp_dir) if ('sm' in x) or ('qc' in x)]

            for dev in sorted(devs):
                device_dir = os.path.join(exp_dir, dev)
                for trace in sorted(os.listdir(device_dir)):

                    # Record band-locking information
                    b = Setting[dev]

                    # From previous stop point continue
                    if (date, trace, dev, b) == stop_point: # Stop point
                        stop = False

                    if stop:
                        continue

                    print(date, trace, dev, b)

                    dt_dir = os.path.join(device_dir, trace) # device trace directory

                    ul_loss_lat = os.path.join(dt_dir, 'data', "udp_uplk_loss_latency.csv")
                    ul_loss_lat_df = pd.read_csv(ul_loss_lat)
                    ul_loss_lat_df["Timestamp"] = ul_loss_lat_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

                    dl_loss_lat = os.path.join(dt_dir, 'data', "udp_dnlk_loss_latency.csv")
                    dl_loss_lat_df = pd.read_csv(dl_loss_lat)
                    dl_loss_lat_df["Timestamp"] = dl_loss_lat_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

                    # Get timepoint from start to end; the downlink file decides
                    # both ends. start and N are globals read by data_create.
                    start = dl_loss_lat_df["Timestamp"].iloc[0] + dt.timedelta(seconds=front_cut)
                    end = dl_loss_lat_df["Timestamp"].iloc[-1] - dt.timedelta(seconds=back_cut)
                    start, end = start.replace(microsecond=0), end.replace(microsecond=0)
                    print(f'Experiment Time from {start} to {end}.')
                    N = int((end - start).total_seconds()) # How many time_point in second

                    # Out file here
                    outfile = os.path.join(out_path, date + f'_{dev}{exp_name}_{trace}_{b}.csv')
                    print(outfile+'\n')

                    data_create(dt_dir, outfile, ul_loss_lat_df, dl_loss_lat_df)


## Check Data

In [None]:
# Delete the initial part where packets were already being transmitted but
# MobileInsight had not started recording yet (rows whose RSRP is still 0).
# NOTE: every listed CSV is rewritten in place.
from pprint import pprint
# Folder path
folder_path = "/home/wmnlab/sheng-ru/ml_data/v22"

# Collect every CSV file in the folder; record.csv is not a data file and is
# left out (it does not have to exist).
file_list = [os.path.join(folder_path, file) for file in os.listdir(folder_path)
             if file.endswith(".csv") and file != 'record.csv']
file_list.sort()
# pprint(file_list)

time.sleep(.5)
warning = input('Sure to process the listed data? (y/n)')
if warning != 'y':
    raise RuntimeError('Aborted: processing of the listed data was not confirmed.')

# Process each CSV file
for file_name in file_list:

    print(file_name)

    # Read the CSV file
    df = pd.read_csv(file_name)

    # Find the index of the first row whose RSRP is not 0
    nonzero_rows = df.index[df['RSRP'] != 0]
    if len(nonzero_rows) == 0:
        # Nothing to keep a starting point from; leave the file untouched.
        print(f'  skipped: RSRP is 0 in every row of {file_name}')
        continue
    start_index = nonzero_rows[0]

    # Drop the leading rows whose RSRP is 0
    df = df.iloc[start_index:]

    # Write the updated CSV file back
    df.to_csv(file_name, index=False)

In [None]:
from tqdm.notebook import tqdm

def check_continuous_values(df, column_name, consecutive_count=120):
    """Report whether ``column_name`` holds a run of identical values.

    Slides a window of ``consecutive_count`` rows over the column.  For the
    first window whose values are all the same, the window's starting position
    is printed and ``True`` is returned; ``False`` is returned when no such
    window exists (including when the column is shorter than the window).

    Raises
    ------
    ValueError
        If ``column_name`` is not a column of ``df``.
    """
    if column_name not in df.columns:
        raise ValueError(f"Column '{column_name}' not found in DataFrame.")

    series = df[column_name]
    last_start = len(series) - consecutive_count

    position = 0
    while position <= last_start:
        window = series[position:position + consecutive_count]
        # A window is constant exactly when it has one distinct value.
        distinct_values = set(window)
        if len(distinct_values) == 1:
            print(position)
            return True
        position += 1

    return False

# Folder path
folder_path = "/home/wmnlab/sheng-ru/ml_data/v22"

# Collect every CSV file in the folder; record.csv is not a data file and is
# left out (it does not have to exist).
file_list = [os.path.join(folder_path, file) for file in os.listdir(folder_path)
             if file.endswith(".csv") and file != 'record.csv']
file_list.sort()
# pprint(file_list)

time.sleep(.5)
warning = input('Sure to check the listed data? (y/n)')
if warning != 'y':
    raise RuntimeError('Aborted: check of the listed data was not confirmed.')

# Print every file whose RSRP column stays constant for 30 consecutive rows.
for file_name in tqdm(file_list):

    df = pd.read_csv(file_name)
    if check_continuous_values(df, 'RSRP', 30):
        print(file_name)