# Functions

In [2]:
import pandas as pd
import datetime as dt
import time
import numpy as np
import os
import swifter
from collections import namedtuple
from pathlib import Path

In [8]:
def Average(L):
    """Return the arithmetic mean of the values in the sized sequence L."""
    total = sum(L)
    count = len(L)
    return total / count

def mi_event_parsing(miinfofile):
    """Scan an RRC message table row by row and group handover-related events.

    Rows are addressed with ``miinfofile.loc[i, ...]`` for ``i`` in
    ``range(len(miinfofile))``, so the frame must carry a 0..n-1 integer index.
    A procedure is opened by a row whose ``nr-rrc.t304`` / ``lte-rrc.t304`` /
    ``nr-Config-r15: release (0)`` cell is truthy and closed by the next row
    whose ``rrcConnectionReconfigurationComplete`` cell is truthy.

    Returns:
        A tuple ``(event_lists, handover_num)``. ``event_lists`` is a list of
        11 lists, in this order: lte_4G_handover, nr_setup, nr_handover,
        nr_removal, lte_5G_handover, nr_lte_handover, eNB_to_MN, MN_to_eNB,
        scg_failure, reestablish_type2, reestablish_type3. Every entry is a
        two-element list of values read from the ``"time"`` column.
        ``handover_num`` counts the completion rows seen while at least one
        procedure was pending.
    """
    def nr_pci_track():
        # Reads the "PCI" cell of the row currently visited by the outer loop
        # (``i`` is taken from the enclosing scope).
        if miinfofile.loc[i, "PCI"] == 65535: ## 65535 is for Samsung phone.
            nr_pci = '-'
        else:
            nr_pci = miinfofile.loc[i, "PCI"]
        return nr_pci

    nr_pci = None ## Initial Unknown
     
    lte_4G_handover_list = []   # LTE eNB handover while in the 4G state
    
    nr_setup_list = []          #gNB cell addition
    nr_handover_list = []       #gNB cell changes (eNB stays the same)
    nr_removal_list = []        #gNB cell removal
        
    lte_5G_handover_list = []   #(eNB1, gNB1) -> (eNB2, gNB1) #gNB stays the same
    nr_lte_handover_list = []   #both NR cell and LTE cell have handover
    
    eNB_to_MN_list = []
    MN_to_eNB_list = []
    
    scg_failure_list = []       #gNB handover failure
    reestablish_list_type2 = [] #eNB handover failure
    reestablish_list_type3 = []
    
    # Pending-procedure flags (0/1) and the row index where each one started.
    nr_handover = 0
    nr_handover_start_index = None
    lte_handover = 0
    lte_handover_start_index = None
    nr_release = 0
    nr_release_start_index = None
    
    lte_failure = 0
    lte_failure_start_index = None
    
    handover_num = 0
    
    for i in range(len(miinfofile)):
        # NR RRC rows only refresh the tracked NR PCI; they never open or
        # close a procedure.
        if miinfofile.loc[i, "type_id"] == "5G_NR_RRC_OTA_Packet":
            nr_pci = nr_pci_track()
            continue
            
        # Open a procedure on the first triggering row; later triggers are
        # ignored until the pending flag is cleared again.
        if miinfofile.loc[i, "nr-rrc.t304"]:
            if nr_handover == 0:    
                nr_handover = 1
                nr_handover_start_index = i
                
        if miinfofile.loc[i, "lte-rrc.t304"]:
            if lte_handover == 0:
                lte_handover = 1
                lte_handover_start_index = i
                
        if miinfofile.loc[i, "nr-Config-r15: release (0)"]:
            if nr_release == 0:
                nr_release = 1
                nr_release_start_index = i
           
        if (nr_handover or lte_handover or nr_release) and miinfofile.loc[i, "rrcConnectionReconfigurationComplete"]:
            handover_num +=1
        
        
        # Classify the handover type
        # NOTE: the branches below are evaluated in sequence and each one
        # clears flags that later branches test, so their order matters.
        #------------------------------------------------------------------------------
        if lte_handover and not nr_handover and not nr_release and miinfofile.loc[i, "rrcConnectionReconfigurationComplete"]:  # just lte cell handover event
            lte_handover = 0
            lte_4G_handover_list.append([miinfofile.loc[lte_handover_start_index, "time"], miinfofile.loc[i, "time"]])
            

        if lte_handover and not nr_handover and nr_release and miinfofile.loc[i, "rrcConnectionReconfigurationComplete"]:    # LTE Ho and nr release 
            lte_handover = 0
            nr_release = 0
            MN_to_eNB_list.append([miinfofile.loc[lte_handover_start_index, "time"], miinfofile.loc[i, "time"]])
        
        if nr_handover and not lte_handover and miinfofile.loc[i, "rrcConnectionReconfigurationComplete"]:  # just nr cell handover event
            nr_handover = 0
            if miinfofile.loc[nr_handover_start_index, "dualConnectivityPHR: setup (1)"]:     #This if-else statement classifies whether it is nr addition or nr handover
                nr_setup_list.append([miinfofile.loc[nr_handover_start_index, "time"], miinfofile.loc[i, "time"]])       
            else:
                nr_handover_list.append([miinfofile.loc[nr_handover_start_index, "time"], miinfofile.loc[i, "time"]])
            #additional judgement:
            #----------------------------
            #if miinfofile.loc[nr_handover_start_index, "dualConnectivityPHR: setup (1)"] and nr_pci != None:
            #    print("Warning: dualConnectivityPHR setup may not mean nr cell addition", mi_file, i)
            #if miinfofile.loc[nr_handover_start_index, "dualConnectivityPHR: setup (1)"]==0 and not (nr_pci != None and nr_pci != miinfofile.loc[nr_handover_start_index, "nr_pci"]): 
            #    print("Warning: nr-rrc.t304 without dualConnectivityPHR setup may not mean nr cell handover", mi_file, i, nr_handover_start_index, miinfofile.loc[nr_handover_start_index, "nr_pci"], nr_pci)
                
        if lte_handover and nr_handover and miinfofile.loc[i, "rrcConnectionReconfigurationComplete"]:      # both nr cell and lte cell handover event
            lte_handover = 0
            nr_handover = 0
            # Same NR PCI as the one tracked so far: only the LTE side changed.
            if nr_pci == miinfofile.loc[lte_handover_start_index, "nr_physCellId"]: 
                lte_5G_handover_list.append([miinfofile.loc[lte_handover_start_index, "time"], miinfofile.loc[i, "time"]])
            else:
                ##############
                if miinfofile.loc[nr_handover_start_index, "dualConnectivityPHR: setup (1)"]:     #This if-else statement classifies whether it is nr addition or nr handover
                    eNB_to_MN_list.append([miinfofile.loc[nr_handover_start_index, "time"], miinfofile.loc[i, "time"]])       
                else:
                    nr_lte_handover_list.append([miinfofile.loc[lte_handover_start_index, "time"], miinfofile.loc[i, "time"]])
            
        if not lte_handover and  nr_release and miinfofile.loc[i, "rrcConnectionReconfigurationComplete"]:
            nr_release=0
            nr_removal_list.append([miinfofile.loc[nr_release_start_index, "time"], miinfofile.loc[i, "time"]])
            
        # SCG failure is a point event: start and end are the same row time.
        if miinfofile.loc[i, "scgFailureInformationNR-r15"]:
            scg_failure_list.append([miinfofile.loc[i, "time"], miinfofile.loc[i, "time"]]) 
            
        if miinfofile.loc[i, "rrcConnectionReestablishmentRequest"]:
            if lte_failure == 0:
                lte_failure = 1
                lte_failure_start_index = i
        # NOTE(review): both elements stored for a re-establishment are the
        # time of the request row; the time of the answering row (``i``) is
        # not recorded. Confirm this is intended.
        if lte_failure and miinfofile.loc[i, "rrcConnectionReestablishment"]:
            lte_failure = 0
            reestablish_list_type2.append([miinfofile.loc[lte_failure_start_index, "time"], miinfofile.loc[lte_failure_start_index, "time"]])
        if lte_failure and miinfofile.loc[i, "rrcConnectionReestablishmentReject"]:
            lte_failure = 0
            reestablish_list_type3.append([miinfofile.loc[lte_failure_start_index, "time"], miinfofile.loc[lte_failure_start_index, "time"]])
            
    return [lte_4G_handover_list, nr_setup_list, nr_handover_list, nr_removal_list, lte_5G_handover_list, nr_lte_handover_list, eNB_to_MN_list, MN_to_eNB_list, scg_failure_list, reestablish_list_type2, reestablish_list_type3], handover_num

def collect_ho_event(mi_rrc_df):
    """Return a dict mapping each event label to the list of its start times.

    The eleven event lists produced by ``mi_event_parsing`` hold
    ``[start, end]`` pairs; only the first element of every pair is kept.
    """
    event_lists, _ = mi_event_parsing(mi_rrc_df)
    labels = (
        'lte', 'nr_setup', 'gNB_ho', 'nr_rel', "MN_changed", "MN_SN_changed",
        "eNB_to_MN_changed", "MN_to_eNB_changed", "gNB_fail", "type2_fail",
        "type3_fail",
    )
    return {
        label: [span[0] for span in spans]
        for label, spans in zip(labels, event_lists)
    }

In [9]:
def parse_mi_ho(df):
    """Extract connection, handover and failure events from an RRC message table.

    Rows whose ``type_id`` is ``"5G_NR_RRC_OTA_Packet"`` are skipped. For every
    other row the flag columns are inspected and an ``HO(start, end, others)``
    record is appended to the matching list.

    Args:
        df: DataFrame read positionally (``.iloc``). It must provide a
            ``"Timestamp"`` column whose values support subtraction yielding
            an object with ``total_seconds()``, plus the RRC flag columns
            referenced below.

    Returns:
        dict with the keys ``Conn_Rel``, ``Conn_Req``, ``LTE_HO``, ``MN_HO``,
        ``eNB_to_ENDC``, ``gNB_Rel``, ``gNB_HO``, ``RLF`` and ``SCG_RLF``;
        each value is a list of ``HO`` namedtuples. ``end`` is ``None`` when
        no terminating message is found inside the look-ahead window.
    """
    HO = namedtuple('HO', 'start, end, others', defaults=(None, None))

    def NR_OTA():
        # True when the current row is an NR RRC message.
        return df["type_id"].iloc[i] == "5G_NR_RRC_OTA_Packet"

    def find_1st_after(target, look_after=1):
        # First row at or after the current one, within `look_after` seconds,
        # whose `target` cell is set. Always returns a 2-tuple: previously a
        # bare None was returned when the table ended before the window
        # closed, which crashed callers that unpack or index the result.
        for j in range(i, len(df)):
            t_ = df["Timestamp"].iloc[j]
            if (t_ - t).total_seconds() > look_after:
                break
            if df[target].iloc[j] not in [0, '0']:
                return t_, j
        return None, None

    def find_1st_before(target, look_before=1):
        # Mirror of find_1st_after, walking backwards from the current row.
        for j in range(i, -1, -1):
            t_ = df["Timestamp"].iloc[j]
            if (t - t_).total_seconds() > look_before:
                break
            if df[target].iloc[j] not in [0, '0']:
                return t_, j
        return None, None

    D = {
        'Conn_Rel': [],
        'Conn_Req': [],     # Setup
        'LTE_HO': [],       # LTE -> newLTE
        'MN_HO': [],        # LTE + NR -> newLTE + NR
        'eNB_to_ENDC': [],  # LTE -> LTE + NR => NR setup
        'gNB_Rel': [],      # LTE + NR -> LTE
        'gNB_HO': [],       # LTE + NR -> LTE + newNR
        # 'HOF': [],        # Not defined yet.
        'RLF': [],
        'SCG_RLF': [],
    }

    for i in range(len(df)):
        if NR_OTA():
            continue

        t = df["Timestamp"].iloc[i]

        if df["rrcConnectionRelease"].iloc[i] == 1:
            D['Conn_Rel'].append(HO(start=t))

        if df["rrcConnectionRequest"].iloc[i] == 1:
            a = find_1st_after('rrcConnectionReconfigurationComplete', look_after=2)[0]
            b = find_1st_after('securityModeComplete', look_after=2)[0]
            # The setup ends at the later of the two messages; either may be
            # missing, so compare only the ones that were found.
            found = [x for x in (a, b) if x is not None]
            end = max(found) if found else None
            D['Conn_Req'].append(HO(start=t, end=end))

        if df["lte-rrc.t304"].iloc[i] == 1:
            others = ''
            end, _ = find_1st_after('rrcConnectionReconfigurationComplete')
            serv_cell, target_cell = df["PCI"].iloc[i], df['lte_targetPhysCellId'].iloc[i]
            serv_freq, target_freq = df["Freq"].iloc[i], df['dl-CarrierFreq'].iloc[i]
            if df["SCellToAddMod-r10"].iloc[i] == 1:
                # The SCell index cell is split on '@' to count the SCells.
                n = len(str(df["SCellIndex-r10.1"].iloc[i]).split('@'))
                others = f'Set up {n} SCell.'

            if serv_freq != target_freq:
                others += " Inter freq. HO"

            same_cell = serv_cell == target_cell and serv_freq == target_freq
            if df["nr-rrc.t304"].iloc[i] == 1 and df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
                if same_cell:
                    D['eNB_to_ENDC'].append(HO(start=t, end=end, others=others))
                else:
                    D['MN_HO'].append(HO(start=t, end=end, others=others))
            else:
                if same_cell:
                    a, _ = find_1st_before("scgFailureInformationNR-r15")
                    if a is not None:
                        others += " Caused by scg-failure."
                    D['gNB_Rel'].append(HO(start=t, end=end, others=others))
                else:
                    D['LTE_HO'].append(HO(start=t, end=end, others=others))

        if df["nr-rrc.t304"].iloc[i] == 1 and not df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
            end, _ = find_1st_after('rrcConnectionReconfigurationComplete')
            D['gNB_HO'].append(HO(start=t, end=end))

        if df["rrcConnectionReestablishmentRequest"].iloc[i] == 1:
            end, _ = find_1st_after('rrcConnectionReestablishmentComplete', look_after=1)
            b, _ = find_1st_after('rrcConnectionReestablishmentReject', look_after=1)
            others = df["reestablishmentCause"].iloc[i]
            if end is not None:
                # Type II
                D['RLF'].append(HO(start=t, end=end, others=others))
            else:
                # Type III; the end is the reject message if one was found.
                D['RLF'].append(HO(start=t, end=b, others=others))

        if df["scgFailureInformationNR-r15"].iloc[i] == 1:
            others = df["failureType-r15"].iloc[i]
            D['SCG_RLF'].append(HO(start=t, others=others))

    return D

In [10]:
def pop_dict(band, d):
    """Return a shallow copy of d keeping only keys that end with ' ' + band."""
    suffix = ' ' + band
    kept = d.copy()
    for name in d:
        if name.endswith(suffix):
            continue
        del kept[name]
    return kept

class ss_dict:
    """LTE signal-strength samples grouped per cell.

    ``self.dict`` maps a cell key to ``[rsrp_list, rsrq_list, time_list]``.
    The primary cell is stored under ``'PCell'``; every other cell under
    ``"<pci> <earfcn>"``.
    """

    def __init__(self, pd_data=None, d=None):  ## Input pd_df.iloc[index]
        """Build from one measurement row (``pd_data``) or adopt the dict ``d``.

        When ``d`` is given it replaces the dict as-is (it is not copied).
        """
        self.dict = {'PCell': [[], [], []]}
        if pd_data is not None:
            self.nei_cell(pd_data)
            self.serv_cell(pd_data)
        if d is not None:
            self.dict = d

    def serv_cell(self, pd_data):
        """Record the serving-cell RSRP/RSRQ sample of one row."""
        earfcn = pd_data["EARFCN"]
        serv_cell_id = pd_data["Serving Cell Index"]
        pci = pd_data["PCI"]
        rsrp = float(pd_data["RSRP(dBm)"])
        rsrq = float(pd_data["RSRQ(dB)"])
        t = pd_data["Timestamp"]
        if serv_cell_id == "PCell":
            self.dict['PCell'][0].append(rsrp)
            self.dict['PCell'][1].append(rsrq)
            self.dict['PCell'][2].append(t)
        else:
            self.dict[pci + ' ' + earfcn] = [[rsrp], [rsrq], [t]]

    def nei_cell(self, pd_data):
        """Record the neighbour-cell samples of one row.

        Neighbours are read as (id, rsrp, rsrq) triplets starting at position
        9; an id of ``'-'`` ends the list. Positions are read with ``.iloc``
        because integer keys on a label-indexed Series are deprecated
        positional fallback in pandas.
        """
        earfcn = pd_data["EARFCN"]
        t = pd_data["Timestamp"]
        for i in range(9, len(pd_data), 3):
            cell_id = pd_data.iloc[i]
            if cell_id == '-':
                break
            rsrp = float(pd_data.iloc[i + 1])
            rsrq = float(pd_data.iloc[i + 2])
            self.dict[str(cell_id) + ' ' + earfcn] = [[rsrp], [rsrq], [t]]

    @staticmethod
    def _merge_into(target, source):
        # Append the samples of `source` to `target`, copying lists so the
        # two dicts never share list objects afterwards.
        for key, columns in source.items():
            if key in target:
                for mine, theirs in zip(target[key], columns):
                    mine.extend(theirs)
            else:
                target[key] = [list(column) for column in columns]
        return target

    def __add__(self, sd2):
        """Return a new ss_dict holding the samples of both operands.

        Neither operand is modified (the previous implementation extended the
        left operand's lists in place and aliased the right operand's lists).
        """
        merged = {key: [list(column) for column in columns]
                  for key, columns in self.dict.items()}
        return ss_dict(d=self._merge_into(merged, sd2.dict))

    def __iadd__(self, sd2):
        """In-place ``+=``: append the samples of sd2 to this object."""
        self._merge_into(self.dict, sd2.dict)
        return self

    def __repr__(self):
        return str(self.dict)

    def sort_dict_by_time(self):
        """Sort the samples of every cell by their timestamp (stable)."""
        def sort_element(element):
            rows = sorted(zip(element[0], element[1], element[2]),
                          key=lambda row: row[2])
            return [[r[0] for r in rows], [r[1] for r in rows], [r[2] for r in rows]]

        self.dict = {key: sort_element(value) for key, value in self.dict.items()}


class nr_ss_dict:
    """NR signal-strength samples grouped per cell.

    ``self.dict`` maps a cell key to ``[rsrp_list, rsrq_list, time_list]``.
    The serving cell is stored under ``'PSCell'``; every other cell under the
    cell id value read from the row.
    """

    def __init__(self, pd_data=None, d=None):
        """Build from one measurement row (``pd_data``) or adopt the dict ``d``.

        When ``d`` is given it replaces the dict as-is (it is not copied).
        """
        self.dict = {'PSCell': [[], [], []]}
        if pd_data is not None:
            self.nei_cell(pd_data)
            self.serv_cell(pd_data)
        if d is not None:
            self.dict = d

    def serv_cell(self, pd_data):
        """Move the entry whose key equals the serving PCI under ``'PSCell'``."""
        self.pscell = pd_data["Serving Cell PCI"]
        matched = False
        for cell in self.dict.keys():
            if self.pscell == cell:
                self.dict["PSCell"][0] += self.dict[cell][0]
                self.dict["PSCell"][1] += self.dict[cell][1]
                self.dict["PSCell"][2] += self.dict[cell][2]
                matched, serving_key = True, cell
                break
        # The key is removed after the loop so the dict is not resized while
        # it is being iterated.
        if matched:
            self.dict.pop(serving_key)

    def nei_cell(self, pd_data):
        """Record every measured cell of one row.

        Cells are read as (id, rsrp, rsrq) triplets starting at position 6;
        an id of ``'-'`` ends the list. Positions are read with ``.iloc``
        because integer keys on a label-indexed Series are deprecated
        positional fallback in pandas.
        """
        t = pd_data["Timestamp"]
        for i in range(6, len(pd_data), 3):
            cell_id = pd_data.iloc[i]
            if cell_id == '-':
                break
            rsrp = float(pd_data.iloc[i + 1])
            rsrq = float(pd_data.iloc[i + 2])
            self.dict[cell_id] = [[rsrp], [rsrq], [t]]

    def __repr__(self):
        return str(self.dict)

    @staticmethod
    def _merge_into(target, source):
        # Append the samples of `source` to `target`, copying lists so the
        # two dicts never share list objects afterwards.
        for key, columns in source.items():
            if key in target:
                for mine, theirs in zip(target[key], columns):
                    mine.extend(theirs)
            else:
                target[key] = [list(column) for column in columns]
        return target

    def __add__(self, sd2):
        """Return a new nr_ss_dict holding the samples of both operands.

        Neither operand is modified (the previous implementation extended the
        left operand's lists in place and aliased the right operand's lists).
        """
        merged = {key: [list(column) for column in columns]
                  for key, columns in self.dict.items()}
        return nr_ss_dict(d=self._merge_into(merged, sd2.dict))

    def __iadd__(self, sd2):
        """In-place ``+=``: append the samples of sd2 to this object."""
        self._merge_into(self.dict, sd2.dict)
        return self

# All data for a trace

In [11]:
def _pop_strongest(cells):
    # Remove the cell with the highest mean RSRP from `cells` and return its
    # (mean RSRP, mean RSRQ); return (0, 0) when no cell is left.
    if len(cells) == 0:
        return 0, 0  # No sample value, assign 0
    best = max(cells, key=lambda c: sum(cells[c][0]) / len(cells[c][0]))
    samples = cells.pop(best)
    return sum(samples[0]) / len(samples[0]), sum(samples[1]) / len(samples[0])


def _scan_window(df, first, lower, upper, latency_col, loss_col, threshold):
    # Walk `df` from row `first` and collect the rows with
    # lower < Timestamp <= upper. Returns (latencies, number of rows above
    # `threshold`, number of rows with a truthy loss cell, next start row).
    # The start row only advances when a row later than `upper` is reached.
    latencies, n_excessive, n_lost = [], 0, 0
    for i in range(first, len(df)):
        t = df['Timestamp'].iloc[i]
        if lower < t <= upper:
            latency = float(df[latency_col].iloc[i])
            latencies.append(latency)
            if df[loss_col].iloc[i]:
                n_lost += 1
            if latency > threshold:
                n_excessive += 1
        elif t > upper:
            return latencies, n_excessive, n_lost, i
    return latencies, n_excessive, n_lost, first


def data_create(dir1, dir2, ci_file, outfile, ul_df, dl_df):
    """Write the per-time-point feature table of a device pair to a CSV file.

    For every time point the row holds GPS values, then for each of the two
    devices the handover counts and LTE/NR signal strength, then the
    downlink/uplink loss rate, excessive-latency rate and mean latency.

    Fix over the previous version: the per-device loop now reads the
    measurement tables and handover events of device ``j``. Previously it
    read the variables left over from the last loading iteration, so both
    devices' columns were computed from the second device's data.

    Args:
        dir1, dir2: trace directories of the two devices; their ``data``
            sub-directory is scanned for ``*ml1.csv`` and ``*rrc.csv`` files.
        ci_file: path of the cell-info CSV providing ``Date``, ``GPSLat``,
            ``GPSLon`` and ``GPSSpeed``.
        outfile: path of the CSV file to write.
        ul_df, dl_df: uplink / downlink tables with a ``Timestamp`` column and
            the ``lost.<b1>.<b2>`` / ``latency.<b1>.<b2>`` columns.

    Module-level names read by this function: ``start``, ``N``, ``TS``,
    ``tp_range``, ``Setting``, ``dev1`` and ``dev2``.
    """
    d1 = os.path.join(dir1, "data")
    d2 = os.path.join(dir2, "data")

    excessive_latency_value = 0.1

    # Collect gps and gpsspeed from cellinfo
    try:
        ci_df = pd.read_csv(ci_file, dtype=str)
        ci_df["Date"] = ci_df["Date"].swifter.apply(lambda x: pd.to_datetime(x))
    except pd.errors.ParserError:
        # The raw file could not be parsed: run the preprocessing script and
        # read the "<name>_new.csv" file instead.
        print(f'preprocess {ci_file}')
        gps_dir = '/'.join(ci_file.split('/')[:-1])
        os.system(f'python3 ./csv_processing.py {gps_dir}')
        ci_df = pd.read_csv(ci_file[:-4]+'_new.csv', dtype=str)
        ci_df["Date"] = ci_df["Date"].swifter.apply(lambda x: pd.to_datetime(x))

    GPS_info = namedtuple('gps_info','lat, long, gpsspeed')

    # Per-device inputs, index 0 for dir1 and index 1 for dir2.
    mi_ml1_dfs = []
    nr_mi_ml1_dfs = []
    HO_events_list = []

    for d in [d1, d2]:
        # Collect rsrp information.
        # NOTE(review): the first sorted '*ml1.csv' name is used as the LTE
        # table and the second as the NR table - confirm the file naming.
        ml1_filenames = sorted(x for x in os.listdir(d) if x.endswith('ml1.csv'))
        mi_ml1_df = pd.read_csv(os.path.join(d, ml1_filenames[0]), dtype=str)
        mi_ml1_df["Timestamp"] = mi_ml1_df["Timestamp"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
        mi_ml1_dfs.append(mi_ml1_df)

        nr_mi_ml1_df = pd.read_csv(os.path.join(d, ml1_filenames[1]), dtype=str)
        nr_mi_ml1_df["Timestamp"] = nr_mi_ml1_df["Timestamp"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
        nr_mi_ml1_dfs.append(nr_mi_ml1_df)

        # Collect Ho information
        mi_rrc_filename = [x for x in os.listdir(d) if x.endswith('rrc.csv')][0]
        mi_rrc_df = pd.read_csv(os.path.join(d, mi_rrc_filename))
        mi_rrc_df["Timestamp"] = mi_rrc_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
        HO_events = parse_mi_ho(mi_rrc_df)
        HO_events.pop('Conn_Rel'), HO_events.pop('Conn_Req')
        HO_events_list.append(HO_events)

    columns = ["Timestamp", "lat", "long", "gpsspeed"]+[
        'LTE_HO','MN_HO','eNB_to_ENDC','gNB_Rel','gNB_HO','RLF','SCG_RLF',
        "RSRP","RSRQ","RSRP1","RSRQ1","RSRP2","RSRQ2",
        "nr-RSRP","nr-RSRQ","nr-RSRP1","nr-RSRQ1","nr-RSRP2","nr-RSRQ2",
    ]*2 + ["dl-lossrate", "ul-lossrate", "dl-exc-lat", "ul-exc-lat","dl-latency", "ul-latency"]

    loss_col = f"lost.{Setting[dev1]}.{Setting[dev2]}"
    latency_col = f"latency.{Setting[dev1]}.{Setting[dev2]}"

    # Row cursors: every table is scanned forward only, so each window
    # resumes where the previous one stopped.
    i_ci = 0
    i_pcap = [0, 0]             # [downlink, uplink]
    i_ = [[0, 0], [0, 0]]       # per device: [LTE table, NR table]
    data_buffers = [{'rsrp': 0, 'rsrq': 0}, {'rsrp': 0, 'rsrq': 0}]

    # The context manager closes the file even when a window raises.
    with open(outfile, 'w') as f:
        f.write(",".join(columns)+"\n")

        for time_point in [start + dt.timedelta(seconds=s) for s in range(0, N+1, TS)]:
            window_start = time_point - dt.timedelta(seconds=tp_range)
            ss_relateds = []
            HO_relateds = []

            # Get GPS informations
            # ================================================================
            # NOTE(review): gps_info keeps the value of the previous window
            # when this one has no sample; it is unbound if no sample has
            # been seen at all yet.
            for i in range(i_ci, len(ci_df)):
                t = ci_df['Date'].iloc[i]
                if window_start < t <= time_point:
                    gps_info = GPS_info(lat=ci_df['GPSLat'].iloc[i],
                                        long=ci_df['GPSLon'].iloc[i],
                                        gpsspeed=ci_df['GPSSpeed'].iloc[i])
                elif t > time_point:
                    i_ci = i
                    break

            gps_related = [str(feature) for feature in
                           (gps_info.lat, gps_info.long, gps_info.gpsspeed)]

            for j in range(2):
                lte_df = mi_ml1_dfs[j]
                nr_df = nr_mi_ml1_dfs[j]
                ho_events = HO_events_list[j]

                # ============================================================
                # Get signal strength informations
                SS_DICT = ss_dict()
                for i in range(i_[j][0], len(lte_df)):
                    t = lte_df['Timestamp'].iloc[i]
                    serv_cell_idx = lte_df['Serving Cell Index'].iloc[i]
                    if (window_start < t <= time_point) and serv_cell_idx == 'PCell':
                        SS_DICT += ss_dict(lte_df.iloc[i])
                    elif t > time_point:
                        i_[j][0] = i
                        break

                # Get primary serv cell rsrp, rsrq
                pcell = SS_DICT.dict.pop("PCell")
                if len(pcell[0]) != 0:
                    pcell_rsrp = sum(pcell[0])/len(pcell[0])
                    pcell_rsrq = sum(pcell[1])/len(pcell[0])
                    data_buffers[j]['rsrp'], data_buffers[j]['rsrq'] = pcell_rsrp, pcell_rsrq
                else:
                    # No sample value, use the previous one
                    pcell_rsrp, pcell_rsrq = data_buffers[j]['rsrp'], data_buffers[j]['rsrq']

                # Get 1st, 2nd neighbor cell rsrp, rsrq
                cell1_rsrp, cell1_rsrq = _pop_strongest(SS_DICT.dict)
                cell2_rsrp, cell2_rsrq = _pop_strongest(SS_DICT.dict)
                ss_related = [pcell_rsrp, pcell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]

                NR_SS_DICT = nr_ss_dict()
                for i in range(i_[j][1], len(nr_df)):
                    t = nr_df['Timestamp'].iloc[i]
                    if window_start < t <= time_point:
                        NR_SS_DICT += nr_ss_dict(nr_df.iloc[i])
                    elif t > time_point:
                        i_[j][1] = i
                        break

                # Get primary secondary serv cell rsrp, rsrq
                pscell = NR_SS_DICT.dict.pop("PSCell")
                if len(pscell[0]) != 0:
                    pscell_rsrp = sum(pscell[0])/len(pscell[0])
                    pscell_rsrq = sum(pscell[1])/len(pscell[0])
                else:
                    pscell_rsrp, pscell_rsrq = 0, 0  # No nr serving or no sample value assign 0

                # Get 1st, 2nd neighbor cell rsrp, rsrq
                cell1_rsrp, cell1_rsrq = _pop_strongest(NR_SS_DICT.dict)
                cell2_rsrp, cell2_rsrq = _pop_strongest(NR_SS_DICT.dict)
                ss_related += [pscell_rsrp, pscell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]
                ss_relateds.append([str(feature) for feature in ss_related])

                # ============================================================
                # Get HO informations: count the events of each type that
                # start inside the window. The scan stops at the first event
                # later than the window, i.e. events are taken to be in time
                # order.
                HO_related = [0] * len(ho_events.keys())
                for k, ho_type in enumerate(list(ho_events.keys())):
                    for ho in ho_events[ho_type]:
                        t = ho.start
                        if window_start < t <= time_point:
                            HO_related[k] += 1
                        elif t > time_point:
                            break
                HO_relateds.append([str(feature) for feature in HO_related])

            # ================================================================
            # Get DL/UL latency, loss...
            # NOTE(review): when a window has no packet the values of the
            # previous window are reused; they are unbound if the very first
            # window has no packet.
            dl_lats, dl_exc, dl_lost, i_pcap[0] = _scan_window(
                dl_df, i_pcap[0], window_start, time_point,
                latency_col, loss_col, excessive_latency_value)
            if len(dl_lats) != 0:
                dl_avg_lat = sum(dl_lats)/len(dl_lats)
                dl_exc_rate = dl_exc/len(dl_lats)
            if (dl_lost+len(dl_lats)) != 0:
                dl_loss_rate = dl_lost/(dl_lost+len(dl_lats))

            ul_lats, ul_exc, ul_lost, i_pcap[1] = _scan_window(
                ul_df, i_pcap[1], window_start, time_point,
                latency_col, loss_col, excessive_latency_value)
            if len(ul_lats) != 0:
                ul_avg_lat = sum(ul_lats)/len(ul_lats)
                ul_exc_rate = ul_exc/len(ul_lats)
            if (ul_lost+len(ul_lats)) != 0:
                ul_loss_rate = ul_lost/(ul_lost+len(ul_lats))

            performance_related = [str(feature) for feature in
                                   (dl_loss_rate, ul_loss_rate, dl_exc_rate,
                                    ul_exc_rate, dl_avg_lat, ul_avg_lat)]

            f.write(",".join([str(time_point)]+gps_related+HO_relateds[0]+ss_relateds[0]+HO_relateds[1]+ss_relateds[1]+performance_related)+"\n")

In [13]:
# Driver cell: for every trace under <base_dir>/combo, build one feature CSV
# per pair of device directories by calling data_create.
# NOTE: data_create reads the module-level names Setting, TS, tp_range,
# start, N, dev1 and dev2 that are assigned in this cell.

# Device name -> band label used in the loss/latency column names.
Setting = {'qc01': 'B3', 'qc02': 'B7', 'qc03': 'B8'}
# The first assignment is overwritten by the second one right below.
base_dir = '/home/wmnlab/D/sheng-ru/test/test_data/'
base_dir = '/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/'
# Second-to-last non-empty path component of base_dir.
date = [x for x in list(base_dir.split('/')) if len(x) != 0][-2]

# Device directories are the entries whose name starts with 'qc' or 'sm'.
matches = filter(lambda x: x.startswith('qc') or x.startswith('sm'), os.listdir(base_dir))
combo_dir = os.path.join(base_dir, 'combo')
device_dir = [os.path.join(base_dir, x) for x in list(matches)]
device_dir.sort()

# The cell-info file is the last (sorted) entry containing 'ci' in the
# sibling 'gps' directory of base_dir.
parent_dir = str(Path(base_dir).parent.absolute())
gps_dir = os.path.join(parent_dir, 'gps')
matches = list(filter(lambda x: 'ci' in x, os.listdir(gps_dir)))
matches.sort()
ci_file = os.path.join(gps_dir, matches[-1])

for trace in sorted(os.listdir(combo_dir)):
    ct_dir = os.path.join(combo_dir,trace) # combo+trace dir

    ul_loss_lat = os.path.join(ct_dir, "udp_uplk_combo_loss_latency.csv")
    ul_loss_lat_df = pd.read_csv(ul_loss_lat)
    ul_loss_lat_df["Timestamp"] = ul_loss_lat_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

    dl_loss_lat = os.path.join(ct_dir, "udp_dnlk_combo_loss_latency.csv")
    dl_loss_lat_df = pd.read_csv(dl_loss_lat)
    dl_loss_lat_df["Timestamp"] = dl_loss_lat_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

    # Get timepoint from start to end
    # front_cut / back_cut: seconds trimmed from both ends of the trace.
    # TS: step between time points in seconds; tp_range: window length in
    # seconds looked back from each time point.
    front_cut, back_cut = 5, 5
    TS = 1
    tp_range = 1
    start = dl_loss_lat_df["Timestamp"].iloc[0] + dt.timedelta(seconds=front_cut) # open the downlink file to decide start time and end time
    end = dl_loss_lat_df["Timestamp"].iloc[-1] - dt.timedelta(seconds=back_cut)
    start, end = start.replace(microsecond=0), end.replace(microsecond=0)
    print(f'Trace {trace} from {start} to {end}.')
    N = int((end - start).total_seconds()) # How many time_point


    # Every unordered pair of device directories.
    for i, device in enumerate(device_dir):
        for j in range(i+1,len(device_dir)):
            device2 = device_dir[j]
            
            dt_dir = os.path.join(device, trace) # device+trace dir
            dt_dir2 = os.path.join(device2, trace)
            print(dt_dir, dt_dir2)
            # The device name is the last four characters of the directory path.
            dev1, dev2 = device[-4:], device2[-4:]
            b1, b2 = Setting[dev1], Setting[dev2]
            # date and trace are concatenated without a separator.
            outfile = os.path.join('/home/wmnlab/D/sheng-ru/ml_data', date + f'{trace}_{b1}&{b2}.csv')
            print(outfile)
            print(dev1, dev2)
            data_create(dt_dir, dt_dir2, ci_file, outfile, ul_loss_lat_df, dl_loss_lat_df) 


Trace #01 from 2023-02-04 14:59:01 to 2023-02-04 15:06:23.
/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01 /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01
/home/wmnlab/D/sheng-ru/ml_data/2023-02-04#01_B3&B7.csv
qc01 qc02
/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01 /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc03/#01
/home/wmnlab/D/sheng-ru/ml_data/2023-02-04#01_B3&B8.csv
qc01 qc03
/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01 /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc03/#01
/home/wmnlab/D/sheng-ru/ml_data/2023-02-04#01_B7&B8.csv
qc02 qc03
Trace #02 from 2023-02-04 15:57:53 to 2023-02-04 16:02:26.
/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#02 /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#02
/home/wmnlab/D/sheng-ru/ml_data/2023-02-04#02_B3&B7.csv
qc01 qc02
/home/wmnlab/D/database/20

In [192]:
# Columns of the uplink table whose name contains the band labels of both
# devices of the current pair.
A = [x for x in list(ul_loss_lat_df.columns) if Setting[dev1] in x and Setting[dev2] in x]
# filter() requires the iterable as its second argument; with the predicate
# alone it raises TypeError. Select the loss column(s) from A.
matches = filter(lambda x: 'lost' in x, A)

['lost.B3.B7', 'excl.B3.B7', 'latency.B3.B7']

In [164]:
# Scratch cell: display the absolute parent directory of base_dir as a string.
x = Path(base_dir)
str(x.parent.absolute())

'/home/wmnlab/D/sheng-ru/test'

# Split

In [7]:
# Single-device variant of the feature extraction, written as a flat script.
# It loads the latency/loss, signal-strength, GPS and RRC tables of one trace.
base_dir = "/home/wmnlab/Code_Test_Space/sheng-ru/2022-12-22/_Bandlock_Udp_B1_B3/sm05/#01"
out_file = "/home/wmnlab/test1.csv" ## Out file !!!!!!!!
# NOTE(review): the handle is opened here and is not closed in the part of
# the cell visible in this section.
f = open(out_file, 'w') 

# Collecting the UDP Latency and Loss information first
# NOTE: `dir` shadows the builtin of the same name.
dir = os.path.join(base_dir,"data")

dl_lat_file = os.path.join(dir, "udp_dnlk_latency.csv")
dl_lat_df = pd.read_csv(dl_lat_file)
dl_lat_df["Timestamp"] = dl_lat_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

dl_loss_file = os.path.join(dir, "udp_dnlk_loss_timestamp.csv")
dl_loss_df = pd.read_csv(dl_loss_file)
dl_loss_df["Timestamp"] = dl_loss_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

ul_lat_file = os.path.join(dir, "udp_uplk_latency.csv")
ul_lat_df = pd.read_csv(ul_lat_file)
ul_lat_df["Timestamp"] = ul_lat_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

ul_loss_file = os.path.join(dir, "udp_uplk_loss_timestamp.csv")
ul_loss_df = pd.read_csv(ul_loss_file)
ul_loss_df["Timestamp"] = ul_loss_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

# Get timepoint from start to end and get the latency and loss
# front_cut / back_cut: seconds trimmed from both ends of the downlink trace.
front_cut = 5
back_cut = 5
TS = 1
tp_range = 1

start = dl_lat_df["Timestamp"].iloc[0] + dt.timedelta(seconds=front_cut)
end = dl_lat_df["Timestamp"].iloc[-1] - dt.timedelta(seconds=back_cut)
start, end = start.replace(microsecond=0), end.replace(microsecond=0)
print(f'From {start} to {end}.')
N = int((end - start).total_seconds()) # How many time_point

# Collect rsrp information
# The first sorted '*ml1_new.csv' file is read as the LTE table, the second
# as the NR table.
matches = filter(lambda x: x.endswith('ml1_new.csv'), os.listdir(dir))
ml1_filenames = sorted(list(matches))
mi_ml1_file = os.path.join(dir, ml1_filenames[0])
mi_ml1_df = pd.read_csv(mi_ml1_file, dtype=str)
mi_ml1_df = mi_ml1_df[mi_ml1_df.type_id == 'LTE_PHY_Connected_Mode_Intra_Freq_Meas']
# Timestamps are shifted by +8 hours after parsing.
mi_ml1_df["time"] = mi_ml1_df["time"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))

nr_mi_ml1_file = os.path.join(dir, ml1_filenames[1])
nr_mi_ml1_df = pd.read_csv(nr_mi_ml1_file, dtype=str)
nr_mi_ml1_df["time"] = nr_mi_ml1_df["time"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))


# Collect gps and gpsspeed from cellinfo
# From here on `dir` points at the "middle" directory.
dir = os.path.join(base_dir,"middle")
matches = filter(lambda x: x.startswith('cimon'), os.listdir(dir))
ci_filename = list(matches)[0]
ci_file = os.path.join(dir, ci_filename)
ci_df = pd.read_csv(ci_file, dtype=str)
ci_df["Date"] = ci_df["Date"].swifter.apply(lambda x: pd.to_datetime(x))

GPS_info = namedtuple('gps_info','lat, long, gpsspeed')

# Collect Ho information
# NOTE(review): the rrc file is searched in `dir`, which is the "middle"
# directory at this point - confirm that is where it lives.
matches = filter(lambda x: x.endswith('rrc.csv'), os.listdir(dir))
mi_rrc_filename = list(matches)[0]
mi_rrc_file = os.path.join(dir, mi_rrc_filename)
mi_rrc_df = pd.read_csv(mi_rrc_file)
# NOTE(review): only the "time" column is converted here, while parse_mi_ho
# reads a "Timestamp" column - confirm the table provides both.
mi_rrc_df["time"] = mi_rrc_df["time"].swifter.apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
HO_events = parse_mi_ho(mi_rrc_df)
# Connection release/request events are dropped.
HO_events.pop('Conn_Rel'), HO_events.pop('Conn_Req')

columns = [
    "Timestamp",
    "lat", "long", "gpsspeed",
    'LTE_HO','MN_HO','eNB_to_ENDC','gNB_Rel','gNB_HO','RLF_II','RLF_III','SCG_RLF',
    "RSRP","RSRQ","RSRP1","RSRQ1","RSRP2","RSRQ2",
    "nr-RSRP","nr-RSRQ","nr-RSRP1","nr-RSRQ1","nr-RSRP2","nr-RSRQ2",
    "DL-lat", "DL-lossrate", "UL-lat", "UL-lossrate"
]
f.write(",".join(columns)+"\n")

i_ = [0,0,0,0,0,0,0] # For increase speed
for time_point in [start + dt.timedelta(seconds=i) for i in range(0, N+1, TS)]:

    # ========================================================================
    # Get DL/UL latency, loss...
    perfermance_related = []

    dl_lats = []
    for i in range(i_[0], len(dl_lat_df)):
        t = dl_lat_df['Timestamp'].iloc[i]
        if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
            dl_lat = float(dl_lat_df['latency'].iloc[i])
            dl_lats.append(dl_lat)
            # if lat >  excessive_latency_value:
            #     excessive_latency.append(lat)
        elif t > time_point:
            i_[0] = i
            break

    if len(dl_lats) == 0:
        # print(f"{time_point} No package arrive")
        # perfermance_related.append('-')
        perfermance_related.append(dl_avg_lat) # Apeend previous value
    else:
        dl_avg_lat = sum(dl_lats)/len(dl_lats)
        # print(f"{time_point} average latency: {avg_lat}")
        perfermance_related.append(dl_avg_lat)

    dl_losses = []
    for i in range(i_[1], len(dl_loss_df)):
        t = dl_loss_df['Timestamp'].iloc[i]
        if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
            dl_losses.append(t)
        elif t > time_point:
            i_[1] = i
            break

    if (len(dl_losses)+len(dl_lats)) == 0:
        # print(f"{time_point} No package arrive")
        perfermance_related.append('-')
    else:
        loss_rate = len(dl_losses)/(len(dl_losses)+len(dl_lats))
        # print(f"{time_point} loss rate: {loss_rate}")
        perfermance_related.append(loss_rate)

    ul_lats = []
    for i in range(i_[2], len(ul_lat_df)):
        t = ul_lat_df['Timestamp'].iloc[i]
        if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
            ul_lat = float(ul_lat_df['latency'].iloc[i])
            ul_lats.append(ul_lat)
            # if lat >  excessive_latency_value:
            #     excessive_latency.append(lat)
        elif t > time_point:
            i_[2] = i
            break

    if len(ul_lats) == 0:
        # print(f"{time_point} No package arrive")
        perfermance_related.append('-')
        perfermance_related.append(ul_avg_lat) # Apeend previous value
    else:
        ul_avg_lat = sum(ul_lats)/len(ul_lats)
        # print(f"{time_point} average latency: {avg_lat}")
        perfermance_related.append(ul_avg_lat)

    ul_losses = []
    for i in range(i_[3], len(ul_loss_df)):
        t = ul_loss_df['Timestamp'].iloc[i]
        if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
            ul_losses.append(t)
        elif t > time_point:
            i_[3] = i
            break

    if (len(ul_losses)+len(ul_lats)) == 0:
        # print(f"{time_point} No package arrive")
        perfermance_related.append('-')
    else:
        loss_rate = len(ul_losses)/(len(ul_losses)+len(ul_lats))
        # print(f"{time_point} loss rate: {loss_rate}")
        perfermance_related.append(loss_rate)

    perfermance_related = [str(feature) for feature in perfermance_related]

    # ==========================================================================
    # Get GPS informations
    gps_related = []

    for i in range(i_[4], len(ci_df)):
        t = ci_df['Date'].iloc[i]
        lat = ci_df['GPSLat'].iloc[i]
        long = ci_df['GPSLon'].iloc[i]
        gpsspeed = ci_df['GPSSpeed'].iloc[i]
        if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
            gps_info = GPS_info(lat=lat,long=long,gpsspeed=gpsspeed)
        elif t > time_point:
            i_[4] = i
            break
    

    gps_related += [gps_info.lat, gps_info.long, gps_info.gpsspeed]
    gps_related = [str(feature) for feature in gps_related]
    # print(f"{time_point} {gps_info}")
    
    # ===========================================================================
    # Get signal strength informations
    ss_related = []

    SS_DICT = ss_dict()
    for i in range(i_[5], len(mi_ml1_df)):
        t = mi_ml1_df['time'].iloc[i]
        serv_cell_idx = mi_ml1_df['Serving Cell Index'].iloc[i]
        
        if (time_point - dt.timedelta(seconds=tp_range) < t <= time_point) and serv_cell_idx=='PCell':
            SS_DICT += ss_dict(mi_ml1_df.iloc[i])
        elif t > time_point:
            i_[5] = i
            break
    
    # Get primary serv cell rsrp, rsrq 
    if len(SS_DICT.dict["PCell"][0]) != 0:
        pcell_rsrp = sum(SS_DICT.dict["PCell"][0])/len(SS_DICT.dict["PCell"][0])
        pcell_rsrq = sum(SS_DICT.dict["PCell"][1])/len(SS_DICT.dict["PCell"][0])
    else:
        # pcell_rsrp, pcell_rsrq = '-', '-'
        pcell_rsrp, pcell_rsrq = pcell_rsrp, pcell_rsrq # No sample value, use the previous one
    SS_DICT.dict.pop("PCell") 

    # Get 1st, 2nd neighbor cell rsrp, rsrq
    if len(SS_DICT.dict) != 0:
        cell1 = max(SS_DICT.dict, key=lambda x:sum(SS_DICT.dict[x][0])/len(SS_DICT.dict[x][0]))
        cell1_rsrp = sum(SS_DICT.dict[cell1][0])/len(SS_DICT.dict[cell1][0])
        cell1_rsrq = sum(SS_DICT.dict[cell1][1])/len(SS_DICT.dict[cell1][0])
        SS_DICT.dict.pop(cell1)
    else:
        # cell1_rsrp, cell1_rsrq = '-', '-'
        cell1_rsrp, cell1_rsrq = 0,0 # No sample value, assign 0

    if len(SS_DICT.dict) != 0:
        cell2 = max(SS_DICT.dict, key=lambda x:sum(SS_DICT.dict[x][0])/len(SS_DICT.dict[x][0]))
        cell2_rsrp = sum(SS_DICT.dict[cell2][0])/len(SS_DICT.dict[cell2][0])
        cell2_rsrq = sum(SS_DICT.dict[cell2][1])/len(SS_DICT.dict[cell2][0])
        SS_DICT.dict.pop(cell2)
    else:
        # cell2_rsrp, cell2_rsrq = '-', '-'
        cell2_rsrp, cell2_rsrq = 0,0 # No sample value, assign 0

        # print(f"{time_point} {pcell_rsrp}, {pcell_rsrq} {cell1_rsrp}, {cell1_rsrq} {cell2_rsrp}, {cell2_rsrq}")
    ss_related += [pcell_rsrp, pcell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]

    NR_SS_DICT = nr_ss_dict()
    for i in range(i_[6], len(nr_mi_ml1_df)):
        t = nr_mi_ml1_df['time'].iloc[i]
        serv_cell_idx = nr_mi_ml1_df['Serving Cell PCI'].iloc[i]
        
        if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
            NR_SS_DICT += nr_ss_dict(nr_mi_ml1_df.iloc[i])

        elif t > time_point:
            i_[6] = i
            break
    
    # Get primary secondary serv cell rsrp, rsrq 
    if len(NR_SS_DICT.dict["PSCell"][0]) != 0:
        pscell_rsrp = sum(NR_SS_DICT.dict["PSCell"][0])/len(NR_SS_DICT.dict["PSCell"][0])
        pscell_rsrq = sum(NR_SS_DICT.dict["PSCell"][1])/len(NR_SS_DICT.dict["PSCell"][0])
    else:
        # pscell_rsrp, pscell_rsrq = '-', '-'
        pscell_rsrp, pscell_rsrq = 0,0 # No nr serving or no sample value assign 0
    NR_SS_DICT.dict.pop("PSCell")

    # Get 1st, 2nd neighbor cell rsrp, rsrq
    if len(NR_SS_DICT.dict) != 0:
        cell1 = max(NR_SS_DICT.dict, key=lambda x:sum(NR_SS_DICT.dict[x][0])/len(NR_SS_DICT.dict[x][0]))
        cell1_rsrp = sum(NR_SS_DICT.dict[cell1][0])/len(NR_SS_DICT.dict[cell1][0])
        cell1_rsrq = sum(NR_SS_DICT.dict[cell1][1])/len(NR_SS_DICT.dict[cell1][0])
        NR_SS_DICT.dict.pop(cell1)
    else:
        # cell1_rsrp, cell1_rsrq = '-', '-'
        cell1_rsrp, cell1_rsrq = 0,0 # No sample value, assign 0

    if len(NR_SS_DICT.dict) != 0:
        cell2 = max(NR_SS_DICT.dict, key=lambda x:sum(NR_SS_DICT.dict[x][0])/len(NR_SS_DICT.dict[x][0]))
        cell2_rsrp = sum(NR_SS_DICT.dict[cell2][0])/len(NR_SS_DICT.dict[cell2][0])
        cell2_rsrq = sum(NR_SS_DICT.dict[cell2][1])/len(NR_SS_DICT.dict[cell2][0])
        NR_SS_DICT.dict.pop(cell2)
    else:
        # cell2_rsrp, cell2_rsrq = '-', '-'
        cell2_rsrp, cell2_rsrq = 0,0 # No sample value, assign 0
    
    # print(f"{time_point} {pscell_rsrp}, {pscell_rsrq} {cell1_rsrp}, {cell1_rsrq} {cell2_rsrp}, {cell2_rsrq}")
    ss_related += [pscell_rsrp, pscell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]

    ss_related = [str(feature) for feature in ss_related]

    # ================================================================================
    # Get HO informations
    HO_related = [0] * len(HO_events.keys())

    for i, ho_type in  enumerate(list(HO_events.keys())):
        for ho in HO_events[ho_type]:
            t = ho.start
            if (time_point - dt.timedelta(seconds=tp_range) < t <= time_point):
                HO_related[i] += 1
            elif t > time_point:
                break
    
    HO_related = [str(feature) for feature in HO_related]

    f.write(",".join([str(time_point)]+gps_related+HO_related+ss_related+perfermance_related)+"\n") 

f.close()

From 2022-12-22 15:04:43 to 2022-12-22 15:10:56.


In [8]:
# Notebook display of the current value of NR_SS_DICT
# (presumably the object left by the last iteration of the split loop -- depends on cell execution order)
NR_SS_DICT

{'422': [[-90.109, -92.797, -93.578, -121.141, -103.07], [-15.492, -16.68, -23.008, -28.727, -25.156], [Timestamp('2022-12-22 15:10:55.193070'), Timestamp('2022-12-22 15:10:55.352650'), Timestamp('2022-12-22 15:10:55.672631'), Timestamp('2022-12-22 15:10:55.832622'), Timestamp('2022-12-22 15:10:55.992652')]]}

# Create input from a directory.

In [48]:
def ss_append(d, key):
    """Append six signal-strength features of cell *key* to the global ``ss_related``.

    ``d[key][0]`` holds the RSRP samples and ``d[key][1]`` the RSRQ samples.
    The values are appended in this order: average RSRP, average RSRQ,
    first RSRP, last RSRP, first RSRQ, last RSRQ.
    """
    rsrp_samples = d[key][0]
    ss_related.append(Average(rsrp_samples))  ## Avg RSRP
    rsrq_samples = d[key][1]
    ss_related.append(Average(rsrq_samples))  ## Avg RSRQ
    # First and last measurement, RSRP before RSRQ
    for samples in (rsrp_samples, rsrq_samples):
        ss_related.append(samples[0])
        ss_related.append(samples[-1])

# Walk <dir>/<device>/<trace>/data and collect, for every trace folder, its
# '*txt_ml1_new.csv' file, plus the first 'cimon*new.csv' file found anywhere.
dir = "/home/wmnlab/Code_Test_Space/sheng-ru/test/_Bandlock_Udp"
All_ml1_files = []
CI_files = []

redo=0            # becomes 1 when an 'ml_data' output folder already exists
CI_down = False   # True once a cimon file has been recorded
for a in sorted(os.listdir(dir)):
    if a == 'ml_data':
        redo=1
        continue
    d0 = os.path.join(dir, a)
    for b in sorted(os.listdir(d0)):
        d00 = os.path.join(d0, b, 'data')
        # Reset per folder so a missing file cannot silently reuse the path
        # found in the previous folder
        d5 = None
        for c in sorted(os.listdir(d00)):
            if c.endswith('txt_ml1_new.csv'):
                d5 = os.path.join(d00, c)
            if c.startswith('cimon') and c.endswith('new.csv') and not CI_down:
                CI_files.append(os.path.join(d00,c))
                CI_down = True
        if d5 is None:
            raise FileNotFoundError(f"No '*txt_ml1_new.csv' file found in {d00}")
        All_ml1_files.append(d5)

p_num = len(os.listdir(dir))-redo # How many device
t_num = len(os.listdir(d0)) # How many trace

# Regroup the flat device-major list into one list of files per trace:
# trace i of device j sits at index i + t_num*j
ml1_files = []

for i in range(t_num):
    A = [All_ml1_files[(i)+t_num*j] for j in range(p_num)]
    ml1_files.append(A)

# Only the first cimon file is loaded
CI_dfs = []
df = pd.read_csv(CI_files[0])
df["Date"] = df["Date"].apply(lambda x: pd.to_datetime(x))
CI_dfs.append(df)

# Build one input CSV per trace: for every time point write the GPS position
# and speed plus per-band signal-strength features of the look-back window.
for tt in range(t_num):
    print(f"Processing trace #{tt+1}")

    ml1_df_list = []
    for file in ml1_files[tt]:
        mi_ml1_df = pd.read_csv(file)
        # .copy() so the column assignment below works on an independent frame
        mi_ml1_df = mi_ml1_df[mi_ml1_df.type_id == 'LTE_PHY_Connected_Mode_Intra_Freq_Meas'].copy()
        mi_ml1_df["time"] = mi_ml1_df["time"].apply(lambda x: pd.to_datetime(x)+dt.timedelta(hours=8))
        ml1_df_list.append(mi_ml1_df)

    print(f"Data loading for trace #{tt+1} done.")

    start, end = start_end[tt]
    TS = 30 # Time Slot (sec)
    tp_range = 30 # Every time point look back range (sec)
    num_cell = 3
    #####################################################
    out_dir = os.path.join(dir,'ml_data')
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir,f'input#{tt+1}.csv') ## Out file !!!!!!!!
    print(out_path)
    #####################################################
    n = int((end - start).total_seconds())
    signal_strength = ["avg_rsrp", "avg_rsrq", "rsrp_1st", "rsrp_last", "rsrq_1st", "rsrq_last"]
    gps = ['Latitude', 'Longtitude', 'GPSSpeed1','GPSSpeed2','GPSSpeed3','GPSSpeed4']
    B = ["B1 num"] + signal_strength * num_cell + ["B3 num"] + signal_strength * num_cell + ["B28 num"] + signal_strength * num_cell

    # Built once; the original rebuilt this list for every row just to find the last one
    time_points = [start + dt.timedelta(seconds=i) for i in range(0, n+1, TS)]

    with open(out_path, 'w') as f:
        f.write(','.join(['time'] + gps + B) + '\n')

        for tp_idx, time_point in enumerate(time_points):
            window_start = time_point - dt.timedelta(seconds=tp_range)
            window_mid = time_point - dt.timedelta(seconds=tp_range/2)
            ###################################################################
            # GPS
            CI_times = []
            gps_lat = []
            gps_lon = []
            gps_speed = []

            ci_df = CI_dfs[0]
            for i in range(len(ci_df)):
                t = ci_df["Date"].iloc[i]
                if window_start < t <= time_point:
                    CI_times.append(t)
                    gps_lat.append(ci_df["GPSLat"].iloc[i])
                    gps_lon.append(ci_df["GPSLon"].iloc[i])
                    gps_speed.append(ci_df["GPSSpeed"].iloc[i])
                elif t > time_point:
                    break

            if CI_times:
                # Position of the fix closest to the middle of the window
                # (the earliest one on ties)
                nearest = min(
                    range(len(CI_times)),
                    key=lambda k: abs((CI_times[k] - window_mid).total_seconds()),
                )
                latitude = gps_lat[nearest]
                lontitude = gps_lon[nearest]

                # Four speed samples spread evenly over the window
                ind = [round(v) for v in np.linspace(0,len(gps_speed)-1,4)]
                speed_related = [gps_speed[k] for k in ind]
                GPS_related = [latitude, lontitude] + speed_related
            else:
                # No GPS fix in this window: write placeholders instead of
                # failing with IndexError
                GPS_related = ['-'] * len(gps)
            GPS_related = [str(v) for v in GPS_related]
            ###################################################################
            # Signal Strength
            d = ss_dict()
            for df in ml1_df_list:
                for i in range(len(df)):
                    t = df["time"].iloc[i]
                    if window_start < t <= time_point:
                        d += ss_dict(df.iloc[i])
                    elif t > time_point:
                        break
            d.sort_dict_by_time()
            d = d.dict

            # ss_append() writes into this module-level list, so it has to
            # keep this name and stay at cell level
            ss_related = []
            b1 = pop_dict('275',d)
            b3 = pop_dict('1275',d)
            b28 = pop_dict('9560',d)

            for Band in [b1,b3,b28]:
                ss_related.append(len(Band)) ## Num of detected serv + nei cell
                # Emit the num_cell strongest cells by average RSRP, padding
                # with '-' once the band has no cells left
                for _ in range(num_cell):
                    if len(Band) == 0:
                        ss_related.extend(['-'] * 6)
                    else:
                        a = max(Band, key=lambda x:Average(Band[x][0]))
                        ss_append(Band, a)
                        Band.pop(a)

            ss_related = [str(v) for v in ss_related]
            ###################################################################
            line = ','.join([str(time_point)] + GPS_related + ss_related)
            # Every row except the last one is newline-terminated
            if tp_idx != len(time_points) - 1:
                line += '\n'
            f.write(line)
    print(f'Trace {tt+1} done.')

Processing trace #1
Data loading for trace #1 done.
/home/wmnlab/Code_Test_Space/sheng-ru/test/_Bandlock_Udp/ml_data/input#1.csv
Trace 1 done.
Processing trace #2
Data loading for trace #2 done.
/home/wmnlab/Code_Test_Space/sheng-ru/test/_Bandlock_Udp/ml_data/input#2.csv
Trace 2 done.


# Label input of all files from a directory

In [60]:
# Append the last column of every label file to the matching input file,
# write the result as '<input>_labeled.csv' and delete the unlabeled input.
dir = "/home/wmnlab/Code_Test_Space/sheng-ru/test/_Bandlock_Udp"
All_input_files = []

input_files = []
label_files = []
if 'ml_data' in os.listdir(dir):
    d0 = os.path.join(dir, 'ml_data')
    for b in sorted(os.listdir(d0)):
        if 'input' in b and 'label' not in b and '.csv#' not in b:
            input_files.append(os.path.join(d0, b))
        elif 'label' in b and 'input' not in b:
            label_files.append(os.path.join(d0, b))
else:
    print("Error, no dir ml_data")

# Input and label files are paired by their position after sorting
input_files.sort()
label_files.sort()

# Fail before touching anything; the original hit an IndexError mid-way
if len(label_files) < len(input_files):
    raise ValueError(
        f'{len(input_files)} input files but only {len(label_files)} label files'
    )

for i, (input_file, label_file) in enumerate(zip(input_files, label_files)):
    ##############################################
    labeld_input = input_file[:-4]+'_labeled.csv'
    ##############################################
    with open(input_file, 'r') as f1, open(label_file, 'r') as f2, open(labeld_input, 'w') as f3:
        for a, b in zip(f1, f2):
            # rstrip instead of a[:-1]: the last input line has no trailing
            # newline, so slicing cut off its final character
            f3.write(a.rstrip('\n') + ',' + b.split(',')[-1])
    print(f'Trace {i+1} done.')

for f in input_files:
    os.remove(f)
print('Original unlabeled file deleted.')

Trace 1 done.
Trace 2 done.
Original unlabeled file deleted.


# Database Building

In [1]:
# Full paths of every entry in the ml_data directory, in sorted order
ml_data = '/home/wmnlab/ml_data'
files = sorted(os.path.join(ml_data, entry) for entry in os.listdir(ml_data))

In [5]:
def find_text(x):
    """Return the '&'-separated tokens of a file name, sorted alphabetically.

    The text considered starts four characters after the first '#' in *x*
    and stops before the last four characters of *x*; it is split on '&',
    sorted and joined with '&' again.
    """
    marker = x.find('#')
    tokens = sorted(x[marker + 4:-4].split('&'))
    return '&'.join(tokens)

# Write the header of database.csv, then report for every ml_data file its
# band set and how many rows (at most the last experiment_time) are available.
columns = ['B3&B7', 'B3&B8', 'B7&B8']

experiment_time = 300  # maximum number of trailing rows considered per file

with open('/home/wmnlab/ntu-experiments/sheng-ru/experiment/mobileinsight/database.csv', 'w') as database:
    database.write(','.join(columns)+'\n')

    for file in files:
        band_set = find_text(file)
        df = pd.read_csv(file)

        print(band_set)

        # Use experiment_time instead of a second hard-coded 300
        if len(df) > experiment_time:
            print(len(df[-experiment_time:]))
        else:
            print(len(df))

B3&B7
300
B3&B8
300
B7&B8
300
B3&B7
274
B3&B8
274
B7&B8
274
