# Functions

In [5]:
import pandas as pd
import datetime as dt
import time
import numpy as np
import os
import swifter
from collections import namedtuple
from pathlib import Path

In [6]:
def Average(L):
    """Return the arithmetic mean of the values in ``L``.

    Raises ``ZeroDivisionError`` when ``L`` is empty.
    """
    total = sum(L)
    count = len(L)
    return total / count

In [7]:
def parse_mi_ho(df):
    """Classify handover / link-failure events found in an RRC message table.

    Rows whose ``type_id`` is ``"5G_NR_RRC_OTA_Packet"`` are skipped. Every
    other row is inspected for the flag columns read below (a flag counts as
    set when it equals ``1``) and may contribute to several event lists.

    Args:
        df: DataFrame with a ``Timestamp`` column whose values support
            subtraction yielding an object with ``total_seconds()``, plus the
            RRC flag/value columns referenced in the body.

    Returns:
        dict mapping an event name (``'Conn_Rel'``, ``'Conn_Req'``,
        ``'LTE_HO'``, ``'MN_HO'``, ``'eNB_to_ENDC'``, ``'gNB_Rel'``,
        ``'gNB_HO'``, ``'RLF'``, ``'SCG_RLF'``) to a list of
        ``HO(start, end, others)`` namedtuples; ``end`` and ``others``
        default to ``None``.
    """
    HO = namedtuple('HO', 'start, end, others', defaults=(None, None))

    def find_1st_after(i, t, target, look_after=1):
        """Return ``(timestamp, index)`` of the first row at or after ``i``
        whose ``target`` column is neither ``0`` nor ``'0'``, searching at
        most ``look_after`` seconds past ``t``; ``(None, None)`` if none."""
        for j in range(i, len(df)):
            t_ = df["Timestamp"].iloc[j]
            if (t_ - t).total_seconds() > look_after:
                return None, None
            if df[target].iloc[j] not in [0, '0']:
                return t_, j
        return None, None

    def find_1st_before(i, t, target, look_before=1):
        """Backward counterpart of ``find_1st_after``: scan from row ``i``
        down to row 0, at most ``look_before`` seconds before ``t``."""
        for j in range(i, -1, -1):
            t_ = df["Timestamp"].iloc[j]
            if (t - t_).total_seconds() > look_before:
                return None, None
            if df[target].iloc[j] not in [0, '0']:
                return t_, j
        # Reached the first row without a hit. The original fell off the end
        # here and returned a bare None, which broke tuple unpacking.
        return None, None

    D = {
        'Conn_Rel': [],
        'Conn_Req': [],     # Setup
        'LTE_HO': [],       # LTE -> newLTE
        'MN_HO': [],        # LTE + NR -> newLTE + NR
        'eNB_to_ENDC': [],  # LTE -> LTE + NR => NR setup
        'gNB_Rel': [],      # LTE + NR -> LTE
        'gNB_HO': [],       # LTE + NR -> LTE + newNR
        # 'HOF': [],        # Not defined yet.
        'RLF': [],
        'SCG_RLF': [],
    }

    for i in range(len(df)):
        if df["type_id"].iloc[i] == "5G_NR_RRC_OTA_Packet":
            continue

        t = df["Timestamp"].iloc[i]

        if df["rrcConnectionRelease"].iloc[i] == 1:
            D['Conn_Rel'].append(HO(start=t))

        if df["rrcConnectionRequest"].iloc[i] == 1:
            a = find_1st_after(i, t, 'rrcConnectionReconfigurationComplete', look_after=2)[0]
            b = find_1st_after(i, t, 'securityModeComplete', look_after=2)[0]
            # The end is the later of the two completions; if either one is
            # missing the end is unknown (same outcome as the old try/except).
            if a is None or b is None:
                end = None
            else:
                end = a if a > b else b
            D['Conn_Req'].append(HO(start=t, end=end))

        if df["lte-rrc.t304"].iloc[i] == 1:
            others = ''
            end, _ = find_1st_after(i, t, 'rrcConnectionReconfigurationComplete')
            serv_cell, target_cell = df["PCI"].iloc[i], df['lte_targetPhysCellId'].iloc[i]
            serv_freq, target_freq = df["Freq"].iloc[i], df['dl-CarrierFreq'].iloc[i]
            if df["SCellToAddMod-r10"].iloc[i] == 1:
                # The SCell index field holds '@'-separated entries.
                n = len(str(df["SCellIndex-r10.1"].iloc[i]).split('@'))
                others = f'Set up {n} SCell.'

            if serv_freq != target_freq:
                others += " Inter freq. HO"

            same_cell = serv_cell == target_cell and serv_freq == target_freq
            if df["nr-rrc.t304"].iloc[i] == 1 and df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
                if same_cell:
                    D['eNB_to_ENDC'].append(HO(start=t, end=end, others=others))
                else:
                    D['MN_HO'].append(HO(start=t, end=end, others=others))
            else:
                if same_cell:
                    scg_time, _ = find_1st_before(i, t, "scgFailureInformationNR-r15")
                    if scg_time is not None:
                        others += " Caused by scg-failure."
                    D['gNB_Rel'].append(HO(start=t, end=end, others=others))
                else:
                    D['LTE_HO'].append(HO(start=t, end=end, others=others))

        if df["nr-rrc.t304"].iloc[i] == 1 and not df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
            end, _ = find_1st_after(i, t, 'rrcConnectionReconfigurationComplete')
            D['gNB_HO'].append(HO(start=t, end=end))

        if df["rrcConnectionReestablishmentRequest"].iloc[i] == 1:
            end, _ = find_1st_after(i, t, 'rrcConnectionReestablishmentComplete', look_after=1)
            reject, _ = find_1st_after(i, t, 'rrcConnectionReestablishmentReject', look_after=1)
            others = df["reestablishmentCause"].iloc[i]
            if end is not None:
                # Type II
                D['RLF'].append(HO(start=t, end=end, others=others))
            else:
                # Type III (end taken from the reject message, may be None)
                D['RLF'].append(HO(start=t, end=reject, others=others))

        if df["scgFailureInformationNR-r15"].iloc[i] == 1:
            others = df["failureType-r15"].iloc[i]
            D['SCG_RLF'].append(HO(start=t, others=others))

    return D

In [8]:
def pop_dict(band, d):
    """Return a shallow copy of ``d`` keeping only keys that end with ``' ' + band``."""
    suffix = ' ' + band
    kept = d.copy()
    for name in d:
        if name.endswith(suffix):
            continue
        del kept[name]
    return kept

class ss_dict:
    """Accumulator of LTE signal-strength samples, keyed by cell.

    ``self.dict`` maps a key to three parallel lists
    ``[rsrp_values, rsrq_values, timestamps]``. The key ``'PCell'`` always
    exists in a freshly built instance; every other key is the string
    ``"<PCI> <EARFCN>"``.
    """

    # Position of the first neighbour-cell field in a measurement row;
    # neighbours follow as consecutive (id, RSRP, RSRQ) triples.
    _FIRST_NEIGHBOR_POS = 9

    def __init__(self, pd_data=None, d=None):
        """Build from one measurement row (``pd_data``, e.g. ``df.iloc[i]``)
        and/or adopt an existing mapping ``d`` (stored by reference)."""
        self.dict = {'PCell': [[], [], []]}
        if pd_data is not None:
            self.nei_cell(pd_data)
            self.serv_cell(pd_data)
        if d is not None:
            self.dict = d

    def serv_cell(self, pd_data):
        """Record the serving-cell sample of ``pd_data``.

        A row whose ``"Serving Cell Index"`` is ``"PCell"`` is appended to
        the ``'PCell'`` entry; any other row replaces the ``"<PCI> <EARFCN>"``
        entry with this single sample.
        """
        earfcn = pd_data["EARFCN"]
        serv_cell_id = pd_data["Serving Cell Index"]
        pci = pd_data["PCI"]
        rsrp = float(pd_data["RSRP(dBm)"])
        rsrq = float(pd_data["RSRQ(dB)"])
        t = pd_data["Timestamp"]
        if serv_cell_id == "PCell":
            self.dict['PCell'][0].append(rsrp)
            self.dict['PCell'][1].append(rsrq)
            self.dict['PCell'][2].append(t)
        else:
            self.dict[pci + ' ' + earfcn] = [[rsrp], [rsrq], [t]]

    def nei_cell(self, pd_data):
        """Record every neighbour-cell triple of ``pd_data``.

        Scanning stops at the first triple whose id field is ``'-'`` or NaN.
        Fields are read by position via ``.iloc``; indexing a label-indexed
        Series with integer keys is deprecated in pandas.
        """
        earfcn = pd_data["EARFCN"]
        t = pd_data["Timestamp"]
        fields = pd_data.iloc
        # Stop two short of the end so an incomplete trailing triple is ignored.
        for i in range(self._FIRST_NEIGHBOR_POS, len(pd_data) - 2, 3):
            cell_id = fields[i]
            if cell_id == '-' or np.isnan(float(cell_id)):
                break
            rsrp = float(fields[i + 1])
            rsrq = float(fields[i + 2])
            self.dict[str(cell_id) + ' ' + earfcn] = [[rsrp], [rsrq], [t]]

    @staticmethod
    def _merge_into(dst, src):
        """Extend ``dst``'s per-cell lists with ``src``'s, copying new entries
        so the two mappings never share list objects."""
        for key, cols in src.items():
            if key in dst:
                for k in range(3):
                    dst[key][k].extend(cols[k])
            else:
                dst[key] = [list(cols[0]), list(cols[1]), list(cols[2])]

    def __add__(self, sd2):
        """Return a new ``ss_dict`` holding the samples of both operands.

        Neither operand is modified (the previous implementation mutated
        ``self`` and aliased ``sd2``'s lists).
        """
        merged = {key: [list(col) for col in cols] for key, cols in self.dict.items()}
        self._merge_into(merged, sd2.dict)
        return ss_dict(d=merged)

    def __iadd__(self, sd2):
        """In-place accumulation used by ``acc += ss_dict(row)`` loops."""
        self._merge_into(self.dict, sd2.dict)
        return self

    def __repr__(self):
        return str(self.dict)

    def sort_dict_by_time(self):
        """Re-order every cell's samples by timestamp (stable sort)."""
        def sort_element(element):
            rows = [[element[0][i], element[1][i], element[2][i]] for i in range(len(element[0]))]
            rows.sort(key=lambda row: row[2])
            return [[r[0] for r in rows], [r[1] for r in rows], [r[2] for r in rows]]

        self.dict = {key: sort_element(cols) for key, cols in self.dict.items()}


class nr_ss_dict:
    """Accumulator of NR signal-strength samples, keyed by cell.

    ``self.dict`` maps a key to three parallel lists
    ``[rsrp_values, rsrq_values, timestamps]``. The key ``'PSCell'`` always
    exists in a freshly built instance; other keys are the cell id values
    read from the measurement row.
    """

    # Position of the first cell field in a measurement row; cells follow as
    # consecutive (id, RSRP, RSRQ) triples.
    _FIRST_CELL_POS = 6

    def __init__(self, pd_data=None, d=None):
        """Build from one measurement row (``pd_data``, e.g. ``df.iloc[i]``)
        and/or adopt an existing mapping ``d`` (stored by reference)."""
        self.dict = {'PSCell': [[], [], []]}
        if pd_data is not None:
            self.nei_cell(pd_data)
            self.serv_cell(pd_data)
        if d is not None:
            self.dict = d

    def serv_cell(self, pd_data):
        """Move the entry whose key equals ``"Serving Cell PCI"`` (if any)
        into the ``'PSCell'`` entry. Must run after ``nei_cell``."""
        self.pscell = pd_data["Serving Cell PCI"]
        if self.pscell in self.dict:
            serving = self.dict[self.pscell]
            for k in range(3):
                self.dict["PSCell"][k] += serving[k]
            self.dict.pop(self.pscell)

    def nei_cell(self, pd_data):
        """Record every (id, RSRP, RSRQ) triple of ``pd_data``.

        Scanning stops at the first triple whose id field is ``'-'`` or NaN.
        Fields are read by position via ``.iloc``; indexing a label-indexed
        Series with integer keys is deprecated in pandas.
        """
        t = pd_data["Timestamp"]
        fields = pd_data.iloc
        # Stop two short of the end so an incomplete trailing triple is ignored.
        for i in range(self._FIRST_CELL_POS, len(pd_data) - 2, 3):
            cell_id = fields[i]
            if cell_id == '-' or np.isnan(float(cell_id)):
                break
            rsrp = float(fields[i + 1])
            rsrq = float(fields[i + 2])
            self.dict[cell_id] = [[rsrp], [rsrq], [t]]

    def __repr__(self):
        return str(self.dict)

    @staticmethod
    def _merge_into(dst, src):
        """Extend ``dst``'s per-cell lists with ``src``'s, copying new entries
        so the two mappings never share list objects."""
        for key, cols in src.items():
            if key in dst:
                for k in range(3):
                    dst[key][k].extend(cols[k])
            else:
                dst[key] = [list(cols[0]), list(cols[1]), list(cols[2])]

    def __add__(self, sd2):
        """Return a new ``nr_ss_dict`` holding the samples of both operands.

        Neither operand is modified (the previous implementation mutated
        ``self`` and aliased ``sd2``'s lists).
        """
        merged = {key: [list(col) for col in cols] for key, cols in self.dict.items()}
        self._merge_into(merged, sd2.dict)
        return nr_ss_dict(d=merged)

    def __iadd__(self, sd2):
        """In-place accumulation used by ``acc += nr_ss_dict(row)`` loops."""
        self._merge_into(self.dict, sd2.dict)
        return self

# Create Dual-Radio CSV Data

In [None]:
def data_create_dual(dir1, dir2, ci_df, outfile, ul_df, dl_df):
    """Write one CSV row of features per time point for a pair of radios.

    For every time point the row contains: the time point, the latest GPS
    fix, then for each of the two radios its handover-event counts and
    LTE/NR signal-strength summary, and finally downlink/uplink loss,
    excessive-latency counts and mean latency.

    NOTE: besides its parameters this function reads the module-level names
    ``start``, ``N``, ``TS``, ``tp_range``, ``Setting``, ``dev1`` and
    ``dev2``, which must be defined before it is called.

    Args:
        dir1, dir2: Trace directories of the two radios; each must contain a
            ``data`` sub-directory with two ``*ml1.csv`` files (first in
            sorted order is read as LTE, second as NR) and one ``*rrc.csv``.
        ci_df: DataFrame with ``Date``, ``GPSLat``, ``GPSLon``, ``GPSSpeed``.
        outfile: Path of the CSV file to create (overwritten).
        ul_df, dl_df: Uplink / downlink DataFrames with ``Timestamp`` and the
            ``lost_<b1>+<b2>`` / ``latency_<b1>+<b2>`` columns.

    All input tables are assumed to be sorted by time: each scan resumes
    from the first row found past the previous time point.
    """
    excessive_latency_value = 0.1  # threshold, same unit as the latency column
    GPS_info = namedtuple('gps_info', 'lat, long, gpsspeed')
    # Shift applied to every modem-log timestamp (presumably UTC -> UTC+8;
    # TODO confirm against the capture setup).
    shift = dt.timedelta(hours=8)

    def _load_radio(data_dir):
        """Load (LTE ml1 table, NR ml1 table, HO events) of one radio."""
        ml1_filenames = sorted(x for x in os.listdir(data_dir) if x.endswith('ml1.csv'))

        lte = pd.read_csv(os.path.join(data_dir, ml1_filenames[0]), dtype=str)
        lte["Timestamp"] = lte["Timestamp"].apply(lambda x: pd.to_datetime(x) + shift)

        nr = pd.read_csv(os.path.join(data_dir, ml1_filenames[1]), dtype=str)
        nr["Timestamp"] = nr["Timestamp"].apply(lambda x: pd.to_datetime(x) + shift)

        rrc_filename = [x for x in os.listdir(data_dir) if x.endswith('rrc.csv')][0]
        rrc = pd.read_csv(os.path.join(data_dir, rrc_filename))
        rrc["Timestamp"] = rrc["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x) + shift)
        events = parse_mi_ho(rrc)
        # Connection setup/release are not exported as features.
        events.pop('Conn_Rel')
        events.pop('Conn_Req')
        return lte, nr, events

    def _pop_strongest(cells):
        """Remove the cell with the highest mean RSRP from ``cells`` and
        return its (mean RSRP, mean RSRQ); ``(0, 0)`` when ``cells`` is empty."""
        if len(cells) == 0:
            return 0, 0  # No sample value, assign 0
        best = max(cells, key=lambda k: sum(cells[k][0]) / len(cells[k][0]))
        samples = cells.pop(best)
        count = len(samples[0])
        return sum(samples[0]) / count, sum(samples[1]) / count

    def _link_stats(link_df, first, window_start, time_point):
        """Scan ``link_df`` from row ``first`` for packets inside the window.

        Returns (latencies, excessive count, loss count, next start row)."""
        latencies, n_excessive, n_lost = [], 0, 0
        next_first = first
        for i in range(first, len(link_df)):
            t = link_df['Timestamp'].iloc[i]
            if window_start < t <= time_point:
                if link_df[loss_col].iloc[i]:
                    n_lost += 1
                else:
                    latency = float(link_df[latency_col].iloc[i])
                    latencies.append(latency)
                    if latency > excessive_latency_value:
                        n_excessive += 1
            elif t > time_point:
                next_first = i
                break
        return latencies, n_excessive, n_lost, next_first

    # Per-radio inputs, index 0 -> dir1, index 1 -> dir2.
    mi_ml1_dfs = []
    nr_mi_ml1_dfs = []
    HO_events_list = []
    for d in (os.path.join(dir1, "data"), os.path.join(dir2, "data")):
        lte_df, nr_df, events = _load_radio(d)
        mi_ml1_dfs.append(lte_df)
        nr_mi_ml1_dfs.append(nr_df)
        HO_events_list.append(events)

    columns = ["Timestamp", "lat", "long", "gpsspeed"]+[
        'LTE_HO','MN_HO','eNB_to_ENDC','gNB_Rel','gNB_HO','RLF','SCG_RLF',
        "num_of_neis","RSRP","RSRQ","RSRP1","RSRQ1","RSRP2","RSRQ2",
        "nr-RSRP","nr-RSRQ","nr-RSRP1","nr-RSRQ1","nr-RSRP2","nr-RSRQ2",
    ]*2 + ["dl-loss", "ul-loss", "dl-exc-lat", "ul-exc-lat","dl-latency", "ul-latency"]

    loss_col = f"lost_{Setting[dev1]}+{Setting[dev2]}"
    latency_col = f"latency_{Setting[dev1]}+{Setting[dev2]}"
    window = dt.timedelta(seconds=tp_range)

    # Resume cursors so each table is scanned only once overall.
    i_ci = 0
    i_pcap = [0, 0]            # [downlink, uplink]
    i_ = [[0, 0], [0, 0]]      # per radio: [LTE cursor, NR cursor]
    data_buffers = [{'rsrp': 0, 'rsrq': 0}, {'rsrp': 0, 'rsrq': 0}]

    # Values carried over from the previous time point when a window is empty.
    gps_info = GPS_info(lat='-', long='-', gpsspeed='-')
    # 0 until the first packet is seen (previously an empty first window
    # raised UnboundLocalError).
    dl_avg_lat = 0
    ul_avg_lat = 0

    with open(outfile, 'w') as f:
        f.write(",".join(columns)+"\n")

        for time_point in [start + dt.timedelta(seconds=i) for i in range(0, N+1, TS)]:
            window_start = time_point - window
            ss_relateds = []
            HO_relateds = []

            # ----------------------------------------------------------------
            # GPS: keep the last fix inside the window, else the previous one.
            for i in range(i_ci, len(ci_df)):
                t = ci_df['Date'].iloc[i]
                if window_start < t <= time_point:
                    gps_info = GPS_info(
                        lat=ci_df['GPSLat'].iloc[i],
                        long=ci_df['GPSLon'].iloc[i],
                        gpsspeed=ci_df['GPSSpeed'].iloc[i],
                    )
                elif t > time_point:
                    i_ci = i
                    break
            gps_related = [str(gps_info.lat), str(gps_info.long), str(gps_info.gpsspeed)]

            for j in range(2):
                # Use THIS radio's tables (the original reused the variables
                # left over from loading the second radio for both radios).
                lte_df = mi_ml1_dfs[j]
                nr_df = nr_mi_ml1_dfs[j]
                ho_events = HO_events_list[j]

                # ------------------------------------------------------------
                # LTE signal strength (PCell rows only).
                SS_DICT = ss_dict()
                for i in range(i_[j][0], len(lte_df)):
                    t = lte_df['Timestamp'].iloc[i]
                    serv_cell_idx = lte_df['Serving Cell Index'].iloc[i]
                    if (window_start < t <= time_point) and serv_cell_idx == 'PCell':
                        SS_DICT += ss_dict(lte_df.iloc[i])
                    elif t > time_point:
                        i_[j][0] = i
                        break

                num_of_nei = len(SS_DICT.dict) - 1

                pcell = SS_DICT.dict.pop("PCell")
                if len(pcell[0]) != 0:
                    pcell_rsrp = sum(pcell[0]) / len(pcell[0])
                    pcell_rsrq = sum(pcell[1]) / len(pcell[0])
                    data_buffers[j]['rsrp'], data_buffers[j]['rsrq'] = pcell_rsrp, pcell_rsrq
                else:
                    # No sample value, use the previous one
                    pcell_rsrp, pcell_rsrq = data_buffers[j]['rsrp'], data_buffers[j]['rsrq']

                # Strongest and second-strongest neighbour cells.
                cell1_rsrp, cell1_rsrq = _pop_strongest(SS_DICT.dict)
                cell2_rsrp, cell2_rsrq = _pop_strongest(SS_DICT.dict)
                ss_related = [num_of_nei, pcell_rsrp, pcell_rsrq,
                              cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]

                # ------------------------------------------------------------
                # NR signal strength.
                NR_SS_DICT = nr_ss_dict()
                for i in range(i_[j][1], len(nr_df)):
                    t = nr_df['Timestamp'].iloc[i]
                    if window_start < t <= time_point:
                        NR_SS_DICT += nr_ss_dict(nr_df.iloc[i])
                    elif t > time_point:
                        i_[j][1] = i
                        break

                pscell = NR_SS_DICT.dict.pop("PSCell")
                if len(pscell[0]) != 0:
                    pscell_rsrp = sum(pscell[0]) / len(pscell[0])
                    pscell_rsrq = sum(pscell[1]) / len(pscell[0])
                else:
                    # No nr serving or no sample value assign 0
                    pscell_rsrp, pscell_rsrq = 0, 0

                cell1_rsrp, cell1_rsrq = _pop_strongest(NR_SS_DICT.dict)
                cell2_rsrp, cell2_rsrq = _pop_strongest(NR_SS_DICT.dict)
                ss_related += [pscell_rsrp, pscell_rsrq,
                               cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]
                ss_relateds.append([str(feature) for feature in ss_related])

                # ------------------------------------------------------------
                # Handover events: count those starting inside the window.
                HO_related = [0] * len(ho_events)
                for i, ho_type in enumerate(ho_events):
                    for ho in ho_events[ho_type]:
                        t = ho.start
                        if window_start < t <= time_point:
                            HO_related[i] += 1
                        elif t > time_point:
                            break
                HO_relateds.append([str(feature) for feature in HO_related])

            # ----------------------------------------------------------------
            # DL/UL loss, excessive latency and mean latency.
            dl_lats, dl_exc_num, dl_loss_num, i_pcap[0] = _link_stats(
                dl_df, i_pcap[0], window_start, time_point)
            if len(dl_lats) != 0:
                dl_avg_lat = sum(dl_lats) / len(dl_lats)
            # else: no packet arrived; keep the previous value

            ul_lats, ul_exc_num, ul_loss_num, i_pcap[1] = _link_stats(
                ul_df, i_pcap[1], window_start, time_point)
            if len(ul_lats) != 0:
                ul_avg_lat = sum(ul_lats) / len(ul_lats)
            # else: no packet arrived; keep the previous value

            performance_related = [
                str(feature) for feature in
                [dl_loss_num, ul_loss_num, dl_exc_num, ul_exc_num, dl_avg_lat, ul_avg_lat]
            ]

            f.write(",".join(
                [str(time_point)] + gps_related
                + HO_relateds[0] + ss_relateds[0]
                + HO_relateds[1] + ss_relateds[1]
                + performance_related
            ) + "\n")

In [None]:
# Driver cell for the dual-radio export: for every trace under
# <base_dir>/combo and every unordered pair of device directories, call
# data_create_dual() to write one feature CSV.
#
# NOTE: data_create_dual() reads several names assigned in this cell as
# module-level globals (Setting, start, N, TS, tp_range, dev1, dev2), so
# they must keep these exact names and be assigned before the call.

# Maps the last four characters of a device directory name to the label
# used in output file names (and looked up by data_create_dual()).
Setting = {'qc00': 'All', 'qc01': 'LTE', 'qc02': 'B7', 'qc03': 'B8'}
# base_dir = '/home/wmnlab/D/sheng-ru/test/test_data/'
base_dir = '/home/wmnlab/D/database/2023-04-17/_Bandlock_Udp_All_LTE_B7_B8_RM500Q/'
# Trace names listed here are skipped by the loop below.
weird_trace = []
# Second-to-last non-empty path component of base_dir (the date folder in
# the path above); used as the output file-name prefix.
date = [x for x in list(base_dir.split('/')) if len(x) != 0][-2]

# Device directories are the entries of base_dir starting with 'qc' or 'sm'.
matches = filter(lambda x: x.startswith('qc') or x.startswith('sm'), os.listdir(base_dir))
combo_dir = os.path.join(base_dir, 'combo')
device_dir = [os.path.join(base_dir, x) for x in list(matches)]
device_dir.sort()

# The GPS/"ci" file lives in the parent of base_dir; when several entries
# contain 'ci', the last one in sorted order is used.
parent_dir = str(Path(base_dir).parent.absolute())
matches = list(filter(lambda x: 'ci' in x, os.listdir(parent_dir)))
matches.sort()
ci_file = os.path.join(parent_dir, matches[-1])

try:
    ci_df = pd.read_csv(ci_file, dtype=str)
    ci_df["Date"] = ci_df["Date"].swifter.apply(lambda x: pd.to_datetime(x))
except pd.errors.ParserError:
    # The raw file could not be parsed: run the external preprocessing
    # script on its directory, then read the '<name>_new.csv' file instead.
    print(f'preprocess {ci_file}')
    gps_dir = '/'.join(ci_file.split('/')[:-1])
    os.system(f'python3 ./csv_processing.py {gps_dir}')
    ci_df = pd.read_csv(ci_file[:-4]+'_new.csv', dtype=str)
    ci_df["Date"] = ci_df["Date"].swifter.apply(lambda x: pd.to_datetime(x))



for trace in sorted(os.listdir(combo_dir)):
    
    if trace in weird_trace:
        continue

    ct_dir = os.path.join(combo_dir,trace) # combo + trace directory

    # Uplink loss/latency table of this trace.
    ul_loss_lat = os.path.join(ct_dir, "udp_uplk_combo_loss_latency.csv")
    ul_loss_lat_df = pd.read_csv(ul_loss_lat)
    ul_loss_lat_df["Timestamp"] = ul_loss_lat_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

    # Downlink loss/latency table of this trace.
    dl_loss_lat = os.path.join(ct_dir, "udp_dnlk_combo_loss_latency.csv")
    dl_loss_lat_df = pd.read_csv(dl_loss_lat)
    dl_loss_lat_df["Timestamp"] = dl_loss_lat_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))

    # Build the time-point grid from the downlink table.
    front_cut, back_cut = 1, 0  # seconds trimmed from the start / end
    TS = 1        # step between time points, in seconds
    tp_range = 1  # length of the window preceding each time point, in seconds
    start = dl_loss_lat_df["Timestamp"].iloc[0] + dt.timedelta(seconds=front_cut) # first downlink timestamp plus the front cut
    end = dl_loss_lat_df["Timestamp"].iloc[-1] - dt.timedelta(seconds=back_cut)
    # Drop the sub-second part so the time points fall on whole seconds.
    start, end = start.replace(microsecond=0), end.replace(microsecond=0)
    print(f'Trace {trace} from {start} to {end}.')
    N = int((end - start).total_seconds()) # whole seconds between start and end


    # Every unordered pair (device, device2) with device listed first.
    for i, device in enumerate(device_dir):
        for j in range(i+1,len(device_dir)):
            device2 = device_dir[j]
            
            dt_dir = os.path.join(device, trace) # device + trace directory
            dt_dir2 = os.path.join(device2, trace)
            print(dt_dir, dt_dir2)
            # The last four characters of each device path are its Setting key.
            dev1, dev2 = device[-4:], device2[-4:]
            b1, b2 = Setting[dev1], Setting[dev2]
            # NOTE(review): date and trace are concatenated with no separator
            # in the file name; confirm this is intended.
            outfile = os.path.join('/home/wmnlab/D/sheng-ru/ml_data/dual/', date + f'{trace}_{b1}&{b2}.csv')
            print(outfile)
            print(dev1, dev2)
            data_create_dual(dt_dir, dt_dir2, ci_df, outfile, ul_loss_lat_df, dl_loss_lat_df) 


# Single Radio

In [9]:
# Single Radio
def data_create(dir, ci_df, outfile, ul_df, dl_df):
    base_dir = dir
    # out_file = "/home/wmnlab/test1.csv" ## Out file !!!!!!!!
    out_file = outfile
    f = open(out_file, 'w') 

    d = os.path.join(base_dir,"data")

    excessive_latency_value = 0.1

    GPS_info = namedtuple('gps_info','lat, long, gpsspeed')
    
    # Collect rsrp infomation
    mi_ml1_dfs = []
    nr_mi_ml1_dfs = []
    HO_events_list = []
    
    matches = filter(lambda x: x.endswith('ml1.csv'), os.listdir(d))
    ml1_filenames = sorted(list(matches))
    mi_ml1_file = os.path.join(d, ml1_filenames[0])
    mi_ml1_df = pd.read_csv(mi_ml1_file, dtype=str)
    mi_ml1_df["Timestamp"] = mi_ml1_df["Timestamp"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
    mi_ml1_dfs.append(mi_ml1_df)

    nr_mi_ml1_file = os.path.join(d, ml1_filenames[1])
    nr_mi_ml1_df = pd.read_csv(nr_mi_ml1_file, dtype=str)
    nr_mi_ml1_df["Timestamp"] = nr_mi_ml1_df["Timestamp"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
    nr_mi_ml1_dfs.append(nr_mi_ml1_df)

    # Collect Ho information
    matches = filter(lambda x: x.endswith('rrc.csv'), os.listdir(d))
    mi_rrc_filename = list(matches)[0]
    mi_rrc_file = os.path.join(d, mi_rrc_filename)
    mi_rrc_df = pd.read_csv(mi_rrc_file)
    mi_rrc_df["Timestamp"] = mi_rrc_df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
    HO_events = parse_mi_ho(mi_rrc_df)
    HO_events.pop('Conn_Rel'), HO_events.pop('Conn_Req')
    HO_events_list.append(HO_events)

    columns = ["Timestamp", "lat", "long", "gpsspeed"]+[
        'LTE_HO','MN_HO','eNB_to_ENDC','gNB_Rel','gNB_HO','RLF','SCG_RLF',
        "num_of_neis","RSRP","RSRQ","RSRP1","RSRQ1","RSRP2","RSRQ2",
        "nr-RSRP","nr-RSRQ","nr-RSRP1","nr-RSRQ1","nr-RSRP2","nr-RSRQ2",
    ] + ["dl-loss", "ul-loss", "dl-exc-lat", "ul-exc-lat","dl-latency", "ul-latency"]


    f.write(",".join(columns)+"\n")

    i_ci = 0
    i_pcap = [0, 0]
    i_ = [0, 0] # For increase speed
    data_buffers = {'rsrp':0, 'rsrq':0}

    
    for time_point in [start + dt.timedelta(seconds=i) for i in np.arange(0, N+TS, TS, dtype='float64')]:
    # for time_point in [start + dt.timedelta(seconds=i) for i in range(0, N+1, TS)]:
        # Get GPS informations
        # ========================================================================
        gps_related = []

        for i in range(i_ci, len(ci_df)):
            t = ci_df['Date'].iloc[i]
            lat = ci_df['GPSLat'].iloc[i]
            long = ci_df['GPSLon'].iloc[i]
            gpsspeed = ci_df['GPSSpeed'].iloc[i]
            if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
                gps_info = GPS_info(lat=lat,long=long,gpsspeed=gpsspeed)
            elif t > time_point:
                i_ci = i
                break
        
        try : gps_info
        except:
            gps_info = GPS_info(lat='-',long='-',gpsspeed='-')

        gps_related += [gps_info.lat, gps_info.long, gps_info.gpsspeed]
        gps_related = [str(feature) for feature in gps_related]
        # print(f"{time_point} {gps_info}")

        # ==========================================================================
        # Get signal strength informations
        ss_related = []

        SS_DICT = ss_dict()
        for i in range(i_[0], len(mi_ml1_df)):
            t = mi_ml1_df['Timestamp'].iloc[i]
            serv_cell_idx = mi_ml1_df['Serving Cell Index'].iloc[i]
            
            if (time_point - dt.timedelta(seconds=tp_range) < t <= time_point) and serv_cell_idx=='PCell':
                SS_DICT += ss_dict(mi_ml1_df.iloc[i])
            elif t > time_point:
                i_[0] = i
                break
        
        num_of_nei = len(SS_DICT.dict) - 1

        # Get primary serv cell rsrp, rsrq 
        if len(SS_DICT.dict["PCell"][0]) != 0:
            pcell_rsrp = sum(SS_DICT.dict["PCell"][0])/len(SS_DICT.dict["PCell"][0])
            pcell_rsrq = sum(SS_DICT.dict["PCell"][1])/len(SS_DICT.dict["PCell"][0])
            data_buffers['rsrp'], data_buffers['rsrq'] = pcell_rsrp, pcell_rsrq
        else:
            pcell_rsrp, pcell_rsrq = data_buffers['rsrp'], data_buffers['rsrq'] # No sample value, use the previous one
        SS_DICT.dict.pop("PCell") 

        # Get 1st, 2nd neighbor cell rsrp, rsrq
        if len(SS_DICT.dict) != 0:
            cell1 = max(SS_DICT.dict, key=lambda x:sum(SS_DICT.dict[x][0])/len(SS_DICT.dict[x][0]))
            cell1_rsrp = sum(SS_DICT.dict[cell1][0])/len(SS_DICT.dict[cell1][0])
            cell1_rsrq = sum(SS_DICT.dict[cell1][1])/len(SS_DICT.dict[cell1][0])
            SS_DICT.dict.pop(cell1)
        else:
            # cell1_rsrp, cell1_rsrq = '-', '-'
            cell1_rsrp, cell1_rsrq = 0,0 # No sample value, assign 0

        if len(SS_DICT.dict) != 0:
            cell2 = max(SS_DICT.dict, key=lambda x:sum(SS_DICT.dict[x][0])/len(SS_DICT.dict[x][0]))
            cell2_rsrp = sum(SS_DICT.dict[cell2][0])/len(SS_DICT.dict[cell2][0])
            cell2_rsrq = sum(SS_DICT.dict[cell2][1])/len(SS_DICT.dict[cell2][0])
            SS_DICT.dict.pop(cell2)
        else:
            # cell2_rsrp, cell2_rsrq = '-', '-'
            cell2_rsrp, cell2_rsrq = 0,0 # No sample value, assign 0

            # print(f"{time_point} {pcell_rsrp}, {pcell_rsrq} {cell1_rsrp}, {cell1_rsrq} {cell2_rsrp}, {cell2_rsrq}")
        ss_related += [num_of_nei, pcell_rsrp, pcell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]

        NR_SS_DICT = nr_ss_dict()
        for i in range(i_[1], len(nr_mi_ml1_df)):
            t = nr_mi_ml1_df['Timestamp'].iloc[i]
            serv_cell_idx = nr_mi_ml1_df['Serving Cell PCI'].iloc[i]
            
            if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
                NR_SS_DICT += nr_ss_dict(nr_mi_ml1_df.iloc[i])

            elif t > time_point:
                i_[1] = i
                break
        
        # Get primary secondary serv cell rsrp, rsrq 
        if len(NR_SS_DICT.dict["PSCell"][0]) != 0:
            pscell_rsrp = sum(NR_SS_DICT.dict["PSCell"][0])/len(NR_SS_DICT.dict["PSCell"][0])
            pscell_rsrq = sum(NR_SS_DICT.dict["PSCell"][1])/len(NR_SS_DICT.dict["PSCell"][0])
        else:
            # pscell_rsrp, pscell_rsrq = '-', '-'
            pscell_rsrp, pscell_rsrq = 0,0 # No nr serving or no sample value assign 0
        NR_SS_DICT.dict.pop("PSCell")

        # Get 1st, 2nd neighbor cell rsrp, rsrq
        if len(NR_SS_DICT.dict) != 0:
            cell1 = max(NR_SS_DICT.dict, key=lambda x:sum(NR_SS_DICT.dict[x][0])/len(NR_SS_DICT.dict[x][0]))
            cell1_rsrp = sum(NR_SS_DICT.dict[cell1][0])/len(NR_SS_DICT.dict[cell1][0])
            cell1_rsrq = sum(NR_SS_DICT.dict[cell1][1])/len(NR_SS_DICT.dict[cell1][0])
            NR_SS_DICT.dict.pop(cell1)
        else:
            # cell1_rsrp, cell1_rsrq = '-', '-'
            cell1_rsrp, cell1_rsrq = 0,0 # No sample value, assign 0

        if len(NR_SS_DICT.dict) != 0:
            cell2 = max(NR_SS_DICT.dict, key=lambda x:sum(NR_SS_DICT.dict[x][0])/len(NR_SS_DICT.dict[x][0]))
            cell2_rsrp = sum(NR_SS_DICT.dict[cell2][0])/len(NR_SS_DICT.dict[cell2][0])
            cell2_rsrq = sum(NR_SS_DICT.dict[cell2][1])/len(NR_SS_DICT.dict[cell2][0])
            NR_SS_DICT.dict.pop(cell2)
        else:
            # cell2_rsrp, cell2_rsrq = '-', '-'
            cell2_rsrp, cell2_rsrq = 0,0 # No sample value, assign 0
        
        # print(f"{time_point} {pscell_rsrp}, {pscell_rsrq} {cell1_rsrp}, {cell1_rsrq} {cell2_rsrp}, {cell2_rsrq}")
        ss_related += [pscell_rsrp, pscell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]
        ss_related = [str(feature) for feature in ss_related]
        # ================================================================================
        # Get HO informations
        HO_related = [0] * len(HO_events.keys())

        for i, ho_type in  enumerate(list(HO_events.keys())):
            for ho in HO_events[ho_type]:
                t = ho.start
                if (time_point - dt.timedelta(seconds=tp_range) < t <= time_point):
                    
                    HO_related[i] += 1

                    # if HO_related[i] == 0:
                    #     HO_related[i] = (t - (time_point - dt.timedelta(seconds=tp_range))).total_seconds()
                    # else:
                    #     x = t - (time_point - dt.timedelta(seconds=tp_range))
                    #     HO_related[i] = str(HO_related[i]) + '@' + str(x.total_seconds())

                elif t > time_point:
                    break
        
        HO_related = [str(feature) for feature in HO_related]

        # ========================================================================
        # Get DL/UL latency, loss...
        performance_related = []

        loss_col = f"lost"
        latency_col = f"latency"
        
        dl_lats, dl_excessive_lats, dl_losses = [], [], []
        for i in range(i_pcap[0], len(dl_df)):
            t = dl_df['Timestamp'].iloc[i]
            if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:

                dl_loss = dl_df[loss_col].iloc[i]             
                
                if dl_loss:
                    dl_losses.append(t)
                else:
                    dl_lat = float(dl_df[latency_col].iloc[i])                
                    dl_lats.append(dl_lat)
                    if dl_lat >  excessive_latency_value:
                        dl_excessive_lats.append(t)

            elif t > time_point:
                i_pcap[0] = i
                break

        if len(dl_lats) == 0:
            pass # No package arrive; will use previous value
        else:
            dl_avg_lat = sum(dl_lats)/len(dl_lats)

        dl_exc = len(dl_excessive_lats)
        dl_loss = len(dl_losses)

        ul_lats, ul_excessive_lats, ul_losses = [], [], []
        for i in range(i_pcap[1], len(ul_df)):
            t = ul_df['Timestamp'].iloc[i]
            if time_point - dt.timedelta(seconds=tp_range) < t <= time_point:
                
                ul_loss = ul_df[loss_col].iloc[i]

                if ul_loss:
                    ul_losses.append(t)
                else:
                    ul_lat = float(ul_df[latency_col].iloc[i])
                    ul_lats.append(ul_lat)
                    if ul_lat >  excessive_latency_value:
                        ul_excessive_lats.append(t)

            elif t > time_point:
                i_pcap[1] = i
                break

        if len(ul_lats) == 0:
            pass # No package arrive; will use previous value
        else:
            ul_avg_lat = sum(ul_lats)/len(ul_lats)

        ul_exc = len(ul_excessive_lats)
        ul_loss = len(ul_losses)

        performance_related += [dl_loss, ul_loss, dl_exc, ul_exc, dl_avg_lat, ul_avg_lat]
        performance_related = [str(feature) for feature in performance_related]


        f.write(",".join([str(time_point)]+gps_related+HO_related+ss_related+performance_related)+"\n") 

    f.close()

In [12]:
## Data type
front_cut, back_cut = 1, 1  # seconds trimmed from the start / end of every trace
TS, tp_range = 1, 1  # tp_range: window length in seconds; TS: presumably the step between time points

## outpath
out_path = '/home/wmnlab/sheng-ru/ml_data/single'
# out_path = '/home/wmnlab/D/sheng-ru/ml_data/single0.5/'

# Setting: device directory name -> label appended to the output file name
# Setting = {'qc00': 'All', 'qc03': 'All'}
# Setting = {'sm00': 'All', 'sm01': 'LTE'}
# Setting = {'sm00': 'All(wo_B1)', 'sm01': 'B3', 'sm02': 'B7', 'sm03': 'B8', 'sm04': 'B3B7', 'sm05': 'B3B8', 'sm06': 'B7B8', 'sm07': 'LTE'}
Setting = { "sm00": "All", "sm01": "All", "sm02": "B3", "sm03": "B7", "sm04": "B8", "sm05": "B3B7", "sm06": "B3B8", "sm07": "B7B8", "sm08": "LTE" }

## Which data dir to create 
# base_dir = '/home/wmnlab/D/sheng-ru/test/test_data/'
base_dir = '/home/wmnlab/D/database/2023-09-12_2/Bandlock_9_Schemes_Phone_UDP'
exp_name = '_udp'
weird_devs = []; weird_trace = []  # device / trace directory names to skip
cell_info = False

# Second-to-last non-empty path component of base_dir; used as the output file prefix
date = [x for x in base_dir.split('/') if len(x) != 0][-2]

print(f"basedir is {base_dir}\n" + f"Out Files at {out_path}\n" +
      f"Experiment name: {exp_name}; weird trace {weird_trace}; cell_info: {cell_info}\n" + 
      f"TS, tp_range = {TS}, {tp_range}\n" + f"Setting is {Setting}\n"
      )

time.sleep(.2)
warning = input('Correct Setting? (y/n)')
if warning != 'y':
    # The previous bare `raise` aborted only as a side effect (RuntimeError: no
    # active exception); raise the same exception type with an explicit message.
    raise RuntimeError('Setting not confirmed; aborting.')


def _read_loss_latency(path):
    """Load a loss/latency CSV and parse its "Timestamp" column into timestamps."""
    frame = pd.read_csv(path)
    frame["Timestamp"] = frame["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))
    return frame


# Collect gps and gpsspeed from cellinfo
if not cell_info:
    # Placeholder frame with an empty "Date" column when no cell info is used
    ci_df = pd.DataFrame({'Date' : []})
else:
    import subprocess

    # For modem only one cell info
    parent_dir = str(Path(base_dir).parent.absolute())
    # os.listdir returns bare names, so the directory test must be made against
    # the full path; testing the bare name only worked when CWD == parent_dir.
    matches = sorted(
        x for x in os.listdir(parent_dir)
        if 'ci' in x and not os.path.isdir(os.path.join(parent_dir, x))
    )
    if not matches:
        raise FileNotFoundError(f'No cell-info file (name containing "ci") found in {parent_dir}')
    ci_file = os.path.join(parent_dir, matches[-1])

    # For phones multiple cellinfo
    # parent_dir = str(Path(base_dir).parent.absolute())
    # ci_dir = os.path.join(parent_dir, 'cimon')
    # ci_file = os.path.join(ci_dir, os.listdir(ci_dir)[0])

    try:
        ci_df = pd.read_csv(ci_file, dtype=str)
    except pd.errors.ParserError:
        print(f'preprocess {ci_file}')
        # Argument list instead of a shell string so that spaces or shell
        # metacharacters in the path cannot break (or alter) the command.
        subprocess.run(['python3', './csv_processing.py', ci_file], check=True)
        ci_df = pd.read_csv(ci_file[:-4]+'_new.csv', dtype=str)
    ci_df["Date"] = ci_df["Date"].swifter.apply(lambda x: pd.to_datetime(x))

devs = [x for x in os.listdir(base_dir) if ('sm' in x) or ('qc' in x)]

for dev in sorted(devs):
    
    if dev in weird_devs:
        continue
    
    device_dir = os.path.join(base_dir, dev)

    for trace in sorted(os.listdir(device_dir)):
        if trace in weird_trace:
            continue
    
        dt_dir = os.path.join(device_dir, trace) # device trace directory

        ul_loss_lat_df = _read_loss_latency(os.path.join(dt_dir, 'data', "udp_uplk_loss_latency.csv"))
        dl_loss_lat_df = _read_loss_latency(os.path.join(dt_dir, 'data', "udp_dnlk_loss_latency.csv"))

        # The downlink file decides the start and end time of the trace.
        # NOTE(review): start, end and N are not passed to data_create, so it
        # presumably reads them as globals -- keep these names.
        start = dl_loss_lat_df["Timestamp"].iloc[0] + dt.timedelta(seconds=front_cut)
        end = dl_loss_lat_df["Timestamp"].iloc[-1] - dt.timedelta(seconds=back_cut)
        start, end = start.replace(microsecond=0), end.replace(microsecond=0)
        print(f'Trace {trace} from {start} to {end}.')
        N = int((end - start).total_seconds()) # How many time_point in second
        
        b = Setting[dev]
        print(dev, b)

        # Out file here
        # outfile = os.path.join(out_path, date + f'_{dev}_{trace}_{b}.csv')
        outfile = os.path.join(out_path, date + f'_{dev}{exp_name}_{trace}_{b}.csv')

        print(outfile)

        data_create(dt_dir, ci_df, outfile, ul_loss_lat_df, dl_loss_lat_df) 

basedir is /home/wmnlab/D/database/2023-09-12_2/Bandlock_9_Schemes_Phone_UDP
Out Files at /home/wmnlab/sheng-ru/ml_data/single
Experiment name: _udp; weird trace []; cell_info: False
TS, tp_range = 1, 1
Setting is {'sm00': 'All', 'sm01': 'All', 'sm02': 'B3', 'sm03': 'B7', 'sm04': 'B8', 'sm05': 'B3B7', 'sm06': 'B3B8', 'sm07': 'B7B8', 'sm08': 'LTE'}

Trace #01 from 2023-09-12 13:34:17 to 2023-09-12 14:21:30.
sm00 All
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm00_udp_#01_All.csv


  mi_rrc_df = pd.read_csv(mi_rrc_file)


Trace #02 from 2023-09-12 14:52:30 to 2023-09-12 15:40:46.
sm00 All
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm00_udp_#02_All.csv


  mi_rrc_df = pd.read_csv(mi_rrc_file)


Trace #01 from 2023-09-12 13:34:17 to 2023-09-12 14:21:30.
sm01 All
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm01_udp_#01_All.csv
Trace #02 from 2023-09-12 14:52:30 to 2023-09-12 15:40:45.
sm01 All
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm01_udp_#02_All.csv
Trace #01 from 2023-09-12 13:34:17 to 2023-09-12 14:21:30.
sm02 B3
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm02_udp_#01_B3.csv
Trace #02 from 2023-09-12 14:52:30 to 2023-09-12 15:40:45.
sm02 B3
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm02_udp_#02_B3.csv
Trace #01 from 2023-09-12 13:34:17 to 2023-09-12 14:21:30.
sm03 B7
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm03_udp_#01_B7.csv
Trace #02 from 2023-09-12 14:52:30 to 2023-09-12 15:40:45.
sm03 B7
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm03_udp_#02_B7.csv
Trace #01 from 2023-09-12 13:34:17 to 2023-09-12 14:21:30.
sm04 B8
/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm04_udp_#01_B8.csv
Trace #02 from 2023-09-12 14:52:30 to

In [13]:
import pprint

# Folder that holds the generated CSV files
folder_path = "/home/wmnlab/sheng-ru/ml_data/single"

# Full paths of every CSV file in the folder whose name contains '09-12'
file_list = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(".csv") and ('09-12' in file)]

pprint.pprint(file_list)
time.sleep(.5)
warning = input('Sure to process the listed data? (y/n)')
if warning != 'y':
    # The previous bare `raise` aborted only as a side effect (RuntimeError: no
    # active exception); raise the same exception type with an explicit message.
    raise RuntimeError('Processing not confirmed; aborting.')

# Rewrite each CSV in place without its leading rows whose RSRP is 0.
# file_list already holds full paths, so no further os.path.join is needed.
for file_path in file_list:

    df = pd.read_csv(file_path)

    # Positions (not index labels) of the rows with a non-zero RSRP
    nonzero_rows = np.flatnonzero((df['RSRP'] != 0).to_numpy())
    if nonzero_rows.size == 0:
        # Previously this case crashed with IndexError; leave such a file as it is.
        print(f'{file_path}: RSRP is 0 on every row; file left untouched')
        continue

    # Drop everything before the first non-zero RSRP row
    df = df.iloc[nonzero_rows[0]:]

    # Overwrite the original file with the trimmed data
    df.to_csv(file_path, index=False)

['/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm06_udp_#01_B3B8.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm02_udp_#02_B3.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm07_udp_#01_B7B8.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm04_udp_#02_B8.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm03_udp_#01_B7.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm08_udp_#01_LTE.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm06_udp_#02_B3B8.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm05_udp_#01_B3B7.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm00_udp_#02_All.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm08_udp_#02_LTE.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm01_udp_#01_All.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm04_udp_#01_B8.csv',
 '/home/wmnlab/sheng-ru/ml_data/single/2023-09-12_2_sm00_udp_#01_All.csv',
 '/home/wmnlab/sheng-ru/m

# Split

In [None]:
# Build one per-time-point feature CSV (GPS, handover counts, LTE/NR signal
# strength, UDP latency and loss rate) for a single trace directory.
base_dir = "/home/wmnlab/Code_Test_Space/sheng-ru/2022-12-22/_Bandlock_Udp_B1_B3/sm05/#01"
out_file = "/home/wmnlab/test1.csv" ## Out file !!!!!!!!

# Get timepoint from start to end and get the latency and loss
front_cut = 5   # seconds trimmed after the first downlink latency sample
back_cut = 5    # seconds trimmed before the last downlink latency sample
TS = 1          # step between consecutive time points, in seconds
tp_range = 1    # each time point covers (time_point - tp_range, time_point], in seconds


def _load_timestamped_csv(path):
    """Load a CSV and parse its "Timestamp" column into timestamps."""
    frame = pd.read_csv(path)
    frame["Timestamp"] = frame["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x))
    return frame


def _rows_in_window(frame, time_col, cursors, slot, time_point):
    """Yield positions of rows whose time lies in (time_point - tp_range, time_point].

    Scanning starts at cursors[slot]. When the first row later than time_point
    is met, its position is stored back into cursors[slot] and the scan stops,
    so the next time point resumes from there. This relies on the rows being
    in time order, exactly as the early `break` of the inlined loops did.
    """
    window_start = time_point - dt.timedelta(seconds=tp_range)
    times = frame[time_col]
    for pos in range(cursors[slot], len(frame)):
        t = times.iloc[pos]
        if t > time_point:
            cursors[slot] = pos
            return
        if t > window_start:
            yield pos


def _loss_rate(n_lost, n_received):
    """Return lost / (lost + received), or '-' when no packet was seen at all."""
    total = n_lost + n_received
    return '-' if total == 0 else n_lost / total


def _pop_strongest(cells):
    """Remove the cell with the highest mean RSRP and return its (mean RSRP, mean RSRQ).

    `cells` maps a cell key to a sequence whose item 0 holds RSRP samples and
    item 1 holds RSRQ samples. Returns (0, 0) when `cells` is empty.
    """
    if len(cells) == 0:
        return 0, 0  # No sample value, assign 0
    best = max(cells, key=lambda c: sum(cells[c][0]) / len(cells[c][0]))
    rsrp_samples, rsrq_samples = cells[best][0], cells[best][1]
    cells.pop(best)
    # The RSRQ sum is divided by the RSRP sample count, as in the original code.
    return sum(rsrp_samples) / len(rsrp_samples), sum(rsrq_samples) / len(rsrp_samples)


# Collecting the UDP Latency and Loss information first
data_dir = os.path.join(base_dir, "data")
dl_lat_df = _load_timestamped_csv(os.path.join(data_dir, "udp_dnlk_latency.csv"))
dl_loss_df = _load_timestamped_csv(os.path.join(data_dir, "udp_dnlk_loss_timestamp.csv"))
ul_lat_df = _load_timestamped_csv(os.path.join(data_dir, "udp_uplk_latency.csv"))
ul_loss_df = _load_timestamped_csv(os.path.join(data_dir, "udp_uplk_loss_timestamp.csv"))

# The downlink latency file decides the sampled time span
start = dl_lat_df["Timestamp"].iloc[0] + dt.timedelta(seconds=front_cut)
end = dl_lat_df["Timestamp"].iloc[-1] - dt.timedelta(seconds=back_cut)
start, end = start.replace(microsecond=0), end.replace(microsecond=0)
print(f'From {start} to {end}.')
N = int((end - start).total_seconds()) # How many time_point

# Collect rsrp infomation
# After sorting, the first '*ml1_new.csv' file is read as the LTE log and the
# second as the NR log.
ml1_filenames = sorted(x for x in os.listdir(data_dir) if x.endswith('ml1_new.csv'))
mi_ml1_df = pd.read_csv(os.path.join(data_dir, ml1_filenames[0]), dtype=str)
# .copy() so that the column assignment below does not write into a slice view
mi_ml1_df = mi_ml1_df[mi_ml1_df.type_id == 'LTE_PHY_Connected_Mode_Intra_Freq_Meas'].copy()
# +8 h shift: presumably converts the logged time to local time -- TODO confirm
mi_ml1_df["time"] = mi_ml1_df["time"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))

nr_mi_ml1_df = pd.read_csv(os.path.join(data_dir, ml1_filenames[1]), dtype=str)
nr_mi_ml1_df["time"] = nr_mi_ml1_df["time"].apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))

# Collect gps and gpsspeed from cellinfo
middle_dir = os.path.join(base_dir, "middle")
ci_filename = [x for x in os.listdir(middle_dir) if x.startswith('cimon')][0]
ci_df = pd.read_csv(os.path.join(middle_dir, ci_filename), dtype=str)
ci_df["Date"] = ci_df["Date"].swifter.apply(lambda x: pd.to_datetime(x))

GPS_info = namedtuple('gps_info','lat, long, gpsspeed')

# Collect Ho information
mi_rrc_filename = [x for x in os.listdir(middle_dir) if x.endswith('rrc.csv')][0]
mi_rrc_df = pd.read_csv(os.path.join(middle_dir, mi_rrc_filename))
# NOTE(review): parse_mi_ho as defined at the top of this file reads a
# "Timestamp" column, while only "time" is converted here -- confirm they agree.
mi_rrc_df["time"] = mi_rrc_df["time"].swifter.apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
HO_events = parse_mi_ho(mi_rrc_df)
HO_events.pop('Conn_Rel')
HO_events.pop('Conn_Req')

# NOTE(review): one HO count is written per remaining HO_events key; check that
# the eight handover column names below match those keys in number and order.
columns = [
    "Timestamp",
    "lat", "long", "gpsspeed",
    'LTE_HO','MN_HO','eNB_to_ENDC','gNB_Rel','gNB_HO','RLF_II','RLF_III','SCG_RLF',
    "RSRP","RSRQ","RSRP1","RSRQ1","RSRP2","RSRQ2",
    "nr-RSRP","nr-RSRQ","nr-RSRP1","nr-RSRQ1","nr-RSRP2","nr-RSRQ2",
    "DL-lat", "DL-lossrate", "UL-lat", "UL-lossrate"
]

# Carried-forward values need a starting point, otherwise an empty first window
# raises NameError. '-' is the missing-value marker already used for loss rates;
# 0 matches the "no sample" value used for the other signal-strength columns.
dl_avg_lat = ul_avg_lat = '-'
gps_info = GPS_info(lat='-', long='-', gpsspeed='-')
pcell_rsrp, pcell_rsrq = 0, 0

# Resume positions, one per scanned table: DL latency, DL loss, UL latency,
# UL loss, cell info, LTE ml1, NR ml1 (avoids rescanning from row 0 every time).
i_ = [0,0,0,0,0,0,0] # For increase speed

# Opened only once every input has been loaded, and closed even on error.
with open(out_file, 'w') as f:
    f.write(",".join(columns)+"\n")

    for time_point in [start + dt.timedelta(seconds=offset) for offset in range(0, N+1, TS)]:
        window_start = time_point - dt.timedelta(seconds=tp_range)

        # ========================================================================
        # Get DL/UL latency, loss...
        dl_lats = [float(dl_lat_df['latency'].iloc[pos])
                   for pos in _rows_in_window(dl_lat_df, 'Timestamp', i_, 0, time_point)]
        if dl_lats:
            dl_avg_lat = sum(dl_lats)/len(dl_lats)
        # else: no packet arrived, keep the previous average

        n_dl_lost = sum(1 for _ in _rows_in_window(dl_loss_df, 'Timestamp', i_, 1, time_point))

        ul_lats = [float(ul_lat_df['latency'].iloc[pos])
                   for pos in _rows_in_window(ul_lat_df, 'Timestamp', i_, 2, time_point)]
        if ul_lats:
            ul_avg_lat = sum(ul_lats)/len(ul_lats)
        # else: keep the previous average. The old code appended '-' AND the
        # previous value here, which added a column and shifted the row.

        n_ul_lost = sum(1 for _ in _rows_in_window(ul_loss_df, 'Timestamp', i_, 3, time_point))

        performance_related = [
            dl_avg_lat, _loss_rate(n_dl_lost, len(dl_lats)),
            ul_avg_lat, _loss_rate(n_ul_lost, len(ul_lats)),
        ]
        performance_related = [str(feature) for feature in performance_related]

        # ==========================================================================
        # Get GPS informations: the last fix inside the window wins; without a
        # fix the previous one is reused.
        for pos in _rows_in_window(ci_df, 'Date', i_, 4, time_point):
            gps_info = GPS_info(lat=ci_df['GPSLat'].iloc[pos],
                                long=ci_df['GPSLon'].iloc[pos],
                                gpsspeed=ci_df['GPSSpeed'].iloc[pos])

        gps_related = [str(feature) for feature in (gps_info.lat, gps_info.long, gps_info.gpsspeed)]

        # ===========================================================================
        # Get signal strength informations (LTE)
        SS_DICT = ss_dict()
        for pos in _rows_in_window(mi_ml1_df, 'time', i_, 5, time_point):
            # Only rows reported for the PCell are accumulated
            if mi_ml1_df['Serving Cell Index'].iloc[pos] == 'PCell':
                SS_DICT += ss_dict(mi_ml1_df.iloc[pos])

        # Get primary serv cell rsrp, rsrq
        pcell_samples = SS_DICT.dict["PCell"]
        if len(pcell_samples[0]) != 0:
            pcell_rsrp = sum(pcell_samples[0])/len(pcell_samples[0])
            pcell_rsrq = sum(pcell_samples[1])/len(pcell_samples[0])
        # else: no sample value, use the previous one
        SS_DICT.dict.pop("PCell")

        # Get 1st, 2nd neighbor cell rsrp, rsrq
        cell1_rsrp, cell1_rsrq = _pop_strongest(SS_DICT.dict)
        cell2_rsrp, cell2_rsrq = _pop_strongest(SS_DICT.dict)
        ss_related = [pcell_rsrp, pcell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]

        # Get signal strength informations (NR)
        NR_SS_DICT = nr_ss_dict()
        for pos in _rows_in_window(nr_mi_ml1_df, 'time', i_, 6, time_point):
            NR_SS_DICT += nr_ss_dict(nr_mi_ml1_df.iloc[pos])

        # Get primary secondary serv cell rsrp, rsrq
        pscell_samples = NR_SS_DICT.dict["PSCell"]
        if len(pscell_samples[0]) != 0:
            pscell_rsrp = sum(pscell_samples[0])/len(pscell_samples[0])
            pscell_rsrq = sum(pscell_samples[1])/len(pscell_samples[0])
        else:
            pscell_rsrp, pscell_rsrq = 0,0 # No nr serving or no sample value assign 0
        NR_SS_DICT.dict.pop("PSCell")

        # Get 1st, 2nd neighbor cell rsrp, rsrq
        cell1_rsrp, cell1_rsrq = _pop_strongest(NR_SS_DICT.dict)
        cell2_rsrp, cell2_rsrq = _pop_strongest(NR_SS_DICT.dict)
        ss_related += [pscell_rsrp, pscell_rsrq, cell1_rsrp, cell1_rsrq, cell2_rsrp, cell2_rsrq]

        ss_related = [str(feature) for feature in ss_related]

        # ================================================================================
        # Get HO informations: number of events of each type that start in the window
        HO_related = [0] * len(HO_events)
        for k, ho_type in enumerate(HO_events):
            for ho in HO_events[ho_type]:
                if ho.start > time_point:
                    break
                if ho.start > window_start:
                    HO_related[k] += 1

        HO_related = [str(feature) for feature in HO_related]

        f.write(",".join([str(time_point)]+gps_related+HO_related+ss_related+performance_related)+"\n")

# Database Building

In [None]:
# ml_data
# Sorted full paths of every entry found directly inside the ml_data folder
ml_data = '/home/wmnlab/ml_data'
files = sorted(os.path.join(ml_data, entry) for entry in os.listdir(ml_data))

In [None]:
def find_text(x):
    """Return the '&'-separated tokens of a file name in sorted order.

    The tokens are taken from the text that starts four characters after the
    first '#' and ends four characters before the end of `x`.
    """
    begin = x.find('#') + 4
    tokens = sorted(x[begin:-4].split('&'))
    return '&'.join(tokens)

def get_points(num, slot=30):
    """Return the cumulative end positions that split `num` items into slots.

    When `num` is not a multiple of `slot`, the first segment holds the
    remainder and every following segment holds `slot` items, e.g.
    get_points(95) -> [5, 35, 65, 95] and get_points(300) -> [30, 60, ..., 300].

    Args:
        num: total number of items.
        slot: segment length; defaults to 30, the value that used to be
            hard-coded.

    Returns:
        List of increasing end positions; empty when `num` is 0.

    Raises:
        ValueError: if `slot` is not positive.
    """
    if slot <= 0:
        raise ValueError(f"slot must be positive, got {slot}")
    full_slots, remainder = divmod(num, slot)
    points = [remainder] if remainder != 0 else []
    points.extend(remainder + k * slot for k in range(1, full_slots + 1))
    return points

# Build database.csv: one row per time slot, one column per band set, each
# cell a smoothed per-slot average of loss rate + excessive-latency values.
experiment_time = 300   # rows kept from the end of each file
time_slot = 30          # rows per slot; get_points() defaults to the same 30
smoothing_lambda = 0.5  # weight of the stored value when merging a new file
columns = ['B3&B7', 'B3&B8', 'B7&B8']
rows = [[0]*len(columns) for i in range(int(experiment_time/time_slot))]

for file in files:
    band_set = find_text(file)
    set_ind = columns.index(band_set)  # ValueError if the file name has an unknown band set
    df = pd.read_csv(file)

    print(band_set, set_ind)

    # Longer traces only contribute their last experiment_time rows
    if len(df) > experiment_time:
        df = df[-experiment_time:]
    time_points = get_points(len(df))

    begin = 0
    for j, stop in enumerate(time_points):
        bucket = df.iloc[begin:stop]
        val = 0
        for _, x in bucket.iterrows():
            val += x['dl-lossrate'] + x['ul-lossrate'] + x['dl-exc-lat'] + x['ul-exc-lat']
        bucket_avg = val/len(bucket)
        begin = stop

        if rows[j][set_ind] == 0:
            # 0 doubles as "nothing stored yet", so an average of exactly 0 is
            # simply replaced by the next file's value.
            rows[j][set_ind] = bucket_avg
        else:
            # With smoothing_lambda = 0.5 this equals the former
            # "add the new average, then halve" update.
            rows[j][set_ind] = smoothing_lambda*rows[j][set_ind] + (1 - smoothing_lambda)*bucket_avg

# Write database only after every input file was processed, so a failure above
# no longer leaves a truncated database behind.
with open('/home/wmnlab/ntu-experiments/sheng-ru/experiment/mobileinsight/database.csv', 'w') as database:
    database.write(','.join(columns)+'\n')
    for row in rows:
        database.write(','.join(str(x) for x in row)+'\n')