In [10]:
import re
import csv
import numpy as np
import pandas as pd
import datetime
import time

In [11]:
appname = 'tiktok'
log_name = 'log_tiktok.txt'
Data_file_location = 'F:/airscope/data/2023.9.16_tiktok'
total_log_num = 519
id_str = 0  # start sample index of the class

# save file path
data_folder = 'C:/Users/Jarvis/Desktop/Academic/Dissertation/Data/data_9_16'
X_train_path = 'C:/Users/Jarvis/Desktop/Academic/Dissertation/Data/all_data.csv'
y_train_path = 'C:/Users/Jarvis/Desktop/Academic/Dissertation/Data/label.csv'  # Label file
save_path = 'encoded_feature_9_16.pkl'  # Encoded data pkl file
save_encoded_path = 'encoded_feature_9_16.csv'  # Encoded data csv file

downlink_path = 'test_downlink_' + appname + '.csv'  # the path for downlink traffic without cutting off
uplink_path = 'test_uplink_' + appname + '.csv'  # the path for uplink traffic without cutting off
series_data_path = data_folder + 'Data_' + appname + '.csv'  # the path for the final result
app_timelog_path = 'F:/airscope/data/2023.9.16_tiktok/' + log_name  # the path for the time stamp
RNTI_path = Data_file_location + '/RNTI.txt'  # the path for the '.pcap' file

In [12]:
def read_start_time(log_path):
    log_file = log_path+'/airscope.log'
    input_data = open(log_file,'r')
    for line in input_data:
        line = line.split(' ')
        if line[0] != '\n':
            return line[0]

In [13]:
def rnti_filter(rnti_path,log_path):
    input_data = open(rnti_path, 'r')
    rnti_list = [[],[]]
    rnti_final = [[],[]]
    pattern = r'\b(\d+\.\d+)\b'
    start_time = read_start_time(log_path)
    start_time = datetime.datetime.strptime(start_time,'%H:%M:%S.%f')
    for line in input_data:
        if 'Time' in line:
            line = next(input_data)
            match = re.search(pattern, line)
            if match:
                time_value = float(match.group(1))
                time_value = datetime.timedelta(seconds=time_value)+start_time
                rnti_list[0].append(str(time_value.time()))
            else:
                print("Time not found in the data line.")
        if "RNTI=" in line:
            index1 = line.find("=")
            index2 = line.find(')')
            rnti_list[1].append(str(hex(int(line[index1+1:index2]))))
    for idx, rnti in enumerate(rnti_list[1]):
        if rnti not in '0xfffe':
            rnti_final[0].append(rnti_list[0][idx])
            rnti_final[1].append(rnti)
    return rnti_final

RNTI_list = rnti_filter(RNTI_path, Data_file_location)

In [14]:
def time_period_judge(RNTI_time, RNTI_list, RRC_time_idx):
    '''
    RNTI_time: the time of the reading RNTI
    RNTI_list: the target RNTI list with time
    RRC_time_idx: the index of the target RNTI
    '''
    RNTI_time = datetime.datetime.strptime(RNTI_time,'%H:%M:%S.%f').time()
    print(RNTI_time)
    RRC_time = datetime.datetime.strptime(RNTI_list[0][RRC_time_idx],'%H:%M:%S.%f').time()
    print(RRC_time)
    if RRC_time_idx == len(RNTI_list[1])-1:
        next_RRC_time = None
    else:
        next_RRC_time = datetime.datetime.strptime(RNTI_list[0][RRC_time_idx+1],'%H:%M:%S.%f')

    if next_RRC_time == None:
        if RNTI_time > RRC_time:
            return True
    else:
        if RRC_time < RNTI_time < next_RRC_time:
            return True
    return False

In [15]:
def filter_data(log_path, csv_write_down, csv_write_up, rntilist):
    input_data = open(log_path, 'r')
    for line in input_data:
        line = line.split(' ')
        for idx, rnti in enumerate(rntilist[1]):
            if 'rnti=%s,' % rnti in line:
                if '[MAC' in line or '[HI]' in line:
                    break

                else:
                    if time_period_judge(line[0],rntilist,idx):
                        tbs_string = [s for s in line if "tbs" in s]
                        rnti_string = [s for s in line if "rnti" in s]
                        uldl_string = [s for s in line if "DL" in s or 'UL' in s]
                        tbs_match = [line[0], rnti_string[0][5:], uldl_string[0], re.findall(r"\d+", tbs_string[0])[0]]
                        if '[DL]' in uldl_string:
                            csv_write_down.writerow(tbs_match)
                        else:
                            csv_write_up.writerow(tbs_match)

In [16]:
def generate_traffic_files(dl_path, ul_path, RNTIlist, file_path, total_log_num ):
    """
    :param dl_path: the file path to save downlink traffic block size (tbs_dl)
    :param ul_path: the file path to save uplink traffic block size (tbs_ul)
    :param RNTIlist: inout path of the RNTI list for targe UE
    :param file_path: the path of the log file from airscope
    :param total_log_num: the total number of log files
    """
    f1 = open(dl_path, 'w', newline='')
    write_down = csv.writer(f1)
    write_down.writerow(['time', 'rnti', 'link', 'tbs_dl'])

    f2 = open(ul_path, 'w', newline='')
    write_up = csv.writer(f2)
    write_up.writerow(['time', 'rnti', 'link', 'tbs_ul'])

    for i in range(0,  total_log_num+1):
        print('read the %dth file' % i)
        try:
            if i == 0:
                path = file_path + "/airscope.log"
                filter_data(path, write_down, write_up, RNTIlist)
            else:
                path = file_path + "/airscope.log.%d" % i
                filter_data(path, write_down, write_up, RNTIlist)
        except:
            print("An exception occurred")
            break

generate_traffic_files(downlink_path, uplink_path, RNTI_list, Data_file_location, total_log_num)

read the 0th file
13:41:55.619443
13:45:42.763065
13:41:55.861512
13:45:42.763065
13:41:55.870250
13:45:42.763065
13:41:56.711251
13:45:42.763065
13:41:56.720288
13:45:42.763065
13:41:58.721243
13:45:42.763065
13:41:58.728206
13:45:42.763065
13:41:58.736199
13:45:42.763065
13:41:58.744227
13:45:42.763065
13:41:58.781255
13:45:42.763065
13:41:58.788794
13:45:42.763065
13:41:58.789347
13:45:42.763065
13:41:58.797244
13:45:42.763065
13:41:58.801205
13:45:42.763065
13:41:58.809372
13:45:42.763065
13:41:58.816236
13:45:42.763065
13:41:58.817212
13:45:42.763065
13:41:58.824346
13:45:42.763065
13:41:58.847220
13:45:42.763065
13:41:58.874258
13:45:42.763065
13:41:58.891237
13:45:42.763065
13:41:58.898228
13:45:42.763065
13:41:58.899270
13:45:42.763065
13:41:58.907229
13:45:42.763065
13:41:58.917725
13:45:42.763065
13:41:58.941257
13:45:42.763065
13:41:58.951830
13:45:42.763065
13:41:58.958595
13:45:42.763065
13:41:58.961244
13:45:42.763065
13:41:58.968349
13:45:42.763065
13:41:58.971724
13:45:

In [17]:
airscope_csv_path = Data_file_location+'/airscope.csv'
def read_airscope_csv_file(file_path):
    df =pd.read_csv(file_path,sep=';')
    for idx, row in enumerate(df['tstamp']):
        str_time = datetime.datetime.fromtimestamp(row / 1000000.0).strftime('%H:%M:%S.%f')
        df['tstamp'][idx] = str_time
    return df
# airscope_csv_data = read_airscope_csv_file(airscope_csv_path)