In [1]:
import os
from datetime import datetime
import operator
import re

import numpy as np

import mmap

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc

plt.rc('text', usetex=True)
rc('font',**{'family':'serif','serif':['Times']})
plt.rcParams['text.latex.preamble'] = [r'\boldmath']

In [16]:
# Searching through server logs
server_log_folder = "../logs_server/logs"
server_folder_files = []

for r, d, f in os.walk(server_log_folder):
    for file in f:
        if '.txt' in file:
            server_folder_files.append(os.path.join(r, file))

server_log_files = []
            
for curr_file in server_folder_files:
    cf_orig = curr_file
    curr_file = curr_file.split("/")[3].split("_")
    
    cf_type = curr_file[0]
    cf_time = curr_file[1].split(".txt")[0]
    
    cft_obj = datetime.strptime(cf_time, '%Y-%m-%d %H:%M:%S')
    cft_unix = cft_obj.timestamp()
    
    file_info = [cft_unix, cf_orig]

    globals()["server_log_files"].append(file_info)
    
############################
    
# Searching through device logs 
device_logs_folder = "../logs_devices/huawei"
devices_folder_files = []

for r, d, f in os.walk(device_logs_folder):
    for file in f:
        if '.txt' in file:
            devices_folder_files.append(os.path.join(r, file))
            
devices_log_files = []
            
for curr_file in devices_folder_files:
    cf_orig = curr_file
    curr_file = curr_file.split("/")[-1].split("_")
    cf_time = float(curr_file[1].split(".txt")[0]) / 1000
    
    file_info = [str(cf_time), cf_orig]
    globals()["devices_log_files"].append(file_info)
    
# sort by unix_timestamp
server_log_files = sorted(server_log_files, key=operator.itemgetter(0))
devices_log_files = sorted(devices_log_files, key=operator.itemgetter(0))  

In [5]:
# useful functions

def find_nearest(array, value):
    # Returning nearest value and index
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return array[idx], idx

def frames_finder_device(file_name):
    file_frames = []
    with open(file_name) as search:
        for line in search:
            line = line.rstrip()  # remove '\n' at end of line
            if "sent with size" in line:
                frame_no = int(re.findall(r'frame(.+?)sent with size', line)[0])
                file_frames.append(frame_no)         
    return file_frames
    
def frames_finder_server(file_name):
    file_frames = []
    with open(file_name) as search:
        for line in search:
            line = line.rstrip()  # remove '\n' at end of line
            if "received, filesize:" in line:
                frame_no = int(re.findall(r'Frame(.+?)received, filesize:', line)[0])
                file_frames.append(frame_no)
    return file_frames

def frames_comparer(server_log, device_log):
    device_frames = frames_finder_device(device_log)
    server_frames = frames_finder_server(server_log)
    
    list_comparison = set(device_frames) & set(server_frames)
    device_server_percent = (len(list_comparison) / len(server_frames)) * 100
    
    if device_server_percent > 80:
        return True
    else:
        return False

In [18]:
# matching both server and device log files to as close together as possible 

# extracting server log file creation time
sl_times = [lst[:1] for lst in server_log_files]
sl_times = np.array(sl_times)

matched_logs = []

for i_dl in range(len(devices_log_files)):
    curr_dl = devices_log_files[i_dl] # curr device log
    dl_creation = int(float(curr_dl[0])) # int time for device log creation
    
    # finding closest server log file
    closest_sl = find_nearest(sl_times, dl_creation)
    csl_time = closest_sl[0]
    csl_index = closest_sl[1]
    server_log_file = server_log_files[csl_index] # select the logfile
    
    #print([server_log_file[1], curr_dl[1]])
    compare_logs = frames_comparer(server_log_file[1], curr_dl[1])
    if compare_logs:
        matched_logs.append([server_log_file[1], curr_dl[1]]) # save only logfiles

In [44]:
def log_parser(server_lf, device_lf):    
    # number of lines in file
    with open(server_lf) as f:
        num_lines_s = sum(1 for _ in f)
    with open(device_lf) as g:
        num_lines_d = sum(1 for _ in g)
    
    # number of frames sent from the device
    no_frames = open(server_lf, 'r').read().count("received, filesize:")
    
    # reading the file contents of both logs
    contents_s = open(server_lf, "r").readlines()
    contents_d = open(device_lf, "r").readlines()
    
    # storing current frame being considered
    cf_ticker = []
    
    # requests
    requests = []
    
    # arrays for latencies which are in units of ms
    session_latencies = np.zeros([no_frames, 18])
    transfer_latencies = np.zeros([no_frames, 5])
    
    clients = []
    
    cf_considered = 0
    # looping over server log
    for lf_line in range(num_lines_s):
        curr_line = contents_s[lf_line] 
        
        if not curr_line == "\n":
            cl_split = curr_line.split(" ")
        
        if "received, filesize:" in curr_line:
            curr_frame_no = cl_split[1]
            file_size = cl_split[4]
            device_ip = cl_split[-1]
            time_received = cl_split[6]
            dip_int = device_ip.replace('.', '').replace('\n', '')
            if dip_int not in clients:
                clients.append(dip_int)
            if session_latencies[cf_considered][0] == 0:
                session_latencies[cf_considered][0] = float(curr_frame_no)
                session_latencies[cf_considered][14] = float(dip_int)
                session_latencies[cf_considered][17] = float(file_size)
                
            s_receive = float(re.findall(" at (.*)\n", curr_line)[0])
            
            # averaging multiple times together 
            sift_points = []
            match_sift = []

        if "SIFT points extracted in time" in curr_line:
            sift_time = float(cl_split[-1]) * 1000
            sift_points.append(sift_time)
            sp_average = np.average(sift_points)
            session_latencies[cf_considered][3] = sp_average

        if "PCA encoding time" in curr_line:
            pca_time = float(cl_split[-1]) * 1000
            session_latencies[cf_considered][4] = pca_time

        if "Fisher Vector encoding time" in curr_line:
            fsh_time = float(cl_split[-1]) * 1000
            session_latencies[cf_considered][5] = fsh_time
            
        if "time before matching" in curr_line:
            tbf_timestamp = float(cl_split[-1])

        if "LSH NN searching time" in curr_line:
            lshnn_time = float(cl_split[-1]) * 1000
            session_latencies[cf_considered][6] = lshnn_time
            
        if "after matching" in curr_line:
            af_time = float(cl_split[-1])
            fhy_time = (af_time - tbf_timestamp) * 1000
            #session_latencies[cf_considered][7] = fhy_time
            
        if "MatchSiftData time" in curr_line:
            msd_time = float(cl_split[-2])
            match_sift.append(msd_time)
            msd_average = np.average(msd_time)
            session_latencies[cf_considered][7] = msd_average
            
        if "Matching features" in curr_line:
            mf_percentage = float(cl_split[-2].replace('%', ''))
            session_latencies[cf_considered][9] = mf_percentage
            
        # was the cache used and was it succesful
        if "Cache query - time before matching:" in curr_line:
            session_latencies[cf_considered][11] = 1
        
        if "Added item to cache" in curr_line: 
            session_latencies[cf_considered][13] = 1
        
        if "res sent, marker#:" in curr_line:
            marker_no = float(cl_split[5])
            session_latencies[cf_considered][10] = marker_no
            
            cache_query = session_latencies[cf_considered][11]
            
            mf_percentage = session_latencies[cf_considered][9]
            if (cache_query) and (0 < mf_percentage < 100 ) and (marker_no == 1):
                session_latencies[cf_considered][12] = 1
                
            s_send = float(cl_split[-1])
            s_total = (s_send - s_receive) * 1000
            session_latencies[cf_considered][15] = s_total
            
            cf_considered += 1

#     succesful_recognitions = []
#     succesful_cache = []
#     # search through all the frames
#     for i in range(len(session_latencies)):
#         curr_frame = session_latencies[i]
        
#         cf_id = curr_frame[0]
#         cf_recog = curr_frame[10]
#         cf_cache = curr_frame[12] # cache queried
        
#         cf_add_cache = curr_frame[13] # item was added to cache
        
#         if int(cf_recog) == 1:
#             # frame has had succesful recognition performed or cache recognition
            
#             if int(cf_add_cache):
#                 succesful_recognitions.append(cf_id)
#             else:
#                 succesful_cache.append(cf_id)
        
    # Client times
    pp_begin_uxtime = 0
    frame_sent_ux_time = 0 
    curr_frame_no = 0
    curr_frame_loc = 0
    
    # offset of phone in seconds
    android_offset = 0
    
    for lf_line in range(num_lines_d):
        curr_line = contents_d[lf_line]
        cl_split = curr_line.split(" ")

        if "get gray scaled frame data at" in curr_line:
            c_pre_begin = float(cl_split[-1])
        
        if "sent with size" in curr_line:
            # find frame number
            frame_no = float(re.search("frame(.*)sent with size", curr_line).group(1))

            sl_where = np.where(session_latencies[:,0] == frame_no)[0]

            if not sl_where.size == 0:
                curr_frame_no = frame_no
                curr_frame_loc = sl_where[0]

                c_send = float(cl_split[-1]) 

                c_pre = c_send - c_pre_begin
                session_latencies[curr_frame_loc][1] = c_pre

        if "res received at" in curr_line:
            c_receive = (float(cl_split[-1]) / 1000)

        if "image border created:" in curr_line:
            curr_date = cl_split[0]
            curr_year = "19"

            curr_time = cl_split[1]
            dt_string = curr_year + "-" + curr_date + " " + curr_time
            curr_dt = datetime.strptime(dt_string, '%y-%m-%d %H:%M:%S.%f')
            cdt_unix = curr_dt.timestamp()
            
            # time from client sending to client receiving
            c_send_receive = (c_receive - (c_send/1000)) * 1000
            
            # total data transfer times 
            data_transfer = c_send_receive - session_latencies[curr_frame_loc][15]
            dt_fraction = session_latencies[curr_frame_loc][17] / (session_latencies[curr_frame_loc][17] + 512)
            data_transfer = dt_fraction * data_transfer
            
            session_latencies[curr_frame_loc][2] = data_transfer

            # post-processing time
            c_post = cdt_unix - c_receive
            session_latencies[curr_frame_loc][8] = c_post * 1000
            
            c_total = c_pre + c_post
            session_latencies[curr_frame_loc][16] = c_total
                 
    #print(session_latencies)
    return session_latencies

In [53]:
# parsing matched server and client log files to extract latencies

results_array = np.zeros([1,18])

# counter = 1
# for i in range(len(matched_logs)):
#     curr_logs = matched_logs[i]

#     curr_server_log = curr_logs[0]
#     curr_device_log = curr_logs[1]
    
#     results = log_parser(curr_server_log, curr_device_log)
    
# #     if counter == len(matched_logs):
# #         print(curr_server_log, curr_device_log)
# #         results_array = np.append(results_array, results, axis=0)
        
#     results_array = np.append(results_array, results, axis=0)
    
#     counter += 1

curr_server_log = server_log_files[-1][1]

for i in range(len(devices_log_files)):
    curr_log = devices_log_files[i]
    curr_device_log = curr_log[1]
    
    results = log_parser(curr_server_log, curr_device_log)
    results_array = np.append(results_array, results, axis=0)

results_array = results_array[~np.all(results_array == 0, axis=1)]
results_array[results_array == 0] = np.nan

tasks = {
    1 : "Client pre-processing: ",
    2 : "Data transfer: ",
    3 : "SIFT feature extraction: ",
    4 : "PCA dimension reduction: ",
    5 : "FV encoding with GMM: ",
    6 : "LSH NN searching: ",
    7 : "Template matching: ",
    8 : "Client post-processing: "
}

print("")
    
overall_latency_med = 0
overall_latency_std = 0
for i in range(len(results_array[:,0])):
    curr_index = i + 1 
    if curr_index <= len(tasks):
        curr_col = results_array[:,curr_index]
        
        # removing outliers
        curr_col = curr_col[abs(curr_col - np.nanmean(curr_col)) < 2 * np.nanstd(curr_col)]
        
        med_val = np.nanmedian(curr_col)
        std_val = np.nanstd(curr_col)

        overall_latency_med += med_val
        overall_latency_std += std_val`

        print(tasks[curr_index] + str(med_val) + " +- " + str(std_val))

print("Overall latency: " + str(overall_latency_med) + " +- " + str(std_val))


Client pre-processing: 11.0 +- 2.9851554597864123
Data transfer: 264.8790381694201 +- 15.790993282101912
SIFT feature extraction: 1.74093246459961 +- 0.4914205959629532
PCA dimension reduction: 0.44405460357666005 +- 0.521548442540035
FV encoding with GMM: 7.39002227783203 +- 0.5605457351183846
LSH NN searching: 0.58293342590332 +- 0.09909787150313927
Template matching: 0.1 +- 0.036756583347221455
Client post-processing: 6.999969482421875 +- 1.2472349320385085
Overall latency: 293.1369504237536 +- 1.2472349320385085


