In [1]:
import os
from datetime import datetime
import operator
import re

import numpy as np

import mmap

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc

# Typeset all figure text with LaTeX, using a serif (Times) font and bold math.
plt.rc('text', usetex=True)
rc('font', family='serif', serif=['Times'])
# The preamble has to be one string: list values were deprecated in
# Matplotlib 3.3 and are not accepted as a list by later releases.
plt.rcParams['text.latex.preamble'] = r'\boldmath'

In [3]:
# Searching through server logs
server_log_folder = "../logs_server/logs"


def parse_log_timestamp(log_path):
    """Return the Unix timestamp encoded in a log file's name.

    The file name is expected to look like ``<type>_<YYYY-mm-dd HH:MM:SS>.txt``.
    Only the final path component is inspected, so the result does not depend
    on how deep the file sits in the directory tree or on the path separator.
    The parsed datetime is naive, so ``timestamp()`` interprets it in the
    machine's local timezone.

    Raises
    ------
    IndexError
        If the file name contains no underscore.
    ValueError
        If the text after the first underscore is not a valid date/time.
    """
    file_name = os.path.basename(log_path)
    time_text = file_name.split("_")[1].split(".txt")[0]
    return datetime.strptime(time_text, '%Y-%m-%d %H:%M:%S').timestamp()


# every .txt file found anywhere below the log folder
server_folder_files = [
    os.path.join(root, name)
    for root, _dirs, names in os.walk(server_log_folder)
    for name in names
    if name.endswith('.txt')
]

# [timestamp parsed from the file name, path] pairs, kept in the order in
# which os.walk returned the files
server_log_files = [
    [parse_log_timestamp(path), path] for path in server_folder_files
]

In [5]:
# useful functions

def find_nearest(array, value):
    """Locate the entry of ``array`` that lies closest to ``value``.

    ``array`` is converted with ``np.asarray``; the position is the ``argmin``
    of the absolute differences, so the first of several equally close
    entries wins.

    Returns
    -------
    tuple
        ``(nearest_entry, index)``.
    """
    values = np.asarray(array)
    distances = np.abs(values - value)
    nearest_idx = distances.argmin()
    return values[nearest_idx], nearest_idx

def frames_finder_device(file_name):
    """Collect the frame numbers that a device log reports as sent.

    Each line containing "sent with size" contributes the integer found
    between "frame" and "sent with size". Numbers are returned in file order,
    duplicates included.
    """
    marker = "sent with size"
    with open(file_name) as log:
        # trailing whitespace (including the newline) is dropped before matching
        stripped_lines = (raw.rstrip() for raw in log)
        return [
            int(re.findall(r'frame(.+?)sent with size', text)[0])
            for text in stripped_lines
            if marker in text
        ]
    
def frames_finder_server(file_name):
    """Collect the frame numbers that a server log reports as received.

    Each line containing "received, filesize:" contributes the integer found
    between "Frame" and "received, filesize:". Numbers are returned in file
    order, duplicates included.
    """
    received_frames = []
    with open(file_name) as log:
        for raw_line in log:
            text = raw_line.rstrip()  # drop the trailing newline
            if "received, filesize:" not in text:
                continue
            captured = re.findall(r'Frame(.+?)received, filesize:', text)
            received_frames.append(int(captured[0]))
    return received_frames

def frames_comparer(server_log, device_log, threshold=80):
    """Decide whether a server log and a device log cover the same frames.

    The frame numbers of both logs are extracted and the number of distinct
    frames present in both is expressed as a percentage of the number of
    frame entries in the server log.

    Parameters
    ----------
    server_log : str
        Path of the server log file.
    device_log : str
        Path of the device log file.
    threshold : float, optional
        Percentage that has to be exceeded for the logs to count as a match
        (default 80).

    Returns
    -------
    bool
        True when the overlap percentage is strictly greater than
        ``threshold``. False otherwise, including when the server log
        contains no frames at all.
    """
    device_frames = frames_finder_device(device_log)
    server_frames = frames_finder_server(server_log)

    # nothing received by the server: no overlap can be computed, and the
    # division below would otherwise fail
    if not server_frames:
        return False

    shared_frames = set(device_frames) & set(server_frames)
    device_server_percent = (len(shared_frames) / len(server_frames)) * 100

    return device_server_percent > threshold

In [16]:
def log_parser(server_lf):
    """Parse one server log into a per-frame table of measurements.

    A row is started by a "received, filesize:" line and completed by the
    next "res sent, marker#:" line; recognised lines in between fill the
    current row. Durations whose parsed value is multiplied by 1000 below are
    stored in milliseconds.

    Columns written by this function (every other column stays 0):
        0   frame number
        3   mean of the "SIFT points extracted in time" values (ms)
        4   "PCA encoding time" (ms)
        5   "Fisher Vector encoding time" (ms)
        6   "LSH NN searching time" (ms)
        7   mean of the "MatchSiftData time" values (stored as logged)
        9   "Matching features" percentage
        10  marker number from the "res sent" line
        11  1 if a cache query was logged for the frame
        12  1 if the cache was queried, 0 < matching % < 100 and marker == 1
        13  1 if "Added item to cache" was logged for the frame
        14  last token of the "received" line with the dots removed
            (NOTE(review): the variable names suggest this was meant to be
            the client IP - confirm against the log format)
        15  time between the "received" and "res sent" timestamps (ms)
        17  file size from the "received" line

    Parameters
    ----------
    server_lf : str
        Path of the server log file.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 18)`` where ``n`` is the number of
        "received, filesize:" occurrences in the file.
    """
    # read the log once; the context manager closes the handle again
    with open(server_lf, "r") as log_file:
        contents_s = log_file.readlines()

    # one row per frame the server reports as received
    no_frames = sum(line.count("received, filesize:") for line in contents_s)
    session_latencies = np.zeros([no_frames, 18])

    # lines whose last token is a single duration in seconds -> target column
    single_timings = {
        "PCA encoding time": 4,
        "Fisher Vector encoding time": 5,
        "LSH NN searching time": 6,
    }

    # per-frame accumulators; defined up front so that lines appearing before
    # the first "received" line cannot hit an unbound name
    sift_points = []
    match_sift = []
    s_receive = None

    cf_considered = 0
    # looping over server log
    for curr_line in contents_s:
        # every row is complete: nothing left that could be stored
        if cf_considered >= no_frames:
            break

        cl_split = curr_line.split(" ")
        row = session_latencies[cf_considered]  # view, writes go to the table

        if "received, filesize:" in curr_line:
            curr_frame_no = cl_split[1]
            file_size = cl_split[4]
            dip_int = cl_split[-1].replace('.', '').replace('\n', '')
            # only fill the header fields once per row
            if row[0] == 0:
                row[0] = float(curr_frame_no)
                row[14] = float(dip_int)
                row[17] = float(file_size)

            # "." never matches a newline, so this also works on a final
            # line that has no trailing "\n"
            s_receive = float(re.findall(r" at (.*)", curr_line)[0])

            # averaging multiple times together: start afresh for this frame
            sift_points = []
            match_sift = []

        if "SIFT points extracted in time" in curr_line:
            sift_points.append(float(cl_split[-1]) * 1000)
            row[3] = np.average(sift_points)

        for marker, column in single_timings.items():
            if marker in curr_line:
                row[column] = float(cl_split[-1]) * 1000

        # The "time before matching" / "after matching" timestamps are not
        # stored anywhere in the table, so they are not parsed.

        if "MatchSiftData time" in curr_line:
            match_sift.append(float(cl_split[-2]))
            # average over all values seen for this frame, not just the last
            row[7] = np.average(match_sift)

        if "Matching features" in curr_line:
            row[9] = float(cl_split[-2].replace('%', ''))

        # was the cache used and was it successful
        if "Cache query - time before matching:" in curr_line:
            row[11] = 1

        if "Added item to cache" in curr_line:
            row[13] = 1

        if "res sent, marker#:" in curr_line:
            marker_no = float(cl_split[5])
            row[10] = marker_no

            if row[11] and (0 < row[9] < 100) and (marker_no == 1):
                row[12] = 1

            s_send = float(cl_split[-1])
            # no receive timestamp yet means no total can be computed
            if s_receive is not None:
                row[15] = (s_send - s_receive) * 1000

            cf_considered += 1
    return session_latencies

In [17]:
# show the path stored in the last [timestamp, path] entry of server_log_files
print(server_log_files[-1][-1])

../logs_server/logs/log_2019-12-09 14:42:41.txt


In [19]:
# parse the log referenced by the last server_log_files entry into the per-frame table
parsed = log_parser(server_log_files[-1][-1])

In [24]:
# parsing matched server and client log files to extract latencies

# Drop the rows in which nothing was recorded, then treat the remaining zeros
# as missing values. Boolean indexing returns a copy, so `parsed` itself is
# left unchanged.
results_array = parsed[~np.all(parsed == 0, axis=1)]
results_array[results_array == 0] = np.nan

# column of `results_array` -> label printed for that stage
tasks = {
    1 : "Client pre-processing: ",
    2 : "Data transfer: ",
    3 : "SIFT feature extraction: ",
    4 : "PCA dimension reduction: ",
    5 : "FV encoding with GMM: ",
    6 : "LSH NN searching: ",
    7 : "Template matching: ",
    8 : "Client post-processing: "
}

print("")

overall_latency_med = 0
overall_latency_std = 0

# Iterate over the stages themselves (not over the number of parsed frames),
# so that every stage is reported even when only a few frames are available.
for curr_index, task_label in tasks.items():
    curr_col = results_array[:, curr_index]
    curr_col = curr_col[~np.isnan(curr_col)]  # ignore missing values

    # removing outliers: keep values within two standard deviations of the
    # mean; "<=" keeps a constant column (std == 0) instead of emptying it
    if curr_col.size:
        curr_col = curr_col[np.abs(curr_col - np.mean(curr_col)) <= 2 * np.std(curr_col)]

    if curr_col.size:
        med_val = np.median(curr_col)
        std_val = np.std(curr_col)
    else:
        # nothing recorded for this stage
        med_val = 0
        std_val = 0

    overall_latency_med += med_val
    overall_latency_std += std_val

    print(task_label + str(med_val) + " +- " + str(std_val))

print("Overall latency: " + str(overall_latency_med) + " +- " + str(overall_latency_std))


Client pre-processing: 0 +- 0
Data transfer: 0 +- 0
SIFT feature extraction: 1.912434895833335 +- 0.633542398809233
PCA dimension reduction: 1.8504858016967751 +- 0.6144360238090882
FV encoding with GMM: 7.081985473632814 +- 0.616317657840921
LSH NN searching: 2.61902809143066 +- 1.057119774502953
Template matching: 0.11 +- 0.058823919525774815
Client post-processing: 0 +- 0
Overall latency: 13.573934262593585 +- 2.9802397744879703


