In [1]:
import os
from datetime import datetime
import operator
import re

import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc

plt.rc('text', usetex=True)
rc('font',**{'family':'serif','serif':['Times']})
plt.rcParams['text.latex.preamble'] = [r'\boldmath']

In [2]:
# Searching for the log and error files and storing them into lists
log_folder = "../logs"
folder_files = []

for r, d, f in os.walk(log_folder):
    for file in f:
        if '.txt' in file:
            folder_files.append(os.path.join(r, file))

log_files = []
error_files = []
            
for curr_file in folder_files:
    cf_orig = curr_file
    curr_file = curr_file.split("/")[2].split("_")
    
    cf_type = curr_file[0]
    cf_time = curr_file[1].split(".txt")[0]
    
    cft_obj = datetime.strptime(cf_time, '%Y-%m-%d %H:%M:%S')
    cft_unix = cft_obj.timestamp()
    
    file_info = [cft_unix, cf_orig]
    
    globals()[str(cf_type)+"_files"].append(file_info)

In [34]:
def single_log_parser(log_file):    
    # number of lines in file
    with open(log_file) as f:
        log_num_lines = sum(1 for _ in f)
    
    # no of frames dealt with
    no_frames = open(log_file, 'r').read().count("Image size ")
    
    # read file contents of logs
    lf_file = open(log_file, "r").readlines()
    
    # requests
    requests = []
    
    # arrays for latencies which are in units of ms
    session_latencies = np.zeros([no_frames, 15])
    
    cf_considered = 0
    # looping over log
    for lf_line in range(log_num_lines):
        curr_line = lf_file[lf_line] 
        
        if not curr_line == "\n":
            cl_split = curr_line.split(" ")
        
        if "Image size " in curr_line:
            if cf_considered not in requests:
                requests.append(cf_considered)

        if "SIFT points extracted in time" in curr_line:
            try:
                sift_time = float(cl_split[-1]) * 1000
                session_latencies[cf_considered][3] = sift_time
            except:
                pass

        if "PCA encoding time" in curr_line:
            pca_time = float(cl_split[-1]) * 1000
            session_latencies[cf_considered][4] = pca_time

        if "Fisher Vector encoding time" in curr_line:
            fsh_time = float(cl_split[-1]) * 1000
            session_latencies[cf_considered][5] = fsh_time
            
        if "LSH NN searching time" in curr_line:
            lshnn_time = float(cl_split[-1]) * 1000
            session_latencies[cf_considered][6] = lshnn_time
            
        if "MatchSiftData time" in curr_line:
            msd_time = float(cl_split[-2])
            session_latencies[cf_considered][7] = msd_time
            
        if "Matching features" in curr_line:     
            cf_considered += 1
                
    #print(session_latencies)
    return session_latencies
   

In [44]:
# Sort by UNIX timestamp in ascending order
log_files = sorted(log_files, key=operator.itemgetter(0))

files_to_consider = log_files[-18:-1]

results_array = np.zeros([1,15])

for i in range(len(files_to_consider)):
    curr_log = files_to_consider[i]
    file_name = curr_log[1]

    # Send file names to parser
    log_results = single_log_parser(file_name)
    lat_results = log_results

    results_array = np.append(results_array, lat_results, axis=0)
    
results_array = results_array[~np.all(results_array == 0, axis=1)]
#results_array[results_array == 0] = np.nan


tasks = {
    1 : "Client pre-processing: ",
    2 : "Data transfer: ",
    3 : "SIFT feature extraction: ",
    4 : "PCA dimension reduction: ",
    5 : "FV encoding with GMM: ",
    6 : "LSH NN searching: ",
    7 : "Template matching: ",
    8 : "Client post-processing: "
}

print("")
    
overall_latency_med = 0
overall_latency_std = 0
for i in range(len(results_array[:,0])):
    curr_index = i + 1 
    if curr_index <= len(tasks):
        curr_col = results_array[:,curr_index]
        
        # removing outliers
        curr_col = curr_col[abs(curr_col - np.mean(curr_col)) < 2 * np.std(curr_col)]
        print(np.shape(curr_col))
        
        med_val = np.nanmedian(curr_col)
        std_val = np.nanstd(curr_col)
        
        if med_val == np.nan:
            med_val = 0

        overall_latency_med += med_val
        overall_latency_std += std_val

        print(tasks[curr_index] + str(med_val) + " +- " + str(std_val))

print("Overall latency: " + str(overall_latency_med) + " +- " + str(std_val))


Client pre-processing: nan +- nan
Data transfer: nan +- nan
SIFT feature extraction: 1.250025 +- 3.0022105907219028
PCA dimension reduction: 0.868797 +- 1.2357421878872346
FV encoding with GMM: 8.936879999999999 +- 3.231433869737246
LSH NN searching: 0.602007 +- 1.9920623388194951
Template matching: 0.22 +- 0.4534841362532346
Client post-processing: nan +- nan
Overall latency: nan +- nan
