In [1]:
import pandas as pd
import numpy as np
import time

def get_delta(row):
    """Return the elapsed time from a row's 'Id' stamp to its 'Time' stamp, in seconds.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``DataFrame.apply``)
        Must provide 'Time' and 'Id' entries whose difference is a timedelta
        exposing ``total_seconds()`` (pandas Timestamps in this notebook).

    Returns
    -------
    int or float
        ``0`` when both stamps are equal, otherwise ``Time - Id`` in seconds.
        The value is negative when 'Time' precedes 'Id'.
    """
    t1 = row['Time']
    t2 = row['Id']
    if t1 == t2:
        return 0
    # total_seconds() accounts for the whole duration. Combining only the
    # `.seconds` and `.microseconds` components drops the `.days` part, which
    # truncates gaps of a day or more and turns a negative gap into a large
    # positive number (a -1 s delta is stored as days=-1, seconds=86399).
    return (t1 - t2).total_seconds()

path1 = './data/namenode.log'
path2 = './data/client.log'

# Load the name node log and the client log. Both are read as header-less,
# ';'-separated files and given the same three column names.
data1 = pd.read_csv(path1, delimiter=';', header=None)
data1.columns = ['Time', 'Method', 'Id']
data2 = pd.read_csv(path2, delimiter=';', header=None)
data2.columns = ['Time', 'Method', 'Id']
# ignore_index gives the combined frame unique row labels; without it both
# inputs contribute rows labelled 0..n-1.
data = pd.concat([data1, data2], ignore_index=True)

# Compute Delta: seconds elapsed between the 'Id' stamp and the 'Time' stamp.
# The subtraction is vectorised, and total_seconds() keeps the full duration
# (days included, sign preserved) rather than only the seconds/microseconds
# components.
data['Time'] = pd.to_datetime(data['Time'])
data['Id'] = pd.to_datetime(data['Id'])
data['Delta'] = (data['Time'] - data['Id']).dt.total_seconds()
data = data.drop("Time", axis=1)

print(data.head())

# Pivot: one row per 'Id', one column per 'Method', cells hold the Delta.
data = data.pivot(index='Id', columns='Method', values='Delta')
# Total is the row-wise sum over all method columns.
data['Total'] = data.sum(axis=1)
data.head()

                           Method                         Id  Delta
0          NameNode_post_fileList 2019-11-15 11:15:43.320423  1.811
1  NameNode_post_fileList_replica 2019-11-15 11:15:43.320423  3.250
2          NameNode_post_fileList 2019-11-15 11:15:43.345423  0.937
3  NameNode_post_fileList_replica 2019-11-15 11:15:43.345423  1.893
4          NameNode_post_fileList 2019-11-15 11:15:43.360423  0.603


Method,Client_get_fileNode,NameNode_post_fileList,NameNode_post_fileList_replica,Total
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-11-15 11:15:43.320423,0.0,1.811,3.25,5.061
2019-11-15 11:15:43.345423,0.0,0.937,1.893,2.83
2019-11-15 11:15:43.360423,0.0,0.603,2.226,2.829
2019-11-15 11:15:43.370423,0.0,1.158,2.26,3.418
2019-11-15 11:15:43.376423,0.0,0.558,1.551,2.109


In [144]:
# Measure 1: summary statistics of the 'NameNode_post_fileList' column of the
# pivoted frame, i.e. the per-Id Delta values (in seconds) for that method.
data['NameNode_post_fileList'].describe()

count    100.000000
mean       1.078210
std        0.570398
min        0.023000
25%        0.633250
50%        1.156000
75%        1.589250
max        1.980000
Name: NameNode_post_fileList, dtype: float64

In [145]:
# Measure 2: summary statistics of the 'NameNode_post_fileList_replica' column
# of the pivoted frame, i.e. the per-Id Delta values (in seconds) for that method.
data['NameNode_post_fileList_replica'].describe()

count    100.000000
mean       2.002140
std        0.769218
min        0.226000
25%        1.461750
50%        2.023000
75%        2.504500
max        3.603000
Name: NameNode_post_fileList_replica, dtype: float64

In [146]:
# Total: summary statistics of the 'Total' column, which holds the row-wise
# sum of all method Delta columns for each Id.
data['Total'].describe()

count    100.00000
mean       3.08035
std        1.22427
min        0.34800
25%        2.25550
50%        3.22600
75%        3.96825
max        5.58300
Name: Total, dtype: float64

In [147]:
from datetime import datetime, timedelta
import random

# Print synthetic client and name node log lines of the form
# '<time>;<source>_<method>;<request start time>'.
NUM_REQUESTS = 100

started = datetime.now()
# Distinct, ascending request offsets (milliseconds after `started`).
# random.sample always returns exactly NUM_REQUESTS unique values, whereas
# de-duplicating a fixed number of random draws can leave fewer than needed
# and make the loops below fail with an IndexError.
milliseconds = sorted(random.sample(range(10, 2001), NUM_REQUESTS))
latencies1 = [random.randint(10, 2000) for _ in range(NUM_REQUESTS)]
latencies2 = [random.randint(10, 2000) for _ in range(NUM_REQUESTS)]
request_times = [started + timedelta(milliseconds=ms) for ms in milliseconds]

# Client log: the event time equals the request start time.
for now in request_times:
    print('%s;%s_%s;%s' % (now, 'Client', 'get_fileNode', now))

# Separator between the two logs ('\n' newlines, not the literal text '/n').
print('\n****************************************************\n')

# Name node log
for now, latency1, latency2 in zip(request_times, latencies1, latencies2):
    # Name node 1: logged latency1 ms after the request start.
    print('%s;%s_%s;%s' % (now + timedelta(milliseconds=latency1), 'NameNode', 'post_fileList', now))
    # Name node 2: logged a further latency2 ms after name node 1.
    print('%s;%s_%s;%s' % (now + timedelta(milliseconds=latency1 + latency2), 'NameNode', 'post_fileList_replica', now))



2019-11-15 11:16:42.480740;Client_get_fileNode;2019-11-15 11:16:42.480740
2019-11-15 11:16:42.510740;Client_get_fileNode;2019-11-15 11:16:42.510740
2019-11-15 11:16:42.540740;Client_get_fileNode;2019-11-15 11:16:42.540740
2019-11-15 11:16:42.545740;Client_get_fileNode;2019-11-15 11:16:42.545740
2019-11-15 11:16:42.547740;Client_get_fileNode;2019-11-15 11:16:42.547740
2019-11-15 11:16:42.553740;Client_get_fileNode;2019-11-15 11:16:42.553740
2019-11-15 11:16:42.565740;Client_get_fileNode;2019-11-15 11:16:42.565740
2019-11-15 11:16:42.582740;Client_get_fileNode;2019-11-15 11:16:42.582740
2019-11-15 11:16:42.595740;Client_get_fileNode;2019-11-15 11:16:42.595740
2019-11-15 11:16:42.603740;Client_get_fileNode;2019-11-15 11:16:42.603740
2019-11-15 11:16:42.617740;Client_get_fileNode;2019-11-15 11:16:42.617740
2019-11-15 11:16:42.637740;Client_get_fileNode;2019-11-15 11:16:42.637740
2019-11-15 11:16:42.651740;Client_get_fileNode;2019-11-15 11:16:42.651740
2019-11-15 11:16:42.657740;Client_get_