# ostn-03-19 vs ostn-03-20 Processing Time

In [None]:
# Plot every operation that was measured in both runs, matching the frames by
# their label instead of by list position.  Matching by position compares
# unrelated operations (or raises IndexError) as soon as one run logged an
# operation the other did not, and an empty frame makes draw_graph fail on
# `iloc[0]`.
# NOTE: this cell uses pair1, pair2 and draw_graph, which are defined in later
# cells; run those cells first.
pair2_by_label = {frame.iloc[0].label: frame for frame in pair2 if not frame.empty}
for first_run in pair1:
    if first_run.empty:
        # No "-end" rows for this operation in the first run: nothing to plot.
        continue
    run_label = first_run.iloc[0].label
    if run_label not in pair2_by_label:
        print("skipping " + run_label + ": no matching data in the second run")
        continue
    draw_graph(first_run, pair2_by_label[run_label])

In [1]:
# !pip install pyhmy --upgrade
import json
import pandas as pd
import os
import shutil
import re
import plotly.express as px 
import plotly.graph_objects as go
import numpy as np
from IPython.core.display import display, HTML

In [2]:
def read_data(files, path):
    """Collect the parsed "ds" log records from the zerolog files of a directory.

    Parameters
    ----------
    files : iterable of str
        File names (not full paths) located in ``path``. Only names that
        contain "zerolog" are read; every other name is ignored.
    path : str
        Directory holding the files, with or without a trailing separator.

    Returns
    -------
    list
        One ``json.loads`` result per line containing the marker
        ``"log-topic":"ds"``, in file order and then line order.

    Raises
    ------
    json.JSONDecodeError
        If a line carrying the marker is not valid JSON.
    """
    data = []
    for file in files:
        if "zerolog" not in file:
            continue
        # os.path.join keeps the call working when `path` has no trailing "/".
        # Undecodable bytes are dropped (errors='ignore') rather than raising.
        with open(os.path.join(path, file), errors='ignore') as f:
            # Iterate the handle directly so a large log is streamed instead of
            # being loaded whole by readlines().
            for line in f:
                if '"log-topic":"ds"' in line:
                    data.append(json.loads(line))
    return data

In [3]:
def data_processing(data):
    """Turn parsed log records into a time-ordered DataFrame.

    Only the 'time' and 'message' fields of each record are kept. The message
    is stripped of surrounding whitespace, a 'label' column is derived from it
    (the text between the leading "ds-" and the next "-"), and 'time' is parsed
    into datetimes. Rows are ordered by time ascending, ties broken by message
    descending, and the index is renumbered from 0.

    Parameters
    ----------
    data : list of dict
        Records as produced by ``json.loads`` on the log lines.

    Returns
    -------
    pandas.DataFrame
        Columns 'time', 'message', 'label'.
    """
    frame = pd.DataFrame(data, columns=['time', 'message'])
    frame['message'] = frame['message'].apply(lambda text: text.strip())

    # First capture of the pattern is the operation name embedded in the message.
    label_pattern = re.compile('ds-(.*?)-.*?')
    frame['label'] = frame['message'].apply(lambda text: label_pattern.findall(text)[0])

    frame['time'] = pd.to_datetime(frame['time'], format='%Y-%m-%dT%H:%M:%S.%f')

    ordered = frame.sort_values(by=['time', 'message'], ascending=[True, False])
    return ordered.reset_index(drop=True)

In [46]:
def get_time_diff(df):
    pair = []
    for name, group in df.groupby('label'):
        group['time_diff'] = group['time'].diff().dt.microseconds /1000000
        label = group.iloc[0].label
        new = group[group['message'] == 'ds-' + label + "-end"]
        new.reset_index(inplace = True, drop = True)
        pair.append(new)
    return pair

In [66]:
def draw_graph(new, new2, name1=None, name2=None):
    """Plot the processing time of one operation for two runs and save the figure.

    The figure is shown inline and written both as HTML (under the
    module-level ``html_dir``) and as PNG (under the module-level ``fig_dir``);
    both directories are created when missing. A link to the published HTML
    page is displayed as well.

    Parameters
    ----------
    new : pandas.DataFrame
        First run. Must be non-empty and have the columns 'time',
        'time_diff' and 'label'; the label of its first row names the plot
        and the output files.
    new2 : pandas.DataFrame
        Second run, with the columns 'time' and 'time_diff'.
    name1, name2 : str, optional
        Legend entries for the two traces. They default to the label followed
        by "(first run)" / "(second run)" so the two lines can be told apart;
        previously both traces carried the same legend name.

    Raises
    ------
    IndexError
        If ``new`` is empty.
    """
    label = new.iloc[0].label
    file_stem = label + "_processing_time"
    if name1 is None:
        name1 = label + " (first run)"
    if name2 is None:
        name2 = label + " (second run)"

    # Public URL of the HTML file once the docs folder is published.
    html_path = "https://harmony-one.github.io/harmony-log-analysis/" + html_dir.replace("../../docs/", "") + \
        file_stem + ".html"
    png_path = fig_dir
    print("png graphs saved in " + png_path)
    print('html graphs saved in ')
    display(HTML("<a href='" + html_path + "' target='_blank'>" + html_path + "</a>"))

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(html_dir, exist_ok=True)
    os.makedirs(png_path, exist_ok=True)

    hover = ("processing time: %{y}<br>" +
             "UTC Time: %{x}<br>" +
             "<extra></extra>")

    trace1 = go.Scatter(
        x=new["time"],
        y=new["time_diff"],
        mode='lines',
        name=name1,
        line_color="#00AEE9",
        hovertemplate=hover
    )

    trace2 = go.Scatter(
        x=new2["time"],
        y=new2["time_diff"],
        mode='lines',
        name=name2,
        line_color="#FFA07A",
        hovertemplate=hover
    )

    layout = go.Layout(
        title=label,
        yaxis=dict(
            title='Processing Time/ seconds'
        ),
        legend_orientation="h"
    )

    fig = go.Figure(data=[trace1, trace2], layout=layout)

    fig.show()
    # os.path.join tolerates output directories given without a trailing "/".
    fig.write_html(os.path.join(html_dir, file_stem + ".html"))
    fig.write_image(os.path.join(png_path, file_stem + ".png"), width=1000, height=500)
    

In [4]:
# First run: directory holding its node logs.
log_dir_1 = "../../logs/node_logs/ostn_03_19/s0-leader/"
# Output directories read by draw_graph (PNG files and HTML files respectively).
# NOTE(review): both are named after the 03_20 run although this cell loads the
# 03_19 logs - presumably because the graphs compare the two runs; confirm.
fig_dir = "../../graphs/processing_time/ostn_03_20/s0-leader/"
html_dir = "../../docs/graphs/processing_time/ostn_03_20/s0-leader/"
# List the directory, parse the "ds" records of its zerolog files, then build
# the time-ordered frame for the first run.
files_1 = os.listdir(log_dir_1)
data_1 = read_data(files_1, log_dir_1)
df_1 = data_processing(data_1)

In [7]:
# Second run: same loading steps as for the first run, applied to its own log
# directory (list files -> parse "ds" records -> time-ordered frame).
log_dir_2 = "../../logs/node_logs/ostn_03_20/s0-leader/"
files_2 = os.listdir(log_dir_2)
data_2 = read_data(files_2, log_dir_2)
df_2 = data_processing(data_2)

In [38]:
# Split the numbers off each message of the second run: the first standalone
# number goes to the 'epoch' column and the second to the 'block' column (NaN
# when that number is absent), then every digit is removed from the message.
# NOTE: this cell overwrites df_2['message'], so re-running it finds no digits
# and resets both columns to NaN; rebuild df_2 before running it again.
number_pattern = re.compile(r'\b\d+\b')


def _nth_number(message, position):
    """Return the number at index `position` among the standalone numbers of `message`, or NaN if there are too few."""
    numbers = number_pattern.findall(message)
    # Checking the length (not just non-emptiness) avoids an IndexError on
    # messages that carry a single number when the second one is requested.
    return int(numbers[position]) if len(numbers) > position else np.nan


df_2['epoch'] = df_2['message'].apply(lambda c: _nth_number(c, 0))
df_2['block'] = df_2['message'].apply(lambda c: _nth_number(c, 1))
digit_pattern = '[0-9]'
df_2['message'] = df_2['message'].apply(lambda c: re.sub(digit_pattern, '', c).strip())

In [53]:
# Per-label frames of "-end" rows with their 'time_diff', one list per run.
pair1 = get_time_diff(df_1)
pair2 = get_time_diff(df_2)

In [44]:
# Keep only the rows of the second run in which every column is populated
# (this drops, for example, messages that had no epoch/block numbers).
new_df_2 = df_2.dropna()

In [45]:
# Display the filtered second-run frame.
new_df_2

Unnamed: 0,time,message,label,epoch,block
137554,2020-03-20 20:11:57.432062296+00:00,ds-accumulateRewardBeaconchainSelfPayout-start...,accumulateRewardBeaconchainSelfPayout,245.0,9316.0
137555,2020-03-20 20:11:57.465789480+00:00,ds-accumulateRewardBeaconchainSelfPayout-end 2...,accumulateRewardBeaconchainSelfPayout,245.0,9316.0
137556,2020-03-20 20:11:57.465810351+00:00,ds-accumulateRewardShardchainPayout-start 245 ...,accumulateRewardShardchainPayout,245.0,9316.0
137557,2020-03-20 20:11:57.575368935+00:00,ds-accumulateRewardShardchainPayout-end 245 9316,accumulateRewardShardchainPayout,245.0,9316.0
137558,2020-03-20 20:12:59.451643948+00:00,ds-accumulateRewardBeaconchainSelfPayout-start...,accumulateRewardBeaconchainSelfPayout,245.0,9317.0
...,...,...,...,...,...
143247,2020-03-20 21:10:37.733852263+00:00,ds-accumulateRewardBeaconchainSelfPayout-start...,accumulateRewardBeaconchainSelfPayout,254.0,9654.0
143248,2020-03-20 21:10:37.757856882+00:00,ds-accumulateRewardBeaconchainSelfPayout-end 2...,accumulateRewardBeaconchainSelfPayout,254.0,9654.0
143249,2020-03-20 21:10:37.757879052+00:00,ds-accumulateRewardShardchainPayout-start 254 ...,accumulateRewardShardchainPayout,254.0,9654.0
143250,2020-03-20 21:10:37.832294096+00:00,ds-accumulateRewardShardchainPayout-end 254 9654,accumulateRewardShardchainPayout,254.0,9654.0
