In [2]:
import json
import pandas as pd
import os
import gzip
import re

In [3]:
def read_data(files, path, keyword):
    """Load the consensus-message log records found in the selected files.

    Every name in ``files`` that contains ``keyword`` is opened as a gzip
    file located in ``path``. Each line that contains one of the three
    message markers below is decoded with ``json.loads``.

    Args:
        files: iterable of file names (not full paths) to consider.
        path: directory that holds the files; a trailing separator is
            optional.
        keyword: substring a file name must contain for the file to be read.

    Returns:
        list of the decoded JSON objects, in file order and then line order.
        Empty when no file name contains ``keyword`` or no line matches.

    Raises:
        OSError: if a selected file cannot be opened or decompressed.
        json.JSONDecodeError: if a matching line is not valid JSON.
    """
    # gzip.open defaults to binary mode, so the markers are compared as bytes.
    markers = (
        b'Received prepared message',
        b'Receive announce message',
        b'Receive committed message',
    )
    data = []
    for file in files:
        if keyword not in file:
            continue
        # os.path.join (rather than `path + file`) keeps the lookup correct
        # whether or not `path` ends with a separator.
        with gzip.open(os.path.join(path, file)) as f:
            # Iterate the stream lazily; readlines() would hold the whole
            # decompressed log in memory at once.
            for line in f:
                if any(marker in line for marker in markers):
                    data.append(json.loads(line))
    return data

In [4]:
def data_processing(data):
    """Build a DataFrame from log records, ordered by time and then message.

    Args:
        data: records accepted by ``pd.DataFrame``; each must provide a
            ``time`` value in ``%Y-%m-%dT%H:%M:%S.%f`` format and a
            ``message`` value.

    Returns:
        DataFrame whose ``time`` column is parsed to datetimes, with rows
        sorted by ``time`` then ``message`` and a fresh 0..n-1 index.
    """
    frame = pd.DataFrame(data)
    frame['time'] = pd.to_datetime(frame['time'], format = '%Y-%m-%dT%H:%M:%S.%f')
    # Sort and renumber by returning new frames instead of mutating in place.
    ordered = frame.sort_values(by=['time', 'message'])
    return ordered.reset_index(drop=True)

In [29]:
def getMissingMsgDf(nodes):
    """Tabulate, per node, how many records of each consensus message were logged.

    For every node the records are loaded with ``read_data`` and shaped with
    ``data_processing``. The function reads the module-level names ``files``
    and ``log_dir``, so both must be defined before it is called.

    A node whose logs cannot be read or processed, or that has no matching
    records at all, is reported with 0 in every count column. Previously such
    a node silently reused the data of the node handled before it.

    Args:
        nodes: iterable of node identifiers; each is passed to ``read_data``
            as the file-name keyword.

    Returns:
        DataFrame with one row per node and the columns ``node``,
        ``[OnAnnounce] Receive announce message``,
        ``[OnPrepared] Received prepared message`` and
        ``[OnCommitted] Receive committed message``.
    """
    labels = [
        "[OnAnnounce] Receive announce message",
        "[OnPrepared] Received prepared message",
        "[OnCommitted] Receive committed message",
    ]
    rows = [[node] + _count_node_messages(node, labels) for node in list(nodes)]
    return pd.DataFrame(rows, columns=["node"] + labels)


def _count_node_messages(node, labels):
    """Return the record count for each label for one node (0 where unavailable)."""
    zeros = [0] * len(labels)
    try:
        data = read_data(files, log_dir, node)
    except Exception as err:
        # Best effort: report the failure and keep the node in the table.
        print("can't read data", node, err)
        return zeros
    if not data:
        # No matching lines means every message type is missing for this node.
        return zeros
    try:
        df = data_processing(data)
    except Exception as err:
        print("can't process data", node, err)
        return zeros
    counts = df['message'].value_counts()
    return [int(counts.get(label, 0)) for label in labels]

In [9]:
# Directory containing the validator log files for this run.
log_dir = "/home/ubuntu/jupyter/logs/os/20/04/14/19:35:10/validator/tmp_log/log-20200405.194121/"
files = os.listdir(log_dir)
# The node identifier is the text between "zerolog-validator-" and "-9000"
# in a log file name.
pattern = re.compile("zerolog-validator-(.*?)-9000")
# Collect the distinct identifiers. Directory entries that do not match the
# pattern are skipped instead of raising IndexError.
nodes = {
    match.group(1)
    for match in (pattern.search(file) for file in files)
    if match
}

In [None]:
# Build and display the per-node message-count table. getMissingMsgDf reads
# the module-level `files` and `log_dir`, so the cell defining them must run first.
getMissingMsgDf(nodes)

In [12]:
# Spot check for a single node: load its records and count rows per message value.
log_dir = "/home/ubuntu/jupyter/logs/os/20/04/14/19:35:10/validator/tmp_log/log-20200405.194121/"
files = os.listdir(log_dir)
# '13.56.179.90' is the file-name keyword selecting which log files are read.
data = read_data(files, log_dir, '13.56.179.90')
df = data_processing(data)
# The last expression is shown as the cell output.
df.groupby('message')['message'].count()

message
[OnPrepared] Received prepared message    6110
Name: message, dtype: int64

In [13]:
# Keep the per-message row counts of the current `df` so that individual
# labels can be looked up by name.
count = df.groupby('message')['message'].count()

In [17]:
# Look up the row count for the prepared-message label; raises KeyError if
# that label is absent from `count`.
count['[OnPrepared] Received prepared message']

6110

In [10]:
# Same spot check for node '107.20.95.47'. Relies on `files` and `log_dir`
# from an earlier cell and rebinds the notebook-level `data` and `df`.
data = read_data(files, log_dir, '107.20.95.47')
df = data_processing(data)
# The last expression is shown as the cell output.
df.groupby('message')['message'].count()

message
[OnPrepared] Received prepared message    6123
Name: message, dtype: int64

In [None]:
# Repeats the spot check for node '13.56.179.90'. Relies on `files` and
# `log_dir` from an earlier cell and rebinds the notebook-level `data` and `df`.
data = read_data(files, log_dir, '13.56.179.90')
df = data_processing(data)
# The last expression is shown as the cell output.
df.groupby('message')['message'].count()