In [11]:
import json
import pandas as pd
import os
from os import path
import gzip
import re
import numpy as np

In [2]:
def read_data(files, log_dir, date):
    data = []
    for file in files:
        if date in file:
            with gzip.open(path.join(log_dir, file)) as f:
                for line in f.readlines():
                    if 'Received prepared message'.encode('utf-8') in line \
                    or 'Receive announce message'.encode('utf-8') in line\
                    or 'Receive committed message'.encode('utf-8') in line:
                        data.append(json.loads(line))
    return data

In [3]:
def data_processing(data):
    df = pd.DataFrame(data)
    df['time'] = pd.to_datetime(df['time'], format = '%Y-%m-%dT%H:%M:%S.%f')
    df.sort_values(by=['time','message'], inplace = True)
    while df.message.iloc[0] != '[OnAnnounce] Receive announce message':
        df = df[1:]
    while df.message.iloc[-1] != '[OnCommitted] Receive committed message':
        df = df[:-1]
    df.reset_index(inplace = True, drop = True)
    return df

In [29]:
def get_missing_msg_index(df):
    for name, group in df.groupby(df.index //3):
        if group.iloc[0]['message'] != '[OnAnnounce] Receive announce message':
            index = group.iloc[0].name
            break
    else:
        index = None
    return index

In [36]:
def getMissingMsgDf(nodes, date):
    announce = []
    commit = []
    prepare = []
    node = []
    time = []
    for i in range(len(nodes)):
        node.append(nodes[i])
        log = path.join(log_dir, nodes[i])
        files = os.listdir(log)
        try:
            data = read_data(files, log, date)
        except:
            print("can't read data", nodes[i])
        try:
            df = data_processing(data)
        except:
            print("can't process data", nodes[i])
        count = df.groupby('message')['message'].count()
        announce.append(count.iloc[0])
        commit.append(count.iloc[1])
        prepare.append(count.iloc[2])
        idx = get_missing_msg_index(df)
        if idx:
            time.append(df.iloc[idx].time)
        else:
            time.append(np.nan)

    col_name = count.index.tolist()
    col_name.insert(0, "node")
    col_name.append("missing-time")
    df = pd.DataFrame(list(zip(node, announce, prepare, commit, time)), columns = col_name)
    return df

In [6]:
log_dir = "/home/ubuntu/jupyter/logs/mainnet/"
nodes = os.listdir(log_dir)

In [7]:
getMissingMsgDf(nodes, '04-19')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,11709,11709,11710,2020-04-19 01:48:16.717392020+00:00
1,3.125.154.121,11762,11762,11763,2020-04-19 01:48:16.720819820+00:00
2,34.251.240.119,11723,11723,11724,2020-04-19 01:48:16.717049459+00:00
3,18.197.90.17,9409,9409,9410,2020-04-19 01:48:16.727088057+00:00
4,3.121.218.212,11758,11758,11759,2020-04-19 01:48:16.721927202+00:00
5,3.123.30.30,9424,9424,9425,2020-04-19 01:48:16.720679307+00:00
6,34.218.48.95,11765,11765,11766,2020-04-19 01:48:16.654354897+00:00
7,34.212.183.107,9378,9378,9379,2020-04-19 01:48:16.654107326+00:00
8,54.212.72.201,11738,11738,11739,2020-04-19 01:48:16.655819992+00:00
9,18.196.217.89,9418,9418,9419,2020-04-19 01:48:16.721413364+00:00


In [30]:
getMissingMsgDf(nodes, '04-18')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,9381,9381,9381,
1,3.125.154.121,9414,9414,9414,
2,34.251.240.119,9377,9377,9377,
3,18.197.90.17,11779,11779,11779,
4,3.121.218.212,9411,9411,9411,
5,3.123.30.30,11788,11788,11788,
6,34.218.48.95,9410,9410,9410,
7,34.212.183.107,11743,11743,11743,
8,54.212.72.201,9385,9385,9385,
9,18.196.217.89,9420,9420,9420,


In [31]:
getMissingMsgDf(nodes, '04-17')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,11705,11705,11705,
1,3.125.154.121,11780,11780,11780,
2,34.251.240.119,11702,11702,11702,
3,18.197.90.17,9424,9424,9424,
4,3.121.218.212,11769,11769,11769,
5,3.123.30.30,9424,9424,9424,
6,34.218.48.95,11789,11789,11789,
7,34.212.183.107,9396,9396,9396,
8,54.212.72.201,11718,11718,11718,
9,18.196.217.89,11799,11799,11799,


In [32]:
getMissingMsgDf(nodes, '04-16')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,9404,9404,9404,
1,3.125.154.121,9430,9430,9430,
2,34.251.240.119,9419,9419,9419,
3,18.197.90.17,9449,9449,9449,
4,3.121.218.212,9425,9425,9425,
5,3.123.30.30,11826,11826,11826,
6,34.218.48.95,9439,9439,9439,
7,34.212.183.107,11760,11760,11760,
8,54.212.72.201,11739,11739,11739,
9,18.196.217.89,9426,9426,9426,


In [33]:
getMissingMsgDf(nodes, '04-15')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,11731,11731,11731,
1,3.125.154.121,11759,11759,11759,
2,34.251.240.119,11715,11715,11715,
3,18.197.90.17,11785,11785,11785,
4,3.121.218.212,11789,11789,11789,
5,3.123.30.30,9427,9427,9427,
6,34.218.48.95,11759,11759,11759,
7,34.212.183.107,9389,9389,9389,
8,54.212.72.201,11738,11738,11738,
9,18.196.217.89,11768,11768,11768,


In [34]:
getMissingMsgDf(nodes, '04-14')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,9351,9351,9352,2020-04-14 11:55:35.323709096+00:00
1,3.125.154.121,9401,9401,9402,2020-04-14 11:55:35.332241283+00:00
2,34.251.240.119,9365,9365,9366,2020-04-14 11:55:35.323477151+00:00
3,18.197.90.17,9418,9418,9419,2020-04-14 11:55:35.336683209+00:00
4,3.121.218.212,9413,9413,9414,2020-04-14 11:55:35.331506851+00:00
5,3.123.30.30,11792,11792,11793,2020-04-14 11:55:35.331844929+00:00
6,34.218.48.95,9398,9398,9399,2020-04-14 11:55:35.260844611+00:00
7,34.212.183.107,11751,11751,11752,2020-04-14 11:55:35.260831481+00:00
8,54.212.72.201,9392,9392,9393,2020-04-14 11:55:35.260765168+00:00
9,18.196.217.89,9412,9412,9413,2020-04-14 11:55:35.332701592+00:00


In [38]:
getMissingMsgDf(nodes, '04-13')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,11724,11724,11724,
1,3.125.154.121,11789,11789,11789,
2,34.251.240.119,11756,11756,11756,
3,18.197.90.17,11792,11792,11792,
4,3.121.218.212,11782,11782,11782,
5,3.123.30.30,9445,9445,9445,
6,34.218.48.95,11797,11797,11797,
7,34.212.183.107,9401,9401,9401,
8,54.212.72.201,11782,11782,11782,
9,18.196.217.89,11766,11766,11766,


In [39]:
getMissingMsgDf(nodes, '04-12')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,11537,11537,11537,
1,3.125.154.121,9431,9431,9431,
2,34.251.240.119,9403,9403,9403,
3,18.197.90.17,9426,9426,9426,
4,3.121.218.212,9430,9430,9430,
5,3.123.30.30,11816,11816,11816,
6,34.218.48.95,9436,9436,9436,
7,34.212.183.107,11768,11768,11768,
8,54.212.72.201,9436,9436,9436,
9,18.196.217.89,9409,9409,9409,


In [40]:
getMissingMsgDf(nodes, '04-11')

Unnamed: 0,node,[OnAnnounce] Receive announce message,[OnCommitted] Receive committed message,[OnPrepared] Received prepared message,missing-time
0,18.202.231.246,11747,11747,11747,
1,3.125.154.121,9416,9416,9416,
2,34.251.240.119,11727,11727,11727,
3,18.197.90.17,11772,11772,11772,
4,3.121.218.212,11796,11796,11796,
5,3.123.30.30,9452,9452,9452,
6,34.218.48.95,9407,9407,9407,
7,34.212.183.107,9397,9397,9397,
8,54.212.72.201,11770,11770,11770,
9,18.196.217.89,11760,11760,11760,
