In [1]:
import json
import pandas as pd
import os
import gzip
import re

In [21]:
def read_data(files, path, date):
    """Collect consensus log records for one date from gzipped JSON-lines files.

    Parameters
    ----------
    files : iterable of str
        File names (not full paths) to consider, e.g. ``os.listdir(path)``.
    path : str
        Directory that contains ``files``. A trailing separator is optional.
    date : str
        Substring a file name must contain for the file to be read
        (e.g. ``'04-19'``).

    Returns
    -------
    list
        One ``json.loads`` result per log line that contains any of the three
        message markers below, in the order the files were given and the
        lines appear in each file.

    Raises
    ------
    OSError
        If a selected file cannot be opened or is not valid gzip data.
    json.JSONDecodeError
        If a line that contains a marker is not valid JSON.
    """
    # Lines are compared as raw bytes, so only matching lines are ever decoded.
    markers = (
        b'Received prepared message',
        b'Receive announce message',
        b'Receive committed message',
    )
    data = []
    for file in files:
        if date not in file:
            continue
        # os.path.join accepts the directory with or without a trailing slash;
        # plain string concatenation silently built a wrong path without one.
        with gzip.open(os.path.join(path, file)) as f:
            # Stream the file line by line rather than materialising the whole
            # decompressed log with readlines().
            for line in f:
                if any(marker in line for marker in markers):
                    data.append(json.loads(line))
    return data

In [3]:
def data_processing(data):
    """Turn parsed log records into a DataFrame ordered by time.

    Parameters
    ----------
    data : list
        Records as returned by ``read_data``; each must provide at least the
        ``'time'`` and ``'message'`` keys.

    Returns
    -------
    pandas.DataFrame
        One row per record, with ``'time'`` parsed to datetimes, rows sorted
        by ``('time', 'message')`` and the index renumbered from 0.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%f'
    frame = pd.DataFrame(data)
    parsed_time = pd.to_datetime(frame['time'], format=timestamp_format)
    ordered = (
        frame.assign(time=parsed_time)
        .sort_values(by=['time', 'message'])
        .reset_index(drop=True)
    )
    return ordered

In [4]:
def get_missing_msg_index(df):
    """Print the index label where the announce/prepared/committed rhythm breaks.

    Rows are bucketed by ``index // 3``. The first bucket whose first row is
    not an announce message has that row's index label printed. Nothing is
    printed when every bucket starts with an announce message. The function
    always returns ``None``.
    """
    announce = '[OnAnnounce] Receive announce message'
    triples = df.groupby(df.index // 3)
    # Generator stops at the first offending bucket, mirroring an early break.
    first_bad = next(
        (
            chunk.index[0]
            for _, chunk in triples
            if chunk['message'].iloc[0] != announce
        ),
        None,
    )
    if first_bad is not None:
        print(first_bad)

In [5]:
# Load the logs whose file name contains '04-19' and count rows per message type.
# NOTE(review): log_dir is an absolute, machine-specific path; this cell only
# re-runs on a host where that directory exists.
log_dir = "/home/ubuntu/jupyter/logs/mainnet/34.212.183.107/"
# Later cells reuse `files` and `log_dir` as defined here.
files = os.listdir(log_dir)
data = read_data(files, log_dir, '04-19')
df = data_processing(data)
# Presumably each consensus round logs one message of each type, so unequal
# counts point to a missing or extra message — TODO confirm.
df.groupby('message')['message'].count()

message
[OnAnnounce] Receive announce message      9378
[OnCommitted] Receive committed message    9379
[OnPrepared] Received prepared message     9378
Name: message, dtype: int64

In [6]:
# Print the index of the first 3-row group that does not start with an
# announce message (the function prints; it returns nothing).
get_missing_msg_index(df)

3138


In [7]:
# Show the rows around the printed index (3138). The slice bounds are
# hand-copied from that output and must be updated if the data changes.
df.iloc[3132:3142]

Unnamed: 0,level,port,ip,caller,time,message,MsgBlockNum,MsgViewID
3132,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:47:56.681301789+00:00,[OnAnnounce] Receive announce message,,
3133,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:47:58.449389185+00:00,[OnPrepared] Received prepared message,3087118.0,3087198.0
3134,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:48:01.419759313+00:00,[OnCommitted] Receive committed message,,
3135,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:48:09.668910694+00:00,[OnAnnounce] Receive announce message,,
3136,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:48:11.421768371+00:00,[OnCommitted] Receive committed message,,
3137,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:48:11.563534346+00:00,[OnPrepared] Received prepared message,3087119.0,3087199.0
3138,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:48:16.654107326+00:00,[OnCommitted] Receive committed message,,
3139,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:48:18.000227752+00:00,[OnAnnounce] Receive announce message,,
3140,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:48:19.846507883+00:00,[OnPrepared] Received prepared message,3087120.0,3087200.0
3141,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-19 01:48:22.891395333+00:00,[OnCommitted] Receive committed message,,


In [8]:
# Same load-and-count steps for files matching '04-18'. Relies on `files` and
# `log_dir` from the '04-19' cell and overwrites the global `data` / `df`.
data = read_data(files, log_dir, '04-18')
df = data_processing(data)
df.groupby('message')['message'].count()

message
[OnAnnounce] Receive announce message      11743
[OnCommitted] Receive committed message    11743
[OnPrepared] Received prepared message     11743
Name: message, dtype: int64

In [9]:
# Same load-and-count steps for files matching '04-17'. Relies on `files` and
# `log_dir` from the '04-19' cell and overwrites the global `data` / `df`.
data = read_data(files, log_dir, '04-17')
df = data_processing(data)
df.groupby('message')['message'].count()

message
[OnAnnounce] Receive announce message      9396
[OnCommitted] Receive committed message    9396
[OnPrepared] Received prepared message     9396
Name: message, dtype: int64

In [16]:
# Same load-and-count steps for files matching '04-16'. Relies on `files` and
# `log_dir` from the '04-19' cell and overwrites the global `data` / `df`.
# NOTE(review): this cell's execution count (16) is higher than the next
# cell's (12), so the cells were not run top to bottom.
data = read_data(files, log_dir, '04-16')
df = data_processing(data)
df.groupby('message')['message'].count()

message
[OnAnnounce] Receive announce message      11760
[OnCommitted] Receive committed message    11760
[OnPrepared] Received prepared message     11760
Name: message, dtype: int64

In [12]:
# Same load-and-count steps for files matching '04-15'. Relies on `files` and
# `log_dir` from the '04-19' cell and overwrites the global `data` / `df`.
data = read_data(files, log_dir, '04-15')
df = data_processing(data)
df.groupby('message')['message'].count()

message
[OnAnnounce] Receive announce message      9389
[OnCommitted] Receive committed message    9390
[OnPrepared] Received prepared message     9390
Name: message, dtype: int64

In [14]:
# Display the current global `df` (presumably still the '04-15' frame — it is
# whatever the last executed load cell produced).
# NOTE(review): the first rows shown are timestamped 2020-04-14, so a file
# matched by '04-15' in its name also holds entries from the previous day.
df

Unnamed: 0,level,port,ip,caller,time,message,MsgBlockNum,MsgViewID
0,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:15.561375756+00:00,[OnPrepared] Received prepared message,3043785.0,3043865.0
1,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:18.525176419+00:00,[OnCommitted] Receive committed message,,
2,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:22.042109375+00:00,[OnAnnounce] Receive announce message,,
3,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:23.723591012+00:00,[OnPrepared] Received prepared message,3043786.0,3043866.0
4,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:26.740047604+00:00,[OnCommitted] Receive committed message,,
...,...,...,...,...,...,...,...,...
28164,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-15 20:16:52.702605495+00:00,[OnPrepared] Received prepared message,3053173.0,3053253.0
28165,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-15 20:16:55.647165362+00:00,[OnCommitted] Receive committed message,,
28166,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-15 20:16:59.364493855+00:00,[OnAnnounce] Receive announce message,,
28167,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-15 20:17:00.958669588+00:00,[OnPrepared] Received prepared message,3053174.0,3053254.0


In [27]:
# Same load-and-count steps for files matching '04-14'. Relies on `files` and
# `log_dir` from the '04-19' cell and overwrites the global `data` / `df`.
data = read_data(files, log_dir, '04-14')
df = data_processing(data)
df.groupby('message')['message'].count()

message
[OnAnnounce] Receive announce message      11752
[OnCommitted] Receive committed message    11753
[OnPrepared] Received prepared message     11751
Name: message, dtype: int64

In [23]:
# Display the current global `df`.
# NOTE(review): this cell's execution count (23) is lower than the '04-14'
# load cell's (27), so this output came from an earlier run of that load.
df

Unnamed: 0,level,port,ip,caller,time,message,MsgBlockNum,MsgViewID
0,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-13 19:59:54.552898738+00:00,[OnCommitted] Receive committed message,,
1,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-13 19:59:57.869071308+00:00,[OnAnnounce] Receive announce message,,
2,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-13 19:59:59.554797544+00:00,[OnPrepared] Received prepared message,3032034.0,3032114.0
3,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-13 20:00:03.889666848+00:00,[OnCommitted] Receive committed message,,
4,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-13 20:00:06.042407845+00:00,[OnAnnounce] Receive announce message,,
...,...,...,...,...,...,...,...,...
35251,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:02.321966720+00:00,[OnCommitted] Receive committed message,,
35252,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:05.662677558+00:00,[OnAnnounce] Receive announce message,,
35253,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:07.541041414+00:00,[OnPrepared] Received prepared message,3043784.0,3043864.0
35254,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 22:50:10.508881447+00:00,[OnCommitted] Receive committed message,,


In [28]:
# Recount after dropping the first and last rows. The frame shown for this
# date starts with a committed message, so the edge rows are presumably
# partial rounds cut by the file boundary — TODO confirm for the last row.
df.iloc[1:-1].groupby('message')['message'].count()

message
[OnAnnounce] Receive announce message      11751
[OnCommitted] Receive committed message    11752
[OnPrepared] Received prepared message     11751
Name: message, dtype: int64

In [32]:
# Drop the edge rows and renumber from 0, so the `index // 3` grouping in
# get_missing_msg_index starts on the first remaining row.
new_df = df.iloc[1:-1].reset_index(drop = True)
get_missing_msg_index(new_df)

20919


In [34]:
# Show the rows around the printed index (20919). The slice bounds are
# hand-copied from that output and must be updated if the data changes.
new_df.iloc[20913:20923]

Unnamed: 0,level,port,ip,caller,time,message,MsgBlockNum,MsgViewID
20913,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:15.470801024+00:00,[OnAnnounce] Receive announce message,,
20914,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:17.291594236+00:00,[OnPrepared] Received prepared message,3039005.0,3039085.0
20915,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:20.210606584+00:00,[OnCommitted] Receive committed message,,
20916,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:28.767791848+00:00,[OnAnnounce] Receive announce message,,
20917,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:30.211871748+00:00,[OnCommitted] Receive committed message,,
20918,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:30.642144377+00:00,[OnPrepared] Received prepared message,3039006.0,3039086.0
20919,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:35.260831481+00:00,[OnCommitted] Receive committed message,,
20920,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:36.988741509+00:00,[OnAnnounce] Receive announce message,,
20921,info,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:38.781491088+00:00,[OnPrepared] Received prepared message,3039007.0,3039087.0
20922,debug,9000,34.212.183.107,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-14 11:55:41.876929946+00:00,[OnCommitted] Receive committed message,,
