In [1]:
import json
import pandas as pd
import os
from os import path
import gzip
import re
import numpy as np

In [2]:
def read_data(files, log_dir, date):
    """Collect the '[OnPrepared] Received prepared message' records for a date.

    Parameters
    ----------
    files : iterable of str
        File names, relative to ``log_dir``, to consider.
    log_dir : str
        Directory that contains the gzip-compressed log files.
    date : str
        Substring that must occur in a file name for the file to be read.

    Returns
    -------
    list
        One ``json.loads`` result per matching line, in the order the files
        and lines were visited. Empty when nothing matches.

    Raises
    ------
    OSError
        If a selected file cannot be opened or decompressed.
    ValueError
        If a matching line is not valid JSON (``json.JSONDecodeError``).
    """
    # gzip.open defaults to binary mode, so the marker is compared as bytes.
    # Encode it once instead of once per log line.
    marker = '[OnPrepared] Received prepared message'.encode('utf-8')
    data = []
    for file in files:
        if date not in file:
            continue
        with gzip.open(path.join(log_dir, file)) as f:
            # Iterate the stream lazily; readlines() would hold the entire
            # decompressed log in memory before the first line is inspected.
            for line in f:
                if marker in line:
                    data.append(json.loads(line))
    return data

In [3]:
def data_processing(data):
    """Build a time-ordered frame of log records with per-row view-ID deltas.

    Parameters
    ----------
    data : list
        Records accepted by ``pandas.DataFrame``; each must provide the
        'time', 'message' and 'MsgViewID' fields used below.

    Returns
    -------
    pandas.DataFrame
        The records sorted by 'time' then 'message', re-indexed from 0, with
        an added 'viewID-diff' column holding each row's 'MsgViewID' minus
        the previous row's (NaN on the first row).
    """
    frame = pd.DataFrame(data)
    parsed_time = pd.to_datetime(frame['time'], format = '%Y-%m-%dT%H:%M:%S.%f')
    ordered = (
        frame
        .assign(time=parsed_time)
        .sort_values(by=['time', 'message'])
        .reset_index(drop=True)
    )
    # Difference against the preceding row after sorting.
    previous_view_id = ordered['MsgViewID'].shift()
    ordered['viewID-diff'] = ordered['MsgViewID'] - previous_view_id
    return ordered

In [4]:
def getViewIdDiff(nodes, date):
    """Count, per node, how often each consecutive MsgViewID difference occurs.

    For every entry of ``nodes`` the directory ``log_dir/<node>`` (``log_dir``
    is the module-level setting) is listed, its logs for ``date`` are loaded
    with ``read_data`` and turned into a frame by ``data_processing``; the
    'viewID-diff' values of that frame are then counted.

    A node whose logs cannot be listed, read or processed is reported on
    stdout and skipped, so it never inherits the previous node's data.

    Parameters
    ----------
    nodes : iterable of str
        Names of the per-node sub-directories of ``log_dir``.
    date : str
        Date substring forwarded to ``read_data`` to select log files.

    Returns
    -------
    pandas.DataFrame
        One column per successfully processed node, indexed by the
        'viewID-diff' value; each cell is the number of occurrences.
    """
    node_dict = {}
    for node in nodes:
        log_path = path.join(log_dir, node)
        try:
            files = os.listdir(log_path)
            data = read_data(files, log_path, date)
        except Exception as err:
            print("can't read data", node, repr(err))
            # Skip the node: falling through would reuse stale `data`.
            continue
        try:
            df = data_processing(data)
        except Exception as err:
            print("can't process data", node, repr(err))
            # Skip the node: falling through would reuse a stale `df`.
            continue
        node_dict[node] = df.groupby('viewID-diff')['viewID-diff'].count()
    return pd.DataFrame(node_dict)

In [5]:
# Directory whose entries are the per-node log directories read by getViewIdDiff.
log_dir = "/home/ubuntu/jupyter/logs/mainnet/"
# Node left out of the analysis. Filtering instead of calling list.remove()
# keeps this cell from raising ValueError when that directory is absent.
excluded_nodes = {"3.112.219.248"}
nodes = [node for node in os.listdir(log_dir) if node not in excluded_nodes]

In [6]:
# Per-node counts of consecutive MsgViewID differences, from log files whose name contains '04-19'.
getViewIdDiff(nodes, '04-19')

Unnamed: 0_level_0,18.202.231.246,3.125.154.121,34.251.240.119,18.197.90.17,3.121.218.212,3.123.30.30,34.218.48.95,34.212.183.107,54.212.72.201,18.196.217.89
viewID-diff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,11708,11761,11722,9408,11758,9424,11765,9377,11737,9418


In [7]:
# Per-node counts of consecutive MsgViewID differences, from log files whose name contains '04-18'.
getViewIdDiff(nodes, '04-18')

Unnamed: 0_level_0,18.202.231.246,3.125.154.121,34.251.240.119,18.197.90.17,3.121.218.212,3.123.30.30,34.218.48.95,34.212.183.107,54.212.72.201,18.196.217.89
viewID-diff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,9380,9413,9376,11779,9410,11787,9410,11742,9384,9420


In [8]:
# Per-node counts of consecutive MsgViewID differences, from log files whose name contains '04-17'.
getViewIdDiff(nodes, '04-17')

Unnamed: 0_level_0,18.202.231.246,3.125.154.121,34.251.240.119,18.197.90.17,3.121.218.212,3.123.30.30,34.218.48.95,34.212.183.107,54.212.72.201,18.196.217.89
viewID-diff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,11705,11779,11701,9423,11769,9423,11789,9395,11717,11798


In [9]:
# Per-node counts of consecutive MsgViewID differences, from log files whose name contains '04-16'.
getViewIdDiff(nodes, '04-16')

Unnamed: 0_level_0,18.202.231.246,3.125.154.121,34.251.240.119,18.197.90.17,3.121.218.212,3.123.30.30,34.218.48.95,34.212.183.107,54.212.72.201,18.196.217.89
viewID-diff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,9403.0,9429.0,9419.0,9449.0,9425.0,11826.0,9439.0,11759.0,11737,9425.0
11719.0,,,,,,,,,1,


In [10]:
# Repeat the per-node steps of getViewIdDiff for one node only, keeping the
# intermediate frame `df` at module level for inspection.
log_path = path.join(log_dir, "54.212.72.201")
files = os.listdir(log_path)
data = read_data(files, log_path, '04-16')
df = data_processing(data)
# Number of occurrences of each distinct 'viewID-diff' value for this node.
count = df.groupby('viewID-diff')['viewID-diff'].count()


In [11]:
# Rows whose MsgViewID is more than 1 above the previous row's.
df[df['viewID-diff'] > 1]

Unnamed: 0,level,port,ip,MsgBlockNum,MsgViewID,caller,time,message,viewID-diff
9398,info,9000,54.212.72.201,3075350,3075430,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-17 22:55:29.125475714+00:00,[OnPrepared] Received prepared message,11719.0


In [12]:
# Rows at positions 9390-9399, i.e. the neighbourhood of the jump that the recorded output shows at index 9398.
df.iloc[9390:9400]

Unnamed: 0,level,port,ip,MsgBlockNum,MsgViewID,caller,time,message,viewID-diff
9390,info,9000,54.212.72.201,3063624,3063704,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-16 20:08:39.112894521+00:00,[OnPrepared] Received prepared message,1.0
9391,info,9000,54.212.72.201,3063625,3063705,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-16 20:08:47.340586399+00:00,[OnPrepared] Received prepared message,1.0
9392,info,9000,54.212.72.201,3063626,3063706,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-16 20:08:55.494007777+00:00,[OnPrepared] Received prepared message,1.0
9393,info,9000,54.212.72.201,3063627,3063707,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-16 20:09:04.089877897+00:00,[OnPrepared] Received prepared message,1.0
9394,info,9000,54.212.72.201,3063628,3063708,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-16 20:09:12.032658931+00:00,[OnPrepared] Received prepared message,1.0
9395,info,9000,54.212.72.201,3063629,3063709,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-16 20:09:20.450873969+00:00,[OnPrepared] Received prepared message,1.0
9396,info,9000,54.212.72.201,3063630,3063710,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-16 20:09:28.822449988+00:00,[OnPrepared] Received prepared message,1.0
9397,info,9000,54.212.72.201,3063631,3063711,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-16 20:09:36.825427075+00:00,[OnPrepared] Received prepared message,1.0
9398,info,9000,54.212.72.201,3075350,3075430,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-17 22:55:29.125475714+00:00,[OnPrepared] Received prepared message,11719.0
9399,info,9000,54.212.72.201,3075351,3075431,/mnt/jenkins/workspace/harmony-release/harmony...,2020-04-17 22:55:37.427181349+00:00,[OnPrepared] Received prepared message,1.0


In [13]:
# Per-node counts of consecutive MsgViewID differences, from log files whose name contains '04-15'.
getViewIdDiff(nodes, '04-15')

Unnamed: 0_level_0,18.202.231.246,3.125.154.121,34.251.240.119,18.197.90.17,3.121.218.212,3.123.30.30,34.218.48.95,34.212.183.107,54.212.72.201,18.196.217.89
viewID-diff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,11730,11759,11714,11785,11788,9426,11758,9389,11737,11767
