In [None]:
%matplotlib nbagg
import matplotlib.pyplot as plt
from IPython.display import display
##
import json
import re
##
import numpy           as np
import pandas          as pd
import statsmodels.api as sm
##
from elasticsearch     import Elasticsearch, helpers
from elasticsearch_dsl import Search, A, Q

##
import thundermint.plot  as plot
import thundermint.splot as splot

In [None]:
# Read the Elasticsearch login/password from credentials.json in the
# current working directory.
with open("credentials.json") as f:
    credentials = json.load(f)
# Hand the credentials to the client via `http_auth` instead of splicing them
# into the host string: a login or password containing URL-reserved
# characters ('@', ':', '/', '%', ...) would otherwise be mis-parsed.
es = Elasticsearch("elastic.hxr.team",
                   http_auth=(credentials['login'], credentials['password']),
                   port=443,
                   use_ssl=True,
                   )

# Global parameters for search

In [None]:
# Name of the Elasticsearch index every query in this notebook runs against.
# flt_time() extracts the trailing date part of this name with a regex to
# build its timestamps.
index = 'xenochain-2018-12-03'

# Queries

In [None]:
# -------------------------------------------------------------------
# Primitive filters

def flt_host(host) :
    """Build a filter restricting a search to a single host.

    Returns a one-argument function that takes a search object and returns
    the result of its `filter('term', host=host)` call.
    """
    def apply(s):
        return s.filter('term', host=host)
    return apply
def flt_cluster(cluster) :
    """Build a filter restricting a search to one cluster.

    The cluster name is matched against the `env` field. Returns a
    one-argument function that takes a search object and returns the result
    of its `filter('term', env=cluster)` call.
    """
    def apply(s):
        return s.filter('term', env=cluster)
    return apply
def flt_severity(sev) :
    """Build a filter on the `sev` (severity) field.

    Returns a one-argument function that takes a search object and returns
    the result of its `filter('term', sev=sev)` call.
    """
    def apply(s):
        return s.filter('term',  sev=sev)
    return apply
def flt_namespace(ns) :
    """Build a filter on the `ns` (namespace) field.

    Returns a one-argument function that takes a search object and returns
    the result of its `filter('term', ns=ns)` call.
    """
    def apply(s):
        return s.filter('term',  ns=ns)
    return apply
def flt_msg(q, msg):
    """Build a filter on the log message (`msg` field).

    `q` is the query type handed to the search object's `filter` method
    (the notebook uses "match"); `msg` is the value to look for.
    Returns a one-argument function from search object to filtered search.
    """
    def apply(s):
        return s.filter(q, msg=msg)
    return apply

# Ready-made namespace filters for the three namespaces queried below.
flt_consensus = flt_namespace('consensus')
flt_mempool   = flt_namespace('mempool')
flt_net       = flt_namespace('net')

def flt_time(start, delta, date=None):
    """Filter time range.

    Parameters
    ----------
    start : (hour, minute)
        Start of the window on the selected day.
    delta : (hours, minutes)
        Length of the window.
    date : str, optional
        Day as "YYYY-MM-DD". When omitted, the date is extracted from the
        trailing date part of the module-level `index` name.

    Returns
    -------
    A one-argument function that applies a `range` filter on the `at`
    field (start inclusive, end exclusive) to a search object.

    Raises
    ------
    ValueError
        If `date` is omitted and no date can be found in `index`, or if the
        date string is not a valid "YYYY-MM-DD" date.
    """
    from datetime import datetime, timedelta

    (h1,m1) = start
    (dh,dm) = delta
    if date is None:
        # Raw string: "\d" in a normal string literal is an invalid escape.
        r = re.match(r"^.*-(\d+-\d+-\d+)", index)
        if r is None:
            raise ValueError("cannot extract a date from index name %r" % (index,))
        date = r.group(1)
    # Real datetime arithmetic, so a window that runs past midnight rolls over
    # to the next day instead of producing an invalid hour >= 24.
    begin = datetime.strptime(date, "%Y-%m-%d") + timedelta(hours=h1, minutes=m1)
    end   = begin + timedelta(hours=dh, minutes=dm)
    fmt   = "%Y-%m-%dT%H:%M:00Z"
    t1 = begin.strftime(fmt)
    t2 = end.strftime(fmt)
    return lambda s : s.filter('range', at ={"gte":t1, "lt":t2})

def make_seatch(cluster, filters) :
    """Create a Search object and run it through every filter.

    Starts from a Search bound to the module-level `es` client and `index`,
    then applies each callable in `filters` in order, each one taking the
    current search and returning the next.

    Note: `cluster` is accepted but not used here; callers that need a
    cluster restriction have to apply it themselves.
    """
    search = Search(using=es, index=index)
    for apply_filter in filters:
        search = apply_filter(search)
    return search

# -------------------------------------------------------------------
# Postprocessing

def postprocess_entries(s) :
    """Materialize all hits of a search into a time-sorted DataFrame.

    Iterates over `s.scan()`, converts every hit with `to_dict()`, and builds
    a frame with the columns 'at', 'host', 'data', 'msg'. The 'at' column is
    parsed with `pd.to_datetime` and the rows are sorted by it (the original
    row labels are kept).
    """
    records = [hit.to_dict() for hit in s.scan()]
    frame = pd.DataFrame.from_records(records,
                                      columns=['at','host','data','msg'])
    frame['at'] = pd.to_datetime(frame['at'])
    return frame.sort_values('at')

def postprocess_mempool(r):
    """Unpack mempool counters from the 'data' column into own columns.

    For each of 'size', 'filtered', 'added' and 'discarded' a column is
    added holding the corresponding entry of every row's 'data' mapping.
    The new columns are written into the frame that was passed in; the
    returned frame is a new one with the 'data' column dropped.
    """
    for field in ('size', 'filtered', 'added', 'discarded'):
        # Bind the field name per iteration via the default argument.
        r[field] = r['data'].apply(lambda entry, name=field: entry[name])
    return r.drop('data', axis=1)

def postprocess_gossip(r):
    """Turn 'Gossip stats' log entries into a flat table of counters.

    Keeps only rows whose 'msg' equals 'Gossip stats', expands each row's
    'data' record into columns, and attaches the matching 'at' and 'host'
    values. The result is indexed 0..n-1.
    """
    is_gossip = r['msg'] == 'Gossip stats'
    stats = r[is_gossip].reset_index(drop=True)
    table = pd.DataFrame.from_records(stats['data'].values)
    # Both frames are indexed 0..n-1 here, so the assignments line up row by row.
    table['at'] = stats['at']
    table['host'] = stats['host']
    return table

def split_on_host(df):
    """Split a frame into a dict of per-host frames.

    Keys are the distinct values of the 'host' column; each value holds the
    rows of that host, re-indexed from 0.
    """
    per_host = {}
    for host, rows in df.groupby('host'):
        per_host[host] = rows.reset_index(drop=True)
    return per_host

# -------------------------------------------------------------------
# Queries

def q_agg_uniq(cluster, filters, field):
    """Aggregate by unique field.

    Builds a search from `filters`, restricts it to `cluster`, and returns
    the buckets of a `terms` aggregation over `field`.

    NOTE(review): the aggregation is created without an explicit `size`, so
    the server-side default bucket limit applies — confirm this is enough
    for the fields it is used with.
    """
    s = make_seatch(cluster, filters)
    # make_seatch does not apply the cluster restriction, so do it here,
    # the same way q_scan does.
    s = flt_cluster(cluster)(s)
    s.aggs.bucket('unique_ids', A('terms', field=field))
    # Slice [0:0] asks for zero hits: only the aggregation result is read.
    return s[0:0].execute().aggregations.unique_ids.buckets

def q_scan(cluster, filters, host=None, source=['at','msg','data','host'], postprocess=[]):
    """Fetch log entries from Elasticsearch and post-process them.

    The search is built from `filters`, limited to the fields in `source`,
    and restricted to `cluster` (and to `host`, when one is given). The hits
    are loaded with postprocess_entries and then passed through every
    callable in `postprocess`, in order.

    Returns a single frame when `host` is given; otherwise a dict mapping
    each host to its own frame (see split_on_host).
    """
    search = make_seatch(cluster, filters).source(source)
    search = flt_cluster(cluster)(search)
    single_host = host is not None
    if single_host:
        search = flt_host(host)(search)
    result = postprocess_entries(search)
    for step in postprocess:
        result = step(result)
    return result if single_host else split_on_host(result)

# Restarts information

In [None]:
def show_restarts():
    """Print and display, per host, the consensus-engine start messages.

    Queries the 'atum' cluster for entries in the "consensus" namespace
    whose message matches "Starting consensus engine".
    """
    filters = [flt_msg("match", "Starting consensus engine"),
               flt_namespace("consensus")]
    per_host = q_scan('atum', filters)
    for host, entries in per_host.items():
        print(host)
        display(entries)

In [None]:
# List, host by host, every log entry that marks a consensus engine start.
show_restarts()

# Plots for commits

In [None]:
# Consensus-namespace logs of the 'atum' cluster, split per host, for the
# 5-minute window starting at 15:35 on the day encoded in `index`.
logsC = q_scan('atum', [flt_namespace('consensus'),
                        flt_time((15,35), (0,5))])

In [None]:
# Commit-time plot from the consensus logs (project helper in thundermint.plot).
# The trailing None keeps the cell from echoing the helper's return value.
plot.plot_commit_time(logsC)
None

In [None]:
# Presumably the number of transactions per block (project helper; verify
# in thundermint.plot). Trailing None suppresses the cell output.
plot.plot_n_tx_in_block(logsC)
None

In [None]:
# Project helper from thundermint.splot; the meaning and units of w=32000
# are not visible in this notebook — TODO document.
splot.splot(logsC, w=32000)

# Rounds vs time

In [None]:
# Round plot from the same consensus logs (project helper in thundermint.plot).
# Trailing None suppresses the cell output.
plot.plot_round(logsC)
None

# Mempool plots

In [None]:
# Mempool-namespace logs of the 'atum' cluster for the 5-minute window
# starting at 15:20, per host, with the mempool counters unpacked into columns.
logsM = q_scan('atum', [flt_namespace('mempool'),
                        flt_time((15,20), (0,5))],
                postprocess=[postprocess_mempool])

In [None]:
# plot_mempool_size is given the list of per-host frames (host names dropped).
# Trailing None suppresses the cell output.
plot.plot_mempool_size([x for x in logsM.values()])
None

# Gossip stats

In [None]:
# 'Gossip stats' entries from the net namespace of the 'atum' cluster for the
# 30-minute window starting at 15:35, per host, flattened by postprocess_gossip.
logsG = q_scan('atum', [flt_namespace('net'), 
                        flt_msg('match', 'Gossip stats'),
                        flt_time((15,35), (0,30))
                       ],
                postprocess=[postprocess_gossip])

In [None]:
# One plot per gossip counter column, in Rx/Tx pairs for the suffixes
# PV, PC, P and B (what each suffix stands for is defined by the logging
# side — not visible here).
for gossip_counter in ("RxPV", "TxPV",
                       "RxPC", "TxPC",
                       "RxP", "TxP",
                       "RxB", "TxB"):
    plot.plot_gossip(logsG, gossip_counter)
# Trailing None suppresses the cell output.
None

# Scratchpad

In [None]:
def plot_gossip_rxtx_ratio(logs, key, delta=False):
    """Plot the TX/RX ratio over time for one gossip counter, per host.

    `logs` maps host name to a frame with columns 'at', 'Tx'+key and
    'Rx'+key. With `delta=True` both counters are first made relative to
    their first sample. Returns the figure.
    """
    tx_col = 'Tx'+key
    rx_col = 'Rx'+key
    fig,ax = plot.figure_with_legend()
    plt.grid()
    plt.title("Ratio TX/RX for %s" % key)
    for host, frame in logs.items():
        sent = frame[tx_col]
        received = frame[rx_col]
        if delta :
            sent = sent - sent.values[0]
            received = received - received.values[0]
        ax.plot(frame['at'], sent/received, '+', label=host, markersize=1.5)
    plot.add_legend(ax)
    return fig
# TX/RX ratio for the 'PC' counters (absolute counters, delta left at False).
# Trailing None suppresses the cell output.
plot_gossip_rxtx_ratio(logsG, 'PC')
None

In [None]:
def calc_rxtx(logs, key, delta=False) :
    """Average TX/RX ratio of one gossip counter, one value per host.

    Parameters
    ----------
    logs : dict
        Maps host to a frame with columns 'Tx'+key and 'Rx'+key.
    key : str
        Counter suffix.
    delta : bool
        When true, both counters are made relative to their first sample
        before the ratio is taken.

    Returns
    -------
    list
        One mean ratio per entry of `logs`, in iteration order. Samples whose
        ratio is not finite (RX equal to zero — which always happens for the
        first sample when `delta` is set, giving 0/0) are left out of the
        mean; a host without any finite ratio yields NaN.
    """
    def gen() :
        for _,d in logs.items():
            tx = d['Tx'+key]
            rx = d['Rx'+key]
            if delta and len(tx):
                tx = tx - tx.values[0]
                rx = rx - rx.values[0]
            ratio = np.asarray(tx/rx, dtype='float64')
            # Drop NaN/inf so that a single zero RX sample cannot poison
            # the whole average.
            ratio = ratio[np.isfinite(ratio)]
            yield np.mean(ratio) if ratio.size else np.nan
    return list(gen())

def calc_rate(logs, key):
    """Growth rate of a counter per second, one value per host.

    For every frame in `logs` an ordinary-least-squares line is fitted to
    column `key` against the time elapsed since the frame's first 'at'
    value; the slope of that line is the rate.

    Returns a list of slopes in the iteration order of `logs`.
    """
    def gen() :
        for _,d in logs.items():
            # total_seconds() gives plain float seconds on every pandas
            # version (astype('timedelta64[s]') stays a timedelta dtype on
            # pandas >= 2), and .iloc[0] also works for tz-aware timestamps.
            ts = (d['at'] - d['at'].iloc[0]).dt.total_seconds()
            ns = d[key]
            r  = sm.OLS(ns, sm.add_constant(ts)).fit()
            # params is labelled (constant first, then the time column):
            # take the slope by position explicitly.
            yield r.params.iloc[1]
    return list(gen())

In [None]:
# Bar chart of the average TX/RX ratio of the 'PC' counters, one bar per host
# (bars are in the iteration order of logsG; host names are not labelled).
plt.figure()
plt.grid()
plt.bar( range(len(logsG)), calc_rxtx(logsG, 'PC'))
None

In [None]:
def plot_rates(logs, key):
    """Bar chart of RX and TX growth rates of one gossip counter per host.

    `logs` maps host to a frame with columns 'at', 'Rx'+key and 'Tx'+key.
    For every host two side-by-side bars are drawn with the rates computed
    by calc_rate. Returns the figure.
    """
    fig,ax = plot.figure_with_legend()
    plt.grid()
    plt.title('RX/TX growth rate for %s' % key)
    # Use the `logs` argument throughout; the data must not come from the
    # notebook-level `logsG` variable.
    xs = np.arange(len(logs))
    plt.bar( xs, calc_rate(logs, 'Rx'+key),
            width=0.4, label='RX')
    plt.bar( xs+0.4, calc_rate(logs, 'Tx'+key),
            width=0.4, label='TX')
    plot.add_legend(ax)
    return fig

# Growth-rate charts for each of the four gossip counter suffixes.
# Trailing None suppresses the cell output.
plot_rates(logsG, 'PV')
plot_rates(logsG, 'PC')
plot_rates(logsG, 'P')
plot_rates(logsG, 'B')
None

In [None]:
# Per-host commit tables for hosts whose name starts with 'v'.
# plot.to_commit is a project helper; the code below relies on its result
# having an 'H' column and an 'at' column (presumably block height and commit
# time — TODO confirm).
rr   = {k: plot.to_commit(v) for k,v in logsC.items() if k[0]=='v'}

# Reference values shared by all hosts: the earliest first timestamp, and the
# range [hMin, hMax] of 'H' values spanned by the first/last rows of every
# frame (assumes rows are ordered by 'H' — verify).
t0   = np.min([v['at'].values[0] for _,v in rr.items()])
hMin = np.max([v['H'].values[0]  for _,v in rr.items()])
hMax = np.min([v['H'].values[-1] for _,v in rr.items()])

def clean(v):
    "Index by 'H', keep only hMin..hMax, and make 'at' relative to t0."
    v = v.set_index('H').loc[hMin:hMax]
    v['at'] = (v['at'] - t0).astype('timedelta64[ns]')
    return v
# Replace every table by its cleaned version (rr is rebound here).
rr   = {k: clean(v) for k,v in rr.items()}

In [None]:
def clean2(v) :
    "Copy of v in which every 'at' value is wrapped in a one-element list."
    v = v.copy()
    v['at'] = v['at'].apply(lambda x: [x])
    return v
# Merge all hosts into one frame `r` by adding the frames together: the
# one-element lists in 'at' are meant to concatenate under `+`, so that each
# row ends up with the list of 'at' values of all hosts.
# NOTE(review): `+` is applied to every column, not only 'at', and rows are
# aligned on the index — confirm the other columns of `r` are not used.
rrl = [ clean2(v) for v in rr.values()]
r   = rrl[0]
for i in range(1,len(rrl)):
    r = r + rrl[i]

In [None]:
# Reduce every row's collected 'at' values to their median; r['at'] is the
# per-row reference that the cells below subtract from each host's 'at'.
r['at'] = r['at'].apply(lambda x: np.median(x))

In [None]:
def plot_deviation(keys, window=None):
    """Plot, per host, the commit delay relative to the median.

    For every name in `keys` the difference between that host's 'at' column
    (from the notebook-level `rr`) and the reference `r['at']` is converted
    to milliseconds and plotted against the host table's index. When
    `window` is given, the curve is smoothed with a moving average of that
    many points (np.convolve, mode='same'). Returns the figure.
    """
    fig,ax = plot.figure_with_legend()
    plt.grid()
    plt.title("Commit delay relative to median")
    plt.ylabel('Delay (ms)')
    smooth = window is not None
    for name in keys:
        host_table = rr[name]
        delay_ns = (host_table['at'] - r['at']).astype('int64')
        # int64 nanoseconds -> float milliseconds
        delay_ms = delay_ns.astype('float64')/1e6
        if smooth:
            kernel = np.ones(window)/window
            delay_ms = np.convolve(delay_ms, kernel, mode='same')
        plt.plot(host_table.index, delay_ms, label=name)
    plot.add_legend(ax)
    return fig

# Unsmoothed delay curves for every host in rr.
# Trailing None suppresses the cell output.
plot_deviation(rr.keys())
None

In [None]:
# Smoothed (31-point moving average) delay curves for a hand-picked host list.
# NOTE(review): 'validator5' is not in the list — the reason is not recorded
# here; confirm it is intentional.
plot_deviation(
    ['validator1', 'validator2', 'validator3', 'validator4', 'validator6', 'validator7', 'validator8', 'validator9'],
    window=31
)
None

In [None]:
# Scratch input: 10 evenly spaced values from 1 to 2.
xs = np.linspace(1,2,10)

In [None]:
# Scratch check of the smoothing used in plot_deviation: 3-point moving
# average; mode='same' keeps the output as long as xs.
np.convolve(xs, np.ones(3)/3, mode='same')

In [None]:
def plot_dist_delay(key):
    """Histogram (20 bins) of one host's commit delay relative to the median.

    The delay is the host's 'at' column (from the notebook-level `rr`) minus
    the reference `r['at']`, converted to milliseconds. Opens a new figure
    titled with the host name.
    """
    plt.figure()
    plt.title(key)
    delay_ns = (rr[key]['at'] - r['at']).astype('int64')
    # int64 nanoseconds -> float milliseconds
    delay_ms = delay_ns.astype('float64')/1e6
    plt.hist(delay_ms, bins=20)

In [None]:
# One delay histogram (separate figure) per host.
for k  in rr.keys():
    plot_dist_delay(k)

In [None]:
# Per-host summary statistics of the commit delay relative to the median.
# The delay is each host's 'at' minus the reference r['at'], converted from
# int64 nanoseconds to float milliseconds.
means = {
    k: np.mean((v['at'] - r['at']).astype('int64').astype('float64')/1e6)
    for k, v in rr.items()
}
meds = {
    k: np.median((v['at'] - r['at']).astype('int64').astype('float64')/1e6)
    for k, v in rr.items()
}
stddevs = {
    k: np.std((v['at'] - r['at']).astype('int64').astype('float64')/1e6)
    for k, v in rr.items()
}
# Median absolute deviation as computed by statsmodels' robust.mad.
mads = {
    k: sm.robust.mad((v['at'] - r['at']).astype('int64').astype('float64')/1e6)
    for k, v in rr.items()
}


In [None]:
# Scatter of per-host median delay against the MAD of the delay.
# meds and mads are built from the same iteration over rr, so their values
# pair up host by host.
plt.figure()
plt.grid()
plt.xlabel('Median delay (ms)')
plt.ylabel('MAD of delay (ms)')
median_ms = list(meds.values())
mad_ms = list(mads.values())
plt.plot(median_ms, mad_ms, '+')