In [None]:
%matplotlib nbagg
import matplotlib.pyplot as plt
from IPython.display import display
##
import json
import re
##
import numpy           as np
import pandas          as pd
import statsmodels.api as sm
##
from elasticsearch     import Elasticsearch, helpers
from elasticsearch_dsl import Search, A, Q

##
import thundermint.plot  as plot
import thundermint.splot as splot

In [None]:
# Read the Elasticsearch login/password from a local JSON file so the
# secrets are never written into the notebook itself.
with open("credentials.json") as f:
    credentials = json.load(f)
# Hand the credentials over via http_auth instead of splicing them into the
# host string: embedded in a URL they would need percent-escaping, and a
# password containing '/', '#', '?' or '%xx' would be mis-parsed.
es = Elasticsearch("elastic.hxr.team",
                   http_auth=(credentials['login'], credentials['password']),
                   port=443,
                   use_ssl=True,
                   )

# Global parameters for search

In [None]:
# Name of the Elasticsearch index every search runs against.
# flt_time() also parses the trailing <digits>-<digits>-<digits> part of this
# name as the date for its time-range filters.
index = 'xenochain-2018-11-26'

# Queries

In [None]:
# -------------------------------------------------------------------
# Primitive filters

def flt_host(host) :
    """Build a search modifier that keeps only entries of one host.

    Returns a callable taking a search object and returning the result of
    its ``filter('term', host=host)`` call.
    """
    def only_host(search):
        return search.filter('term', host=host)
    return only_host
def flt_cluster(cluster) :
    """Build a search modifier that keeps only entries of one cluster.

    The cluster name is matched against the ``env`` field. Returns a callable
    taking a search object and returning ``filter('term', env=cluster)``.
    """
    def only_cluster(search):
        return search.filter('term', env=cluster)
    return only_cluster
def flt_severity(sev) :
    """Build a search modifier that keeps only entries of one severity.

    Returns a callable taking a search object and returning
    ``filter('term', sev=sev)``.
    """
    def only_severity(search):
        return search.filter('term', sev=sev)
    return only_severity
def flt_namespace(ns) :
    """Build a search modifier that keeps only entries of one namespace.

    Returns a callable taking a search object and returning
    ``filter('term', ns=ns)``.
    """
    def only_namespace(search):
        return search.filter('term', ns=ns)
    return only_namespace
def flt_msg(q, msg):
    """Build a search modifier that filters on the log message text.

    ``q`` is the query type handed to ``filter`` (e.g. ``"match"``) and
    ``msg`` the value compared against the ``msg`` field. Returns a callable
    taking a search object and returning ``filter(q, msg=msg)``.
    """
    def on_message(search):
        return search.filter(q, msg=msg)
    return on_message

# Ready-made namespace filters: consensus, mempool and net.
flt_consensus, flt_mempool, flt_net = (
    flt_namespace(name) for name in ('consensus', 'mempool', 'net')
)

def flt_time(start, delta):
    """Build a search modifier selecting a time window on the index's date.

    The calendar date is parsed from the module-level ``index`` name (its
    ``-<digits>-<digits>-<digits>`` part, read as year-month-day).

    Parameters
    ----------
    start : (int, int)
        ``(hour, minute)`` at which the window opens.
    delta : (int, int)
        ``(hours, minutes)`` length of the window.

    Returns
    -------
    callable
        Takes a search object and returns it with a ``range`` filter on the
        ``at`` field: ``gte`` the window start, ``lt`` the window end. Both
        bounds are formatted as ``YYYY-MM-DDTHH:MM:00Z``.

    Raises
    ------
    ValueError
        If no date can be found in ``index``.
    """
    from datetime import datetime, timedelta

    (h1,m1) = start
    (dh,dm) = delta
    found = re.match(r"^.*-(\d+-\d+-\d+)", index)
    if found is None:
        raise ValueError("cannot extract a date from index name %r" % (index,))
    year, month, day = (int(part) for part in found.group(1).split('-'))
    # Real date arithmetic keeps the end bound valid when the window runs past
    # midnight; manual hour/minute sums would produce an hour of 24 or more.
    opens  = datetime(year, month, day) + timedelta(hours=h1, minutes=m1)
    closes = opens + timedelta(hours=dh, minutes=dm)
    stamp  = "%Y-%m-%dT%H:%M:00Z"
    t1 = opens.strftime(stamp)
    t2 = closes.strftime(stamp)
    return lambda s : s.filter('range', at ={"gte":t1, "lt":t2})

def make_seatch(cluster, filters) :
    """Create a Search on the global ``es``/``index`` and apply ``filters``.

    Each element of ``filters`` is called with the current search object and
    must return the next one; they are applied in iteration order.
    ``cluster`` is accepted but not used here.
    """
    search = Search(using=es, index=index)
    for apply_filter in filters:
        search = apply_filter(search)
    return search

# -------------------------------------------------------------------
# Postprocessing

def postprocess_entries(s) :
    """Materialise a search into a DataFrame ordered by timestamp.

    Every hit yielded by ``s.scan()`` is converted with ``to_dict()``; the
    frame keeps the columns ``at``, ``host``, ``data`` and ``msg``, parses
    ``at`` with ``pd.to_datetime`` and is sorted on it (index not reset).
    """
    records = [hit.to_dict() for hit in s.scan()]
    frame   = pd.DataFrame.from_records(records,
                                        columns=['at','host','data','msg'])
    frame   = frame.assign(at=pd.to_datetime(frame['at']))
    return frame.sort_values('at')

def postprocess_mempool(r):
    """Flatten the mempool counters out of the ``data`` column.

    For each row, ``data`` must be a mapping with the keys ``size``,
    ``filtered``, ``added`` and ``discarded``; each becomes a column of the
    same name and ``data`` itself is dropped.

    Returns a new DataFrame; the frame passed in is left untouched, so the
    function can be re-applied to the same input.

    Raises KeyError if ``data`` is missing or a row lacks one of the keys.
    """
    counters = ('size', 'filtered', 'added', 'discarded')
    payload  = r['data']
    # `key=name` pins the current name for each lambda (avoids late binding).
    extracted = {name: payload.apply(lambda entry, key=name: entry[key])
                 for name in counters}
    return r.drop('data', axis=1).assign(**extracted)

def split_on_host(df):
    """Split a frame into a dict of per-host frames keyed by ``host`` value.

    Each per-host frame gets a fresh 0..n-1 index.
    """
    per_host = {}
    for host, rows in df.groupby('host'):
        per_host[host] = rows.reset_index(drop=True)
    return per_host

# -------------------------------------------------------------------
# Queries

def q_agg_uniq(cluster, filters, field):
    """Aggregate matching entries by the distinct values of ``field``.

    Parameters
    ----------
    cluster : str or None
        Cluster to restrict the search to; ``None`` searches every cluster.
    filters : iterable of callables
        Search modifiers (as produced by the ``flt_*`` helpers).
    field : str
        Field whose values are bucketed with a ``terms`` aggregation.

    Returns
    -------
    The ``buckets`` of the ``unique_ids`` aggregation. No hits are fetched
    (the search is sliced to ``[0:0]``).
    NOTE(review): a terms aggregation returns only the top buckets unless a
    size is given - confirm the default is enough for the fields used.
    """
    s = make_seatch(cluster, filters)
    # make_seatch does not use its cluster argument, so without this the
    # aggregation would silently span all clusters (q_scan filters the same way).
    if cluster is not None:
        s = flt_cluster(cluster)(s)
    s.aggs.bucket('unique_ids', A('terms', field=field))
    return s[0:0].execute().aggregations.unique_ids.buckets

def q_scan(cluster, filters, host=None, source=['at','msg','data','host'], postprocess=[]):
    """Fetch log entries from Elasticsearch as DataFrame(s).

    The search is built from ``filters``, limited to the ``source`` fields,
    then restricted to ``cluster`` and, when given, to ``host``. The result
    of ``postprocess_entries`` is passed through each callable in
    ``postprocess`` in order.

    Returns a single DataFrame when ``host`` is given, otherwise a dict of
    per-host DataFrames (see ``split_on_host``). The list defaults are only
    read, never mutated.
    """
    search = make_seatch(cluster, filters).source(source)
    search = flt_cluster(cluster)(search)
    all_hosts = host is None
    if not all_hosts:
        search = flt_host(host)(search)
    frame = postprocess_entries(search)
    for step in postprocess:
        frame = step(frame)
    return split_on_host(frame) if all_hosts else frame

# Restarts information

In [None]:
def show_restarts():
    """Print each host of the 'atum' cluster with its consensus-start entries.

    Queries entries in the ``consensus`` namespace whose message matches
    "Starting consensus engine" and, per host, prints the host name followed
    by a rich display of its frame.
    """
    restart_filters = [ flt_msg("match", "Starting consensus engine"),
                        flt_namespace("consensus"),
                      ]
    by_host = q_scan('atum', restart_filters)
    for host, entries in by_host.items():
        print(host)
        display(entries)

# Plots

In [None]:
# Fetch the consensus and mempool logs of the 'atum' cluster for the
# 25-minute window starting at 03:15 on the date encoded in `index`.
# No host is given, so each result is a dict of per-host DataFrames.
logsC = q_scan('atum', [flt_namespace('consensus'),
                        flt_time((3,15), (0,25))])
# Mempool entries additionally get their counters flattened into columns.
logsM = q_scan('atum', [flt_namespace('mempool'),
                        flt_time((3,15), (0,25))],
                postprocess=[postprocess_mempool])

In [None]:
# Commit-time plot from the per-host consensus logs
# (plot semantics live in thundermint.plot - not visible here).
plot.plot_commit_time(logsC)
None  # last expression is None, so the cell echoes no return value

In [None]:
# Transactions-per-block plot from the per-host consensus logs
# (plot semantics live in thundermint.plot - not visible here).
plot.plot_n_tx_in_block(logsC)
None  # last expression is None, so the cell echoes no return value

In [None]:
# Mempool-size plot; the plotter receives the per-host frames as a plain list.
plot.plot_mempool_size(list(logsM.values()))
None

In [None]:
# NOTE(review): meaning/units of w=4000 are defined in thundermint.splot - confirm.
splot.splot(logsC, w=4000)

# Rounds vs time

In [None]:
def extract_round(df) :
    """Pull height/round numbers out of the 'Entering prevote' entries.

    Keeps only rows whose ``msg`` equals 'Entering prevote', re-indexes them
    from 0, adds columns ``H`` and ``R`` read from each row's ``data``
    mapping, and drops the ``data`` and ``host`` columns. The input frame is
    not modified.
    """
    is_prevote = df['msg'] == 'Entering prevote'
    rounds     = df.loc[is_prevote].reset_index(drop=True)
    payload    = rounds['data']
    rounds['H'] = payload.apply(lambda entry: entry['H'])
    rounds['R'] = payload.apply(lambda entry: entry['R'])
    return rounds.drop(columns=['data', 'host'])

def extract_commit(df) :
    """Pull the height out of the 'Actual commit' entries.

    Keeps only rows whose ``msg`` equals 'Actual commit', re-indexes them
    from 0, adds column ``H`` read from each row's ``data`` mapping, and
    drops the ``data`` and ``host`` columns. The input frame is not modified.
    """
    is_commit = df['msg'] == 'Actual commit'
    commits   = df.loc[is_commit].reset_index(drop=True)
    commits['H'] = commits['data'].apply(lambda entry: entry['H'])
    return commits.drop(columns=['data', 'host'])


In [None]:
# Per-host round data for the rounds plot; 'xenogate1' and 'xenochain1' are
# left out. Skipping them inside the comprehension (instead of pop-ing them
# afterwards) avoids a KeyError when one of them has no entries in the fetched
# window, and avoids extracting rounds that would be thrown away.
r = {k: extract_round(v) for k, v in logsC.items()
     if k not in ('xenogate1', 'xenochain1')}

In [None]:
# Round number reached at each 'Entering prevote' entry, one line per host.
plt.figure()
plt.grid()
for k,v in r.items():
    # label each line with its host so the legend can tell them apart
    plt.plot(v['at'], v['R'], lw=0.5, marker='x', markersize=2, label=k)
plt.xlabel('time')
plt.ylabel('round (R)')
plt.legend()
None

# Scratchpad