In [None]:
%matplotlib nbagg
import matplotlib.pyplot as plt
##
import json
import re
##
import numpy           as np
import pandas          as pd
import statsmodels.api as sm
##
from elasticsearch     import Elasticsearch, helpers
from elasticsearch_dsl import Search, A, Q

##
import thundermint.plot  as plot
import thundermint.splot as splot

In [None]:
# Read the Elasticsearch login/password from a local JSON file so that no
# secrets are stored in the notebook itself.
with open("credentials.json") as f:
    credentials = json.load(f)
# Hand the credentials to the client through http_auth instead of formatting
# them into a "login:password@host" string: a login or password containing
# URL-reserved characters ('@', ':', '/', '%', ...) would otherwise be
# mis-parsed as part of the host specification.
es = Elasticsearch(["elastic.hxr.team"],
                   http_auth=(credentials['login'], credentials['password']),
                   port=443,
                   use_ssl=True,
                   )

# Global parameters for search

In [None]:
# Name of the Elasticsearch index that every query in this notebook runs against.
# time_range() also parses the calendar date for its timestamps out of the
# "-<digits>-<digits>-<digits>" part of this name, so the name must contain it.
index = 'xenochain-2018-11-06'

# Queries

In [None]:
def time_range(start, delta, index_name=None):
    """Build the ``(t1, t2)`` timestamp pair for a time window on the index's day.

    The calendar date is parsed from the ``-YYYY-MM-DD`` part of the index
    name, so the window is anchored to the day encoded in that name.

    Parameters
    ----------
    start : (int, int)
        ``(hour, minute)`` at which the window starts.
    delta : (int, int)
        ``(hours, minutes)`` length of the window.
    index_name : str, optional
        Index name to read the date from. Defaults to the notebook-level
        ``index`` variable.

    Returns
    -------
    (str, str)
        Start and end of the window formatted as ``YYYY-MM-DDTHH:MM:00Z``.
        A window that crosses midnight rolls over to the next calendar day
        instead of producing an out-of-range hour.

    Raises
    ------
    ValueError
        If the index name contains no parseable ``-YYYY-MM-DD`` date.
    """
    # Local import keeps this cell self-contained.
    from datetime import datetime, timedelta

    name = index if index_name is None else index_name
    (h1, m1) = start
    (dh, dm) = delta
    # Raw string: "\d" in a plain string literal is an invalid escape sequence.
    found = re.match(r"^.*-(\d+-\d+-\d+)", name)
    if found is None:
        raise ValueError("cannot extract a date from index name %r" % (name,))
    day = datetime.strptime(found.group(1), "%Y-%m-%d")
    # Real date arithmetic handles minute carry and day rollover for us.
    begin = day + timedelta(hours=h1, minutes=m1)
    end   = begin + timedelta(hours=dh, minutes=dm)
    fmt = "%Y-%m-%dT%H:%M:00Z"
    return (begin.strftime(fmt), end.strftime(fmt))

def postprocess_entries(s):
    """Materialise a search into a DataFrame ordered by timestamp.

    Every hit yielded by ``s.scan()`` is converted with ``to_dict()`` and
    becomes one row. The ``at`` column is parsed with ``pd.to_datetime`` and
    the rows are returned sorted by it (original row labels are kept).
    """
    records = [hit.to_dict() for hit in s.scan()]
    return (pd.DataFrame.from_records(records)
              .assign(at=lambda frame: pd.to_datetime(frame['at']))
              .sort_values('at'))

def split_on_host(df):
    """Split a DataFrame into a dict mapping each ``host`` value to its rows."""
    return {host: rows for host, rows in df.groupby('host')}

In [None]:
def distinct_fields(field):
    """Return the terms-aggregation buckets for ``field`` in the current index.

    The search is sliced to ``[0:0]`` so that only the aggregation is of
    interest, then a ``terms`` aggregation named ``unique_ids`` is attached
    and its buckets are returned.

    NOTE(review): a terms aggregation normally returns only the top buckets
    unless ``size`` is raised — confirm this is enough for the fields queried.
    """
    terms_agg = A('terms', field=field)
    search = Search(using=es, index=index)[0:0]
    search.aggs.bucket('unique_ids', terms_agg)
    response = search.execute()
    return response.aggregations.unique_ids.buckets

def query_consensus(start, delta, cluster):
    """Fetch consensus log entries for a time window, split per host.

    ``start`` and ``delta`` are forwarded to ``time_range`` to obtain the
    window bounds. Entries are restricted to ``env == cluster``,
    ``ns == 'consensus'`` and ``t1 <= at < t2``; only the ``at``, ``msg``,
    ``data`` and ``host`` fields are requested.

    Returns a dict mapping host to a DataFrame sorted by ``at``.
    """
    (t1, t2) = time_range(start, delta)
    search = (
        Search(using=es, index=index)
        .filter('term', env=cluster)
        .filter('term', ns='consensus')
        .filter('range', at={"gte": t1, "lt": t2})
        .source(['at', 'msg', 'data', 'host'])
    )
    entries = postprocess_entries(search)
    return split_on_host(entries)

def query_mempool(start, delta, cluster):
    """Fetch mempool log entries for a time window, split per host.

    ``start`` and ``delta`` are forwarded to ``time_range`` to obtain the
    window bounds. Entries are restricted to ``env == cluster``,
    ``ns == 'mempool'`` and ``t1 <= at < t2``.

    The ``size``, ``filtered``, ``added`` and ``discarded`` values are pulled
    out of each entry's ``data`` mapping into columns of their own, after
    which the ``data`` column is dropped.

    Returns a dict mapping host to a DataFrame sorted by ``at``.
    """
    (t1, t2) = time_range(start, delta)
    search = (
        Search(using=es, index=index)
        .filter('term', env=cluster)
        .filter('term', ns='mempool')
        .filter('range', at={"gte": t1, "lt": t2})
        .source(['at', 'msg', 'data', 'host'])
    )
    entries = postprocess_entries(search)
    # Promote the counters stored inside 'data' to top-level columns.
    for key in ('size', 'filtered', 'added', 'discarded'):
        entries[key] = entries['data'].apply(lambda payload: payload[key])
    flat = entries.drop('data', axis=1)
    return split_on_host(flat)

# Plots

In [None]:
def to_commit(d):
    """Keep only the "Entering new height" entries and expose their height.

    Works on a copy of the matching rows: adds an ``H`` column read from each
    row's ``data`` mapping and drops the ``data``, ``msg`` and ``host``
    columns. The input frame is not modified.
    """
    is_new_height = d['msg'] == "Entering new height ----------------"
    heights = d[is_new_height].copy()
    heights['H'] = heights['data'].apply(lambda payload: payload['H'])
    return heights.drop(['data', 'msg', 'host'], axis=1)

def to_commit_n_tx(df):
    """Build an ``H`` / ``Ntx`` table from the "Actual commit" entries.

    Both columns are read from each matching row's ``data`` mapping; the
    result keeps the row labels of the matching rows.
    """
    commits = df[df['msg'] == "Actual commit"]
    payloads = commits['data']
    columns = {
        'H':   payloads.apply(lambda payload: payload['H']),
        'Ntx': payloads.apply(lambda payload: payload['Ntx']),
    }
    return pd.DataFrame(columns)

In [None]:
# Fetch consensus and mempool logs for the 'profile' cluster over the same
# window: 5 minutes starting at 16:10 on the day encoded in `index`.
# Each result is a dict of per-host DataFrames.
logsC = query_consensus((16,10), (0,5), cluster='profile')
logsM = query_mempool  ((16,10), (0,5), cluster='profile')

In [None]:
# One frame of "Entering new height" rows per host.
# NOTE(review): the rendering itself lives in thundermint.plot — not visible here.
plot.plot_commit_time(list(map(to_commit, logsC.values())))
# Trailing None so the cell shows no return value.
None

In [None]:
# One H/Ntx table of "Actual commit" rows per host.
# NOTE(review): the rendering itself lives in thundermint.plot — not visible here.
plot.plot_n_tx_in_block(list(map(to_commit_n_tx, logsC.values())))
# Trailing None so the cell shows no return value.
None

In [None]:
# Pass the per-host mempool frames as a plain list.
# NOTE(review): the rendering itself lives in thundermint.plot — not visible here.
plot.plot_mempool_size(list(logsM.values()))
# Trailing None so the cell shows no return value.
None

In [None]:
# Hand the per-host consensus logs to thundermint.splot.
# NOTE(review): the meaning of w=2000 is defined in thundermint.splot and is
# not visible here — confirm before changing it.
splot.splot(logsC, w=2000)

# Scratchpad