In [None]:
%matplotlib nbagg
import matplotlib.pyplot as plt
##
import json
##
import numpy           as np
import pandas          as pd
import statsmodels.api as sm
##
from elasticsearch import Elasticsearch, helpers
##
import thundermint.splot as splot

In [None]:
# Read the Elasticsearch login/password from a local JSON file so that the
# secrets never appear in the notebook itself.
with open("credential.json") as f:
    credentials = json.load(f)
# Hand the credentials to the client through `http_auth` instead of splicing
# them into the host string: a password containing ':', '@', '/' or '%'
# would otherwise be parsed as part of the URL and the login would break.
es = Elasticsearch("elastic.hxr.team",
                   http_auth=(credentials['login'], credentials['password']),
                   port=443,
                   use_ssl=True,
                   )

# Ready-made queries for elastic

In [None]:
def query_consensus(t1, t2):
    """Build an Elasticsearch query for consensus log entries in a time window.

    The query requires the ``ns`` term to equal ``consensus`` and the ``at``
    field to lie in the half-open range ``[t1, t2)``.  Only the ``at``,
    ``data``, ``msg``, ``sev`` and ``host`` fields of each hit are requested.

    Parameters
    ----------
    t1 : lower bound of ``at`` (inclusive, ``gte``).
    t2 : upper bound of ``at`` (exclusive, ``lt``).

    Returns
    -------
    dict : the query body.
    """
    namespace_filter = {"term": {'ns': 'consensus'}}
    time_window = {"range": {"at": {"gte": t1, "lt": t2}}}
    return {
        "_source": ["at", "data", "msg", "sev", "host"],
        "query": {"bool": {"must": [namespace_filter, time_window]}},
    }

def query_errors():
    """Return a query body matching entries whose ``sev`` term is ``Error``."""
    severity_term = {'sev': 'Error'}
    return {"query": {"term": severity_term}}
def query_warnings():
    """Return a query body matching entries whose ``sev`` term is ``Warning``."""
    severity_term = {'sev': 'Warning'}
    return {"query": {"term": severity_term}}

def distinct_fields(field, index):
    """Return the terms-aggregation buckets for the values of `field` in `index`.

    Parameters
    ----------
    field : name of the document field to aggregate on.
    index : name of the Elasticsearch index to search.

    Returns
    -------
    The ``buckets`` list found under ``aggregations.unique_ids`` in the
    search response.

    NOTE(review): no bucket count is requested, so Elasticsearch's default
    limit on the number of returned buckets applies - confirm this is enough
    for fields with many distinct values.
    """
    # Aggregate on the requested field (it was previously fixed to "app",
    # which made the `field` argument have no effect).
    q = {"aggs": {"unique_ids": {"terms": {"field": field}}}}
    # size=0: only the aggregation is wanted, not the matching documents.
    r = es.search(body=q, index=index, size=0)
    return r['aggregations']['unique_ids']['buckets']

# Data

In [None]:
# Fetch every consensus entry of the chosen time window via helpers.scan.
# Both bounds are written as zero-padded ISO-8601 timestamps; the upper bound
# used to read 'T5:30:00Z', which strict date parsing does not accept.
hits = helpers.scan(client=es, scroll='2m',
                    query=query_consensus('2018-10-30T04:45:00Z', '2018-10-30T05:30:00Z'),
                    index='xenochain-2018-10-30')
# Keep only the document bodies and the columns used further down.
r = pd.DataFrame.from_records([hit['_source'] for hit in hits],
                              columns=['at', 'data', 'msg', 'host'])
r['at'] = pd.to_datetime(r['at'])
r = r.sort_values('at')
# One time-ordered frame per host, keyed by host name.
logs = {host: frame for host, frame in r.groupby('host')}

# Plotting

In [None]:
def to_commit(d):
    """Extract the "Entering new height" entries of a log frame.

    Selects the rows of `d` whose ``msg`` equals the new-height message,
    adds a column ``H`` taken from the ``'H'`` key of each row's ``data``
    value, and drops the ``data``, ``msg`` and ``host`` columns.  The input
    frame is left unmodified.
    """
    is_new_height = d['msg'] == "Entering new height ----------------"
    commits = d.loc[is_new_height].copy()
    commits['H'] = commits['data'].apply(lambda payload: payload['H'])
    return commits.drop(columns=['data', 'msg', 'host'])

In [None]:
def foo():
    """Plot block height against time, one series per host in `logs`.

    Returns the matplotlib figure that was created.
    """
    fig = plt.figure()
    plt.grid()
    plt.title("Height vs time")
    plt.xlabel("Time")
    plt.ylabel("Height")
    for k, d in logs.items():
        d = to_commit(d)
        plt.plot(d['at'], d['H'], '.', markersize=1, label=k)
    # Build the legend only once the labelled series exist; calling
    # plt.legend() on still-empty axes finds no handles to show.
    plt.legend(bbox_to_anchor=(0.65, 0, 0.5, 0.65))
    return fig
# ----
foo()
# A trailing None keeps the cell from echoing the returned figure object.
None

In [None]:
def plot_commit_time(logs) :
    """Plot commit times per host and print the average time per block.

    For every host frame in `logs` the "Entering new height" entries are
    extracted with `to_commit`, and their timestamps are converted to seconds
    elapsed since the earliest first entry across hosts.  The commit times of
    the first `n` entries (`n` = shortest host series) are averaged over hosts
    and fitted with a straight line ``time = const + slope * H``; the slope is
    printed as the time needed to commit one block.

    Parameters
    ----------
    logs : mapping of host name to log DataFrame (columns ``at``, ``data``,
        ``msg``, ``host`` as consumed by `to_commit`).

    Returns
    -------
    list : ``[figA, figB]`` - the commit-time figure and the residuals figure.

    Raises
    ------
    ValueError : if no host has any "Entering new height" entry.

    NOTE(review): the averaging pairs hosts row by row and uses the heights
    of the first host, i.e. it assumes the first `n` entries of every host
    refer to the same heights - confirm against the data.
    """
    # Hosts without any commit entry provide neither a start time nor a curve.
    dfs = [df for df in (to_commit(log) for log in logs.values()) if len(df) > 0]
    if not dfs:
        raise ValueError("no 'Entering new height' entries found in logs")
    # Subtract leading time.  Taking the start as a pandas Timestamp (rather
    # than via .values) keeps any timezone so the subtraction stays valid.
    t0 = min(df['at'].iloc[0] for df in dfs)
    for df in dfs :
        # total_seconds() yields float seconds on every pandas version and
        # keeps sub-second resolution; astype('timedelta64[s]') returns a
        # timedelta dtype on pandas >= 2, which cannot be averaged or fitted.
        df['at'] = (df['at'] - t0).dt.total_seconds()
    # Fit averaged commit times with a straight line
    n  = min(len(df) for df in dfs)
    hs = dfs[0]['H'].iloc[:n]
    ts = np.average( [df['at'].iloc[:n] for df in dfs], axis=0 )
    fit = sm.OLS(ts, sm.add_constant(hs), missing='drop').fit()
    # add_constant prepends the constant column, so params are (const, slope);
    # going through an array avoids positional indexing of a labelled Series.
    intercept, slope = np.asarray(fit.params)
    # Do plot
    figA = plt.figure()
    plt.grid()
    plt.title('Commit time')
    plt.xlabel('Time (s)')
    plt.ylabel('Height')
    plt.plot(hs*slope + intercept, hs, color='grey', linewidth=0.5)
    for df in dfs :
        plt.plot(df['at'] , df['H'], '+')
    print("Time for commit of singe block %.3f s" % float(slope))
    # Residuals: observed height minus the height the fit predicts at that time
    figB = plt.figure()
    plt.grid()
    plt.title('Commit time residuals')
    plt.xlabel('Time (s)')
    plt.ylabel('Delta H')
    for df in dfs :
        plt.plot(df['at'] , df['H'] - (df['at'] - intercept) / slope)
    return [figA,figB]
# ----
plot_commit_time(logs)
# A trailing None keeps the cell from echoing the returned list of figures.
None

# Scratchpad

In [None]:
class Object:
    """Empty attribute holder; callers attach whatever attributes they need."""
def foo():
    """Wrap each per-host log in an `Object` (as `.cons`) and pass the list to `splot.splot`.

    Returns whatever `splot.splot` returns for the wrapped logs with ``w=20000``.
    """
    wrapped = []
    for host_log in logs.values():
        holder = Object()
        holder.cons = host_log
        wrapped.append(holder)
    return splot.splot(wrapped, w=20000)
#
foo()

In [None]:
# Logs of two individual hosts; reset_index gives each a fresh 0..n-1 index
# and keeps the previous index as a column.
q1 = logs['xenochain1'].reset_index(drop=False)
q2 = logs['xenochain2'].reset_index(drop=False)

In [None]:
# Inspect rows 4877..5999 (by position) of the first host's log.
q1.iloc[4877:6000]

In [None]:
# Up to 1000 entries matching the error query in the 2018-10-30 index,
# reduced to their document bodies.
r = [
    hit['_source']
    for hit in es.search(index='xenochain-2018-10-30',
                         body=query_errors(),
                         size=1000)['hits']['hits']
]

In [None]:
# Tabulate the fetched documents, ordered by their `at` column.
(
    pd.DataFrame
    .from_records(r, columns=['at', 'msg', 'ns', 'data', 'host'])
    .sort_values(by='at')
)

In [None]:
# Terms aggregation over the `app` field of the 2018-11-06 index; the cell
# displays the resulting buckets.
qqq = {
    "_source": ["host"],
    "aggs": {"unique_ids": {"terms": {"field": "app"}}},
}
(
    es.search(body=qqq, index="xenochain-2018-11-06", size=0)
    ['aggregations']['unique_ids']['buckets']
)

In [None]:
# Up to 1000 raw hits whose `host` term is "manannan" in the 2018-11-02 index.
qqq = {"query": {"term": {"host": "manannan"}}}
r = es.search(body=qqq, index="xenochain-2018-11-02", size=1000)
r = r['hits']['hits']

In [None]:
# Replace the raw hits in `r` by their document bodies.  `r` is rebound from
# itself, so running this cell twice feeds it its own output.
r = [hit['_source'] for hit in r]

In [None]:
# Distinct `pid` values among the documents in `r`.
{doc['pid'] for doc in r}

In [None]:
# Display the first element of `r` in full.
r[0]