# Sisyphus

In [1]:
from IPython.display import Image
image="https://i0.wp.com/sisypheanhigh.com/foot/wp-content/uploads/2015/08/sisyphus-red.png"
Image(url=image,width=200,height=200)

In [2]:
from elasticsearch import Elasticsearch
from pandasticsearch import Select, DataFrame
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
hostname="http://monster.us.cray.com:30200"

In [4]:
client =  Elasticsearch(hostname, http_compress=True)
#print(client.cluster.health())

In [5]:
import datetime,time
from pytz import timezone

In [6]:
def addSeconds(now, increment=30):
    timestamp=time.mktime(time.strptime(now,"%Y-%m-%d %H:%M:%S"))
    dt=datetime.datetime.fromtimestamp(timestamp)
    dt=dt+datetime.timedelta(seconds=30)
    end=f"{dt:%Y-%m-%d %H:%M:%S}"
    return(end)

In [7]:
start="2019-08-08 09:10:00"

In [8]:
end=addSeconds(start)

In [9]:
#print("start date/time:" + start + " end date/time:" + end)

In [10]:
def myquery(start=None,end=None):
    return """
        {
            "size":0,
            "query": {
                "bool": {
                  "must": [{
                      "match_all": {}
                    },
                    {
                      "range": {
                        "timereported": {
                          "gte": "%s",
                          "lte": "%s",
                          "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
                    }
                  }
                }
              ],
              "must_not": []
            }
          },
            "_source":{
                "excludes":[]
            },
            "aggs": {
            "2": {
              "date_histogram": {
                "field": "timereported",
                "interval": "30s",
                "time_zone": "America/Chicago",
                "min_doc_count": 1
              }
            }
          }
        }
    """ % (start,end)

In [11]:
q=myquery(start, end)
resp = client.search(index="shasta-logs-*", body=q)
#print("Number of responses: " + "{:,}".format(resp['hits']['total']))

In [12]:
table=resp['aggregations']['2']['buckets']
#print(len(table))
pd_table=pd.DataFrame.from_dict(table)
#pd_table
#pd_table.head(5)

In [13]:
#pd_table['doc_count'].describe()

In [14]:
pd_table['key_as_string']=pd.to_datetime(pd_table['key_as_string'].astype(str), format='%Y-%m-%dT%H:%M:%S.%f%z')

In [15]:
count=pd_table['doc_count'].count()
print("Number of entries: %d." % count)

Number of entries: 1.


In [16]:
if ( count > 100 ):
   sample = pd_table.sample(100)
else:
   sample = pd_table.sample(count)

X=sample['key_as_string'].tolist()
Y=sample['doc_count'].tolist()
#print(X)
#print(Y)

In [17]:
import time
import numpy as np
from bokeh.models.sources import ColumnDataSource
from bokeh.plotting import figure
from bokeh.io import output_notebook, show, push_notebook
from bokeh.models import DatetimeTickFormatter
from bokeh.models.tools import HoverTool
from math import pi

In [18]:
output_notebook()

In [19]:
p = figure(x_axis_type="datetime", plot_width=1200, plot_height=400)
source = ColumnDataSource(data={'datetime' : X, 'count' : Y,})

#test_data = ColumnDataSource(data=dict(x=[0], y=[0]))
#line = my_figure.line("x", "y", source=test_data)
#line = p.circle("x", "y", source=test_data, size = 8, color = 'navy', alpha=0.3)
line = p.circle(x='datetime',y='count', source=source, size = 5, color = 'darkblue', alpha=0.6)

p.title.text = 'Message Counts per 30 minutes'
p.background_fill_color="#f5f5f5"
p.grid.grid_line_color="white"
p.yaxis.axis_label = 'Count'
p.xaxis.axis_label =' timereported per 30 minutes'
p.xaxis.formatter=DatetimeTickFormatter(
        hours=["%d %B %Y"],
        days=["%d %B %Y"],
        months=["%d %B %Y"],
        years=["%d %B %Y"],
    )
p.xaxis.major_label_orientation = pi/4

hover = HoverTool(
    tooltips=[
        ('Count', '@count{%d}'),
        ('timereported per 30 seconds','@datetime{%Y-%m-%d %H:%M:%S.%3N}')
    ],
    formatters={
        'count':'printf',
        'datetime': 'datetime',
    },
    # display a tooltip whenever the cursor is vertically in line with a glyph
    mode='vline'
)

handle = show(p, notebook_handle=True)

In [20]:
from threading import Thread

stop_threads = False

# Need to get the next 30 second increment to plot off the old value
def blocking_callback(id, stop):
    new_data=dict(datetime=[0], count=[0])          
    global start, end 
    
    step      = 0
    step_size = 1  # increment for increasing step
    max_step  = 10  # arbitrary stop point for example
    period    = 0.01  # in seconds (simulate waiting for new data)
    n_show    = 10000  # number of points to keep and show

    while True:
        start=end
        if (datetime.datetime.fromtimestamp(time.mktime(time.strptime(start,"%Y-%m-%d %H:%M:%S"))) > datetime.datetime.now()):
            period=30
                         
        end=addSeconds(start)
        #print("start date/time:" + start + " end date/time:" + end)
        q=myquery(start, end)
        resp = client.search(index="shasta-logs-*", body=q)
        #print("Number of responses: " + "{:,}".format(resp['hits']['total']))
        table=resp['aggregations']['2']['buckets']
        #print(len(table))
        pd_table=pd.DataFrame.from_dict(table)
        pd_table
        #print(pd_table.head(5))
        pd_table['key_as_string']=pd.to_datetime(pd_table['key_as_string'].astype(str), format='%Y-%m-%dT%H:%M:%S.%f%z')
        count=pd_table['doc_count'].count()
        #print("Number of entries: %d." % count)
        sample = pd_table.sample(count)

        X=sample['key_as_string'].tolist()
        Y=sample['doc_count'].tolist()

        new_data['datetime'] = X
        new_data['count'] = Y

        #print(X)
        #print(Y)
        
        #test_data.stream(new_data, n_show)
        source.stream(new_data, n_show)
        push_notebook(handle=handle)
        step += step_size
        time.sleep(period)

        if stop():
            print("exit")
            break

thread = Thread(target=blocking_callback, args=(id, lambda: stop_threads))
thread.start()

Exception in thread Thread-4:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 2890, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'key_as_string'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/threading.py", line 917, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.7/threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-20-41c95be8cf21>", line 31, in blocking_cal

In [None]:
# preceding streaming is not blocking
#for cnt in range(10):
#    print("Do this, while plot is still streaming", cnt)

In [None]:
# you might also want to stop the thread
stop_threads=True
del thread