# Elasticsearch - using the Python API

### Import it and create a client object

In [43]:
from elasticsearch import Elasticsearch, helpers, exceptions as es_exceptions
from elasticsearch.helpers import bulk,scan

# Client object used by every cell below; requests time out after 60 seconds.
es = Elasticsearch(
    hosts=['atlas-kibana.mwt2.org:9200'],
    timeout=60,
)

### Check we can connect to the cluster

In [44]:
# ask the cluster for its health summary; getting a response back shows the client can reach it
es.cluster.health()

{'active_primary_shards': 13373,
 'active_shards': 18522,
 'active_shards_percent_as_number': 100.0,
 'cluster_name': 'atlas',
 'delayed_unassigned_shards': 0,
 'initializing_shards': 0,
 'number_of_data_nodes': 5,
 'number_of_in_flight_fetch': 0,
 'number_of_nodes': 10,
 'number_of_pending_tasks': 0,
 'relocating_shards': 0,
 'status': 'green',
 'task_max_waiting_in_queue_millis': 0,
 'timed_out': False,
 'unassigned_shards': 0}

### Create a bunch of documents

In [49]:
import random

# Fixed seed so every run generates the same events and the search results
# further down are reproducible.
EVENT_SEED = 42
N_EVENTS = 1000

# A private generator keeps the seeding local and leaves the global
# `random` module state untouched.
rng = random.Random(EVENT_SEED)

# One dict per event: a sequential event number plus two Gaussian-distributed
# values, E (mean 1000, sigma 30) and pT (mean 100, sigma 50).
allEvents = [
    {'eventnr': nr, 'E': rng.gauss(1000., 30.), 'pT': rng.gauss(100., 50.)}
    for nr in range(N_EVENTS)
]

### Index documents one by one

Takes around 70-80 seconds, because every document is sent to the cluster in its own request.

In [50]:
from time import time

# es.create refuses to overwrite an existing id, so documents left over from
# an earlier (possibly interrupted) run would make this cell fail with a
# conflict error. Drop the index first so the cell can be re-run safely.
try:
    es.indices.delete(index='my_events')
except es_exceptions.NotFoundError:
    pass  # nothing to clean up on a fresh run

# One request per document; timed so it can be compared with bulk indexing.
start=time()
for nr, event in enumerate(allEvents):
    es.create(index='my_events', doc_type='event', id=nr, body=event)
print("it took", time()-start,"seconds.")

it took 80.52499747276306 seconds.


### Bulk indexing of same events

In [51]:
# Start from a clean slate: drop the index filled by the one-by-one cell.
try:
    es.indices.delete(index='my_events')
except es_exceptions.NotFoundError:
    # Only a missing index is expected here; any other failure should surface
    # instead of being silently swallowed.
    print("not there?")

# Send all events through the bulk helper and time the whole operation.
start=time()
try:
    res = helpers.bulk(es, allEvents, index='my_events', doc_type='event', raise_on_exception=True, request_timeout=60)
except es_exceptions.ConnectionError as e:
    print('ConnectionError ', e)
except es_exceptions.TransportError as e:
    print('TransportError ', e)
except helpers.BulkIndexError as e:
    # Exceptions are not subscriptable in Python 3: the summary message and
    # the list of per-document errors are carried in e.args.
    print(e.args[0])
    for i in e.args[1]:
        print(i)
except Exception as e:
    print('Something seriously wrong happened.',e)

print("it took", time()-start,"seconds.")

it took 6.694327354431152 seconds.


### Search for a document

In [52]:
# Bool query whose two range clauses must both match
# (100 <= pT < 120 and E >= 200); at most 15 hits are returned.
my_query = {
    "size": 15,
    "query": {
        "bool": {
            "must": [
                {"range": {"pT": {"gte": 100, "lt": 120}}},
                {"range": {"E": {"gte": 200}}},
            ]
        }
    },
}

res = es.search(index="my_events", body=my_query)
for hit in res["hits"]["hits"]:
    print(hit)

{'_index': 'my_events', '_id': 'AV9qxROBP9IDFhhR0sci', '_score': 2.0, '_type': 'event', '_source': {'E': 984.7084254758788, 'eventnr': 8, 'pT': 117.66098996513544}}
{'_index': 'my_events', '_id': 'AV9qxROBP9IDFhhR0scv', '_score': 2.0, '_type': 'event', '_source': {'E': 1016.0826808457183, 'eventnr': 21, 'pT': 114.83635665160716}}
{'_index': 'my_events', '_id': 'AV9qxROBP9IDFhhR0scx', '_score': 2.0, '_type': 'event', '_source': {'E': 1009.6402201390512, 'eventnr': 23, 'pT': 107.72196819946713}}
{'_index': 'my_events', '_id': 'AV9qxROBP9IDFhhR0sc1', '_score': 2.0, '_type': 'event', '_source': {'E': 928.9305001330944, 'eventnr': 27, 'pT': 110.45458509125653}}
{'_index': 'my_events', '_id': 'AV9qxROBP9IDFhhR0sdA', '_score': 2.0, '_type': 'event', '_source': {'E': 959.4179342547469, 'eventnr': 38, 'pT': 114.91073235511675}}
{'_index': 'my_events', '_id': 'AV9qxROBP9IDFhhR0sdW', '_score': 2.0, '_type': 'event', '_source': {'E': 1022.541522680717, 'eventnr': 60, 'pT': 117.83457827148412}}
{'_

### Perform aggregated search

In [53]:
# Histogram aggregation named "pt_bins": documents are bucketed on the pT
# field with an interval of 50.
pt_histogram = {"histogram": {"field": "pT", "interval": 50}}
my_query = {"aggs": {"pt_bins": pt_histogram}}

res = es.search(index="my_events", body=my_query)
print(res["aggregations"])

{'pt_bins': {'buckets': [{'key': -100.0, 'doc_count': 1}, {'key': -50.0, 'doc_count': 19}, {'key': 0.0, 'doc_count': 157}, {'key': 50.0, 'doc_count': 331}, {'key': 100.0, 'doc_count': 343}, {'key': 150.0, 'doc_count': 122}, {'key': 200.0, 'doc_count': 27}]}}


### Clean up

In [54]:
# remove the demo index used throughout this notebook
es.indices.delete(index='my_events')

{'acknowledged': True}