# Working with Elasticsearch

### For guidance, see [Elastic Stack and Product Documentation](https://www.elastic.co/guide/index.html)

### Import necessary modules

In [1]:
# Python Elasticsearch Client docs at http://elasticsearch-py.readthedocs.io
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import pathlib
import time
# twittertools is my local twittertools.py module
import twittertools

### Instantiate Elasticsearch object

In [2]:
# Called with no arguments, the client uses its default connection
# (localhost:9200). `es` is the handle the later cells use to talk to
# the cluster.
es = Elasticsearch()

### Print current indices for reference

In [3]:
# The cat API answers with a plain-text table (one index per line), so it
# is print()-ed rather than displayed as a Python repr.
print(es.cat.indices())

yellow open twitter O8Xp5vwfTcqAR4ex3LNsUQ 5 1 12908 0  8.5mb  8.5mb
yellow open .kibana Yuy-vovlQyeUPVim2PcdFw 1 1     2 0 10.9kb 10.9kb



### Get tweets from a few Twitter user timelines; index them in Elasticsearch

In [4]:
# For this demonstration only, delete any existing /twitter index so the
# cells below start from an empty index.
# ignore=[400, 404] asks the client not to raise on those HTTP statuses,
# so this cell also succeeds when the index does not exist yet.
result = es.indices.delete(index='twitter', ignore=[400, 404])

In [5]:
# Build an authenticated TwitterTools client from the credentials file
# kept under the current user's home directory (~/.twitter/credentials.json).
filepath = pathlib.Path.home() / '.twitter' / 'credentials.json'
twt = twittertools.TwitterTools(filepath)

In [6]:
# Fetch each account's timeline and index its tweets one document at a time.
screen_names = ['pourmecoffee', 'washingtonpost', 'brainpicker', 'wilw']
total_tweets = 0  # tweets retrieved, summed over all timelines
all_indexed = 0   # tweets reported as indexed, summed over all timelines
for screen_name in screen_names:
    tweets = twt.get_user_timeline(screen_name)
    total_tweets += len(tweets)
    # end=' ' keeps the cursor on this line; the indexed count is appended
    # to it once the inner loop has finished.
    print(f"{len(tweets)} tweets retrieved from @{screen_name}'s timeline;", end=' ')
    total_indexed = 0  # per-timeline counter, reset for every screen name
    for tweet in tweets:
        doc = twittertools.unpack_tweet(tweet)
        result = es.index(index='twitter', doc_type='tweet', body=doc)
        # Count the tweet only when the response reports a non-zero
        # number of successful shards.
        total_indexed += 1 if result['_shards']['successful'] else 0
    print(f'{total_indexed} indexed', flush=True)
    all_indexed += total_indexed
print('Total tweets indexed:', all_indexed)

3248 tweets retrieved from @pourmecoffee's timeline; 3248 indexed
3239 tweets retrieved from @washingtonpost's timeline; 3239 indexed
3204 tweets retrieved from @brainpicker's timeline; 3204 indexed
3233 tweets retrieved from @wilw's timeline; 3233 indexed
Total tweets indexed: 12924


### Confirm number of tweets indexed

In [7]:
# Elasticsearch search is near real-time: freshly indexed documents only
# show up in search results after the index has been refreshed, so the
# count can lag behind the number of documents just written. Poll until
# the count catches up with `all_indexed`.
#
# The wait is bounded: if the count never reaches `all_indexed`, raise
# instead of spinning forever and hanging a "Run All".
sleep_time = 0.50  # seconds between polls
max_wait = 30.0    # seconds to wait in total before giving up
search = Search(using=es, index='twitter', doc_type='tweet')
deadline = time.monotonic() + max_wait
found = search.count()
while found != all_indexed:
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f'Expected {all_indexed} indexed tweets but search reports '
            f'{found} after waiting {max_wait} seconds'
        )
    print('zzz...', end='')
    time.sleep(sleep_time)
    found = search.count()
# At this point found == all_indexed, so the reported number is unchanged.
print(f'\nFound {found} indexed tweets')

zzz...zzz...
Found 12924 indexed tweets


### Perform simple match query on tweet texts

In [8]:
# Full-text "match" query for NASA on the tweets' text field; execute()
# sends the request and returns the response.
results = search.query("match", text="NASA").execute()
print(f'Got {results["hits"]["total"]} matches')
print('Top ten by relevance score:')
# Only the returned page of hits is listed; each hit carries the indexed
# document under '_source'.
for hit in results['hits']['hits']:
    tweet = hit['_source']
    print('-', *(tweet[field] for field in ('screen_name', 'created', 'text')))

Got 35 matches
Top ten by relevance score:
- pourmecoffee 2017-06-05T22:31:52Z @NASA Like my tweets.
- pourmecoffee 2017-06-29T17:40:25Z @NASA @CassiniSaturn pierogi mmm
- pourmecoffee 2017-07-12T18:24:04Z @NASA Finally some non-Manhatten-sized icebergs.
- washingtonpost 2017-11-29T08:05:43Z A NASA astronaut films his spacewalk — and a breathtaking view of Earth https://t.co/ZaCEVRbTME
- pourmecoffee 2017-08-18T11:51:27Z @NASA @NASA_TDRS You're firing on Earth do you take me for a fool?
- washingtonpost 2017-11-15T23:01:18Z These are the melting glaciers that might someday drown your city, according to NASA https://t.co/hTuR9CobAf
- washingtonpost 2017-11-28T18:58:00Z A NASA astronaut films his spacewalk — and a breathtaking view of Earth https://t.co/3WrPgggQVF
- pourmecoffee 2017-09-10T01:43:52Z NASA engineers these releases every ten years to keep the people docile. I've said too much already.
- pourmecoffee 2017-09-07T22:54:20Z Rare shot from NASA of Carl Sagan just minutes after h