# Working with Elasticsearch

### For guidance, see [Elastic Stack and Product Documentation](https://www.elastic.co/guide/index.html)

### Import necessary modules

In [1]:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import pathlib
import time
# twittertools is my local twittertools.py module
import twittertools

### Instantiate Elasticsearch object

In [2]:
# Create the client with no arguments, i.e. the default connection
# to localhost:9200. Every later cell talks to the cluster through `es`.
es = Elasticsearch()

### Print current indices for reference

In [3]:
# Show the cat-indices listing (one line per index) as a reference point
# before the twitter index is rebuilt below.
print(es.cat.indices())

yellow open twitter FyCk4wKaTz-V_gRaHReVMw 5 1 12908 0  8.6mb  8.6mb
yellow open blogs   rMlnHDk0RHGKIoAiOMqxdw 3 2     0 0   792b   792b
yellow open .kibana Yuy-vovlQyeUPVim2PcdFw 1 1     2 0 10.9kb 10.9kb
yellow open website HaP0rQARQlClPc2McT5dsg 5 1     1 0  5.8kb  5.8kb



### Get tweets from a few Twitter user timelines; index them in Elasticsearch

In [4]:
# Demonstration only: start from a clean slate by dropping the /twitter index.
# HTTP 400 and 404 responses are ignored, so a missing index does not raise.
result = es.indices.delete(
    index='twitter',
    ignore=[400, 404],
)

In [5]:
# Build the authenticated TwitterTools object from the credentials file
# kept under the current user's home directory (.twitter/credentials.json).
filepath = pathlib.Path.home() / '.twitter' / 'credentials.json'
twt = twittertools.TwitterTools(filepath)

In [6]:
# Fetch each account's timeline and index every tweet as its own document.
#   total_tweets  - running count of tweets retrieved across all accounts
#   total_indexed - tweets successfully indexed for the *current* account
#   all_indexed   - running count of successfully indexed tweets overall
screen_names = ['pourmecoffee', 'washingtonpost', 'brainpicker', 'wilw']
total_tweets = 0
all_indexed = 0
for screen_name in screen_names:
    tweets = twt.get_user_timeline(screen_name)
    total_tweets += len(tweets)
    print(f"{len(tweets)} tweets retrieved from @{screen_name}'s timeline;", end=' ')
    total_indexed = 0
    for tweet in tweets:
        doc = twittertools.unpack_tweet(tweet)
        result = es.index(index='twitter', doc_type='tweet', body=doc)
        # Count the tweet only when at least one shard reported success.
        total_indexed += 1 if result['_shards']['successful'] else 0
    # flush so the per-account line appears as soon as that account is done
    print(f'{total_indexed} indexed', flush=True)
    all_indexed += total_indexed
print('Total tweets indexed:', all_indexed)

3247 tweets retrieved from @pourmecoffee's timeline; 3247 indexed
3228 tweets retrieved from @washingtonpost's timeline; 3228 indexed
3202 tweets retrieved from @brainpicker's timeline; 3202 indexed
3231 tweets retrieved from @wilw's timeline; 3231 indexed
Total tweets indexed: 12908


### Confirm number of tweets indexed

In [7]:
# Elasticsearch search is near-real-time: freshly indexed documents only show
# up in search/count results after the index has been refreshed, so the count
# can briefly lag behind the number of documents just indexed.
# Poll until the count matches what was indexed above, but stop after
# max_wait seconds rather than looping forever if it never matches
# (for example, if the index holds documents from elsewhere).
sleep_time = 0.50   # seconds between polls
max_wait = 30.0     # overall limit in seconds before giving up
search = Search(using=es, index='twitter', doc_type='tweet')
deadline = time.monotonic() + max_wait
while search.count() != all_indexed:
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f'Expected {all_indexed} indexed tweets but found '
            f'{search.count()} after waiting {max_wait:.0f} seconds'
        )
    print('zzz...', end='')
    time.sleep(sleep_time)
print(f'\nFound {all_indexed} indexed tweets')

zzz...zzz...
Found 12908 indexed tweets
