In [1]:
import json
import pathlib
import requests
import time
import twittertools

### Define Elasticsearch server URI

In [2]:
# Base URI of the Elasticsearch server; later cells append index and API paths to it.
# Points at a local instance on port 9200 -- edit this line to target another server.
elasticsearch_server = 'http://localhost:9200'

### Get tweets from @pourmecoffee's timeline

In [3]:
# Credentials live outside the notebook, in ~/.twitter/credentials.json,
# so no secrets are embedded in the cells themselves.
filepath = pathlib.Path.home() / '.twitter' / 'credentials.json'
twt = twittertools.TwitterTools(filepath)

In [4]:
# Pull the timeline for the chosen account, then display how many items came back.
screen_name = 'pourmecoffee'
tweets = twt.get_user_timeline(screen_name)
len(tweets)

3242

### Create Elasticsearch twitter index 

In [5]:
# Demo-only cleanup: drop any pre-existing "twitter" index so the
# following cells start from an empty state.
uri = f'{elasticsearch_server}/twitter'
requests.delete(uri)

<Response [200]>

In [6]:
# Show the index listing as it stands before /twitter is created.
# The `v` flag asks the cat API to include the column-header row.
cat_indices_uri = f'{elasticsearch_server}/_cat/indices?v'
r = requests.get(cat_indices_uri)
print(r.text)

health status index   uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   .kibana Yuy-vovlQyeUPVim2PcdFw   1   1          3            0     49.9kb         49.9kb



In [7]:
# Create the "twitter" index. No request body is sent, so no explicit
# settings or mappings are supplied here.
uri = f'{elasticsearch_server}/twitter'
requests.put(uri)

<Response [200]>

In [8]:
# Show the index listing again now that /twitter has been created.
# The `v` flag asks the cat API to include the column-header row.
cat_indices_uri = f'{elasticsearch_server}/_cat/indices?v'
r = requests.get(cat_indices_uri)
print(r.text)

health status index   uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   .kibana Yuy-vovlQyeUPVim2PcdFw   1   1          3            0     49.9kb         49.9kb
yellow open   twitter 1dhcAnozRL-Q8utR60NQgg   5   1          0            0      1.1kb          1.1kb



### Add all tweets to twitter/tweets

In [9]:
# Index every tweet as its own document under twitter/tweets.
#
# `success` counts the documents Elasticsearch accepted; a later cell compares
# the searchable document count against it. Rejected documents are collected
# in `failed` and reported, instead of being dropped silently.
uri = elasticsearch_server + '/twitter/tweets/'
success = 0
failed = []  # (status_code, truncated response body) for every rejected tweet

# A Session keeps the HTTP connection alive across requests, avoiding a new
# connection for each of the (potentially thousands of) POSTs.
with requests.Session() as session:
    for tweet in tweets:
        _source = twittertools.unpack_tweet(tweet)
        r = session.post(uri, json=_source)
        if r.status_code in (200, 201):
            success += 1
        else:
            failed.append((r.status_code, r.text[:200]))

print(f'{success} tweets indexed')
if failed:
    # Surface the first error so the cause is visible without dumping them all.
    print(f'{len(failed)} tweets failed to index; first failure: {failed[0]}')

3242 tweets indexed


### Confirm number of tweets added to twitter/tweets

In [10]:
# Poll the search API until it reports as many documents as were indexed.
#
# Newly indexed documents only become searchable after an index refresh, so the
# first few polls may report fewer hits than `success`. The loop is bounded by
# `max_wait` so a count that never matches (e.g. documents that were rejected,
# or extra documents already in the index) cannot hang the notebook forever.
sleep_time = 0.250  # seconds between polls
max_wait = 30.0     # seconds before giving up
uri = elasticsearch_server + '/twitter/tweets/_search?q=*'  # loop-invariant

deadline = time.monotonic() + max_wait
while True:
    r = requests.get(uri)
    # Fail with a clear HTTP error rather than a KeyError on the missing 'hits' key.
    r.raise_for_status()
    results = r.json()
    total = results['hits']['total']
    if total == success:
        print(f'Found all {total} tweets!')
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f'Found {total} of {success} tweets after waiting {max_wait} seconds.'
        )
    print(f'Found {total} of {success} tweets. Sleeping for {sleep_time} seconds.')
    time.sleep(sleep_time)

Found 3161 of 3242 tweets. Sleeping for 0.25 seconds.
Found 3161 of 3242 tweets. Sleeping for 0.25 seconds.
Found all 3242 tweets!
