### Import necessary modules

In [1]:
import json
import pathlib
import requests
import time
# twittertools from local twittertools.py
import twittertools

### Define Elasticsearch server URI

In [2]:
elasticsearch_server = 'http://localhost:9200'

### Create Elasticsearch indices

In [3]:
def create_index(index):
    # ensure index has no leading '/'
    index = index.replace('/', '', 1)
    # For this demo only, delete any existing index
    try:
        uri = elasticsearch_server + '/' + index
        requests.delete(uri)
    except requests.exceptions.ConnectionError:
        print(f'Connection error for URI {uri}')
        raise
    
    # Create index
    try:
        uri = elasticsearch_server + '/' + index
        requests.put(uri)
    except requests.exceptions.ConnectionError:
        print(f'Connection error for URI {uri}')
        raise

In [4]:
create_index('tweets')
create_index('twitter-users')

In [5]:
# Confirm indices were created
r = requests.get(elasticsearch_server + '/_cat/indices')
print(r.text)

yellow open .kibana       Yuy-vovlQyeUPVim2PcdFw 1 1 2 0 10.9kb 10.9kb
yellow open twitter-users O0rQzOOuRpS2B9G4VoA4xQ 5 1 0 0  1.1kb  1.1kb
yellow open tweets        IHfw8LIFRZKjRyRcak1J6g 5 1 0 0  1.1kb  1.1kb



### Create Authenticated TwitterTools object

In [6]:
filepath = pathlib.Path.home().joinpath('.twitter', 'credentials.json')
twt = twittertools.TwitterTools(filepath)

### Get tweets from @pourmecoffee's timeline

In [7]:
screen_name = 'pourmecoffee'
tweets = twt.get_user_timeline(screen_name)
print(f"{len(tweets)} tweets retrieved from @{screen_name}'s timeline")

3242 tweets retrieved from @pourmecoffee's timeline


### Add all tweets to twitter/tweets

In [8]:
uri = elasticsearch_server + '/tweets/objects/'
success = 0
for tweet in tweets:
    _source = twittertools.unpack_tweet(tweet)
    r = requests.post(uri, json=_source)
    if r.status_code in [200, 201]:
        success += 1
print(f'{success} tweets indexed')

3242 tweets indexed


### Confirm number of tweets added to twitter/tweets

In [9]:
sleep_time = 0.250
while True:
    uri = elasticsearch_server + '/tweets/objects/_search?q=*'
    r = requests.get(uri)
    results = dict(r.json())
    total = results['hits']['total']
    if total != success:
        print(f'Found {total} of {success} tweets. Sleeping for {sleep_time} seconds.')
        time.sleep(sleep_time)
    else:
        print(f'Found all {total} tweets!')
        break

Found 3084 of 3242 tweets. Sleeping for 0.25 seconds.
Found all 3242 tweets!


In [10]:
screen_names = ['elastic', 'pourmecoffee', 'washingtonpost', 'BarackObama', 'NateSilver538']
profiles = twt.get_user_profiles(screen_names=screen_names)
uri = elasticsearch_server + '/users/objects/'
success = 0
for profile in profiles:
    _source = twittertools.unpack_profile(profile)
    r = requests.post(uri, json=_source)
    if r.status_code in [200, 201]:
        success += 1
    else:
        print(r.text)
print(f'{success} profiles indexed')

5 profiles indexed


In [11]:
sleep_time = 0.250
while True:
    uri = elasticsearch_server + '/users/objects/_search?q=*'
    r = requests.get(uri)
    results = dict(r.json())
    total = results['hits']['total']
    if total != success:
        print(f'Found {total} of {success} user profiles. Sleeping for {sleep_time} seconds.')
        time.sleep(sleep_time)
    else:
        print(f'Found all {total} user profiles!')
        break

Found 0 of 5 user profiles. Sleeping for 0.25 seconds.
Found 0 of 5 user profiles. Sleeping for 0.25 seconds.
Found 0 of 5 user profiles. Sleeping for 0.25 seconds.
Found 0 of 5 user profiles. Sleeping for 0.25 seconds.
Found all 5 user profiles!
