---
Save selected parts of the Twitter stream
===

Inspired by http://adilmoujahid.com/posts/2014/07/twitter-analytics/

---

Take a peek at the code


In [4]:
# %load tweepy_writer.py
#!/usr/bin/python3

"""A simple streaming writer from Twitter's API.

Inspired by http://adilmoujahid.com/posts/2014/07/twitter-analytics/
"""

import json
import os
import sys

from credentials import credentials
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

# Read the four OAuth values from ~/.credentials.json
creds = credentials.require(
    ['access_token', 'access_token_secret', 'consumer_key', 'consumer_secret']
)

# Build the OAuth handler that is later handed to the stream.
auth = OAuthHandler(creds.consumer_key, creds.consumer_secret)
auth.set_access_token(creds.access_token, creds.access_token_secret)

class WriteToDiskListener(StreamListener):
    """Stream listener that appends a limited number of Tweets to disk.

    Every payload handed to ``on_data`` is appended verbatim to
    ``<filename lowercased>.json`` until ``limit`` payloads have been
    written.

    Args:
        filename: Base name of the output file, without the ``.json``
            extension.
        limit: Maximum number of payloads to write.
    """

    def __init__(self, filename, limit=5):
        # Run the base listener's own initialisation before adding state.
        super().__init__()
        self.counter = 0
        self.filename = filename
        self.limit = limit

    def on_data(self, data):
        """If under limit, append received data to disk.

        Returns:
            False once ``limit`` payloads have been written. True
            otherwise, including when the write failed: the error is
            printed and the payload is not counted towards the limit.
        """
        if self.counter >= self.limit:
            return False

        try:
            with open(self.filename.lower() + '.json', 'a') as f:
                f.write(data)
        except Exception as e:
            # Best effort: report the failure and keep listening.
            # KeyboardInterrupt/SystemExit are deliberately not caught so
            # the user can still stop the script.
            print("Error on_data: {}".format(e))
        else:
            self.counter += 1
        return True

    def on_error(self, status):
        """Print the status value passed to the error callback."""
        print(status)

if __name__ == '__main__':
    track = sys.argv[1:]  # Track is a list of search terms to stream.
    if not track:
        # Without search terms there is nothing to filter on and the
        # output filename would be empty, so stop with a usage message.
        sys.exit("usage: {} TERM [TERM ...]".format(sys.argv[0]))

    # Output file is named after the lowercased terms joined by "_".
    filename = "_".join([item.lower() for item in track])

    # NOTE: the listener opens <filename>.json in append mode, so tweets
    # saved by earlier runs with the same terms are kept.

    listener = WriteToDiskListener(filename=filename,
                                   limit=5)
    stream = Stream(auth, listener)

    try:
        stream.filter(track=track,
                      languages=['en'])
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop early; exit quietly.
        pass
    except Exception as e:
        # Report the failure instead of hiding it, but still exit cleanly.
        print("Error while streaming: {}".format(e), file=sys.stderr)
    finally:
        stream.disconnect()

406
406
406
406


Put in a list of terms to search for in the Twitter Stream

In [5]:
! python3 tweepy_writer.py pizza

In [6]:
! python3 tweepy_writer.py pizza beer

It saves the data as JSON for easy processing

In [7]:
import json
from pprint import pprint

In [9]:
# Parsed tweets, one entry per line of the input file.
data = []

# Each line of the file is decoded as its own JSON document.
with open('pizza_beer.json') as data_file:
    for line in data_file:
        tweet = json.loads(line)
        data.append(tweet)

In [11]:
pprint(data[0])

{'contributors': None,
 'coordinates': None,
 'created_at': 'Sat Mar 19 13:39:20 +0000 2016',
 'entities': {'hashtags': [],
              'symbols': [],
              'urls': [],
              'user_mentions': [{'id': 2583812180,
                                 'id_str': '2583812180',
                                 'indices': [0, 11],
                                 'name': 'xin ☁',
                                 'screen_name': 'tan_huixin'}]},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 711185288802861057,
 'id_str': '711185288802861057',
 'in_reply_to_screen_name': 'tan_huixin',
 'in_reply_to_status_id': 711184946400854021,
 'in_reply_to_status_id_str': '711184946400854021',
 'in_reply_to_user_id': 2583812180,
 'in_reply_to_user_id_str': '2583812180',
 'is_quote_status': False,
 'lang': 'en',
 'place': None,
 'retweet_count': 0,
 'retweeted': False,
 'source': '<a href="http://twitter.com/download/android" '
           'rel="nofollow"

<br>
<br>