From f06e7e6f09b661af9bc1af2f21ccd743a93e3088 Mon Sep 17 00:00:00 2001 From: Dmitrijs Milajevs Date: Thu, 13 Dec 2018 10:47:04 -0500 Subject: [PATCH] Media command --- CHANGES.rst | 5 +++-- README.rst | 2 ++ poultry/consumers.py | 27 ++++++++++++++++++++++++++- poultry/main.py | 10 ++++++++++ 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 458bbd6..33c6eaa 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,8 @@ Changes ======= -1.5.0 (in development) ----------------------- +1.5.0 +----- * Python 2 is not supported anymore. * Use the ``full_text`` field to retrieve tweet's text, fall back to ``text`` if @@ -16,6 +16,7 @@ Changes * The ``Tweet.bounding_box`` property is introduced, it is always a polygon. * The ``--filters`` option for ``filter`` to define what filters are used. * Refactored communication with Twitter and internal stream handling. +* New ``media`` command. 1.3.0 ----- diff --git a/README.rst b/README.rst index 4ad6141..e33a734 100644 --- a/README.rst +++ b/README.rst @@ -3,3 +3,5 @@ Poultry :documentation: http://poultry.readthedocs.org :homepage: https://github.com/dimazest/poultry + +Poultry is a tweet collection manager. 
diff --git a/poultry/consumers.py b/poultry/consumers.py
index 8aaf1db..96868a9 100644
--- a/poultry/consumers.py
+++ b/poultry/consumers.py
@@ -4,7 +4,7 @@
 import logging
 import sys
 import time
-import json
+import csv
 
 try:
     from Queue import Full
@@ -417,3 +417,28 @@ def extract_retweets(target):
             target.send(retweeted_status)
 
         target.send(raw_tweet)
+
+
+@consumer
+def print_media(output=None):
+    """Write one CSV row per media item of every tweet sent in.
+
+    Rows go to ``output`` (``sys.stdout`` if None); no header row is written.
+    """
+    columns = 'tweet_id', 'index', 'media_id', 'type', 'media_url'
+    sink = sys.stdout if output is None else output
+    writer = csv.DictWriter(sink, fieldnames=columns)
+
+    while True:
+        tweet = yield
+        entities = tweet.parsed.get('extended_entities', {})
+        # A tweet without media yields no rows at all.
+        for position, item in enumerate(entities.get('media', [])):
+            row = {
+                'tweet_id': tweet.id,
+                'index': position,
+                'media_id': item['id'],
+                'type': item['type'],
+                'media_url': item['media_url'],
+            }
+            writer.writerow(row)
diff --git a/poultry/main.py b/poultry/main.py
index d5cb224..d12c5bd 100644
--- a/poultry/main.py
+++ b/poultry/main.py
@@ -104,3 +104,13 @@ def timeline(producer, window=('w', '%Y-%m-%d-%H', '')):
             ),
         ),
     )
+
+
+@command()
+def media(producer, output):
+    """Retrieve media urls."""
+    # Build the chain inside out: print_media is wrapped by to_tweet,
+    # and the wrapped consumer is what producer gets called with.
+    media_printer = consumers.print_media(output=output)
+    tweet_consumer = consumers.to_tweet(media_printer)
+    producer(tweet_consumer)