Skip to content

Commit

Permalink
Merge branch 'development' of github.com:dimazest/poultry into development
Browse files Browse the repository at this point in the history
  • Loading branch information
dimazest committed Nov 29, 2017
2 parents 453ddd5 + 2e339cd commit 44f4266
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 19 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Changes
1.5.0 (in development)
----------------------

* Use the ``full_text`` field to retrieve a tweet's text, falling back to ``text`` if
it's not available.
* ``language`` filtering predicate.
* The ``--mode`` parameter for the ``filter`` subcommand that sets the file opening
mode. Use ``w`` to rewrite the files and ``a`` (the default) to append.
Expand Down
2 changes: 1 addition & 1 deletion poultry/consumers.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def to_simple_queue(queue):
while True:
item = yield
size = queue.qsize()
if size:
if size > 10:
logger.warn('Queue size is %s.', size)
try:
queue.put(item, timeout=1)
Expand Down
4 changes: 1 addition & 3 deletions poultry/producers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ def consume_stream(target, input_dir=None):
with consumers.closing(*targets):

for line in lines:

if not line.strip():
continue

Expand All @@ -48,14 +47,13 @@ def readline_dir(input_dir, extract_retweets=False, mark_extracted=False):
try:
tweet = Tweet(l)
except TweetValueError:
pass
continue
else:

# TODO: this duplicates consumers.extract_retweets.
retweeted_status = tweet.parsed.get('retweeted_status', None)
if extract_retweets and retweeted_status:
yield Tweet(retweeted_status)

yield tweet


Expand Down
20 changes: 12 additions & 8 deletions poultry/tweet.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, raw_json):
except ValueError:
raise TweetValueError("The passed json can't be parsed.")
else:
if isinstance(tweet, dict) and 'text' in tweet:
if isinstance(tweet, dict) and ('text' in tweet or 'full_text' in tweet):
self.raw = raw_json
self.parsed = tweet
else:
Expand All @@ -34,7 +34,7 @@ def __init__(self, raw_json):
@property
def text(self):
"""The unprocessed text of the tweet."""
return self.parsed['text']
return self.parsed['full_text'] if 'full_text' in self.parsed else self.parsed['text']

@property
def hashtags(self):
Expand Down Expand Up @@ -216,11 +216,15 @@ def get_tokens(self, min_token_len=3, allowed_categories='LN'):
tokens = self.text_without_entities.split()
tokens = (t.lower() for t in tokens)

return filter(lambda s: len(s) >= min_token_len,
[u''.join(c for c in t if unicodedata.category(c)[0] in allowed_categories)
for t in tokens
]
)
return list(
filter(
lambda s: len(s) >= min_token_len,
[
u''.join(c for c in t if unicodedata.category(c)[0] in allowed_categories)
for t in tokens
]
)
)

@property
def is_spam(self):
Expand All @@ -229,7 +233,7 @@ def is_spam(self):
.. todo: parameters instead of magic numbers.
'''
entity_violations = (2 < e for e in [self.hashtags, self.urls, self.user_mentions])
entity_violations = (len(e) > 2 for e in [self.hashtags, self.urls, self.user_mentions])
lenght_violantion = len(self.tokens) < 5

return any(entity_violations) or lenght_violantion
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def run_tests(self):

setup(
name='poultry',
version='1.5.0a1',
version='1.5.0',
description='A tweet collection manager.',
long_description=long_description,
# Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
Expand Down
10 changes: 5 additions & 5 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_show(capfd, poultry_cfg):
)

out, err = capfd.readouterr()
assert not err
assert err.endswith('poultry.stream - WARNING - The POST request is sent.\n')

assert out == (
u'dimazest: pinkpop pukkelpop paaspop prilpop pedropicopop all use #pp12 :)\n'
Expand All @@ -68,9 +68,9 @@ def test_select(capfd, tweets, poultry_cfg):
)

out, err = capfd.readouterr()
assert not err
assert err.endswith('poultry.stream - WARNING - The POST request is sent.\n')

expected_result = u'\n'.join(tweets) + '\n'
expected_result = u'\n'.join(tweets) + u'\n\n'
assert out == expected_result


Expand All @@ -81,7 +81,7 @@ def test_pprint(capfd, tweets, poultry_cfg):
)

out, err = capfd.readouterr()
assert not err
assert err.endswith('poultry.stream - WARNING - The POST request is sent.\n')

expected_result = u'\n'.join(pformat(json.loads(t)) for t in tweets) + '\n'
assert out == expected_result
Expand All @@ -94,7 +94,7 @@ def test_timeline(capfd, tweets, poultry_cfg):
)

out, err = capfd.readouterr()
assert not err
assert err.endswith('poultry.stream - WARNING - The POST request is sent.\n')

assert out == (
u'2012-04-13-13 1\n'
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# and then run "tox" from this directory.

[tox]
envlist = py27,py34,py33,pypy
envlist = py27,py36,pypy

[testenv]
commands = py.test tests README.rst --pep8 --junitxml={envlogdir}/junit-{envname}.xml []
Expand Down

0 comments on commit 44f4266

Please sign in to comment.