Skip to content

Commit

Permalink
Improve error handling. (4.5.0)
Browse files Browse the repository at this point in the history
OGRe now raises its own custom exceptions. The fail_hard runtime
modifier causes exceptions to be raised instead of returning an empty
result set. The test and test_message runtime modifiers have been
removed in favor of the improved logging system. Logging is now
configured explicitly only during testing and direct invocation (the
level may be set with --log).
  • Loading branch information
dmtucker committed May 13, 2014
1 parent 807ffce commit ebf87f5
Show file tree
Hide file tree
Showing 11 changed files with 467 additions and 317 deletions.
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@
# built documents.
#
# The short X.Y version.
version = '4.4'
version = '4.5'
# The full version, including alpha/beta/rc tags.
release = '4.4.0'
release = '4.5.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
108 changes: 59 additions & 49 deletions ogre/Twitter.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
"""OGRe Twitter Interface
"""
OGRe Twitter Interface
:func:`twitter` : method for fetching data from Twitter
"""

import base64
import hashlib
import logging
import sys
import time
import urllib
from datetime import datetime
from time import time
from twython import Twython
from ogre.validation import sanitize
from snowflake2time.snowflake import *
from ogre.exceptions import OGReError, OGReLimitError
from snowflake2time.snowflake import snowflake2utc, utc2snowflake


def sanitize_twitter(
Expand All @@ -25,7 +26,8 @@ def sanitize_twitter(
interval=None
):

"""Validate and prepare parameters for use in Twitter data retrieval.
"""
Validate and prepare parameters for use in Twitter data retrieval.
.. seealso:: :meth:`ogre.validation.validate` describes the format each
parameter must have.
Expand Down Expand Up @@ -57,7 +59,6 @@ def sanitize_twitter(
:rtype: tuple
:returns: Each passed parameter is returned (in order) in the proper format.
"""

clean_keys = {}
Expand Down Expand Up @@ -138,7 +139,8 @@ def twitter(
**kwargs
):

"""Fetch Tweets from the Twitter API.
"""
Fetch Tweets from the Twitter API.
.. seealso:: :meth:`sanitize_twitter` describes more about
the format each parameter must have.
Expand Down Expand Up @@ -217,7 +219,7 @@ def twitter(
:type network: callable
:param network: Specify a network access point (for dependency injection).
:raises: TwythonError
:raises: OGReError, OGReLimitError, TwythonError
:rtype: list
:returns: GeoJSON Feature(s)
Expand All @@ -226,7 +228,6 @@ def twitter(
how to build queries for Twitter using the `keyword` parameter.
More information may also be found at
https://dev.twitter.com/docs/api/1.1/get/search/tweets.
"""

keychain, kinds, q, remaining, geocode, (since_id, max_id) = \
Expand All @@ -239,46 +240,45 @@ def twitter(
interval=interval
)

modifiers = {
"api": Twython,
"fail_hard": False,
"network": urllib.urlopen,
"query_limit": 450, # Twitter allows 450 queries every 15 minutes.
"secure": True,
"strict_media": False
}
for modifier, _ in modifiers.items():
if kwargs.get(modifier) is not None:
modifiers[modifier] = kwargs[modifier]

qid = hashlib.md5(
str(time.time()) +
str(q) +
str(remaining) +
str(geocode) +
str(since_id) +
str(max_id)
str(max_id) +
str(kwargs)
).hexdigest()
logging.basicConfig(
filename="OGRe.log",
level=logging.ERROR,
format="%(asctime)s %(levelname)s:%(message)s",
datefmt="%Y/%m/%d %H:%M:%S %Z"
)
log = logging.getLogger(__name__)
if kwargs.get("test", False):
log.setLevel(logging.DEBUG)
log.info(qid+" Request: Twitter TEST "+kwargs.get("test_message", ""))
log.debug(
qid+" Status:" +
" media("+str(media)+")" +
" keyword("+str(q)+")" +
" quantity("+str(remaining)+")" +
" location("+str(geocode)+")" +
" interval("+str(since_id)+","+str(max_id)+")" +
" kwargs("+str(kwargs)+")"
)
else:
log.setLevel(logging.INFO)
log.info(qid+" Request: Twitter")

maximum_queries = kwargs.get("query_limit")
if maximum_queries is None:
maximum_queries = 450 # Twitter allows 450 queries every 15 minutes.
log = logging.getLogger(__name__)
log.info(qid+" Request: Twitter")
log.debug(
qid+" Status:" +
" media("+str(media)+")" +
" keyword("+str(q)+")" +
" quantity("+str(remaining)+")" +
" location("+str(geocode)+")" +
" interval("+str(since_id)+","+str(max_id)+")" +
" kwargs("+str(kwargs)+")"
)

if not kinds or remaining < 1 or maximum_queries < 1:
if not kinds or remaining < 1 or modifiers["query_limit"] < 1:
log.info(qid+" Success: No results were requested.")
return []

api = kwargs.get("api", Twython)(
api = modifiers["api"](
keychain["consumer_key"],
access_token=keychain["access_token"]
)
Expand All @@ -288,18 +288,28 @@ def twitter(
limit = int(
limits["resources"]["search"]["/search/tweets"]["remaining"]
)
reset = int(
limits["resources"]["search"]["/search/tweets"]["reset"]
)
if limit < 1:
log.info(qid+" Failure: Queries are being limited.")
message = "Queries are being limited."
log.info(qid+" Failure: "+message)
if modifiers["fail_hard"]:
raise OGReLimitError(
source="Twitter",
message=message,
reset=reset
)
else:
log.debug(qid+" Status: "+str(limit)+" queries remain.")
if limit < maximum_queries:
maximum_queries = limit
if limit < modifiers["query_limit"]:
modifiers["query_limit"] = limit
except KeyError:
log.warn(qid+" Unobtainable Rate Limit")
total = remaining

collection = []
for query in range(maximum_queries):
for query in range(modifiers["query_limit"]):
count = min(remaining, 100) # Twitter accepts a max count of 100.
try:
results = api.search(
Expand All @@ -318,12 +328,15 @@ def twitter(
)
raise
if results.get("statuses") is None:
message = "The request is too complex."
log.info(
qid+" Failure: " +
str(query+1)+" queries produced " +
str(len(collection))+" results. " +
"The request is too complex."
message
)
if modifiers["fail_hard"]:
raise OGReError(source="Twitter", message=message)
break
for tweet in results["statuses"]:
if tweet.get("coordinates") is None or tweet.get("id") is None:
Expand All @@ -349,23 +362,20 @@ def twitter(
if tweet.get("text") is not None:
feature["properties"]["text"] = tweet["text"]
if "image" in kinds:
if not kwargs.get("strict_media", False):
if not modifiers["strict_media"]:
if tweet.get("text") is not None:
feature["properties"]["text"] = tweet["text"]
if tweet.get("entities", {}).get("media") is not None:
for entity in tweet["entities"]["media"]:
if entity.get("type") is not None:
if entity["type"].lower() == "photo":
media_url = "media_url_https"
if not kwargs.get("secure", True):
if not modifiers["secure"]:
media_url = "media_url"
if entity.get(media_url) is not None:
feature["properties"]["image"] =\
base64.b64encode(
kwargs.get(
"network",
urllib.urlopen
)(
modifiers["network"](
entity[media_url]
).read()
)
Expand Down Expand Up @@ -398,7 +408,7 @@ def twitter(
.split("max_id=")[1]
.split("&")[0]
)
if query+1 >= maximum_queries:
if query+1 >= modifiers["query_limit"]:
outcome = "Success" if len(collection) > 0 else "Failure"
log.info(
qid+" "+outcome+": " +
Expand Down
6 changes: 3 additions & 3 deletions ogre/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""OpenFusion GIS Retriever
"""
OpenFusion GIS Retriever
`ogre.test` -- subpackage for testing OGRe
Expand All @@ -7,9 +8,8 @@
:mod:`ogre.Twitter` -- module for getting data from Twitter
:mod:`ogre.validation` -- module for parameter validation and sanitation
"""

from ogre.api import OGRe

__version__ = "4.4.0"
__version__ = "4.5.0"
27 changes: 25 additions & 2 deletions ogre/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python

"""Make queries using OGRe directly.
"""
Make queries using OGRe directly.
usage: ogre [(-s|--sources) Twitter]
[--keys "<dict>"]
Expand All @@ -14,11 +15,11 @@
[--strict]
See https://ogre.readthedocs.org/en/latest/ for more information.
"""

import argparse
import json
import logging
import os
from ogre import OGRe

Expand Down Expand Up @@ -72,6 +73,12 @@ def main():
default=None,
nargs=2
)
parser.add_argument(
"--hard",
help="Fail hard (Raise exceptions instead of returning empty).",
action="store_true",
default=False
)
parser.add_argument(
"--insecure",
help="Prefer HTTP.",
Expand All @@ -83,6 +90,11 @@ def main():
help="Specify a query limit.",
default=None
)
parser.add_argument(
"--log",
help="Specify a log level.",
default=None
)
parser.add_argument(
"--strict",
help="Ensure resulting media is specifically requested.",
Expand Down Expand Up @@ -111,6 +123,16 @@ def main():
args.interval[1] = float(args.interval[1])
if args.limit is not None:
args.limit = int(args.limit)
if args.log is not None:
args.log = getattr(logging, args.log.upper())
else:
args.log = logging.WARN

logging.basicConfig(
level=args.log,
format="%(asctime)s.%(msecs)03d %(name)s %(levelname)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)

print json.dumps(
OGRe(args.keys).fetch(
Expand All @@ -120,6 +142,7 @@ def main():
quantity=args.quantity,
location=args.location,
interval=args.interval,
fail_hard=args.hard,
query_limit=args.limit,
secure=args.insecure,
strict_media=args.strict
Expand Down

0 comments on commit ebf87f5

Please sign in to comment.