Skip to content

Commit

Permalink
Merge pull request #62 from pingihu/master
Browse files Browse the repository at this point in the history
Add timeouts for fetching and wrangling posts
  • Loading branch information
ojedatony1616 committed Jun 3, 2016
2 parents 1cf7faf + 61448c0 commit 2e5d837
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 3 deletions.
1 change: 1 addition & 0 deletions baleen/config.py
Expand Up @@ -63,6 +63,7 @@ class BaleenConfiguration(confire.Configuration):
logfile = 'baleen.log' # Location to write log
loglevel = 'DEBUG' # Log messages to record
fetch_html = True # Actually fetch HTML link
timeout = 180 # Timeout for fetching posts/feeds

## Load settings immediately for import
settings = BaleenConfiguration.load()
Expand Down
7 changes: 7 additions & 0 deletions baleen/exceptions.py
Expand Up @@ -64,3 +64,10 @@ class ExportError(BaleenError):
Something went wrong with the export of the corpus
"""
pass


class TimeoutError(Exception):
    """
    Signals that an operation timed out.

    NOTE(review): derives directly from Exception and shares its name with
    the Python 3 builtin TimeoutError, so importing it shadows the builtin
    in the importing module.
    """
4 changes: 3 additions & 1 deletion baleen/feed.py
Expand Up @@ -19,11 +19,12 @@

import feedparser

from baleen.config import settings
from baleen.models import Feed
from baleen.utils.timez import localnow
from baleen.exceptions import FeedTypeError
from baleen.exceptions import SynchronizationError
from baleen.utils.decorators import memoized, reraise
from baleen.utils.decorators import memoized, reraise, timeout


##########################################################################
Expand Down Expand Up @@ -103,6 +104,7 @@ def url(self):
self.MODEL: lambda: self.feed.link,
}[self.type]()

@timeout(settings.timeout)
def parse(self):
"""
Wraps the feedparser.parse function such that if the feed is a model,
Expand Down
27 changes: 26 additions & 1 deletion baleen/utils/decorators.py
Expand Up @@ -17,9 +17,10 @@
## Imports
##########################################################################

import signal
from functools import wraps
from baleen.utils.timez import Timer
from baleen.exceptions import BaleenError
from baleen.exceptions import BaleenError, TimeoutError

##########################################################################
## Memoization
Expand Down Expand Up @@ -65,6 +66,30 @@ def timer_wrapper(*args, **kwargs):
return timer_wrapper


def timeout(seconds):
    """
    Raises a TimeoutError if the decorated function does not terminate
    within the specified number of seconds.

    The limit is enforced with a SIGALRM interval timer, so it relies on
    the ``signal`` module's Unix-only timer support and on being called
    from the main thread. ``seconds`` may be an int or a float; a value
    of zero arms no timer, i.e. the call is not time limited.

    Any SIGALRM timer already pending when the wrapped function is called
    is replaced, and is cancelled (not re-armed) when the call returns.
    The SIGALRM handler that was installed before the call is put back
    afterwards so the decorator does not leak process-global state.
    """
    def _timeout_error(signum, frame):
        # Arguments are deliberately not called ``signal`` so that the
        # ``signal`` module is not shadowed inside the handler.
        raise TimeoutError(
            "Operation did not finish within {} seconds".format(seconds)
        )

    def timeout_decorator(func):

        @wraps(func)
        def timeout_wrapper(*args, **kwargs):
            # signal.signal returns the handler it replaces; keep it so it
            # can be reinstated once the guarded call is over.
            previous = signal.signal(signal.SIGALRM, _timeout_error)
            try:
                # Armed inside the try so that a bad ``seconds`` value
                # still goes through the cleanup below.
                signal.setitimer(signal.ITIMER_REAL, seconds)
                return func(*args, **kwargs)
            finally:
                # Disarm first so the alarm can never fire into the
                # restored handler.
                signal.setitimer(signal.ITIMER_REAL, 0)
                # ``previous`` is None when the old handler was not
                # installed from Python; it cannot be passed back in.
                if previous is not None:
                    signal.signal(signal.SIGALRM, previous)

        return timeout_wrapper

    return timeout_decorator

##########################################################################
## Exception Handling
##########################################################################
Expand Down
3 changes: 2 additions & 1 deletion baleen/wrangle.py
Expand Up @@ -26,6 +26,7 @@
from copy import deepcopy
from dateutil import parser as dtparser

from baleen.config import settings
from baleen.models import Post
from baleen.utils.decorators import reraise
from baleen.exceptions import WranglingError, FetchError
Expand Down Expand Up @@ -163,7 +164,7 @@ def fetch(self, save=True):
if not self.is_wrangled():
raise ValueError("Entry not yet wrangled, cannot fetch.")

response = requests.get(self.post.url)
response = requests.get(self.post.url, timeout=settings.timeout)
response.raise_for_status()

if response.text:
Expand Down

0 comments on commit 2e5d837

Please sign in to comment.