/
worker.py
45 lines (35 loc) · 1.32 KB
/
worker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import logging
from servicelayer.worker import Worker
from memorious import settings
from memorious.logic.context import Context
from memorious.core import manager, conn, get_rate_limit
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
class MemoriousWorker(Worker):
    """Worker that executes crawler stage tasks and triggers scheduled runs."""

    def boot(self):
        """Create the ``scheduler`` rate limit that ``periodic`` consults."""
        interval = settings.SCHEDULER_INTERVAL
        self.scheduler = get_rate_limit(
            'scheduler',
            unit=60,
            interval=interval,
            limit=1,
        )

    def periodic(self):
        """Run the scheduled crawlers if the limiter check passes.

        Nothing happens when ``self.scheduler.check()`` is falsy or when
        ``settings.DEBUG`` is truthy.
        """
        # The limiter is consulted first; DEBUG is only read when it passes.
        if not self.scheduler.check() or settings.DEBUG:
            return
        log.info("Running scheduled crawlers ...")
        self.scheduler.update()
        manager.run_scheduled()

    def handle(self, task):
        """Execute the payload of ``task`` in a context rebuilt from its state.

        The context is built with ``Context.from_state`` from the task's
        ``context`` and ``stage.stage``. If the context's crawler reports
        ``disabled``, the payload is not executed.
        """
        payload = task.payload
        current_stage = task.stage.stage
        ctx = Context.from_state(task.context, current_stage)
        if not ctx.crawler.disabled:
            ctx.execute(payload)

    def after_task(self, task):
        """Call the crawler's ``aggregate`` once ``task.job.is_done()`` holds."""
        if not task.job.is_done():
            return
        current_stage = task.stage.stage
        ctx = Context.from_state(task.context, current_stage)
        ctx.crawler.aggregate(ctx)

    def get_stages(self):
        """Return the distinct string forms of the stages in ``manager.stages``.

        Duplicates are removed through a set, so the order of the returned
        list is not guaranteed.
        """
        unique_stages = set()
        # Each entry is unpacked as a pair; only the second element is used.
        for _unused, stage in manager.stages:
            unique_stages.add(str(stage))
        return list(unique_stages)
def get_worker():
    """Build and return a ``MemoriousWorker`` using the module's ``conn``."""
    worker = MemoriousWorker(conn=conn)
    return worker