Skip to content

Commit

Permalink
Merge pull request #60 from gwu-libraries/t44-scheduling
Browse files Browse the repository at this point in the history
T44 scheduling. Fixes #44 .
  • Loading branch information
kerchner committed Dec 14, 2015
2 parents 36c9b23 + 8692d39 commit 94fc52b
Show file tree
Hide file tree
Showing 17 changed files with 262 additions and 77 deletions.
2 changes: 1 addition & 1 deletion docker/app-dev/apache.conf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Alias /static/ /opt/sfm-static/

# For WSGI daemon mode:
# see http://code.google.com/p/modwsgi/wiki/QuickConfigurationGuide
WSGIDaemonProcess sfm processes=2 threads=15 python-path=/opt/sfm-ui/sfm
WSGIDaemonProcess sfm processes=1 threads=15 python-path=/opt/sfm-ui/sfm
WSGIProcessGroup sfm

# For WSGI embedded mode:
Expand Down
5 changes: 5 additions & 0 deletions docker/example.dev.docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,10 @@ sfmdevapp:
- ~/sfm-ui:/opt/sfm-ui
environment:
- SFM_DEBUG=True
#Turn on logging
#- SFM_DJANGO_LOG=DEBUG
#- SFM_DJANGO_REQUEST_LOG=DEBUG
#- SFM_APSCHEDULER_LOG=DEBUG
#- SFM_UI_LOG=DEBUG
#If not running on localhost
#- SFM_ALLOWED_HOSTS=myserver.mydomain.edu
9 changes: 8 additions & 1 deletion docs/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ When running on a remote VM, some ports (e.g., 15672 used by the RabbitMQ manage
be blocked. `SSH port forwarding <https://help.ubuntu.com/community/SSH/OpenSSH/PortForwarding>`_
can help make those ports available.

Django logs
^^^^^^^^^^^
Django logs for SFM UI are written to the Apache logs. In the docker environment, the level of various
loggers can be set from environment variables. For example, setting `SFM_APSCHEDULER_LOG` to `DEBUG`
in the `docker-compose.yml` will turn on debug logging for the apscheduler logger. The logger for
the SFM UI application is called `ui` and is controlled by the `SFM_UI_LOG` environment variable.

Apache logs
^^^^^^^^^^^
SFM UI runs behind Apache, which means Apache collects stdout and stderr in `/var/log/apache2/error.log`.
Expand All @@ -116,4 +123,4 @@ Initial data
^^^^^^^^^^^^
The development and master docker images for SFM UI contain some initial data. This includes a user ("testuser",
with password "password"). For the latest initial data, see `fixtures.json`. For more information on fixtures,
see the `Django docs <https://docs.djangoproject.com/en/1.8/howto/initial-data/>`_.
see the `Django docs <https://docs.djangoproject.com/en/1.8/howto/initial-data/>`_.
1 change: 1 addition & 0 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ pytz
django-crispy-forms>=1.5,<1.6
appdeps>=1.0,<1.1
pika>=0.10,<0.11
apscheduler>=3.0,<3.0.3
git+https://github.com/gwu-libraries/sfm-utils.git@0.1.0#egg=sfmutils
4 changes: 4 additions & 0 deletions sfm/message_consumer/sfm_ui_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ class SfmUiConsumer(BaseConsumer):
messages from the queue and updates the models as appropriate.
"""
def on_message(self):
# TODO: Currently assumes message is a harvest status message.
# We'll want to check the routing key before processing,
# as on_message will likely be invoked for various
# key bindings in the future.
m = self.message
m_id = m['id']
m_status = m['status']
Expand Down
11 changes: 6 additions & 5 deletions sfm/sfm/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,12 @@
'django.contrib.sites',
'django.contrib.staticfiles',
'ui',
'allauth', # registration
'allauth.account', # registration
'allauth.socialaccount', # registration
'crispy_forms', # for django crispy forms
'message_consumer',
'allauth', # registration
'allauth.account', # registration
'allauth.socialaccount', # registration
'crispy_forms', # for django crispy forms
'apscheduler', # Scheduler
'message_consumer', # Message Consumer
]

MIDDLEWARE_CLASSES = (
Expand Down
43 changes: 39 additions & 4 deletions sfm/sfm/settings/docker_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,53 @@

INSTALLED_APPS.append("finalware")

# This field is stored in `User.USERNAME_FIELD`. This is usually a `username` or an `email`.
# This field is stored in `User.USERNAME_FIELD`.
# This is usually a `username` or an `email`.
SITE_SUPERUSER_USERNAME = env.get('SFM_SITE_ADMIN_NAME', 'sfmadmin')

# This field is stored in the `email` field, provided, that `User.USERNAME_FIELD` is not an `email`.
# If `User.USERNAME_FIELD` is already an email address, set `SITE_SUPERUSER_EMAIL = SITE_SUPERUSER_USERNAME`
# This field is stored in the `email` field, provided
# that `User.USERNAME_FIELD` is not an `email`.
# If `User.USERNAME_FIELD` is already an email address,
# set `SITE_SUPERUSER_EMAIL = SITE_SUPERUSER_USERNAME`
SITE_SUPERUSER_EMAIL = env.get('SFM_SITE_ADMIN_EMAIL', 'nowhere@example.com')

# A hashed version of `SITE_SUPERUSER_PASSWORD` will be store in superuser's `password` field.
# A hashed version of `SITE_SUPERUSER_PASSWORD` will be stored
# in the superuser's `password` field.
SITE_SUPERUSER_PASSWORD = env.get('SFM_SITE_ADMIN_PASSWORD', 'password')

STATIC_ROOT = "/opt/sfm-static"

# Logging configuration: every logger listed below writes to a single
# stream ("console") handler, and its level is read from the paired
# environment variable, falling back to INFO when the variable is unset.
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
        },
    },
    # The per-logger settings differ only in the logger name and the
    # environment variable that supplies the level, so build them from a
    # (logger name, environment variable) table.
    'loggers': dict(
        (logger_name, {
            'handlers': ['console'],
            'level': env.get(level_var, 'INFO'),
            'propagate': True,
        })
        for logger_name, level_var in (
            ('django', 'SFM_DJANGO_LOG'),
            ('django.request', 'SFM_DJANGO_REQUEST_LOG'),
            ('apscheduler', 'SFM_APSCHEDULER_LOG'),
            ('ui', 'SFM_UI_LOG'),
        )
    ),
}
RABBITMQ_HOST = "mq"
RABBITMQ_USER = env.get('MQ_ENV_RABBITMQ_DEFAULT_USER')
RABBITMQ_PASSWORD = env.get('MQ_ENV_RABBITMQ_DEFAULT_PASS')
5 changes: 4 additions & 1 deletion sfm/ui/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
default_app_config = 'ui.rabbit.RabbitWorker'
# Explicit relative import: a bare `from sched import ...` only finds the
# sibling module through Python 2 implicit relative imports, and the name
# collides with the standard-library `sched` module under absolute-import
# semantics.
from .sched import start_sched

# Start the background scheduler as a side effect of importing the package.
start_sched()
default_app_config = 'ui.rabbit.RabbitWorker'
16 changes: 8 additions & 8 deletions sfm/ui/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ class Collection(a.ModelAdmin):
'stats', 'date_added', 'date_updated']
list_filter = ['group', 'name', 'description', 'is_active', 'is_visible',
'stats', 'date_added', 'date_updated']
search_fields = ['group', 'name', 'description', 'is_active', 'is_visible',
'stats', 'date_added', 'date_updated']
search_fields = ['group', 'name', 'description', 'is_active',
'is_visible', 'stats', 'date_added', 'date_updated']


class SeedSet(a.ModelAdmin):
fields = ('collection', 'credential', 'harvest_type', 'name', 'description',
'is_active', 'schedule', 'harvest_options', 'max_count', 'stats',
'date_added', 'start_date', 'end_date')
fields = ('collection', 'credential', 'harvest_type', 'name',
'description', 'is_active', 'schedule', 'harvest_options',
'max_count', 'stats', 'date_added', 'start_date', 'end_date')
list_display = ['collection', 'credential', 'harvest_type', 'name',
'description', 'is_active', 'schedule', 'harvest_options',
'max_count', 'stats', 'date_added', 'start_date',
Expand All @@ -69,9 +69,9 @@ class SeedSet(a.ModelAdmin):
'max_count', 'stats', 'date_added', 'start_date',
'end_date']
search_fields = ['collection', 'credential', 'harvest_type', 'name',
'description', 'is_active', 'schedule', 'harvest_options',
'max_count', 'stats', 'date_added', 'start_date',
'end_date']
'description', 'is_active', 'schedule',
'harvest_options', 'max_count', 'stats', 'date_added',
'start_date', 'end_date']


class Seed(a.ModelAdmin):
Expand Down
13 changes: 12 additions & 1 deletion sfm/ui/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,22 @@ def save(self, commit=True):

class SeedSetForm(forms.ModelForm):

OPTIONS = (
('daily', 'daily'),
('hourly', 'hourly'),
('minutely', 'minutely'),
)

schedule = forms.CharField(max_length=12,
widget=forms.Select(choices=OPTIONS))
start_date = forms.DateTimeField(required=False)
end_date = forms.DateTimeField(required=False)

class Meta:
model = SeedSet
fields = '__all__'
exclude = []
widgets = None
widgets = {}
localized_fields = None
labels = {}
help_texts = {}
Expand Down
74 changes: 74 additions & 0 deletions sfm/ui/jobs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import json
from .rabbit import RabbitWorker
from .models import SeedSet, Collection, Seed, Credential
import datetime
import logging
from sfmutils.consumer import EXCHANGE

log = logging.getLogger(__name__)

def seedset_harvest(d):
    """Build and publish a harvest start message for a seed set.

    Loads the seed set with id ``d`` together with its credential, collects
    the seed set's seeds, and publishes a JSON message to ``EXCHANGE`` via
    the RabbitWorker channel using the routing key
    ``harvest.start.<platform>.<harvest_type>``.

    :param d: id of the SeedSet to harvest; also used as the message id.
    :raises SeedSet.DoesNotExist: if no seed set with that id exists.
    """
    # Load the seed set and its credential once instead of issuing a
    # separate query for every field that is needed.
    seed_set = SeedSet.objects.select_related('credential').get(id=d)
    credential = seed_set.credential
    collection_id = seed_set.collection_id
    harvest_type = seed_set.harvest_type

    # Seeds are sent as dicts of token/uid; an empty token is left out of
    # the message entirely rather than sent as an empty string.
    seeds = list(Seed.objects.filter(seed_set=d).values('token', 'uid'))
    for seed in seeds:
        if seed['token'] == '':
            seed.pop('token', None)

    # harvest_options may be blank on the model; treat blank as "no options"
    # instead of failing to parse an empty string.
    raw_options = seed_set.harvest_options
    options = json.loads(raw_options) if raw_options else {}

    # The credential token is stored as JSON text; parse it directly
    # (no str() coercion, which can fail on non-ASCII text under Python 2).
    # NOTE(review): assumes Credential.token is a text field -- confirm.
    credentials = json.loads(credential.token)

    # Routing Key
    key = ''.join(['harvest.start.', str(credential.platform), '.',
                   harvest_type])

    # message to be sent to queue
    # TODO: Unique id
    # TODO: Correct path
    m = {
        'id': d,
        'type': harvest_type,
        'options': options,
        'credentials': credentials,
        'collection': {
            'id': str(collection_id),
            'path': '/tmp/collection/' + str(collection_id)
        },
        'seeds': seeds,
    }

    log.info("Sending %s message to %s with id %s", harvest_type, key,
             m["id"])
    log.debug("Message with id %s is %s", m["id"], json.dumps(m, indent=4))

    # Publish message to queue via rabbit worker
    RabbitWorker.channel.basic_publish(exchange=EXCHANGE,
                                       routing_key=key, body=json.dumps(m))
29 changes: 29 additions & 0 deletions sfm/ui/migrations/0003_auto_20151202_2215.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter the ``end_date``, ``schedule`` and ``start_date`` fields of SeedSet."""

    # Must be applied after the previous migration of the ``ui`` app.
    dependencies = [
        ('ui', '0002_auto_20151013_1522'),
    ]

    operations = [
        # end_date becomes optional (nullable and blank allowed).
        migrations.AlterField(
            model_name='seedset',
            name='end_date',
            field=models.DateTimeField(null=True, blank=True),
        ),
        # schedule becomes a CharField limited to 12 characters.
        migrations.AlterField(
            model_name='seedset',
            name='schedule',
            field=models.CharField(max_length=12),
        ),
        # start_date becomes optional (nullable and blank allowed).
        migrations.AlterField(
            model_name='seedset',
            name='start_date',
            field=models.DateTimeField(null=True, blank=True),
        ),
    ]
6 changes: 3 additions & 3 deletions sfm/ui/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ class SeedSet(models.Model):
name = models.CharField(max_length=255)
description = models.TextField(blank=True)
is_active = models.BooleanField(default=True)
schedule = models.CharField(max_length=255, blank=True)
schedule = models.CharField(max_length=12)
harvest_options = models.TextField(blank=True)
max_count = models.PositiveIntegerField(default=0)
stats = models.TextField(blank=True)
date_added = models.DateTimeField(default=timezone.now)
start_date = models.DateTimeField(default=timezone.now)
end_date = models.DateTimeField(default=timezone.now)
start_date = models.DateTimeField(blank=True, null=True)
end_date = models.DateTimeField(blank=True, null=True)

def __str__(self):
return '<SeedSet %s "%s">' % (self.id, self.name)
Expand Down
2 changes: 2 additions & 0 deletions sfm/ui/rabbit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ def ready(self):
# Declare sfm_exchange
RabbitWorker.channel.exchange_declare(exchange=EXCHANGE,
type="topic", durable=True)
# Declare harvester queue
RabbitWorker.channel.queue_declare(queue="sfm_ui", durable=True)
16 changes: 16 additions & 0 deletions sfm/ui/sched.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from apscheduler.schedulers.background import BackgroundScheduler
import logging

# Module-level logger named after this module.
log = logging.getLogger(__name__)

log.debug("Instantiating scheduler")
# Single BackgroundScheduler instance, created when this module is imported.
# It is handed out by get_sched() and started by start_sched().
sched = BackgroundScheduler()


def get_sched():
    """Return the module-level scheduler instance."""
    return sched


def start_sched():
    """Start the module-level scheduler unless it is already running.

    Starting a scheduler that is already running raises an error, so a
    repeated call is turned into a no-op instead.
    """
    if sched.running:
        log.debug("Scheduler already running")
        return
    log.info("Starting scheduler")
    sched.start()
19 changes: 19 additions & 0 deletions sfm/ui/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from jobs import seedset_harvest
from sched import get_sched
from models import SeedSet

def schedule_harvest(id, schedule, s, e):
    """(Re)schedule the recurring harvest job for a seed set.

    Any existing job registered under ``str(id)`` is removed first. A new
    cron-triggered job calling ``seedset_harvest(id)`` is then added when
    ``schedule`` is one of 'hourly', 'daily' or 'minutely'; for any other
    value no job is added.

    :param id: seed set id; its string form is used as the job id.
    :param schedule: 'hourly', 'daily' or 'minutely'.
    :param s: start date passed to the trigger.
    :param e: end date passed to the trigger.
    """
    scheduler = get_sched()
    job_id = str(id)

    # Drop the previous job, if any, so the new settings replace it.
    if scheduler.get_job(job_id) is not None:
        scheduler.remove_job(job_id)

    # Each schedule name maps to the cron field that is set to '*/1'.
    cron_field_by_schedule = (
        ('hourly', 'hour'),
        ('daily', 'day'),
        ('minutely', 'minute'),
    )
    for schedule_name, cron_field in cron_field_by_schedule:
        if schedule == schedule_name:
            scheduler.add_job(lambda: seedset_harvest(id), id=job_id,
                              trigger='cron', start_date=s, end_date=e,
                              **{cron_field: '*/1'})
            break

0 comments on commit 94fc52b

Please sign in to comment.