Skip to content

Commit

Permalink
Merge pull request #20 from unlimitedlabs/example_workflow
Browse files Browse the repository at this point in the history
Adds a simple workflow and an interactive demo script to run a project on it.
  • Loading branch information
thisisdhaas committed Sep 25, 2015
2 parents 44d75d4 + 13646ec commit a9cfcb5
Show file tree
Hide file tree
Showing 12 changed files with 503 additions and 2 deletions.
3 changes: 2 additions & 1 deletion example_project/example_project/orchestra_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def setup_orchestra(settings_module_name):

# Installed orchestra workflows
settings.ORCHESTRA_PATHS = (
('simple_workflow.workflow', 'simple_workflow'),
)

# The maximum number of tasks an expert can pick up at a time.
Expand Down Expand Up @@ -165,7 +166,7 @@ def setup_orchestra(settings_module_name):
settings.GOOGLE_PROJECT_ROOT_ID = ''

# Feature flags for toggling optional slack integration
settings.SLACK_INTERNAL = True
settings.SLACK_INTERNAL = False
settings.SLACK_EXPERTS = False

# Settings for slack notifications. Notifications are shared internally
Expand Down
1 change: 1 addition & 0 deletions example_project/example_project/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'simple_workflow',
)

STATICFILES_FINDERS = (
Expand Down
246 changes: 246 additions & 0 deletions example_project/interactive_simple_workflow_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
from pprint import pprint
from time import sleep

import argparse
import os
import subprocess
import sys

# Set up the standalone Django script
proj_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "example_project.settings")
sys.path.append(proj_path)
from django.core.wsgi import get_wsgi_application
application = get_wsgi_application()

from orchestra.orchestra_api import create_orchestra_project
from orchestra.orchestra_api import get_project_information

FAST_MODE = True


def pause(n_seconds):
if not FAST_MODE:
sleep(n_seconds)


def run_demo():
intro()
describe_workflow()
project_id = create_project()
project_info_1(project_id)
rating_task()
project_info_2(project_id)
conclusion()


def intro():
subprocess.call('clear')
print('----------------------------------------------')
print('Welcome to the Orchestra Simple Workflow Demo.')
print('----------------------------------------------')
pause(2)
print('In this demo, we will create a new project using the "Simple '
'Workflow" workflow,')
pause(2)
print('which automatically extracts an image from a '
'url and asks a human expert to rate it.')
pause(2)
ack = input('Are you ready to begin? (y/n): ').lower()
while ack not in ['y', 'n']:
ack = input('Please respond with "y" or "n": ').lower()
if ack == 'n':
print('Ok, maybe some other time!')
sys.exit()
print('Great!')


def describe_workflow():
print('')
print('The Simple Workflow has two steps, one automated and one human.')
pause(2)
print('The automated step takes a URL and extracts a random image from '
'the page.')
pause(2)
print('The human step asks an expert to rate how "awesome" the image is '
'on a scale from one to five.')
pause(2)
print('For this demo, we will scrape an image from www.josephbotros.com, '
'the homepage of one of our visionary developers.')
pause(2)
print('If you open that URL in a browser, it should be pretty obvious '
'which image the task will scrape.')
pause(2)


def create_project():
print('')
print("Let's start by creating a new project that uses the workflow.")
pause(2)
print("We'll use the 'create_orchestra_project' API call.")
pause(2)
print('The API call looks like this:')
print('''
create_orchestra_project(
'simple_workflow', # The slug representing our workflow
'A test run of our simple workflow', # A description of the new project
10, # A priority level for the project
{
'url': 'http://www.josephbotros.com/' # Data required by the workflow:
},
)
''')
pause(4)
print('Make sure you have the example project running in another window '
'(`python manage.py runserver`).')
input('Press enter when you are ready to make the API call, which will '
'create a new project and print out its id. ')

project_id = create_orchestra_project(
None,
'simple_workflow',
'A test run of our simple workflow',
10,
{
'url': 'http://www.josephbotros.com/'
},
'https://docs.google.com/document/d/1s0IJycNAwHtZfsUwyo6lCJ7kI9pTOZddcaiRDdZUSAs', # noqa
'train',
)
print('Project with id {} created!'.format(project_id))
pause(2)
return project_id


def project_info_1(project_id):
print('')
print('When we created our project, it immediately ran the first step of '
'the workflow: scraping an image from the website we passed in.')
pause(2)
print("Let's verify that this worked successfully by using Orchestra's "
"API to check the project info.")
pause(2)
print('The API call looks like this:')
print('')
print('get_project_information(project_id)')
print('')
pause(2)
input('Press enter when you are ready to make the API call and print out '
'the JSON data received in response. ')

project_info = get_project_information(project_id)
print("Information received! Here's what we got:")
print('')
pause(2)
pprint(project_info)
pause(4)
print('')
print("Note that 'tasks.crawl.status' is 'Complete', and "
"'tasks.crawl.latest_data.image' is set to an image URL scraped "
"from our site. Paste the URL into a browser and you should see "
"Joseph's smiling face!")
pause(2)
print("Also, check out 'tasks.rate'. That's the human step we'll need to "
"do next. Observe that its status is 'Awaiting Processing' and "
"that 'latest_data' is None because no work has been done yet.")
pause(4)


def rating_task():
print('')
print("Let's fix that! It's time to work on the second step in our "
"workflow: rating the image.")
pause(2)
print("In a browser window, log into Orchestra as a worker at "
"127.0.0.1:8000/orchestra/app. If you haven't created a worker "
"account yet, you can log in as the demo worker: username 'demo' "
"and password 'demo'.")
pause(2)
print("Then, click the 'New delivery task' button to get the rating task "
"assigned to you, and rate the photo to complete the task.")
pause(2)
print("When you're happy with your rating, click 'Submit' at the bottom "
"of the page.")
pause(2)
input('Press enter when you have submitted the task. ')


def get_rating_info(msg, project_id):
input(msg)
project_info = get_project_information(project_id)
complete = project_info['tasks']['rate']['status'] == 'Complete'
rating = (project_info['tasks']['rate']['latest_data'].get('rating')
if complete else None)
print('')
print("Information received! Here's what we got:")
pause(2)
print('')
pprint(project_info)
print('')
pause(4)

return complete, rating


def project_info_2(project_id):
print('')
print('Well done. You have completed your first Orchestra project!')
pause(2)
print("Let's verify that your rating was stored successfully by using "
"Orchestra's API to check the project info again.")
pause(2)
print('As a reminder, the API call looks like this:')
print('')
print('get_project_information(project_id)')
print('')
pause(2)

complete, rating = get_rating_info(
'Press enter when you are ready to make the API call and print out '
'the resulting JSON data. ', project_id)

while not complete:
print("Uh oh! It looks like the task did not get completed (see "
"'tasks.rate.status' in the output above.)")
pause(2)
complete, rating = get_rating_info(
'Press enter to try again, or CTRL-C to exit. ', project_id)

print("Task complete! Note that 'tasks.rate.status' is 'Complete' in the "
"output above.")
pause(2)
if not rating:
print('Uh oh! Looks like you submitted the task without rating the '
"image. (see 'tasks.rate.latest_data' in the output above.)")
pause(2)
print('We probably should have validated that :-)')
else:
print("Note that 'tasks.rate.latest_data.rating' is set to '{}', the "
"rating you selected.".format(rating))
pause(4)


def conclusion():
print('')
print('')
print("Congratulations! Now you're ready to start building workflows of "
"your own!")
pause(2)
print("To see a more in-depth explanation of how workflows are built, "
"check out our illustrative workflow at "
"http://orchestra.readthedocs.org/en/latest/example_use.html.")
pause(2)
print('We hope you enjoyed this tutorial, and welcome to the Orchestra '
'community.')
pause(2)
print('Goodbye for now!')


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-s', '--slow', action='store_true',
help='Display text fast.')
args = parser.parse_args()
FAST_MODE = not args.slow
run_demo()
23 changes: 23 additions & 0 deletions orchestra/fixtures/demo_worker.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[
{
"model": "auth.User",
"pk": 1,
"fields": {
"username": "demo",
"first_name": "Demo",
"last_name": "User",
"is_active": true,
"is_superuser": false,
"is_staff": false,
"password": "pbkdf2_sha256$20000$XjRfRAzRxnpZ$bhliXAV11CzsaiPnuHZ3RuVOj/0FBsoctMyx8Hi5Q2w=",
"email": "noreply@example.org"
}
},
{
"model": "orchestra.Worker",
"pk": 1,
"fields": {
"user": 1
}
}
]
3 changes: 2 additions & 1 deletion orchestra/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
from django.contrib.auth.models import User
from django.db import models
from jsonfield import JSONField
Expand Down Expand Up @@ -27,7 +28,7 @@ def __str__(self):

class Worker(models.Model):
user = models.OneToOneField(User)
start_datetime = models.DateTimeField(auto_now_add=True)
start_datetime = models.DateTimeField(default=datetime.now)
slack_username = models.CharField(max_length=200, blank=True, null=True)

def __str__(self):
Expand Down
Empty file added simple_workflow/__init__.py
Empty file.
73 changes: 73 additions & 0 deletions simple_workflow/crawl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.parse import urlparse

import logging
import random
import re
import requests

logger = logging.getLogger(__name__)

IMAGE_FILE_REGEX = re.compile('([-\w]+\.(?:jpg|jpeg|gif|png))',
re.IGNORECASE)


def crawl_page(project_data, prerequisites):
""" Picks a random image off of the passed URL."""

result = {
'status': 'success',
'image': None
}
url = project_data.get('url')
if not url:
result['status'] = 'error'
result['error_message'] = 'URL was not provided.'
return result

# Crawl the website for images.
logger.info('Starting to crawl %s', url)
images = find_images(url)
num_images = len(images)
logger.info('Found %s images', num_images)
if num_images == 0:
result['status'] = 'error'
result['error_message'] = 'Unable to find images at the provided URL.'
return result

# Return a random one.
logger.info('Picking a random one...')
image = random.choice(list(images))
result['image'] = image
return result


def find_images(url):
""" Fetches a url's HTML and extracts all image sources in an <img> tag.
"""

images = set()

# Fetch the content.
headers = {
'User-Agent': ('Mozilla/5.0 (compatible; OrchestraBot/1.0; '
'noreply@example.org)'),
}
response = requests.get(url, headers=headers)
if response.status_code < 200 or response.status_code >= 300:
logger.error("Couldn't fetch url {}".format(url))
return images
content = response.text

# Find images in the content.
soup = BeautifulSoup(content)
tags = soup.find_all('img', src=IMAGE_FILE_REGEX)
for tag in tags:
link = tag.get('src')
if link is None:
continue
if not bool(urlparse(link).netloc):
link = urljoin(url, link)
images.add(link)
return images

0 comments on commit a9cfcb5

Please sign in to comment.