Skip to content

Commit

Permalink
Merge pull request #2455 from erasche/grt
Browse files Browse the repository at this point in the history
Galactic Radio Telescope
  • Loading branch information
jmchilton committed Jul 22, 2016
2 parents 5ff6d7a + 212f7f0 commit 8dbf987
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 0 deletions.
41 changes: 41 additions & 0 deletions doc/source/admin/grt.rst
@@ -0,0 +1,41 @@
Galactic Radio Telescope
========================

This is an opt-in service which Galaxy admins can configure to contribute their
job run data back to the community. We hope that by collecting this information
we can build accurate models of tool CPU/memory/time requirements. In turn,
admins will be able to use this analyzed data to optimize their job
distribution across highly heterogenous clusters.

Registration
------------

You will need to register your Galaxy instance with the Galactic Radio
Telescope (GRT). This can be done `https://radio-telescope.galaxyproject.org
<https://radio-telescope.galaxyproject.org>`__.

Submitting Data
---------------

Once you've registered your Galaxy instance, you'll receive an instance ID and
an API key which are used to run ``scripts/grt.py``. The tool itself is very simple
to run. It collects the last 7 days (by default) of data from your Galaxy
server, and sends them to the GRT for processing and display. Additionally
it collects the total number of users, and the number of users who ran
jobs in the last N days.

Running the tool is simple:

.. code-block:: shell
python scripts/grt.py \
<INSTANCE_UUID> \
<API_KEY> \
-c config/galaxy.ini \
--grt-url https://radio-telescope.galaxyproject.org/api/v1/upload/
--days 7
The only required parameters are the instance ID and API key. As you can see in
the example command, the GRT URL is configurable. If you do not wish to
participate in the public version of this experiment you can host your own
radio telescope to collect Galactic information.
2 changes: 2 additions & 0 deletions doc/source/admin/index.rst
Expand Up @@ -15,3 +15,5 @@ documentation. These resources should be used together.
framework_dependencies.rst

useful_scripts.rst

grt.rst
226 changes: 226 additions & 0 deletions scripts/grt.py
@@ -0,0 +1,226 @@
#!/usr/bin/env python
"""Script for uploading Galaxy statistics to the Galactic radio telescope.
See doc/source/admin/grt.rst for more detailed usage information.
"""
from __future__ import print_function

import os
import sys
import json
import urllib2
import argparse
import sqlalchemy as sa
import yaml
import re

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'lib')))

from galaxy.util.properties import load_app_properties
import galaxy.config
from galaxy.objectstore import build_object_store_from_config
from galaxy.model import mapping

sample_config = os.path.abspath(os.path.join(os.path.dirname(__file__), 'grt.yml.sample'))
default_config = os.path.abspath(os.path.join(os.path.dirname(__file__), 'grt.yml'))


def _init(config):
if config.startswith('/'):
config = os.path.abspath(config)
else:
config = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, config))

properties = load_app_properties(ini_file=config)
config = galaxy.config.Configuration(**properties)
object_store = build_object_store_from_config(config)

return (
mapping.init(
config.file_path,
config.database_connection,
create_tables=False,
object_store=object_store
),
object_store,
config.database_connection.split(':')[0]
)


def _sanitize_dict(unsanitized_dict):
sanitized_dict = dict()

for key in unsanitized_dict:
if key == 'values' and type(unsanitized_dict[key]) is list:
sanitized_dict[key] = None
else:
sanitized_dict[key] = _sanitize_value(unsanitized_dict[key])

if sanitized_dict[key] is None:
del sanitized_dict[key]

if len(sanitized_dict) == 0:
return None
else:
return sanitized_dict


def _sanitize_list(unsanitized_list):
sanitized_list = list()

for key in range(len(unsanitized_list)):
sanitized_value = _sanitize_value(unsanitized_list[key])
if not None:
sanitized_list.append(sanitized_value)

if len(sanitized_list) == 0:
return None
else:
return sanitized_list


def _sanitize_value(unsanitized_value):
sanitized_value = None

fp_regex = re.compile('^(\/[^\/]+)+$')

if type(unsanitized_value) is dict:
sanitized_value = _sanitize_dict(unsanitized_value)
elif type(unsanitized_value) is list:
sanitized_value = _sanitize_list(unsanitized_value)
else:
if fp_regex.match(str(unsanitized_value)):
sanitized_value = None
else:
sanitized_value = unsanitized_value

return sanitized_value


def main(argv):
"""Entry point for GRT statistics collection."""
parser = argparse.ArgumentParser()
parser.add_argument('instance_id', help='Galactic Radio Telescope Instance ID')
parser.add_argument('api_key', help='Galactic Radio Telescope API Key')

parser.add_argument('-c', '--config', dest='config', help='Path to GRT config file (scripts/grt.ini)', default=default_config)
parser.add_argument('--dry-run', dest='dryrun', help='Dry run (show data to be sent, but do not send)', action='store_true', default=False)
parser.add_argument('--grt-url', dest='grt_url', help='GRT Server (You can run your own!)')
args = parser.parse_args(argv[1:])

print('Loading GRT ini...')
try:
with open(args.config) as f:
config_dict = yaml.load(f)
except Exception:
with open(sample_config) as f:
config_dict = yaml.load(f)

# set to 0 by default
if 'last_job_id_sent' not in config_dict:
config_dict['last_job_id_sent'] = 0

if args.instance_id:
config_dict['instance_id'] = args.instance_id
if args.api_key:
config_dict['api_key'] = args.api_key
if args.grt_url:
config_dict['grt_url'] = args.grt_url

print('Loading Galaxy...')
model, object_store, engine = _init(config_dict['galaxy_config'])
sa_session = model.context.current

# Fetch jobs COMPLETED with status OK that have not yet been sent.
jobs = sa_session.query(model.Job)\
.filter(sa.and_(
model.Job.table.c.state == "ok",
model.Job.table.c.id > config_dict['last_job_id_sent']
))\
.all()

# Set up our arrays
active_users = []
grt_tool_data = []
grt_jobs_data = []

def kw_metrics(job):
return {
'%s_%s' % (metric.plugin, metric.metric_name): metric.metric_value
for metric in job.metrics
}

# For every job
for job in jobs:
if job.tool_id in config_dict['tool_blacklist']:
continue

# Append an active user, we'll reduce at the end
active_users.append(job.user_id)

# Find the tool in our normalized tool table.
if (job.tool_id, job.tool_version) not in grt_tool_data:
grt_tool_idx = len(grt_tool_data)
grt_tool_data.append((job.tool_id, job.tool_version))
else:
grt_tool_idx = grt_tool_data.index((job.tool_id, job.tool_version))

metrics = kw_metrics(job)

wanted_metrics = ('core_galaxy_slots', 'core_runtime_seconds')

grt_metrics = {
k: int(metrics.get(k, 0))
for k in wanted_metrics
}

params = job.raw_param_dict()
for key in params:
params[key] = json.loads(params[key])

job_data = {
'tool': grt_tool_idx,
'date': job.update_time.strftime('%s'),
'metrics': grt_metrics,
'params': _sanitize_dict(params)
}
grt_jobs_data.append(job_data)

if len(jobs) > 0:
config_dict['last_job_id_sent'] = jobs[-1].id

grt_report_data = {
'meta': {
'version': 1,
'instance_uuid': config_dict['instance_id'],
'instance_api_key': config_dict['api_key'],
# We do not record ANYTHING about your users other than count.
'active_users': len(set(active_users)),
'total_users': sa_session.query(model.User).count(),
'recent_jobs': len(jobs),
},
'tools': [
{
'tool_id': a,
'tool_version': b,
}
for (a, b) in grt_tool_data
],
'jobs': grt_jobs_data,
}

if args.dryrun:
print(json.dumps(grt_report_data, indent=2))
else:
try:
urllib2.urlopen(config_dict['grt_url'], data=json.dumps(grt_report_data))
except urllib2.HTTPError as htpe:
print(htpe.read())
exit(1)

# Update grt.ini with last id of job (prevent duplicates from being sent)
with open(args.config, 'w') as f:
yaml.dump(config_dict, f, default_flow_style=False)

if __name__ == '__main__':
main(sys.argv)
7 changes: 7 additions & 0 deletions scripts/grt.yml.sample
@@ -0,0 +1,7 @@
galaxy_config: config/galaxy.ini
#instance_id: blah
#api_key: blah
grt_url: https://radio-telescope.galaxyproject.org/api/v1/upload
tool_blacklist:
- __SET_METADATA__
- upload1

0 comments on commit 8dbf987

Please sign in to comment.