From 7026231dad8e9e04236d768baa072275ec669fdf Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Sun, 30 Sep 2012 21:43:15 -0500
Subject: [PATCH] Fusion of what was the galaxy-vm-launcher into CloudBioLinux.

The galaxy-vm-launcher was an alternative method for building and launching
Galaxy cloud images to the mi-deployment/CloudMan workflow. The
galaxy-vm-launcher is used at the University of Minnesota as the launching
point for a CLIA-certified Galaxy-based variant detection workflow. As I
helped migrate mi-deployment into CloudBioLinux, I made numerous modifications
to allow reuse between the CloudMan setup code and the functionality in this
changeset. This changeset isn't just moving galaxy-vm-launcher into the same
repository as CloudBioLinux; the code base has been heavily modified to make
use of existing code in CloudBioLinux and vice versa.

While this mechanism for building Galaxy images does not provide all of the
bells and whistles available via CloudMan (auto-scaling, admin console, FTP
server, share-strings), it does allow a user to launch a clean Ubuntu image,
configure it with CloudBioLinux+Galaxy, and transfer data to the VM and into a
Galaxy data library, all from a single command-line execution and
configuration file. This gives rise to easy, automated multi-cloud
reproducibility and could provide a path to real hybrid cloud computing.

As part of the migration several enhancements were made to this code,
including:

- Added initial CloudBioLinux and CloudMan support. Actions for installing
  CloudBioLinux and launching packaged CloudMan instances have been added.
  Added the ability to tweak CloudBioLinux fabricrc properties, specify a
  CloudBioLinux flavor, and set CloudMan user data from a unified
  settings.yaml file.
- Refactored the code to provide uniform access to options (whether they come
  from command-line arguments or a YAML settings file).
- Moved all command-line argument and YAML parsing logic into its own small
  file - main.py. This file simply builds up a Python dictionary and passes
  it to the deploy function in cloudbio.deploy. The upshot of this is that
  the dictionary could be built up in some other fashion (perhaps by a web UI
  such as BioCloudCentral or Refinery) and all of the functionality can be
  utilized as a Python library.
- The input settings file can now be specified at runtime
  (deploy.sh --settings=/path/to/settings.yaml).
- As part of synchronizing properties between CloudBioLinux and
  galaxy-vm-launcher, many new paths and settings were parameterized and can
  now be overridden if needed.
- Implemented automated packaging of instances for OpenStack.
- Support for versions of Ubuntu prior to Ubuntu 12.04 LTS was dropped.
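To illustrate the library-style usage described above, here is a minimal
sketch of driving the deployer directly from Python instead of through
deploy.sh. It only assumes the cloudbio.deploy.deploy entry point added by
this changeset; the dictionary keys mirror deploy/main.py and
deploy/settings.yaml-sample, the values are placeholders, and a real
dictionary would carry the remaining sections (galaxy users,
fabricrc_overrides, etc.) as well:

    from cloudbio.deploy import deploy

    # Build the same dictionary deploy/main.py assembles from settings.yaml
    # plus command-line flags, but construct it programmatically (e.g. from a
    # web UI form). Abridged; values are placeholders.
    options = {
        "vm_host": "aws",
        "key_file": "/path/to/key.pem",
        "hostname": "galaxy",
        "actions": ["configure", "transfer"],
        "files": ["/data/sample1.fastq"],
        "compressed_files": [],
        "runtime_properties": [],
        "aws": {
            "access_id": "XXXXXXXX",
            "secret_key": "XXXXXXXX",
            "keypair_name": "galaxy_key",
            "size_id": "m1.medium",
            "image_id": "ami-5c9b4935",
        },
    }

    deploy(options)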
--- .gitignore | 5 + cloudbio/deploy/__init__.py | 402 +++++++++++++++++++++++++ cloudbio/deploy/cloudman.py | 36 +++ cloudbio/deploy/config.py | 18 ++ cloudbio/deploy/galaxy.py | 192 ++++++++++++ cloudbio/deploy/image.py | 61 ++++ cloudbio/deploy/main.py | 42 +++ cloudbio/deploy/tools.py | 16 + cloudbio/deploy/util.py | 47 +++ deploy/README.md | 114 +++++++ deploy/TODO | 5 + deploy/Vagrantfile | 14 + deploy/config/tool_data_table_conf.xml | 113 +++++++ deploy/deploy.sh | 11 + deploy/settings.yaml-sample | 338 +++++++++++++++++++++ deploy/setup.sh | 11 + deploy/tools/install_venv.py | 137 +++++++++ deploy/tools/install_virtualenv.sh | 7 + deploy/tools/pip-requires | 8 + deploy/tools/with_venv.sh | 4 + fabfile.py | 16 +- 21 files changed, 1595 insertions(+), 2 deletions(-) create mode 100644 cloudbio/deploy/__init__.py create mode 100644 cloudbio/deploy/cloudman.py create mode 100644 cloudbio/deploy/config.py create mode 100644 cloudbio/deploy/galaxy.py create mode 100644 cloudbio/deploy/image.py create mode 100644 cloudbio/deploy/main.py create mode 100644 cloudbio/deploy/tools.py create mode 100644 cloudbio/deploy/util.py create mode 100644 deploy/README.md create mode 100644 deploy/TODO create mode 100644 deploy/Vagrantfile create mode 100644 deploy/config/tool_data_table_conf.xml create mode 100755 deploy/deploy.sh create mode 100644 deploy/settings.yaml-sample create mode 100755 deploy/setup.sh create mode 100755 deploy/tools/install_venv.py create mode 100644 deploy/tools/install_virtualenv.sh create mode 100644 deploy/tools/pip-requires create mode 100755 deploy/tools/with_venv.sh diff --git a/.gitignore b/.gitignore index f75b03ff1..64914ba66 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,8 @@ dist/ dpkg.lst *.log venv/ +deploy/build +deploy/keys +deploy/settings.yaml +deploy/.venv-deploy +deploy/.vagrant diff --git a/cloudbio/deploy/__init__.py b/cloudbio/deploy/__init__.py new file mode 100644 index 000000000..f7abf05d2 --- /dev/null +++ b/cloudbio/deploy/__init__.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python + +import os + +from tempfile import tempdir + +from cloudbio.utils import _setup_logging, _configure_fabric_environment, _parse_fabricrc +from cloudbio.biodata.genomes import install_data, install_data_s3 +from cloudbio.galaxy import _setup_galaxy_env_defaults +from cloudbio.galaxy.utils import _chown_galaxy +from cloudbio.package.deb import _apt_packages +from fabfile import _perform_install + + +from cloudman import cloudman_launch +from image import configure_MI +from tools import install_tools, purge_tools +from galaxy import setup_galaxy, refresh_galaxy, seed_database, seed_workflows, wait_for_galaxy, purge_galaxy +from util import sudoers_append, wget + +from fabric.main import load_settings +from fabric.api import put, run, env, settings, sudo, cd, get +from fabric.context_managers import prefix +from fabric.colors import red + +from vmlauncher.transfer import FileTransferManager +from vmlauncher import build_vm_launcher + + +def deploy(options): + actions = _expand_actions(options.get("actions")) + vm_launcher = build_vm_launcher(options) + + if _do_perform_action("list", actions): + for node in vm_launcher.list(): + print "Active node with uuid %s <%s>" % (node.uuid, node) + + if _do_perform_action("destroy", actions): + target_name = options["hostname"] + for node in vm_launcher.list(): + node_name = node.name + if node_name == target_name: + vm_launcher.destroy(node) + + if _do_perform_action("cloudman_launch", actions): + cloudman_launch(vm_launcher, options) 
+ + # Do we have remaining actions requiring an vm. + if len(actions) > 0: + print 'Setting up virtual machine' + vm_launcher.boot_and_connect() + _setup_vm(options, vm_launcher, actions) + + +def _setup_vm(options, vm_launcher, actions): + destroy_on_complete = get_boolean_option(options, 'destroy_on_complete', False) + use_galaxy = get_boolean_option(options, 'use_galaxy', True) + try: + ip = vm_launcher.get_ip() + _setup_fabric(vm_launcher, ip, options) + with settings(host_string=ip): + _setup_cloudbiolinux(options) + if 'max_lifetime' in options: + seconds = options['max_lifetime'] + # Unclear why the sleep is needed, but seems to be otherwise + # this doesn't work. + run("bash -c 'nohup sudo shutdown -h %d &'; sleep 2" % seconds) + configure_instance(options, actions) + do_refresh_galaxy = get_boolean_option(options, 'refresh_galaxy', False) + do_upload_genomes = get_boolean_option(options, 'upload_genomes', False) + if do_refresh_galaxy: + refresh_galaxy(env.galaxy_repository) + if use_galaxy: + copy_runtime_properties(ip, options) + if 'transfer' in actions: + transfer_files(options) + if do_upload_genomes: + upload_genomes(options) + if not _seed_at_configure_time(options) and use_galaxy: + seed_database() + seed_workflows(options) + if 'transfer' in actions and use_galaxy: + wait_for_galaxy() + create_data_library_for_uploads(options) + if 'package' in actions: + vm_launcher.package() + if not destroy_on_complete: + print 'Your Galaxy instance (%s) is waiting at http://%s' % (vm_launcher.uuid, ip) + finally: + if destroy_on_complete: + vm_launcher.destroy() + + +def _expand_actions(actions): + unique_actions = set() + for simple_action in ["list", + "destroy", + "transfer", + "purge_galaxy", + "setup_galaxy", + "purge_tools", + "setup_tools", + "purge_genomes", + "setup_genomes", + "setup_ssh_key", + "package", + "setup_image", + "launch", # Dummy action justs launches image + "install_biolinux", + "cloudman_launch", + ]: + if simple_action in actions: + unique_actions.add(simple_action) + compound_actions = {"configure": ["setup_image", "setup_tools", "setup_genomes", "setup_galaxy", "setup_ssh_key"], + "reinstall_galaxy": ["purge_galaxy", "setup_galaxy"], + "reinstall_genomes": ["purge_genomes", "setup_genomes"], + "reinstall_tools": ["purge_tools", "setup_tools"]} + for compound_action in compound_actions.keys(): + if compound_action in actions: + for compound_action_part in compound_actions[compound_action]: + unique_actions.add(compound_action_part) + return unique_actions + + +def _do_perform_action(action, action_list): + do_perform = action in action_list + if do_perform: + action_list.remove(action) + return do_perform + + +def _setup_fabric(vm_launcher, ip, options): + env.user = vm_launcher.get_user() + env.hosts = [ip] + env.key_filename = vm_launcher.get_key_file() + env.disable_known_hosts = True + + +def _setup_cloudbiolinux(options): + def fabricrc_loader(env): + _setup_cloudbiolinux_fabric_properties(env, options) + + flavor = get_main_options_string(options, "cloudbiolinux_flavor", None) + _setup_logging(env) + _configure_fabric_environment(env, flavor, fabricrc_loader=fabricrc_loader) + + +def _setup_cloudbiolinux_fabric_properties(env, options): + fabricrc_file = get_main_options_string(options, "fabricrc_file", None) + if fabricrc_file: + env.update(load_settings(fabricrc_file)) + else: + # Let cloudbiolinux find out default file based on flavor, dist, etc... 
+ _parse_fabricrc(env) + overrides = options.get("fabricrc_overrides", {}) + for key, value in overrides.iteritems(): + # yaml parses bools, wouldn't be expected coming out of a fabricrc + # file so replace everything with a string. + if isinstance(value, bool): + overrides[key] = str(value) + env.update(overrides) + _setup_galaxy_env_defaults(env) + + +def purge_genomes(): + sudo("rm -rf %s" % env.data_files) + + +def configure_smtp(options): + if 'smtp_server' in options: + smtp_server = options['smtp_server'] + username = options['smtp_user'] + password = options['smtp_password'] + conf_file_contents = """mailhub=%s +UseSTARTTLS=YES +AuthUser=%s +AuthPass=%s +FromLineOverride=YES +""" % (smtp_server, username, password) + _apt_packages(pkg_list=["ssmtp"]) + sudo("""echo "%s" > /etc/ssmtp/ssmtp.conf""" % conf_file_contents) + aliases = """root:%s:%s +galaxy:%s:%s +%s:%s:%s""" % (username, smtp_server, username, smtp_server, env.user, username, smtp_server) + sudo("""echo "%s" > /etc/ssmtp/revaliases""" % aliases) + + +def configure_sudoers(options): + if "sudoers_additions" in options: + for addition in options["sudoers_additions"]: + sudoers_append(addition) + + +def configure_ssh_key(options): + if "galaxy_ssh_key" in options: + key_file = options["galaxy_ssh_key"] + sudo("mkdir -p /home/%s/.ssh" % (env.galaxy_user)) + sudo("chmod 700 /home/%s/.ssh" % (env.galaxy_user)) + put(local_path=key_file, + remote_path="/home/%s/.ssh/%s" % (env.galaxy_user, os.path.basename(key_file)), + use_sudo=True, + mode=0600) + _chown_galaxy(env, "/home/%s/.ssh" % env.galaxy_user) + + +def setup_genomes(options): + install_proc = install_data + sudo("mkdir -p %s" % env.data_files) + sudo("mkdir -p %s/tool-data" % env.galaxy_base) + sudo("chown -R %s:%s %s" % (env.user, env.user, env.data_files)) + put("config/tool_data_table_conf.xml", "%s/tool_data_table_conf.xml" % env.galaxy_base) + indexing_packages = ["bowtie", "bwa", "samtools"] + path_extensions = ":".join(map(lambda package: "/opt/galaxyTools/tools/%s/default" % package, indexing_packages)) + with prefix("PATH=$PATH:%s" % path_extensions): + if 'S3' == options['genome_source']: + install_proc = install_data_s3 + install_proc(options["genomes"]) + if options.get("setup_taxonomy_data", False): + setup_taxonomy_data() + stash_genomes_where = get_main_options_string(options, "stash_genomes") + if stash_genomes_where: + stash_genomes(stash_genomes_where) + + +def setup_taxonomy_data(): + taxonomy_directory = os.path.join(env.data_files, "taxonomy") + env.safe_sudo("mkdir -p '%s'" % taxonomy_directory, user=env.user) + with cd(taxonomy_directory): + taxonomy_url = "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz" + gi_taxid_nucl = "ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_nucl.dmp.gz" + gi_taxid_prot = "ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_prot.dmp.gz" + wget(taxonomy_url) + wget(gi_taxid_nucl) + wget(gi_taxid_prot) + run("gunzip -c taxdump.tar.gz | tar xvf -") + run("gunzip gi_taxid_nucl.dmp.gz") + run("gunzip gi_taxid_prot.dmp.gz") + run("cat gi_taxid_nucl.dmp gi_taxid_prot.dmp > gi_taxid_all.dmp") + run("sort -n -k 1 gi_taxid_all.dmp > gi_taxid_sorted.txt") + run("rm gi_taxid_nucl.dmp gi_taxid_prot.dmp gi_taxid_all.dmp") + run("cat names.dmp | sed s/[\\(\\)\\'\\\"]/_/g > names.temporary") + run("mv names.dmp names.dmp.orig") + run("mv names.temporary names.dmp") + + +def configure_instance(options, actions): + if "setup_image" in actions: + _configure_package_holds(options) + configure_MI(env) + configure_smtp(options) + 
configure_sudoers(options) + if "install_biolinux" in actions: + install_biolinux(options) + if "purge_tools" in actions: + purge_tools() + if "setup_tools" in actions: + install_tools(options["tools"]) + if "purge_genomes" in actions: + purge_genomes() + if "setup_genomes" in actions: + setup_genomes(options) + if "purge_galaxy" in actions: + purge_galaxy() + if "setup_galaxy" in actions: + seed = _seed_at_configure_time(options) + setup_galaxy(options, seed=seed) + if seed: + seed_workflows(options) + if "setup_ssh_key" in actions: + configure_ssh_key(options) + + +def install_biolinux(options): + _perform_install() + + +def _indices_dir_name(): + indices_dir = env.data_files + if indices_dir.endswith("/"): + indices_dir = indices_dir[0:(len(indices_dir) - 1)] + indices_dir_name = os.path.basename(indices_dir) + return indices_dir_name + + +def _configure_package_holds(options): + # No longer respected. TODO: Implement. + if 'package_holds' in options: + env.package_holds = options['package_holds'] + else: + env.package_holds = None + + +def _cd_indices_parent(): + return cd(_indices_parent()) + + +def _indices_parent(): + parent_dir = os.path.abspath(os.path.join(env.data_files, "..")) + return parent_dir + + +def stash_genomes(where): + with _cd_indices_parent(): + sudo("chown %s:%s ." % (env.user, env.user)) + indices_dir_name = _indices_dir_name() + remote_compressed_indices = "%s.tar.gz" % indices_dir_name + run("tar czvf %s %s" % (remote_compressed_indices, indices_dir_name)) + if where == 'download': + get(remote_path=remote_compressed_indices, + local_path="compressed_genomes.tar.gz") + elif where == 'opt': + sudo("cp %s /opt/compressed_genomes.tar.gz" % remote_compressed_indices) + else: + print(red("Invalid option specified for stash_genomes [%s] - valid values include download and opt." % where)) + + +def upload_genomes(options): + with _cd_indices_parent(): + sudo("chown %s:%s ." 
% (env.user, env.user)) + indices_dir_name = _indices_dir_name() + _transfer_genomes(options) + run("rm -rf %s" % indices_dir_name) + run("tar xzvfm compressed_genomes.tar.gz") + sudo("/etc/init.d/galaxy restart") + + +def transfer_files(options): + transfer_options = _build_transfer_options(options, "/mnt/uploaded_data", "galaxy") + _do_transfer(transfer_options, options.get("files", []), options.get("compressed_files", [])) + + +def _transfer_genomes(options): + # Use just transfer settings in YAML + options = options['transfer'] + transfer_options = _build_transfer_options(options, _indices_parent(), env.user) + transfer_options["compress"] = False + _do_transfer(transfer_options, ["compressed_genomes.tar.gz"]) + + +def _build_transfer_options(options, destination, user): + transfer_options = {} + transfer_options['compress'] = get_boolean_option(options, 'compress_transfers', True) + transfer_options['num_compress_threads'] = int(get_main_options_string(options, 'num_compress_threads', '1')) + transfer_options['num_transfer_threads'] = int(get_main_options_string(options, 'num_transfer_threads', '1')) + transfer_options['num_decompress_threads'] = int(get_main_options_string(options, 'num_decompress_threads', '1')) + transfer_options['chunk_size'] = int(get_main_options_string(options, 'transfer_chunk_size', '0')) + transfer_options['transfer_retries'] = int(get_main_options_string(options, 'transfer_retries', '3')) + transfer_options['local_temp'] = get_main_options_string(options, 'local_temp_dir', tempdir) + transfer_options['destination'] = destination + transfer_options['transfer_as'] = user + return transfer_options + + +def _do_transfer(transfer_options, files, compressed_files=[]): + FileTransferManager(**transfer_options).transfer_files(files, compressed_files) + + +def get_boolean_option(options, name, default=False): + if name not in options: + return default + else: + return options[name] + + +def get_main_options_string(options, key, default=''): + value = default + if key in options: + value = options[key] + return value + + +def create_data_library_for_uploads(options): + with cd(os.path.join(env.galaxy_home, "scripts", "api")): + db_key_arg = get_main_options_string(options, 'db_key') + transfer_history_name = get_main_options_string(options, 'transfer_history_name') + transfer_history_api_key = get_main_options_string(options, 'transfer_history_api_key') + cmd_template = 'python handle_uploads.py --api_key="%s" --db_key="%s" --history="%s" --history_api_key="%s" ' + galaxy_data = options["galaxy"] + admin_user_api_key = galaxy_data["users"][0]["api_key"] + cmd = cmd_template % (admin_user_api_key, db_key_arg, transfer_history_name, transfer_history_api_key) + sudo("bash -c 'export PYTHON_EGG_CACHE=eggs; %s'" % cmd, user="galaxy") + + +def copy_runtime_properties(fqdn, options): + runtime_properties_raw = options.get("runtime_properties", {}) + runtime_properties = {"FQDN": fqdn} + for runtime_property_raw in runtime_properties_raw: + (name, value) = runtime_property_raw.split(":") + runtime_properties[name] = value + export_file = "" + for (name, value) in runtime_properties.iteritems(): + export_file = "export %s=%s\n%s" % (name, value, export_file) + sudo('mkdir -p %s' % env.galaxy_home) + _chown_galaxy(env, env.galaxy_home) + sudo("echo '%s' > %s/runtime_properties" % (export_file, env.galaxy_home), user=env.galaxy_user) + + +def _seed_at_configure_time(options): + if 'seed_galaxy' in options: + return options['seed_galaxy'] == 'configure' + else: + return True diff 
--git a/cloudbio/deploy/cloudman.py b/cloudbio/deploy/cloudman.py new file mode 100644 index 000000000..7b8d70eb3 --- /dev/null +++ b/cloudbio/deploy/cloudman.py @@ -0,0 +1,36 @@ +import yaml + +DEFAULT_CLOUDMAN_PASSWORD = 'adminpass' +DEFAULT_CLOUDMAN_CLUSTER_NAME = 'cloudman' + + +def cloudman_launch(vm_launcher, options): + cloudman_options = options.get('cloudman') + image_id = cloudman_options.get('image_id', None) + size_id = cloudman_options.get('size_id', None) + user_data = _prepare_user_data(vm_launcher, cloudman_options) + vm_launcher.create_node('cloudman', + image_id=image_id, + size_id=size_id, + ex_userdata=user_data) + + +def _prepare_user_data(vm_launcher, cloudman_options): + cloudman_user_data = cloudman_options.get('user_data', {}) + cluster_name = \ + cloudman_options.get('cluster_name', DEFAULT_CLOUDMAN_CLUSTER_NAME) + password = cloudman_options.get('password', DEFAULT_CLOUDMAN_PASSWORD) + access_key = vm_launcher.access_id() + secret_key = vm_launcher.secret_key() + + _set_property_if_needed(cloudman_user_data, 'access_key', access_key) + _set_property_if_needed(cloudman_user_data, 'secret_key', secret_key) + _set_property_if_needed(cloudman_user_data, 'cluster_name', cluster_name) + _set_property_if_needed(cloudman_user_data, 'password', password) + + return yaml.dump(cloudman_user_data) + + +def _set_property_if_needed(user_data, property, value): + if property not in user_data: + user_data[property] = value diff --git a/cloudbio/deploy/config.py b/cloudbio/deploy/config.py new file mode 100644 index 000000000..3c41fc31c --- /dev/null +++ b/cloudbio/deploy/config.py @@ -0,0 +1,18 @@ +import inspect +import os +import yaml + + +def parse_settings(name="deploy/settings.yaml"): + return _read_yaml(_path_from_root(name)) + + +def _path_from_root(name): + root_path = os.path.join(os.path.dirname(inspect.getfile(inspect.currentframe())), "..", "..") + file_path = os.path.join(root_path, name) + return file_path + + +def _read_yaml(yaml_file): + with open(yaml_file) as in_handle: + return yaml.load(in_handle) diff --git a/cloudbio/deploy/galaxy.py b/cloudbio/deploy/galaxy.py new file mode 100644 index 000000000..86ea07991 --- /dev/null +++ b/cloudbio/deploy/galaxy.py @@ -0,0 +1,192 @@ +"""Fabric (http://docs.fabfile.org) deployment file to set up galaxy. +""" + +import os +import time + +from cloudbio.custom.galaxy import install_galaxy_webapp + +from fabric.api import sudo, run, env, cd +from fabric.contrib.files import append + +from cloudbio.galaxy.tools import _setup_install_dir +from cloudbio.galaxy.utils import _chown_galaxy +from util import start_service + +## TODO: Investigate whether it would make sense to move more of this +## into cloudbio.galaxy (or maybe cloudbio.custom.galaxy) + + +def wait_for_galaxy(): + + while not "8080" in run("netstat -lant"): + # Check if galaxy has started + print "Waiting for galaxy to start." + time.sleep(10) + + +def purge_galaxy(): + sudo("/etc/init.d/galaxy stop") + sudo("rm -rf %s" % env.galaxy_home) + init_script = "postgresql" + # if env.postgres_version[0] < '9': + # # Postgres 8.4 had different name for script + # init_script = "postgresql-%s" % env.postgres_version + sudo("/etc/init.d/%s restart" % init_script) + sudo('psql -c "drop database galaxy;"', user="postgres") + sudo('psql -c "create database galaxy;"', user="postgres") + + +def setup_galaxy(options, seed=True): + """Deploy a Galaxy server along with some tools. + """ + _setup_install_dir(env) # Still needed? 
-John + install_galaxy_webapp(env) + #_fix_galaxy_permissions() + _setup_shed_tools_dir() + _setup_galaxy_log_dir() + _migrate_galaxy_database() + if seed: + seed_database(options["galaxy"]) + _start_galaxy() + + +def _migrate_galaxy_database(): + with cd(env.galaxy_home): + sudo("bash -c 'export PYTHON_EGG_CACHE=eggs; python ./scripts/build_universe_config.py conf.d; python -ES ./scripts/fetch_eggs.py; ./create_db.sh'", user="galaxy") + + +def seed_database(galaxy_data): + with cd(env.galaxy_home): + sudo("rm -f seed.py") + _setup_database_seed_file(galaxy_data) + sudo("bash -c 'export PYTHON_EGG_CACHE=eggs; python ./scripts/build_universe_config.py conf.d; python -ES ./scripts/fetch_eggs.py; python seed.py'", user="galaxy") + + +def seed_workflows(options): + wait_for_galaxy() + galaxy_data = options["galaxy"] + with cd(os.path.join(env.galaxy_home, "workflows")): + for user in galaxy_data["users"]: + api_key = user["api_key"] + workflows = None + if "workflows" in user: + workflows = user["workflows"] + if not workflows: + continue + for workflow in workflows: + sudo("bash -c 'export PYTHON_EGG_CACHE=eggs; bash import_all.sh %s %s'" % (api_key, workflow), user=env.galaxy_user) + + +def _setup_database_seed_file(galaxy_data): + _seed_append("""from scripts.db_shell import * +from galaxy.util.bunch import Bunch +from galaxy.security import GalaxyRBACAgent +bunch = Bunch( **globals() ) +bunch.engine = engine +# model.flush() has been removed. +bunch.session = db_session +# For backward compatibility with "model.context.current" +bunch.context = db_session +security_agent = GalaxyRBACAgent( bunch ) +security_agent.sa_session = sa_session + +def add_user(email, password, key=None): + query = sa_session.query( User ).filter_by( email=email ) + if query.count() > 0: + return query.first() + else: + user = User(email) + user.set_password_cleartext(password) + sa_session.add(user) + sa_session.flush() + + security_agent.create_private_user_role( user ) + if not user.default_permissions: + security_agent.user_set_default_permissions( user, history=True, dataset=True ) + + if key is not None: + api_key = APIKeys() + api_key.user_id = user.id + api_key.key = key + sa_session.add(api_key) + sa_session.flush() + return user + +def add_history(user, name): + query = sa_session.query( History ).filter_by( user=user ).filter_by( name=name ) + if query.count() == 0: + history = History(user=user, name=name) + sa_session.add(history) + sa_session.flush() + return history + else: + return query.first() + +""") + i = 0 + for user in galaxy_data["users"]: + username = user["username"] + password = user["password"] + api_key = user["api_key"] + histories = None + if "histories" in user: + histories = user["histories"] + user_object = "user_%d" % i + _seed_append("""%s = add_user("%s", "%s", "%s")""" % (user_object, username, password, api_key)) + _import_histories(user_object, histories) + i = i + 1 + + +def _import_histories(user_object, histories): + if not histories: + return + for history_name in histories: + _import_history(user_object, history_name) + + +def _import_history(user_object, history_name): + history_name_stripped = history_name.strip() + if history_name_stripped: + _seed_append("""add_history(%s, "%s")""" % (user_object, history_name_stripped)) + + +def _seed_append(text): + append("%s/seed.py" % env.galaxy_home, text, use_sudo=True) + + +def _start_galaxy(): + # Create directory to store galaxy service's pid file. 
+ _make_dir_for_galaxy("/var/lib/galaxy") + start_service("galaxy") + + +def refresh_galaxy(target_galaxy_repo): + _update_galaxy(target_galaxy_repo) + sudo("/etc/init.d/galaxy restart", pty=False) + + +def _setup_galaxy_log_dir(): + _make_dir_for_galaxy("/var/log/galaxy") + + +def _setup_shed_tools_dir(): + _make_dir_for_galaxy("%s/../shed_tools" % env.galaxy_home) + + +def _make_dir_for_galaxy(path): + sudo("mkdir -p '%s'" % path) + _chown_galaxy(env, path) + + +#def _fix_galaxy_permissions(): +# # Ensure that everything under install dir is owned by env.galaxy_user +# _chown_galaxy(env, os.path.split(env.install_dir)[0]) +# sudo("chmod 755 %s" % os.path.split(env.install_dir)[0]) + + +def _update_galaxy(target_galaxy_repo): + # Need to merge? -John + hg_command = "hg pull %s; hg update" % target_galaxy_repo + with cd(env.galaxy_home): + sudo(hg_command, user=env.galaxy_user) diff --git a/cloudbio/deploy/image.py b/cloudbio/deploy/image.py new file mode 100644 index 000000000..5a96b2870 --- /dev/null +++ b/cloudbio/deploy/image.py @@ -0,0 +1,61 @@ +# Based almost entirely on version from Dr. Enis Afgan at +# (https://bitbucket.org/afgane/mi-deployment) +import os +import os.path + +from fabric.api import sudo +from fabric.contrib.files import exists, contains, append + +from util import ensure_can_sudo_into, start_service + +from cloudbio.galaxy import _setup_users, _setup_xvfb, _install_nginx_standalone, _setup_postgresql +from cloudbio.galaxy.utils import _chown_galaxy +from cloudbio.package import _configure_and_install_native_packages + + +def configure_MI(env): + # Clean this next line up. + _configure_and_install_native_packages(env, ["minimal", "cloudman", "galaxy"]) + # _update_system() + _setup_users(env) + _setup_xvfb(env) + _required_programs(env) + + +# == required programs +def _required_programs(env): + """ Install required programs """ + if not exists(env.install_dir): + sudo("mkdir -p %s" % env.install_dir) + sudo("chown %s %s" % (env.user, env.install_dir)) + + # Setup global environment for all users + install_dir = os.path.split(env.install_dir)[0] + exports = ["export PATH=%s/bin:%s/sbin:$PATH" % (install_dir, install_dir), + "export LD_LIBRARY_PATH=%s/lib" % install_dir] + for e in exports: + _ensure_export(e) + # Install required programs + _install_nginx_standalone(env) + _start_nginx(env) + _deploy_setup_postgresql(env) + + # Verify this is not needed. + # _install_samtools() + + +def _ensure_export(command): + if not contains('/etc/bash.bashrc', command): + append('/etc/bash.bashrc', command, use_sudo=True) + + +def _start_nginx(env): + galaxy_data = env.galaxy_data_mount + env.safe_sudo("mkdir -p '%s'" % env.galaxy_data) + _chown_galaxy(env, galaxy_data) + start_service("nginx") + + +def _deploy_setup_postgresql(env): + ensure_can_sudo_into("postgres") + _setup_postgresql(env) diff --git a/cloudbio/deploy/main.py b/cloudbio/deploy/main.py new file mode 100644 index 000000000..47390350e --- /dev/null +++ b/cloudbio/deploy/main.py @@ -0,0 +1,42 @@ +from argparse import ArgumentParser +import yaml + +from cloudbio.deploy import deploy + +DESC = "Creates an on-demand cloud instance, sets up applications, and transfer files to it." 
+ + +def main(): + args = parse_args() + options = parse_settings(args.settings) + options["files"] = args.files + options["compressed_files"] = args.compressed_files + options["actions"] = args.actions + options["runtime_properties"] = args.runtime_properties + deploy(options) + + +def parse_args(): + parser = ArgumentParser(DESC) + parser.add_argument("--settings", dest="settings", default="settings.yaml") + parser.add_argument('--action', dest="actions", action="append", default=[]) + parser.add_argument('--runtime_property', dest="runtime_properties", action="append", default=[]) + parser.add_argument('--compressed_file', dest="compressed_files", action="append", default=[], help="file to transfer to new instance and decompress") + parser.add_argument('--file', dest="files", action="append", default=[], help="file to transfer to new instance") + args = parser.parse_args() + if len(args.actions) == 0: + args.actions = ["transfer"] + return args + + +def parse_settings(name): + return _read_yaml(name) + + +def _read_yaml(yaml_file): + with open(yaml_file) as in_handle: + return yaml.load(in_handle) + + +if __name__ == "__main__": + main() diff --git a/cloudbio/deploy/tools.py b/cloudbio/deploy/tools.py new file mode 100644 index 000000000..2e7053c9a --- /dev/null +++ b/cloudbio/deploy/tools.py @@ -0,0 +1,16 @@ +""" +""" + +from fabric.api import env + +from cloudbio.galaxy.tools import _install_tools + + +def purge_tools(): + env.safe_sudo("rm -rf %s" % env.install_dir) + + +def install_tools(tools_conf): + """Deploy a Galaxy server along with some tools. + """ + _install_tools(env, tools_conf) diff --git a/cloudbio/deploy/util.py b/cloudbio/deploy/util.py new file mode 100644 index 000000000..3ce1b40ea --- /dev/null +++ b/cloudbio/deploy/util.py @@ -0,0 +1,47 @@ +from fabric.api import local, sudo, env, put, get +from fabric.contrib.files import exists, append + +import os + + +def setup_install_dir(): + """Sets up install dir and ensures its owned by Galaxy""" + if not exists(env.install_dir): + sudo("mkdir -p %s" % env.install_dir) + if not exists(env.jars_dir): + sudo("mkdir -p %s" % env.jars_dir) + chown_galaxy(os.path.split(env.install_dir)[0]) + + +def ensure_can_sudo_into(user): + sudoers_append("%admin ALL = (" + user + ") NOPASSWD: ALL") + + +def sudoers_append(line): + append("/etc/sudoers", line, use_sudo=True) + + +def start_service(service_name): + # For reasons I don't understand this doesn't work for galaxy init + # script unless pty=False + sudo("/etc/init.d/%s start" % service_name, pty=False) + + +def wget(url, install_command=sudo, file_name=None): + if not file_name: + file_name = os.path.split(url)[-1] + if '?' 
in file_name:
            file_name = file_name[0:file_name.index('?')]
    if ("cache_source_downloads" in env) and (not env.cache_source_downloads):
        install_command("wget %s -O %s" % (url, file_name))
    else:
        cache_dir = env.source_cache_dir
        if not cache_dir:
            cache_dir = ".downloads"
        cached_file = os.path.join(cache_dir, file_name)
        if os.path.exists(cached_file):
            put(cached_file, file_name)
        else:
            install_command("wget %s -O %s" % (url, file_name))
            local("mkdir -p '%s'" % cache_dir)
            get(file_name, cached_file)
diff --git a/deploy/README.md b/deploy/README.md
new file mode 100644
index 000000000..c4e0c02a9
--- /dev/null
+++ b/deploy/README.md
@@ -0,0 +1,114 @@
# CloudBioLinux Deployer

This CloudBioLinux deployer has grown out of the galaxy-vm-launcher and can be
used to launch cloud virtual machines, configure them with Galaxy, and seed
them with input data, genomes, workflows, etc. More recently, actions for
installing CloudBioLinux and launching CloudMan have been added.

## Prerequisites

The `deploy.sh` script should install the needed dependencies local to the
project and doesn't require special permissions as long as `python`,
`easy_install`, and `git` are available.

## Specify settings

All deploy actions first require the existence of a settings file.

    cp settings.yaml-sample settings.yaml

This file has numerous settings to customize how the deployer acts. At the
very least, `key_file` and `vm_host` must be set, as well as cloud-specific
settings (if `vm_host = aws`, these settings will be in the aws section of the
YAML file). The argument `--settings=/path/to/custom_settings.yaml` may be
passed to `deploy.sh` to specify a custom path for this settings file.

## Configuring Galaxy

    ./deploy.sh --action=configure --action=transfer file1 file2 file3

When called this way, deploy.sh will launch a VM and configure Galaxy, tools,
and genomes. Once Galaxy is ready, it will transfer each of the provided input
files to the newly launched VM and use the Galaxy REST API to add them to a
Galaxy data library (and optionally a history). Once all of that is complete,
it will print a URL to the screen telling the operator where to find the new
Galaxy instance.

This does not install CloudMan; Galaxy is configured to run at startup by an
init script. A more traditional CloudMan workflow can be achieved using the
`install_biolinux` action described next.

## Installing CloudBioLinux

    ./deploy.sh --action=install_biolinux --action=package

This mode will launch an instance, install CloudBioLinux (a flavor can be
specified in settings.yaml), and `package` (see settings.yaml for more
details) the resulting virtual image.

## Additional Actions

The actions shown above can be combined in different ways; for instance,
`configure` and `package` can be used to configure a Galaxy instance and
package it so that later `transfer` can be used without requiring a full
configure. Alternatively, `install_biolinux` can be followed up with
`transfer` to install CloudBioLinux and start analyzing data without requiring
Galaxy (be sure to set `use_galaxy: False` in settings.yaml in this case); see
the examples below.
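As a rough sketch of the combinations just described (the input file names are
placeholders, and the exact set of actions depends on your settings.yaml):

    # Configure a Galaxy instance and package the resulting image.
    ./deploy.sh --action=configure --action=package

    # Later, transfer data to an instance without a full configure.
    ./deploy.sh --action=transfer sample1.fastq sample2.fastq

    # Or skip Galaxy entirely: install CloudBioLinux, then transfer data
    # (requires use_galaxy: False in settings.yaml).
    ./deploy.sh --action=install_biolinux --action=transfer sample1.fastq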
You can see running instances on the target cloud with this command:

    ./deploy.sh --action=list

You can also destroy all running instances with this command:

    ./deploy.sh --action=destroy

If an existing CloudBioLinux image bundled with CloudMan has been created and
its image id set as `image_id` in the `cloudman` section of `settings.yaml`,
then this image can be launched for testing with:

    ./deploy.sh --action=cloudman_launch

The full list of actions can be found in `cloudbio/deploy/__init__.py` and
includes:

* `list`
* `destroy`
* `transfer`
* `purge_galaxy`
* `setup_galaxy`
* `purge_tools`
* `setup_tools`
* `purge_genomes`
* `setup_genomes`
* `setup_ssh_key`
* `package`
* `setup_image`
* `launch` - Dummy action that just launches the instance
* `install_biolinux`
* `cloudman_launch`

Additional composite actions are shortcuts for multiple actions - these
include:

* `configure` - `setup_image`, `setup_tools`, `setup_genomes`, `setup_galaxy`, `setup_ssh_key`
* `reinstall_galaxy` - `purge_galaxy` and `setup_galaxy`
* `reinstall_genomes` - `purge_genomes` and `setup_genomes`
* `reinstall_tools` - `purge_tools` and `setup_tools`

## Configuring Cloud Provider

Cloud interactions are managed via the [vm-launcher] project; full information
on configuring different cloud providers can be found
[here][vm-launcher-config].

In brief, there are a few different options for where to create the VMs.
Amazon EC2 is the default target, but it can also target Eucalyptus or
OpenStack based clouds. The Ruby package `vagrant` can be used to target
virtual instances on your own machine.

[vm-launcher]: https://github.com/jmchilton/vm-launcher
[vm-launcher-config]: https://github.com/jmchilton/vm-launcher/blob/master/config.md
diff --git a/deploy/TODO b/deploy/TODO
new file mode 100644
index 000000000..a23a118be
--- /dev/null
+++ b/deploy/TODO
@@ -0,0 +1,5 @@
TODO: Refactor use_existing_instance out of cloud specific config, make it a command-line option.
TODO: Allow specification of clouds by name instead of type.
TODO: Implement alternate transfer mechanisms (fabric-based transfer has not proven very robust).
TODO: Improve documentation.

diff --git a/deploy/Vagrantfile b/deploy/Vagrantfile
new file mode 100644
index 000000000..4e90826ca
--- /dev/null
+++ b/deploy/Vagrantfile
@@ -0,0 +1,14 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :

Vagrant::Config.run do |config|
  # All Vagrant configuration is done here. The most common configuration
  # options are documented and commented below. For a complete reference,
  # please see the online documentation at vagrantup.com.

  # vagrant box add precise64 http://files.vagrantup.com/precise64.box
  config.vm.box = "precise64"
  config.vm.network :hostonly, "33.33.33.11"
  config.vm.forward_port 80, 9080

end
diff --git a/deploy/config/tool_data_table_conf.xml b/deploy/config/tool_data_table_conf.xml
new file mode 100644
index 000000000..1001efeea
--- /dev/null
+++ b/deploy/config/tool_data_table_conf.xml
@@ -0,0 +1,113 @@
value, dbkey, name, path
+ + + value, dbkey, formats, name, path + +
+ + + value, name, path + +
+ + + value, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + name, value, dbkey, species + +
+ + + value, dbkey, name, path + +
+ + + value, name, path + +
+ + + value, name, path + +
+ + + value, name, path + +
+ + + line_type, value, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, name, gatk_value, tools_valid_for + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, name, path + +
+ + + value, dbkey, name, path + +
+
diff --git a/deploy/deploy.sh b/deploy/deploy.sh new file mode 100755 index 000000000..93cfc202b --- /dev/null +++ b/deploy/deploy.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +project_directory="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd $project_directory +if [ ! -e .venv-deploy ]; +then + ./setup.sh +fi + +export PYTHONPATH=..:$PYTHONPATH +tools/with_venv.sh python $project_directory/../cloudbio/deploy/main.py "$@" diff --git a/deploy/settings.yaml-sample b/deploy/settings.yaml-sample new file mode 100644 index 000000000..d66dada41 --- /dev/null +++ b/deploy/settings.yaml-sample @@ -0,0 +1,338 @@ +--- + +## Must specify a key_file and corresponding vm_host, key_file should +## be configured to allow password-less ssh access to ubuntu user (or +## vagrant user in the case of vm_host: vagrant) + +key_file: MUST_SPECIFY +vm_host: MUST_SPECIFY + +#key_file: /.vagrant.d/insecure_private_key +#vm_host: vagrant +#vm_host: aws +#vm_host: openstack +#vm_host: eucalyptus + +hostname: galaxy + +## Uncomment to destroy VM after the script is complete (for testing). +#destroy_on_complete: True + +## Pull in galaxy update and restart galaxy service at runtime. +refresh_galaxy: False +## Seed galaxy with API users, API keys, workflows etc... at 'configure' time or 'runtime' +seed_galaxy: configure + +genome_source: default # can also set to 'S3' to download pre-existing genomes +tool_data_table_conf_file: config/tool_data_table_conf.xml + +## Uncomment to compress and download genomes locally during the +## configure or setup_genomes step. +# stash_genomes: download + +## Uncomment to compress genomes and store in /opt of the VM during +## the configure or setup_genomes_step, when galaxy boots in a fresh +## VM this file will be decompressed into /mnt if needed. +# stash_genomes: opt + +## Uncomment to upload compressed genomes previously downloaded during +## a configure step. +# upload_genomes: True + +## Add sudoers_addition lines to add options to the /etc/sudoers +## file. Use with caution! +sudoers_additions: + # This line is needed to implement max_lifetime option below. + - "galaxy ALL=NOPASSWD: /sbin/shutdown -h now,/sbin/reboot,/sbin/halt" + +## Configure max lifetime for VM in minutes. +# max_lifetime: 10200 + +## Configure ssmtp (right now only gmail is really supported). +# smtp_server: smtp.gmail.com:587 +# smtp_user: @gmail.com +# smtp_password: + +## Configure genome for initially uploaded files. +# db_key: hg19 + +## Configure history name for initially uploaded files. This history +## name and corresponding user api key must match data found in +## galaxy section below. +# transfer_history_name: +# transfer_history_api_key: + +## Uncomment to setup taxonomy data required by Galaxy metagenomics +## tools during the setup_genomes action. +# setup_taxonomy_data: True + +## Path to ssh key to install for galaxy user (optional). (We +## use this to automate transfers of results back to home base +## from the Cloud. -John) +#galaxy_ssh_key: /path/to/ssh_private_key + +## Specify flavor of CloudBioLinux to install with. This has +## not been tested an should probably not be set. +# cloudbiolinux_flavor: cloudman/cloudman_and_galaxy + +## Default CloudBioLinux properties to load, by default CloudBioLinux +## will just its own defaults (either ../config/fabrircr.txt or whatever +## is specified by the CloudBioLinux flavor). 
+# fabricrc_file: ../config/fabricrc.txt + +## CloudMan Options (does not work with Vagrant driver) +cloudman: + ## Id of image containing cloudman to launch + image_id: XXXXXXXXXX + + ## Size or flavor id of machine to launch. Defaults to + ## cloud-specific option below. + # size_id: + + ## access_key & secret_key to for use by CloudMan. Defaults + ## to access_id & secret_key specified in cloud-spefic options below. + # access_key: + # secret_key: + + ## Name of cloudman cluster to create. + # cluster_name: cloudman + + ## Password for CloudMan admin interface. + # password: adminpass + + +## Set fabric env properties once virtual machine has been created, +## these are mostly used to modify CloudBioLinux behavior and will +## override the properties loaded by fabricrc_file defind above. +fabricrc_overrides: + ## Mercurial repository (should be fork of or contain changesets in https://bitbucket.org/jmchilton/cloud-galaxy-dist) + galaxy_repository: https://bitbucket.org/jmchilton/cloud-galaxy-dist + + # Location to install galaxy + galaxy_home: /opt/galaxy/web + # Location to setup galaxy tool dependencies + galaxy_tools_dir: /opt/galaxy/tools + galaxy_jars_dir: /opt/galaxy/jars + + # Configure biodata location + data_files: "/mnt/galaxyIndices" + galaxy_base: "/mnt/galaxyIndices/galaxy" + + # Set to true because above changeset preconfigure Galaxy properly. + galaxy_preconfigured_repository: True + + # Setup an init service for Galaxy and virtual frame buffer + galaxy_setup_service: True + galaxy_setup_xvfb: True + + install_dir: /opt/galaxyTools/tools + postgres_version: 9.1 + + # Transferring large files is tough, give it multiple + # tries. + keepalive: 30 + timeout: 60 + connection_attempts: 5 + + tool_data_table_conf_file: config/tool_data_table_conf.xml + + +## Define Amazon connection information, can also specify eucalytus or +## openstack sections for other cloud environments. +aws: + access_id: XXXXXXXXXXXXX + secret_key: XXXXXXXXXXXX + keypair_name: galaxy_key + size_id: m1.medium + ## Fresh Ubuntu Instance + image_id: ami-5c9b4935 + availability_zone: us-east-1 + + ## Set this to a specific instance UUID to have deploy.sh commands use an + ## existing VM instead launching a new one + # use_existing_instance: XXXXXXXXXXXXXXXXXXXXXXXXXXXX + + ## Packaging related options, all optional unless --action=package is + ## specified. + + + ## If package_type is default, manual packaging scripts are setup on the VM + ## that are appropriate for instance or EBS backed instances. If package_type + ## is create_image, Amazon's CreateImage instance command is used, this is + ## only appropriate for EBS backed instances (requires boto). + # package_type: default + + + ## AWS -> Your Account -> Security Credentials -> X.509 Certificates + # x509_cert: /path/to/cert-XXXXXXXXXXXXXXXX.pem + # x509_key: /path/to/pk-XXXXXXXXXXXXXX.pem + ## AWS -> Your Account -> Security Credentials -> Account Identifiers -> AWS Account ID + # user_id: 123456789 + ## S3 bucket to store your image into + # package_bucket: test1 + ## Name of packaged instance + # package_image_name: Test1 + + ## Make the resulting image available to the public (currently only available + ## for package_type: create_image). + # make_public: False + +galaxy: + ## In order to create data libraries. 
First user should be admin@localhost + ## and an API key must be specified, be sure to change API keys and passwords + users: + - username: admin@localhost + password: adminpass + api_key: 1234556789 + - username: user1@example.com + password: pass1 + api_key: 987654321 + ## Histories to create for this user + histories: + - ExampleHistory + ## Workflows to automatically import for this user. + workflows: + # - /path/to/workflow_file + +## We have had all sorts of trouble getting fabric to reliably SFTP +## really large files, so we have made all sorts of options available +## to tweak this process. +transfer: + ## Override what local temp directory is used on this machine if + ## chunking files or compression is used in transfer + # local_temp_dir: /tmp/ + ## Compress transferred files (default is False) + # compress_transfers: True + ## If the following parameter is set, files will be split into + ## chunks of this size (in Mb) and recombined on remote host. + # transfer_chunk_size: 1 + +genomes: + # Details about the genomes you want to include. + # Required genome fields (corresponding to Galaxy's tool_data_table_conf.xml columns): + # dbkey - globally unique identifier for the genome (e.g., hg19) + # name - descriptive name for the given genome (to be displayed in Galaxy, e.g., Hsapiens) + # Optional genome fields (corresponding to Galaxy's tool_data_table_conf.xml columns): + # formats, species, dbkey1, dbkey2, value, path, index + # Additional genome fields specific to data deployment: + # genome_indexes - list of tool indexes specific to the associated genome (overrides global 'genome_indexes') + genomes: + - dbkey: phix + name: phiX174 + + # - dbkey: hg19 + # name: Human (hg19) + # dbsnp: false + # rnaseq: false + + # Global set of indexes to include for each genome. 
+ # Available choices are in GENOMES_INDEXES_SUPPORTED in cloudbiolinux/biodata/genomes.py + genome_indexes: + - bwa + + # Additional data targets + install_liftover: false + install_uniref: false + +tools: + applications: + bwa: "0.5.9" + ucsc_tools: "default" + bowtie: "0.12.7" + samtools: + - "0.1.16" + - "0.1.18" + abyss: "1.3.3" + bfast: "0.7.0a" + velvet: "1.2.07" + perm: "3.6" + lastz: "1.02.00" + mosaik: "default" + freebayes: "default" + macs: "1.4.2-1" + emboss: + - version: "6.5.7" + symlink_versions: "5.0.0" # Setup a symbolic link, so when Galaxy looks for 5.0.0 it finds newest version + fastx_toolkit: "0.0.13.2" + cufflinks: "1.3.0" + tophat: "1.3.3" + picard: "1.56" + fastqc: "0.10.0" + gatk: "1.4-9-g1f1233b" + maq: "0.7.1" + megablast: "2.2.22" + blast: "2.2.25+" + sputnik: "r1" + taxonomy: "r2" + add_scores: "r1" + hyphy: "r418" + srma: "0.1.15" + beam: "2" + pass: "2.0" + lps_tool: "2010.09.30" + plink: "1.07" + haploview: "4.2b" + eigenstrat: "3.0" + + r_packages: + - DBI + - RColorBrewer + - RCurl + - RSQLite + - XML + - biglm + - bitops + - digest + - ggplot2 + - graph + - hexbin + - hwriter + - kernlab + - latticeExtra + - leaps + - pamr + - plyr + - proto + - qvalue + - reshape + - statmod + - xtable + - yacca + + bioconductor_packages: + - AnnotationDbi + - ArrayExpress + - ArrayTools + - Biobase + - Biostrings + - DynDoc + - GEOquery + - GGBase + - GGtools + - GSEABase + - IRanges + - affy + - affyPLM + - affyQCReport + - affydata + - affyio + - annaffy + - annotate + - arrayQualityMetrics + - beadarray + - biomaRt + - gcrma + - genefilter + - geneplotter + - globaltest + - hgu95av2.db + - limma + - lumi + - makecdfenv + - marray + - preprocessCore + - ShortRead + - siggenes + - simpleaffy + - snpMatrix + - vsn diff --git a/deploy/setup.sh b/deploy/setup.sh new file mode 100755 index 000000000..dd2118e5d --- /dev/null +++ b/deploy/setup.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +if [ ! -e .venv-deploy ] +then + # If virtualenv is not already installed, install it locally + command -v virtualenv >/dev/null 2>&1 || { . tools/install_virtualenv.sh; } + + # Install venv + python tools/install_venv.py +fi + diff --git a/deploy/tools/install_venv.py b/deploy/tools/install_venv.py new file mode 100755 index 000000000..e90a22383 --- /dev/null +++ b/deploy/tools/install_venv.py @@ -0,0 +1,137 @@ +# File copied OpenStack Horzion project. +# https://github.com/openstack/horizon + +# Copyright 2012 United States Government as represented by the +# Administrator of the National Aeronautics and Space Administration. +# All Rights Reserved. +# +# Copyright 2012 OpenStack, LLC +# +# Copyright 2012 Nebula, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import os +import subprocess +import sys + + +ROOT = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +VENV = os.path.join(ROOT, '.venv-deploy') +WITH_VENV = os.path.join(ROOT, 'tools', 'with_venv.sh') +PIP_REQUIRES = os.path.join(ROOT, 'tools', 'pip-requires') + +def die(message, *args): + print >> sys.stderr, message % args + sys.exit(1) + + +def run_command(cmd, redirect_output=True, check_exit_code=True, cwd=ROOT, + die_message=None): + """ + Runs a command in an out-of-process shell, returning the + output of that command. Working directory is ROOT. + """ + if redirect_output: + stdout = subprocess.PIPE + else: + stdout = None + + proc = subprocess.Popen(cmd, cwd=cwd, stdout=stdout) + output = proc.communicate()[0] + if check_exit_code and proc.returncode != 0: + if die_message is None: + die('Command "%s" failed.\n%s', ' '.join(cmd), output) + else: + die(die_message) + return output + + +HAS_EASY_INSTALL = bool(run_command(['which', 'easy_install'], + check_exit_code=False).strip()) +HAS_VIRTUALENV = bool(run_command(['which', 'virtualenv'], + check_exit_code=False).strip()) + + +def check_dependencies(): + """Make sure virtualenv is in the path.""" + + print 'Checking dependencies...' + if not HAS_VIRTUALENV: + print 'Virtual environment not found.' + # Try installing it via easy_install... + if HAS_EASY_INSTALL: + print 'Installing virtualenv via easy_install...', + run_command(['easy_install', 'virtualenv'], + die_message='easy_install failed to install virtualenv' + '\ndevelopment requires virtualenv, please' + ' install it using your favorite tool') + if not run_command(['which', 'virtualenv']): + die('ERROR: virtualenv not found in path.\n\ndevelopment ' + ' requires virtualenv, please install it using your' + ' favorite package management tool and ensure' + ' virtualenv is in your path') + print 'virtualenv installation done.' + else: + die('easy_install not found.\n\nInstall easy_install' + ' (python-setuptools in ubuntu) or virtualenv by hand,' + ' then rerun.') + print 'dependency check done.' + + +def create_virtualenv(venv=VENV): + """Creates the virtual environment and installs PIP only into the + virtual environment + """ + print 'Creating venv...', + run_command(['virtualenv', '-q', '--no-site-packages', VENV]) + print 'done.' + print 'Installing pip in virtualenv...', + if not run_command([WITH_VENV, 'easy_install', 'pip']).strip(): + die("Failed to install pip.") + print 'done.' + print 'Installing distribute in virtualenv...' + pip_install('distribute>=0.6.24') + print 'done.' + + +def pip_install(*args): + args = [WITH_VENV, 'pip', 'install', '--upgrade'] + list(args) + run_command(args, redirect_output=False) + + +def install_dependencies(venv=VENV): + print "Installing dependencies..." + print "(This may take several minutes, don't panic)" + pip_install('-r', PIP_REQUIRES) + +def print_summary(): + summary = """ +galaxy-vm-launcher environment setup is complete. 
+ +To activate the virtualenv for the extent of your current shell session you +can run: + +$ source .venv/bin/activate +""" + print summary + + +def main(): + check_dependencies() + create_virtualenv() + install_dependencies() + print_summary() + +if __name__ == '__main__': + main() diff --git a/deploy/tools/install_virtualenv.sh b/deploy/tools/install_virtualenv.sh new file mode 100644 index 000000000..57a6c9ca6 --- /dev/null +++ b/deploy/tools/install_virtualenv.sh @@ -0,0 +1,7 @@ +#!/bin/bash +export LOCAL_PYTHON=$HOME/.gvl_python +export PYTHON_VERSION=${PYTHON_VERSION:-`python -c "import sys; rev = sys.version_info; str = '%d.%d' % (rev[0], rev[1]); print str"`} +mkdir -p $LOCAL_PYTHON/lib/python$PYTHON_VERSION/site-packages +export PYTHONPATH=$LOCAL_PYTHON/lib/python$PYTHON_VERSION/site-packages:$PYTHONPATH +export PATH=$LOCAL_PYTHON/bin:$PATH +easy_install --prefix=$LOCAL_PYTHON pip virtualenv diff --git a/deploy/tools/pip-requires b/deploy/tools/pip-requires new file mode 100644 index 000000000..7987ce304 --- /dev/null +++ b/deploy/tools/pip-requires @@ -0,0 +1,8 @@ +#fabric +paramiko==1.7.7.1 +git+git://github.com/jmchilton/fabric.git@1.4.2-gvl-2#egg=fabric +git+git://github.com/jmchilton/vm-launcher.git#egg=vmlauncher +argparse +pyyaml +apache-libcloud==0.11.3 + diff --git a/deploy/tools/with_venv.sh b/deploy/tools/with_venv.sh new file mode 100755 index 000000000..b472c43f6 --- /dev/null +++ b/deploy/tools/with_venv.sh @@ -0,0 +1,4 @@ +#!/bin/bash +TOOLS=`dirname $0` +VENV=$TOOLS/../.venv-deploy +source $VENV/bin/activate && $@ diff --git a/fabfile.py b/fabfile.py index b5da8ab1f..0b7649924 100755 --- a/fabfile.py +++ b/fabfile.py @@ -64,10 +64,22 @@ def install_biolinux(target=None, flavor=None): _check_fabric_version() _configure_fabric_environment(env, flavor) env.logger.debug("Target is '%s'" % target) + _perform_install(target, flavor) + _print_time_stats("Config", "end", time_start) + + +def _perform_install(target=None, flavor=None): + """ + Once CBL/fabric environment is setup, this method actually + runs the required installation procedures. + + See `install_biolinux` for full details on arguments + `target` and `flavor`. + """ pkg_install, lib_install, custom_ignore = _read_main_config() if target is None or target == "packages": _configure_and_install_native_packages(env, pkg_install) - if env.nixpkgs: # ./doc/nixpkgs.md + if env.nixpkgs: # ./doc/nixpkgs.md _setup_nix_sources() _nix_packages(pkg_install) _update_biolinux_log(env, target, flavor) @@ -90,7 +102,7 @@ def install_biolinux(target=None, flavor=None): sudo("dpkg -i cloud-init_0.7.1-0ubuntu4_all.deb") sudo("rm -f cloud-init_0.7.1-0ubuntu4_all.deb") _cleanup_ec2(env) - _print_time_stats("Config", "end", time_start) + def _print_time_stats(action, event, prev_time=None): """ A convenience method for displaying time event during configuration.