diff --git a/API.md b/API.md
index f3a8fa1..70e3b78 100644
--- a/API.md
+++ b/API.md
@@ -15,7 +15,7 @@ JSON should be POSTed to /api/runs with following fields:
- uri
-
- The path on HDFS where the VCF can be found.
+ The URL where the VCF can be found.
(e.g. /users/cycledasher/pt-123.vcf)
This should be immutable, as CycleDash expects
to be able to find the VCF here at any time.
@@ -30,11 +30,11 @@ JSON should be POSTed to /api/runs with following fields:
- tumorBamUri
- - The path on HDFS of the tumor BAM on which the caller was run. The BAM must already be in the database.
+ - The URL of the tumor BAM on which the caller was run. The BAM must already be in the database.
- tumorBamId
- The ID of the BAM.
- normalBamUri
- - The path on HDFS of the normalBAM on which the caller was run. The BAM must already be in the database.
+ - The URL of the normal BAM on which the caller was run. The BAM must already be in the database.
- normalBamId
- The ID of the BAM.
- params
@@ -76,7 +76,7 @@ JSON should be POSTed to /api/bams with following fields:
- uri
- - The URI of the BAM on HDFS. Should start with 'hdfs://'.
+ - The URL of the BAM. Should start with 'http[s]://'.
- projectName (or projectId)
-
The name of the project that the run related to.
diff --git a/cycledash/__init__.py b/cycledash/__init__.py
index 021257f..e2afddd 100644
--- a/cycledash/__init__.py
+++ b/cycledash/__init__.py
@@ -1,7 +1,9 @@
from flask import Flask, jsonify, request, make_response, current_app
import flask.json
from flask_sqlalchemy import SQLAlchemy
-from flask.ext import restful, login, bcrypt
+import flask_restful as restful
+import flask_login as login
+import flask_bcrypt as bcrypt
import humanize
import logging
import sys
diff --git a/cycledash/api/__init__.py b/cycledash/api/__init__.py
index 93a081e..f174139 100644
--- a/cycledash/api/__init__.py
+++ b/cycledash/api/__init__.py
@@ -1,7 +1,7 @@
from collections import OrderedDict
from flask import request
-import flask.ext.restful
-from flask.ext.login import current_user
+import flask_restful
+from flask_login import current_user
import functools
import voluptuous
@@ -10,7 +10,7 @@
from cycledash.helpers import prepare_request_data, camelcase_dict
-class Resource(flask.ext.restful.Resource, object):
+class Resource(flask_restful.Resource, object):
"""Extends Resource by adding an authentication check for basic auth or
valid session cokie.
"""
@@ -33,7 +33,7 @@ def dispatch_request(self, *args, **kwargs):
authorized = True
if not authorized:
auth_msg = 'Correct username/password required.'
- return flask.ext.restful.abort(401, message=auth_msg)
+ return flask_restful.abort(401, message=auth_msg)
return super(Resource, self).dispatch_request(*args, **kwargs)
@@ -88,12 +88,12 @@ def decorator(f):
@functools.wraps(f)
def wrapper(*args, **kwargs):
if not (request.json or request.data or request.form):
- flask.ext.restful.abort(400, message='Validation error.',
+ flask_restful.abort(400, message='Validation error.',
errors=['No data provided.'])
try:
data = schema(prepare_request_data(request))
except voluptuous.MultipleInvalid as err:
- flask.ext.restful.abort(400,
+ flask_restful.abort(400,
message='Validation error.',
errors=[str(e) for e in err.errors])
setattr(request, 'validated_body', data)
diff --git a/cycledash/api/bams.py b/cycledash/api/bams.py
index a706a80..a9ea9bc 100644
--- a/cycledash/api/bams.py
+++ b/cycledash/api/bams.py
@@ -1,23 +1,22 @@
"""Defines the API for BAMs."""
from flask import request
-from flask.ext.restful import abort, fields
+from flask_restful import abort, fields
from sqlalchemy import select, desc
import voluptuous
from voluptuous import Schema, Required, Any, Exclusive, Coerce
from common.helpers import tables, find
-from cycledash.validations import expect_one_of, PathString, Doc
+from cycledash.validations import expect_one_of, HttpPathString, Doc
from cycledash import db
from cycledash.helpers import abort_if_none_for
from cycledash.validations import Doc
-import workers.indexer
import projects
from . import Resource, marshal_with, validate_with
CreateBam = Schema({
- Required('uri'): PathString,
+ Required('uri'): HttpPathString,
# One of `project` is required, but not supported in voluptuous, so we
# enforce this in code. cf. https://github.com/alecthomas/voluptuous/issues/115
@@ -35,7 +34,7 @@
'notes': unicode,
'tissues': unicode,
'resection_date': unicode,
- 'uri': PathString
+ 'uri': HttpPathString
})
BamFields = Schema({
@@ -49,7 +48,7 @@
Doc('normal',
'Whether or not the sample is from normal tissue.'): Any(bool, None),
Doc('tissues', 'Tissue type of sample.'): Any(basestring, None),
- Doc('uri', 'The URI of the BAM on HDFS.'): PathString
+ Doc('uri', 'The URL of the BAM.'): HttpPathString
})
@@ -65,9 +64,7 @@ def get(self):
@validate_with(CreateBam)
@marshal_with(BamFields)
def post(self):
- """Create a new BAM.
-
- This will index the BAM index on HDFS if it's not already indexed."""
+ """Create a new BAM."""
try:
expect_one_of(request.validated_body, 'project_name', 'project_id')
except voluptuous.MultipleInvalid as e:
@@ -81,7 +78,6 @@ def post(self):
result = bams.insert(
request.validated_body).returning(*bams.c).execute()
bam = dict(result.fetchone())
- workers.indexer.index.delay(bam['id'])
return bam, 201
diff --git a/cycledash/api/comments.py b/cycledash/api/comments.py
index 93a7379..d9f2fdd 100644
--- a/cycledash/api/comments.py
+++ b/cycledash/api/comments.py
@@ -1,8 +1,8 @@
"""API for user comments."""
from collections import defaultdict
from flask import jsonify, request
-from flask.ext.restful import abort, fields
-from flask.ext.login import current_user
+from flask_restful import abort, fields
+from flask_login import current_user
from sqlalchemy import select, func, desc
from voluptuous import Any, Required, Coerce, Schema
diff --git a/cycledash/api/genotypes.py b/cycledash/api/genotypes.py
index 1826a8a..dbf3218 100644
--- a/cycledash/api/genotypes.py
+++ b/cycledash/api/genotypes.py
@@ -3,7 +3,7 @@
import copy
import json
from flask import request
-import flask.ext.restful as restful
+import flask_restful as restful
from plone.memoize import forever
from sqlalchemy import (select, func, types, cast, join, outerjoin, asc, desc,
and_, Integer, Float, String, distinct)
diff --git a/cycledash/api/projects.py b/cycledash/api/projects.py
index 622292f..99f5be7 100644
--- a/cycledash/api/projects.py
+++ b/cycledash/api/projects.py
@@ -1,6 +1,6 @@
"""Defines the API for Projects."""
from flask import request, redirect, jsonify, url_for, render_template
-from flask.ext.restful import fields, abort
+from flask_restful import fields, abort
from sqlalchemy import exc, select, func, desc
import voluptuous
from voluptuous import Schema, Required, Any
diff --git a/cycledash/api/runs.py b/cycledash/api/runs.py
index ae45213..eae501e 100644
--- a/cycledash/api/runs.py
+++ b/cycledash/api/runs.py
@@ -1,13 +1,13 @@
import datetime
from flask import request
-from flask.ext.restful import abort, fields
+from flask_restful import abort, fields
from sqlalchemy import select, desc, func
import voluptuous
from voluptuous import Schema, Any, Required, Exclusive, Coerce
from cycledash import db
from cycledash.helpers import get_id_where, get_where, abort_if_none_for
-from cycledash.validations import Doc, expect_one_of, PathString
+from cycledash.validations import Doc, expect_one_of, FilePathString, HttpPathString
from common.helpers import tables
import workers.runner
@@ -15,7 +15,7 @@
CreateRun = Schema({
- Required('uri'): PathString,
+ Required('uri'): FilePathString,
# One of `project` is required, but not supported in voluptuous, so we
# enforce this in code. cf. https://github.com/alecthomas/voluptuous/issues/115
@@ -23,15 +23,14 @@
Exclusive('project_name', 'project'): unicode,
Exclusive('normal_bam_id', 'normal_bam'): Coerce(int),
- Exclusive('normal_bam_uri', 'normal_bam'): PathString,
+ Exclusive('normal_bam_uri', 'normal_bam'): HttpPathString,
Exclusive('tumor_bam_id', 'tumor_bam'): Coerce(int),
- Exclusive('tumor_bam_uri', 'tumor_bam'): PathString,
+ Exclusive('tumor_bam_uri', 'tumor_bam'): HttpPathString,
'caller_name': unicode,
'project_id': Coerce(int),
'tumor_dataset_id': Coerce(int),
'normal_dataset_id': Coerce(int),
- 'truth_vcf_path': PathString,
'is_validation': bool,
'notes': unicode,
'dataset': unicode,
@@ -44,9 +43,9 @@
'caller_name': unicode,
Exclusive('normal_bam_id', 'normal_bam'): Coerce(int),
- Exclusive('normal_bam_uri', 'normal_bam'): PathString,
+ Exclusive('normal_bam_uri', 'normal_bam'): HttpPathString,
Exclusive('tumor_bam_id', 'tumor_bam'): Coerce(int),
- Exclusive('tumor_bam_uri', 'tumor_bam'): PathString,
+ Exclusive('tumor_bam_uri', 'tumor_bam'): HttpPathString,
'notes': unicode,
'vcf_header': unicode,
@@ -64,7 +63,7 @@
long,
Doc('extant_columns', 'A list of all the columns the Run has.'):
Any(basestring, None),
- Doc('uri', 'The HDFS or NFS URI of the VCF this run was based on.'):
+ Doc('uri', 'The URL of the VCF this run was based on.'):
basestring,
Doc('caller_name',
'The name of the variant caller used to generate this Run.'):
diff --git a/cycledash/api/tasks.py b/cycledash/api/tasks.py
index b0ffff3..45edd48 100644
--- a/cycledash/api/tasks.py
+++ b/cycledash/api/tasks.py
@@ -1,7 +1,7 @@
"""Methods for working with Celery task states."""
from collections import defaultdict
from sqlalchemy import select
-from flask.ext.restful import abort, fields
+from flask_restful import abort, fields
from voluptuous import Schema, Any
from common.helpers import tables
diff --git a/cycledash/auth.py b/cycledash/auth.py
index a0759c8..13ce113 100644
--- a/cycledash/auth.py
+++ b/cycledash/auth.py
@@ -1,6 +1,6 @@
"""Module to manage user authentication and identification."""
from flask import request, redirect, render_template
-from flask.ext.login import login_user, logout_user
+from flask_login import login_user, logout_user
from sqlalchemy import exc
import voluptuous
import base64
diff --git a/cycledash/helpers.py b/cycledash/helpers.py
index 9c128dd..145f05f 100644
--- a/cycledash/helpers.py
+++ b/cycledash/helpers.py
@@ -9,7 +9,7 @@
from common.helpers import tables, to_epoch
from flask import jsonify, request, url_for, redirect
-import flask.ext.restful, flask.ext.restful.fields
+import flask_restful, flask_restful.fields
import voluptuous
from werkzeug.utils import secure_filename
@@ -188,7 +188,7 @@ def abort_if_none_for(obj_name):
def abort_if_none(obj, obj_id):
"""Abort request with a 404 if object is None."""
if obj is None:
- flask.ext.restful.abort(
+ flask_restful.abort(
404,
message='No {} with id={} found.'.format(obj_name, obj_id))
else:
diff --git a/cycledash/static/js/examine/components/PileupViewer.js b/cycledash/static/js/examine/components/PileupViewer.js
index 65290bd..5dd59f4 100644
--- a/cycledash/static/js/examine/components/PileupViewer.js
+++ b/cycledash/static/js/examine/components/PileupViewer.js
@@ -55,7 +55,9 @@ var PileupViewer = React.createClass({
handleSelectRecord={_.noop}
handleOpenViewer={_.noop}
handleSetComment={_.noop}
- handleDeleteComment={_.noop} />
+ handleDeleteComment={_.noop}
+ currentUser={{}}
+ handleStarGenotype={_.noop} />
);
return (
@@ -68,12 +70,9 @@ var PileupViewer = React.createClass({
);
},
- // Convert an HDFS path to a browser-accessible URL via igv-httpfs.
- hdfsUrl: function(path) {
- return this.props.igvHttpfsUrl + path;
- },
canDisplayPath: function(path) {
- return path && path.indexOf('file://') == -1;
+ return path &&
+ (path.indexOf('http://') > -1 || path.indexOf('https://') > -1);
},
handleClose: function(e) {
e.preventDefault();
@@ -95,14 +94,14 @@ var PileupViewer = React.createClass({
name: name,
viz: pileup.viz.variants(),
data: pileup.formats.vcf({
- url: this.hdfsUrl(path)
+ url: path
})
});
var bamSource = (name, cssClass, path, chunks) => {
var data = pileup.formats.bam({
- url: this.hdfsUrl(path),
- indexUrl: this.hdfsUrl(path + '.bai'),
+ url: path,
+ indexUrl: path + '.bai',
indexChunks: chunks
});
return [
@@ -189,7 +188,7 @@ var PileupViewer = React.createClass({
}
var chunkPath = bamPath.replace('.bam', '.bam.bai.json');
- $.getJSON(this.hdfsUrl(chunkPath))
+ $.getJSON(chunkPath)
.done((chunks) => {
this.setState(_.object([propName], [chunks]));
}).fail((jqXHR, error, textStatus) => {
@@ -207,8 +206,6 @@ var PileupViewer = React.createClass({
componentDidUpdate: function() {
this.update();
},
- componentWillUnmount: function() {
- },
shouldComponentUpdate: function(nextProps, nextState) {
return ((nextProps.isOpen != this.props.isOpen) ||
(nextProps.selectedRecord != this.props.selectedRecord));
diff --git a/cycledash/static/js/runs/components/forms.js b/cycledash/static/js/runs/components/forms.js
index 48b1cd6..d33d36c 100644
--- a/cycledash/static/js/runs/components/forms.js
+++ b/cycledash/static/js/runs/components/forms.js
@@ -60,7 +60,7 @@ var NewRunForm = React.createClass({
+ placeholder='http://cluster.example.com/hdfs/data/somebam.bam' />
Required
- uri
-
- The path on HDFS where the VCF can be found.
+ The URL to which the VCF was uploaded (as returned by the /upload endpoint).
(e.g. /users/cycledasher/pt-123.vcf)
This should be immutable, as Cycledash expects
to be able to find the VCF here at any time.
@@ -45,13 +45,13 @@
Required
Optional
- tumorBamUri
- - The path on HDFS of the tumor BAM on which the caller was run. The BAM must already be in the database.
+ - The URL where the tumor BAM on which the caller was run can be found. The BAM must already be in the database.
- tumorBamId
- The ID of the BAM.
- normalBamUri
- - The path on HDFS of the normalBAM on which the caller was run. The BAM must already be in the database.
+ - The URL where the normal BAM on which the caller was run can be found. The BAM must already be in the database.
- normalBamId
- The ID of the BAM.
@@ -100,7 +100,7 @@ BAMs
Required
- uri
- - The URI of the BAM on HDFS. Should start with 'hdfs://'.
+ - The URL of the BAM. Should start with 'http[s]://'.
- projectName/projectId
-
The name or ID of the project that the run related to.
diff --git a/cycledash/validations.py b/cycledash/validations.py
index 86d87ba..0b43d67 100644
--- a/cycledash/validations.py
+++ b/cycledash/validations.py
@@ -22,8 +22,17 @@ def expect_one_of(dct, *args):
raise MultipleInvalid(errors=[error])
-def is_path(s):
- return s[0] == '/' or s.startswith('file://') or s.startswith('hdfs://')
+def is_http_path(s):
+    """Return True when `s` begins with "http://" or "https://"."""
+    schemes = ('http://', 'https://')
+    # str.startswith accepts a tuple and matches any of its prefixes.
+    return s.startswith(schemes)
+
+
+def is_file_path(s):
+    """Return True only when `s` starts with the full "file://" scheme."""
+    # Match the whole scheme so names like "filename.vcf" are rejected.
+    return s.startswith('file://')
def is_email(s):
@@ -34,10 +43,13 @@ def to_epoch(v):
return common.helpers.to_epoch(v)
-PathString = All(unicode,
- Length(min=1),
- Msg(truth(is_path),
- 'path must start with "/", "file://" or "hdfs://"'))
+HttpPathString = All(unicode,
+ Msg(truth(is_http_path),
+ 'path must start with "http[s]://"'))
+
+FilePathString = All(unicode,
+ Msg(truth(is_file_path),
+ 'path must start with "file://"'))
class Doc(Marker):
diff --git a/cycledash/views.py b/cycledash/views.py
index 87d1868..104a6ff 100644
--- a/cycledash/views.py
+++ b/cycledash/views.py
@@ -3,14 +3,14 @@
import json
import tempfile
from flask import request, redirect, render_template, send_file, Response
-from flask.ext.login import login_required
+from flask_login import login_required
from sqlalchemy import select, desc, exc
import voluptuous
from common.relational_vcf import genotypes_to_file
from common.helpers import tables
from cycledash import app, db, api, login_manager, bcrypt
-import cycledash.auth as auth
+import cycledash.auth
from cycledash.helpers import (error_response, get_secure_unique_filename,
camelcase_dict, prepare_request_data)
import cycledash.api
@@ -81,12 +81,12 @@ def login():
if request.method == 'GET':
return render_template('login.html')
else:
- return auth.login()
+ return cycledash.auth.login()
@app.route('/logout', methods=['POST'])
def logout():
- auth.logout()
+ cycledash.auth.logout()
return redirect('about')
@@ -95,7 +95,7 @@ def register_user():
if request.method == 'GET':
return render_template('register.html')
else:
- return auth.register()
+ return cycledash.auth.register()
@app.route('/')
diff --git a/package.json b/package.json
index 33632ac..bb7fd90 100644
--- a/package.json
+++ b/package.json
@@ -18,7 +18,7 @@
"jquery": "2.1.1",
"marked": "^0.3.2",
"moment": "^2.9.0",
- "pileup": "^0.5.0",
+ "pileup": "^0.6.1",
"react": "^0.14.0",
"react-dom": "^0.14.0",
"underscore": "^1.7.0"
diff --git a/requirements.txt b/requirements.txt
index 34f24ac..f6076e8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ requests==2.3.0
voluptuous==0.8.7
wsgiref==0.1.2
psycopg2==2.5.4
-pylint==1.4.1
+pylint==1.5.0
humanize==0.5.1
varcode==0.3.11
mock==1.0.1
diff --git a/tests/python/test_authentication.py b/tests/python/test_authentication.py
index 449fce4..2ea11fc 100644
--- a/tests/python/test_authentication.py
+++ b/tests/python/test_authentication.py
@@ -1,7 +1,7 @@
"""Tests to make sure authentication is present + working."""
-from flask.ext.login import login_user
+from flask_login import login_user
import mock
import nose
import nose.tools as asserts
diff --git a/tests/python/test_bams_api.py b/tests/python/test_bams_api.py
index 741d1d1..1a25aad 100644
--- a/tests/python/test_bams_api.py
+++ b/tests/python/test_bams_api.py
@@ -9,7 +9,7 @@
import helpers
-def create_bam_with_name(project_id, name, uri='hdfs://testbam.bam'):
+def create_bam_with_name(project_id, name, uri='http://testbam.bam'):
with tables(db.engine, 'bams') as (con, bams):
res = bams.insert(
{'name': name,
@@ -21,7 +21,7 @@ def create_bam_with_name(project_id, name, uri='hdfs://testbam.bam'):
class TestBamsAPI(helpers.ResourceTest):
PROJECT_NAME = 'TEST PROJECT BAM'
BAM_NAME = 'something bam name'
- PATH = 'hdfs://somebam.bam'
+ PATH = 'http://somebam.bam'
@classmethod
def setUpClass(cls):
@@ -31,7 +31,6 @@ def setUpClass(cls):
def tearDown(self):
helpers.delete_table(db, 'bams')
- @mock.patch('workers.indexer.index.delay', lambda *args, **kwargs: True)
def test_create_bam(self):
NOTES = 'random notes'
TISSUES = 'left ovary etc'
@@ -49,7 +48,6 @@ def test_create_bam(self):
assert json.loads(r.data)['notes'] == NOTES
assert json.loads(r.data)['uri'] == self.PATH
- @mock.patch('workers.indexer.index.delay', lambda *args, **kwargs: True)
def test_create_bam_with_project_name(self):
r = self.post('/api/bams',
data={'name': self.BAM_NAME,
diff --git a/tests/python/test_comments_api.py b/tests/python/test_comments_api.py
index 5a1b636..3bb1154 100644
--- a/tests/python/test_comments_api.py
+++ b/tests/python/test_comments_api.py
@@ -29,7 +29,7 @@ class TestCommentsAPI(helpers.ResourceTest):
@classmethod
def setUpClass(cls):
cls.project = create_project_with_name('project')
- cls.run = create_run_with_uri(cls.project['id'], 'hdfs://somevcf.vcf')
+ cls.run = create_run_with_uri(cls.project['id'], 'http://somevcf.vcf')
return super(TestCommentsAPI, cls).setUpClass()
def tearDown(self):
diff --git a/tests/python/test_runs_api.py b/tests/python/test_runs_api.py
index e35353f..0bb9d4e 100644
--- a/tests/python/test_runs_api.py
+++ b/tests/python/test_runs_api.py
@@ -22,8 +22,8 @@ class TestRunsAPI(helpers.ResourceTest):
NOTES = '--with-awesome=9001'
PROJECT_NAME = 'TEST PROJECT RUN'
BAM_NAME = 'something bam name'
- BAM_PATH = 'hdfs://somebam.bam'
- RUN_PATH = 'hdfs://somevcf.vcf'
+ BAM_PATH = 'http://somebam.bam'
+ RUN_PATH = 'file://somevcf.vcf'
@classmethod
def setUpClass(cls):
@@ -36,7 +36,6 @@ def tearDown(self):
helpers.delete_table(db, 'vcfs')
@mock.patch('workers.runner', autospec=True)
- @mock.patch('workers.indexer', autospec=True)
def test_create_run(self, *args):
caller_name = 'The Testing Caller'
r = self.post('/api/runs',
@@ -56,7 +55,6 @@ def test_create_run(self, *args):
assert json.loads(r.data)['callerName'] == caller_name
@mock.patch('workers.runner', autospec=True)
- @mock.patch('workers.indexer', autospec=True)
def test_create_run_with_project_and_bam_names(self, *args):
r = self.post('/api/runs',
data={'normalBamUri': self.BAM_PATH,
@@ -96,7 +94,7 @@ def test_get_run(self, *args):
def test_get_runs(self):
run1 = create_run_with_uri(self.project['id'], self.RUN_PATH)
- run2 = create_run_with_uri(self.project['id'], 'hdfs://otherpath.vcf')
+ run2 = create_run_with_uri(self.project['id'], 'http://otherpath.vcf')
r = self.get('/api/runs')
runs = json.loads(r.data)['runs']
assert r.status_code == 200
diff --git a/tests/python/test_views.py b/tests/python/test_views.py
index ac21d19..c1b1d93 100644
--- a/tests/python/test_views.py
+++ b/tests/python/test_views.py
@@ -1,5 +1,5 @@
"""Test rendered views of Cycledash."""
-from flask.ext.login import login_user
+from flask_login import login_user
import mock
import nose
import nose.tools as asserts
@@ -49,7 +49,7 @@ def setUpClass(cls):
'vcf_header': ''}
).returning(*runs.c).execute()
cls.run = dict(res.fetchone())
- cls.run2 = create_run_with_uri(cls.project['id'], 'hdfs://someuri.vcf')
+ cls.run2 = create_run_with_uri(cls.project['id'], 'http://someuri.vcf')
cls.comment1 = create_comment_with_text(cls.run['id'], 'this is some text')
cls.comment2 = create_comment_with_text(cls.run['id'], 'more text')
diff --git a/workers/genotype_extractor.py b/workers/genotype_extractor.py
index 9c8aa63..a38a9f7 100644
--- a/workers/genotype_extractor.py
+++ b/workers/genotype_extractor.py
@@ -26,7 +26,7 @@ def extract(self, vcf_id):
def _extract(vcf_id):
- """Extract the genotypes from an on-disk or HDFS VCF and insert into the DB.
+ """Extract the genotypes from a VCF and insert into the DB.
This also fills in a few fields in the vcfs table which aren't available
until the entire VCF has been read, e.g. the variant count.
diff --git a/workers/indexer.py b/workers/indexer.py
deleted file mode 100644
index 81468c2..0000000
--- a/workers/indexer.py
+++ /dev/null
@@ -1,40 +0,0 @@
-"""Index BAM Index (BAI) files
-
-This accepts a BAM HDFS path. If there's no BAM Index Index (.bam.bai.json)
-file already available, it will generate one and put it on HDFS.
-"""
-
-import json
-
-import bai_indexer
-from StringIO import StringIO
-
-from workers.shared import (get_contents_from_hdfs, worker,
- put_new_file_to_hdfs, does_hdfs_file_exist,
- HdfsFileAlreadyExistsError, register_running_task,
- DATABASE_URI, initialize_database)
-
-@worker.task(bind=True)
-def index(self, bam_id):
- engine, connection, metadata = initialize_database(DATABASE_URI)
- bams_table = metadata.tables.get('bams')
- bam = bams_table.select().where(bams_table.c.id == bam_id).execute().fetchone()
- bam_path = bam['uri']
-
- if '.bam' not in bam_path:
- raise ValueError('Expected path to BAM file, got %s' % bam_path)
-
- bai_path = bam_path.replace('.bam', '.bam.bai')
- bai_json_path = bam_path.replace('.bam', '.bam.bai.json')
-
- if does_hdfs_file_exist(bai_json_path):
- return # nothing to do -- it's already been created
-
- contents = get_contents_from_hdfs(bai_path)
- index_json = bai_indexer.index_stream(StringIO(contents))
- index_json_str = json.dumps(index_json)
-
- try:
- put_new_file_to_hdfs(bai_json_path, index_json_str)
- except HdfsFileAlreadyExistsError:
- pass # we lost the race! (e.g. two runs were submitted simultaneously)
diff --git a/workers/runner.py b/workers/runner.py
index 83d500f..e9d90b3 100644
--- a/workers/runner.py
+++ b/workers/runner.py
@@ -1,7 +1,6 @@
from celery import chain
import json
-import indexer
from genotype_extractor import extract as extract_genotype
from varcode_annotator import annotate as varcode_genes