Skip to content

Commit

Permalink
Merge pull request #254 from hammerlab/overwrite_vcf
Browse files Browse the repository at this point in the history
Allow VCFs to be overwritten
  • Loading branch information
tavinathanson committed Nov 20, 2014
2 parents 7c98554 + 50a09f5 commit 6dd4b81
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 9 deletions.
1 change: 1 addition & 0 deletions ENVTEMPLATE.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export CELERY_BROKER='amqp://localhost'
export WEBHDFS_USER=username
export WEBHDFS_URL=http://example.com:5000
export IGV_HTTPFS_URL=http://example.com:9876
# When truthy, the genotype extractor deletes an existing VCF with the same
# URI and re-imports it instead of refusing. "False" (any case) disables this.
export ALLOW_VCF_OVERWRITES=False

# True for automatic reloading & debugging JS insertion.
export USE_RELOADER=False
Expand Down
13 changes: 8 additions & 5 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import os

# ensure that false in config isn't interpreted as True
use_reloader = os.environ.get('USE_RELOADER', False)
if use_reloader and use_reloader.lower() == 'false':
use_reloader = False
USE_RELOADER = use_reloader
def handle_false(value):
    """Convert the string 'false' (any case) to the boolean False.

    Environment variables are always strings, so a setting such as
    ``USE_RELOADER=False`` would otherwise be the truthy string 'False'.

    Args:
        value: A raw setting, typically a string from ``os.environ`` or a
            non-string default such as ``False`` or ``None``.

    Returns:
        ``False`` if ``value`` is a string spelling "false" (ignoring case and
        surrounding whitespace); otherwise ``value`` unchanged.
    """
    try:
        normalized = value.strip().lower()
    except AttributeError:
        # Not string-like (e.g. a bool or None default): nothing to translate.
        return value
    if normalized == 'false':
        return False
    return value

# Environment values are strings, so route every boolean-like setting through
# handle_false to ensure that "false" in config isn't interpreted as True.
USE_RELOADER = handle_false(os.environ.get('USE_RELOADER', False))
SQLALCHEMY_DATABASE_URI = os.environ['DATABASE_URI']
PORT = int(os.environ.get('PORT', 5000))
WEBHDFS_USER = os.environ['WEBHDFS_USER']
WEBHDFS_URL = os.environ['WEBHDFS_URL']
IGV_HTTPFS_URL = os.environ['IGV_HTTPFS_URL']
# Defaults to the (already normalized) USE_RELOADER value when unset.
ALLOW_LOCAL_VCFS = handle_false(
    os.environ.get('ALLOW_LOCAL_VCFS', USE_RELOADER))
ALLOW_VCF_OVERWRITES = handle_false(
    os.environ.get('ALLOW_VCF_OVERWRITES', False))

TYPEKIT_URL = os.environ.get('TYPEKIT_URL', None)

Expand Down
4 changes: 2 additions & 2 deletions schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ CREATE TABLE vcfs (
);

CREATE TABLE vcf_annotations (
vcf_id BIGINT REFERENCES vcfs NOT NULL,
vcf_id BIGINT REFERENCES vcfs ON DELETE CASCADE NOT NULL,
annotation TEXT NOT NULL,
type TEXT NOT NULL,
"contig" TEXT,
Expand All @@ -35,7 +35,7 @@ CREATE TABLE data_annotations (
);

CREATE TABLE genotypes (
vcf_id BIGINT REFERENCES vcfs NOT NULL,
vcf_id BIGINT REFERENCES vcfs ON DELETE CASCADE NOT NULL,
sample_name TEXT,
contig TEXT,
position INTEGER,
Expand Down
16 changes: 14 additions & 2 deletions workers/genotype_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
adding them to the genotypes table. Finally, determines which columns in the vcf
actually contain values, and stores a list of them in the vcf table.
"""
import config
import json
from sqlalchemy import create_engine, MetaData

Expand All @@ -23,8 +24,13 @@ def extractor(run):
engine, connection, metadata = initialize_database(DATABASE_URI)

if vcf_exists(connection, run):
print 'VCF already exists with URI {}'.format(run['vcf_path'])
return False
if config.ALLOW_VCF_OVERWRITES:
was_deleted = delete_vcf(metadata, connection, run['vcf_path'])
assert was_deleted, ("Rows should have been deleted if we are "
"deleting a VCF that exists")
else:
print 'VCF already exists with URI {}'.format(run['vcf_path'])
return False

reader, header = load_vcf_from_hdfs(run['vcf_path'])
insert_vcf_metadata(metadata, run, header)
Expand Down Expand Up @@ -85,6 +91,12 @@ def get_vcf_id(con, run):
return con.execute(query).first().id


def delete_vcf(metadata, connection, uri):
    """Delete VCFs with this URI, and return True if rows were deleted.

    Args:
        metadata: SQLAlchemy MetaData whose ``tables`` mapping contains the
            'vcfs' table.
        connection: Database connection the DELETE is executed on.
        uri: Value matched against the ``uri`` column of 'vcfs'.

    Returns:
        True if at least one row was deleted, else False.
    """
    vcfs = metadata.tables.get('vcfs')
    # Run the statement on the caller's connection rather than relying on
    # implicit execution through a bound MetaData, so the delete shares the
    # connection used by the rest of the extractor.
    result = connection.execute(vcfs.delete().where(vcfs.c.uri == uri))
    return result.rowcount > 0

def vcf_exists(connection, run):
"""Return True if the VCF exists in the vcfs table, else return False."""
query = "SELECT * FROM vcfs WHERE uri = '" + run['vcf_path'] + "'"
Expand Down

0 comments on commit 6dd4b81

Please sign in to comment.