Skip to content

Commit

Permalink
Merge 4f9409b into d161866
Browse files Browse the repository at this point in the history
  • Loading branch information
netsettler committed Aug 12, 2020
2 parents d161866 + 4f9409b commit c0d2410
Show file tree
Hide file tree
Showing 32 changed files with 844 additions and 227 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,6 @@ elasticsearch-*.deb
# Saved configurations from Elastic beanstalk that have been downloaded.
.elasticbeanstalk/saved_configs/
.elasticbeanstalk/app_versions/

# Used for some kinds of debugging in dcicutils, snovault, cgap & ff.
DEBUGLOG-*
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,16 @@ download-genes: # grabs latest gene list from the below link, unzips and drops i
mv gene_inserts_v0.4.5.json src/encoded/annotations/gene_inserts_v0.4.5.json

deploy1: # starts postgres/ES locally and loads inserts, and also starts ingestion engine
@SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` dev-servers development.ini --app-name app --clear --init --load
@DEBUGLOG=`pwd` SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` dev-servers development.ini --app-name app --clear --init --load

deploy1a: # starts postgres/ES locally and loads inserts, but does not start the ingestion engine
@SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` dev-servers development.ini --app-name app --clear --init --load --no_ingest
@DEBUGLOG=`pwd` SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` dev-servers development.ini --app-name app --clear --init --load --no_ingest

deploy1b: # starts ingestion engine separately so it can be easily stopped and restarted for debugging in foreground
@echo "Starting ingestion listener. Press ^C to exit." && SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` poetry run ingestion-listener development.ini --app-name app
@echo "Starting ingestion listener. Press ^C to exit." && DEBUGLOG=`pwd` SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` poetry run ingestion-listener development.ini --app-name app

deploy2: # spins up waitress to serve the application
@SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` pserve development.ini
@DEBUGLOG=`pwd` SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` pserve development.ini

deploy3: # uploads: GeneAnnotationFields, then Genes, then AnnotationFields, then Variant + VariantSamples
python src/encoded/commands/ingestion.py src/encoded/annotations/variant_table_v0.4.7.csv src/encoded/schemas/annotation_field.json src/encoded/schemas/variant.json src/encoded/schemas/variant_sample.json src/encoded/annotations/GAPFIRHN9YOZ.vcf hms-dbmi hms-dbmi src/encoded/annotations/gene_table_v0.4.5.csv src/encoded/schemas/gene_annotation_field.json src/encoded/schemas/gene.json src/encoded/annotations/gene_inserts_v0.4.5.json hms-dbmi hms-dbmi development.ini --post-variant-consequences --post-variants --post-gene-annotation-field-inserts --post-gene-inserts --app-name app
Expand Down
368 changes: 315 additions & 53 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
# Note: Various modules refer to this system as "encoded", not "cgap-portal".
name = "encoded"
version = "2.4.2"
version = "2.4.3"
description = "Clinical Genomics Analysis Platform"
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
license = "MIT"
Expand Down Expand Up @@ -43,7 +43,7 @@ chardet = "3.0.4"
colorama = "0.3.3"
dcicpyvcf = "1.0.0"
dcicsnovault = ">=3.1.9,<4" # Fixes build problems in 3.1.8
dcicutils = ">=0.37.0,<1" # has the LockoutManager needed for Snovault
dcicutils = ">=0.38.1,<1" # has LockoutManager 0.38.0 and check_true 0.38.1
docutils = "0.12"
elasticsearch = "5.5.3"
elasticsearch-dsl = "^5.4.0"
Expand Down Expand Up @@ -115,6 +115,8 @@ xlwt = "1.2.0"
"zope.sqlalchemy" = "1.3"

[tool.poetry.dev-dependencies]
# PyCharm says boto3-stubs contains useful type hints
boto3-stubs = ">=1.14.37.0"
coverage = ">=5.2"
codacy-coverage = ">=1.3.11"
coveralls = ">=2.1.1"
Expand Down
3 changes: 2 additions & 1 deletion src/encoded/commands/clear_variants_and_genes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import argparse
import structlog
import logging
from encoded.commands.purge_item_type import purge_item_type_from_storage

from pyramid.paster import get_app
from ..commands.purge_item_type import purge_item_type_from_storage


logger = structlog.getLogger(__name__)
Expand Down
4 changes: 2 additions & 2 deletions src/encoded/commands/gene_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import logging
from pyramid.paster import get_app
from dcicutils.misc_utils import VirtualApp
from encoded.commands.gene_table_intake import GeneTableParser, GeneTableIntakeException
from encoded.commands.ingest_genes import GeneIngestion
from ..commands.gene_table_intake import GeneTableParser, GeneTableIntakeException
from ..commands.ingest_genes import GeneIngestion
from tqdm import tqdm

logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion src/encoded/commands/gene_table_intake.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
from pyramid.paster import get_app
from dcicutils.misc_utils import VirtualApp
from encoded.commands.variant_table_intake import MappingTableParser
from ..commands.variant_table_intake import MappingTableParser


logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion src/encoded/commands/ingest_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pyramid.paster import get_app
from dcicutils.misc_utils import VirtualApp
from collections import OrderedDict
from encoded.util import resolve_file_path
from ..util import resolve_file_path

logger = logging.getLogger(__name__)
EPILOG = __doc__
Expand Down
4 changes: 2 additions & 2 deletions src/encoded/commands/ingestion.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
import argparse
import logging
from encoded.commands.variant_ingestion import run_variant_table_intake, run_ingest_vcf
from encoded.commands.gene_ingestion import run_gene_table_intake, run_ingest_genes
from ..commands.variant_ingestion import run_variant_table_intake, run_ingest_vcf
from ..commands.gene_ingestion import run_gene_table_intake, run_ingest_genes
from dcicutils.misc_utils import VirtualApp
from dcicutils.env_utils import CGAP_ENV_DEV, CGAP_ENV_WOLF
from pyramid.paster import get_app
Expand Down
5 changes: 3 additions & 2 deletions src/encoded/commands/variant_ingestion.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import argparse
import logging
from encoded.commands.variant_table_intake import MappingTableParser
from encoded.commands.ingest_vcf import VCFParser

from dcicutils.misc_utils import VirtualApp
from pyramid.paster import get_app
from tqdm import tqdm
from ..commands.variant_table_intake import MappingTableParser
from ..commands.ingest_vcf import VCFParser


logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion src/encoded/commands/variant_table_intake.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pyramid.paster import get_app
from dcicutils.misc_utils import VirtualApp
from collections import OrderedDict, Mapping
from encoded.util import resolve_file_path
from ..util import resolve_file_path

logger = logging.getLogger(__name__)
EPILOG = __doc__
Expand Down
5 changes: 3 additions & 2 deletions src/encoded/ingestion_listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ def queue_ingestion(context, request):
}
if uuids is []:
return response
queue_manager = request.registry[INGESTION_QUEUE] if not override_name \
else IngestionQueueManager(request.registry, override_name=override_name)
queue_manager = (request.registry[INGESTION_QUEUE]
if not override_name
else IngestionQueueManager(request.registry, override_name=override_name))
_, failed = queue_manager.add_uuids(uuids)
if not failed:
response['notification'] = 'Success'
Expand Down
2 changes: 1 addition & 1 deletion src/encoded/search/lucene_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from urllib.parse import urlencode
from snovault import TYPES
from snovault.elasticsearch.create_mapping import determine_if_is_date_field
from encoded.search.search_utils import (
from .search_utils import (
find_nested_path, convert_search_to_dictionary,
QueryConstructionException,
COMMON_EXCLUDED_URI_PARAMS, QUERY, FILTER, MUST, MUST_NOT, BOOL, MATCH, SHOULD,
Expand Down
1 change: 1 addition & 0 deletions src/encoded/tests/datafixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ def grandpa(testapp, project, institution, grandpa_sample):
@pytest.fixture
def mother(testapp, project, institution, grandpa, female_individual, mother_sample):
item = {
"aliases": ["test-project:indiv-003389"],
"accession": "GAPIDMOTHER1",
"samples": [mother_sample['@id']],
"age": 33,
Expand Down
12 changes: 6 additions & 6 deletions src/encoded/tests/test_gene_table_intake.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import pytest
from encoded.tests.test_variant_table_intake import (
ANNOTATION_FIELD_SCHEMA,
)
from encoded.util import resolve_file_path
from encoded.tests.variant_fixtures import GENE_ANNOTATION_FIELD_URL
from encoded.commands.gene_table_intake import GeneTableParser

from ..util import resolve_file_path
from ..commands.gene_table_intake import GeneTableParser
from .test_variant_table_intake import ANNOTATION_FIELD_SCHEMA
from .variant_fixtures import GENE_ANNOTATION_FIELD_URL


pytestmark = [pytest.mark.working, pytest.mark.ingestion]
MT_LOC = resolve_file_path('annotations/gene_table_v0.4.5.csv')
Expand Down
60 changes: 30 additions & 30 deletions src/encoded/tests/test_generate_item_from_owl.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,23 +156,23 @@ def test_get_existing_items(connection, rel_disorders, delobs_disorders):
# TODO: This test currently passes, but it is not really a unit test and does not
# quite mock the correct things, so it should be refactored.
disorder_ids = [d.get('disorder_id') for d in rel_disorders + delobs_disorders]
with mock.patch('encoded.commands.generate_items_from_owl.search_metadata', side_effect=[rel_disorders, delobs_disorders]):
with mock.patch.object(gifo, 'search_metadata', side_effect=[rel_disorders, delobs_disorders]):
dbdiseases = gifo.get_existing_items(connection, 'Disorder')
assert len(dbdiseases) == len(rel_disorders) + len(delobs_disorders)
assert all([d in dbdiseases for d in disorder_ids])


def test_get_existing_items_from_db_w_deleted(connection, disorder_gen, delobs_disorder_gen, rel_disorders, delobs_disorders):
disorder_ids = [d.get('disorder_id') for d in rel_disorders + delobs_disorders]
with mock.patch('encoded.commands.generate_items_from_owl.search_metadata', side_effect=[disorder_gen, delobs_disorder_gen]):
with mock.patch.object(gifo, 'search_metadata', side_effect=[disorder_gen, delobs_disorder_gen]):
dbdiseases = list(gifo.get_existing_items_from_db(connection, 'Disorder'))
assert len(dbdiseases) == len(rel_disorders) + len(delobs_disorders)
assert all([dis.get('disorder_id') in disorder_ids for dis in dbdiseases])


def test_get_existing_items_from_db_wo_deleted(connection, disorder_gen, rel_disorders):
disorder_ids = [d.get('disorder_id') for d in rel_disorders]
with mock.patch('encoded.commands.generate_items_from_owl.search_metadata', side_effect=[disorder_gen]):
with mock.patch.object(gifo, 'search_metadata', side_effect=[disorder_gen]):
dbdiseases = list(gifo.get_existing_items_from_db(connection, 'Disorder', include_invisible=False))
assert len(dbdiseases) == len(rel_disorders)
assert all([dis.get('disorder_id') in disorder_ids for dis in dbdiseases])
Expand All @@ -186,7 +186,7 @@ def test_get_existing_items_from_db_w_duplicates(connection, rel_disorders):
rel_disorders.append(rel_disorders[0]) # add the duplicate item
dgen = iter(rel_disorders)
disorder_ids = [d.get('disorder_id') for d in rel_disorders]
with mock.patch('encoded.commands.generate_items_from_owl.search_metadata', side_effect=[dgen]):
with mock.patch.object(gifo, 'search_metadata', side_effect=[dgen]):
dbdiseases = list(gifo.get_existing_items_from_db(connection, 'Disorder', include_invisible=False))
assert len(dbdiseases) == len(rel_disorders)
assert all([dis.get('disorder_id') in disorder_ids for dis in dbdiseases])
Expand Down Expand Up @@ -338,7 +338,7 @@ def test_is_deprecated_not_deprecated(uberon_owler5):


def test_create_term_dict(mkd_class, uberon_owler5):
with mock.patch('encoded.commands.generate_items_from_owl.get_term_name_from_rdf',
with mock.patch.object(gifo, 'get_term_name_from_rdf',
return_value='Multicystic kidney dysplasia'):
term = gifo.create_term_dict(mkd_class, 'HP:0000003', uberon_owler5, 'Phenotype')
assert term == {'hpo_id': 'HP:0000003', 'hpo_url': 'http://purl.obolibrary.org/obo/HP_0000003', 'phenotype_name': 'Multicystic kidney dysplasia'}
Expand Down Expand Up @@ -372,11 +372,11 @@ def simple_terms():
def test_add_additional_term_info(simple_terms):
val_lists = [[], ['val1'], ['val1', 'val2']]
fields = ['definition', 'synonyms', 'dbxrefs', 'alternative_ids']
with mock.patch('encoded.commands.generate_items_from_owl.convert2URIRef', return_value='blah'):
with mock.patch('encoded.commands.generate_items_from_owl.get_synonyms', side_effect=val_lists):
with mock.patch('encoded.commands.generate_items_from_owl.get_definitions', side_effect=val_lists):
with mock.patch('encoded.commands.generate_items_from_owl.get_dbxrefs', side_effect=val_lists):
with mock.patch('encoded.commands.generate_items_from_owl.get_alternative_ids', side_effect=val_lists):
with mock.patch.object(gifo, 'convert2URIRef', return_value='blah'):
with mock.patch.object(gifo, 'get_synonyms', side_effect=val_lists):
with mock.patch.object(gifo, 'get_definitions', side_effect=val_lists):
with mock.patch.object(gifo, 'get_dbxrefs', side_effect=val_lists):
with mock.patch.object(gifo, 'get_alternative_ids', side_effect=val_lists):
result = gifo.add_additional_term_info(simple_terms, 'data', 'synterms', 'defterms', 'Phenotype')
for tid, term in result.items():
for f in fields:
Expand All @@ -402,7 +402,7 @@ def returned_synonyms():


def test_get_syn_def_dbxref_altid(owler, returned_synonyms):
with mock.patch('encoded.commands.generate_items_from_owl.getObjectLiteralsOfType',
with mock.patch.object(gifo, 'getObjectLiteralsOfType',
side_effect=returned_synonyms):
checks = ['test_val1', 'test_val2']
class_ = 'test_class'
Expand Down Expand Up @@ -636,25 +636,25 @@ def test_check_for_fields_to_keep(raw_item_dict):


def test_id_fields2patch_unchanged(raw_item_dict):
with mock.patch('encoded.commands.generate_items_from_owl.get_raw_form', return_value=raw_item_dict):
with mock.patch('encoded.commands.generate_items_from_owl.compare_terms', return_value=None):
with mock.patch.object(gifo, 'get_raw_form', return_value=raw_item_dict):
with mock.patch.object(gifo, 'compare_terms', return_value=None):
assert not gifo.id_fields2patch(raw_item_dict, raw_item_dict, True)


def test_id_fields2patch_keep_term(raw_item_dict):
""" case when remove unchanged (rm_unch) param is False just returns term
"""
with mock.patch('encoded.commands.generate_items_from_owl.get_raw_form', return_value=raw_item_dict):
with mock.patch('encoded.commands.generate_items_from_owl.compare_terms', return_value=None):
with mock.patch.object(gifo, 'get_raw_form', return_value=raw_item_dict):
with mock.patch.object(gifo, 'compare_terms', return_value=None):
assert gifo.id_fields2patch(raw_item_dict, raw_item_dict, False) == raw_item_dict


def test_id_fields2patch_find_some_fields(raw_item_dict):
""" case when remove unchanged (rm_unch) param is True and compare_terms finds changed fields: returns the patch
"""
patch = {'uuid': 'uuid1', 'field1': 'val1', 'field2': ['a', 'b']}
with mock.patch('encoded.commands.generate_items_from_owl.get_raw_form', return_value=raw_item_dict):
with mock.patch('encoded.commands.generate_items_from_owl.compare_terms', return_value=patch):
with mock.patch.object(gifo, 'get_raw_form', return_value=raw_item_dict):
with mock.patch.object(gifo, 'compare_terms', return_value=patch):
assert gifo.id_fields2patch(raw_item_dict, raw_item_dict, True) == patch


Expand Down Expand Up @@ -699,8 +699,8 @@ def test_identify_item_updates_no_changes(terms, mock_logger):
dbterms = terms.copy()
for i, tid in enumerate(dbterms.keys()):
dbterms[tid].update({'uuid': 'uuid' + str(i + 1)})
with mock.patch('encoded.commands.generate_items_from_owl._get_uuids_for_linked', return_value={}):
with mock.patch('encoded.commands.generate_items_from_owl.id_fields2patch', return_value=None):
with mock.patch.object(gifo, '_get_uuids_for_linked', return_value={}):
with mock.patch.object(gifo, 'id_fields2patch', return_value=None):
assert not gifo.identify_item_updates(terms, dbterms, 'Phenotype', logger=mock_logger)


Expand All @@ -712,9 +712,9 @@ def test_identify_item_updates_w_new_term(terms, mock_logger):
terms['hp:11'] = new_term
side_effect = [None] * 9
side_effect.append(new_term)
with mock.patch('encoded.commands.generate_items_from_owl.uuid4', return_value='uuid11'):
with mock.patch('encoded.commands.generate_items_from_owl._get_uuids_for_linked', return_value={}):
with mock.patch('encoded.commands.generate_items_from_owl.id_fields2patch', side_effect=side_effect):
with mock.patch.object(gifo, 'uuid4', return_value='uuid11'):
with mock.patch.object(gifo, '_get_uuids_for_linked', return_value={}):
with mock.patch.object(gifo, 'id_fields2patch', side_effect=side_effect):
to_update = gifo.identify_item_updates(terms, dbterms, 'Phenotype', logger=mock_logger)
new_term.update({'uuid': 'uuid11'})
assert to_update[0] == new_term
Expand All @@ -732,8 +732,8 @@ def test_identify_item_updates_w_patch_term(terms, mock_logger):
se = copy.deepcopy(added_field)
se.update({'uuid': 'uuid{}'.format(n)})
side_effect.append(se)
with mock.patch('encoded.commands.generate_items_from_owl._get_uuids_for_linked', return_value={}):
with mock.patch('encoded.commands.generate_items_from_owl.id_fields2patch', side_effect=side_effect):
with mock.patch.object(gifo, '_get_uuids_for_linked', return_value={}):
with mock.patch.object(gifo, 'id_fields2patch', side_effect=side_effect):
to_update = gifo.identify_item_updates(terms, dbterms, 'Phenotype', logger=mock_logger)
assert len(to_update) == 2
for upd in to_update:
Expand All @@ -751,8 +751,8 @@ def test_identify_item_updates_set_obsolete_true_obsolete(terms, mock_logger):
for tid in dbterms.keys():
uid = tid.replace('hp:', 'uuid')
dbterms[tid].update({'uuid': uid})
with mock.patch('encoded.commands.generate_items_from_owl._get_uuids_for_linked', return_value={}):
with mock.patch('encoded.commands.generate_items_from_owl.id_fields2patch', return_value=None):
with mock.patch.object(gifo, '_get_uuids_for_linked', return_value={}):
with mock.patch.object(gifo, 'id_fields2patch', return_value=None):
to_update = gifo.identify_item_updates(terms, dbterms, 'Phenotype', logger=mock_logger)
assert len(to_update) == 1
obsterm = to_update[0]
Expand All @@ -769,8 +769,8 @@ def test_identify_item_updates_set_obsolete_false_do_not_obsolete_live_term(term
dbterms.update({added_obs['hpo_id']: added_obs})
for i, tid in enumerate(dbterms.keys()):
dbterms[tid].update({'uuid': 'uuid' + str(i + 1)})
with mock.patch('encoded.commands.generate_items_from_owl._get_uuids_for_linked', return_value={}):
with mock.patch('encoded.commands.generate_items_from_owl.id_fields2patch', return_value=None):
with mock.patch.object(gifo, '_get_uuids_for_linked', return_value={}):
with mock.patch.object(gifo, 'id_fields2patch', return_value=None):
to_update = gifo.identify_item_updates(terms, dbterms, 'Phenotype', set_obsoletes=False, logger=mock_logger)
assert not to_update

Expand All @@ -784,8 +784,8 @@ def test_identify_item_updates_set_obsolete_true_do_not_patch_obsolete_term(term
dbterms.update({added_obs['hpo_id']: added_obs})
for i, tid in enumerate(dbterms.keys()):
dbterms[tid].update({'uuid': 'uuid' + str(i + 1)})
with mock.patch('encoded.commands.generate_items_from_owl._get_uuids_for_linked', return_value={}):
with mock.patch('encoded.commands.generate_items_from_owl.id_fields2patch', return_value=None):
with mock.patch.object(gifo, '_get_uuids_for_linked', return_value={}):
with mock.patch.object(gifo, 'id_fields2patch', return_value=None):
to_update = gifo.identify_item_updates(terms, dbterms, 'Phenotype', logger=mock_logger)
assert not to_update

Expand Down
Loading

0 comments on commit c0d2410

Please sign in to comment.