From fb329f51192088fcae222524c0bc17387c3a09cb Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 29 May 2020 13:57:09 -0400 Subject: [PATCH 001/125] added some submit functions - WIP --- src/encoded/submit.py | 222 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 src/encoded/submit.py diff --git a/src/encoded/submit.py b/src/encoded/submit.py new file mode 100644 index 0000000000..aed3e862ac --- /dev/null +++ b/src/encoded/submit.py @@ -0,0 +1,222 @@ +from pyramid.paster import get_app +from pyramid.response import Response +from pyramid.view import view_config +from snovault.util import debug_log +from webtest import TestApp +import datetime +import xlrd + + +BGM_FIELD_MAPPING = { + 'bcgg-id': 'patient id', + 'bcgg-f-id': 'family id', + "date req rec'd": 'date requisition received' +} + + +POST_ORDER = ['Sample', 'SampleProcessing', 'Individual', 'Family'] + + +SECOND_ROUND = {} + + +@view_config(route_name='submit_data', request_method='POST', permission='add') +@debug_log +def submit_data(context, request): + ''' + usage notes here later + ''' + config_uri = request.json.get('config_uri', 'production.ini') + patch_only = request.json.get('patch_only', False) + post_only = request.json.get('post_only', False) + app = get_app(config_uri, 'app') + environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'} + testapp = TestApp(app, environ) + # expected response + request.response.status = 200 + result = { + 'status': 'success', + '@type': ['result'], + } + + raise NotImplementedError + + +def xls_to_json(xls_data, project, institution): + book = xlrd.open_workbook(xls_data) + sheet, = book.sheets() + row = row_generator(sheet) + top_header = next(row) + keys = next(row) + next(row) + rows = [] + counter = 0 + for values in row: + r = [val for val in values] + row_dict = {keys[i].lower(): item for i, item in enumerate(r)} + rows.append(row_dict) + + items = {'Individual': {}, 'Family': {}, 'Sample': {}, 'SampleProcessing': {}} + specimen_ids = {} + for row in rows: + indiv_alias = 'test-project:individual-{}'.format(row['patient id']) + fam_alias = 'test-project:family-{}'.format(row['family id']) + sp_alias = 'test-project:sampleproc-{}'.format(row['specimen id']) + # create items for Individual + if row['patient id'] not in items['Individual']: + items['Individual'][indiv_alias] = { + 'aliases': [indiv_alias], + 'individual_id': row['patient id'], + 'sex': row['sex'], + 'age': row['age'], + 'birth_year': row['birth year'] + } + # create/edit items for Family + if fam_alias not in items['Family']: + items['Family'][fam_alias] = { + 'aliases': [fam_alias], + 'family_id': row['family id'], + 'members': [indiv_alias] + } + else: + items['Family'][fam_alias]['members'].append(indiv_alias) + if row.get('relation to proband', '').lower() in ['proband', 'mother', 'father']: + items['Family'][fam_alias][row['relation to proband'].lower()] = indiv_alias + # create item for Sample if there is a specimen + if row['specimen id']: + samp_alias = 'test-project:sample-{}'.format(row['specimen id']) + if row['specimen id'] in specimen_ids: + samp_alias = samp_alias + '-' + specimen_ids[row['specimen id']] + specimen_ids[row['specimen id']] += 1 + else: + specimen_ids[row['specimen id']] = 1 + items['Sample'][samp_alias] = { + 'aliases': [samp_alias], + 'workup_type': row['workup type'], + 'specimen_type': row['specimen type'], + 'specimen_collection_date': row['date collected'], + 'specimen_collection_location': row['location collected'], + 
'specimen_accession': row['specimen id'], + 'date_transported': row['date transported'], + 'transported_by': row['transport method'], + 'sent_by': row['sent by'], + 'date_received': row["date rec'd at ref lab"], + 'specimen_accepted': row['specimen accepted by ref lab'], + 'dna_concentration': row['dna concentration'], + 'specimen_notes': row['specimen notes'], + 'files': [] + } + items['Individual'][indiv_alias]['samples'] = [samp_alias] + # create SampleProcessing item for that one sample if needed + if row['report required'].lower() in ['yes', 'y']: + items['SampleProcessing'][sp_alias] = { + 'aliases': [sp_alias], + 'analysis_type': row['workup type'], + 'samples': [samp_alias] + } + else: + print('WARNING: No specimen id present for patient {},' + ' sample will not be created.'.format(row['patient id'])) + # create SampleProcessing item for trio/group if needed + for v in items['Family'].values(): + if 'members' in v and len(v['members']) > 1: + # create sample_processing item + alias = 'test-project:{}-sampleproc'.format(v['family_id']) + samples = [items['Individual'][indiv].get('samples', [None])[0] for indiv in v['members']] + samples = [s for s in samples if s] + if len (samples) > 1: + sp = { + 'aliases': [alias], + 'samples': samples + } + analysis_type = items['Sample'][items['Individual'][v['proband']]['samples'][0]]['workup_type'] + if sorted(v['members']) == sorted([v['proband'], v['mother'], v['father']]): + sp['analysis_type'] = analysis_type + '-Trio' + else: + sp['analysis_type'] = analysis_type + '-Group' + items['SampleProcessing'][alias] = sp + # removed unused fields, add project and institution + for val1 in items.values(): + for val2 in val1.values(): + remove_keys = [k for k, v in val2.items() if not v] + for key in remove_keys: + del val2[key] + val2['project'] = project['@id'] + val2['institution'] = institution['@id'] + + return items + + +def check_against_db(): + alias_dict = {} + links = ['samples', 'members', 'mother', 'father', 'proband'] + for itemtype in POST_ORDER: + profile = testapp.get('/profiles/{}.json'.format(itemtype)) + for alias in results[itemtype]: + try: + # check if already in db + result = testapp.get(alias + '/?frame=object') + except Exception as e: + # post if not in db + if 'HTTPNotFound' in str(e): + validation = testapp.post_json(results[itemtype][alias], itemtype + '/?checkonly=True', status=201) + if validation: # modify to check for lack of validation errors + response = testapp.post_json(results[itemtype][alias], status=201) + # do something to record response + else: + # do something to report validation errors + pass + else: + # patch if item exists in db + alias_dict[alias] = result['@id'] + to_patch = {} + for field in results[itemtype][alias]: + if field in links: + # look up atids of links + if profile['properties'][field]['type'] != 'array': + for i, item in enumerate(results[itemtype][alias][field]): + if item in alias_dict: + results[itemtype][alias][field][i] = alias_dict[item] + elif profile['properties'][field]['type'] == 'string': + if item in alias_dict: + results[itemtype][alias][field] = alias_dict[item] + # if not an array, patch field gets overwritten (if different from db) + if profile['properties'][field]['type'] != 'array': + if results[itemtype][alias][field] != result.get(field): + to_patch[field] = results[itemtype][alias][field] + else: + # if array, patch field vals get added to what's in db + if sorted(results[itemtype][alias][field]) != sorted(result.get(field, [])): + val = result.get(field, []) + 
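                                # merge: start from what the database already holds for this field,
                                # extend with the newly submitted values, then de-duplicate with set() below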
val.extend(results[itemtype][alias][field]) + to_patch[field] = list(set(val)) + + +def cell_value(cell, datemode): + """Get cell value from excel.""" + # This should be always returning text format + ctype = cell.ctype + value = cell.value + if ctype == xlrd.XL_CELL_ERROR: # pragma: no cover + raise ValueError(repr(cell), 'cell error') + elif ctype == xlrd.XL_CELL_BOOLEAN: + return str(value).upper().strip() + elif ctype == xlrd.XL_CELL_NUMBER: + if value.is_integer(): + value = int(value) + return str(value).strip() + elif ctype == xlrd.XL_CELL_DATE: + value = xlrd.xldate_as_tuple(value, datemode) + if value[3:] == (0, 0, 0): + return datetime.date(*value[:3]).isoformat() + else: # pragma: no cover + return datetime.datetime(*value).isoformat() + elif ctype in (xlrd.XL_CELL_TEXT, xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK): + return value.strip() + raise ValueError(repr(cell), 'unknown cell type') # pragma: no cover + + +def row_generator(sheet): + datemode = sheet.book.datemode + for index in range(sheet.nrows): + yield [cell_value(cell, datemode) for cell in sheet.row(index)] From eb9a0224baf7eb4cdd109cfd87cb61f22bce10f4 Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 29 May 2020 17:05:15 -0400 Subject: [PATCH 002/125] added some comments and edits to submit.py --- src/encoded/submit.py | 74 ++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index aed3e862ac..79b7309d5e 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -14,12 +14,13 @@ } -POST_ORDER = ['Sample', 'SampleProcessing', 'Individual', 'Family'] +POST_ORDER = ['sample', 'sample_processing', 'individual', 'family'] SECOND_ROUND = {} +# This is a placeholder for a submission endpoint modified from loadxl @view_config(route_name='submit_data', request_method='POST', permission='add') @debug_log def submit_data(context, request): @@ -43,6 +44,10 @@ def submit_data(context, request): def xls_to_json(xls_data, project, institution): + ''' + Converts excel file to json for submission. + Functional but expect future changes. 
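    Illustrative usage (the workbook path and the project/institution dicts
    below are invented placeholders, not real records):

        project = {'name': 'test-project', '@id': '/projects/test-project/'}
        institution = {'name': 'test-inst', '@id': '/institutions/test-inst/'}
        items = xls_to_json('submission.xlsx', project, institution)
        # items is keyed by item type: 'individual', 'family', 'sample', 'sample_processing'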
+ ''' book = xlrd.open_workbook(xls_data) sheet, = book.sheets() row = row_generator(sheet) @@ -56,15 +61,15 @@ def xls_to_json(xls_data, project, institution): row_dict = {keys[i].lower(): item for i, item in enumerate(r)} rows.append(row_dict) - items = {'Individual': {}, 'Family': {}, 'Sample': {}, 'SampleProcessing': {}} + items = {'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}} specimen_ids = {} for row in rows: indiv_alias = 'test-project:individual-{}'.format(row['patient id']) fam_alias = 'test-project:family-{}'.format(row['family id']) sp_alias = 'test-project:sampleproc-{}'.format(row['specimen id']) # create items for Individual - if row['patient id'] not in items['Individual']: - items['Individual'][indiv_alias] = { + if row['patient id'] not in items['individual']: + items['individual'][indiv_alias] = { 'aliases': [indiv_alias], 'individual_id': row['patient id'], 'sex': row['sex'], @@ -72,16 +77,16 @@ def xls_to_json(xls_data, project, institution): 'birth_year': row['birth year'] } # create/edit items for Family - if fam_alias not in items['Family']: + if fam_alias not in items['family']: items['Family'][fam_alias] = { 'aliases': [fam_alias], 'family_id': row['family id'], 'members': [indiv_alias] } else: - items['Family'][fam_alias]['members'].append(indiv_alias) + items['family'][fam_alias]['members'].append(indiv_alias) if row.get('relation to proband', '').lower() in ['proband', 'mother', 'father']: - items['Family'][fam_alias][row['relation to proband'].lower()] = indiv_alias + items['family'][fam_alias][row['relation to proband'].lower()] = indiv_alias # create item for Sample if there is a specimen if row['specimen id']: samp_alias = 'test-project:sample-{}'.format(row['specimen id']) @@ -90,7 +95,7 @@ def xls_to_json(xls_data, project, institution): specimen_ids[row['specimen id']] += 1 else: specimen_ids[row['specimen id']] = 1 - items['Sample'][samp_alias] = { + items['sample'][samp_alias] = { 'aliases': [samp_alias], 'workup_type': row['workup type'], 'specimen_type': row['specimen type'], @@ -106,10 +111,10 @@ def xls_to_json(xls_data, project, institution): 'specimen_notes': row['specimen notes'], 'files': [] } - items['Individual'][indiv_alias]['samples'] = [samp_alias] + items['individual'][indiv_alias]['samples'] = [samp_alias] # create SampleProcessing item for that one sample if needed if row['report required'].lower() in ['yes', 'y']: - items['SampleProcessing'][sp_alias] = { + items['sample_processing'][sp_alias] = { 'aliases': [sp_alias], 'analysis_type': row['workup type'], 'samples': [samp_alias] @@ -118,24 +123,24 @@ def xls_to_json(xls_data, project, institution): print('WARNING: No specimen id present for patient {},' ' sample will not be created.'.format(row['patient id'])) # create SampleProcessing item for trio/group if needed - for v in items['Family'].values(): + for v in items['family'].values(): if 'members' in v and len(v['members']) > 1: # create sample_processing item alias = 'test-project:{}-sampleproc'.format(v['family_id']) - samples = [items['Individual'][indiv].get('samples', [None])[0] for indiv in v['members']] + samples = [items['individual'][indiv].get('samples', [None])[0] for indiv in v['members']] samples = [s for s in samples if s] if len (samples) > 1: sp = { 'aliases': [alias], 'samples': samples } - analysis_type = items['Sample'][items['Individual'][v['proband']]['samples'][0]]['workup_type'] + analysis_type = items['sample'][items['individual'][v['proband']]['samples'][0]]['workup_type'] if 
sorted(v['members']) == sorted([v['proband'], v['mother'], v['father']]): sp['analysis_type'] = analysis_type + '-Trio' else: sp['analysis_type'] = analysis_type + '-Group' - items['SampleProcessing'][alias] = sp - # removed unused fields, add project and institution + items['sample_processing'][alias] = sp + # removed unused fields, add project and institution for val1 in items.values(): for val2 in val1.values(): remove_keys = [k for k, v in val2.items() if not v] @@ -147,7 +152,15 @@ def xls_to_json(xls_data, project, institution): return items -def check_against_db(): +def validate_and_post(testapp, json_data, dryrun=False): + ''' + Still in progress, not necessarily functional yet. NOT YET TESTED. + + Function that: + 1. looks up each item in json + 2. if item in db, will validate and patch any different metadata + 3. if item not in db, will post item + ''' alias_dict = {} links = ['samples', 'members', 'mother', 'father', 'proband'] for itemtype in POST_ORDER: @@ -159,13 +172,13 @@ def check_against_db(): except Exception as e: # post if not in db if 'HTTPNotFound' in str(e): - validation = testapp.post_json(results[itemtype][alias], itemtype + '/?checkonly=True', status=201) - if validation: # modify to check for lack of validation errors - response = testapp.post_json(results[itemtype][alias], status=201) - # do something to record response - else: + validation = testapp.post_json('/{}/?checkonly=True'.format(itemtype), results[itemtype][alias], status=201) + if not validation: # modify to check for presence of validation errors # do something to report validation errors pass + elif not dryrun: # post + response = testapp.post_json('/' + itemtype, results[itemtype][alias], status=201) + # do something to record response else: # patch if item exists in db alias_dict[alias] = result['@id'] @@ -190,10 +203,26 @@ def check_against_db(): val = result.get(field, []) val.extend(results[itemtype][alias][field]) to_patch[field] = list(set(val)) + validation = testapp.patch_json(result['@id'] + '/?checkonly=True', to_patch, status=200) + if not validation: # modify to check for presence of validation errors + # do something to report validation errors + pass + elif not dryrun: # patch + response = testapp.patch_json('/' + itemtype, results[itemtype][alias], status=201) + # do something to record response + + +# This was just to see if i could post something using testapp in the python command line, currently works. +def test_function(): + app = get_app('development.ini', 'app') + environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'} + testapp = TestApp(app, environ) + response = testapp.post_json('/project', {'name': 'test', 'title': 'Test'}, status=201) + print(response) def cell_value(cell, datemode): - """Get cell value from excel.""" + """Get cell value from excel. 
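    Dates are returned in ISO format and whole-number cells come back as plain integer strings.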
[From Submit4DN]""" # This should be always returning text format ctype = cell.ctype value = cell.value @@ -217,6 +246,7 @@ def cell_value(cell, datemode): def row_generator(sheet): + '''Generator that gets rows from excel sheet [From Submit4DN]''' datemode = sheet.book.datemode for index in range(sheet.nrows): yield [cell_value(cell, datemode) for cell in sheet.row(index)] From a6fe747c13b08c3a044f539c928f867a4b92e62c Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 11 Jun 2020 13:29:39 -0400 Subject: [PATCH 003/125] Major changes to submit.py Code broken down into smaller functions Functions not all finished currently Significant refactoring --- src/encoded/submit.py | 298 +++++++++++++++++++++++++++++------------- 1 file changed, 210 insertions(+), 88 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 79b7309d5e..29a5c2fed9 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -2,8 +2,13 @@ from pyramid.response import Response from pyramid.view import view_config from snovault.util import debug_log -from webtest import TestApp +# from webtest import TestApp +from dcicutils.misc_utils import VirtualApp +from dcicutils import ff_utils +from webtest.app import AppError +import ast import datetime +import json import xlrd @@ -32,7 +37,7 @@ def submit_data(context, request): post_only = request.json.get('post_only', False) app = get_app(config_uri, 'app') environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'} - testapp = TestApp(app, environ) + virtualapp = VirtualApp(app, environ) # expected response request.response.status = 200 result = { @@ -64,95 +69,190 @@ def xls_to_json(xls_data, project, institution): items = {'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}} specimen_ids = {} for row in rows: - indiv_alias = 'test-project:individual-{}'.format(row['patient id']) - fam_alias = 'test-project:family-{}'.format(row['family id']) - sp_alias = 'test-project:sampleproc-{}'.format(row['specimen id']) + indiv_alias = '{}:individual-{}'.format(project['name'], row['patient id']) + fam_alias = '{}:family-{}'.format(project['name'], row['family id']) + sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual - if row['patient id'] not in items['individual']: - items['individual'][indiv_alias] = { - 'aliases': [indiv_alias], - 'individual_id': row['patient id'], - 'sex': row['sex'], - 'age': row['age'], - 'birth_year': row['birth year'] - } + items = fetch_individual_metadata(row, items, indiv_alias) # create/edit items for Family - if fam_alias not in items['family']: - items['Family'][fam_alias] = { - 'aliases': [fam_alias], - 'family_id': row['family id'], - 'members': [indiv_alias] - } - else: - items['family'][fam_alias]['members'].append(indiv_alias) - if row.get('relation to proband', '').lower() in ['proband', 'mother', 'father']: - items['family'][fam_alias][row['relation to proband'].lower()] = indiv_alias + items = fetch_family_metadata(row, items, indiv_alias, fam_alias) # create item for Sample if there is a specimen if row['specimen id']: - samp_alias = 'test-project:sample-{}'.format(row['specimen id']) + samp_alias = '{}:sample-{}'.format(project['name'], row['specimen id']) if row['specimen id'] in specimen_ids: samp_alias = samp_alias + '-' + specimen_ids[row['specimen id']] specimen_ids[row['specimen id']] += 1 else: specimen_ids[row['specimen id']] = 1 - items['sample'][samp_alias] = { - 'aliases': [samp_alias], - 'workup_type': row['workup type'], - 
'specimen_type': row['specimen type'], - 'specimen_collection_date': row['date collected'], - 'specimen_collection_location': row['location collected'], - 'specimen_accession': row['specimen id'], - 'date_transported': row['date transported'], - 'transported_by': row['transport method'], - 'sent_by': row['sent by'], - 'date_received': row["date rec'd at ref lab"], - 'specimen_accepted': row['specimen accepted by ref lab'], - 'dna_concentration': row['dna concentration'], - 'specimen_notes': row['specimen notes'], - 'files': [] - } - items['individual'][indiv_alias]['samples'] = [samp_alias] - # create SampleProcessing item for that one sample if needed - if row['report required'].lower() in ['yes', 'y']: - items['sample_processing'][sp_alias] = { - 'aliases': [sp_alias], - 'analysis_type': row['workup type'], - 'samples': [samp_alias] - } + items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias) else: print('WARNING: No specimen id present for patient {},' ' sample will not be created.'.format(row['patient id'])) # create SampleProcessing item for trio/group if needed - for v in items['family'].values(): + items = create_sample_processing_groups(items, sp_alias) + # removed unused fields, add project and institution + for val1 in items.values(): + for val2 in val1.values(): + remove_keys = [k for k, v in val2.items() if not v] + for key in remove_keys: + del val2[key] + val2['project'] = project['@id'] + val2['institution'] = institution['@id'] + + return items + + +def fetch_individual_metadata(row, items, indiv_alias): + new_items = items.copy() + info = { + 'aliases': [indiv_alias], + 'individual_id': row['patient id'], + 'sex': row.get('sex'), + 'age': row.get('age'), + 'birth_year': row.get('birth year') + } + if indiv_alias not in new_items['individual']: + new_items['individual'][indiv_alias] = {k: v for k, v in info.items() if v} + else: + for key in info: + if key not in new_items['individual'][indiv_alias]: + new_items['individual'][indiv_alias][key] = info[key] + return new_items + + +def fetch_family_metadata(row, items, indiv_alias, fam_alias): + new_items = items.copy() + info = { + 'aliases': [fam_alias], + 'family_id': row['family id'], + 'members': [indiv_alias] + } + if row.get('relation to proband', '').lower() in ['proband', 'mother', 'father']: + info[row['relation to proband'].lower()] = indiv_alias + if fam_alias not in new_items['family']: + new_items['family'][fam_alias] = info + else: + if indiv_alias not in new_items['family'][fam_alias]['members']: + new_items['family'][fam_alias]['members'].append(indiv_alias) + if row.get('relation to proband', '').lower() not in new_items['family'][fam_alias]: + new_items['family'][fam_alias][row['relation to proband'].lower()] = indiv_alias + return new_items + + +def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias): + new_items = items.copy() + info = { + 'aliases': [samp_alias], + 'workup_type': row.get('workup type'), + 'specimen_type': row.get('specimen type'), + 'specimen_collection_date': row.get('date collected'), + 'specimen_collection_location': row.get('location collected'), + 'specimen_accession': row['specimen id'], + 'date_transported': row.get('date transported'), + 'transported_by': row.get('transport method'), + 'sent_by': row.get('sent by'), + 'date_received': row.get("date rec'd at ref lab"), + 'specimen_accepted': row.get('specimen accepted by ref lab'), + 'dna_concentration': row.get('dna concentration'), + 'specimen_notes': row.get('specimen notes') + } + 
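# Aside on the filtering step just below: empty spreadsheet cells come through
# as '' (or None), so each metadata dict is stripped of falsy values before it
# is stored.  A minimal, self-contained sketch of the same idea -- the helper
# name and the values are made up for illustration, not existing code:
def drop_empty_fields(metadata):
    """Return a copy of `metadata` without empty/None/other falsy values."""
    return {key: value for key, value in metadata.items() if value}

# drop_empty_fields({'workup_type': 'WGS', 'specimen_notes': ''})
# -> {'workup_type': 'WGS'}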
new_items['sample'][samp_alias] = {k: v for k, v in info.items() if v} + if indiv_alias in new_items['individual']: + new_items['individual'][indiv_alias]['samples'] = [samp_alias] + # create SampleProcessing item for that one sample if needed + if row['report required'].lower() in ['yes', 'y']: + new_items['sample_processing'][sp_alias] = { + 'aliases': [sp_alias], + 'analysis_type': row['workup type'], + 'samples': [samp_alias] + } + return new_items + + +def create_sample_processing_groups(items, sp_alias): + new_items = items.copy() + for v in new_items['family'].values(): if 'members' in v and len(v['members']) > 1: # create sample_processing item - alias = 'test-project:{}-sampleproc'.format(v['family_id']) samples = [items['individual'][indiv].get('samples', [None])[0] for indiv in v['members']] samples = [s for s in samples if s] if len (samples) > 1: sp = { - 'aliases': [alias], + 'aliases': [sp_alias], 'samples': samples } analysis_type = items['sample'][items['individual'][v['proband']]['samples'][0]]['workup_type'] - if sorted(v['members']) == sorted([v['proband'], v['mother'], v['father']]): + if all([relation in v for relation in ['proband', 'mother', 'father']]) and sorted( + v['members']) == sorted([v['proband'], v['mother'], v['father']] + ): sp['analysis_type'] = analysis_type + '-Trio' else: sp['analysis_type'] = analysis_type + '-Group' - items['sample_processing'][alias] = sp - # removed unused fields, add project and institution - for val1 in items.values(): - for val2 in val1.values(): - remove_keys = [k for k, v in val2.items() if not v] - for key in remove_keys: - del val2[key] - val2['project'] = project['@id'] - val2['institution'] = institution['@id'] + new_items['sample_processing'][sp_alias] = sp + return new_items + +# NOT YET TESTED +def compare_with_db(alias, virtualapp): + try: # check if already in db + # result = virtualapp.get(alias + '/?frame=object') + # result = virtualapp.get('/search/?type=Item&aliases={}'.format(alias)) + result = virtualapp.get('/search/?type=Item&age=33') + print(result) + except Exception as e: # if not in db + print(e) + if 'HTTPNotFound' in str(e): + return None + else: + return result.json + + +def validate_item(virtualapp, item, method, itemtype, atid=None): + if method == 'post': + #import pdb; pdb.set_trace() + try: + validation = virtualapp.post_json('/{}/?checkonly=True'.format(itemtype), item) + except AppError as e: + print('exception') + return parse_exception(e) + else: + return + elif method == 'patch': + try: + validation = virtualapp.patch_json(atid + '?checkonly=True', item, status=200) + except Exception as e: + return parse_exception(e) + else: + return + else: + raise ValueError("Unrecognized method -- must be 'post' or 'patch'") + + +def parse_exception(e): + """ff_utils functions raise an exception when the expected code is not returned. + This response is a pre-formatted text, and this function will get the resonse json + out of it. 
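    Returns a list of the error descriptions found in the parsed response.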
[Adapted from Submit4DN]""" + try: + # try parsing the exception + text = e.args[0] + resp_text = text[text.index('{'):-1] + resp_text = json.loads(resp_text.replace('\\', '')) + resp_list = [error['description'] for error in resp_text['errors']] + return resp_list + # if not re-raise + except: # pragma: no cover + raise e + + +def patch_item_data(): + pass - return items +def post_item_data(): + pass -def validate_and_post(testapp, json_data, dryrun=False): + +# NOT FINISHED +def validate_and_post(virtualapp, json_data, dryrun=False): ''' Still in progress, not necessarily functional yet. NOT YET TESTED. @@ -163,22 +263,19 @@ def validate_and_post(testapp, json_data, dryrun=False): ''' alias_dict = {} links = ['samples', 'members', 'mother', 'father', 'proband'] + errors = [] + json_data_final = {'post': {}, 'patch': {}} for itemtype in POST_ORDER: - profile = testapp.get('/profiles/{}.json'.format(itemtype)) + profile = virtualapp.get('/profiles/{}.json'.format(itemtype)) for alias in results[itemtype]: - try: - # check if already in db - result = testapp.get(alias + '/?frame=object') - except Exception as e: - # post if not in db - if 'HTTPNotFound' in str(e): - validation = testapp.post_json('/{}/?checkonly=True'.format(itemtype), results[itemtype][alias], status=201) - if not validation: # modify to check for presence of validation errors - # do something to report validation errors - pass - elif not dryrun: # post - response = testapp.post_json('/' + itemtype, results[itemtype][alias], status=201) - # do something to record response + result = compare_with_db(alias) + if not result: + error = validate_item(results[itemtype][alias], 'post', itemtype) + if error: # modify to check for presence of validation errors + # do something to report validation errors + errors.append(error) + else: + json_data_final['post'].setdefault(itemtype, default=[]).append(results[itemtype][alias]) else: # patch if item exists in db alias_dict[alias] = result['@id'] @@ -203,22 +300,47 @@ def validate_and_post(testapp, json_data, dryrun=False): val = result.get(field, []) val.extend(results[itemtype][alias][field]) to_patch[field] = list(set(val)) - validation = testapp.patch_json(result['@id'] + '/?checkonly=True', to_patch, status=200) - if not validation: # modify to check for presence of validation errors + error = validate_item(to_patch, 'post', itemtype, atid=result['@id']) + if error: # modify to check for presence of validation errors # do something to report validation errors - pass - elif not dryrun: # patch - response = testapp.patch_json('/' + itemtype, results[itemtype][alias], status=201) + errors.append(error) + else: # patch + json_data_final['patch'][result['@id']] = to_patch # do something to record response + if errors: + return errors + output = [] + item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_id'} + if json_data_final['post']: + for k, v in json_data_final['post'].items(): + # also create Case and Report items for each SampleProcessing item created + for item in v: + for field in links: + if field in item: + json_data_final['patch'][item['aliases'][0]] = item[field] + del item[field] + try: + response = virtualapp.post_json('/' + k, item, status=201) + aliasdict[item['aliases'][0]] = response.json['@graph'][0]['@id'] + if response.json['status'] == 'success' and k in item_names: + output.append('Success - {} {} posted'.format(k, item[item_names[k]])) + except Exception: + pass + for k, v in json_data_final['patch'].items(): + atid = k if 
k.startswith('/') else aliasdict[k] + try: + response = testapp.patch_json(atid, v, status=200) + except Exception: + pass # This was just to see if i could post something using testapp in the python command line, currently works. -def test_function(): - app = get_app('development.ini', 'app') - environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'} - testapp = TestApp(app, environ) - response = testapp.post_json('/project', {'name': 'test', 'title': 'Test'}, status=201) - print(response) +# def test_function(): +# app = get_app('development.ini', 'app') +# environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'} +# testapp = TestApp(app, environ) +# response = testapp.post_json('/project', {'name': 'test', 'title': 'Test'}, status=201) +# print(response) def cell_value(cell, datemode): From ee5fd51b63e3c49db951337eceb7a11424011476 Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 11 Jun 2020 13:31:19 -0400 Subject: [PATCH 004/125] unit tests written for some submit.py functions --- src/encoded/tests/datafixtures.py | 1 + src/encoded/tests/test_submit.py | 162 ++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 src/encoded/tests/test_submit.py diff --git a/src/encoded/tests/datafixtures.py b/src/encoded/tests/datafixtures.py index 62daca1f15..f12fef3781 100644 --- a/src/encoded/tests/datafixtures.py +++ b/src/encoded/tests/datafixtures.py @@ -171,6 +171,7 @@ def grandpa(testapp, project, institution): @pytest.fixture def mother(testapp, project, institution, grandpa, female_individual): item = { + "aliases": ["test-project:indiv-003389"], "age": 33, "age_units": "year", 'project': project['@id'], diff --git a/src/encoded/tests/test_submit.py b/src/encoded/tests/test_submit.py new file mode 100644 index 0000000000..ce332d06da --- /dev/null +++ b/src/encoded/tests/test_submit.py @@ -0,0 +1,162 @@ +import pytest +from encoded.submit import * +import json +# from pyramid.paster import get_app +# from dcicutils.misc_utils import VirtualApp + + +@pytest.fixture +def row_dict(): + return { + 'patient id': '456', + 'family id': '333', + 'sex': 'M', + 'relation to proband': 'proband', + 'report required': 'Y', + 'specimen id': '3464467', + 'specimen type': 'blood', + 'workup type': 'WGS' + } + + +@pytest.fixture +def empty_items(): + return {'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}} + + +@pytest.fixture +def submission_info(): + return { + 'family': {'test-proj:fam1': { + 'members': ['test-proj:indiv1'], + 'proband': 'test-proj:indiv1' + }}, + 'individual': {'test-proj:indiv1': {'samples': ['test-proj:samp1']}}, + 'sample': {'test-proj:samp1': {'workup_type': 'WGS'}}, + 'sample_processing': {} + } + + +@pytest.fixture +def submission_info2(submission_info): + submission_info['family']['test-proj:fam1']['members'].append('test-proj:indiv2') + submission_info['individual']['test-proj:indiv2'] = {'samples': ['test-proj:samp2']} + submission_info['sample']['test-proj:samp2'] = {'workup_type': 'WGS'} + return submission_info + + +@pytest.fixture +def submission_info3(submission_info2): + info = submission_info2.copy() + info['family']['test-proj:fam1']['members'].append('test-proj:indiv3') + info['family']['test-proj:fam1']['mother'] = 'test-proj:indiv2' + # submission_info['family']['test-proj:fam1']['father'] = 'test-proj:indiv3' + info['individual']['test-proj:indiv3'] = {'samples': ['test-proj:samp3']} + info['sample']['test-proj:samp3'] = {'workup_type': 'WGS'} + return info + + +def 
test_fetch_individual_metadata_new(row_dict, empty_items): + items_out = fetch_individual_metadata(row_dict, empty_items, 'test-proj:indiv1') + assert items_out['individual']['test-proj:indiv1']['aliases'] == ['test-proj:indiv1'] + assert items_out['individual']['test-proj:indiv1']['individual_id'] == '456' + + +def test_fetch_individual_metadata_old(row_dict): + items = empty_items.copy() + items['individual'] = {'test-proj:indiv1': { + 'individual_id': '456', + 'age': 46, + 'aliases': ['test-proj:indiv1'] + }} + items_out = fetch_individual_metadata(row_dict, items, 'test-proj:indiv1') + assert len(items['individual']) == len(items_out['individual']) + assert 'sex' in items_out['individual']['test-proj:indiv1'] + assert 'age' in items_out['individual']['test-proj:indiv1'] + + +def test_fetch_family_metadata_new(row_dict, empty_items): + items_out = fetch_family_metadata(row_dict, empty_items, 'test-proj:indiv1', 'test-proj:fam1') + assert items_out['family']['test-proj:fam1']['members'] == ['test-proj:indiv1'] + assert items_out['family']['test-proj:fam1']['proband'] == 'test-proj:indiv1' + + +def test_fetch_family_metadata_old(row_dict): + items = empty_items.copy() + items['family'] = {'test-proj:fam1': { + 'aliases': ['test-proj:fam1'], + 'family_id': '333', + 'members': ['test-proj:indiv2'], + 'mother': 'test-proj:indiv2' + }} + items_out = fetch_family_metadata(row_dict, items, 'test-proj:indiv1', 'test-proj:fam1') + assert items_out['family']['test-proj:fam1']['members'] == ['test-proj:indiv2', 'test-proj:indiv1'] + assert items_out['family']['test-proj:fam1']['proband'] == 'test-proj:indiv1' + assert items_out['family']['test-proj:fam1']['mother'] == 'test-proj:indiv2' + + +def test_fetch_sample_metadata_sp(row_dict): + items = empty_items.copy() + items['individual'] = {'test-proj:indiv1': {}} + items_out = fetch_sample_metadata(row_dict, items, 'test-proj:indiv1', 'test-proj:samp1', 'test-proj:sp1') + assert items_out['sample']['test-proj:samp1']['specimen_accession'] == row_dict['specimen id'] + assert items_out['sample_processing']['test-proj:sp1']['samples'] == ['test-proj:samp1'] + assert items_out['individual']['test-proj:indiv1']['samples'] == ['test-proj:samp1'] + + +def test_fetch_sample_metadata_no_sp(row_dict): + items = empty_items.copy() + items['individual'] = {'test-proj:indiv1': {}} + row_dict['report required'] = 'N' + items_out = fetch_sample_metadata(row_dict, items, 'test-proj:indiv1', 'test-proj:samp1', 'test-proj:sp1') + assert items_out['sample']['test-proj:samp1']['specimen_accession'] == row_dict['specimen id'] + assert not items_out['sample_processing'] + + +def test_create_sample_processing_groups_grp(submission_info2): + items_out = create_sample_processing_groups(submission_info2, 'test-proj:sp-multi') + assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Group' + assert len(items_out['sample_processing']['test-proj:sp-multi']['samples']) == 2 + + +def test_create_sample_processing_groups_one(submission_info): + items_out = create_sample_processing_groups(submission_info, 'test-proj:sp-single') + assert not items_out['sample_processing'] + + +def test_create_sample_processing_groups_trio(submission_info3): + items_out = create_sample_processing_groups(submission_info3, 'test-proj:sp-multi') + assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Group' + submission_info3['family']['test-proj:fam1']['father'] = 'test-proj:indiv3' + items_out = 
create_sample_processing_groups(submission_info3, 'test-proj:sp-multi') + assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Trio' + + +def test_xls_to_json(project, institution): + json_out = xls_to_json('src/encoded/tests/data/documents/cgap_submit_test.xlsx', project, institution) + assert len(json_out['family']) == 1 + assert len(json_out['individual']) == 3 + assert all(['encode-project:individual-' + x in json_out['individual'] for x in ['123', '456', '789']]) + + +def test_validate_item_post_valid(testapp, a_case): + result = validate_item(testapp, a_case, 'post', 'case') + assert not result + + +def test_validate_item_post_invalid(testapp, a_case): + a_case['project'] = '/projects/invalid-project/' + result = validate_item(testapp, a_case, 'post', 'case') + assert 'not found' in result[0] + + +def test_validate_item_patch_valid(testapp, mother, grandpa): + patch_dict = {'mother': mother['aliases'][0]} + result = validate_item(testapp, patch_dict, 'patch', 'individual', atid=grandpa['@id']) + assert not result + + +def test_validate_item_patch_invalid(testapp, grandpa): + patch_dict = {'mother': 'non-existant-alias'} + result = validate_item(testapp, patch_dict, 'patch', 'individual', atid=grandpa['@id']) + assert 'not found' in result[0] From 8c90f7db37694262595c207514626f55173cf6f5 Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 11 Jun 2020 13:31:55 -0400 Subject: [PATCH 005/125] test file added for testing submit functions --- .../tests/data/documents/cgap_submit_test.xlsx | Bin 0 -> 12955 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/encoded/tests/data/documents/cgap_submit_test.xlsx diff --git a/src/encoded/tests/data/documents/cgap_submit_test.xlsx b/src/encoded/tests/data/documents/cgap_submit_test.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0d65ab327411da00aa287eec8b4637569b70ec6d GIT binary patch literal 12955 zcmeHuWmFv5x_0A@dvJGmmju_~9^Bm>f(C6YxH|-Q4-(vhyGw8l?r@ozbMDM!&bQY0 z|DNi#s(M$|`*iJHukHOltsnydi2;BH009612|$P|V9XW_0Dy!70MG$I@DCz(w$3KD z&Uz{y_9jj`jP5qp#JP~*)Y$;=*Zcpw{U08IGL=EwE@qTA^@F#f?P_5;Atfb1{1*HV zj~}T_6;-Q&ofL}JV;?9l__50uDK&-P{jg@d*OVvMhyM~;?sz8f zB$*`^kYA=2SsMf5$i_xIJ%ygChmmbsG-R|SeIccI!%+vvw%d6ww=Y~8`c{OZjdrI5 z{BHhGV6aOQT!zL4+mPEmP>BIodR4u(RgslVjl43=i9nSx{w9(JqgtW4?w&~MU}zE} zTc_F_hgXE?okbZ~kqs*m0$VUFN;M0{IkA}^G!xlM%TN<5T=$~8xBj67c1y;OiUt?G zN4|iGmpE3 zm@|*IPe7+&6pIzWYRfU|CB9QR_#W)pr5YF3PppS{q^N0b?&$PIK$4pQ*5?Y|D+EY? zgd-70^ZDxt{JYX6q?L?ZA)@ufGbFs{S37%ofdDA{%}nc6naM6+bx-#7D*UUN>N%QN zJ25f-w*Rkr{vWoXy?lIu59V9 zAU)BoPx+wiMlMDmfQHBAgaHyz=x9d=ho#5xNU2Wf&4RH5^qCmkJGaynI3an3q@tJ2 zlfmqkqs%U>mQLdYL$5kBT({+S|4I3%JfN2MBcaN}*Un)ab!Xe-t>`WXoAVK6y6YI2{oAOolbi-Nr zVTh(<>70J}^3-%|uzzOl^-cd%Wr(Z0(C{Gu03Zwifc$#LtIGUUS;|xu?b5kXeBSOq z2X4C=ra>i&kzmr}>EvHyTuokAJ4U!wgD2O%cg-t$dFr#CuN#!9I1aC`?)QE+-NAc! 
z$`CBp)l9C?D5rruwCwZIoP5K?w5Ho)BoULYU~Z8ek`A4x>{bBNwxgn?|7!5i-A9@% z*EPm=X(5F608V%MZB(h%hv;M_0zUIjA5PRhs!SR|n_1p0~TgVyd5KXJnlLHCo9YJzDrK z$Mg(1>wz>K`ViQwR@y9UHp;L#w{>d{SH4rcs%7qwA(4!$C0xD=e-CD$u?DS8n&+i^ zWzIt`bmFGFUVzczXAht=K_2wqMc^JNG>%x3O#eO>qgy0oJV^^*xQRVle=+}&WmqvL z&}l=T*?+5#XLHm6beXsMZqpZqX}5>0yN7RMSqsFj9bM$N0kL~~$$wUfb2u*cvwxXe zY%Rk#=Svg3+qc_>*^As&&X}s9w+>nl#o0vX;>vuY9rCY&-=Tl^t^W^l3`N|Y}75)cH=vRba zX#syCfKdMT=l``*^hb~WTrc{a?D;F;uT}Ox0V7d=2mH0>{wu(*b%H+u5ODtJ)Bjct zeii+7q3ci4V8UNJ^Jh1HMfrOQ_a_DbaQ+%L|B=xBD*kJj{SzRP_VPfRrp8Rawf+ZZ_Y+D0 literal 0 HcmV?d00001 From 26c70c1abb2d6cbb22e45a169bc38f3bd6537eec Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 11 Jun 2020 16:55:50 -0400 Subject: [PATCH 006/125] edits to validate_and_post in submit.py --- src/encoded/submit.py | 116 ++++++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 29a5c2fed9..47603ea17b 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -3,7 +3,7 @@ from pyramid.view import view_config from snovault.util import debug_log # from webtest import TestApp -from dcicutils.misc_utils import VirtualApp +from dcicutils.misc_utils import VirtualApp, VirtualAppError from dcicutils import ff_utils from webtest.app import AppError import ast @@ -108,9 +108,11 @@ def fetch_individual_metadata(row, items, indiv_alias): 'aliases': [indiv_alias], 'individual_id': row['patient id'], 'sex': row.get('sex'), - 'age': row.get('age'), - 'birth_year': row.get('birth year') + # 'age': int(row.get('age')), + # 'birth_year': int(row.get('birth year')) } + info['age'] = int(row['age']) if row.get('age') else None + info['birth_year'] = int(row['birth year']) if row.get('birth year') else None if indiv_alias not in new_items['individual']: new_items['individual'][indiv_alias] = {k: v for k, v in info.items() if v} else: @@ -127,15 +129,15 @@ def fetch_family_metadata(row, items, indiv_alias, fam_alias): 'family_id': row['family id'], 'members': [indiv_alias] } - if row.get('relation to proband', '').lower() in ['proband', 'mother', 'father']: - info[row['relation to proband'].lower()] = indiv_alias + if row.get('relation to proband', '').lower() == 'proband': + info['proband'] = indiv_alias if fam_alias not in new_items['family']: new_items['family'][fam_alias] = info else: if indiv_alias not in new_items['family'][fam_alias]['members']: new_items['family'][fam_alias]['members'].append(indiv_alias) - if row.get('relation to proband', '').lower() not in new_items['family'][fam_alias]: - new_items['family'][fam_alias][row['relation to proband'].lower()] = indiv_alias + if row.get('relation to proband', '').lower() == 'proband' and 'proband' not in new_items['family'][fam_alias]: + new_items['family'][fam_alias]['proband'] = indiv_alias return new_items @@ -191,35 +193,33 @@ def create_sample_processing_groups(items, sp_alias): new_items['sample_processing'][sp_alias] = sp return new_items + # NOT YET TESTED def compare_with_db(alias, virtualapp): try: # check if already in db - # result = virtualapp.get(alias + '/?frame=object') - # result = virtualapp.get('/search/?type=Item&aliases={}'.format(alias)) - result = virtualapp.get('/search/?type=Item&age=33') - print(result) + result = virtualapp.get(alias + '/?frame=object') except Exception as e: # if not in db - print(e) + # print(e) if 'HTTPNotFound' in str(e): return None else: return result.json +# TODO : Handle validation of not-yet-submitted-aliases in fields def 
validate_item(virtualapp, item, method, itemtype, atid=None): if method == 'post': #import pdb; pdb.set_trace() try: - validation = virtualapp.post_json('/{}/?checkonly=True'.format(itemtype), item) - except AppError as e: - print('exception') + validation = virtualapp.post_json('/{}/?check_only=true'.format(itemtype), item) + except (AppError, VirtualAppError) as e: return parse_exception(e) else: return elif method == 'patch': try: - validation = virtualapp.patch_json(atid + '?checkonly=True', item, status=200) - except Exception as e: + validation = virtualapp.patch_json(atid + '?check_only=true', item, status=200) + except (AppError, VirtualAppError) as e: return parse_exception(e) else: return @@ -267,71 +267,77 @@ def validate_and_post(virtualapp, json_data, dryrun=False): json_data_final = {'post': {}, 'patch': {}} for itemtype in POST_ORDER: profile = virtualapp.get('/profiles/{}.json'.format(itemtype)) - for alias in results[itemtype]: - result = compare_with_db(alias) + for alias in json_data[itemtype]: + # TODO : format fields (e.g. int, list, etc.) + result = compare_with_db(virtualapp, alias) if not result: - error = validate_item(results[itemtype][alias], 'post', itemtype) + error = validate_item(virtualapp, json_data[itemtype][alias], 'post', itemtype) if error: # modify to check for presence of validation errors # do something to report validation errors - errors.append(error) + for e in error: + errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) else: - json_data_final['post'].setdefault(itemtype, default=[]).append(results[itemtype][alias]) + json_data_final['post'].setdefault(itemtype, []) + json_data_final['post'][itemtype].append(json_data[itemtype][alias]) else: # patch if item exists in db alias_dict[alias] = result['@id'] to_patch = {} - for field in results[itemtype][alias]: + for field in json_data[itemtype][alias]: if field in links: # look up atids of links if profile['properties'][field]['type'] != 'array': - for i, item in enumerate(results[itemtype][alias][field]): + for i, item in enumerate(json_data[itemtype][alias][field]): if item in alias_dict: - results[itemtype][alias][field][i] = alias_dict[item] + json_data[itemtype][alias][field][i] = alias_dict[item] elif profile['properties'][field]['type'] == 'string': if item in alias_dict: - results[itemtype][alias][field] = alias_dict[item] + json_data[itemtype][alias][field] = alias_dict[item] # if not an array, patch field gets overwritten (if different from db) if profile['properties'][field]['type'] != 'array': - if results[itemtype][alias][field] != result.get(field): - to_patch[field] = results[itemtype][alias][field] + if json_data[itemtype][alias][field] != result.get(field): + to_patch[field] = json_data[itemtype][alias][field] else: # if array, patch field vals get added to what's in db - if sorted(results[itemtype][alias][field]) != sorted(result.get(field, [])): + if sorted(json_data[itemtype][alias][field]) != sorted(result.get(field, [])): val = result.get(field, []) - val.extend(results[itemtype][alias][field]) + val.extend(json_data[itemtype][alias][field]) to_patch[field] = list(set(val)) - error = validate_item(to_patch, 'post', itemtype, atid=result['@id']) + error = validate_item(virtualapp, to_patch, 'post', itemtype, atid=result['@id']) if error: # modify to check for presence of validation errors # do something to report validation errors - errors.append(error) + for e in error: + errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) else: # patch 
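                    # validation passed -- queue the computed diff under the item's @id for the later patch step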
json_data_final['patch'][result['@id']] = to_patch # do something to record response if errors: return errors - output = [] - item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_id'} - if json_data_final['post']: - for k, v in json_data_final['post'].items(): - # also create Case and Report items for each SampleProcessing item created - for item in v: - for field in links: - if field in item: - json_data_final['patch'][item['aliases'][0]] = item[field] - del item[field] - try: - response = virtualapp.post_json('/' + k, item, status=201) - aliasdict[item['aliases'][0]] = response.json['@graph'][0]['@id'] - if response.json['status'] == 'success' and k in item_names: - output.append('Success - {} {} posted'.format(k, item[item_names[k]])) - except Exception: - pass - for k, v in json_data_final['patch'].items(): - atid = k if k.startswith('/') else aliasdict[k] - try: - response = testapp.patch_json(atid, v, status=200) - except Exception: - pass + else: + return 'All items validated' + # output = [] + # item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_id'} + # if json_data_final['post']: + # for k, v in json_data_final['post'].items(): + # # also create Case and Report items for each SampleProcessing item created + # for item in v: + # for field in links: + # if field in item: + # json_data_final['patch'][item['aliases'][0]] = item[field] + # del item[field] + # try: + # response = virtualapp.post_json('/' + k, item, status=201) + # aliasdict[item['aliases'][0]] = response.json['@graph'][0]['@id'] + # if response.json['status'] == 'success' and k in item_names: + # output.append('Success - {} {} posted'.format(k, item[item_names[k]])) + # except Exception: + # pass + # for k, v in json_data_final['patch'].items(): + # atid = k if k.startswith('/') else aliasdict[k] + # try: + # response = testapp.patch_json(atid, v, status=200) + # except Exception: + # pass # This was just to see if i could post something using testapp in the python command line, currently works. From 1f8cddfc0beb06a93633d77365d5479173ad495d Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 14 Jun 2020 21:13:17 -0400 Subject: [PATCH 007/125] Added alias exceptions to validation error handling --- src/encoded/submit.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 47603ea17b..bc2ba93e4e 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -207,27 +207,27 @@ def compare_with_db(alias, virtualapp): # TODO : Handle validation of not-yet-submitted-aliases in fields -def validate_item(virtualapp, item, method, itemtype, atid=None): +def validate_item(virtualapp, item, method, itemtype, aliases, atid=None): if method == 'post': #import pdb; pdb.set_trace() try: validation = virtualapp.post_json('/{}/?check_only=true'.format(itemtype), item) except (AppError, VirtualAppError) as e: - return parse_exception(e) + return parse_exception(e, aliases) else: return elif method == 'patch': try: validation = virtualapp.patch_json(atid + '?check_only=true', item, status=200) except (AppError, VirtualAppError) as e: - return parse_exception(e) + return parse_exception(e, aliases) else: return else: raise ValueError("Unrecognized method -- must be 'post' or 'patch'") -def parse_exception(e): +def parse_exception(e, aliases): """ff_utils functions raise an exception when the expected code is not returned. 
This response is a pre-formatted text, and this function will get the resonse json out of it. [Adapted from Submit4DN]""" @@ -235,8 +235,14 @@ def parse_exception(e): # try parsing the exception text = e.args[0] resp_text = text[text.index('{'):-1] - resp_text = json.loads(resp_text.replace('\\', '')) - resp_list = [error['description'] for error in resp_text['errors']] + resp_dict = json.loads(resp_text.replace('\\', '')) + if resp_dict.get('description') == 'Failed validation': + resp_list = [error['description'] for error in resp_dict['errors']] + for error in resp_list: + # if error is caused by linkTo to item not submitted yet but in aliases list, + # remove that error + if 'not found' in error and error.split("'")[1] in aliases: + resp_list.remove(error) return resp_list # if not re-raise except: # pragma: no cover @@ -260,10 +266,23 @@ def validate_and_post(virtualapp, json_data, dryrun=False): 1. looks up each item in json 2. if item in db, will validate and patch any different metadata 3. if item not in db, will post item + + Current status: + Still testing validation/data organization parts - patch/post part hasn't been fully + written or tested and need to add code to create Case/Report items. + + More notes: + Case and Report items to be created at end. We don't want them in the validation report, since + they are not part of the user's spreadsheet and validation error messages would be too confusing. + We only want to create these when we are sure no validation issues in other items exist. + Spreadsheet has no Case ID, but if there is an "analysis ID" then we can create a Case ID from this + (perhaps analysis ID + indiv ID + label indicating group/trio vs solo) + Report ID can be same as case ID but with "report" appended (?) ''' alias_dict = {} links = ['samples', 'members', 'mother', 'father', 'proband'] errors = [] + all_aliases = [k for itype in json_data for k in itype] json_data_final = {'post': {}, 'patch': {}} for itemtype in POST_ORDER: profile = virtualapp.get('/profiles/{}.json'.format(itemtype)) @@ -271,7 +290,7 @@ def validate_and_post(virtualapp, json_data, dryrun=False): # TODO : format fields (e.g. int, list, etc.) 
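# One possible shape for the "format fields" TODO above (illustrative only --
# the helper name and the simple type handling are assumptions, not existing
# code): coerce the string values coming out of the spreadsheet toward the
# types declared in the schema profile before validating.
def coerce_to_schema_type(value, property_schema):
    """Best-effort cast of a submitted string to the schema-declared type."""
    prop_type = property_schema.get('type')
    if prop_type == 'integer':
        return int(value)
    if prop_type == 'number':
        return float(value)
    if prop_type == 'array' and not isinstance(value, list):
        return [value]
    return value

# e.g. coerce_to_schema_type('33', {'type': 'integer'}) -> 33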
result = compare_with_db(virtualapp, alias) if not result: - error = validate_item(virtualapp, json_data[itemtype][alias], 'post', itemtype) + error = validate_item(virtualapp, json_data[itemtype][alias], 'post', itemtype, all_aliases) if error: # modify to check for presence of validation errors # do something to report validation errors for e in error: @@ -303,7 +322,7 @@ def validate_and_post(virtualapp, json_data, dryrun=False): val = result.get(field, []) val.extend(json_data[itemtype][alias][field]) to_patch[field] = list(set(val)) - error = validate_item(virtualapp, to_patch, 'post', itemtype, atid=result['@id']) + error = validate_item(virtualapp, to_patch, 'post', itemtype, all_aliases, atid=result['@id']) if error: # modify to check for presence of validation errors # do something to report validation errors for e in error: @@ -315,6 +334,7 @@ def validate_and_post(virtualapp, json_data, dryrun=False): return errors else: return 'All items validated' + # TODO : create case and report items here - skip validation part because they are not part of user's spreadsheet # output = [] # item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_id'} # if json_data_final['post']: From e51f5e5fa90969a88f90e60983a7305a4f2d3a70 Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 15 Jun 2020 11:47:38 -0400 Subject: [PATCH 008/125] code added for creation of case and report items in submit.py --- src/encoded/submit.py | 107 ++++++++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 34 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index bc2ba93e4e..7f777596cc 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -19,7 +19,7 @@ } -POST_ORDER = ['sample', 'sample_processing', 'individual', 'family'] +POST_ORDER = ['sample', 'sample_processing', 'individual', 'family', 'report', 'case'] SECOND_ROUND = {} @@ -66,7 +66,10 @@ def xls_to_json(xls_data, project, institution): row_dict = {keys[i].lower(): item for i, item in enumerate(r)} rows.append(row_dict) - items = {'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}} + items = { + 'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}, + 'case': {}, 'report': {} + } specimen_ids = {} for row in rows: indiv_alias = '{}:individual-{}'.format(project['name'], row['patient id']) @@ -84,12 +87,14 @@ def xls_to_json(xls_data, project, institution): specimen_ids[row['specimen id']] += 1 else: specimen_ids[row['specimen id']] = 1 - items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias) + analysis_alias = '{}:analysis-{}'.format(project['name'], row['analysis id']) + items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysis_alias, fam_alias) else: print('WARNING: No specimen id present for patient {},' ' sample will not be created.'.format(row['patient id'])) # create SampleProcessing item for trio/group if needed - items = create_sample_processing_groups(items, sp_alias) + # items = create_sample_processing_groups(items, sp_alias) + items = create_case_items(items, project['name']) # removed unused fields, add project and institution for val1 in items.values(): for val2 in val1.values(): @@ -108,8 +113,6 @@ def fetch_individual_metadata(row, items, indiv_alias): 'aliases': [indiv_alias], 'individual_id': row['patient id'], 'sex': row.get('sex'), - # 'age': int(row.get('age')), - # 'birth_year': int(row.get('birth year')) } info['age'] = int(row['age']) if row.get('age') else None 
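# Small hedged helper (the name and exact behaviour are illustrative, not part of the
# patch) generalizing the guarded int() casts here: blank spreadsheet cells become None,
# and numeric cells that xlrd hands back as floats (e.g. 34.0) still coerce cleanly.
def to_int_or_none(raw):
    """Coerce a spreadsheet cell value to int, treating '' and None as missing."""
    if raw in ('', None):
        return None
    return int(float(raw))

# to_int_or_none('34') == 34; to_int_or_none(34.0) == 34; to_int_or_none('') is None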
info['birth_year'] = int(row['birth year']) if row.get('birth year') else None @@ -141,7 +144,7 @@ def fetch_family_metadata(row, items, indiv_alias, fam_alias): return new_items -def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias): +def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysis_alias, fam_alias): new_items = items.copy() info = { 'aliases': [samp_alias], @@ -162,12 +165,50 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias): if indiv_alias in new_items['individual']: new_items['individual'][indiv_alias]['samples'] = [samp_alias] # create SampleProcessing item for that one sample if needed - if row['report required'].lower() in ['yes', 'y']: - new_items['sample_processing'][sp_alias] = { - 'aliases': [sp_alias], - 'analysis_type': row['workup type'], - 'samples': [samp_alias] - } + # if row['report required'].lower() in ['yes', 'y']: + # new_items['sample_processing'][sp_alias] = { + # 'aliases': [sp_alias], + # 'analysis_type': row['workup type'], + # 'samples': [samp_alias] + # } + new_sp_item = { + # not trivial to add analysis_type here, turn into calculated property + 'aliases': [analysis_alias], + 'samples': [], + 'families': [] + } + new_items['sample_processing'].setdefault(analysis_alias, new_sp_item) + new_items['sample_processing'][analysis_alias]['samples'].append(samp_alias) + if fam_alias not in new_items['sample_processing'][analysis_alias]['families']: + new_items['sample_processing'][analysis_alias]['families'].append(fam_alias) + return new_items + + +def create_case_items(items, proj_name): + new_items = items.copy() + for k, v in items['sample_processing'].items(): + analysis_id = k[k.index('analysis-')+9:] + for sample in v['samples']: + case_id = '{}-{}'.format(analysis_id, items['sample'][sample]['specimen_accession']) + if len(v['samples']) == 1: + case_id += '-single' + elif len(v['samples']) > 1: + case_id += '-group' + case_alias = '{}:case-{}'.format(proj_name, case_id) + indiv = [ikey for ikey, ival in items['individual'].items() if sample in ival.get('samples', [])][0] + report_alias = case_alias.replace('case', 'report') + new_items['report'][report_alias] = { + 'aliases': [report_alias], + 'description': 'Analysis Report for Individual ID {}'.format(items['individual'][indiv]['individual_id']) + } + case_info = { + 'aliases': [case_alias], + 'case_id': case_id, + 'sample_processing': k, + 'individual': indiv, + 'report': report_alias + } + new_items['case'][case_alias] = case_info return new_items @@ -233,19 +274,26 @@ def parse_exception(e, aliases): out of it. 
[Adapted from Submit4DN]""" try: # try parsing the exception - text = e.args[0] + if isinstance(e, VirtualAppError): + text = e.raw_exception + else: + text = e.args[0] resp_text = text[text.index('{'):-1] resp_dict = json.loads(resp_text.replace('\\', '')) - if resp_dict.get('description') == 'Failed validation': - resp_list = [error['description'] for error in resp_dict['errors']] - for error in resp_list: + except Exception: # pragma: no cover + raise e + if resp_dict.get('description') == 'Failed validation': + keep = [] + resp_list = [error['description'] for error in resp_dict['errors']] + for error in resp_list: # if error is caused by linkTo to item not submitted yet but in aliases list, # remove that error - if 'not found' in error and error.split("'")[1] in aliases: - resp_list.remove(error) - return resp_list - # if not re-raise - except: # pragma: no cover + if 'not found' in error and error.split("'")[1] in aliases: + continue + else: + keep.append(error) + return keep + else: raise e @@ -269,22 +317,14 @@ def validate_and_post(virtualapp, json_data, dryrun=False): Current status: Still testing validation/data organization parts - patch/post part hasn't been fully - written or tested and need to add code to create Case/Report items. - - More notes: - Case and Report items to be created at end. We don't want them in the validation report, since - they are not part of the user's spreadsheet and validation error messages would be too confusing. - We only want to create these when we are sure no validation issues in other items exist. - Spreadsheet has no Case ID, but if there is an "analysis ID" then we can create a Case ID from this - (perhaps analysis ID + indiv ID + label indicating group/trio vs solo) - Report ID can be same as case ID but with "report" appended (?) + written or tested. ''' alias_dict = {} links = ['samples', 'members', 'mother', 'father', 'proband'] errors = [] - all_aliases = [k for itype in json_data for k in itype] + all_aliases = [k for itype in json_data for k in json_data[itype]] json_data_final = {'post': {}, 'patch': {}} - for itemtype in POST_ORDER: + for itemtype in POST_ORDER[:4]: # don't pre-validate case and report profile = virtualapp.get('/profiles/{}.json'.format(itemtype)) for alias in json_data[itemtype]: # TODO : format fields (e.g. int, list, etc.) 
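# Illustrative sketch only (the sample values below are invented): how the
# create_case_items logic added above derives case and report identifiers when the
# spreadsheet carries no explicit Case ID -- the analysis ID is taken from the
# sample_processing alias, joined with the specimen accession, suffixed with -single
# or -group, and the report alias reuses the case alias with 'case' swapped for 'report'.
def build_case_ids(sp_alias, specimen_accession, n_samples, proj_name='test-proj'):
    analysis_id = sp_alias[sp_alias.index('analysis-') + len('analysis-'):]
    case_id = '{}-{}'.format(analysis_id, specimen_accession)
    if n_samples == 1:
        case_id += '-single'
    elif n_samples > 1:
        case_id += '-group'
    case_alias = '{}:case-{}'.format(proj_name, case_id)
    return case_alias, case_alias.replace('case', 'report')

# build_case_ids('test-proj:analysis-55432', '3464467', 3)
# -> ('test-proj:case-55432-3464467-group', 'test-proj:report-55432-3464467-group')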
@@ -334,7 +374,6 @@ def validate_and_post(virtualapp, json_data, dryrun=False): return errors else: return 'All items validated' - # TODO : create case and report items here - skip validation part because they are not part of user's spreadsheet # output = [] # item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_id'} # if json_data_final['post']: From f72f33fbf9addf496e25e2d7a52a453fef3a5d57 Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 15 Jun 2020 11:48:13 -0400 Subject: [PATCH 009/125] cgap_submit_test.xlsx modified for more testing --- .../data/documents/cgap_submit_test.xlsx | Bin 12955 -> 13221 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/src/encoded/tests/data/documents/cgap_submit_test.xlsx b/src/encoded/tests/data/documents/cgap_submit_test.xlsx index 0d65ab327411da00aa287eec8b4637569b70ec6d..0e3409a68f19e8f22e4379178b33a7c180d15e45 100644 GIT binary patch delta 6480 zcmaKRWl-GBvo#tPcLIw$!JXaUBtWp>?(T~QLXaPU#U;3FkPxy!aF@m1oghgF1b4Ul zJoi@Jf8F=P+f`FFbNbAend<4D)7iEq_TTZb0ZtoK7#%ta3L(}%0|f=;y$heOo2Rpt zo0~K5dsi2PuBqD!16g2x-Ba*;U(^8~t8cJWLj7;EYIE}ypUC@I6zb!!nv`@8I*)$M z?#y?1(b#}IC>`Sa4`A7+79RjgBCUZe3G3N_>{ItOe6=2hb%R0{Ja4V(2a+jx4Q zxbF-5(K@YLeRJ#FEdi>GvzG~U!4__yITXIaygEd|?d%FZv~UpZ^-(yT*3)V!Wn(PC zc5HGwp<@(sLr(71lX#ZMuMf^2$i^a?(Qg~Ha%rc)ClpQL*{vP8aM;V>Cf#~ZLUay= zxWRF;3eo$9XN|tx3gYPMXn;Z?_5hIEb?)>{0lTQetd%*K&40S;-adu@(ir4ZWqmAK z97*X{6+l3UP(4jYB%$k_;`RTPE)qzXFG$4;xk`69`Dh%e9=}kYwthJ+-?k@p(r;hh zU$!+*UgB9ZdO{ZdfeXmy4{ZFai?63(on__enQ&GjTVit7;iJ#6NlZtT-hunyKzuN(;FZr%ouJ!3$$XTyv z4;PbY8}3V2nyM&~5IlKqm0w+6D$0kD7y0G=N#?~P<_kmgk?FYf#Au95Gi#llc!b}@ zmN+I=?1|d)t9Tid%&#Sj@c_7hr}z~#^G98`&Hk3hxSbMMU0=4v;w)HUiselVIsUkV zFz${GxOC)((7m4}bF$5ME*UK09nWso$)`qt1FPfDx+o7QJ$aKK^zq1Cxac6lcQN08 zJ>|!;{pvO2vgx;m*%g`rVYDxF{Xbbs`F{}fJL|UKaKMG3qk#!d+5oCG#n?T|Y=nK< z?}1uOo}}UoEN&cjS>fz{q)Si($g0JNN4uW_VSpDq$d?jMN>PR{=@e0(fy-DSO`njC zMzixsMi&y7ICO8e)n@Zba9{$n3R5TaC_GF^p@)pQ(0Ga(z1%#ckDU;W$@j3kr4Y-N z(_)fq*w6!e4E$vO24$r->`lS-JNoS(UVJ@# z>wD38iw0lkrZ%n~LQ$)S-!abZIvh`o?X2aa*Ssy0(=j@kvgq+Swmm;nf8|X3v$94p zbh>%VD{ZN5(`Yep^?Nrku9a;B2@v@hcTrW*9C1SDsKp)Kz@ z1KfXBPaaIkXNL}gOl-s2%HpYy#U%vAZ?a*cYAKssq5C1p-lj0bXh|gOP?SU!svS1E zA`y1k?>9d01<%!O{+NF z%3(kyJA-AOsM2`uQo?a7$>Kc=9gr;1XpFxa3cCM><2ga~Rx1 z!%;;LNIM&Gp}{7sEBXjyL{C;8$jI>|zDPoGR09|1@;{9;KjWb2@Q@$| zNNa`8x-)W7Ng1K&^@dI2Wa#fyth01pTh`nA$g>e+-S^B~l-@*hRhBep*HjNIfzOcu zad8(tt~#xGd|~Whtehk}cq``ku&du%GH=3cOjPnrNxa4MPH7JqXlT#yL~Dbe0LjWv zAqG!%f{Fd9-k+>u%C8=!I5&)4>E_iu+nOIcnIf$kCAY>-bx{XEA+&j2-k^kNr@j47S-qD==6NsGx{`;Y*%_RB= z5}cYNbGE`nK@n$0LBao@eCy@y=VI&iZ?>H^GIsj~Aqz5he0-JSn}q=qBUUI$R;2<1 zVF@TJ{3S<9?Qwis405xEasmj;Aiy&&;xsHi57~3S_aJ zmP8tj;Z110}(6JwV*Pyn4H zDFS_wq$6S_eQVQ5BoD8g*(!C>m><8D$MEK(<{4tP6#~DtshXf1;obaI8)bjnQ4v(r zOmvt>`uZxmfmJ>bJ05e^;ZPq?>-v3ef?Zy$-W3U-j#Fu#_Ajs$hBveGe+q08sM$-u zKnq|G=TE#1-qoM-@1I(BY`HK8-cYh?4_RFnNQl-Vj-Jy2i zhTXC-Sc?WngOR1%iJglvs>>)Js|rK_33-ZSb!6PQ z9BJ^0Nv=Cif@nEg{esL|#C|r;S#?cuR*PR27`gGTDp1OByLo%H#cltm-wIAUJ{HeZ zA_ZCHs1U9d5ed5LPA#6<6Fj?#1Ghxip?75PQ?J+{Gts53VY0a#+7RyyWyadzVp&ni zBG^Txf91DFoNg1<#$qEtTZUfSh+k(gd0}81%*~lzC?fkqRiTO7=7(%@SV^yPwXLki z4|3)b^`jS=eZ+6gDe(&{Ro(vJyiSlwD?*j%)2MjbbT|po$1!qxN1nD<5$n9yz9}|Z z9~f#P3-|}3ly)~L(oQ=EX6t8C>7>k{KC3saNo!5F40Wa@x!~RhaIPe9Tf1||yLm;V zDZg{)INYw{urGMCDkcrez0Yj-J-@W3Qscjx;|-_SY6vmoag~F&S;^!Iy$wQDqSCUB z7O5w~#XR#k(Jk!=yg8Jy`x<#AeqO%Sp@3RCR@e*}p}owS+|Cr5NRS_ILk)!`f2}Dk zmB?^U=jHWY2?{<1>YS`&?s@d6>+KJh-esR7wMnm|vcwZ$mEz)`a-%*lmAe)`M3w37 zLRWAJ99j?xZCaAXe>`o^E9+vROck`ej^7YZj5n7|L5BW+^w~pFD$Y5F&(#f6(^4}| 
zW{s%`T+&A7Jt4ElyrDbFj|3qwual5C%RDYisau#$>If?4hC(lLONesn@E+-H_Lz3} zyJqr{k$W~38$_kY6?{93LHe9;ihkAsF2uQgYQp*bSDlnkZ9#Q=drF<5>CCs{Qu+hh z+_0Qfz?o@vnI*#4$8_7aQ@oN~_&sLW3AaI^m}>#?wx{XRVP`8T(n}h~B1k2yY4&&$ z1~^RR5O)1-#70Gs`{mncE5nzoL7qJ(ytT=#2sb2X1kLmAn&C^q<&nGatpJECy6)n_ zS*_cguvXg&Kwgo#u8l<5t%{SP4iJZjAdAFreO{oY%KQF4EEh~*mpI9uC{?6O20bXt zX!*p*#39I8U{`-^DwkFg<;S0>{R1;u8ODUjTg@ezayae$0^yN6(DQY82aSv=QDHKA z%p1E7Oc95wV9%mT)Q}lrSazXa2g;5heOK5T7f?1x$lh`igOyOaD)>FG-f(Sz z*C@)c#z|uxc-&%5%99jc`OXeIQasaKmE7inpy9*YlMrS~hedj^R3ucoo*ERoph#E^ z4YjZ)LO+hCF@&PGFDk%>V`&7-VW9=9pwLw&AX~dMIx>x>Tr0GI1{8Wk4^)Y#DQ6EY zhz5qn(gKlTH02nfH0S8jX`muEw)$gYNW4y=8=HW7#8AbwFd4;XnIF2gIoP|+EA}`G z{r1+_3w0E7o}VwA)19SvJ=Fc8-b#dpKP-E@`iejKsh};s>CJLnmCdJ6 zrA*7>N8mw@vh7&Xsyz^R*#|c;{JXCfU%O1Q=st9r=m}o1+Bb<=5Sc?9m%q6@uk5~1 z`$+=G=j8PU^TJH6R!V}vs>`)M&4kWZFzInUvxD0~ddf5IJM5X2to8#-YI?f!k;BEv zxYgfeTB+nMJ7R=-Hs6L0f)f6;6Wn$sctUz+t$2}G(=8tZoWv})Ll2DfUioGf#>Puk zwgO93Vx%hFfhBAaQk8tb@CXYMpr;1yyhw!R5~(&H6Rao25mfSCMgK*q3MwlRv65Ya zl*&7F?O3`*V#1IVGhoTCG)*ZcD(%DjRT-$&yHY>ykpDCO|NRMh_{m}z`t;+lft#;Z zd+gZoBz3PMS=fA`sud&bG9Tf>!?X(qj|e z)SJ`*>0|ONVH3j2(%gb6&nVVB|$baAqsbRwCP#;sp}uU{@Mfo#M*29pHYo8 z{&KC$TM*UT8oTl}815ZKSZq{VzO}SZ>zK=%o7D7sJ80~4u7fU7-`BZc?(J53p!rB{ z(@N5EJo64+Rso@kVdD`)Y&_7;%iYqhW4kM|yZx&HZ>)==jyiPN7yrPy#Ru#>ra*aF ze72HP_JB3Ps;FBJbKZ>Q$M&MpGS+ZEu0~u+i$$i* zjI>JSuZp6X;x__EF+mn*5>y4nbr!G$7LFJ6a$cwAukPmh-qT6CofrJHZ+->>HzlMh zcps6}15G|uy-mXxREVJO->3)RfV;dla&H%JlGlTuwXRdDFDLne!ol~ZhIoxfVmTLI zl!O?uWqgz*aBd?Np0057TL*trG;BpQ2C2@;JfVas^`^U>Hy)N4q2`~Zm{+M)t7VZH zrI7M`H4LIirQ)v4IC43@ekS;LqeA~+8~mjSpOqN}X8vY?hjefYqgQtHaS_wnu!{wpcnv)j`&{uh~z1RPg@zBTMMgL1MdkaOOFdYkEuqDwR zDl;}H&?O(@sS47!#h`D-f)A}+=^@#XEg|hQWMJlg%)Angy#-)RZWYT=T3z9L!3+Go zL6dA)vIq>yZo>OKsvk@ugZTx07~KPRsjps?WQK_KmfXlmbq0VwtSJRT8c{0pKA(&7 zls+{TXo*g0cLeU)hz^}JVz>-8`P=NReA#d=cIu7Uxg5R?ynbAbqfpU1i$z0f_p=K1 z*^IVoE1r&oS67W*1e4e&5yr?;jY}nWPWL8E$Sj;-~2mG z+z)y2lB*c8ZzMDh?&d-1zaULX?3Eax|E2HEF4BrdX3ueS2-)q!(HUta54r&SqB9NaB)13R?y8j1l`^)73 From ee73402951cbf15c67d901b03178291c18a0e948 Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 15 Jun 2020 17:07:34 -0400 Subject: [PATCH 010/125] reorganized code for validation and posting in submit.py --- src/encoded/submit.py | 134 +++++++++++++++++++++++++++++------------- 1 file changed, 94 insertions(+), 40 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 7f777596cc..b2894868f9 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -22,7 +22,10 @@ POST_ORDER = ['sample', 'sample_processing', 'individual', 'family', 'report', 'case'] -SECOND_ROUND = {} +LINKS = [ + 'samples', 'members', 'mother', 'father', 'proband', 'report', + 'individual', 'sample_processing', 'families' +] # This is a placeholder for a submission endpoint modified from loadxl @@ -306,7 +309,7 @@ def post_item_data(): # NOT FINISHED -def validate_and_post(virtualapp, json_data, dryrun=False): +def validate_all_items(virtualapp, json_data): ''' Still in progress, not necessarily functional yet. NOT YET TESTED. @@ -320,12 +323,14 @@ def validate_and_post(virtualapp, json_data, dryrun=False): written or tested. 
''' alias_dict = {} - links = ['samples', 'members', 'mother', 'father', 'proband'] errors = [] all_aliases = [k for itype in json_data for k in json_data[itype]] json_data_final = {'post': {}, 'patch': {}} + validation_results = {} for itemtype in POST_ORDER[:4]: # don't pre-validate case and report - profile = virtualapp.get('/profiles/{}.json'.format(itemtype)) + if itemtype in json_data: + profile = virtualapp.get('/profiles/{}.json'.format(itemtype)) + validation_results[itemtype] = {'validated': 0, 'errors': 0} for alias in json_data[itemtype]: # TODO : format fields (e.g. int, list, etc.) result = compare_with_db(virtualapp, alias) @@ -335,23 +340,25 @@ def validate_and_post(virtualapp, json_data, dryrun=False): # do something to report validation errors for e in error: errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) + validation_results[itemtype]['errors'] += 1 else: json_data_final['post'].setdefault(itemtype, []) json_data_final['post'][itemtype].append(json_data[itemtype][alias]) + validation_results[itemtype]['validated'] += 1 else: # patch if item exists in db alias_dict[alias] = result['@id'] to_patch = {} for field in json_data[itemtype][alias]: - if field in links: + # if field in links: # look up atids of links - if profile['properties'][field]['type'] != 'array': - for i, item in enumerate(json_data[itemtype][alias][field]): - if item in alias_dict: - json_data[itemtype][alias][field][i] = alias_dict[item] - elif profile['properties'][field]['type'] == 'string': - if item in alias_dict: - json_data[itemtype][alias][field] = alias_dict[item] + # if profile['properties'][field]['type'] != 'array': + # for i, item in enumerate(json_data[itemtype][alias][field]): + # if item in alias_dict: + # json_data[itemtype][alias][field][i] = alias_dict[item] + # elif profile['properties'][field]['type'] == 'string': + # if item in alias_dict: + # json_data[itemtype][alias][field] = alias_dict[item] # if not an array, patch field gets overwritten (if different from db) if profile['properties'][field]['type'] != 'array': if json_data[itemtype][alias][field] != result.get(field): @@ -363,40 +370,87 @@ def validate_and_post(virtualapp, json_data, dryrun=False): val.extend(json_data[itemtype][alias][field]) to_patch[field] = list(set(val)) error = validate_item(virtualapp, to_patch, 'post', itemtype, all_aliases, atid=result['@id']) - if error: # modify to check for presence of validation errors - # do something to report validation errors + if error: # do something to report validation errors for e in error: errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) + validation_results[itemtype]['errors'] += 1 else: # patch - json_data_final['patch'][result['@id']] = to_patch + json_data_final['patch'].setdefault(itemtype, {}) + json_data_final['patch'][itemtype][result['@id']] = to_patch # do something to record response + validation_results[itemtype]['validated'] += 1 + output = [error for error in errors] + for itemtype in validation_results: + output.append('{} items: {} validated; {} errors'.format( + itemtype, validation_results[itemtype]['validated'], validation_results[itemtype]['errors'] + )) if errors: - return errors + output.append('Validation errors found in items. 
Please fix spreadsheet before submitting.') + return ({}, output) else: - return 'All items validated' - # output = [] - # item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_id'} - # if json_data_final['post']: - # for k, v in json_data_final['post'].items(): - # # also create Case and Report items for each SampleProcessing item created - # for item in v: - # for field in links: - # if field in item: - # json_data_final['patch'][item['aliases'][0]] = item[field] - # del item[field] - # try: - # response = virtualapp.post_json('/' + k, item, status=201) - # aliasdict[item['aliases'][0]] = response.json['@graph'][0]['@id'] - # if response.json['status'] == 'success' and k in item_names: - # output.append('Success - {} {} posted'.format(k, item[item_names[k]])) - # except Exception: - # pass - # for k, v in json_data_final['patch'].items(): - # atid = k if k.startswith('/') else aliasdict[k] - # try: - # response = testapp.patch_json(atid, v, status=200) - # except Exception: - # pass + json_data_final['post']['case'] = list(json_data['case'].values()) + json_data_final['post']['report'] = list(json_data['report'].values()) + json_data_final['aliases'] = alias_dict + output.append('All items validated.') + return (json_data_final, output) + + +def post_and_patch_all_items(virtualapp, json_data_final): + output = [] + item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_accession'} + final_status = {} + if json_data_final['post']: + for k, v in json_data_final['post'].items(): + final_status[k] = {'posted': 0, 'not posted': 0, 'patched': 0, 'not patched': 0} + for item in v: + patch_info = {} + for field in LINKS: + if field in item: + patch_info[field] = item[field] + del item[field] + # return json_data_final + try: + response = virtualapp.post_json('/' + k, item, status=201) + # aliasdict[item['aliases'][0]] = response.json['@graph'][0]['@id'] + if response.json['status'] == 'success': + final_status[k]['posted'] += 1 + atid = response.json['@graph'][0]['@id'] + json_data_final['aliases'][item['aliases'][0]] = atid + json_data_final['patch'].setdefault(k, {}) + json_data_final['patch'][k][atid] = patch_info + if k in item_names: + output.append('Success - {} {} posted'.format(k, item[item_names[k]])) + else: + final_status[k]['not posted'] += 1 + except Exception as e: + final_status[k]['not posted'] += 1 + output.append(e) + for itype in final_status: + if final_status[itype]['posted'] > 0 or final_status[itype]['not posted'] > 0: + output.append('{}: {} items posted successfully; {} items not posted'.format( + itype, final_status[itype]['posted'], final_status[itype]['not posted'] + )) + for k, v in json_data_final['patch'].items(): + final_status.setdefault(k, {'patched': 0, 'not patched': 0}) + for item_id, patch_data in v.items(): + # atid = k if k.startswith('/') else aliasdict[k] + try: + response = virtualapp.patch_json('/' + item_id, patch_data, status=200) + if response.json['status'] == 'success': + # if k in item_names: + # output.append('Success - {} {} patched'.format(k, patch_data[item_names[k]])) + final_status[k]['patched'] += 1 + else: + final_status[k]['not patched'] += 1 + except Exception as e: + final_status[k]['not patched'] += 1 + output.append(e) + if final_status[k]['patched'] > 0 or final_status[k]['not patched'] > 0: + output.append('{}: {} items patched successfully; {} items not patched'.format( + itype, final_status[k]['patched'], final_status[k]['not patched'] + )) + return output + # 
This was just to see if i could post something using testapp in the python command line, currently works. From 812340adeff0de598c701652f12989bd72800602 Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 15 Jun 2020 17:08:00 -0400 Subject: [PATCH 011/125] a couple submission unit tests added --- src/encoded/tests/test_submit.py | 34 ++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/encoded/tests/test_submit.py b/src/encoded/tests/test_submit.py index ce332d06da..4bf8515564 100644 --- a/src/encoded/tests/test_submit.py +++ b/src/encoded/tests/test_submit.py @@ -140,23 +140,49 @@ def test_xls_to_json(project, institution): def test_validate_item_post_valid(testapp, a_case): - result = validate_item(testapp, a_case, 'post', 'case') + result = validate_item(testapp, a_case, 'post', 'case', []) assert not result def test_validate_item_post_invalid(testapp, a_case): a_case['project'] = '/projects/invalid-project/' - result = validate_item(testapp, a_case, 'post', 'case') + result = validate_item(testapp, a_case, 'post', 'case', []) assert 'not found' in result[0] def test_validate_item_patch_valid(testapp, mother, grandpa): patch_dict = {'mother': mother['aliases'][0]} - result = validate_item(testapp, patch_dict, 'patch', 'individual', atid=grandpa['@id']) + result = validate_item(testapp, patch_dict, 'patch', 'individual', [], atid=grandpa['@id']) assert not result def test_validate_item_patch_invalid(testapp, grandpa): patch_dict = {'mother': 'non-existant-alias'} - result = validate_item(testapp, patch_dict, 'patch', 'individual', atid=grandpa['@id']) + result = validate_item(testapp, patch_dict, 'patch', 'individual', [], atid=grandpa['@id']) assert 'not found' in result[0] + + +def test_validate_item_patch_alias(testapp, grandpa): + patch_dict = {'mother': 'existing-alias'} + result = validate_item(testapp, patch_dict, 'patch', 'individual', ['existing-alias'], atid=grandpa['@id']) + assert not result + + +def test_validate_all_items_errors(testapp, mother, empty_items): + new_individual = { + 'aliases': ['test-proj:new-individual-alias'], + 'individual_id': '1234', + 'sex': 'F', + 'mother': mother['aliases'][0], + 'project': 'test-proj:invalid-project-alias', + 'institution': 'test-proj:invalid-institution-alias' + } + items = empty_items + items['individual']['new-individual-alias'] = new_individual + data_out, result = validate_all_items(testapp, items) + assert not data_out + assert len(result) > 1 + errors = ' '.join(result) + assert "'test-proj:invalid-project-alias' not found" in errors + assert "'test-proj:invalid-institution-alias' not found" in errors + assert mother['aliases'][0] not in errors From 5daf28c1f61875c807bd124778f72be24dfc3e4c Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 22 Jun 2020 16:02:52 -0400 Subject: [PATCH 012/125] script to test submission code so far added --- src/encoded/commands/submission_test.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 src/encoded/commands/submission_test.py diff --git a/src/encoded/commands/submission_test.py b/src/encoded/commands/submission_test.py new file mode 100644 index 0000000000..aef1cc1a05 --- /dev/null +++ b/src/encoded/commands/submission_test.py @@ -0,0 +1,22 @@ +from pyramid.paster import get_app +from encoded.submit import * +from dcicutils.misc_utils import VirtualApp +import json + + +def main(): + app = get_app('development.ini', 'app') + environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'} + virtualapp = 
VirtualApp(app, environ) + proj = virtualapp.get('/projects/12a92962-8265-4fc0-b2f8-cf14f05db58b/').json + inst = virtualapp.get('/institutions/hms-dbmi/').json + json_data = xls_to_json('src/encoded/tests/data/documents/cgap_submit_test.xlsx', proj, inst) + final_json, validation_log = validate_all_items(virtualapp, json_data) + print(validation_log) + print(json.dumps(final_json, indent=4)) + result = post_and_patch_all_items(virtualapp, final_json) + print(result) + + +if __name__ == '__main__': + main() From 41515cefe0b73f2280381b29fd0049a40a2f5ffe Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 22 Jun 2020 16:03:18 -0400 Subject: [PATCH 013/125] src/encoded/commands/submission_test.py added to scripts in pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index cbeecf50e3..c4a020b10c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -191,6 +191,7 @@ profiler = "encoded.commands.profiler:main" purge-item-type = "encoded.commands.purge_item_type:main" run-upgrade-on-inserts = "encoded.commands.run_upgrader_on_inserts:main" spreadsheet-to-json = "encoded.commands.spreadsheet_to_json:main" +submission-test = "encoded.commands.submission_test:main" update-inserts-from-server = "encoded.commands.update_inserts_from_server:main" verify-item = "encoded.commands.verify_item:main" From 432f08730ae6da27c842f7fa09fcfb41ebd8d629 Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 22 Jun 2020 16:03:47 -0400 Subject: [PATCH 014/125] VirtualAppError handling modified to adapt to changes in dcicutils --- src/encoded/submit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index b2894868f9..4c96580143 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -278,7 +278,7 @@ def parse_exception(e, aliases): try: # try parsing the exception if isinstance(e, VirtualAppError): - text = e.raw_exception + text = e.raw_exception.args[0] else: text = e.args[0] resp_text = text[text.index('{'):-1] From 273af15e18b648fae1b14024875b95dedbf94ffc Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 22 Jun 2020 17:08:40 -0400 Subject: [PATCH 015/125] bug fixes so that compare_with_db in submit.py works properly with alias instead of atid --- src/encoded/submit.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 4c96580143..c6636a1618 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -239,9 +239,12 @@ def create_sample_processing_groups(items, sp_alias): # NOT YET TESTED -def compare_with_db(alias, virtualapp): +def compare_with_db(virtualapp, alias): try: # check if already in db - result = virtualapp.get(alias + '/?frame=object') + result = virtualapp.get('/' + alias + '/?frame=object') + if result.status_code == 301: + msg = json.loads(result.body).get('message', '') + result = virtualapp.get(msg[msg.index('/'):msg.index(';')]) except Exception as e: # if not in db # print(e) if 'HTTPNotFound' in str(e): @@ -327,9 +330,9 @@ def validate_all_items(virtualapp, json_data): all_aliases = [k for itype in json_data for k in json_data[itype]] json_data_final = {'post': {}, 'patch': {}} validation_results = {} - for itemtype in POST_ORDER[:4]: # don't pre-validate case and report + for itemtype in POST_ORDER: # don't pre-validate case and report if itemtype in json_data: - profile = virtualapp.get('/profiles/{}.json'.format(itemtype)) + profile = 
virtualapp.get('/profiles/{}.json'.format(itemtype)).json validation_results[itemtype] = {'validated': 0, 'errors': 0} for alias in json_data[itemtype]: # TODO : format fields (e.g. int, list, etc.) @@ -338,9 +341,10 @@ def validate_all_items(virtualapp, json_data): error = validate_item(virtualapp, json_data[itemtype][alias], 'post', itemtype, all_aliases) if error: # modify to check for presence of validation errors # do something to report validation errors - for e in error: - errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) - validation_results[itemtype]['errors'] += 1 + if itemtype not in ['case', 'report']: + for e in error: + errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) + validation_results[itemtype]['errors'] += 1 else: json_data_final['post'].setdefault(itemtype, []) json_data_final['post'][itemtype].append(json_data[itemtype][alias]) @@ -369,11 +373,12 @@ def validate_all_items(virtualapp, json_data): val = result.get(field, []) val.extend(json_data[itemtype][alias][field]) to_patch[field] = list(set(val)) - error = validate_item(virtualapp, to_patch, 'post', itemtype, all_aliases, atid=result['@id']) + error = validate_item(virtualapp, to_patch, 'patch', itemtype, all_aliases, atid=result['@id']) if error: # do something to report validation errors - for e in error: - errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) - validation_results[itemtype]['errors'] += 1 + if itemtype not in ['case', 'report']: + for e in error: + errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) + validation_results[itemtype]['errors'] += 1 else: # patch json_data_final['patch'].setdefault(itemtype, {}) json_data_final['patch'][itemtype][result['@id']] = to_patch @@ -388,8 +393,8 @@ def validate_all_items(virtualapp, json_data): output.append('Validation errors found in items. 
Please fix spreadsheet before submitting.') return ({}, output) else: - json_data_final['post']['case'] = list(json_data['case'].values()) - json_data_final['post']['report'] = list(json_data['report'].values()) + # json_data_final['post']['case'] = list(json_data['case'].values()) + # json_data_final['post']['report'] = list(json_data['report'].values()) json_data_final['aliases'] = alias_dict output.append('All items validated.') return (json_data_final, output) @@ -447,7 +452,7 @@ def post_and_patch_all_items(virtualapp, json_data_final): output.append(e) if final_status[k]['patched'] > 0 or final_status[k]['not patched'] > 0: output.append('{}: {} items patched successfully; {} items not patched'.format( - itype, final_status[k]['patched'], final_status[k]['not patched'] + k, final_status[k]['patched'], final_status[k]['not patched'] )) return output From 1f9667f685262404b2d5c1f282d23495acc20416 Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 25 Jun 2020 10:54:51 -0400 Subject: [PATCH 016/125] patch logic for submit.py extracted to new compare_fields function --- src/encoded/submit.py | 86 +++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 52 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index c6636a1618..b75dcf870f 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -19,7 +19,7 @@ } -POST_ORDER = ['sample', 'sample_processing', 'individual', 'family', 'report', 'case'] +POST_ORDER = ['sample', 'individual', 'family', 'sample_processing', 'report', 'case'] LINKS = [ @@ -238,7 +238,6 @@ def create_sample_processing_groups(items, sp_alias): return new_items -# NOT YET TESTED def compare_with_db(virtualapp, alias): try: # check if already in db result = virtualapp.get('/' + alias + '/?frame=object') @@ -246,17 +245,14 @@ def compare_with_db(virtualapp, alias): msg = json.loads(result.body).get('message', '') result = virtualapp.get(msg[msg.index('/'):msg.index(';')]) except Exception as e: # if not in db - # print(e) if 'HTTPNotFound' in str(e): return None else: return result.json -# TODO : Handle validation of not-yet-submitted-aliases in fields def validate_item(virtualapp, item, method, itemtype, aliases, atid=None): if method == 'post': - #import pdb; pdb.set_trace() try: validation = virtualapp.post_json('/{}/?check_only=true'.format(itemtype), item) except (AppError, VirtualAppError) as e: @@ -303,15 +299,30 @@ def parse_exception(e, aliases): raise e -def patch_item_data(): - pass - - -def post_item_data(): - pass +def compare_fields(profile, aliases, json_item, db_item): + to_patch = {} + for field in json_item: + # if not an array, patch field gets overwritten (if different from db) + if profile['properties'][field]['type'] != 'array': + val = json_item[field] + if isinstance(val, str): + if val in aliases: + val = aliases[val] + if val != db_item.get(field): + to_patch[field] = val + else: + # if array, patch field vals get added to what's in db + if field != 'aliases': + val = [aliases[v] if v in aliases else v for v in json_item[field]] + else: + val = [v for v in json_item[field]] + if sorted(val) != sorted(db_item.get(field, [])): + new_val = db_item.get(field, []) + new_val.extend(val) + to_patch[field] = list(set(new_val)) + return to_patch -# NOT FINISHED def validate_all_items(virtualapp, json_data): ''' Still in progress, not necessarily functional yet. NOT YET TESTED. 
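# Worked toy example (the schema fragment, aliases and @ids below are invented) of what
# the compare_fields helper added above is expected to produce: scalar fields that differ
# from the db item are overwritten, array fields are merged with the db values, and
# aliases of items already in the db are first swapped for their @ids.
toy_profile = {'properties': {'sex': {'type': 'string'}, 'samples': {'type': 'array'}}}
toy_aliases = {'test-proj:sample-3464467': '/samples/GAPAB1234/'}
spreadsheet_item = {'sex': 'F', 'samples': ['test-proj:sample-3464467']}
db_item = {'sex': 'U', 'samples': ['/samples/GAPXY9999/']}
# compare_fields(toy_profile, toy_aliases, spreadsheet_item, db_item) should give
# {'sex': 'F', 'samples': ['/samples/GAPAB1234/', '/samples/GAPXY9999/']}
# (the order of the 'samples' list is not guaranteed because of the set() round trip).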
@@ -330,6 +341,7 @@ def validate_all_items(virtualapp, json_data): all_aliases = [k for itype in json_data for k in json_data[itype]] json_data_final = {'post': {}, 'patch': {}} validation_results = {} + output = [] for itemtype in POST_ORDER: # don't pre-validate case and report if itemtype in json_data: profile = virtualapp.get('/profiles/{}.json'.format(itemtype)).json @@ -352,28 +364,8 @@ def validate_all_items(virtualapp, json_data): else: # patch if item exists in db alias_dict[alias] = result['@id'] - to_patch = {} - for field in json_data[itemtype][alias]: - # if field in links: - # look up atids of links - # if profile['properties'][field]['type'] != 'array': - # for i, item in enumerate(json_data[itemtype][alias][field]): - # if item in alias_dict: - # json_data[itemtype][alias][field][i] = alias_dict[item] - # elif profile['properties'][field]['type'] == 'string': - # if item in alias_dict: - # json_data[itemtype][alias][field] = alias_dict[item] - # if not an array, patch field gets overwritten (if different from db) - if profile['properties'][field]['type'] != 'array': - if json_data[itemtype][alias][field] != result.get(field): - to_patch[field] = json_data[itemtype][alias][field] - else: - # if array, patch field vals get added to what's in db - if sorted(json_data[itemtype][alias][field]) != sorted(result.get(field, [])): - val = result.get(field, []) - val.extend(json_data[itemtype][alias][field]) - to_patch[field] = list(set(val)) - error = validate_item(virtualapp, to_patch, 'patch', itemtype, all_aliases, atid=result['@id']) + patch_data = compare_fields(profile, alias_dict, json_data[itemtype][alias], result) + error = validate_item(virtualapp, patch_data, 'patch', itemtype, all_aliases, atid=result['@id']) if error: # do something to report validation errors if itemtype not in ['case', 'report']: for e in error: @@ -381,10 +373,13 @@ def validate_all_items(virtualapp, json_data): validation_results[itemtype]['errors'] += 1 else: # patch json_data_final['patch'].setdefault(itemtype, {}) - json_data_final['patch'][itemtype][result['@id']] = to_patch + if patch_data: + json_data_final['patch'][itemtype][result['@id']] = patch_data + else: + output.append('{} {} - Item already in database, no changes needed'.format(itemtype, alias)) # do something to record response validation_results[itemtype]['validated'] += 1 - output = [error for error in errors] + output.extend([error for error in errors]) for itemtype in validation_results: output.append('{} items: {} validated; {} errors'.format( itemtype, validation_results[itemtype]['validated'], validation_results[itemtype]['errors'] @@ -393,8 +388,6 @@ def validate_all_items(virtualapp, json_data): output.append('Validation errors found in items. 
Please fix spreadsheet before submitting.') return ({}, output) else: - # json_data_final['post']['case'] = list(json_data['case'].values()) - # json_data_final['post']['report'] = list(json_data['report'].values()) json_data_final['aliases'] = alias_dict output.append('All items validated.') return (json_data_final, output) @@ -402,9 +395,11 @@ def validate_all_items(virtualapp, json_data): def post_and_patch_all_items(virtualapp, json_data_final): output = [] + if not json_data_final: + return output item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_accession'} final_status = {} - if json_data_final['post']: + if json_data_final.get('post'): for k, v in json_data_final['post'].items(): final_status[k] = {'posted': 0, 'not posted': 0, 'patched': 0, 'not patched': 0} for item in v: @@ -413,10 +408,8 @@ def post_and_patch_all_items(virtualapp, json_data_final): if field in item: patch_info[field] = item[field] del item[field] - # return json_data_final try: response = virtualapp.post_json('/' + k, item, status=201) - # aliasdict[item['aliases'][0]] = response.json['@graph'][0]['@id'] if response.json['status'] == 'success': final_status[k]['posted'] += 1 atid = response.json['@graph'][0]['@id'] @@ -438,7 +431,6 @@ def post_and_patch_all_items(virtualapp, json_data_final): for k, v in json_data_final['patch'].items(): final_status.setdefault(k, {'patched': 0, 'not patched': 0}) for item_id, patch_data in v.items(): - # atid = k if k.startswith('/') else aliasdict[k] try: response = virtualapp.patch_json('/' + item_id, patch_data, status=200) if response.json['status'] == 'success': @@ -457,16 +449,6 @@ def post_and_patch_all_items(virtualapp, json_data_final): return output - -# This was just to see if i could post something using testapp in the python command line, currently works. -# def test_function(): -# app = get_app('development.ini', 'app') -# environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'} -# testapp = TestApp(app, environ) -# response = testapp.post_json('/project', {'name': 'test', 'title': 'Test'}, status=201) -# print(response) - - def cell_value(cell, datemode): """Get cell value from excel. 
[From Submit4DN]""" # This should be always returning text format From 5edf74738a1a70591226475cb2c9ea036d69fa19 Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 25 Jun 2020 10:55:27 -0400 Subject: [PATCH 017/125] print statements restructured for submission-test command --- src/encoded/commands/submission_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/encoded/commands/submission_test.py b/src/encoded/commands/submission_test.py index aef1cc1a05..07eb772985 100644 --- a/src/encoded/commands/submission_test.py +++ b/src/encoded/commands/submission_test.py @@ -12,10 +12,10 @@ def main(): inst = virtualapp.get('/institutions/hms-dbmi/').json json_data = xls_to_json('src/encoded/tests/data/documents/cgap_submit_test.xlsx', proj, inst) final_json, validation_log = validate_all_items(virtualapp, json_data) - print(validation_log) + print('\n'.join(validation_log)) print(json.dumps(final_json, indent=4)) result = post_and_patch_all_items(virtualapp, final_json) - print(result) + print('\n'.join(result)) if __name__ == '__main__': From f6145ea50eeb08eaeb39280138393a635efba52b Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 26 Jun 2020 15:05:19 -0400 Subject: [PATCH 018/125] only generate reports when required in submit.py --- src/encoded/submit.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index b75dcf870f..87d2ce4e2a 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -84,6 +84,7 @@ def xls_to_json(xls_data, project, institution): items = fetch_family_metadata(row, items, indiv_alias, fam_alias) # create item for Sample if there is a specimen if row['specimen id']: + items['reports'] = [] samp_alias = '{}:sample-{}'.format(project['name'], row['specimen id']) if row['specimen id'] in specimen_ids: samp_alias = samp_alias + '-' + specimen_ids[row['specimen id']] @@ -182,6 +183,8 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysi } new_items['sample_processing'].setdefault(analysis_alias, new_sp_item) new_items['sample_processing'][analysis_alias]['samples'].append(samp_alias) + if row.get('report required').lower().startswith('y'): + new_items['reports'].append(samp_alias) if fam_alias not in new_items['sample_processing'][analysis_alias]['families']: new_items['sample_processing'][analysis_alias]['families'].append(fam_alias) return new_items @@ -199,19 +202,21 @@ def create_case_items(items, proj_name): case_id += '-group' case_alias = '{}:case-{}'.format(proj_name, case_id) indiv = [ikey for ikey, ival in items['individual'].items() if sample in ival.get('samples', [])][0] - report_alias = case_alias.replace('case', 'report') - new_items['report'][report_alias] = { - 'aliases': [report_alias], - 'description': 'Analysis Report for Individual ID {}'.format(items['individual'][indiv]['individual_id']) - } case_info = { 'aliases': [case_alias], 'case_id': case_id, 'sample_processing': k, - 'individual': indiv, - 'report': report_alias + 'individual': indiv } + if sample in items['reports']: + report_alias = case_alias.replace('case', 'report') + new_items['report'][report_alias] = { + 'aliases': [report_alias], + 'description': 'Analysis Report for Individual ID {}'.format(items['individual'][indiv]['individual_id']) + } + case_info['report'] = report_alias new_items['case'][case_alias] = case_info + del new_items['reports'] return new_items @@ -317,6 +322,8 @@ def compare_fields(profile, aliases, json_item, db_item): else: val 
= [v for v in json_item[field]] if sorted(val) != sorted(db_item.get(field, [])): + if len(val) == 1 and val not in db_item.get(field, []): + continue new_val = db_item.get(field, []) new_val.extend(val) to_patch[field] = list(set(new_val)) From 6dcf2464de25f2e9220f5937574a410bb8788074 Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 26 Jun 2020 16:24:18 -0400 Subject: [PATCH 019/125] edit to code for handling report items in submit.py --- src/encoded/submit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 87d2ce4e2a..d8a2cc3137 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -71,7 +71,7 @@ def xls_to_json(xls_data, project, institution): items = { 'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}, - 'case': {}, 'report': {} + 'case': {}, 'report': {}, 'reports': [] } specimen_ids = {} for row in rows: @@ -84,7 +84,6 @@ def xls_to_json(xls_data, project, institution): items = fetch_family_metadata(row, items, indiv_alias, fam_alias) # create item for Sample if there is a specimen if row['specimen id']: - items['reports'] = [] samp_alias = '{}:sample-{}'.format(project['name'], row['specimen id']) if row['specimen id'] in specimen_ids: samp_alias = samp_alias + '-' + specimen_ids[row['specimen id']] @@ -184,6 +183,7 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysi new_items['sample_processing'].setdefault(analysis_alias, new_sp_item) new_items['sample_processing'][analysis_alias]['samples'].append(samp_alias) if row.get('report required').lower().startswith('y'): + print('report') new_items['reports'].append(samp_alias) if fam_alias not in new_items['sample_processing'][analysis_alias]['families']: new_items['sample_processing'][analysis_alias]['families'].append(fam_alias) @@ -209,6 +209,7 @@ def create_case_items(items, proj_name): 'individual': indiv } if sample in items['reports']: + print('2') report_alias = case_alias.replace('case', 'report') new_items['report'][report_alias] = { 'aliases': [report_alias], From 847e01f145163616be71360a18441aa89caa14c2 Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 9 Jul 2020 14:23:47 -0400 Subject: [PATCH 020/125] family alias based on proband id rather than family id in submit.py --- src/encoded/submit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index d8a2cc3137..d4c06134f4 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -76,7 +76,7 @@ def xls_to_json(xls_data, project, institution): specimen_ids = {} for row in rows: indiv_alias = '{}:individual-{}'.format(project['name'], row['patient id']) - fam_alias = '{}:family-{}'.format(project['name'], row['family id']) + fam_alias = '{}:family-{}'.format(project['name'], row['patient id']) sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual items = fetch_individual_metadata(row, items, indiv_alias) From c7523c7bceba37baaababbb3753ab61348effb3b Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 9 Jul 2020 16:45:53 -0400 Subject: [PATCH 021/125] added more metadata fields to digest for submit.py --- src/encoded/submit.py | 49 +++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index d4c06134f4..6e9415a266 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -66,7 +66,7 @@ def xls_to_json(xls_data, project, 
institution): counter = 0 for values in row: r = [val for val in values] - row_dict = {keys[i].lower(): item for i, item in enumerate(r)} + row_dict = {keys[i].lower().rstrip('*'): item for i, item in enumerate(r)} rows.append(row_dict) items = { @@ -154,26 +154,40 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysi 'workup_type': row.get('workup type'), 'specimen_type': row.get('specimen type'), 'specimen_collection_date': row.get('date collected'), - 'specimen_collection_location': row.get('location collected'), + # change collection location to stored location? + 'specimen_collection_location': row.get('location stored'), 'specimen_accession': row['specimen id'], + # second specimen id 'date_transported': row.get('date transported'), 'transported_by': row.get('transport method'), 'sent_by': row.get('sent by'), + # sequencing ref lab 'date_received': row.get("date rec'd at ref lab"), 'specimen_accepted': row.get('specimen accepted by ref lab'), + # sample ID by ref lab 'dna_concentration': row.get('dna concentration'), - 'specimen_notes': row.get('specimen notes') + 'specimen_notes': row.get('specimen notes'), + 'files': [], # TODO: implement creation of file db items + 'requisition_type': row.get('req type'), + # research protocol name + 'date_requisition_received': row.get("date req rec'd"), + 'ordering_physician': row.get('physician/provider'), + 'physician_id': row.get('physician id'), + 'indication': row.get('indication') } + req_info = { + 'accepted_rejected': row.get('req accepted y/n'), + 'rejection_reason': row.get('reason rejected'), + 'corrective_action': row.get('corrective action taken'), + # corrective action taken by + 'date_sent': row.get('date sent'), + 'date_completed': row.get('date completed'), + 'notes': row.get('correction notes') + } + info['requisition_acceptance'] = {k, v for k, v in req_info.items() if v} new_items['sample'][samp_alias] = {k: v for k, v in info.items() if v} if indiv_alias in new_items['individual']: new_items['individual'][indiv_alias]['samples'] = [samp_alias] - # create SampleProcessing item for that one sample if needed - # if row['report required'].lower() in ['yes', 'y']: - # new_items['sample_processing'][sp_alias] = { - # 'aliases': [sp_alias], - # 'analysis_type': row['workup type'], - # 'samples': [samp_alias] - # } new_sp_item = { # not trivial to add analysis_type here, turn into calculated property 'aliases': [analysis_alias], @@ -190,6 +204,20 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysi return new_items +# TODO: finish implementing this function +def fetch_file_metadata(filenames): + files = [] + for filename in filenames: + file_info = { + 'aliases': [], + 'file_format': '', + 'file_type': '', + 'filename': '' + } + files.append(file_info) + raise NotImplementedError + + def create_case_items(items, proj_name): new_items = items.copy() for k, v in items['sample_processing'].items(): @@ -209,7 +237,6 @@ def create_case_items(items, proj_name): 'individual': indiv } if sample in items['reports']: - print('2') report_alias = case_alias.replace('case', 'report') new_items['report'][report_alias] = { 'aliases': [report_alias], From 7d49c47a3485c3935fc50491df41c2c211ef9c8c Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 10 Jul 2020 10:54:52 -0400 Subject: [PATCH 022/125] added a few more fields to sample from accessioning worksheet --- src/encoded/schemas/sample.json | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git 
a/src/encoded/schemas/sample.json b/src/encoded/schemas/sample.json index 4bd1023bf0..76ec69be3b 100644 --- a/src/encoded/schemas/sample.json +++ b/src/encoded/schemas/sample.json @@ -54,6 +54,13 @@ "lookup": 30, "description": "Clinical or research consent/protocol" }, + "research_protocol_name": { + "title": "Research Protocol Name", + "type": "string", + "label": "requisition", + "lookup": 31, + "description": "Consent Protocol Name for Research Requisition" + }, "date_requisition_received": { "title": "Date Requisition Received", "type": "string", @@ -131,6 +138,12 @@ "lookup": 113, "description": "If requisition was rejected, the corrective action noted/taken" }, + "action_taken_by": { + "title": "Action Taken By", + "type": "string", + "lookup": 114, + "description": "Name or ID of person who took the corrective action" + }, "date_sent": { "title": "Date Correction Sent", "type": "string", @@ -203,6 +216,13 @@ "type": "string", "lookup": 140 }, + "specimen_storage_location": { + "title": "Specimen Storage Location", + "description": "Location of specimen storage", + "label": "specimen", + "type": "string", + "lookup": 144 + }, "specimen_accession": { "title": "Specimen Accession", "description": "Accession of specimen from sequencing lab", @@ -247,6 +267,13 @@ "lookup": 160, "description": "ID of person who sent the specimen" }, + "sequencing_lab": { + "title": "Sequencing Lab", + "description": "Location performing sequencing on sample", + "type": "string", + "label": "test", + "lookup": 189 + }, "date_received": { "title": "Date Received in Sequencing Lab", "type": "string", From be81d70f07d97032238c43af8f2686c01c299807 Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 10 Jul 2020 10:55:21 -0400 Subject: [PATCH 023/125] more accessioning fields digested by submit.py --- src/encoded/submit.py | 67 ++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 6e9415a266..aefcfca689 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -77,7 +77,7 @@ def xls_to_json(xls_data, project, institution): for row in rows: indiv_alias = '{}:individual-{}'.format(project['name'], row['patient id']) fam_alias = '{}:family-{}'.format(project['name'], row['patient id']) - sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) + # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual items = fetch_individual_metadata(row, items, indiv_alias) # create/edit items for Family @@ -91,7 +91,8 @@ def xls_to_json(xls_data, project, institution): else: specimen_ids[row['specimen id']] = 1 analysis_alias = '{}:analysis-{}'.format(project['name'], row['analysis id']) - items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysis_alias, fam_alias) + items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, + analysis_alias, fam_alias, project['name']) else: print('WARNING: No specimen id present for patient {},' ' sample will not be created.'.format(row['patient id'])) @@ -147,39 +148,42 @@ def fetch_family_metadata(row, items, indiv_alias, fam_alias): return new_items -def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysis_alias, fam_alias): +def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name): new_items = items.copy() info = { 'aliases': [samp_alias], 'workup_type': row.get('workup type'), 'specimen_type': row.get('specimen 
type'), 'specimen_collection_date': row.get('date collected'), - # change collection location to stored location? - 'specimen_collection_location': row.get('location stored'), + 'specimen_storage_location': row.get('location stored'), 'specimen_accession': row['specimen id'], - # second specimen id 'date_transported': row.get('date transported'), 'transported_by': row.get('transport method'), 'sent_by': row.get('sent by'), - # sequencing ref lab + 'sequencing_lab': row.get('sequencing ref lab'), 'date_received': row.get("date rec'd at ref lab"), 'specimen_accepted': row.get('specimen accepted by ref lab'), - # sample ID by ref lab + 'sequence_id': row.get('sample id by ref lab'), 'dna_concentration': row.get('dna concentration'), 'specimen_notes': row.get('specimen notes'), 'files': [], # TODO: implement creation of file db items 'requisition_type': row.get('req type'), - # research protocol name + 'research_protocol_name': row.get('research protocol name') 'date_requisition_received': row.get("date req rec'd"), 'ordering_physician': row.get('physician/provider'), 'physician_id': row.get('physician id'), 'indication': row.get('indication') } + if row.get('second specimen id'): + other_id = {'id': row['second specimen id'], 'id_type': proj_name} # add proj info? + if row.get('second specimen id type'): + other_id['id_type'] = row['second specimen id type'] + info['other_specimen_ids'] = [other_id] req_info = { 'accepted_rejected': row.get('req accepted y/n'), 'rejection_reason': row.get('reason rejected'), 'corrective_action': row.get('corrective action taken'), - # corrective action taken by + 'action_taken_by': row.get('corrective action taken by') 'date_sent': row.get('date sent'), 'date_completed': row.get('date completed'), 'notes': row.get('correction notes') @@ -197,7 +201,6 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, sp_alias, analysi new_items['sample_processing'].setdefault(analysis_alias, new_sp_item) new_items['sample_processing'][analysis_alias]['samples'].append(samp_alias) if row.get('report required').lower().startswith('y'): - print('report') new_items['reports'].append(samp_alias) if fam_alias not in new_items['sample_processing'][analysis_alias]['families']: new_items['sample_processing'][analysis_alias]['families'].append(fam_alias) @@ -248,27 +251,27 @@ def create_case_items(items, proj_name): return new_items -def create_sample_processing_groups(items, sp_alias): - new_items = items.copy() - for v in new_items['family'].values(): - if 'members' in v and len(v['members']) > 1: - # create sample_processing item - samples = [items['individual'][indiv].get('samples', [None])[0] for indiv in v['members']] - samples = [s for s in samples if s] - if len (samples) > 1: - sp = { - 'aliases': [sp_alias], - 'samples': samples - } - analysis_type = items['sample'][items['individual'][v['proband']]['samples'][0]]['workup_type'] - if all([relation in v for relation in ['proband', 'mother', 'father']]) and sorted( - v['members']) == sorted([v['proband'], v['mother'], v['father']] - ): - sp['analysis_type'] = analysis_type + '-Trio' - else: - sp['analysis_type'] = analysis_type + '-Group' - new_items['sample_processing'][sp_alias] = sp - return new_items +# def create_sample_processing_groups(items, sp_alias): +# new_items = items.copy() +# for v in new_items['family'].values(): +# if 'members' in v and len(v['members']) > 1: +# # create sample_processing item +# samples = [items['individual'][indiv].get('samples', [None])[0] for indiv in v['members']] +# samples 
= [s for s in samples if s] +# if len (samples) > 1: +# sp = { +# 'aliases': [sp_alias], +# 'samples': samples +# } +# analysis_type = items['sample'][items['individual'][v['proband']]['samples'][0]]['workup_type'] +# if all([relation in v for relation in ['proband', 'mother', 'father']]) and sorted( +# v['members']) == sorted([v['proband'], v['mother'], v['father']] +# ): +# sp['analysis_type'] = analysis_type + '-Trio' +# else: +# sp['analysis_type'] = analysis_type + '-Group' +# new_items['sample_processing'][sp_alias] = sp +# return new_items def compare_with_db(virtualapp, alias): From 9dced9b926c126f6423de506e8c685ef54c1cc75 Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 10 Jul 2020 11:05:32 -0400 Subject: [PATCH 024/125] digestion of second individual id in submit.py --- src/encoded/submit.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index aefcfca689..de3953e9b7 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -79,7 +79,7 @@ def xls_to_json(xls_data, project, institution): fam_alias = '{}:family-{}'.format(project['name'], row['patient id']) # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual - items = fetch_individual_metadata(row, items, indiv_alias) + items = fetch_individual_metadata(row, items, indiv_alias, institution['name']) # create/edit items for Family items = fetch_family_metadata(row, items, indiv_alias, fam_alias) # create item for Sample if there is a specimen @@ -111,13 +111,18 @@ def xls_to_json(xls_data, project, institution): return items -def fetch_individual_metadata(row, items, indiv_alias): +def fetch_individual_metadata(row, items, indiv_alias, inst_name): new_items = items.copy() info = { 'aliases': [indiv_alias], 'individual_id': row['patient id'], 'sex': row.get('sex'), } + if row.get('other individual id'): + other_id = {'id': row['other individual id'], 'id_source': inst_name} + if row.get('other individual id type'): + other_id['id_source'] = row['other individual id source'] + info['institutional_id'] = other_id info['age'] = int(row['age']) if row.get('age') else None info['birth_year'] = int(row['birth year']) if row.get('birth year') else None if indiv_alias not in new_items['individual']: From b6dd846e6791e45510a722f5cfbe041d30f379d8 Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 10 Jul 2020 16:45:42 -0400 Subject: [PATCH 025/125] added mapping of spreadsheet to cgap fields in submit.py --- src/encoded/submit.py | 93 +++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 44 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index de3953e9b7..99fba865c7 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -12,12 +12,37 @@ import xlrd -BGM_FIELD_MAPPING = { - 'bcgg-id': 'patient id', - 'bcgg-f-id': 'family id', - "date req rec'd": 'date requisition received' +GENERIC_FIELD_MAPPING = { + 'individual': {'patient id': 'individual_id'}, + 'family': {}, + 'sample': { + 'date collected': 'specimen_collection_date', + 'location stored': 'specimen_storage_location', + 'specimen id': 'specimen_accession', + 'transport method': 'transported_by', + 'sequencing ref lab': 'sequencing_lab', + "date rec'd at ref lab": 'date_received', + 'specimen accepted by ref lab': 'specimen_accepted', + 'sample id by ref lab': 'sequence_id', + 'req type': 'requisition_type', + "date req rec'd": 'date_requisition_received', + 'physician/provider': 'ordering_physician' + }, 
+ 'requisition': { + 'req accepted y/n': 'accepted_rejected', + 'reason rejected': 'rejection_reason', + 'corrective action taken': 'corrective_action', + 'corrective action taken by': 'action_taken_by', + 'correction notes': 'notes' + } } +# BGM_FIELD_MAPPING = { +# 'bcgg-id': 'patient id', +# 'bcgg-f-id': 'family id', +# "date req rec'd": 'date requisition received' +# } + POST_ORDER = ['sample', 'individual', 'family', 'sample_processing', 'report', 'case'] @@ -51,6 +76,15 @@ def submit_data(context, request): raise NotImplementedError +def map_fields(row, metadata_dict, addl_fields, item_type): + for map_field in GENERIC_FIELD_MAPPING[item_type]: + if map_field in row: + metadata_dict[GENERIC_FIELD_MAPPING[item_type][map_field]] = row.get(map_field) + for field in addl_fields: + metadata_dict[field.replace('_', ' ')] = row.get(field) + return metadata_dict + + def xls_to_json(xls_data, project, institution): ''' Converts excel file to json for submission. @@ -113,18 +147,15 @@ def xls_to_json(xls_data, project, institution): def fetch_individual_metadata(row, items, indiv_alias, inst_name): new_items = items.copy() - info = { - 'aliases': [indiv_alias], - 'individual_id': row['patient id'], - 'sex': row.get('sex'), - } + info = {'aliases': [indiv_alias]} + info = map_fields(row, info, ['sex', 'age', 'birth_year'], 'individual') if row.get('other individual id'): other_id = {'id': row['other individual id'], 'id_source': inst_name} if row.get('other individual id type'): other_id['id_source'] = row['other individual id source'] info['institutional_id'] = other_id - info['age'] = int(row['age']) if row.get('age') else None - info['birth_year'] = int(row['birth year']) if row.get('birth year') else None + info['age'] = int(info['age']) if info.get('age') else None + info['birth_year'] = int(info['birth year']) if info.get('birth year') else None if indiv_alias not in new_items['individual']: new_items['individual'][indiv_alias] = {k: v for k, v in info.items() if v} else: @@ -155,44 +186,18 @@ def fetch_family_metadata(row, items, indiv_alias, fam_alias): def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name): new_items = items.copy() - info = { - 'aliases': [samp_alias], - 'workup_type': row.get('workup type'), - 'specimen_type': row.get('specimen type'), - 'specimen_collection_date': row.get('date collected'), - 'specimen_storage_location': row.get('location stored'), - 'specimen_accession': row['specimen id'], - 'date_transported': row.get('date transported'), - 'transported_by': row.get('transport method'), - 'sent_by': row.get('sent by'), - 'sequencing_lab': row.get('sequencing ref lab'), - 'date_received': row.get("date rec'd at ref lab"), - 'specimen_accepted': row.get('specimen accepted by ref lab'), - 'sequence_id': row.get('sample id by ref lab'), - 'dna_concentration': row.get('dna concentration'), - 'specimen_notes': row.get('specimen notes'), - 'files': [], # TODO: implement creation of file db items - 'requisition_type': row.get('req type'), - 'research_protocol_name': row.get('research protocol name') - 'date_requisition_received': row.get("date req rec'd"), - 'ordering_physician': row.get('physician/provider'), - 'physician_id': row.get('physician id'), - 'indication': row.get('indication') - } + info = {'aliases': [samp_alias], 'files': []} # TODO: implement creation of file db items + fields = [ + 'workup_type', 'specimen_type', 'dna_concentration', 'date_transported', + 'specimen_notes', 'research_protocol_name', 'sent_by', 
'physician_id', 'indication' + ] + info = map_fields(row, info, fields, 'sample') if row.get('second specimen id'): other_id = {'id': row['second specimen id'], 'id_type': proj_name} # add proj info? if row.get('second specimen id type'): other_id['id_type'] = row['second specimen id type'] info['other_specimen_ids'] = [other_id] - req_info = { - 'accepted_rejected': row.get('req accepted y/n'), - 'rejection_reason': row.get('reason rejected'), - 'corrective_action': row.get('corrective action taken'), - 'action_taken_by': row.get('corrective action taken by') - 'date_sent': row.get('date sent'), - 'date_completed': row.get('date completed'), - 'notes': row.get('correction notes') - } + req_info = map_fields(row, {}, ['date sent', 'date completed'], 'requisition') info['requisition_acceptance'] = {k, v for k, v in req_info.items() if v} new_items['sample'][samp_alias] = {k: v for k, v in info.items() if v} if indiv_alias in new_items['individual']: From 40b8015821ae3e44cce486b5cea7fa0a201ea0a7 Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 13 Jul 2020 16:19:05 -0400 Subject: [PATCH 026/125] edits to submit.py to fix testing bugs --- src/encoded/submit.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 99fba865c7..2449b68411 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -13,7 +13,7 @@ GENERIC_FIELD_MAPPING = { - 'individual': {'patient id': 'individual_id'}, + 'individual': {}, 'family': {}, 'sample': { 'date collected': 'specimen_collection_date', @@ -81,7 +81,7 @@ def map_fields(row, metadata_dict, addl_fields, item_type): if map_field in row: metadata_dict[GENERIC_FIELD_MAPPING[item_type][map_field]] = row.get(map_field) for field in addl_fields: - metadata_dict[field.replace('_', ' ')] = row.get(field) + metadata_dict[field] = row.get(field.replace('_', ' ')) return metadata_dict @@ -109,8 +109,8 @@ def xls_to_json(xls_data, project, institution): } specimen_ids = {} for row in rows: - indiv_alias = '{}:individual-{}'.format(project['name'], row['patient id']) - fam_alias = '{}:family-{}'.format(project['name'], row['patient id']) + indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) + fam_alias = '{}:family-{}'.format(project['name'], row['individual id']) # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual items = fetch_individual_metadata(row, items, indiv_alias, institution['name']) @@ -129,7 +129,7 @@ def xls_to_json(xls_data, project, institution): analysis_alias, fam_alias, project['name']) else: print('WARNING: No specimen id present for patient {},' - ' sample will not be created.'.format(row['patient id'])) + ' sample will not be created.'.format(row['individual id'])) # create SampleProcessing item for trio/group if needed # items = create_sample_processing_groups(items, sp_alias) items = create_case_items(items, project['name']) @@ -148,7 +148,7 @@ def xls_to_json(xls_data, project, institution): def fetch_individual_metadata(row, items, indiv_alias, inst_name): new_items = items.copy() info = {'aliases': [indiv_alias]} - info = map_fields(row, info, ['sex', 'age', 'birth_year'], 'individual') + info = map_fields(row, info, ['individual_id', 'sex', 'age', 'birth_year'], 'individual') if row.get('other individual id'): other_id = {'id': row['other individual id'], 'id_source': inst_name} if row.get('other individual id type'): @@ -192,13 +192,22 @@ def fetch_sample_metadata(row, 
items, indiv_alias, samp_alias, analysis_alias, f 'specimen_notes', 'research_protocol_name', 'sent_by', 'physician_id', 'indication' ] info = map_fields(row, info, fields, 'sample') + if info['specimen_accepted'].lower() == 'y': + info['specimen_accepted'] = 'Yes' + elif info['specimen_accepted'].lower() == 'n': + info['specimen_accepted'] = 'No' if row.get('second specimen id'): other_id = {'id': row['second specimen id'], 'id_type': proj_name} # add proj info? if row.get('second specimen id type'): other_id['id_type'] = row['second specimen id type'] info['other_specimen_ids'] = [other_id] req_info = map_fields(row, {}, ['date sent', 'date completed'], 'requisition') - info['requisition_acceptance'] = {k, v for k, v in req_info.items() if v} + if req_info['accepted_rejected'].lower() in ['yes', 'no', 'y', 'n']: + if req_info['accepted_rejected'].lower().startswith('y'): + req_info['accepted_rejected'] = 'Accepted' + else: + req_info['accepted_rejected'] = "Rejected" + info['requisition_acceptance'] = {k: v for k, v in req_info.items() if v} new_items['sample'][samp_alias] = {k: v for k, v in info.items() if v} if indiv_alias in new_items['individual']: new_items['individual'][indiv_alias]['samples'] = [samp_alias] @@ -245,7 +254,7 @@ def create_case_items(items, proj_name): indiv = [ikey for ikey, ival in items['individual'].items() if sample in ival.get('samples', [])][0] case_info = { 'aliases': [case_alias], - 'case_id': case_id, + # 'case_id': case_id, 'sample_processing': k, 'individual': indiv } From 3df3aa5844833a1d8690b0a73e5d64ebe8082a61 Mon Sep 17 00:00:00 2001 From: Sarah Date: Mon, 13 Jul 2020 16:45:26 -0400 Subject: [PATCH 027/125] edits to tests for submit.py --- src/encoded/submit.py | 8 +-- .../data/documents/cgap_submit_test.xlsx | Bin 13221 -> 13213 bytes src/encoded/tests/test_submit.py | 64 ++++++++---------- 3 files changed, 34 insertions(+), 38 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 2449b68411..f85161eef0 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -110,7 +110,7 @@ def xls_to_json(xls_data, project, institution): specimen_ids = {} for row in rows: indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) - fam_alias = '{}:family-{}'.format(project['name'], row['individual id']) + fam_alias = '{}:family-{}'.format(project['name'], row['family id']) # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual items = fetch_individual_metadata(row, items, indiv_alias, institution['name']) @@ -192,9 +192,9 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f 'specimen_notes', 'research_protocol_name', 'sent_by', 'physician_id', 'indication' ] info = map_fields(row, info, fields, 'sample') - if info['specimen_accepted'].lower() == 'y': + if info.get('specimen_accepted', '').lower() == 'y': info['specimen_accepted'] = 'Yes' - elif info['specimen_accepted'].lower() == 'n': + elif info.get('specimen_accepted', '').lower() == 'n': info['specimen_accepted'] = 'No' if row.get('second specimen id'): other_id = {'id': row['second specimen id'], 'id_type': proj_name} # add proj info? 
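The two hunks on either side of this point harden fetch_sample_metadata against partially filled rows: direct dictionary lookups become .get(..., '') so a missing column no longer blows up, and free-text yes/no answers are normalized to the controlled values the schema expects ('Yes'/'No' for specimen_accepted, 'Accepted'/'Rejected' for requisition_acceptance). A minimal standalone sketch of that normalization pattern, assuming only the stdlib (the helper name normalize_yes_no is illustrative and not part of submit.py):

    def normalize_yes_no(value, yes='Yes', no='No'):
        # Spreadsheet cells may contain 'y', 'Y', 'yes', 'n', 'No', or be missing/blank.
        value = str(value or '').strip().lower()
        if value.startswith('y'):
            return yes
        if value.startswith('n'):
            return no
        return None  # leave unrecognized answers unset rather than guessing

    # normalize_yes_no(row.get('specimen accepted by ref lab'))              -> 'Yes' / 'No' / None
    # normalize_yes_no(row.get('req accepted y/n'), 'Accepted', 'Rejected')  -> 'Accepted' / 'Rejected' / None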
@@ -202,7 +202,7 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f
             other_id['id_type'] = row['second specimen id type']
         info['other_specimen_ids'] = [other_id]
     req_info = map_fields(row, {}, ['date sent', 'date completed'], 'requisition')
-    if req_info['accepted_rejected'].lower() in ['yes', 'no', 'y', 'n']:
+    if req_info.get('accepted_rejected', '').lower() in ['yes', 'no', 'y', 'n']:
         if req_info['accepted_rejected'].lower().startswith('y'):
             req_info['accepted_rejected'] = 'Accepted'
         else:
diff --git a/src/encoded/tests/data/documents/cgap_submit_test.xlsx b/src/encoded/tests/data/documents/cgap_submit_test.xlsx
index 0e3409a68f19e8f22e4379178b33a7c180d15e45..de1a55de5fd1af955829d96e04f80ff8a679b648 100644
GIT binary patch
[delta 4960 and delta 4967 base85-encoded binary payloads omitted]

From: Sarah
Date: Tue, 14 Jul 2020 15:21:34 -0400
Subject: [PATCH 028/125] more tests for submit.py functions

---
 src/encoded/tests/test_submit.py | 123 
+++++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 21 deletions(-) diff --git a/src/encoded/tests/test_submit.py b/src/encoded/tests/test_submit.py index f4b14cde0a..2f4a0c06c7 100644 --- a/src/encoded/tests/test_submit.py +++ b/src/encoded/tests/test_submit.py @@ -1,8 +1,6 @@ import pytest from encoded.submit import * import json -# from pyramid.paster import get_app -# from dcicutils.misc_utils import VirtualApp @pytest.fixture @@ -59,6 +57,69 @@ def submission_info3(submission_info2): return info +@pytest.fixture +def sample_info(): + return { + 'workup type': 'WES', + 'specimen id': '9034', + 'date collected': '2020-01-06' + } + + +@pytest.fixture +def example_rows(): + return [ + {'individual id': '456', 'analysis id': '1111', 'relation to proband': 'proband'}, + {'individual id': '123', 'analysis id': '1111', 'relation to proband': 'mother'}, + {'individual id': '789', 'analysis id': '1111', 'relation to proband': 'father'}, + {'individual id': '456', 'analysis id': '2222', 'relation to proband': 'proband'}, + {'individual id': '555', 'analysis id': '3333', 'relation to proband': 'proband'}, + {'individual id': '546', 'analysis id': '3333', 'relation to proband': 'mother'} + ] + + +@pytest.fixture +def new_family(child, mother, father): + return { + "title": "Smith family", + "proband": child['@id'], + "members": [ + child['@id'], + mother['@id'], + father['@id'] + ] + } + + +@pytest.fixture +def aunt(testapp, project, institution): + item = { + "accession": "GAPIDAUNT001", + "age": 35, + "age_units": "year", + 'project': project['@id'], + 'institution': institution['@id'], + "sex": "F" + } + return testapp.post_json('/individual', item).json['@graph'][0] + + +def test_map_fields(sample_info): + result = map_fields(sample_info, {}, ['workup_type'], 'sample') + assert result['workup_type'] == 'WES' + assert result['specimen_accession'] == '9034' + assert result['specimen_collection_date'] == '2020-01-06' + assert not result.get('sequencing_lab') + + +def test_create_families(example_rows): + fams = create_families(example_rows) + assert sorted(list(fams.keys())) == ['1111', '2222', '3333'] + assert fams['1111'] == 'family-456' + assert fams['2222'] == 'family-456' + assert fams['3333'] == 'family-555' + + def test_fetch_individual_metadata_new(row_dict, empty_items): items_out = fetch_individual_metadata(row_dict, empty_items, 'test-proj:indiv1', 'hms-dbmi') assert items_out['individual']['test-proj:indiv1']['aliases'] == ['test-proj:indiv1'] @@ -109,32 +170,52 @@ def test_fetch_sample_metadata_sp(row_dict, empty_items): assert items_out['individual']['test-proj:indiv1']['samples'] == ['test-proj:samp1'] -# def test_create_sample_processing_groups_grp(submission_info2): -# items_out = create_sample_processing_groups(submission_info2, 'test-proj:sp-multi') -# assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Group' -# assert len(items_out['sample_processing']['test-proj:sp-multi']['samples']) == 2 -# -# -# def test_create_sample_processing_groups_one(submission_info): -# items_out = create_sample_processing_groups(submission_info, 'test-proj:sp-single') -# assert not items_out['sample_processing'] -# -# -# def test_create_sample_processing_groups_trio(submission_info3): -# items_out = create_sample_processing_groups(submission_info3, 'test-proj:sp-multi') -# assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Group' -# submission_info3['family']['test-proj:fam1']['father'] = 'test-proj:indiv3' -# 
items_out = create_sample_processing_groups(submission_info3, 'test-proj:sp-multi') -# assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Trio' - - def test_xls_to_json(project, institution): json_out = xls_to_json('src/encoded/tests/data/documents/cgap_submit_test.xlsx', project, institution) assert len(json_out['family']) == 1 + assert 'encode-project:family-456' in json_out['family'] assert len(json_out['individual']) == 3 assert all(['encode-project:individual-' + x in json_out['individual'] for x in ['123', '456', '789']]) +def test_parse_exception_invalid_alias(testapp, a_case): + a_case['invalid_field'] = 'value' + a_case['project'] = '/projects/invalid-project/' + try: + testapp.post_json('/case', a_case) + except Exception as e: + errors = parse_exception(e, ['/projects/other-project/']) + assert len(errors) == 2 + assert 'Additional properties are not allowed' in ''.join(errors) + assert 'not found' in ''.join(errors) + + +def test_parse_exception_with_alias(testapp, a_case): + a_case['project'] = '/projects/invalid-project/' + errors = None + try: + testapp.post_json('/case', a_case) + except Exception as e: + errors = parse_exception(e, ['/projects/invalid-project/']) + assert errors == [] + + +def test_compare_fields_same(testapp, fam, new_family): + profile = testapp.get('/profiles/family.json').json + result = compare_fields(profile, [], new_family, fam) + assert not result + + +def test_compare_fields_different(testapp, aunt, fam, new_family): + new_family['members'].append(aunt['@id']) + new_family['title'] = 'Smythe family' + profile = testapp.get('/profiles/family.json').json + result = compare_fields(profile, [], new_family, fam) + assert len(result) == 2 + assert 'title' in result + assert len(result['members']) == len(fam['members']) + 1 + + def test_validate_item_post_valid(testapp, a_case): result = validate_item(testapp, a_case, 'post', 'case', []) assert not result From 9f1a820691bea34ccde6f0bd78530008ba79a63e Mon Sep 17 00:00:00 2001 From: Sarah Date: Tue, 14 Jul 2020 15:22:30 -0400 Subject: [PATCH 029/125] edits to submit.py for minor bugs found via unit tests --- src/encoded/submit.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index f85161eef0..3ec4daae74 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -108,9 +108,10 @@ def xls_to_json(xls_data, project, institution): 'case': {}, 'report': {}, 'reports': [] } specimen_ids = {} + family_dict = create_families(rows) for row in rows: indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) - fam_alias = '{}:family-{}'.format(project['name'], row['family id']) + fam_alias = '{}:{}'.format(project['name'], family_dict[row['analysis id']]) # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual items = fetch_individual_metadata(row, items, indiv_alias, institution['name']) @@ -145,6 +146,12 @@ def xls_to_json(xls_data, project, institution): return items +def create_families(rows): + proband_rows = [row for row in rows if row.get('relation to proband').lower() == 'proband'] + fams = {row.get('analysis id'): 'family-{}'.format(row.get('individual id')) for row in proband_rows} + return fams + + def fetch_individual_metadata(row, items, indiv_alias, inst_name): new_items = items.copy() info = {'aliases': [indiv_alias]} @@ -372,9 +379,11 @@ def compare_fields(profile, aliases, json_item, db_item): else: val = [v 
for v in json_item[field]] if sorted(val) != sorted(db_item.get(field, [])): - if len(val) == 1 and val not in db_item.get(field, []): + # if len(val) == 1 and val not in db_item.get(field, []): + # continue + if all(v in db_item.get(field, []) for v in val): continue - new_val = db_item.get(field, []) + new_val = [item for item in db_item.get(field, [])] new_val.extend(val) to_patch[field] = list(set(new_val)) return to_patch From 18330aa56b63eb580590776b4c40c4bbf162ee5f Mon Sep 17 00:00:00 2001 From: Sarah Date: Tue, 14 Jul 2020 16:05:31 -0400 Subject: [PATCH 030/125] parsing of analysis type for sample_processing in submit.py --- src/encoded/submit.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 3ec4daae74..8a6558288d 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -109,6 +109,7 @@ def xls_to_json(xls_data, project, institution): } specimen_ids = {} family_dict = create_families(rows) + a_types = get_analysis_types(rows) for row in rows: indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) fam_alias = '{}:{}'.format(project['name'], family_dict[row['analysis id']]) @@ -126,8 +127,8 @@ def xls_to_json(xls_data, project, institution): else: specimen_ids[row['specimen id']] = 1 analysis_alias = '{}:analysis-{}'.format(project['name'], row['analysis id']) - items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, - analysis_alias, fam_alias, project['name']) + items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, + fam_alias, project['name'], a_types) else: print('WARNING: No specimen id present for patient {},' ' sample will not be created.'.format(row['individual id'])) @@ -152,6 +153,26 @@ def create_families(rows): return fams +def get_analysis_types(rows): + analysis_relations = {} + analysis_types = {} + for row in rows: + analysis_relations.setdefault(row.get('analysis id'), [[], []]) + analysis_relations[row.get('analysis id')][0].append(row.get('relation to proband', '').lower()) + analysis_relations[row.get('analysis id')][1].append(row.get('workup type', '').upper()) + for k, v in analysis_relations.items(): + if len(list(set(v[1]))) == 1: + if len(v[0]) == 1: + analysis_types[k] = v[1][0] + elif sorted(v[0]) == ['father', 'mother', 'proband']: + analysis_types[k] = v[1][0] + '-Trio' + else: + analysis_types[k] = v[1][0] + '-Group' + else: + analysis_types[k] = None + return analysis_types + + def fetch_individual_metadata(row, items, indiv_alias, inst_name): new_items = items.copy() info = {'aliases': [indiv_alias]} @@ -191,7 +212,7 @@ def fetch_family_metadata(row, items, indiv_alias, fam_alias): return new_items -def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name): +def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name, analysis_type_dict): new_items = items.copy() info = {'aliases': [samp_alias], 'files': []} # TODO: implement creation of file db items fields = [ @@ -224,6 +245,8 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f 'samples': [], 'families': [] } + if row.get('analysis id') in analysis_type_dict: + new_sp_item['analysis_type'] = analysis_type_dict[row.get('analysis id')] new_items['sample_processing'].setdefault(analysis_alias, new_sp_item) new_items['sample_processing'][analysis_alias]['samples'].append(samp_alias) if row.get('report 
required').lower().startswith('y'): From a4cffbd8722b63db844e35391ce1ca469eca99d0 Mon Sep 17 00:00:00 2001 From: Sarah Date: Tue, 14 Jul 2020 16:05:59 -0400 Subject: [PATCH 031/125] test for analysis type parsing in submit.py --- src/encoded/tests/test_submit.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/encoded/tests/test_submit.py b/src/encoded/tests/test_submit.py index 2f4a0c06c7..e51931f4b0 100644 --- a/src/encoded/tests/test_submit.py +++ b/src/encoded/tests/test_submit.py @@ -69,12 +69,12 @@ def sample_info(): @pytest.fixture def example_rows(): return [ - {'individual id': '456', 'analysis id': '1111', 'relation to proband': 'proband'}, - {'individual id': '123', 'analysis id': '1111', 'relation to proband': 'mother'}, - {'individual id': '789', 'analysis id': '1111', 'relation to proband': 'father'}, - {'individual id': '456', 'analysis id': '2222', 'relation to proband': 'proband'}, - {'individual id': '555', 'analysis id': '3333', 'relation to proband': 'proband'}, - {'individual id': '546', 'analysis id': '3333', 'relation to proband': 'mother'} + {'individual id': '456', 'analysis id': '1111', 'relation to proband': 'proband', 'workup type': 'WGS'}, + {'individual id': '123', 'analysis id': '1111', 'relation to proband': 'mother', 'workup type': 'WGS'}, + {'individual id': '789', 'analysis id': '1111', 'relation to proband': 'father', 'workup type': 'WGS'}, + {'individual id': '456', 'analysis id': '2222', 'relation to proband': 'proband', 'workup type': 'WGS'}, + {'individual id': '555', 'analysis id': '3333', 'relation to proband': 'proband', 'workup type': 'WES'}, + {'individual id': '546', 'analysis id': '3333', 'relation to proband': 'mother', 'workup type': 'WES'} ] @@ -120,6 +120,16 @@ def test_create_families(example_rows): assert fams['3333'] == 'family-555' +def test_get_analysis_types(example_rows): + a_types = get_analysis_types(example_rows) + assert a_types['1111'] == 'WGS-Trio' + assert a_types['2222'] == 'WGS' + assert a_types['3333'] == 'WES-Group' + example_rows[1]['workup type'] = 'WES' + new_a_types = get_analysis_types(example_rows) + assert new_a_types['1111'] is None + + def test_fetch_individual_metadata_new(row_dict, empty_items): items_out = fetch_individual_metadata(row_dict, empty_items, 'test-proj:indiv1', 'hms-dbmi') assert items_out['individual']['test-proj:indiv1']['aliases'] == ['test-proj:indiv1'] From 24abbec3b7469417f9b2ee0e2558380580f3bcf5 Mon Sep 17 00:00:00 2001 From: Sarah Date: Wed, 15 Jul 2020 14:17:49 -0400 Subject: [PATCH 032/125] mother and father relations on individual added in submit.py --- src/encoded/submit.py | 95 ++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 38 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 3ec4daae74..cb44b49afd 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -109,6 +109,7 @@ def xls_to_json(xls_data, project, institution): } specimen_ids = {} family_dict = create_families(rows) + a_types = get_analysis_types(rows) for row in rows: indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) fam_alias = '{}:{}'.format(project['name'], family_dict[row['analysis id']]) @@ -126,13 +127,14 @@ def xls_to_json(xls_data, project, institution): else: specimen_ids[row['specimen id']] = 1 analysis_alias = '{}:analysis-{}'.format(project['name'], row['analysis id']) - items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, - analysis_alias, fam_alias, 
project['name']) + items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, + fam_alias, project['name'], a_types) else: print('WARNING: No specimen id present for patient {},' ' sample will not be created.'.format(row['individual id'])) # create SampleProcessing item for trio/group if needed # items = create_sample_processing_groups(items, sp_alias) + items = add_relations(items) items = create_case_items(items, project['name']) # removed unused fields, add project and institution for val1 in items.values(): @@ -152,6 +154,26 @@ def create_families(rows): return fams +def get_analysis_types(rows): + analysis_relations = {} + analysis_types = {} + for row in rows: + analysis_relations.setdefault(row.get('analysis id'), [[], []]) + analysis_relations[row.get('analysis id')][0].append(row.get('relation to proband', '').lower()) + analysis_relations[row.get('analysis id')][1].append(row.get('workup type', '').upper()) + for k, v in analysis_relations.items(): + if len(list(set(v[1]))) == 1: + if len(v[0]) == 1: + analysis_types[k] = v[1][0] + elif sorted(v[0]) == ['father', 'mother', 'proband']: + analysis_types[k] = v[1][0] + '-Trio' + else: + analysis_types[k] = v[1][0] + '-Group' + else: + analysis_types[k] = None + return analysis_types + + def fetch_individual_metadata(row, items, indiv_alias, inst_name): new_items = items.copy() info = {'aliases': [indiv_alias]} @@ -179,19 +201,17 @@ def fetch_family_metadata(row, items, indiv_alias, fam_alias): 'family_id': row['family id'], 'members': [indiv_alias] } - if row.get('relation to proband', '').lower() == 'proband': - info['proband'] = indiv_alias if fam_alias not in new_items['family']: new_items['family'][fam_alias] = info - else: - if indiv_alias not in new_items['family'][fam_alias]['members']: - new_items['family'][fam_alias]['members'].append(indiv_alias) - if row.get('relation to proband', '').lower() == 'proband' and 'proband' not in new_items['family'][fam_alias]: - new_items['family'][fam_alias]['proband'] = indiv_alias + if indiv_alias not in new_items['family'][fam_alias]['members']: + new_items['family'][fam_alias]['members'].append(indiv_alias) + for relation in ['proband', 'mother', 'father', 'brother', 'sister', 'sibling']: + if row.get('relation to proband', '').lower() == relation and relation not in new_items['family'][fam_alias]: + new_items['family'][fam_alias][relation] = indiv_alias return new_items -def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name): +def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name, analysis_type_dict): new_items = items.copy() info = {'aliases': [samp_alias], 'files': []} # TODO: implement creation of file db items fields = [ @@ -224,6 +244,8 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f 'samples': [], 'families': [] } + if row.get('analysis id') in analysis_type_dict: + new_sp_item['analysis_type'] = analysis_type_dict[row.get('analysis id')] new_items['sample_processing'].setdefault(analysis_alias, new_sp_item) new_items['sample_processing'][analysis_alias]['samples'].append(samp_alias) if row.get('report required').lower().startswith('y'): @@ -277,27 +299,24 @@ def create_case_items(items, proj_name): return new_items -# def create_sample_processing_groups(items, sp_alias): -# new_items = items.copy() -# for v in new_items['family'].values(): -# if 'members' in v and len(v['members']) > 1: -# # create sample_processing item -# 
samples = [items['individual'][indiv].get('samples', [None])[0] for indiv in v['members']] -# samples = [s for s in samples if s] -# if len (samples) > 1: -# sp = { -# 'aliases': [sp_alias], -# 'samples': samples -# } -# analysis_type = items['sample'][items['individual'][v['proband']]['samples'][0]]['workup_type'] -# if all([relation in v for relation in ['proband', 'mother', 'father']]) and sorted( -# v['members']) == sorted([v['proband'], v['mother'], v['father']] -# ): -# sp['analysis_type'] = analysis_type + '-Trio' -# else: -# sp['analysis_type'] = analysis_type + '-Group' -# new_items['sample_processing'][sp_alias] = sp -# return new_items +def add_relations(items): + new_items = items.copy() + for alias, fam in items['family'].items(): + parents = False + for relation in ['mother', 'father']: + if fam.get(relation): + if fam.get('proband'): + new_items['individual'][fam['proband']][relation] = fam[relation] + parents = True + del new_items['family'][alias][relation] + for relation in ['brother', 'sister', 'sibling']: + if fam.get(relation): + if parents: + for parent in ['mother', 'father']: + if new_items['individual'][fam['proband']].get(parent): + new_items['individual'][fam[relation]][parent] = new_items['individual'][fam['proband']][parent] + del new_items['family'][alias][relation] + return new_items def compare_with_db(virtualapp, alias): @@ -374,18 +393,18 @@ def compare_fields(profile, aliases, json_item, db_item): to_patch[field] = val else: # if array, patch field vals get added to what's in db - if field != 'aliases': + if field != 'aliases' and profile['properties'][field].get('items', {}).get('linkTo'): val = [aliases[v] if v in aliases else v for v in json_item[field]] else: val = [v for v in json_item[field]] - if sorted(val) != sorted(db_item.get(field, [])): + # if sorted(val) != sorted(db_item.get(field, [])): # if len(val) == 1 and val not in db_item.get(field, []): # continue - if all(v in db_item.get(field, []) for v in val): - continue - new_val = [item for item in db_item.get(field, [])] - new_val.extend(val) - to_patch[field] = list(set(new_val)) + if all(v in db_item.get(field, []) for v in val): + continue + new_val = [item for item in db_item.get(field, [])] + new_val.extend(val) + to_patch[field] = list(set(new_val)) return to_patch From 381f61e9a45dfe2ddbc04f25e4f008bc4f735944 Mon Sep 17 00:00:00 2001 From: Sarah Date: Thu, 16 Jul 2020 17:10:11 -0400 Subject: [PATCH 033/125] addition of file metadata in submit.py to allow creation of File items --- src/encoded/submit.py | 103 +++++++++++++++++++++++-------- src/encoded/tests/test_submit.py | 3 +- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index cb44b49afd..84e9d8ae78 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -44,7 +44,10 @@ # } -POST_ORDER = ['sample', 'individual', 'family', 'sample_processing', 'report', 'case'] +POST_ORDER = [ + 'file_fastq', 'file_processed', 'sample', 'individual', + 'family', 'sample_processing', 'report', 'case' +] LINKS = [ @@ -105,8 +108,10 @@ def xls_to_json(xls_data, project, institution): items = { 'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}, - 'case': {}, 'report': {}, 'reports': [] + 'file_fastq': {}, 'file_processed': {}, 'case': {}, 'report': {}, + 'reports': [] } + file_errors = [] specimen_ids = {} family_dict = create_families(rows) a_types = get_analysis_types(rows) @@ -119,7 +124,7 @@ def xls_to_json(xls_data, project, institution): # create/edit 
items for Family items = fetch_family_metadata(row, items, indiv_alias, fam_alias) # create item for Sample if there is a specimen - if row['specimen id']: + if row.get('specimen id'): samp_alias = '{}:sample-{}'.format(project['name'], row['specimen id']) if row['specimen id'] in specimen_ids: samp_alias = samp_alias + '-' + specimen_ids[row['specimen id']] @@ -129,6 +134,11 @@ def xls_to_json(xls_data, project, institution): analysis_alias = '{}:analysis-{}'.format(project['name'], row['analysis id']) items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, project['name'], a_types) + if row.get('files'): + file_items = fetch_file_metadata(row['files'].split(','), project['name']) + file_errors.extend(file_items['errors']) + items['file_fastq'].update(file_items['file_fastq']) + items['file_processed'].update(file_items['file_processed']) else: print('WARNING: No specimen id present for patient {},' ' sample will not be created.'.format(row['individual id'])) @@ -144,7 +154,7 @@ def xls_to_json(xls_data, project, institution): del val2[key] val2['project'] = project['@id'] val2['institution'] = institution['@id'] - + items['file_errors'] = file_errors return items @@ -256,17 +266,36 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f # TODO: finish implementing this function -def fetch_file_metadata(filenames): - files = [] +def fetch_file_metadata(filenames, proj_name): + valid_extensions = { + '.fastq.gz': ('fastq', 'reads'), + '.fq.gz': ('fastq', 'reads'), + '.cram': ('cram', 'alignments'), + '.vcf.gz': ('vcf_gz', 'raw VCF') + } + files = {'file_fastq': {}, 'file_processed': {}, 'errors': []} for filename in filenames: + extension = [ext for ext in valid_extensions if filename.endswith(ext)] + if not extension: + if [ext for ext in ['.fastq', '.fq', '.vcf'] if filename.endswith(ext)]: + files['errors'].append('File must be compressed - please gzip file {}'.format(filename)) + else: + files['errors'].append('File extension on {} not supported - expecting one of: ' + '.fastq.gz, .fq.gz, .cram, .vcf.gz'.format(filename)) + continue + file_alias = '{}:{}'.format(proj_name, filename.lstrip(' ')) + fmt = valid_extensions[extension[0]][0] file_info = { - 'aliases': [], - 'file_format': '', - 'file_type': '', - 'filename': '' + 'aliases': [file_alias], + 'file_format': '/file-formats/{}/'.format(fmt), + 'file_type': valid_extensions[extension[0]][1], + 'filename': filename # causes problems without functional file upload } - files.append(file_info) - raise NotImplementedError + if fmt == 'fastq': + files['file_fastq'][file_alias] = file_info + else: + files['file_processed'][file_alias] = file_info + return files def create_case_items(items, proj_name): @@ -386,9 +415,8 @@ def compare_fields(profile, aliases, json_item, db_item): # if not an array, patch field gets overwritten (if different from db) if profile['properties'][field]['type'] != 'array': val = json_item[field] - if isinstance(val, str): - if val in aliases: - val = aliases[val] + if profile['properties'][field]['type'] == 'string' and val in aliases: + val = aliases[val] if val != db_item.get(field): to_patch[field] = val else: @@ -404,7 +432,10 @@ def compare_fields(profile, aliases, json_item, db_item): continue new_val = [item for item in db_item.get(field, [])] new_val.extend(val) - to_patch[field] = list(set(new_val)) + try: + to_patch[field] = list(set(new_val)) + except TypeError: # above doesn't handle list of dictionaries + to_patch[field] = [dict(t) 
for t in {tuple(d.items()) for d in new_val}] return to_patch @@ -422,7 +453,7 @@ def validate_all_items(virtualapp, json_data): written or tested. ''' alias_dict = {} - errors = [] + errors = json_data['file_errors'] all_aliases = [k for itype in json_data for k in json_data[itype]] json_data_final = {'post': {}, 'patch': {}} validation_results = {} @@ -431,10 +462,17 @@ def validate_all_items(virtualapp, json_data): if itemtype in json_data: profile = virtualapp.get('/profiles/{}.json'.format(itemtype)).json validation_results[itemtype] = {'validated': 0, 'errors': 0} + db_results = {} for alias in json_data[itemtype]: - # TODO : format fields (e.g. int, list, etc.) - result = compare_with_db(virtualapp, alias) - if not result: + # first collect all atids before comparing and validating items + db_result = compare_with_db(virtualapp, alias) + if db_result: + alias_dict[alias] = db_result['@id'] + db_results[alias] = db_result + for alias in json_data[itemtype]: + if 'filename' in json_data[itemtype][alias]: # until we have functional file upload + del json_data[itemtype][alias]['filename'] + if not db_results.get(alias): error = validate_item(virtualapp, json_data[itemtype][alias], 'post', itemtype, all_aliases) if error: # modify to check for presence of validation errors # do something to report validation errors @@ -442,25 +480,32 @@ def validate_all_items(virtualapp, json_data): for e in error: errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) validation_results[itemtype]['errors'] += 1 + elif json_data[itemtype][alias].get('filename') and \ + json_data[itemtype][alias]['filename'] in ''.join(json_data['file_errors']): + validation_results[itemtype]['errors'] += 1 else: json_data_final['post'].setdefault(itemtype, []) json_data_final['post'][itemtype].append(json_data[itemtype][alias]) validation_results[itemtype]['validated'] += 1 else: # patch if item exists in db - alias_dict[alias] = result['@id'] - patch_data = compare_fields(profile, alias_dict, json_data[itemtype][alias], result) - error = validate_item(virtualapp, patch_data, 'patch', itemtype, all_aliases, atid=result['@id']) + # alias_dict[alias] = results[alias]['@id'] + patch_data = compare_fields(profile, alias_dict, json_data[itemtype][alias], db_results[alias]) + error = validate_item(virtualapp, patch_data, 'patch', itemtype, + all_aliases, atid=db_results[alias]['@id']) if error: # do something to report validation errors if itemtype not in ['case', 'report']: for e in error: errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) validation_results[itemtype]['errors'] += 1 + elif json_data[itemtype][alias].get('filename') and \ + json_data[itemtype][alias]['filename'] in ''.join(json_data['file_errors']): + validation_results[itemtype]['errors'] += 1 else: # patch json_data_final['patch'].setdefault(itemtype, {}) if patch_data: - json_data_final['patch'][itemtype][result['@id']] = patch_data - else: + json_data_final['patch'][itemtype][db_results[alias]['@id']] = patch_data + elif itemtype not in ['case', 'report']: output.append('{} {} - Item already in database, no changes needed'.format(itemtype, alias)) # do something to record response validation_results[itemtype]['validated'] += 1 @@ -489,6 +534,8 @@ def post_and_patch_all_items(virtualapp, json_data_final): final_status[k] = {'posted': 0, 'not posted': 0, 'patched': 0, 'not patched': 0} for item in v: patch_info = {} + # if 'filename' in item: # until we have functional file upload + # del item['filename'] for field in LINKS: if 
field in item: patch_info[field] = item[field] @@ -507,7 +554,7 @@ def post_and_patch_all_items(virtualapp, json_data_final): final_status[k]['not posted'] += 1 except Exception as e: final_status[k]['not posted'] += 1 - output.append(e) + output.append(str(e)) for itype in final_status: if final_status[itype]['posted'] > 0 or final_status[itype]['not posted'] > 0: output.append('{}: {} items posted successfully; {} items not posted'.format( @@ -516,6 +563,8 @@ def post_and_patch_all_items(virtualapp, json_data_final): for k, v in json_data_final['patch'].items(): final_status.setdefault(k, {'patched': 0, 'not patched': 0}) for item_id, patch_data in v.items(): + # if 'filename' in patch_data: # until we have functional file upload + # del patch_data['filename'] try: response = virtualapp.patch_json('/' + item_id, patch_data, status=200) if response.json['status'] == 'success': @@ -526,7 +575,7 @@ def post_and_patch_all_items(virtualapp, json_data_final): final_status[k]['not patched'] += 1 except Exception as e: final_status[k]['not patched'] += 1 - output.append(e) + output.append(str(e)) if final_status[k]['patched'] > 0 or final_status[k]['not patched'] > 0: output.append('{}: {} items patched successfully; {} items not patched'.format( k, final_status[k]['patched'], final_status[k]['not patched'] diff --git a/src/encoded/tests/test_submit.py b/src/encoded/tests/test_submit.py index e51931f4b0..141ac7bf4a 100644 --- a/src/encoded/tests/test_submit.py +++ b/src/encoded/tests/test_submit.py @@ -173,7 +173,8 @@ def test_fetch_sample_metadata_sp(row_dict, empty_items): items = empty_items.copy() items['individual'] = {'test-proj:indiv1': {}} items_out = fetch_sample_metadata( - row_dict, items, 'test-proj:indiv1', 'test-proj:samp1', 'test-proj:sp1', 'test-proj:fam1', 'test-proj' + row_dict, items, 'test-proj:indiv1', 'test-proj:samp1', + 'test-proj:sp1', 'test-proj:fam1', 'test-proj', {} ) assert items_out['sample']['test-proj:samp1']['specimen_accession'] == row_dict['specimen id'] assert items_out['sample_processing']['test-proj:sp1']['samples'] == ['test-proj:samp1'] From 498ce003bacc3a2e264e75a227d000bac6f4592f Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 17 Jul 2020 11:01:57 -0400 Subject: [PATCH 034/125] added tests for fetch_file_metadata in submit.py --- src/encoded/tests/test_submit.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/encoded/tests/test_submit.py b/src/encoded/tests/test_submit.py index 141ac7bf4a..194a60d963 100644 --- a/src/encoded/tests/test_submit.py +++ b/src/encoded/tests/test_submit.py @@ -181,6 +181,34 @@ def test_fetch_sample_metadata_sp(row_dict, empty_items): assert items_out['individual']['test-proj:indiv1']['samples'] == ['test-proj:samp1'] +def test_fetch_file_metadata_valid(): + results = fetch_file_metadata(['f1.fastq.gz', 'f2.cram', 'f3.vcf.gz'], 'test-proj') + assert 'test-proj:f1.fastq.gz' in results['file_fastq'] + assert results['file_fastq']['test-proj:f1.fastq.gz']['file_format'] == '/file-formats/fastq/' + assert results['file_fastq']['test-proj:f1.fastq.gz']['file_type'] == 'reads' + assert 'test-proj:f2.cram' in results['file_processed'] + assert 'test-proj:f3.vcf.gz' in results['file_processed'] + assert not results['errors'] + + +def test_fetch_file_metadata_uncompressed(): + results = fetch_file_metadata(['f1.fastq', 'f2.cram', 'f3.vcf'], 'test-proj') + assert not results['file_fastq'] + assert 'test-proj:f2.cram' in results['file_processed'] + assert 'test-proj:f3.vcf' not in 
results['file_processed'] + assert len(results['errors']) == 2 + assert all('File must be compressed' in error for error in results['errors']) + + +def test_fetch_file_metadata_invalid(): + results = fetch_file_metadata(['f3.gvcf.gz'], 'test-proj') + assert all(not results[key] for key in ['file_fastq', 'file_processed']) + assert results['errors'] == [ + 'File extension on f3.gvcf.gz not supported - ' + 'expecting one of: .fastq.gz, .fq.gz, .cram, .vcf.gz' + ] + + def test_xls_to_json(project, institution): json_out = xls_to_json('src/encoded/tests/data/documents/cgap_submit_test.xlsx', project, institution) assert len(json_out['family']) == 1 From 94800dc2d6cec490ac9f0974401f1015ce830d41 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Tue, 21 Jul 2020 13:39:10 -0400 Subject: [PATCH 035/125] Keep minor version ahead of master. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 66aed2d77e..ff832caa9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] # Note: Various modules refer to this system as "encoded", not "cgap-portal". name = "encoded" -version = "2.3.0" +version = "2.4.0" description = "Clinical Genomics Analysis Platform" authors = ["4DN-DCIC Team "] license = "MIT" From e8daaebb38a4e5e6b245356f91d116f3b16b0440 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Sun, 26 Jul 2020 05:43:49 -0400 Subject: [PATCH 036/125] WIP: This is just barely working on the rosy case without permissions or logging or progress info etc. --- Makefile | 6 + pyproject.toml | 2 +- src/encoded/common.py | 92 +++++++ src/encoded/dev_servers.py | 63 +++-- src/encoded/ingestion_engines.py | 109 ++++++++ src/encoded/ingestion_listener.py | 249 ++++++++++++++++-- src/encoded/renderers.py | 11 +- src/encoded/root.py | 4 +- src/encoded/submit.py | 70 ++++- .../cgap_submit_test_with_errors.xlsx | Bin 0 -> 13220 bytes .../tests/test_generate_item_from_owl.py | 2 +- src/encoded/util.py | 109 +++++++- 12 files changed, 660 insertions(+), 57 deletions(-) create mode 100644 src/encoded/common.py create mode 100644 src/encoded/ingestion_engines.py create mode 100644 src/encoded/tests/data/documents/cgap_submit_test_with_errors.xlsx diff --git a/Makefile b/Makefile index 2af9cca922..e8efbc2f6c 100644 --- a/Makefile +++ b/Makefile @@ -62,6 +62,12 @@ download-genes: # grabs latest gene list from the below link, unzips and drops i deploy1: # starts postgres/ES locally and loads inserts @SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` dev-servers development.ini --app-name app --clear --init --load +deploy1a: # starts postgres/ES locally and loads inserts + @SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` dev-servers development.ini --app-name app --clear --init --load --no_ingest + +deploy1b: # starts postgres/ES locally and loads inserts + @echo "Starting ingestion listener. Press ^C to exit." 
&& SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` poetry run ingestion-listener development.ini --app-name app
+
 deploy2: # spins up waittress to serve the application
 	@SNOVAULT_DB_TEST_PORT=`grep 'sqlalchemy[.]url =' development.ini | sed -E 's|.*:([0-9]+)/.*|\1|'` pserve development.ini
diff --git a/pyproject.toml b/pyproject.toml
index ff832caa9d..c251dffd51 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 # Note: Various modules refer to this system as "encoded", not "cgap-portal".
 name = "encoded"
-version = "2.4.0"
+version = "2.3.1.1b0" # Preparing for minor version bump (2.4.0 probably)
 description = "Clinical Genomics Analysis Platform"
 authors = ["4DN-DCIC Team "]
 license = "MIT"
diff --git a/src/encoded/common.py b/src/encoded/common.py
new file mode 100644
index 0000000000..c5e06786f4
--- /dev/null
+++ b/src/encoded/common.py
@@ -0,0 +1,92 @@
+"""
+common.py - tools common to various parts of ingestion
+"""
+
+import codecs
+import contextlib
+import io
+import os
+import tempfile
+
+
+
+
+DATA_BUNDLE_BUCKET = 'cgap-data-bundles'
+
+
+class SubmissionFailure(Exception):
+    pass
+
+
+CONTENT_TYPE_SPECIAL_CASES = {
+    'application/x-www-form-urlencoded': [
+        # Special case to allow us to POST to metadata TSV requests via form submission
+        '/metadata/'
+    ]
+}
+
+
+def register_path_content_type(*, path, content_type):
+    """
+    Registers that endpoints that begin with the specified path allow the given content type.
+    """
+    exceptions = CONTENT_TYPE_SPECIAL_CASES.get(content_type, None)
+    if exceptions is None:
+        CONTENT_TYPE_SPECIAL_CASES[content_type] = exceptions = []
+    if path not in exceptions:
+        exceptions.append(path)
+
+
+def content_type_allowed(request):
+    """
+    Returns True if the current request allows the requested content type.
+    """
+    if request.content_type == "application/json":
+        # For better or worse, we always allow this.
+        return True
+
+    exceptions = CONTENT_TYPE_SPECIAL_CASES.get(request.content_type)
+
+    if exceptions:
+        for prefix in exceptions:
+            if request.path.startswith(prefix):
+                return True
+
+    return False
+
+
+class MissingParameter(Exception):
+
+    def __init__(self, parameter_name):
+        self.parameter_name = parameter_name
+        super().__init__("Missing parameter: %s" % parameter_name)
+
+
+_NO_DEFAULT = object()
+
+
+def get_parameter(parameter_block, parameter_name, default=_NO_DEFAULT):
+    """
+    Returns the value of a given parameter from a dictionary of parameter values.
+
+    If the parameter is not in the dictionary, the default will be returned if one is given.
+    If the parameter is not present but there is no default, an error of type MissingParameter will be raised.
+
+    Args:
+        parameter_block dict: a dictionary whose keys are parameter names and whose values are parameter values
+        parameter_name str: the name of a parameter
+        default object: a default value to be used if the parameter_name is not present.
+ """ + + if isinstance(parameter_block, dict): + if parameter_name in parameter_block: + return parameter_block[parameter_name] + elif default is _NO_DEFAULT: + raise MissingParameter(parameter_name=parameter_name) + else: + return default + else: + raise TypeError("Expected parameter_block to be a dict: %s", parameter_block) + + diff --git a/src/encoded/dev_servers.py b/src/encoded/dev_servers.py index eeddc2c7bb..b1f26b2e1c 100644 --- a/src/encoded/dev_servers.py +++ b/src/encoded/dev_servers.py @@ -49,12 +49,16 @@ def nginx_server_process(prefix='', echo=False): return process -def ingestion_listener_process(config_uri, app_name, echo=True): - """ Uses Popen to start up the ingestion-listener. """ - args = [ +def ingestion_listener_compute_command(config_uri, app_name): + return [ 'poetry', 'run', 'ingestion-listener', config_uri, '--app-name', app_name ] + +def ingestion_listener_process(config_uri, app_name, echo=True): + """ Uses Popen to start up the ingestion-listener. """ + args = ingestion_listener_compute_command(config_uri, app_name) + process = subprocess.Popen( args, close_fds=True, @@ -79,34 +83,58 @@ def main(): parser.add_argument('--init', action="store_true", help="Init database") parser.add_argument('--load', action="store_true", help="Load test set") parser.add_argument('--datadir', default='/tmp/snovault', help="path to datadir") + parser.add_argument('--no_ingest', action="store_true", default=False, help="Don't start the ingestion process.") + parser.add_argument('--ingest_only', action="store_true", default=False, help="Only start the ingestion engine.") args = parser.parse_args() + run(app_name=args.app_name, config_uri=args.config_uri, datadir=args.datadir, + clear=args.clear, init=args.init, load=args.load, no_ingest=args.no_ingest, ingest_only=args.ingest_only) + +def run(app_name, config_uri, datadir, clear=False, init=False, load=False, no_ingest=False, ingest_only=False): + + if ingest_only: + clear = False + init = False + load = False + logging.basicConfig(format='') # Loading app will have configured from config file. Reconfigure here: logging.getLogger('encoded').setLevel(logging.INFO) # get the config and see if we want to connect to non-local servers - config = get_appsettings(args.config_uri, args.app_name) + # TODO: This variable seems to not get used? -kmp 25-Jul-2020 + config = get_appsettings(config_uri, app_name) - datadir = os.path.abspath(args.datadir) + datadir = os.path.abspath(datadir) pgdata = os.path.join(datadir, 'pgdata') esdata = os.path.join(datadir, 'esdata') ### comment out from HERE... - if args.clear: + if clear: for dirname in [pgdata, esdata]: if os.path.exists(dirname): shutil.rmtree(dirname) - if args.init: + if init: postgresql_fixture.initdb(pgdata, echo=True) ### ... 
to HERE to disable recreation of test db ### may have to `rm /tmp/snovault/pgdata/postmaster.pid` - postgres = postgresql_fixture.server_process(pgdata, echo=True) - elasticsearch = elasticsearch_fixture.server_process(esdata, echo=True) - nginx = nginx_server_process(echo=True) - ingestion_listener = ingestion_listener_process(args.config_uri, args.app_name) - processes = [postgres, elasticsearch, nginx, ingestion_listener] - + if ingest_only: + print("Do this instead: ", + "SNOVAULT_DB_TEST_PORT=" + os.environ["SNOVAULT_DB_TEST_PORT"], + " ".join(ingestion_listener_compute_command(config_uri, app_name))) + return + + processes = [] + if not ingest_only: + postgres = postgresql_fixture.server_process(pgdata, echo=True) + processes.append(postgres) + elasticsearch = elasticsearch_fixture.server_process(esdata, echo=True) + processes.append(elasticsearch) + nginx = nginx_server_process(echo=True) + processes.append(nginx) + if not no_ingest: + ingestion_listener = ingestion_listener_process(config_uri, app_name) + processes.append(ingestion_listener) @atexit.register def cleanup_process(): @@ -121,15 +149,18 @@ def cleanup_process(): pass process.wait() + if init: + app = get_app(config_uri, app_name) + else: + app = None - app = get_app(args.config_uri, args.app_name) # clear queues and initialize indices before loading data. No indexing yet. # this is needed for items with properties stored in ES - if args.init: + if init: create_mapping.run(app, skip_indexing=True, purge_queue=False) - if args.init and args.load: + if init and load: load_test_data = app.registry.settings.get('load_test_data') load_test_data = DottedNameResolver().resolve(load_test_data) load_res = load_test_data(app) diff --git a/src/encoded/ingestion_engines.py b/src/encoded/ingestion_engines.py new file mode 100644 index 0000000000..05a5f0f82e --- /dev/null +++ b/src/encoded/ingestion_engines.py @@ -0,0 +1,109 @@ +import boto3 +import json +import traceback + +from .common import DATA_BUNDLE_BUCKET, get_parameter +from .util import debuglog, s3_output_stream, create_empty_s3_file +from .submit import submit_data_bundle + + +INGESTION_UPLOADERS = {} + + +def ingestion_processor(processor_type): + """ + @ingestion_uploader() is a decorator that declares the upload handler for an ingestion type. + """ + + def ingestion_type_decorator(fn): + INGESTION_UPLOADERS[processor_type] = fn + return fn + + return ingestion_type_decorator + + +class UndefinedIngestionProcessorType(Exception): + + def __init__(self, processor_type): + self.ingestion_type_name = processor_type + super().__init__("No ingestion processor type %r is defined." 
% processor_type) + + +def get_ingestion_processor(processor_type): + handler = INGESTION_UPLOADERS.get(processor_type, None) + if not handler: + raise UndefinedIngestionProcessorType(processor_type) + return handler + + +def _show_report_lines(lines, fp, default="Nothing to report."): + for line in lines or ([default] if default else []): + print(line, file=fp) + + +@ingestion_processor('data_bundle') +def handle_data_bundle(*, uuid, ingestion_type, vapp, log): + + log.info("Processing {uuid} as {ingestion_type}.".format(uuid=uuid, ingestion_type=ingestion_type)) + + if ingestion_type != 'data_bundle': + raise RuntimeError("handle_data_bundle only works for ingestion_type data_bundle.") + + s3_client = boto3.client('s3') + manifest_key = "%s/manifest.json" % uuid + response = s3_client.get_object(Bucket=DATA_BUNDLE_BUCKET, Key=manifest_key) + manifest = json.load(response['Body']) + + data_key = manifest['object_name'] + parameters = manifest['parameters'] + institution = get_parameter(parameters, 'institution') + project = get_parameter(parameters, 'project') + + debuglog(uuid, "data_key:", data_key) + debuglog(uuid, "parameters:", parameters) + + started_key = "%s/started.txt" % uuid + create_empty_s3_file(s3_client, bucket=DATA_BUNDLE_BUCKET, key=started_key) + + # PyCharm thinks this is unused. -kmp 26-Jul-2020 + # data_stream = s3_client.get_object(Bucket=DATA_BUNDLE_BUCKET, Key="%s/manifest.json" % uuid)['Body'] + + resolution = { + "data_key": data_key, + "manifest_key": manifest_key, + "started_key": started_key, + } + + try: + + validation_log_lines, final_json, result_lines = submit_data_bundle(s3_client=s3_client, + bucket=DATA_BUNDLE_BUCKET, + key=data_key, + project=project, + institution=institution, + vapp=vapp) + + resolution["validation_report_key"] = validation_report_key = "%s/validation-report.txt" % uuid + resolution["submission_key"] = submission_key = "%s/submission.json" % uuid + resolution["submission_response_key"] = submission_response_key = "%s/submission-response.txt" % uuid + + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=validation_report_key) as fp: + _show_report_lines(validation_log_lines, fp) + + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=submission_key) as fp: + print(json.dumps(final_json, indent=2), file=fp) + + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=submission_response_key) as fp: + _show_report_lines(result_lines, fp) + + except Exception as e: + + resolution["traceback_key"] = traceback_key = "%s/traceback.json" % uuid + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=traceback_key) as fp: + traceback.print_exc(file=fp) + + resolution["error_type"] = e.__class__.__name__ + resolution["error_message"] = str(e) + + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key="%s/resolution.json" % uuid) as fp: + print(json.dumps(resolution, indent=2), file=fp) diff --git a/src/encoded/ingestion_listener.py b/src/encoded/ingestion_listener.py index bd737911ad..98c472d6d9 100644 --- a/src/encoded/ingestion_listener.py +++ b/src/encoded/ingestion_listener.py @@ -1,25 +1,32 @@ -import os -import boto3 -import time -import socket import argparse -import structlog +import atexit +import boto3 +import botocore.exceptions import datetime +import elasticsearch +import io import json -import atexit -import threading -import signal +import os import psycopg2 -import webtest -import elasticsearch import requests # XXX: C4-211 should not be needed but is -from vcf import Reader 
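Aside, not part of the diff: the @ingestion_processor decorator defined in ingestion_engines.py above is a plain dictionary-backed registry, and get_ingestion_processor is just the corresponding lookup. A minimal, self-contained sketch of the pattern; the 'noop' processor name is hypothetical and exists only for illustration:

    INGESTION_UPLOADERS = {}

    def ingestion_processor(processor_type):
        def ingestion_type_decorator(fn):
            INGESTION_UPLOADERS[processor_type] = fn
            return fn
        return ingestion_type_decorator

    @ingestion_processor('noop')
    def handle_noop(*, uuid, ingestion_type, vapp, log):
        # A do-nothing handler registered under the hypothetical 'noop' type.
        return uuid

    # get_ingestion_processor amounts to this lookup, plus raising
    # UndefinedIngestionProcessorType when the name is unknown.
    handler = INGESTION_UPLOADERS['noop']
    assert handler(uuid='1234', ingestion_type='noop', vapp=None, log=None) == '1234'

New ingestion types are added by defining one more decorated handler; that is how handle_data_bundle registers itself for 'data_bundle' and how the listener's run() loop later finds it.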
+import signal +import socket +import structlog +import threading +import time +import uuid +import webtest + +from dcicutils.misc_utils import VirtualApp, ignored from pyramid import paster -from dcicutils.misc_utils import VirtualApp +from pyramid.response import Response from pyramid.view import view_config from snovault.util import debug_log -from .util import resolve_file_path, gunzip_content +from vcf import Reader from .commands.ingest_vcf import VCFParser +from .common import register_path_content_type, DATA_BUNDLE_BUCKET, SubmissionFailure +from .ingestion_engines import get_ingestion_processor +from .util import resolve_file_path, gunzip_content, debuglog log = structlog.getLogger(__name__) @@ -32,10 +39,99 @@ def includeme(config): config.add_route('queue_ingestion', '/queue_ingestion') config.add_route('ingestion_status', '/ingestion_status') + config.add_route('prompt_for_ingestion', '/prompt_for_ingestion') + config.add_route('submit_for_ingestion', '/submit_for_ingestion') config.registry[INGESTION_QUEUE] = IngestionQueueManager(config.registry) config.scan(__name__) +@view_config(route_name='prompt_for_ingestion', request_method='GET') +@debug_log +def prompt_for_ingestion(context, request): + ignored(context, request) + return Response(PROMPT_FOR_INGESTION) + + +register_path_content_type(path='/submit_for_ingestion', content_type='multipart/form-data') +@view_config(route_name='submit_for_ingestion', request_method='POST', accept='multipart/form-data') +@debug_log +def submit_for_ingestion(context, request): + + ignored(context) + + ingestion_type = request.POST['ingestion_type'] + filename = request.POST['datafile'].filename + override_name = request.POST.get('override_name', None) + parameters = dict(request.POST) + parameters['datafile'] = filename + + # ``input_file`` contains the actual file data which needs to be + # stored somewhere. + + input_file_stream = request.POST['datafile'].file + input_file_stream.seek(0) + + # NOTE: Some reference information about uploading files to s3 is here: + # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html + + upload_id = str(uuid.uuid4()) + _, ext = os.path.splitext(filename) + object_name = "{id}/datafile{ext}".format(id=upload_id, ext=ext) + manifest_name = "{id}/manifest.json".format(id=upload_id) + + s3_client = boto3.client('s3') + + upload_time = datetime.datetime.utcnow().isoformat() + success = True + message = "Uploaded successfully." 
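Aside, not part of the diff: combined with handle_data_bundle in ingestion_engines.py above, every submission ends up under a single uuid prefix in the cgap-data-bundles bucket. A rough sketch of that layout, with a hypothetical uuid prefix and assuming an .xlsx upload:

    # <uuid>/datafile.xlsx            the uploaded workbook (extension follows the uploaded filename)
    # <uuid>/manifest.json            upload metadata plus the submitted form parameters
    # <uuid>/started.txt              empty marker written when processing begins
    # <uuid>/validation-report.txt    validation log lines
    # <uuid>/submission.json          the final JSON that was posted/patched
    # <uuid>/submission-response.txt  result lines from post_and_patch_all_items
    # <uuid>/traceback.json           written only if an exception occurs
    # <uuid>/resolution.json          records which of the above keys were produced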
+ + try: + s3_client.upload_fileobj(input_file_stream, Bucket=DATA_BUNDLE_BUCKET, Key=object_name) + + except botocore.exceptions.ClientError as e: + + log.error(e) + + success = False + message = "{error_type}: {error_message}".format(error_type=type(e), error_message=str(e)) + + result = { + "filename": filename, + "object_name": object_name, + "bucket": DATA_BUNDLE_BUCKET, + "success": success, + "message": message, + "upload_time": upload_time, + "parameters": parameters + } + + pretty_result = json.dumps(result, indent=2) + + if success: + + try: + with io.BytesIO(pretty_result.encode('utf-8')) as fp: + s3_client.upload_fileobj(fp, Bucket=DATA_BUNDLE_BUCKET, Key=manifest_name) + + except botocore.exceptions.ClientError as e: + + log.error(e) + + message = "{error_type} (while uploading metadata): {error_message}".format(error_type=type(e), + error_message=str(e)) + + raise SubmissionFailure(message) + + queue_manager = get_queue_manager(request, override_name=override_name) + _, failed = queue_manager.add_uuids([upload_id], ingestion_type=ingestion_type) + + if failed: + # If there's a failure, failed will be a list of one problem description since we only submitted one thing. + raise SubmissionFailure(failed[0]) + + return result + + @view_config(route_name='ingestion_status', request_method='GET', permission='index') @debug_log def ingestion_status(context, request): @@ -66,6 +162,10 @@ def queue_ingestion(context, request): """ uuids = request.json.get('uuids', []) override_name = request.json.get('override_name', None) + return enqueue_uuids_for_request(request, uuids, override_name=override_name) + + +def enqueue_uuids_for_request(request, uuids, *, ingestion_type='vcf', override_name=None): response = { 'notification': 'Failure', 'number_queued': 0, @@ -73,8 +173,7 @@ def queue_ingestion(context, request): } if uuids is []: return response - queue_manager = request.registry[INGESTION_QUEUE] if not override_name \ - else IngestionQueueManager(request.registry, override_name=override_name) + queue_manager = get_queue_manager(request, override_name=override_name) _, failed = queue_manager.add_uuids(uuids) if not failed: response['notification'] = 'Success' @@ -87,6 +186,12 @@ def queue_ingestion(context, request): return response +def get_queue_manager(request, *, override_name): + return (request.registry[INGESTION_QUEUE] + if not override_name + else IngestionQueueManager(request.registry, override_name=override_name)) + + class IngestionQueueManager: """ Similar to QueueManager in snovault in that in manages SQS queues, but that code is not generic @@ -219,9 +324,9 @@ def delete_messages(self, messages): failed.extend(response.get('Failed', [])) return failed - def add_uuids(self, uuids): - """ Takes a list of string uuids (presumed to be VCF files) and adds them to - the ingestion queue. + def add_uuids(self, uuids, ingestion_type='vcf'): + """ Takes a list of string uuids and adds them to the ingestion queue. + If ingestion_type is not specified, it defaults to 'vcf'. :precondition: uuids are all of type FileProcessed :param uuids: uuids to be added to the queue. 
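Aside, not part of the diff: a client can exercise the new /submit_for_ingestion endpoint with an ordinary multipart POST. A hedged sketch using the requests library; the host, credentials, and project/institution identifiers are placeholders, not values taken from this patch:

    import requests

    with open('cgap_submit_test.xlsx', 'rb') as fp:
        resp = requests.post(
            'http://localhost:8000/submit_for_ingestion',
            auth=('access-key-id', 'access-key-secret'),   # placeholder credentials
            data={
                'ingestion_type': 'data_bundle',
                'project': '/projects/hms-dbmi/',          # placeholder identifiers
                'institution': '/institutions/hms-dbmi/',
            },
            files={'datafile': fp},                        # sent as multipart/form-data
        )
    print(resp.json()['message'])                          # "Uploaded successfully." on the happy path

Whatever extra form fields are sent (here project and institution) travel along in the manifest's parameters and are read back out by handle_data_bundle via get_parameter.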
@@ -231,7 +336,9 @@ def add_uuids(self, uuids): msgs = [] for uuid in uuids: current_msg = { - 'uuid': uuid, 'timestamp': curr_time + 'ingestion_type': ingestion_type, + 'uuid': uuid, + 'timestamp': curr_time } msgs.append(current_msg) failed = self._send_messages(msgs) @@ -330,8 +437,10 @@ def delete_messages(self, messages): """ failed = self.queue_manager.delete_messages(messages) while True: + debuglog("Trying to delete messages") tries = 3 if failed: + debuglog("Failed to delete messages") if tries > 0: failed = self.queue_manager.delete_messages(failed) # try again tries -= 1 @@ -339,6 +448,7 @@ def delete_messages(self, messages): log.error('Failed to delete messages from SQS: %s' % failed) break else: + debuglog("Deleted messages") break def build_variant_link(self, variant): @@ -415,15 +525,37 @@ def run(self): delete processed messages """ log.info('Ingestion listener successfully online.') + + debuglog("Ingestion listener started.") + while self.should_remain_online(): + + debuglog("About to get messages.") + messages = self.get_messages() # wait here + debuglog("Got", len(messages), "messages.") + # ingest each VCF file for message in messages: + + debuglog("Message:", message) + body = json.loads(message['Body']) uuid = body['uuid'] + ingestion_type = body['ingestion_type'] log.info('Ingesting uuid %s' % uuid) + if ingestion_type != 'vcf': + # Let's minimally disrupt things for now. We can refactor this later + # to make all the parts work the same -kmp + handler = get_ingestion_processor(ingestion_type) + handler(uuid=uuid, ingestion_type=ingestion_type, vapp=self.vapp, log=log) + print("HANDLED", uuid) + continue + + debuglog("Did NOT process", uuid, "as", ingestion_type) + # locate file meta data try: file_meta = self.vapp.get('/' + uuid).follow().json @@ -471,7 +603,11 @@ def run(self): def run(vapp=None, _queue_manager=None, _update_status=None): """ Entry-point for the ingestion listener for waitress. """ ingestion_listener = IngestionListener(vapp, _queue_manager=_queue_manager, _update_status=_update_status) - ingestion_listener.run() + try: + ingestion_listener.run() + except Exception as e: + debuglog(str(e)) + raise class ErrorHandlingThread(threading.Thread): @@ -595,6 +731,79 @@ def main(): vapp = VirtualApp(app, config) return run(vapp) +PROMPT_FOR_INGESTION = """ + + + + Submit for Ingestion + + + + +

Submit for Ingestion
      [form markup lost in extraction: an HTML table of labeled inputs posting to /submit_for_ingestion (the ingestion type, the datafile to upload, and related parameters), followed by a Submit button]
+ + +""" + + if __name__ == '__main__': main() diff --git a/src/encoded/renderers.py b/src/encoded/renderers.py index a1ecbc0b9e..5cb832faf0 100644 --- a/src/encoded/renderers.py +++ b/src/encoded/renderers.py @@ -26,6 +26,7 @@ from subprocess_middleware.worker import TransformWorker from urllib.parse import urlencode from webob.cookies import Cookie +from .common import content_type_allowed log = logging.getLogger(__name__) @@ -107,15 +108,13 @@ def validate_request_tween(request): # Includes page text/html requests. return handler(request) - elif request.content_type != 'application/json': - if request.content_type == 'application/x-www-form-urlencoded' and request.path[0:10] == '/metadata/': - # Special case to allow us to POST to metadata TSV requests via form submission - return handler(request) + elif content_type_allowed(request): + return handler(request) + + else: detail = "Request content type %s is not 'application/json'" % request.content_type raise HTTPUnsupportedMediaType(detail) - return handler(request) - return validate_request_tween diff --git a/src/encoded/root.py b/src/encoded/root.py index a6ca512016..44e397eefa 100644 --- a/src/encoded/root.py +++ b/src/encoded/root.py @@ -99,12 +99,12 @@ def health_page_view(request): "beanstalk_env": env_name, "blob_bucket": settings.get('blob_bucket'), "database": settings.get('sqlalchemy.url').split('@')[1], # don't show user /password - "display_title": "Fourfront Status and Foursight Monitoring", + "display_title": "CGAP Status and Foursight Monitoring", "elasticsearch": settings.get('elasticsearch.server'), "file_upload_bucket": settings.get('file_upload_bucket'), "foursight": foursight_url, "indexer": settings.get("indexer"), - "index_server": settings.get("index_xerver"), + "index_server": settings.get("index_server"), "load_data": settings.get('load_test_data'), "namespace": settings.get('indexer.namespace'), "processed_file_bucket": settings.get('file_wfout_bucket'), diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 84e9d8ae78..17d2104450 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -1,16 +1,17 @@ -from pyramid.paster import get_app -from pyramid.response import Response -from pyramid.view import view_config -from snovault.util import debug_log -# from webtest import TestApp -from dcicutils.misc_utils import VirtualApp, VirtualAppError -from dcicutils import ff_utils -from webtest.app import AppError import ast import datetime import json import xlrd +from dcicutils.misc_utils import VirtualApp, VirtualAppError +from dcicutils import ff_utils +from pyramid.paster import get_app +from pyramid.response import Response +from snovault.util import debug_log +from pyramid.view import view_config +from webtest.app import AppError +from .util import s3_local_file, debuglog + GENERIC_FIELD_MAPPING = { 'individual': {}, @@ -56,7 +57,17 @@ ] -# This is a placeholder for a submission endpoint modified from loadxl + +# This "/submit_data" endpoint is a placeholder for a submission endpoint modified from loadxl. +# +# NOTES FROM KMP (25-Jul-2020): +# +# This will be done differently soon as part of the "/submit_for_ingestion" endpoint that +# will be in ingestion_listener.py. That endpoint will need an "?ingestion type=data_bundle" +# as query parameter. That "data_bundle" ingestion type will defined in ingestion_engines.py. +# The new entry point here that will be needed is submit_data_bundle, and then this temporary +# "/submit_data" endpoint can presumably go away.. 
-kmp 25-Jul-2020 + @view_config(route_name='submit_data', request_method='POST', permission='add') @debug_log def submit_data(context, request): @@ -78,6 +89,27 @@ def submit_data(context, request): raise NotImplementedError +# This endpoint will soon be the primary entry point. Please keep it working as-is and do not remove it. +# -kmp 25-Jul-2020 +def submit_data_bundle(*, s3_client, bucket, key, project, institution, vapp): # All keyword arguments, all required. + """ + Handles processing of a submitted workbook. + + Args: + data_stream: an open stream to xls workbook data + project: a project identifier + institution: an institution identifier + vapp: a VirtualApp object + log: a logging object capable of .info, .warning, .error, or .debug messages + """ + with s3_local_file(s3_client, bucket=bucket, key=key) as file: + project_json = vapp.get(project).json + institution_json = vapp.get(institution).json + json_data = xls_to_json(file, project=project_json, institution=institution_json) + final_json, validation_log_lines = validate_all_items(vapp, json_data) + result_lines = post_and_patch_all_items(vapp, final_json) + return validation_log_lines, final_json, result_lines + def map_fields(row, metadata_dict, addl_fields, item_type): for map_field in GENERIC_FIELD_MAPPING[item_type]: @@ -97,8 +129,11 @@ def xls_to_json(xls_data, project, institution): sheet, = book.sheets() row = row_generator(sheet) top_header = next(row) + debuglog("top_header:", top_header) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 keys = next(row) - next(row) + debuglog("keys:", keys) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 + descriptions = next(row) + debuglog("descriptions:", descriptions) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 rows = [] counter = 0 for values in row: @@ -116,6 +151,7 @@ def xls_to_json(xls_data, project, institution): family_dict = create_families(rows) a_types = get_analysis_types(rows) for row in rows: + debuglog("row:", repr(row)) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) fam_alias = '{}:{}'.format(project['name'], family_dict[row['analysis id']]) # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) @@ -463,12 +499,21 @@ def validate_all_items(virtualapp, json_data): profile = virtualapp.get('/profiles/{}.json'.format(itemtype)).json validation_results[itemtype] = {'validated': 0, 'errors': 0} db_results = {} + # TODO: json_data[itemtype] but item_type might not be in json_data according to previous "if" statement. + # Maybe we want "for alias in json_data.get(item_type, {}):" here? + # Alternatively, maybe give "json_data.get(item_type, {})" a variable name so that it can be referred + # to more concisely in the several places below that it's needed. + # -kmp 25-Jul-2020 for alias in json_data[itemtype]: # first collect all atids before comparing and validating items db_result = compare_with_db(virtualapp, alias) if db_result: alias_dict[alias] = db_result['@id'] + # TODO: db_results is only conditionally assigned in the prevous "if". + # Perhaps the db_results = {} above should be moved up outside the "if"? + # Are we supposed to have a new dictionary on each iteration? -kmp 25-Jul-2020 db_results[alias] = db_result + # TODO: Likewise this should probably loop over json_data.get(itemtype, {}). 
-kmp 25-Jul-2020 for alias in json_data[itemtype]: if 'filename' in json_data[itemtype][alias]: # until we have functional file upload del json_data[itemtype][alias]['filename'] @@ -480,6 +525,10 @@ def validate_all_items(virtualapp, json_data): for e in error: errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) validation_results[itemtype]['errors'] += 1 + # TODO: If itemtype might not be in json_data (and conditionals above suggest that's so), + # then json_data[item_type][alias] seems suspect. It does work to do + # json_data.get(item_type, {}).get(alias, {}).get('filename') but I would put that + # quantity in a variable rather than compute it twice in a row. -kmp 25-Jul-2020 elif json_data[itemtype][alias].get('filename') and \ json_data[itemtype][alias]['filename'] in ''.join(json_data['file_errors']): validation_results[itemtype]['errors'] += 1 @@ -490,6 +539,7 @@ def validate_all_items(virtualapp, json_data): else: # patch if item exists in db # alias_dict[alias] = results[alias]['@id'] + # TODO: profile is only conditionally assigned in an "if" above. -kmp 25-Jul-2020 patch_data = compare_fields(profile, alias_dict, json_data[itemtype][alias], db_results[alias]) error = validate_item(virtualapp, patch_data, 'patch', itemtype, all_aliases, atid=db_results[alias]['@id']) diff --git a/src/encoded/tests/data/documents/cgap_submit_test_with_errors.xlsx b/src/encoded/tests/data/documents/cgap_submit_test_with_errors.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f6e736b89d8ed45de316089168463ce8570ca109 GIT binary patch literal 13220 zcmeHuRd5|kvh5aGTWB%MVrFKGSr#)hGc%)w7Be$5vt%(?7BgGS%unasnR{kD=e>CU z_x6tHj_TdHqPu!!W>w{CSxGQ(6u<`n5C8xW0QlMbC#^vM0B{HZ02u%T)ey9?b~Lhf z)KPS^HFD6Rb+xj@&jkl1&jx_LU;n?`|KJ`dR~)wPrAO#cIph`UR1V7tDJ=!!wBl$G z??WcL#;OuG?qZTZdyyH-Db)abNaQRhHAt_xF)Efw)dcvqEomRsWQcWPiXtoQFL++W zvV{Hf%atSRV!~OnG2qV5K1?@2N;NO()0z{$5t5*>)I&4v^;}9H2$Y5L3X*hC{wf81 zSUBPt?o|Voq;SH}XLk*dr@@j~Q)zFPV`Ne$u1a&jRicf*i=;rQk!`7e#1lUp`H7OP zRbz_DDTv2sRt{2Z#fS&X6byw>!+>&$Z{qubj%c-Yq?r-AZ^_kD_ed0@HDkN7(Mji- z%YP~+LMdBjw$`P^d?y;_D#hXIn0ct6B8+B4-RM~Q4(inv!^rc-hC2TW-EVRqKa(Nm z!mT3^=n#xxwhCBlJwd$2ai{>@hq|~{W|LdOrgMIQ}9=$yFi%cIqZ19=bbI8!$>}m{xfTRn*SQ~+&x3Boh$A-u}V(ir} zG8_a&tN<`kpAPTmk>yqH$ipFm+b!nua3mBiqDGgB;G{=8CrCUt z+Z0g=H*)8WXv(tY;vC80bs~}J3!xf>Nm>;wDC7d1U^Jc-KlOnxYHRxUl_0YM3MUo8 z)lF>KhjEi>KJ$r%2XMTh98$+WQ&5K-^i38kJ%=m_ZXU3d6ihkHs`WGNISJi#3@y8_ zgwnc@Up#4~(}&~zOY`Gu-;wp!&*&&WHWO@ru{pGuD4PiQnh= zq^tKHX?=GREC3M1)spVN{KUn^-a_BT#^R4`_D`Pyc~5EYDF3s!a``Wk{qG?i<~4-g zCCwQXanX^E@L1{SBXGE$VwseH$@_X8o3d3)bxx8F%qH0FY<$r9js<=T4Ed&qs`x7s zxI41t85e}j_|-TVQ2(UjlfNhgGSczkQP~L$e5wO-%jei3>P!^&U-#rBm?3$EghJOX zKZBXgC+M9R%^iN=j=Xbbw0_6;F;L;S!oR+$2|ty>c=xSEaQXH_mcJX`)DIB1D$;C_SXQ)Jy=9k^(nnc#^-WWpeSLzL&agiE)GE2-ySWj4mVSlH?&D-? 
z?no7>)Jj9om4`8`yf5Fd4C>Ra4T&i-chR`hU~S}gmjAtqR=bmZ-J-5-$~^>--2hRU zeq=9DUToNRl5%PYlNiYg=R(h{q0dF+W=2J!r4SeMGoD$q;XX7bZBKW9Tm-6RCfCOB zC<ZUw|kqhun=G}0Abvv0ie3^XSV9^YEAVr3QZRgUO#)T#`wtEA@yO!7xhcU*6l zoN2Ee89mQpsa&sV8QUpr_VBlBw!*I>OLvR%91+i2UD_{FN*PL9oXX@m)X-c(1v`>>za)32 z-Iacz%0ouev$u=AO5iV8Iq!6y$KOHxgjRUKEcGmbFi`INvwmeb$QpP5yr;@%cEz`& zn3)`>BT2`@h!_QJAI38%F+6>Jf$EfUCatx5Cl6w87#`?2!qQH%ii@bR29d!UHBV6^ z5|fi^B#sl<9UIFfJrZ(RvjYitRRnPbN%^ZcB$&@K>N)zXYXEA6{-Jn=$^@ZZhjyK> z@$ETmWb0wcNvE7**HFc*wo%AqwD5Id3q?yvwtT6sgf;xR=??>tgQW8u_r zc+uO@WIuLdqZvm8D?|iZ3){dBS%aWuu>eE4o7v{zc#0K39MGm01EsfpD|5k|f9Ff?N{>=_-~d1gEC7J=U%upEs%LLxsNiUCW^Ll|NA!(Qv5>*$KzZr>`5IE@ zc~DtTPhFwBN|`UwD=5~UDhq;_k&+gXgy~U@yY&3@X_7Cyu?R*?=-vr$Z^gkTIIC+O zm8Z5wzAT_;z0CtmKjBEAbhc;X=;mT#5JZhkk$|$|UbkA0>z@1Zr75Q&%2s7Dfd4@A ziel7K+n(mQ(ppu|q~r??&MQmV7;CVwo}{sg%Hr$V8m}5nK9+3d{7BE~=i|x*Pt~h> zjRzMSdMtGgtk$Uls>L4%oAeHOdcWpdZ#aBDCbkP^SrXZ^Lx(4h6MYn6vaYvd-VSMC z)|9=UxIT82l9U@Y2cxRj|1QFgQy!!q4qdkNLthX^F9rj?iVgKxuA~f9o6z)UMOS2> z0V}sAikg-Q_H!WD-rC4T8mM`MHUQD5$Cd;qo2cl(jl=VSwkx?pmH9Dij>WX)ZKGFbi5CvWs^#tmh{04{9(UzMKwhdadpL-nB1-uI;9B;>Ll_i zy@HIxR!e9&%iMt7ia}Ea~Ttx%6f)*QUfS6pZ@OXaG%ykeD&7Ba$ z@0E#KBb5RUU6TT5S76Ft90fw4--*j)O$0hndLp zBIL`Iqa1DWqU0Xh#Km)|8FsIg_`a;$BpG4C$=TkR5*7#(qXi+I<0ZvI(33nv7SUH4 zxX?5{R7TqVPN;PC(@v@sW>U~PkTI(sHTt!mPtO-yzkDO>@Q~K9C4R9diU`2bh^?{K zG29k&&BlduguzT!NDdSP&1A)Xwx#tp7J)j8WG0py|zaKK@M;*gA$@(P7 zDxsU@YEWtQjym05WYcV;+-tc~4z|u7QqF0_U;fYrpOiV3H5m*uF)*GmJePnmJ$3aO|v4Ld%`>!iB^2$$~x(fN8TWbcuP`zMDKqr zN(f`F%;!SUQ0#DVkpo#bb?CB=-5wtwex_^X=*4q>u)fD|&ZG3iDxj4o_MUx>eOmqz z+cr2!WMk^k{uO#O+9C|9{fO~mKs5|{JM9E>s+$&eE^Hjx^Na1Z((WwPM@G+CjF-Pn z&Qr4}7W(h&5?n+80Ng(UildvQ(VvlIMqR^Ziyg&V$Lv)w-X#?r$c89VV3q&PSd^-P z>ZA+`Ue!r|CmDXs{O#!z7ZgKK3?PWHJ%)W^?@ZHkq9$W=UP&mDZ49|rf70>Pa=P2D zyO223?bkY;cb*AW7~cho9GbzT=63%x=i}udJ)*_etUU}EF#N54rab}b4~Yrj4mdeG zQ8c!&G6Dlc#2bl|<%T^Bsic<$q3U0jPgM~KYf=bdbF<;a0`HIPr+}*UeEw2E68(y5 zDI(|(>&6V*rUHJ4BW8KssvNj$%*xz&LJg-8$lrYH0)w{TZE4VgiM!hzpOn`H`2E0# ziU>F=jo6wDbuN&|8tqE0{FVh_w_%l+n z<)=rs9&=y|8GW=@GehlB(haC5Sup*#N8dx0DUX4vC9~EW9We|Ng<<{MY;M-N@-yCU znO><{NaD;b)XZLHQ!2;EYR4a(J8RJiDUbsB`1bxR$8E-K>u1im4@b9;6)?yPz^56n z!%tj^v6!%&dY;f1pQDj@WQ~`YvE*@}WeLdYTAtA@2~bA@eUsU-q@hx4FyuITuGp^G z2xRyq&ZQVaVlqWh>QSg+$P)u6e^5P9!haPoQprxKD?d(yzN)FsPi=Ke#p>B`s`C1* zw%4+^+G=yW>9!2r0S8Gl^$iWBa*PQ^9{~wW>Yxr*`zfu(b7P?iw-~Ih39erC$Bn9yFEwRS4h#Q^ zlt?qR;R=6TP(i;$jS;`>3Mx^7%o#<>cSLC&47i+1DVtqrp%}izJP_{hvK4PTrWgJy z(CX$+sEJ1v5f(=sJ8WYOUICi?J|13x!rn$v+@BU+X(}n0it*DR9~!i36WfyX0xWQm zZm5r;@3>*wdNU?^X<2wNs;SdWA6KEt<{e2yV}}G>Q#xF(Zw)ZN(BI9{2BZCI^w*}b z5(sS9=gVM{_68BhlsEdy(trR1abjPOJ|NPu-*No`>vcd~5~Ev@6Sx9?@Wxk30djWTwF$?2j~+I5ct@_kq;SB!Nf z7guyfm^)#qRnALTvGO6uG7P+FYq?0fJdeh%z0q}LeFO(#1um^wS@|2m!!4n=n#ZmR znSXr6HRZ^)j9NlsLh_}iu?0_OhdXCka(l$5N_nYM*{TjVTgQ`MiSb)W^J~@*-x?Gl zzA}!WPB`Xn<5eb4QmY15uZYJNygGRX zm7d(wRkPF2^yY=-@)RxOT{l%X*dfvuv?{WW%gefQ+joC4b+-KFX_NOvGK8tqK^D)r z+dFFd6ER9>@J5O;(-$6etIU$s{8o8F5UUK2uEm-&twHO1nq{+kuHyN9Cw1#SmyBNq zMLgd2UMkzCnfE+_RvglgMnS}9@Ze$eBzC75EnNE*8;G_Zd_bzEKqb!`#z(xr8{KhP zF-rEl2VsHtN%MO={^v}|!PLme(cvHS>^~a}|2@kNjPCi`OOGOa2lguH>Jc{=1S_b( zM%b*h2d4kH^kF$E<__h>qYlTYdw17%a+fvjiMs^b82%geOe<*6jsd!9^|{9qO&5O+ z`{4=-6b*Nb-ZX7zDwwpiMA%@ADk@A(kR0&~MxS^8VoelONzCV8i{fE!^urI~*gwik z&8;qVAnWO*8_uYAyi@`>dF0hzkE6WsxUKWLCd4i$1I0OFbU){caSREcT>9$sAGzFS zT6n>O7g~*tUfn|dT@P5+={;jX0|2o!0072+^uS-wp02(uzdVBE#jCW3r|BIXNt+l6 zvrte&5Q7kF3p1QX52{&1V|K>ArBLt@Wt`(}_zsrTjEjxKZM+t!p~>X}-tjt?rI#v6(%4CA^YqgKize79CHiJ>mn^ePx+D~@H-6~^H%-ir0H+FdMxl?n8# zOBdAoWwnU`vX5odGS=wyXFA%+W1L;~_k$rmU`$d$;olOk)G|>Qy%qz8UN}uSURGbN 
zc~cg}>|Zs4E_%H)Uz3kOS#zG}3wGcvsk@-i+8I?10}gga24$|=UQXX$bX}N}1E<-z zGO;?K@(@jrO)4VS8+g<2XG*dL;N(?j20} zYr2f=+!AW#*D(h1w_ES|s5Q1Hle9W(143QJk%i3Udyl>HKopC4z-cSt+<1p3b%G zUd==g;`+v0ot0t28$JEdXHYU7*f70?z5bF}_#Ia9quFh{--KQwjV|B4jS0v6EyOHo zSLxeZ|HZSmp+5SqO$U}-6O#w!h^3i)FP{^T+yfQV_{lSLa{$3T$_dN%!V|%i8vFA6 zSaW0R0l&32EjRhV)$nsQpcw^Vtq#rfF>8$Im_MOWL!1dLYfQKL<%HA;k9G_+umk_x z1yn}TsEwG9M~TL>nO-jUBn~)!VtcV9e`r{S1Bzt0ObC8!+?=NtY_*5yt9Vqi*}Kx| zg$RmZ=EbGi-UL(EPjHoNTXyQ|+d?(m|OxMvjHix-8s<+@7)ToKujWY|5GZV93Q* zMGgyf>>Q77^D^hWUs6u?CY(?EI+><74RC$Buzj@%1VGNC!RS_w-hNOJh!T*Mb2fEa z5XO}7!9&;+$25U*pbdrbtr5uEh;wG$h59ZXQ$MfXQcDY`e+1<~9uoEQL>e;jONZGJ zz)uQ=Jt@^D5U@Zzt~arx+44jjid4<2in_t9rSw6z(!qKn?+&gGL^x zpN%k=DWK{Jt#?BaxLbsvoW=)T_JyFFHK=+CrPo{m`2TFES&@`g7&au7H^$B?*DgsY zYYdxH{drQVp^Pj+qHIJljjwPCj+tS$GW)Y7tMj1W72dkt^}#~RZeVpEXC_+ei7;dt z9;Kzhr^todtu1h?iT)nF47)?sYi1_vgj*8S;cAc8jbKW$WZuWJY z^w4ptC@tb&>8c!Jg37ZPR;73ZY4Hm)`po8O{SE>}jbez%1`*0uqFZ5kYRkl*pjMYY zMICU8bTty$Az?KNV=*T557atlh;SphH)bGD`$QBLhz=m?*fP^_K;{TT`3DmNm!k1a zS^cvcfc_$cz@=dP`G$C)e+F zCnxmJ)(84u;see7@Ks&?brYZn%}a&8Bovn*u_)yhO3WyRrBcc%ko<8Hb#A6<(A-E} zH6S^7mfJk_RFQ{d+5y_M(@DCdvml-QL~gCMx87yL05ZtRm5hbN^y@{ZNb2B}LImh~&w@Do0yS?l`-$NM z4oX(Vo+x?<+|4ixibmRdA1~)myro|T1@wM|t&CW#v{!Pto^KiV^E2{1*{G)1d6jD6 zhvpF%v7CPRiaA}8LNm?=88prnMM9<@61Bh{ni?^f!alH-mUe;Qgt;yeSwJ7t}Eh)uxx!iIva9vQA^^e6=Yt`#C)p=VAM}HfHdjuVlU>X zb~_q||De|mJhK2@9Az-Kpfd?S2O(oG%?<7eH$15U8hhAa41 zjqK@ot6JK5VtUyquG1F$V!_$&Kpj$fhzcjvdKpZ)z+#j^@P&vHA6w$k8{-%qpL!?fou zn;WdX+8E5g0`63cSnh>!|5_p98n~($W&M!zH!6L z7)5OmH;)SiQ^m!;a&#jZcLT#R%Is6DT}jPZcP!pDhtV6WHq&_v#Y@|F$@Dye-6vCl z^mZ6|Lj`^raJPoxy*F!f%e?*RxFRm(S}>M(Ohwq_+nP$1vcJdWfua6i)gB@a{MNVX+d*>s@H{O@tE*ZNBq}6kZ*rm)9pb==#+l1I}G6eA-(u>MD1v5WMxG6=l;)O^s(A_I5rzf z7y2VFtb@xl%XT!$+Qy`1+$ynQMm$bKu4zA`UAWvvhf9?ie2I~O9^#nxNyE=DR>Q!-1+WJp%bY|!agJq5wC}*+Z_VAt z?0)N{5rbmAL|JHqIJg08$!Jss@)a0FvG>~k`m!Egj08l-a0wDV9wqHEn+)Fs;3CM^ zRZ43}a%#5wY6U`r_T^7;36?JPG8wo&ZF&5>BS%8=Hhmy@*+XxlLz_}2725rvCTN}g z#@HgI>!~{BaCy*U{u3VxM8)6+gvi&AyNf<06xz`D_#&jlOxw6K&b1 zYYSN+ar@?A+Mot=mzE_|!`}TUPN#mGCzQ|&Ee!68~w{9(?BGfU?fuAW3cH(ARXjUe6CN z*bZjME_(aBo*zb&y5250Z>qOeQ0Y3oE-w$_(AT|g_GF@QmzHsGlg|mlYR)S+4bDiz z_59$UEPs!w{Caa0E&+PS+d7F+QyT8N`kszhoa zRtzGXFfMA&uufMR^zT(!rH=qe6u*El_&???J;GmY{@T1RvZ0-`-&7+$lmi) zp{qtP0=KM9Fm!jGhjB})I_&xsp(-GBnnciEmqebnUQj=DwATnfaOrY<&8n)e*kYE) z(~wB-{XXcuLrZoCMonquj^#Oo7+7#S!wq*B?R|PgNh|4yh1|bVTs<042TK0p47tJL z?}CQihJX5`)z2C$X}|xoB^-n48;|osq%60?l2q(azc(p7f$%q%0C9BB=tV9e9zh)0 zT85P-sK(wGW5Q;L9rM(9vd~AP3EZ0Yh?EXIbVvW{aHA#e#)XFQFeWa}tCEDJHVg&F zhye_brq2$lKlGSJ_gbb@2z3*1k0r{fh9;D5(se-i5@LaP#5?7QiT?io?=Up81kcky<@rzz$k_UU8Wy2Otnfodj6?r5i;TDDZ) zHg$u#I%_;9o#2?B=9u`mO}FC+au)vFdci4l7YRR5mMn1WKIDCMJ*0^|z=S(9v+7t4tgSx|`ws1GiTVIgZvODD%7kW z6;IMvuF;Xx(k3nCt*Iq25U~%g6q=EONfL$o>O5y_6*EWP`-<;5`UCmmG=`p(F}9o& zhMW$(0E)Irp!Q;l>Xf7El)Vym5-xszFGhj2a(QG{b%B>hi%fAmdiuJObtwSm@@ zkQqjq7X21%d#1z4kBAn+gk;Trq@)Tz`njwh8q9%Bltj!H56Jr%ee7QBifmnn_sx2aHR`&#;a9k-l#5vR8UFEY9VjB}3-F<(`cjtVj=%r`v1 z{&1YZFRdE&+wOzQmhOn@eEZj*9Pv>^oZ+(0xygo^y)0psV^cCzco;H^sXaMIwA{(1 zV7DDIt893)BMu+Tewh@0r#zf-j5`S7+JP&(6r8E0EUU`8VD;h!+LM!B*M;fQAx%-lrg66#7BK3k#YYxveG_l z4C&&>?QZMCUL?OSD`1mEME>{_dnle&H+&^dEfToi&Ot`==1pMaYOf;oF@GA(Ibea-Vb4Cv#P;)+SBb_n(-jS@Ng-eG4zJb z$E{AyMNhjkMEs1WhLLPFD`)c{;|3mb@DAy`AIEc?LE04>XK?PT)l)d!=N&|F@JqAS zvPW#t4fahMKywl!?>x0)$OH+l;wJ2u>=%401KnK;FT+m2oq1d1+xwE$f0q9Dt*Ukt z{#eXQ7GiokNk$804pL2_C;I*jSrFQ)3cJ1ZF{MLd; z9=s^BoY!ePCHP@G{aIb9cM-+ck|(pSNrOVeaASwiG9?!22NUECv5$SxT235_;7hP>!F=D^7d94jCJ#D3@kGsCaRdD zCkpv;j4yz5ZtydqPXXVR%UYyNCTN!CckWH0D}I%%shiqmh$dsH3skJZJc8({u7A)Z 
z%=6H`G36lUKXuXGC_w4*wFOWaeH`}NgJmBoG>ljlOCOtz(JtmU{7DH@xP>v%aJAsY zFe(=l;IOGn@3%9+u{B{Aex0|*x8;LCx7Sb9*Uz=Nq8`qy8C~qV3AT5C&3#dYdGtg0 zd~k(bcs;`>r>GgmrTe~d?kabUHKuxm*G~PZBpdHiM1d>09Q)j(e2p0oyN`oghRr5% zK76Zm=#8t)?8bZb*B=^{|D>{23}L6yJC!}&$C-%#N@W8Zd!zs5@_V2CV+)Mck$GQ{ z{=WX@8D27LQT;vE{XxiDcELZr`Mk7Q| zzjxIEiW8APsH`STNg{>%7}+Kbbh&{!7WRuYF-)5K=|=QtXW!N8RbBpl>122HTOLm?r^{0%b023BD( z1ROyS5^@tew!(GuV616Ihizm9gy>%#JORGFpXTZR9TFmLiPz z5C{Fr>H0R8JO3ex9d^5io3x%cJKz=b0yvbqlC)0p^x^NgIk_QP-}}x@YE%G#?EN*o zE39naYG^~D~?O7$YSs?)tu}X!lSG>%H>np?Zd{EtSY7C zJ4^fwBxY*1V{nuQMhwRY2^IL?V|^A&qSd+uX>}&}4r!aWSID%Gm=m!fa(}wpwm!+j z!FCtp$x7(S{}mkbgv!3%f0SE5v!A1^RDT1kF!E>m?KRm7`cfLwOuMESCD`NM0%g)c z(+HLd48fv(<$+@26RM;^^QBShB^yo}uFhNB@V@6M?KVzT!|dVHNnhSu#%2COI$iUp z4b-45L`wRHN@KSh2Y*kl`~2$g`zXPg3wX}ASWYZWHrar%CevEwJWSQ?a>QxFRYOIS zN+z2hO(`r>l(ja7*HY`%)Q;Qaa7?HzFLKF`Q~bbDSkUQ!I1#<{yDRdrO-}6%D@RaQ zy+l}-a4bQS{M-*q!}Q`Oh6t}wubr#rsQU-OJS|TLz3+1u{c@ox{b! zg1Whjk8|Z0pD0~4q0254YE&LLD~c|K&tIXQ#%P@vz!XgmYS)XcH*zP*`##+%zye>< z4{Yf&5tm5^Ka1?#Jp^*fIMt@JVCMRg<3opn{>@=5xBI<1@{bS&0!s5P1p50g4gTqo z|2+Q9cL%bP{|@l)8t8u-{&6gNPvQTfi2mL1-*ut?YB=^z+5fvr^mm-!#e#n!y}f@- z@tbt;cjMpHQGXdrzU!sljsHU{^*h4va)`eW(h>d(;lITZznlJEy!*?P0`0%={Qp(T z`yKH2TG(HJROr70{#6wL*eLduNfPWvu{bddSJdy(dzmMpCxBmCE x`LEXBss6?KpGow0^WP)eUjTZvf8PE7Pn?sLgm~v5008^`BYmf_Dg7U<{{vXgk|6*9 literal 0 HcmV?d00001 diff --git a/src/encoded/tests/test_generate_item_from_owl.py b/src/encoded/tests/test_generate_item_from_owl.py index 3df658f7b7..45d3b5400e 100644 --- a/src/encoded/tests/test_generate_item_from_owl.py +++ b/src/encoded/tests/test_generate_item_from_owl.py @@ -10,7 +10,7 @@ from unittest import mock from ..commands import generate_items_from_owl as gifo from ..commands.owltools import Owler -from ..util import MockFileSystem +from dcicutils.qa_utils import MockFileSystem pytestmark = [pytest.mark.setone, pytest.mark.working] diff --git a/src/encoded/util.py b/src/encoded/util.py index 5a2906683c..815e3752b0 100644 --- a/src/encoded/util.py +++ b/src/encoded/util.py @@ -1,6 +1,10 @@ +import contextlib +import datetime +import gzip import io import os -import gzip +import tempfile + from io import BytesIO @@ -92,3 +96,106 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.file_system.files[file] = text return MockFileWriter(file_system=file_system, file=file) + + +DEBUGLOG_ENABLED = os.environ.get('DEBUGLOG_ENABLED', "FALSE").lower() == "true" + + +def debuglog(*args): + """ + As the name implies, this is a low-tech logging facility for temporary debugging info. + Prints info to a file in user's home directory. + + The debuglog facility allows simple debugging for temporary debugging of disparate parts of the system. + It takes arguments like print or one of the logging operations and outputs to ~/DEBUGLOG-yyyymmdd.txt. + Each line in the log is timestamped. + """ + if DEBUGLOG_ENABLED: + nowstr = str(datetime.datetime.now()) + dateid = nowstr[:10].replace('-', '') + with io.open(os.path.expanduser("~/DEBUGLOG-%s.txt" % dateid), "a+") as fp: + print(nowstr, *args, file=fp) + + +# These next few could be in dcicutils.s3_utils as part of s3Utils, but details of interfaces would have to change. +# For now, for expedience, they can live here and we can refactor later. -kmp 25-Jul-2020 + +@contextlib.contextmanager +def s3_output_stream(s3_client, bucket, key): + """ + This context manager allows one to write: + + with s3_output_stream(s3_client, bucket, key) as fp: + print("foo", file=fp) + + to do output to an s3 bucket. 
+
+    In fact, an intermediate local file is involved, so this function yields a file pointer (fp) to a
+    temporary local file that is open for write. That fp should be used to supply content to the file
+    during the dynamic scope of the context manager. Once the context manager's body executes, the
+    file will be closed, its contents will be copied to s3, and finally the temporary local file will
+    be deleted.
+
+    Args:
+        s3_client: a client object that results from a boto3.client('s3', ...) call.
+        bucket str: an S3 bucket name
+        key str: the name of a key within the given S3 bucket
+    """
+
+    tempfile_name = tempfile.mktemp()
+    try:
+        with io.open(tempfile_name, 'w') as fp:
+            yield fp
+        s3_client.upload_file(Filename=tempfile_name, Bucket=bucket, Key=key)
+    finally:
+        try:
+            os.remove(tempfile_name)
+        except Exception:
+            pass
+
+
+
+@contextlib.contextmanager
+def s3_local_file(s3_client, bucket, key):
+    """
+    This context manager allows one to write:
+
+        with s3_local_file(s3_client, bucket, key) as file:
+            with io.open(file, 'r') as fp:
+                dictionary = json.load(fp)
+
+    to do input from an s3 bucket.
+    """
+
+    tempfile_name = tempfile.mktemp()
+    try:
+        s3_client.download_file(Bucket=bucket, Key=key, Filename=tempfile_name)
+        yield tempfile_name
+    finally:
+        try:
+            os.remove(tempfile_name)
+        except Exception:
+            pass
+
+
+@contextlib.contextmanager
+def s3_input_stream(s3_client, bucket, key, mode='r'):
+    """
+    This context manager allows one to write:
+
+        with s3_input_stream(s3_client, bucket, key) as fp:
+            dictionary = json.load(fp)
+
+    to do input from an s3 bucket.
+
+    In fact, an intermediate local file is created, copied, and deleted.
+    """
+
+    with s3_local_file(s3_client, bucket, key) as file:
+        with io.open(file, mode=mode) as fp:
+            yield fp
+
+
+def create_empty_s3_file(s3_client, bucket, key):
+    empty_file = "/dev/null"
+    s3_client.upload_file(empty_file, Bucket=bucket, Key=key)

From f71f2380f5b9ba3e59b0dbafcfe5c91840631299 Mon Sep 17 00:00:00 2001
From: Kent Pitman
Date: Sun, 26 Jul 2020 07:38:33 -0400
Subject: [PATCH 037/125] Update pyproject.toml for latest versions of snovault and utils.
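Aside, not part of the diffs above: the debuglog() calls that PATCH 036 sprinkles through ingestion_listener.py and submit.py are silent unless the DEBUGLOG_ENABLED environment variable is set to "true", which is easy to miss when running the new listener. A minimal sketch of turning it on; the shell line is only one way to export the variable, and the import path assumes the installed package name "encoded":

    # $ DEBUGLOG_ENABLED=true make deploy1b    # start the ingestion listener with debug logging enabled
    from encoded.util import debuglog

    debuglog("ingestion listener heartbeat")   # appends a timestamped line to ~/DEBUGLOG-YYYYMMDD.txt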
--- poetry.lock | 156 +++++++++++++++++++++++++------------------------ pyproject.toml | 4 +- 2 files changed, 81 insertions(+), 79 deletions(-) diff --git a/poetry.lock b/poetry.lock index 74056ecd4a..0fdaeac11c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -194,7 +194,7 @@ description = "Code coverage measurement for Python" name = "coverage" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" -version = "5.2" +version = "5.2.1" [package.extras] toml = ["toml"] @@ -221,17 +221,18 @@ description = "cryptography is a package which provides cryptographic recipes an name = "cryptography" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" -version = "2.9.2" +version = "3.0" [package.dependencies] cffi = ">=1.8,<1.11.3 || >1.11.3" six = ">=1.4.1" [package.extras] -docs = ["sphinx (>=1.6.5,<1.8.0 || >1.8.0)", "sphinx-rtd-theme"] +docs = ["sphinx (>=1.6.5,<1.8.0 || >1.8.0,<3.1.0 || >3.1.0,<3.1.1 || >3.1.1)", "sphinx-rtd-theme"] docstest = ["doc8", "pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"] idna = ["idna (>=2.1)"] -pep8test = ["flake8", "flake8-import-order", "pep8-naming"] +pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"] +ssh = ["bcrypt (>=3.1.5)"] test = ["pytest (>=3.6.0,<3.9.0 || >3.9.0,<3.9.1 || >3.9.1,<3.9.2 || >3.9.2)", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,<3.79.2 || >3.79.2)"] [[package]] @@ -251,15 +252,15 @@ description = "Storage support for 4DN Data Portals." name = "dcicsnovault" optional = false python-versions = ">=3.6,<3.7" -version = "3.1.4" +version = "3.1.8" [package.dependencies] MarkupSafe = ">=0.23,<1" -Pillow = ">=3.1.1,<4.0.0" +Pillow = "3.1.1" PyBrowserID = ">=0.10.0,<0.11.0" PyYAML = ">=5.1,<5.3" SPARQLWrapper = ">=1.7.6,<2.0.0" -SQLAlchemy = ">=1.2.16,<2.0.0" +SQLAlchemy = "1.3.16" WSGIProxy2 = "0.4.2" WebOb = ">=1.8.5,<2.0.0" WebTest = ">=2.0.21,<3.0.0" @@ -267,7 +268,7 @@ aws_requests_auth = ">=0.4.1,<0.5.0" awscli = ">=1.15.42,<2.0.0" "backports.statistics" = "0.1.0" boto3 = ">=1.7.42,<2.0.0" -dcicutils = ">=0.25.0,<1" +dcicutils = ">=0.34.0,<1" elasticsearch_dsl = ">=5.3.0,<6.0.0" future = ">=0.15.2,<0.16.0" futures = ">=3.1.1,<4.0.0" @@ -302,7 +303,7 @@ venusian = ">=1.2.0,<2.0.0" xlrd = ">=1.0.0,<2.0.0" "zope.deprecation" = ">=4.4.0,<5.0.0" "zope.interface" = ">=4.6.0,<5.0.0" -"zope.sqlalchemy" = ">=1.2,<2.0" +"zope.sqlalchemy" = "1.3" [[package]] category = "main" @@ -310,7 +311,7 @@ description = "Utility package for interacting with the 4DN Data Portal and othe name = "dcicutils" optional = false python-versions = ">=3.4,<3.8" -version = "0.32.2" +version = "0.35.1" [package.dependencies] aws-requests-auth = ">=0.4.2,<1" @@ -495,7 +496,7 @@ description = "Python Git Library" name = "gitpython" optional = false python-versions = ">=3.4" -version = "3.1.3" +version = "3.1.7" [package.dependencies] gitdb = ">=4.0.1,<5" @@ -1409,7 +1410,7 @@ description = "Fast, Extensible Progress Meter" name = "tqdm" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*" -version = "4.47.0" +version = "4.48.0" [package.extras] dev = ["py-make (>=0.1.0)", "twine", "argopt", "pydoc-markdown"] @@ -1452,7 +1453,7 @@ description = "HTTP library with thread-safe connection pooling, file post, and name = "urllib3" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" -version = "1.25.9" +version = "1.25.10" [package.extras] brotli = ["brotlipy (>=0.6.0)"] @@ -1654,7 +1655,8 @@ transaction = ">=1.6.0" 
test = ["zope.testing"] [metadata] -content-hash = "f480337cad8012fe1536aef687bdbce1f60139b9b9887feeed77fc59be1db485" +content-hash = "7b0f42c553a61ed14b8ad181a6ba4b1c2e6d860cdaa10e01a43ab055a014549f" +lock-version = "1.0" python-versions = ">=3.6,<3.7" [metadata.files] @@ -1749,76 +1751,76 @@ colorama = [ {file = "colorama-0.3.3.tar.gz", hash = "sha256:eb21f2ba718fbf357afdfdf6f641ab393901c7ca8d9f37edd0bee4806ffa269c"}, ] coverage = [ - {file = "coverage-5.2-cp27-cp27m-macosx_10_13_intel.whl", hash = "sha256:d9ad0a988ae20face62520785ec3595a5e64f35a21762a57d115dae0b8fb894a"}, - {file = "coverage-5.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:4bb385a747e6ae8a65290b3df60d6c8a692a5599dc66c9fa3520e667886f2e10"}, - {file = "coverage-5.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:9702e2cb1c6dec01fb8e1a64c015817c0800a6eca287552c47a5ee0ebddccf62"}, - {file = "coverage-5.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:42fa45a29f1059eda4d3c7b509589cc0343cd6bbf083d6118216830cd1a51613"}, - {file = "coverage-5.2-cp27-cp27m-win32.whl", hash = "sha256:41d88736c42f4a22c494c32cc48a05828236e37c991bd9760f8923415e3169e4"}, - {file = "coverage-5.2-cp27-cp27m-win_amd64.whl", hash = "sha256:bbb387811f7a18bdc61a2ea3d102be0c7e239b0db9c83be7bfa50f095db5b92a"}, - {file = "coverage-5.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:3740b796015b889e46c260ff18b84683fa2e30f0f75a171fb10d2bf9fb91fc70"}, - {file = "coverage-5.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ebf2431b2d457ae5217f3a1179533c456f3272ded16f8ed0b32961a6d90e38ee"}, - {file = "coverage-5.2-cp35-cp35m-macosx_10_13_x86_64.whl", hash = "sha256:d54d7ea74cc00482a2410d63bf10aa34ebe1c49ac50779652106c867f9986d6b"}, - {file = "coverage-5.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:87bdc8135b8ee739840eee19b184804e5d57f518578ffc797f5afa2c3c297913"}, - {file = "coverage-5.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:ed9a21502e9223f563e071759f769c3d6a2e1ba5328c31e86830368e8d78bc9c"}, - {file = "coverage-5.2-cp35-cp35m-win32.whl", hash = "sha256:509294f3e76d3f26b35083973fbc952e01e1727656d979b11182f273f08aa80b"}, - {file = "coverage-5.2-cp35-cp35m-win_amd64.whl", hash = "sha256:ca63dae130a2e788f2b249200f01d7fa240f24da0596501d387a50e57aa7075e"}, - {file = "coverage-5.2-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:5c74c5b6045969b07c9fb36b665c9cac84d6c174a809fc1b21bdc06c7836d9a0"}, - {file = "coverage-5.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c32aa13cc3fe86b0f744dfe35a7f879ee33ac0a560684fef0f3e1580352b818f"}, - {file = "coverage-5.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1e58fca3d9ec1a423f1b7f2aa34af4f733cbfa9020c8fe39ca451b6071237405"}, - {file = "coverage-5.2-cp36-cp36m-win32.whl", hash = "sha256:3b2c34690f613525672697910894b60d15800ac7e779fbd0fccf532486c1ba40"}, - {file = "coverage-5.2-cp36-cp36m-win_amd64.whl", hash = "sha256:a4d511012beb967a39580ba7d2549edf1e6865a33e5fe51e4dce550522b3ac0e"}, - {file = "coverage-5.2-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:32ecee61a43be509b91a526819717d5e5650e009a8d5eda8631a59c721d5f3b6"}, - {file = "coverage-5.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6f91b4492c5cde83bfe462f5b2b997cdf96a138f7c58b1140f05de5751623cf1"}, - {file = "coverage-5.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bfcc811883699ed49afc58b1ed9f80428a18eb9166422bce3c31a53dba00fd1d"}, - {file = "coverage-5.2-cp37-cp37m-win32.whl", hash = "sha256:60a3d36297b65c7f78329b80120f72947140f45b5c7a017ea730f9112b40f2ec"}, - {file = "coverage-5.2-cp37-cp37m-win_amd64.whl", hash = 
"sha256:12eaccd86d9a373aea59869bc9cfa0ab6ba8b1477752110cb4c10d165474f703"}, - {file = "coverage-5.2-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:d82db1b9a92cb5c67661ca6616bdca6ff931deceebb98eecbd328812dab52032"}, - {file = "coverage-5.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:214eb2110217f2636a9329bc766507ab71a3a06a8ea30cdeebb47c24dce5972d"}, - {file = "coverage-5.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8a3decd12e7934d0254939e2bf434bf04a5890c5bf91a982685021786a08087e"}, - {file = "coverage-5.2-cp38-cp38-win32.whl", hash = "sha256:1dcebae667b73fd4aa69237e6afb39abc2f27520f2358590c1b13dd90e32abe7"}, - {file = "coverage-5.2-cp38-cp38-win_amd64.whl", hash = "sha256:f50632ef2d749f541ca8e6c07c9928a37f87505ce3a9f20c8446ad310f1aa87b"}, - {file = "coverage-5.2-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:7403675df5e27745571aba1c957c7da2dacb537c21e14007ec3a417bf31f7f3d"}, - {file = "coverage-5.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:0fc4e0d91350d6f43ef6a61f64a48e917637e1dcfcba4b4b7d543c628ef82c2d"}, - {file = "coverage-5.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:25fe74b5b2f1b4abb11e103bb7984daca8f8292683957d0738cd692f6a7cc64c"}, - {file = "coverage-5.2-cp39-cp39-win32.whl", hash = "sha256:d67599521dff98ec8c34cd9652cbcfe16ed076a2209625fca9dc7419b6370e5c"}, - {file = "coverage-5.2-cp39-cp39-win_amd64.whl", hash = "sha256:10f2a618a6e75adf64329f828a6a5b40244c1c50f5ef4ce4109e904e69c71bd2"}, - {file = "coverage-5.2.tar.gz", hash = "sha256:1874bdc943654ba46d28f179c1846f5710eda3aeb265ff029e0ac2b52daae404"}, + {file = "coverage-5.2.1-cp27-cp27m-macosx_10_13_intel.whl", hash = "sha256:40f70f81be4d34f8d491e55936904db5c527b0711b2a46513641a5729783c2e4"}, + {file = "coverage-5.2.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:675192fca634f0df69af3493a48224f211f8db4e84452b08d5fcebb9167adb01"}, + {file = "coverage-5.2.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:2fcc8b58953d74d199a1a4d633df8146f0ac36c4e720b4a1997e9b6327af43a8"}, + {file = "coverage-5.2.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:64c4f340338c68c463f1b56e3f2f0423f7b17ba6c3febae80b81f0e093077f59"}, + {file = "coverage-5.2.1-cp27-cp27m-win32.whl", hash = "sha256:52f185ffd3291196dc1aae506b42e178a592b0b60a8610b108e6ad892cfc1bb3"}, + {file = "coverage-5.2.1-cp27-cp27m-win_amd64.whl", hash = "sha256:30bc103587e0d3df9e52cd9da1dd915265a22fad0b72afe54daf840c984b564f"}, + {file = "coverage-5.2.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:9ea749fd447ce7fb1ac71f7616371f04054d969d412d37611716721931e36efd"}, + {file = "coverage-5.2.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ce7866f29d3025b5b34c2e944e66ebef0d92e4a4f2463f7266daa03a1332a651"}, + {file = "coverage-5.2.1-cp35-cp35m-macosx_10_13_x86_64.whl", hash = "sha256:4869ab1c1ed33953bb2433ce7b894a28d724b7aa76c19b11e2878034a4e4680b"}, + {file = "coverage-5.2.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:a3ee9c793ffefe2944d3a2bd928a0e436cd0ac2d9e3723152d6fd5398838ce7d"}, + {file = "coverage-5.2.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:28f42dc5172ebdc32622a2c3f7ead1b836cdbf253569ae5673f499e35db0bac3"}, + {file = "coverage-5.2.1-cp35-cp35m-win32.whl", hash = "sha256:e26c993bd4b220429d4ec8c1468eca445a4064a61c74ca08da7429af9bc53bb0"}, + {file = "coverage-5.2.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4186fc95c9febeab5681bc3248553d5ec8c2999b8424d4fc3a39c9cba5796962"}, + {file = "coverage-5.2.1-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:b360d8fd88d2bad01cb953d81fd2edd4be539df7bfec41e8753fe9f4456a5082"}, + {file = 
"coverage-5.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:1adb6be0dcef0cf9434619d3b892772fdb48e793300f9d762e480e043bd8e716"}, + {file = "coverage-5.2.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:098a703d913be6fbd146a8c50cc76513d726b022d170e5e98dc56d958fd592fb"}, + {file = "coverage-5.2.1-cp36-cp36m-win32.whl", hash = "sha256:962c44070c281d86398aeb8f64e1bf37816a4dfc6f4c0f114756b14fc575621d"}, + {file = "coverage-5.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b1ed2bdb27b4c9fc87058a1cb751c4df8752002143ed393899edb82b131e0546"}, + {file = "coverage-5.2.1-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:c890728a93fffd0407d7d37c1e6083ff3f9f211c83b4316fae3778417eab9811"}, + {file = "coverage-5.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:538f2fd5eb64366f37c97fdb3077d665fa946d2b6d95447622292f38407f9258"}, + {file = "coverage-5.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:27ca5a2bc04d68f0776f2cdcb8bbd508bbe430a7bf9c02315cd05fb1d86d0034"}, + {file = "coverage-5.2.1-cp37-cp37m-win32.whl", hash = "sha256:aab75d99f3f2874733946a7648ce87a50019eb90baef931698f96b76b6769a46"}, + {file = "coverage-5.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:c2ff24df02a125b7b346c4c9078c8936da06964cc2d276292c357d64378158f8"}, + {file = "coverage-5.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:304fbe451698373dc6653772c72c5d5e883a4aadaf20343592a7abb2e643dae0"}, + {file = "coverage-5.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:c96472b8ca5dc135fb0aa62f79b033f02aa434fb03a8b190600a5ae4102df1fd"}, + {file = "coverage-5.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8505e614c983834239f865da2dd336dcf9d72776b951d5dfa5ac36b987726e1b"}, + {file = "coverage-5.2.1-cp38-cp38-win32.whl", hash = "sha256:700997b77cfab016533b3e7dbc03b71d33ee4df1d79f2463a318ca0263fc29dd"}, + {file = "coverage-5.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:46794c815e56f1431c66d81943fa90721bb858375fb36e5903697d5eef88627d"}, + {file = "coverage-5.2.1-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:16042dc7f8e632e0dcd5206a5095ebd18cb1d005f4c89694f7f8aafd96dd43a3"}, + {file = "coverage-5.2.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:c1bbb628ed5192124889b51204de27c575b3ffc05a5a91307e7640eff1d48da4"}, + {file = "coverage-5.2.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4f6428b55d2916a69f8d6453e48a505c07b2245653b0aa9f0dee38785939f5e4"}, + {file = "coverage-5.2.1-cp39-cp39-win32.whl", hash = "sha256:9e536783a5acee79a9b308be97d3952b662748c4037b6a24cbb339dc7ed8eb89"}, + {file = "coverage-5.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:b8f58c7db64d8f27078cbf2a4391af6aa4e4767cc08b37555c4ae064b8558d9b"}, + {file = "coverage-5.2.1.tar.gz", hash = "sha256:a34cb28e0747ea15e82d13e14de606747e9e484fb28d63c999483f5d5188e89b"}, ] coveralls = [ {file = "coveralls-2.1.1-py2.py3-none-any.whl", hash = "sha256:3726d35c0f93a28631a003880e2aa6cc93c401d62bc6919c5cb497217ba30c55"}, {file = "coveralls-2.1.1.tar.gz", hash = "sha256:afe359cd5b350e1b3895372bda32af8f0260638c7c4a31a5c0f15aa6a96f40d9"}, ] cryptography = [ - {file = "cryptography-2.9.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:daf54a4b07d67ad437ff239c8a4080cfd1cc7213df57d33c97de7b4738048d5e"}, - {file = "cryptography-2.9.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:3b3eba865ea2754738616f87292b7f29448aec342a7c720956f8083d252bf28b"}, - {file = "cryptography-2.9.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:c447cf087cf2dbddc1add6987bbe2f767ed5317adb2d08af940db517dd704365"}, - {file = "cryptography-2.9.2-cp27-cp27m-win32.whl", hash = 
"sha256:f118a95c7480f5be0df8afeb9a11bd199aa20afab7a96bcf20409b411a3a85f0"}, - {file = "cryptography-2.9.2-cp27-cp27m-win_amd64.whl", hash = "sha256:c4fd17d92e9d55b84707f4fd09992081ba872d1a0c610c109c18e062e06a2e55"}, - {file = "cryptography-2.9.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d0d5aeaedd29be304848f1c5059074a740fa9f6f26b84c5b63e8b29e73dfc270"}, - {file = "cryptography-2.9.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1e4014639d3d73fbc5ceff206049c5a9a849cefd106a49fa7aaaa25cc0ce35cf"}, - {file = "cryptography-2.9.2-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:96c080ae7118c10fcbe6229ab43eb8b090fccd31a09ef55f83f690d1ef619a1d"}, - {file = "cryptography-2.9.2-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:e993468c859d084d5579e2ebee101de8f5a27ce8e2159959b6673b418fd8c785"}, - {file = "cryptography-2.9.2-cp35-abi3-manylinux2010_x86_64.whl", hash = "sha256:88c881dd5a147e08d1bdcf2315c04972381d026cdb803325c03fe2b4a8ed858b"}, - {file = "cryptography-2.9.2-cp35-cp35m-win32.whl", hash = "sha256:651448cd2e3a6bc2bb76c3663785133c40d5e1a8c1a9c5429e4354201c6024ae"}, - {file = "cryptography-2.9.2-cp35-cp35m-win_amd64.whl", hash = "sha256:726086c17f94747cedbee6efa77e99ae170caebeb1116353c6cf0ab67ea6829b"}, - {file = "cryptography-2.9.2-cp36-cp36m-win32.whl", hash = "sha256:091d31c42f444c6f519485ed528d8b451d1a0c7bf30e8ca583a0cac44b8a0df6"}, - {file = "cryptography-2.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:bb1f0281887d89617b4c68e8db9a2c42b9efebf2702a3c5bf70599421a8623e3"}, - {file = "cryptography-2.9.2-cp37-cp37m-win32.whl", hash = "sha256:18452582a3c85b96014b45686af264563e3e5d99d226589f057ace56196ec78b"}, - {file = "cryptography-2.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:22e91636a51170df0ae4dcbd250d318fd28c9f491c4e50b625a49964b24fe46e"}, - {file = "cryptography-2.9.2-cp38-cp38-win32.whl", hash = "sha256:844a76bc04472e5135b909da6aed84360f522ff5dfa47f93e3dd2a0b84a89fa0"}, - {file = "cryptography-2.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:1dfa985f62b137909496e7fc182dac687206d8d089dd03eaeb28ae16eec8e7d5"}, - {file = "cryptography-2.9.2.tar.gz", hash = "sha256:a0c30272fb4ddda5f5ffc1089d7405b7a71b0b0f51993cb4e5dbb4590b2fc229"}, + {file = "cryptography-3.0-cp27-cp27m-macosx_10_10_x86_64.whl", hash = "sha256:ab49edd5bea8d8b39a44b3db618e4783ef84c19c8b47286bf05dfdb3efb01c83"}, + {file = "cryptography-3.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:124af7255ffc8e964d9ff26971b3a6153e1a8a220b9a685dc407976ecb27a06a"}, + {file = "cryptography-3.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:51e40123083d2f946794f9fe4adeeee2922b581fa3602128ce85ff813d85b81f"}, + {file = "cryptography-3.0-cp27-cp27m-win32.whl", hash = "sha256:dea0ba7fe6f9461d244679efa968d215ea1f989b9c1957d7f10c21e5c7c09ad6"}, + {file = "cryptography-3.0-cp27-cp27m-win_amd64.whl", hash = "sha256:8ecf9400d0893836ff41b6f977a33972145a855b6efeb605b49ee273c5e6469f"}, + {file = "cryptography-3.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:0c608ff4d4adad9e39b5057de43657515c7da1ccb1807c3a27d4cf31fc923b4b"}, + {file = "cryptography-3.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:bec7568c6970b865f2bcebbe84d547c52bb2abadf74cefce396ba07571109c67"}, + {file = "cryptography-3.0-cp35-abi3-macosx_10_10_x86_64.whl", hash = "sha256:0cbfed8ea74631fe4de00630f4bb592dad564d57f73150d6f6796a24e76c76cd"}, + {file = "cryptography-3.0-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:a09fd9c1cca9a46b6ad4bea0a1f86ab1de3c0c932364dbcf9a6c2a5eeb44fa77"}, + {file = "cryptography-3.0-cp35-abi3-manylinux2010_x86_64.whl", 
hash = "sha256:ce82cc06588e5cbc2a7df3c8a9c778f2cb722f56835a23a68b5a7264726bb00c"}, + {file = "cryptography-3.0-cp35-cp35m-win32.whl", hash = "sha256:9367d00e14dee8d02134c6c9524bb4bd39d4c162456343d07191e2a0b5ec8b3b"}, + {file = "cryptography-3.0-cp35-cp35m-win_amd64.whl", hash = "sha256:384d7c681b1ab904fff3400a6909261cae1d0939cc483a68bdedab282fb89a07"}, + {file = "cryptography-3.0-cp36-cp36m-win32.whl", hash = "sha256:4d355f2aee4a29063c10164b032d9fa8a82e2c30768737a2fd56d256146ad559"}, + {file = "cryptography-3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:45741f5499150593178fc98d2c1a9c6722df88b99c821ad6ae298eff0ba1ae71"}, + {file = "cryptography-3.0-cp37-cp37m-win32.whl", hash = "sha256:8ecef21ac982aa78309bb6f092d1677812927e8b5ef204a10c326fc29f1367e2"}, + {file = "cryptography-3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4b9303507254ccb1181d1803a2080a798910ba89b1a3c9f53639885c90f7a756"}, + {file = "cryptography-3.0-cp38-cp38-win32.whl", hash = "sha256:8713ddb888119b0d2a1462357d5946b8911be01ddbf31451e1d07eaa5077a261"}, + {file = "cryptography-3.0-cp38-cp38-win_amd64.whl", hash = "sha256:bea0b0468f89cdea625bb3f692cd7a4222d80a6bdafd6fb923963f2b9da0e15f"}, + {file = "cryptography-3.0.tar.gz", hash = "sha256:8e924dbc025206e97756e8903039662aa58aa9ba357d8e1d8fc29e3092322053"}, ] dcicpyvcf = [ {file = "dcicpyvcf-1.0.0.tar.gz", hash = "sha256:c5bf8d585002ab3b95d13a47803376b456b931865e4189c38a18cca47b108449"}, ] dcicsnovault = [ - {file = "dcicsnovault-3.1.4-py3-none-any.whl", hash = "sha256:3ad78b95255f4a409fb7e29d1933fee113b03c2a6abf65e62b821a0e6ac1666e"}, - {file = "dcicsnovault-3.1.4.tar.gz", hash = "sha256:5efc2ea37d0fc78411817925d63c383e322680f264629c2060533a50721a9bd4"}, + {file = "dcicsnovault-3.1.8-py3-none-any.whl", hash = "sha256:928fc529d769208356cfdb4aade6bf39bdb31db2cc4d35899536d505a89704ee"}, + {file = "dcicsnovault-3.1.8.tar.gz", hash = "sha256:0127c6dde6eef7271cf07a63fa141f596c25b9109c5fccb40892fbe0e4e0b464"}, ] dcicutils = [ - {file = "dcicutils-0.32.2-py3-none-any.whl", hash = "sha256:7403d422a12160162a9691aff2af04f6f37869fc40252f2f61cd92a10076a4b2"}, - {file = "dcicutils-0.32.2.tar.gz", hash = "sha256:888feae7870294fe12979fbe567b653874273d149bcc31ecef5817300b01c0f1"}, + {file = "dcicutils-0.35.1-py3-none-any.whl", hash = "sha256:5f6cd17fb1c78adfaca9f2e23dedbe4a8a3d8c0dee2dc73cdd7f2dd832debc5d"}, + {file = "dcicutils-0.35.1.tar.gz", hash = "sha256:7924f83de55673b580d02242a4bcc3440c290f1139a862f951ef00958a24fb71"}, ] docker = [ {file = "docker-4.2.2-py2.py3-none-any.whl", hash = "sha256:03a46400c4080cb6f7aa997f881ddd84fef855499ece219d75fbdb53289c17ab"}, @@ -1872,8 +1874,8 @@ gitdb = [ {file = "gitdb-4.0.5.tar.gz", hash = "sha256:c9e1f2d0db7ddb9a704c2a0217be31214e91a4fe1dea1efad19ae42ba0c285c9"}, ] gitpython = [ - {file = "GitPython-3.1.3-py3-none-any.whl", hash = "sha256:ef1d60b01b5ce0040ad3ec20bc64f783362d41fa0822a2742d3586e1f49bb8ac"}, - {file = "GitPython-3.1.3.tar.gz", hash = "sha256:e107af4d873daed64648b4f4beb89f89f0cfbe3ef558fc7821ed2331c2f8da1a"}, + {file = "GitPython-3.1.7-py3-none-any.whl", hash = "sha256:fa3b92da728a457dd75d62bb5f3eb2816d99a7fe6c67398e260637a40e3fafb5"}, + {file = "GitPython-3.1.7.tar.gz", hash = "sha256:2db287d71a284e22e5c2846042d0602465c7434d910406990d5b74df4afb0858"}, ] html5lib = [ {file = "html5lib-0.9999999.tar.gz", hash = "sha256:2612a191a8d5842bfa057e41ba50bbb9dcb722419d2408c78cff4758d0754868"}, @@ -2379,8 +2381,8 @@ toml = [ {file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"}, ] tqdm = [ - 
{file = "tqdm-4.47.0-py2.py3-none-any.whl", hash = "sha256:7810e627bcf9d983a99d9ff8a0c09674400fd2927eddabeadf153c14a2ec8656"}, - {file = "tqdm-4.47.0.tar.gz", hash = "sha256:63ef7a6d3eb39f80d6b36e4867566b3d8e5f1fe3d6cb50c5e9ede2b3198ba7b7"}, + {file = "tqdm-4.48.0-py2.py3-none-any.whl", hash = "sha256:fcb7cb5b729b60a27f300b15c1ffd4744f080fb483b88f31dc8654b082cc8ea5"}, + {file = "tqdm-4.48.0.tar.gz", hash = "sha256:6baa75a88582b1db6d34ce4690da5501d2a1cb65c34664840a456b2c9f794d29"}, ] transaction = [ {file = "transaction-2.4.0-py2.py3-none-any.whl", hash = "sha256:b96a5e9aaa73f905759bc9ccf0021bf4864c01ac36666e0d28395e871f6d584a"}, @@ -2394,8 +2396,8 @@ uptime = [ {file = "uptime-3.0.1.tar.gz", hash = "sha256:7c300254775b807ce46e3dcbcda30aa3b9a204b9c57a7ac1e79ee6dbe3942973"}, ] urllib3 = [ - {file = "urllib3-1.25.9-py2.py3-none-any.whl", hash = "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115"}, - {file = "urllib3-1.25.9.tar.gz", hash = "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527"}, + {file = "urllib3-1.25.10-py2.py3-none-any.whl", hash = "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461"}, + {file = "urllib3-1.25.10.tar.gz", hash = "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a"}, ] venusian = [ {file = "venusian-1.2.0-py2.py3-none-any.whl", hash = "sha256:2f2d077a1eedc3fda40425f65687c8c494da7e83d7c23bc2c4d1a40eb3ca5b6d"}, diff --git a/pyproject.toml b/pyproject.toml index c251dffd51..83d2d9ff01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,8 +42,8 @@ certifi = ">=2020.4.5.2" chardet = "3.0.4" colorama = "0.3.3" dcicpyvcf = "1.0.0" -dcicsnovault = ">=3.1.4,<4" -dcicutils = ">=0.31.1,<1" +dcicsnovault = ">=3.1.8,<4" +dcicutils = ">=0.35.1,<1" docutils = "0.12" elasticsearch = "5.5.3" elasticsearch-dsl = "^5.4.0" From ce9eb26cd1e37cad1d7d244c87be39f0609a63f7 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Sun, 26 Jul 2020 20:54:50 -0400 Subject: [PATCH 038/125] WIP: Attempts to use a IngestionSubmission type for progress info. 
--- src/encoded/ingestion_engines.py | 35 + src/encoded/ingestion_listener.py | 24 +- src/encoded/schemas/ingestion_submission.json | 87 +++ src/encoded/submit.py | 8 +- src/encoded/submit.py.SAVE | 664 ++++++++++++++++++ src/encoded/types/ingestion.py | 42 ++ 6 files changed, 851 insertions(+), 9 deletions(-) create mode 100644 src/encoded/schemas/ingestion_submission.json create mode 100644 src/encoded/submit.py.SAVE create mode 100644 src/encoded/types/ingestion.py diff --git a/src/encoded/ingestion_engines.py b/src/encoded/ingestion_engines.py index 05a5f0f82e..4a8abf6b07 100644 --- a/src/encoded/ingestion_engines.py +++ b/src/encoded/ingestion_engines.py @@ -76,6 +76,23 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): try: + if isinstance(institution, str): + institution = vapp.get(institution).json + if isinstance(project, str): + project = vapp.get(project).json + + vapp.patch_json("/ingestion-submission", { + "object_name": manifest['object_name'], + "ingestion_type": ingestion_type, + "submission_id": uuid, + "parameters": manifest['parameters'], + "institution": institution, + "project": project, + "processing_status": { + "state": "processing", + } + }) + validation_log_lines, final_json, result_lines = submit_data_bundle(s3_client=s3_client, bucket=DATA_BUNDLE_BUCKET, key=data_key, @@ -96,6 +113,15 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=submission_response_key) as fp: _show_report_lines(result_lines, fp) + vapp.patch_json("ingestion-submission", { + "submission_id": uuid, + "progress": { + "state": "done", + "outcome": "failure" if validation_log_lines else "success", + "progress": "complete", + }, + }) + except Exception as e: resolution["traceback_key"] = traceback_key = "%s/traceback.json" % uuid @@ -105,5 +131,14 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): resolution["error_type"] = e.__class__.__name__ resolution["error_message"] = str(e) + vapp.patch_json("ingestion-submission", { + "submission_id": uuid, + "progress": { + "state": "done", + "outcome": "error", + "progress": "incomplete", + }, + }) + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key="%s/resolution.json" % uuid) as fp: print(json.dumps(resolution, indent=2), file=fp) diff --git a/src/encoded/ingestion_listener.py b/src/encoded/ingestion_listener.py index 98c472d6d9..f69766c41f 100644 --- a/src/encoded/ingestion_listener.py +++ b/src/encoded/ingestion_listener.py @@ -74,10 +74,10 @@ def submit_for_ingestion(context, request): # NOTE: Some reference information about uploading files to s3 is here: # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html - upload_id = str(uuid.uuid4()) + submission_id = str(uuid.uuid4()) _, ext = os.path.splitext(filename) - object_name = "{id}/datafile{ext}".format(id=upload_id, ext=ext) - manifest_name = "{id}/manifest.json".format(id=upload_id) + object_name = "{id}/datafile{ext}".format(id=submission_id, ext=ext) + manifest_name = "{id}/manifest.json".format(id=submission_id) s3_client = boto3.client('s3') @@ -98,6 +98,7 @@ def submit_for_ingestion(context, request): result = { "filename": filename, "object_name": object_name, + "submission_id": submission_id, "bucket": DATA_BUNDLE_BUCKET, "success": success, "message": message, @@ -123,7 +124,7 @@ def submit_for_ingestion(context, request): raise SubmissionFailure(message) queue_manager = get_queue_manager(request, override_name=override_name) - _, failed = 
queue_manager.add_uuids([upload_id], ingestion_type=ingestion_type) + _, failed = queue_manager.add_uuids([submission_id], ingestion_type=ingestion_type) if failed: # If there's a failure, failed will be a list of one problem description since we only submitted one thing. @@ -214,7 +215,7 @@ def __init__(self, registry, override_name=None): 'region_name': 'us-east-1' } self.client = boto3.client('sqs', **kwargs) - self.queue_name = self.env_name + self.BUCKET_EXTENSION if not override_name else override_name + self.queue_name = override_name or (self.env_name + self.BUCKET_EXTENSION) self.queue_attrs = { self.queue_name: { 'DelaySeconds': '1', # messages initially invisible for 1 sec @@ -549,9 +550,17 @@ def run(self): if ingestion_type != 'vcf': # Let's minimally disrupt things for now. We can refactor this later # to make all the parts work the same -kmp + self.vapp.post_json("/ingestion-submission", { + "ingestion_type": ingestion_type, + "submission_id": uuid, + }) handler = get_ingestion_processor(ingestion_type) handler(uuid=uuid, ingestion_type=ingestion_type, vapp=self.vapp, log=log) - print("HANDLED", uuid) + # TODO: If we delete messages at the end of each loop, I think we'll here need to do this, + # since we're bypassing bottom of lop with the 'continue': + # self.delete_messages([message]) + # messages.remove(message) + debuglog("HANDLED", uuid) continue debuglog("Did NOT process", uuid, "as", ingestion_type) @@ -597,6 +606,9 @@ def run(self): log.error(msg) self.update_status(msg=msg) + # TODO: I worry waiting to delete multiple messages means that if there's an error + # we'll have things that were completed not get deleted. Should delete one per iteration? + # -kmp 26-Jul-2020 self.delete_messages(messages) diff --git a/src/encoded/schemas/ingestion_submission.json b/src/encoded/schemas/ingestion_submission.json new file mode 100644 index 0000000000..1de4a11dbe --- /dev/null +++ b/src/encoded/schemas/ingestion_submission.json @@ -0,0 +1,87 @@ +{ + "title": "Ingestion Submission", + "description": "Schema for metadata related to ingestion requests submitted to CGAP.", + "id": "/profiles/ingestion_submission.json", + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "required": [ + "ingestion_type", + "submission_id" + ], + "additionalProperties": false, + "identifyingProperties": ["uuid", "aliases"], + "mixinProperties": [ + { "$ref": "mixins.json#/schema_version" }, + { "$ref": "mixins.json#/aliases" }, + { "$ref": "mixins.json#/uuid" }, + { "$ref": "mixins.json#/documents" }, + { "$ref": "mixins.json#/attribution" }, + { "$ref": "mixins.json#/status" }, + { "$ref": "mixins.json#/submitted" }, + { "$ref": "mixins.json#/modified" }, + { "$ref": "mixins.json#/static_embeds" } + ], + "mixinFacets" : [ + { "$ref": "mixins.json#/facets_common" } + ], + "properties": { + "schema_version": { + "default": "3" + }, + "object_name": { + "title": "Object Name", + "type": "string" + }, + "ingestion_type": { + "title": "Ingestion Type", + "type": "string", + "enum": [ + "data_bundle", + "vcf" + ] + }, + "submission_id": { + "title": "Submission ID", + "type": "string" + }, + "parameters": { + "title": "Parameters", + "type": "object", + "additionalProperties": true, + "properties": {} + }, + "processing_status": { + "title": "Processing Status", + "type": "object", + "additionalProperties": false, + "properties": { + "state": { + "title": "State", + "type": "string", + "enum": [ + "submitted", + "processing", + "done" + ], + "default": "submitted" + }, + 
"outcome": { + "title": "Outcome", + "type": "string", + "enum": [ + "unknown", + "success", + "failure", + "error" + ], + "default": "unknown" + }, + "progress": { + "title": "Progress", + "type": "string", + "default": "unavailable" + } + } + } + } +} diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 17d2104450..76d850e3a3 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -103,9 +103,11 @@ def submit_data_bundle(*, s3_client, bucket, key, project, institution, vapp): log: a logging object capable of .info, .warning, .error, or .debug messages """ with s3_local_file(s3_client, bucket=bucket, key=key) as file: - project_json = vapp.get(project).json - institution_json = vapp.get(institution).json - json_data = xls_to_json(file, project=project_json, institution=institution_json) + assert isinstance(project, dict) + assert isinstance(institution, dict) + # project_json = vapp.get(project).json + # institution_json = vapp.get(institution).json + json_data = xls_to_json(file, project=project, institution=institution) final_json, validation_log_lines = validate_all_items(vapp, json_data) result_lines = post_and_patch_all_items(vapp, final_json) return validation_log_lines, final_json, result_lines diff --git a/src/encoded/submit.py.SAVE b/src/encoded/submit.py.SAVE new file mode 100644 index 0000000000..67b2338524 --- /dev/null +++ b/src/encoded/submit.py.SAVE @@ -0,0 +1,664 @@ +import ast +import datetime +import json +import xlrd + +from dcicutils.misc_utils import VirtualApp, VirtualAppError +from dcicutils import ff_utils +from pyramid.paster import get_app +from pyramid.response import Response +from snovault.util import debug_log +from pyramid.view import view_config +from webtest.app import AppError +from .common import s3_local_file, debuglog + + +GENERIC_FIELD_MAPPING = { + 'individual': {}, + 'family': {}, + 'sample': { + 'date collected': 'specimen_collection_date', + 'location stored': 'specimen_storage_location', + 'specimen id': 'specimen_accession', + 'transport method': 'transported_by', + 'sequencing ref lab': 'sequencing_lab', + "date rec'd at ref lab": 'date_received', + 'specimen accepted by ref lab': 'specimen_accepted', + 'sample id by ref lab': 'sequence_id', + 'req type': 'requisition_type', + "date req rec'd": 'date_requisition_received', + 'physician/provider': 'ordering_physician' + }, + 'requisition': { + 'req accepted y/n': 'accepted_rejected', + 'reason rejected': 'rejection_reason', + 'corrective action taken': 'corrective_action', + 'corrective action taken by': 'action_taken_by', + 'correction notes': 'notes' + } +} + +# BGM_FIELD_MAPPING = { +# 'bcgg-id': 'patient id', +# 'bcgg-f-id': 'family id', +# "date req rec'd": 'date requisition received' +# } + + +POST_ORDER = [ + 'file_fastq', 'file_processed', 'sample', 'individual', + 'family', 'sample_processing', 'report', 'case' +] + + +LINKS = [ + 'samples', 'members', 'mother', 'father', 'proband', 'report', + 'individual', 'sample_processing', 'families' +] + + + +# This "/submit_data" endpoint is a placeholder for a submission endpoint modified from loadxl. +# +# NOTES FROM KMP (25-Jul-2020): +# +# This will be done differently soon as part of the "/submit_for_ingestion" endpoint that +# will be in ingestion_listener.py. That endpoint will need an "?ingestion type=data_bundle" +# as query parameter. That "data_bundle" ingestion type will defined in ingestion_engines.py. 
+# The new entry point here that will be needed is submit_data_bundle, and then this temporary +# "/submit_data" endpoint can presumably go away.. -kmp 25-Jul-2020 + +@view_config(route_name='submit_data', request_method='POST', permission='add') +@debug_log +def submit_data(context, request): + ''' + usage notes here later + ''' + config_uri = request.json.get('config_uri', 'production.ini') + patch_only = request.json.get('patch_only', False) + post_only = request.json.get('post_only', False) + app = get_app(config_uri, 'app') + environ = {'HTTP_ACCEPT': 'application/json', 'REMOTE_USER': 'TEST'} + virtualapp = VirtualApp(app, environ) + # expected response + request.response.status = 200 + result = { + 'status': 'success', + '@type': ['result'], + } + + raise NotImplementedError + +# This endpoint will soon be the primary entry point. Please keep it working as-is and do not remove it. +# -kmp 25-Jul-2020 +def submit_data_bundle(*, s3_client, bucket, key, project, institution, vapp): # All keyword arguments, all required. + """ + Handles processing of a submitted workbook. + + Args: + data_stream: an open stream to xls workbook data + project: a project identifier + institution: an institution identifier + vapp: a VirtualApp object + log: a logging object capable of .info, .warning, .error, or .debug messages + """ + with s3_local_file(s3_client, bucket=bucket, key=key) as file: + project_json = vapp.get(project).json + institution_json = vapp.get(institution).json + json_data = xls_to_json(file, project=project_json, institution=institution_json) + final_json, validation_log_lines = validate_all_items(vapp, json_data) + result_lines = post_and_patch_all_items(vapp, final_json) + return validation_log_lines, final_json, result_lines + + +def map_fields(row, metadata_dict, addl_fields, item_type): + for map_field in GENERIC_FIELD_MAPPING[item_type]: + if map_field in row: + metadata_dict[GENERIC_FIELD_MAPPING[item_type][map_field]] = row.get(map_field) + for field in addl_fields: + metadata_dict[field] = row.get(field.replace('_', ' ')) + return metadata_dict + + +def xls_to_json(xls_data, project, institution): + ''' + Converts excel file to json for submission. + Functional but expect future changes. + ''' + book = xlrd.open_workbook(xls_data) + sheet, = book.sheets() + row = row_generator(sheet) + top_header = next(row) + debuglog("top_header:", top_header) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 + keys = next(row) + debuglog("keys:", keys) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 + descriptions = next(row) + debuglog("descriptions:", descriptions) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 + rows = [] + counter = 0 + for values in row: + r = [val for val in values] + row_dict = {keys[i].lower().rstrip('*'): item for i, item in enumerate(r)} + rows.append(row_dict) + + items = { + 'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}, + 'file_fastq': {}, 'file_processed': {}, 'case': {}, 'report': {}, + 'reports': [] + } + file_errors = [] + specimen_ids = {} + family_dict = create_families(rows) + a_types = get_analysis_types(rows) + for row in rows: + debuglog("row:", repr(row)) # Temporary instrumentation for debugging to go away soon. 
-kmp 25-Jul-2020 + indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) + fam_alias = '{}:{}'.format(project['name'], family_dict[row['analysis id']]) + # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) + # create items for Individual + items = fetch_individual_metadata(row, items, indiv_alias, institution['name']) + # create/edit items for Family + items = fetch_family_metadata(row, items, indiv_alias, fam_alias) + # create item for Sample if there is a specimen + if row.get('specimen id'): + samp_alias = '{}:sample-{}'.format(project['name'], row['specimen id']) + if row['specimen id'] in specimen_ids: + samp_alias = samp_alias + '-' + specimen_ids[row['specimen id']] + specimen_ids[row['specimen id']] += 1 + else: + specimen_ids[row['specimen id']] = 1 + analysis_alias = '{}:analysis-{}'.format(project['name'], row['analysis id']) + items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, + fam_alias, project['name'], a_types) + if row.get('files'): + file_items = fetch_file_metadata(row['files'].split(','), project['name']) + file_errors.extend(file_items['errors']) + items['file_fastq'].update(file_items['file_fastq']) + items['file_processed'].update(file_items['file_processed']) + else: + print('WARNING: No specimen id present for patient {},' + ' sample will not be created.'.format(row['individual id'])) + # create SampleProcessing item for trio/group if needed + # items = create_sample_processing_groups(items, sp_alias) + items = add_relations(items) + items = create_case_items(items, project['name']) + # removed unused fields, add project and institution + for val1 in items.values(): + for val2 in val1.values(): + remove_keys = [k for k, v in val2.items() if not v] + for key in remove_keys: + del val2[key] + val2['project'] = project['@id'] + val2['institution'] = institution['@id'] + items['file_errors'] = file_errors + return items + + +def create_families(rows): + proband_rows = [row for row in rows if row.get('relation to proband').lower() == 'proband'] + fams = {row.get('analysis id'): 'family-{}'.format(row.get('individual id')) for row in proband_rows} + return fams + + +def get_analysis_types(rows): + analysis_relations = {} + analysis_types = {} + for row in rows: + analysis_relations.setdefault(row.get('analysis id'), [[], []]) + analysis_relations[row.get('analysis id')][0].append(row.get('relation to proband', '').lower()) + analysis_relations[row.get('analysis id')][1].append(row.get('workup type', '').upper()) + for k, v in analysis_relations.items(): + if len(list(set(v[1]))) == 1: + if len(v[0]) == 1: + analysis_types[k] = v[1][0] + elif sorted(v[0]) == ['father', 'mother', 'proband']: + analysis_types[k] = v[1][0] + '-Trio' + else: + analysis_types[k] = v[1][0] + '-Group' + else: + analysis_types[k] = None + return analysis_types + + +def fetch_individual_metadata(row, items, indiv_alias, inst_name): + new_items = items.copy() + info = {'aliases': [indiv_alias]} + info = map_fields(row, info, ['individual_id', 'sex', 'age', 'birth_year'], 'individual') + if row.get('other individual id'): + other_id = {'id': row['other individual id'], 'id_source': inst_name} + if row.get('other individual id type'): + other_id['id_source'] = row['other individual id source'] + info['institutional_id'] = other_id + info['age'] = int(info['age']) if info.get('age') else None + info['birth_year'] = int(info['birth year']) if info.get('birth year') else None + if indiv_alias not in new_items['individual']: 
+ new_items['individual'][indiv_alias] = {k: v for k, v in info.items() if v} + else: + for key in info: + if key not in new_items['individual'][indiv_alias]: + new_items['individual'][indiv_alias][key] = info[key] + return new_items + + +def fetch_family_metadata(row, items, indiv_alias, fam_alias): + new_items = items.copy() + info = { + 'aliases': [fam_alias], + 'family_id': row['family id'], + 'members': [indiv_alias] + } + if fam_alias not in new_items['family']: + new_items['family'][fam_alias] = info + if indiv_alias not in new_items['family'][fam_alias]['members']: + new_items['family'][fam_alias]['members'].append(indiv_alias) + for relation in ['proband', 'mother', 'father', 'brother', 'sister', 'sibling']: + if row.get('relation to proband', '').lower() == relation and relation not in new_items['family'][fam_alias]: + new_items['family'][fam_alias][relation] = indiv_alias + return new_items + + +def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name, analysis_type_dict): + new_items = items.copy() + info = {'aliases': [samp_alias], 'files': []} # TODO: implement creation of file db items + fields = [ + 'workup_type', 'specimen_type', 'dna_concentration', 'date_transported', + 'specimen_notes', 'research_protocol_name', 'sent_by', 'physician_id', 'indication' + ] + info = map_fields(row, info, fields, 'sample') + if info.get('specimen_accepted', '').lower() == 'y': + info['specimen_accepted'] = 'Yes' + elif info.get('specimen_accepted', '').lower() == 'n': + info['specimen_accepted'] = 'No' + if row.get('second specimen id'): + other_id = {'id': row['second specimen id'], 'id_type': proj_name} # add proj info? + if row.get('second specimen id type'): + other_id['id_type'] = row['second specimen id type'] + info['other_specimen_ids'] = [other_id] + req_info = map_fields(row, {}, ['date sent', 'date completed'], 'requisition') + if req_info.get('accepted_rejected', '').lower() in ['yes', 'no', 'y', 'n']: + if req_info['accepted_rejected'].lower().startswith('y'): + req_info['accepted_rejected'] = 'Accepted' + else: + req_info['accepted_rejected'] = "Rejected" + info['requisition_acceptance'] = {k: v for k, v in req_info.items() if v} + new_items['sample'][samp_alias] = {k: v for k, v in info.items() if v} + if indiv_alias in new_items['individual']: + new_items['individual'][indiv_alias]['samples'] = [samp_alias] + new_sp_item = { + # not trivial to add analysis_type here, turn into calculated property + 'aliases': [analysis_alias], + 'samples': [], + 'families': [] + } + if row.get('analysis id') in analysis_type_dict: + new_sp_item['analysis_type'] = analysis_type_dict[row.get('analysis id')] + new_items['sample_processing'].setdefault(analysis_alias, new_sp_item) + new_items['sample_processing'][analysis_alias]['samples'].append(samp_alias) + if row.get('report required').lower().startswith('y'): + new_items['reports'].append(samp_alias) + if fam_alias not in new_items['sample_processing'][analysis_alias]['families']: + new_items['sample_processing'][analysis_alias]['families'].append(fam_alias) + return new_items + + +# TODO: finish implementing this function +def fetch_file_metadata(filenames, proj_name): + valid_extensions = { + '.fastq.gz': ('fastq', 'reads'), + '.fq.gz': ('fastq', 'reads'), + '.cram': ('cram', 'alignments'), + '.vcf.gz': ('vcf_gz', 'raw VCF') + } + files = {'file_fastq': {}, 'file_processed': {}, 'errors': []} + for filename in filenames: + extension = [ext for ext in valid_extensions if filename.endswith(ext)] + 
if not extension: + if [ext for ext in ['.fastq', '.fq', '.vcf'] if filename.endswith(ext)]: + files['errors'].append('File must be compressed - please gzip file {}'.format(filename)) + else: + files['errors'].append('File extension on {} not supported - expecting one of: ' + '.fastq.gz, .fq.gz, .cram, .vcf.gz'.format(filename)) + continue + file_alias = '{}:{}'.format(proj_name, filename.lstrip(' ')) + fmt = valid_extensions[extension[0]][0] + file_info = { + 'aliases': [file_alias], + 'file_format': '/file-formats/{}/'.format(fmt), + 'file_type': valid_extensions[extension[0]][1], + 'filename': filename # causes problems without functional file upload + } + if fmt == 'fastq': + files['file_fastq'][file_alias] = file_info + else: + files['file_processed'][file_alias] = file_info + return files + + +def create_case_items(items, proj_name): + new_items = items.copy() + for k, v in items['sample_processing'].items(): + analysis_id = k[k.index('analysis-')+9:] + for sample in v['samples']: + case_id = '{}-{}'.format(analysis_id, items['sample'][sample]['specimen_accession']) + if len(v['samples']) == 1: + case_id += '-single' + elif len(v['samples']) > 1: + case_id += '-group' + case_alias = '{}:case-{}'.format(proj_name, case_id) + indiv = [ikey for ikey, ival in items['individual'].items() if sample in ival.get('samples', [])][0] + case_info = { + 'aliases': [case_alias], + # 'case_id': case_id, + 'sample_processing': k, + 'individual': indiv + } + if sample in items['reports']: + report_alias = case_alias.replace('case', 'report') + new_items['report'][report_alias] = { + 'aliases': [report_alias], + 'description': 'Analysis Report for Individual ID {}'.format(items['individual'][indiv]['individual_id']) + } + case_info['report'] = report_alias + new_items['case'][case_alias] = case_info + del new_items['reports'] + return new_items + + +def add_relations(items): + new_items = items.copy() + for alias, fam in items['family'].items(): + parents = False + for relation in ['mother', 'father']: + if fam.get(relation): + if fam.get('proband'): + new_items['individual'][fam['proband']][relation] = fam[relation] + parents = True + del new_items['family'][alias][relation] + for relation in ['brother', 'sister', 'sibling']: + if fam.get(relation): + if parents: + for parent in ['mother', 'father']: + if new_items['individual'][fam['proband']].get(parent): + new_items['individual'][fam[relation]][parent] = new_items['individual'][fam['proband']][parent] + del new_items['family'][alias][relation] + return new_items + + +def compare_with_db(virtualapp, alias): + try: # check if already in db + result = virtualapp.get('/' + alias + '/?frame=object') + if result.status_code == 301: + msg = json.loads(result.body).get('message', '') + result = virtualapp.get(msg[msg.index('/'):msg.index(';')]) + except Exception as e: # if not in db + if 'HTTPNotFound' in str(e): + return None + else: + return result.json + + +def validate_item(virtualapp, item, method, itemtype, aliases, atid=None): + if method == 'post': + try: + validation = virtualapp.post_json('/{}/?check_only=true'.format(itemtype), item) + except (AppError, VirtualAppError) as e: + return parse_exception(e, aliases) + else: + return + elif method == 'patch': + try: + validation = virtualapp.patch_json(atid + '?check_only=true', item, status=200) + except (AppError, VirtualAppError) as e: + return parse_exception(e, aliases) + else: + return + else: + raise ValueError("Unrecognized method -- must be 'post' or 'patch'") + + +def parse_exception(e, 
aliases): + """ff_utils functions raise an exception when the expected code is not returned. + This response is a pre-formatted text, and this function will get the resonse json + out of it. [Adapted from Submit4DN]""" + try: + # try parsing the exception + if isinstance(e, VirtualAppError): + text = e.raw_exception.args[0] + else: + text = e.args[0] + resp_text = text[text.index('{'):-1] + resp_dict = json.loads(resp_text.replace('\\', '')) + except Exception: # pragma: no cover + raise e + if resp_dict.get('description') == 'Failed validation': + keep = [] + resp_list = [error['description'] for error in resp_dict['errors']] + for error in resp_list: + # if error is caused by linkTo to item not submitted yet but in aliases list, + # remove that error + if 'not found' in error and error.split("'")[1] in aliases: + continue + else: + keep.append(error) + return keep + else: + raise e + + +def compare_fields(profile, aliases, json_item, db_item): + to_patch = {} + for field in json_item: + # if not an array, patch field gets overwritten (if different from db) + if profile['properties'][field]['type'] != 'array': + val = json_item[field] + if profile['properties'][field]['type'] == 'string' and val in aliases: + val = aliases[val] + if val != db_item.get(field): + to_patch[field] = val + else: + # if array, patch field vals get added to what's in db + if field != 'aliases' and profile['properties'][field].get('items', {}).get('linkTo'): + val = [aliases[v] if v in aliases else v for v in json_item[field]] + else: + val = [v for v in json_item[field]] + # if sorted(val) != sorted(db_item.get(field, [])): + # if len(val) == 1 and val not in db_item.get(field, []): + # continue + if all(v in db_item.get(field, []) for v in val): + continue + new_val = [item for item in db_item.get(field, [])] + new_val.extend(val) + try: + to_patch[field] = list(set(new_val)) + except TypeError: # above doesn't handle list of dictionaries + to_patch[field] = [dict(t) for t in {tuple(d.items()) for d in new_val}] + return to_patch + + +def validate_all_items(virtualapp, json_data): + ''' + Still in progress, not necessarily functional yet. NOT YET TESTED. + + Function that: + 1. looks up each item in json + 2. if item in db, will validate and patch any different metadata + 3. if item not in db, will post item + + Current status: + Still testing validation/data organization parts - patch/post part hasn't been fully + written or tested. + ''' + alias_dict = {} + errors = json_data['file_errors'] + all_aliases = [k for itype in json_data for k in json_data[itype]] + json_data_final = {'post': {}, 'patch': {}} + validation_results = {} + output = [] + for itemtype in POST_ORDER: # don't pre-validate case and report + if itemtype in json_data: + profile = virtualapp.get('/profiles/{}.json'.format(itemtype)).json + validation_results[itemtype] = {'validated': 0, 'errors': 0} + db_results = {} + # TODO: json_data[itemtype] but item_type might not be in json_data according to previous "if" statement. + # Maybe we want "for alias in json_data.get(item_type, {}):" here? + # Alternatively, maybe give "json_data.get(item_type, {})" a variable name so that it can be referred + # to more concisely in the several places below that it's needed. 
+ # -kmp 25-Jul-2020 + for alias in json_data[itemtype]: + # first collect all atids before comparing and validating items + db_result = compare_with_db(virtualapp, alias) + if db_result: + alias_dict[alias] = db_result['@id'] + # TODO: db_results is only conditionally assigned in the prevous "if". + # Perhaps the db_results = {} above should be moved up outside the "if"? + # Are we supposed to have a new dictionary on each iteration? -kmp 25-Jul-2020 + db_results[alias] = db_result + # TODO: Likewise this should probably loop over json_data.get(itemtype, {}). -kmp 25-Jul-2020 + for alias in json_data[itemtype]: + if 'filename' in json_data[itemtype][alias]: # until we have functional file upload + del json_data[itemtype][alias]['filename'] + if not db_results.get(alias): + error = validate_item(virtualapp, json_data[itemtype][alias], 'post', itemtype, all_aliases) + if error: # modify to check for presence of validation errors + # do something to report validation errors + if itemtype not in ['case', 'report']: + for e in error: + errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) + validation_results[itemtype]['errors'] += 1 + # TODO: If itemtype might not be in json_data (and conditionals above suggest that's so), + # then json_data[item_type][alias] seems suspect. It does work to do + # json_data.get(item_type, {}).get(alias, {}).get('filename') but I would put that + # quantity in a variable rather than compute it twice in a row. -kmp 25-Jul-2020 + elif json_data[itemtype][alias].get('filename') and \ + json_data[itemtype][alias]['filename'] in ''.join(json_data['file_errors']): + validation_results[itemtype]['errors'] += 1 + else: + json_data_final['post'].setdefault(itemtype, []) + json_data_final['post'][itemtype].append(json_data[itemtype][alias]) + validation_results[itemtype]['validated'] += 1 + else: + # patch if item exists in db + # alias_dict[alias] = results[alias]['@id'] + # TODO: profile is only conditionally assigned in an "if" above. -kmp 25-Jul-2020 + patch_data = compare_fields(profile, alias_dict, json_data[itemtype][alias], db_results[alias]) + error = validate_item(virtualapp, patch_data, 'patch', itemtype, + all_aliases, atid=db_results[alias]['@id']) + if error: # do something to report validation errors + if itemtype not in ['case', 'report']: + for e in error: + errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) + validation_results[itemtype]['errors'] += 1 + elif json_data[itemtype][alias].get('filename') and \ + json_data[itemtype][alias]['filename'] in ''.join(json_data['file_errors']): + validation_results[itemtype]['errors'] += 1 + else: # patch + json_data_final['patch'].setdefault(itemtype, {}) + if patch_data: + json_data_final['patch'][itemtype][db_results[alias]['@id']] = patch_data + elif itemtype not in ['case', 'report']: + output.append('{} {} - Item already in database, no changes needed'.format(itemtype, alias)) + # do something to record response + validation_results[itemtype]['validated'] += 1 + output.extend([error for error in errors]) + for itemtype in validation_results: + output.append('{} items: {} validated; {} errors'.format( + itemtype, validation_results[itemtype]['validated'], validation_results[itemtype]['errors'] + )) + if errors: + output.append('Validation errors found in items. 
Please fix spreadsheet before submitting.') + return ({}, output) + else: + json_data_final['aliases'] = alias_dict + output.append('All items validated.') + return (json_data_final, output) + + +def post_and_patch_all_items(virtualapp, json_data_final): + output = [] + if not json_data_final: + return output + item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_accession'} + final_status = {} + if json_data_final.get('post'): + for k, v in json_data_final['post'].items(): + final_status[k] = {'posted': 0, 'not posted': 0, 'patched': 0, 'not patched': 0} + for item in v: + patch_info = {} + # if 'filename' in item: # until we have functional file upload + # del item['filename'] + for field in LINKS: + if field in item: + patch_info[field] = item[field] + del item[field] + try: + response = virtualapp.post_json('/' + k, item, status=201) + if response.json['status'] == 'success': + final_status[k]['posted'] += 1 + atid = response.json['@graph'][0]['@id'] + json_data_final['aliases'][item['aliases'][0]] = atid + json_data_final['patch'].setdefault(k, {}) + json_data_final['patch'][k][atid] = patch_info + if k in item_names: + output.append('Success - {} {} posted'.format(k, item[item_names[k]])) + else: + final_status[k]['not posted'] += 1 + except Exception as e: + final_status[k]['not posted'] += 1 + output.append(str(e)) + for itype in final_status: + if final_status[itype]['posted'] > 0 or final_status[itype]['not posted'] > 0: + output.append('{}: {} items posted successfully; {} items not posted'.format( + itype, final_status[itype]['posted'], final_status[itype]['not posted'] + )) + for k, v in json_data_final['patch'].items(): + final_status.setdefault(k, {'patched': 0, 'not patched': 0}) + for item_id, patch_data in v.items(): + # if 'filename' in patch_data: # until we have functional file upload + # del patch_data['filename'] + try: + response = virtualapp.patch_json('/' + item_id, patch_data, status=200) + if response.json['status'] == 'success': + # if k in item_names: + # output.append('Success - {} {} patched'.format(k, patch_data[item_names[k]])) + final_status[k]['patched'] += 1 + else: + final_status[k]['not patched'] += 1 + except Exception as e: + final_status[k]['not patched'] += 1 + output.append(str(e)) + if final_status[k]['patched'] > 0 or final_status[k]['not patched'] > 0: + output.append('{}: {} items patched successfully; {} items not patched'.format( + k, final_status[k]['patched'], final_status[k]['not patched'] + )) + return output + + +def cell_value(cell, datemode): + """Get cell value from excel. 
[From Submit4DN]""" + # This should be always returning text format + ctype = cell.ctype + value = cell.value + if ctype == xlrd.XL_CELL_ERROR: # pragma: no cover + raise ValueError(repr(cell), 'cell error') + elif ctype == xlrd.XL_CELL_BOOLEAN: + return str(value).upper().strip() + elif ctype == xlrd.XL_CELL_NUMBER: + if value.is_integer(): + value = int(value) + return str(value).strip() + elif ctype == xlrd.XL_CELL_DATE: + value = xlrd.xldate_as_tuple(value, datemode) + if value[3:] == (0, 0, 0): + return datetime.date(*value[:3]).isoformat() + else: # pragma: no cover + return datetime.datetime(*value).isoformat() + elif ctype in (xlrd.XL_CELL_TEXT, xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK): + return value.strip() + raise ValueError(repr(cell), 'unknown cell type') # pragma: no cover + + +def row_generator(sheet): + '''Generator that gets rows from excel sheet [From Submit4DN]''' + datemode = sheet.book.datemode + for index in range(sheet.nrows): + yield [cell_value(cell, datemode) for cell in sheet.row(index)] diff --git a/src/encoded/types/ingestion.py b/src/encoded/types/ingestion.py new file mode 100644 index 0000000000..0dfbf71c9b --- /dev/null +++ b/src/encoded/types/ingestion.py @@ -0,0 +1,42 @@ +""" +Collection for objects related to ingestion submissions. +""" + +from snovault import collection, load_schema +from pyramid.security import Allow, Deny, Everyone +from .base import ( + Item, + # TODO: Maybe collect all these permission styles into a single file, give them symbolic names, + # and permit only the symbolic names to be used in each situation so we can curate a full inventory of modes. + # -kmp 26-Jul-2020 + ALLOW_SUBMITTER_ADD, +) +from .institution import ( + ONLY_ADMIN_VIEW, +) + + +ALLOW_SUBMITTER_VIEW = ( + # TODO: There is an issue here where we want a logged in user remotely only to view this + # but if we are proxying for them internall we want to be able to view OR edit. + # There is never reason for a user outside the system to update this status. -kmp 26-Jul-2020 + [] # Special additional permissions might go here. + + ALLOW_SUBMITTER_ADD # Is this right? See note above. + + ONLY_ADMIN_VIEW # Slightly misleading name. Allows admins to edit, too, actually. But only they can view. +) + + +@collection( + name='ingestion-submissions', + acl=ALLOW_SUBMITTER_VIEW, + unique_key='object_name', + properties={ + 'title': 'Ingestion Submissions', + 'description': 'List of Ingestion Submissions', + }) +class IngestionSubmission(Item): + """The IngestionSubmission class that holds info on requests to ingest data.""" + + item_type = 'ingestion_submission' + schema = load_schema('encoded:schemas/ingestion_submission.json') + # embedded_list = [...] + Item.embedded_list From e5a5b59a76bc2cdf1acba47565bba06765f304f9 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 30 Jul 2020 02:52:02 -0400 Subject: [PATCH 039/125] WIP: First pass at progress info working. 
--- src/encoded/ingestion/__init__.py | 0 src/encoded/{ => ingestion}/common.py | 15 +--- src/encoded/ingestion/exceptions.py | 21 +++++ .../processors.py} | 80 +++++++------------ src/encoded/ingestion_listener.py | 12 ++- src/encoded/renderers.py | 6 +- src/encoded/submit.py | 6 +- src/encoded/types/ingestion.py | 55 +++++++++++++ 8 files changed, 113 insertions(+), 82 deletions(-) create mode 100644 src/encoded/ingestion/__init__.py rename src/encoded/{ => ingestion}/common.py (90%) create mode 100644 src/encoded/ingestion/exceptions.py rename src/encoded/{ingestion_engines.py => ingestion/processors.py} (62%) diff --git a/src/encoded/ingestion/__init__.py b/src/encoded/ingestion/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/encoded/common.py b/src/encoded/ingestion/common.py similarity index 90% rename from src/encoded/common.py rename to src/encoded/ingestion/common.py index c5e06786f4..c1422fe998 100644 --- a/src/encoded/common.py +++ b/src/encoded/ingestion/common.py @@ -7,18 +7,12 @@ import io import os import tempfile - - - +from .exceptions import SubmissionFailure, MissingParameter DATA_BUNDLE_BUCKET = 'cgap-data-bundles' -class SubmissionFailure(Exception): - pass - - CONTENT_TYPE_SPECIAL_CASES = { 'application/x-www-form-urlencoded': [ # Special case to allow us to POST to metadata TSV requests via form submission @@ -56,13 +50,6 @@ def content_type_allowed(request): return False -class MissingParameter(Exception): - - def __init__(self, parameter_name): - self.parameter_name = parameter_name - super().__init__("Missing parameter: %s" % parameter_name) - - _NO_DEFAULT = object() diff --git a/src/encoded/ingestion/exceptions.py b/src/encoded/ingestion/exceptions.py new file mode 100644 index 0000000000..af653fe78a --- /dev/null +++ b/src/encoded/ingestion/exceptions.py @@ -0,0 +1,21 @@ +""" +Exception definitions for ingestion +""" + + +class SubmissionFailure(Exception): + pass + + +class UndefinedIngestionProcessorType(Exception): + + def __init__(self, processor_type): + self.ingestion_type_name = processor_type + super().__init__("No ingestion processor type %r is defined." % processor_type) + + +class MissingParameter(Exception): + + def __init__(self, parameter_name): + self.parameter_name = parameter_name + super().__init__("Missing parameter: %s" % parameter_name) diff --git a/src/encoded/ingestion_engines.py b/src/encoded/ingestion/processors.py similarity index 62% rename from src/encoded/ingestion_engines.py rename to src/encoded/ingestion/processors.py index 4a8abf6b07..4d3173da17 100644 --- a/src/encoded/ingestion_engines.py +++ b/src/encoded/ingestion/processors.py @@ -2,9 +2,10 @@ import json import traceback -from .common import DATA_BUNDLE_BUCKET, get_parameter -from .util import debuglog, s3_output_stream, create_empty_s3_file -from .submit import submit_data_bundle +from encoded.ingestion.common import DATA_BUNDLE_BUCKET, get_parameter +from encoded.util import debuglog, s3_output_stream, create_empty_s3_file +from encoded.submit import submit_data_bundle +from .exceptions import UndefinedIngestionProcessorType INGESTION_UPLOADERS = {} @@ -22,13 +23,6 @@ def ingestion_type_decorator(fn): return ingestion_type_decorator -class UndefinedIngestionProcessorType(Exception): - - def __init__(self, processor_type): - self.ingestion_type_name = processor_type - super().__init__("No ingestion processor type %r is defined." 
% processor_type) - - def get_ingestion_processor(processor_type): handler = INGESTION_UPLOADERS.get(processor_type, None) if not handler: @@ -42,15 +36,17 @@ def _show_report_lines(lines, fp, default="Nothing to report."): @ingestion_processor('data_bundle') -def handle_data_bundle(*, uuid, ingestion_type, vapp, log): +def handle_data_bundle(submission): - log.info("Processing {uuid} as {ingestion_type}.".format(uuid=uuid, ingestion_type=ingestion_type)) + submission.log.info("Processing {submission_id} as {ingestion_type}." + .format(submission_id=submission.submission_id, ingestion_type=submission.ingestion_type)) - if ingestion_type != 'data_bundle': + if submission.ingestion_type != 'data_bundle': raise RuntimeError("handle_data_bundle only works for ingestion_type data_bundle.") + submission_id = submission.submission_id s3_client = boto3.client('s3') - manifest_key = "%s/manifest.json" % uuid + manifest_key = "%s/manifest.json" % submission_id response = s3_client.get_object(Bucket=DATA_BUNDLE_BUCKET, Key=manifest_key) manifest = json.load(response['Body']) @@ -59,10 +55,10 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): institution = get_parameter(parameters, 'institution') project = get_parameter(parameters, 'project') - debuglog(uuid, "data_key:", data_key) - debuglog(uuid, "parameters:", parameters) + debuglog(submission_id, "data_key:", data_key) + debuglog(submission_id, "parameters:", parameters) - started_key = "%s/started.txt" % uuid + started_key = "%s/started.txt" % submission_id create_empty_s3_file(s3_client, bucket=DATA_BUNDLE_BUCKET, key=started_key) # PyCharm thinks this is unused. -kmp 26-Jul-2020 @@ -76,33 +72,24 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): try: + submission.set_item_detail(object_name=manifest['object_name'], parameters=manifest['parameters'], + institution=institution, project=project) + if isinstance(institution, str): - institution = vapp.get(institution).json + institution = submission.vapp.get(institution).json if isinstance(project, str): - project = vapp.get(project).json - - vapp.patch_json("/ingestion-submission", { - "object_name": manifest['object_name'], - "ingestion_type": ingestion_type, - "submission_id": uuid, - "parameters": manifest['parameters'], - "institution": institution, - "project": project, - "processing_status": { - "state": "processing", - } - }) + project = submission.vapp.get(project).json validation_log_lines, final_json, result_lines = submit_data_bundle(s3_client=s3_client, bucket=DATA_BUNDLE_BUCKET, key=data_key, project=project, institution=institution, - vapp=vapp) + vapp=submission.vapp) - resolution["validation_report_key"] = validation_report_key = "%s/validation-report.txt" % uuid - resolution["submission_key"] = submission_key = "%s/submission.json" % uuid - resolution["submission_response_key"] = submission_response_key = "%s/submission-response.txt" % uuid + resolution["validation_report_key"] = validation_report_key = "%s/validation-report.txt" % submission_id + resolution["submission_key"] = submission_key = "%s/submission.json" % submission_id + resolution["submission_response_key"] = submission_response_key = "%s/submission-response.txt" % submission_id with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=validation_report_key) as fp: _show_report_lines(validation_log_lines, fp) @@ -113,32 +100,19 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=submission_response_key) as fp: 
_show_report_lines(result_lines, fp) - vapp.patch_json("ingestion-submission", { - "submission_id": uuid, - "progress": { - "state": "done", - "outcome": "failure" if validation_log_lines else "success", - "progress": "complete", - }, - }) + # TODO: Sarah will provide a way to tell success from failure. -kmp 28-Jul-2020 + submission.patch_item(processing_status={"state": "done", "outcome": "success", "progress": "complete"}) except Exception as e: - resolution["traceback_key"] = traceback_key = "%s/traceback.json" % uuid + resolution["traceback_key"] = traceback_key = "%s/traceback.txt" % submission_id with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=traceback_key) as fp: traceback.print_exc(file=fp) resolution["error_type"] = e.__class__.__name__ resolution["error_message"] = str(e) - vapp.patch_json("ingestion-submission", { - "submission_id": uuid, - "progress": { - "state": "done", - "outcome": "error", - "progress": "incomplete", - }, - }) + submission.patch_item(processing_status={"state": "done", "outcome": "error", "progress": "incomplete"}) - with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key="%s/resolution.json" % uuid) as fp: + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key="%s/resolution.json" % submission_id) as fp: print(json.dumps(resolution, indent=2), file=fp) diff --git a/src/encoded/ingestion_listener.py b/src/encoded/ingestion_listener.py index f69766c41f..ef652844bd 100644 --- a/src/encoded/ingestion_listener.py +++ b/src/encoded/ingestion_listener.py @@ -24,8 +24,9 @@ from snovault.util import debug_log from vcf import Reader from .commands.ingest_vcf import VCFParser -from .common import register_path_content_type, DATA_BUNDLE_BUCKET, SubmissionFailure -from .ingestion_engines import get_ingestion_processor +from .ingestion.common import register_path_content_type, DATA_BUNDLE_BUCKET, SubmissionFailure +from .ingestion.processors import get_ingestion_processor +from .types.ingestion import SubmissionFolio from .util import resolve_file_path, gunzip_content, debuglog @@ -550,12 +551,9 @@ def run(self): if ingestion_type != 'vcf': # Let's minimally disrupt things for now. 
We can refactor this later # to make all the parts work the same -kmp - self.vapp.post_json("/ingestion-submission", { - "ingestion_type": ingestion_type, - "submission_id": uuid, - }) + submission = SubmissionFolio(vapp=self.vapp, ingestion_type=ingestion_type, submission_id=uuid) handler = get_ingestion_processor(ingestion_type) - handler(uuid=uuid, ingestion_type=ingestion_type, vapp=self.vapp, log=log) + handler(submission) # TODO: If we delete messages at the end of each loop, I think we'll here need to do this, # since we're bypassing bottom of lop with the 'continue': # self.delete_messages([message]) diff --git a/src/encoded/renderers.py b/src/encoded/renderers.py index 5cb832faf0..ae16269458 100644 --- a/src/encoded/renderers.py +++ b/src/encoded/renderers.py @@ -11,22 +11,18 @@ HTTPMovedPermanently, HTTPPreconditionFailed, HTTPUnauthorized, - HTTPForbidden, HTTPUnsupportedMediaType, HTTPNotAcceptable, HTTPServerError ) from pyramid.response import Response -from pyramid.security import forget from pyramid.settings import asbool from pyramid.threadlocal import manager from pyramid.traversal import split_path_info, _join_path_tuple -from snovault.validation import CSRFTokenError -from subprocess_middleware.tween import SubprocessTween from subprocess_middleware.worker import TransformWorker from urllib.parse import urlencode from webob.cookies import Cookie -from .common import content_type_allowed +from encoded.ingestion.common import content_type_allowed log = logging.getLogger(__name__) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 76d850e3a3..4ecd2a4487 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -64,16 +64,16 @@ # # This will be done differently soon as part of the "/submit_for_ingestion" endpoint that # will be in ingestion_listener.py. That endpoint will need an "?ingestion type=data_bundle" -# as query parameter. That "data_bundle" ingestion type will defined in ingestion_engines.py. +# as query parameter. That "data_bundle" ingestion type will defined in ingestion/processors.py. # The new entry point here that will be needed is submit_data_bundle, and then this temporary # "/submit_data" endpoint can presumably go away.. -kmp 25-Jul-2020 @view_config(route_name='submit_data', request_method='POST', permission='add') @debug_log def submit_data(context, request): - ''' + """ usage notes here later - ''' + """ config_uri = request.json.get('config_uri', 'production.ini') patch_only = request.json.get('patch_only', False) post_only = request.json.get('post_only', False) diff --git a/src/encoded/types/ingestion.py b/src/encoded/types/ingestion.py index 0dfbf71c9b..f4dc75b119 100644 --- a/src/encoded/types/ingestion.py +++ b/src/encoded/types/ingestion.py @@ -2,6 +2,9 @@ Collection for objects related to ingestion submissions. """ +import json +import logging + from snovault import collection, load_schema from pyramid.security import Allow, Deny, Everyone from .base import ( @@ -26,6 +29,58 @@ ) +class SubmissionFolio: + + INGESTION_SUBMISSION_URI = '/IngestionSubmission' + + def __init__(self, *, vapp, ingestion_type, submission_id, log=None): + self.vapp = vapp + self.ingestion_type = ingestion_type + self.log = log or logging + self.folio_id = None # This will be more properly initialized in _create_item() + self.submission_id = submission_id + self._create_item() + + @property + def folio_uri(self): + if not self.folio_id: + raise RuntimeError("%s.folio_id has not been set." 
% self) + return "/" + self.folio_id + + def _create_item(self): + res = self.vapp.post_json(self.INGESTION_SUBMISSION_URI, { + "ingestion_type": self.ingestion_type, + "submission_id": self.submission_id, + "processing_status": { + "state": "submitted" + } + }) + [item] = res.json['@graph'] + print(json.dumps(item, indent=2)) + self.folio_id = item['uuid'] + + def set_item_detail(self, object_name, parameters, institution, project): + res = self.vapp.patch_json(self.folio_uri, { + "object_name": object_name, + "ingestion_type": self.ingestion_type, + "submission_id": self.submission_id, + "parameters": parameters, + "institution": institution, + "project": project, + "processing_status": { + "state": "processing", + } + }) + [item] = res.json['@graph'] + print(json.dumps(item, indent=2)) + + + def patch_item(self, **kwargs): + res = self.vapp.patch_json(self.folio_uri, kwargs) + [item] = res.json['@graph'] + print(json.dumps(item, indent=2)) + + @collection( name='ingestion-submissions', acl=ALLOW_SUBMITTER_VIEW, From 279bb15ea5ab2faeb8b0517b4e51dfd685a070e8 Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 31 Jul 2020 14:12:31 -0400 Subject: [PATCH 040/125] more extensive error handling in submit.py --- src/encoded/submit.py | 211 +++++++++++++++++++++++++++++++----------- 1 file changed, 155 insertions(+), 56 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 17d2104450..24c22e3a7d 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -105,10 +105,26 @@ def submit_data_bundle(*, s3_client, bucket, key, project, institution, vapp): with s3_local_file(s3_client, bucket=bucket, key=key) as file: project_json = vapp.get(project).json institution_json = vapp.get(institution).json - json_data = xls_to_json(file, project=project_json, institution=institution_json) - final_json, validation_log_lines = validate_all_items(vapp, json_data) - result_lines = post_and_patch_all_items(vapp, final_json) - return validation_log_lines, final_json, result_lines + results = { + 'success': False, + 'validation_output': [], + 'final_json': {}, + 'post_output': [] + } + json_data, json_success = xls_to_json(file, project=project_json, institution=institution_json) + if not json_success: + results['validation_output'] = json_data['errors'] + return results + final_json, validation_log_lines, validate_success = validate_all_items(vapp, json_data) + results['final_json'] = final_json + results['validation_output'] = validation_log_lines + if not validate_success: + return results + results['success'] = validate_success + result_lines, post_success = post_and_patch_all_items(vapp, final_json) + results['post_output'] = result_lines + results['success'] = post_success + return results def map_fields(row, metadata_dict, addl_fields, item_type): @@ -128,37 +144,66 @@ def xls_to_json(xls_data, project, institution): book = xlrd.open_workbook(xls_data) sheet, = book.sheets() row = row_generator(sheet) - top_header = next(row) - debuglog("top_header:", top_header) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 - keys = next(row) - debuglog("keys:", keys) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 - descriptions = next(row) - debuglog("descriptions:", descriptions) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 - rows = [] + header = False counter = 0 + # debuglog("top_header:", top_header) # Temporary instrumentation for debugging to go away soon. 
-kmp 25-Jul-2020 + while True: + try: + keys = next(row) + keys = [key.lower().strip().rstrip('*').rstrip() for key in keys] + counter += 1 + if 'individual id' in keys: + header = True + break + except StopIteration: + break + if not header: + msg = 'Column headers not detected in spreadsheet! "Individual ID*" column must be present in header.' + return {'errors': [msg]}, False + # debuglog("keys:", keys) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 + # descriptions = next(row) + # debuglog("descriptions:", descriptions) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 + rows = [] + # keys = [key.lower().strip().rstrip('*').rstrip() for key in keys] + required = ['individual id', 'relation to proband', 'report required', 'analysis id'] + missing = [col for col in required if col not in keys] + if missing: + msg = 'Column(s) "{}" not found in spreadsheet! Spreadsheet cannot be processed.'.format('", "'.join(missing)) + return {'errors': [msg]}, False + for values in row: r = [val for val in values] - row_dict = {keys[i].lower().rstrip('*'): item for i, item in enumerate(r)} + if 'y/n' in ''.join(r).lower() or ''.join(r) == '': # skip comments/description/blank row if present + counter += 1 + continue + row_dict = {keys[i]: item for i, item in enumerate(r)} rows.append(row_dict) items = { 'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}, 'file_fastq': {}, 'file_processed': {}, 'case': {}, 'report': {}, - 'reports': [] + 'reports': [], 'errors': [] } file_errors = [] specimen_ids = {} family_dict = create_families(rows) a_types = get_analysis_types(rows) - for row in rows: + for i, row in enumerate(rows): debuglog("row:", repr(row)) # Temporary instrumentation for debugging to go away soon. 
-kmp 25-Jul-2020 + row_num = i + counter + 1 + missing_required = [col for col in required if col not in row] + if missing_required: + items['errors'].append( + 'Spreadsheet row {} cannot be processed - missing required field(s) {}' + ''.format(row_num, ', '.join(missing_required)) + ) indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) fam_alias = '{}:{}'.format(project['name'], family_dict[row['analysis id']]) # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual - items = fetch_individual_metadata(row, items, indiv_alias, institution['name']) + items = fetch_individual_metadata(row_num, row, items, indiv_alias, institution['name']) # create/edit items for Family - items = fetch_family_metadata(row, items, indiv_alias, fam_alias) + items = fetch_family_metadata(row_num, row, items, indiv_alias, fam_alias) # create item for Sample if there is a specimen if row.get('specimen id'): samp_alias = '{}:sample-{}'.format(project['name'], row['specimen id']) @@ -168,30 +213,31 @@ def xls_to_json(xls_data, project, institution): else: specimen_ids[row['specimen id']] = 1 analysis_alias = '{}:analysis-{}'.format(project['name'], row['analysis id']) - items = fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, + items = fetch_sample_metadata(row_num, row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, project['name'], a_types) if row.get('files'): - file_items = fetch_file_metadata(row['files'].split(','), project['name']) + file_items = fetch_file_metadata(row_num, row['files'].split(','), project['name']) file_errors.extend(file_items['errors']) items['file_fastq'].update(file_items['file_fastq']) items['file_processed'].update(file_items['file_processed']) else: - print('WARNING: No specimen id present for patient {},' - ' sample will not be created.'.format(row['individual id'])) + items['errors'].append('WARNING: No specimen id present for patient {},' + ' sample will not be created.'.format(row['individual id'])) # create SampleProcessing item for trio/group if needed # items = create_sample_processing_groups(items, sp_alias) items = add_relations(items) items = create_case_items(items, project['name']) # removed unused fields, add project and institution for val1 in items.values(): - for val2 in val1.values(): - remove_keys = [k for k, v in val2.items() if not v] - for key in remove_keys: - del val2[key] - val2['project'] = project['@id'] - val2['institution'] = institution['@id'] - items['file_errors'] = file_errors - return items + if isinstance(val1, dict): + for val2 in val1.values(): + remove_keys = [k for k, v in val2.items() if not v] + for key in remove_keys: + del val2[key] + val2['project'] = project['@id'] + val2['institution'] = institution['@id'] + items['errors'].extend(file_errors) + return items, True # most errors passed to next step in order to combine with validation errors def create_families(rows): @@ -208,7 +254,8 @@ def get_analysis_types(rows): analysis_relations[row.get('analysis id')][0].append(row.get('relation to proband', '').lower()) analysis_relations[row.get('analysis id')][1].append(row.get('workup type', '').upper()) for k, v in analysis_relations.items(): - if len(list(set(v[1]))) == 1: + workup = list(set(v[1])) + if len(workup) == 1 and '' not in workup: if len(v[0]) == 1: analysis_types[k] = v[1][0] elif sorted(v[0]) == ['father', 'mother', 'proband']: @@ -220,7 +267,7 @@ def get_analysis_types(rows): return analysis_types -def 
fetch_individual_metadata(row, items, indiv_alias, inst_name): +def fetch_individual_metadata(idx, row, items, indiv_alias, inst_name): new_items = items.copy() info = {'aliases': [indiv_alias]} info = map_fields(row, info, ['individual_id', 'sex', 'age', 'birth_year'], 'individual') @@ -229,10 +276,12 @@ def fetch_individual_metadata(row, items, indiv_alias, inst_name): if row.get('other individual id type'): other_id['id_source'] = row['other individual id source'] info['institutional_id'] = other_id - info['age'] = int(info['age']) if info.get('age') else None - info['birth_year'] = int(info['birth year']) if info.get('birth year') else None + for col in ['age', 'birth_year']: + if info.get(col) and isinstance(info[col], str) and info[col].isnumeric(): + info[col] = int(info[col]) if indiv_alias not in new_items['individual']: new_items['individual'][indiv_alias] = {k: v for k, v in info.items() if v} + new_items['individual'][indiv_alias]['row'] = idx else: for key in info: if key not in new_items['individual'][indiv_alias]: @@ -240,24 +289,34 @@ def fetch_individual_metadata(row, items, indiv_alias, inst_name): return new_items -def fetch_family_metadata(row, items, indiv_alias, fam_alias): +def fetch_family_metadata(idx, row, items, indiv_alias, fam_alias): new_items = items.copy() info = { 'aliases': [fam_alias], 'family_id': row['family id'], - 'members': [indiv_alias] + 'members': [indiv_alias], + 'row': idx } if fam_alias not in new_items['family']: new_items['family'][fam_alias] = info if indiv_alias not in new_items['family'][fam_alias]['members']: new_items['family'][fam_alias]['members'].append(indiv_alias) - for relation in ['proband', 'mother', 'father', 'brother', 'sister', 'sibling']: - if row.get('relation to proband', '').lower() == relation and relation not in new_items['family'][fam_alias]: + valid_relations = ['proband', 'mother', 'father', 'brother', 'sister', 'sibling'] + relation_found = False + for relation in valid_relations: + if row.get('relation to proband', '').lower().startswith(relation) and relation not in new_items['family'][fam_alias]: new_items['family'][fam_alias][relation] = indiv_alias + relation_found = True + break + if not relation_found: + msg = 'Row {}: Invalid relation "{}" for individual {} - Relation should be one of: {}'.format( + idx, row.get('relation to proband'), row.get('individual id'), ', '.join(valid_relations) + ) + items['errors'].append(msg) return new_items -def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name, analysis_type_dict): +def fetch_sample_metadata(idx, row, items, indiv_alias, samp_alias, analysis_alias, fam_alias, proj_name, analysis_type_dict): new_items = items.copy() info = {'aliases': [samp_alias], 'files': []} # TODO: implement creation of file db items fields = [ @@ -265,9 +324,10 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f 'specimen_notes', 'research_protocol_name', 'sent_by', 'physician_id', 'indication' ] info = map_fields(row, info, fields, 'sample') - if info.get('specimen_accepted', '').lower() == 'y': + info['row'] = idx + if info.get('specimen_accepted', '').lower() in ['y', 'yes']: info['specimen_accepted'] = 'Yes' - elif info.get('specimen_accepted', '').lower() == 'n': + elif info.get('specimen_accepted', '').lower() in ['n', 'no']: info['specimen_accepted'] = 'No' if row.get('second specimen id'): other_id = {'id': row['second specimen id'], 'id_type': proj_name} # add proj info? 
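For readers following the diff, the relation handling added to fetch_family_metadata above boils down to the short sketch below. The valid_relations list and the error wording come from the patch itself; the standalone helper and its name are illustrative only, and it omits the bookkeeping that records the matched individual's alias on the family item.

# Minimal sketch (assumed names) of the relation check introduced in fetch_family_metadata.
# `row` is one spreadsheet row as a dict, `idx` its spreadsheet row number,
# and `errors` plays the role of items['errors'] in xls_to_json.
VALID_RELATIONS = ['proband', 'mother', 'father', 'brother', 'sister', 'sibling']

def check_relation(row, idx, errors):
    relation = row.get('relation to proband', '').lower()
    for valid in VALID_RELATIONS:
        if relation.startswith(valid):  # startswith() tolerates values such as "sibling 2"
            return valid
    errors.append(
        'Row {} - Invalid relation "{}" for individual {} - Relation should be one of: {}'.format(
            idx, row.get('relation to proband'), row.get('individual id'), ', '.join(VALID_RELATIONS)
        )
    )
    return None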
@@ -302,7 +362,7 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f # TODO: finish implementing this function -def fetch_file_metadata(filenames, proj_name): +def fetch_file_metadata(idx, filenames, proj_name): valid_extensions = { '.fastq.gz': ('fastq', 'reads'), '.fq.gz': ('fastq', 'reads'), @@ -323,6 +383,7 @@ def fetch_file_metadata(filenames, proj_name): fmt = valid_extensions[extension[0]][0] file_info = { 'aliases': [file_alias], + 'row': idx, 'file_format': '/file-formats/{}/'.format(fmt), 'file_type': valid_extensions[extension[0]][1], 'filename': filename # causes problems without functional file upload @@ -427,24 +488,44 @@ def parse_exception(e, aliases): else: text = e.args[0] resp_text = text[text.index('{'):-1] - resp_dict = json.loads(resp_text.replace('\\', '')) + resp_dict = json.loads(resp_text.replace('\\"', "\'").replace('\\', '')) except Exception: # pragma: no cover raise e if resp_dict.get('description') == 'Failed validation': keep = [] - resp_list = [error['description'] for error in resp_dict['errors']] + resp_list = [error['name'] + ' - ' + error['description'] for error in resp_dict['errors']] for error in resp_list: # if error is caused by linkTo to item not submitted yet but in aliases list, # remove that error if 'not found' in error and error.split("'")[1] in aliases: continue else: + error = error.lstrip('Schema: ') + field_name = error[:error.index(' - ')] + field = None + if field_name in GENERIC_FIELD_MAPPING['sample'].values(): + field = [key for key, val in GENERIC_FIELD_MAPPING['sample'].items() if val == field_name][0] + elif field_name == 'requisition_acceptance.accepted_rejected': + field = 'Req Accepted Y\\N' + error = map_enum_options(field_name, error) + if not field: + field = field_name.replace('_', ' ') + + error = 'field: ' + error.replace(field_name, field) keep.append(error) return keep else: raise e +def map_enum_options(fieldname, error_message): + if fieldname == 'requisition_acceptance.accepted_rejected': + error_message = error_message.replace("['Accepted', 'Rejected']", "['Y', 'N']") + elif fieldname == 'specimen_accepted': + error_message = error_message.replace("['Yes', 'No']", "['Y', 'N']") + return error_message + + def compare_fields(profile, aliases, json_item, db_item): to_patch = {} for field in json_item: @@ -489,7 +570,7 @@ def validate_all_items(virtualapp, json_data): written or tested. ''' alias_dict = {} - errors = json_data['file_errors'] + errors = json_data['errors'] all_aliases = [k for itype in json_data for k in json_data[itype]] json_data_final = {'post': {}, 'patch': {}} validation_results = {} @@ -515,22 +596,29 @@ def validate_all_items(virtualapp, json_data): db_results[alias] = db_result # TODO: Likewise this should probably loop over json_data.get(itemtype, {}). 
-kmp 25-Jul-2020 for alias in json_data[itemtype]: - if 'filename' in json_data[itemtype][alias]: # until we have functional file upload - del json_data[itemtype][alias]['filename'] + data = json_data[itemtype][alias].copy() + row = data.get('row') + if row: + del data['row'] + if 'filename' in data: # until we have functional file upload + del data['filename'] if not db_results.get(alias): - error = validate_item(virtualapp, json_data[itemtype][alias], 'post', itemtype, all_aliases) + error = validate_item(virtualapp, data, 'post', itemtype, all_aliases) if error: # modify to check for presence of validation errors # do something to report validation errors if itemtype not in ['case', 'report']: for e in error: - errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) + if row: + errors.append('Row {} {} - Error found: {}'.format(row, itemtype, e)) + else: + errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) validation_results[itemtype]['errors'] += 1 # TODO: If itemtype might not be in json_data (and conditionals above suggest that's so), # then json_data[item_type][alias] seems suspect. It does work to do # json_data.get(item_type, {}).get(alias, {}).get('filename') but I would put that # quantity in a variable rather than compute it twice in a row. -kmp 25-Jul-2020 elif json_data[itemtype][alias].get('filename') and \ - json_data[itemtype][alias]['filename'] in ''.join(json_data['file_errors']): + json_data[itemtype][alias]['filename'] in ''.join(json_data['errors']): validation_results[itemtype]['errors'] += 1 else: json_data_final['post'].setdefault(itemtype, []) @@ -540,22 +628,25 @@ def validate_all_items(virtualapp, json_data): # patch if item exists in db # alias_dict[alias] = results[alias]['@id'] # TODO: profile is only conditionally assigned in an "if" above. -kmp 25-Jul-2020 - patch_data = compare_fields(profile, alias_dict, json_data[itemtype][alias], db_results[alias]) + patch_data = compare_fields(profile, alias_dict, data, db_results[alias]) error = validate_item(virtualapp, patch_data, 'patch', itemtype, all_aliases, atid=db_results[alias]['@id']) if error: # do something to report validation errors if itemtype not in ['case', 'report']: for e in error: - errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) + if row: + errors.append('Row {} {} - Error found: {}'.format(row, itemtype, e)) + else: + errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) validation_results[itemtype]['errors'] += 1 elif json_data[itemtype][alias].get('filename') and \ - json_data[itemtype][alias]['filename'] in ''.join(json_data['file_errors']): + json_data[itemtype][alias]['filename'] in ''.join(json_data['errors']): validation_results[itemtype]['errors'] += 1 else: # patch json_data_final['patch'].setdefault(itemtype, {}) if patch_data: json_data_final['patch'][itemtype][db_results[alias]['@id']] = patch_data - elif itemtype not in ['case', 'report']: + elif itemtype not in ['case', 'report', 'sample_processing']: output.append('{} {} - Item already in database, no changes needed'.format(itemtype, alias)) # do something to record response validation_results[itemtype]['validated'] += 1 @@ -566,24 +657,28 @@ def validate_all_items(virtualapp, json_data): )) if errors: output.append('Validation errors found in items. 
Please fix spreadsheet before submitting.') - return ({}, output) + return {}, output, False else: json_data_final['aliases'] = alias_dict output.append('All items validated.') - return (json_data_final, output) + return json_data_final, output, True def post_and_patch_all_items(virtualapp, json_data_final): output = [] if not json_data_final: - return output + return output, 'not run' item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_accession'} final_status = {} + no_errors = True if json_data_final.get('post'): for k, v in json_data_final['post'].items(): final_status[k] = {'posted': 0, 'not posted': 0, 'patched': 0, 'not patched': 0} for item in v: patch_info = {} + row = item['row'] + if row: + del item['row'] # if 'filename' in item: # until we have functional file upload # del item['filename'] for field in LINKS: @@ -602,9 +697,11 @@ def post_and_patch_all_items(virtualapp, json_data_final): output.append('Success - {} {} posted'.format(k, item[item_names[k]])) else: final_status[k]['not posted'] += 1 + no_errors = False except Exception as e: final_status[k]['not posted'] += 1 output.append(str(e)) + no_errors = False for itype in final_status: if final_status[itype]['posted'] > 0 or final_status[itype]['not posted'] > 0: output.append('{}: {} items posted successfully; {} items not posted'.format( @@ -623,14 +720,16 @@ def post_and_patch_all_items(virtualapp, json_data_final): final_status[k]['patched'] += 1 else: final_status[k]['not patched'] += 1 + no_errors = False except Exception as e: final_status[k]['not patched'] += 1 output.append(str(e)) + no_errors = False if final_status[k]['patched'] > 0 or final_status[k]['not patched'] > 0: output.append('{}: {} items patched successfully; {} items not patched'.format( k, final_status[k]['patched'], final_status[k]['not patched'] )) - return output + return output, no_errors def cell_value(cell, datemode): From b5fb8b20c0a652371717e4c521ed4bab36de13c0 Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 31 Jul 2020 14:16:46 -0400 Subject: [PATCH 041/125] edits to ingestion code for data bundles --- src/encoded/ingestion_engines.py | 75 +++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/src/encoded/ingestion_engines.py b/src/encoded/ingestion_engines.py index 05a5f0f82e..9f6c42d778 100644 --- a/src/encoded/ingestion_engines.py +++ b/src/encoded/ingestion_engines.py @@ -41,8 +41,8 @@ def _show_report_lines(lines, fp, default="Nothing to report."): print(line, file=fp) -@ingestion_processor('data_bundle') -def handle_data_bundle(*, uuid, ingestion_type, vapp, log): +#@ingestion_processor('data_bundle') +def handle_data_bundle_old(*, uuid, ingestion_type, vapp, log): log.info("Processing {uuid} as {ingestion_type}.".format(uuid=uuid, ingestion_type=ingestion_type)) @@ -107,3 +107,74 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key="%s/resolution.json" % uuid) as fp: print(json.dumps(resolution, indent=2), file=fp) + + +@ingestion_processor('data_bundle') +def handle_data_bundle(*, uuid, ingestion_type, vapp, log): + + log.info("Processing {uuid} as {ingestion_type}.".format(uuid=uuid, ingestion_type=ingestion_type)) + + if ingestion_type != 'data_bundle': + raise RuntimeError("handle_data_bundle only works for ingestion_type data_bundle.") + + s3_client = boto3.client('s3') + manifest_key = "%s/manifest.json" % uuid + response = s3_client.get_object(Bucket=DATA_BUNDLE_BUCKET, 
Key=manifest_key) + manifest = json.load(response['Body']) + + data_key = manifest['object_name'] + parameters = manifest['parameters'] + institution = get_parameter(parameters, 'institution') + project = get_parameter(parameters, 'project') + + debuglog(uuid, "data_key:", data_key) + debuglog(uuid, "parameters:", parameters) + + started_key = "%s/started.txt" % uuid + create_empty_s3_file(s3_client, bucket=DATA_BUNDLE_BUCKET, key=started_key) + + # PyCharm thinks this is unused. -kmp 26-Jul-2020 + # data_stream = s3_client.get_object(Bucket=DATA_BUNDLE_BUCKET, Key="%s/manifest.json" % uuid)['Body'] + + resolution = { + "data_key": data_key, + "manifest_key": manifest_key, + "started_key": started_key, + } + + try: + + data_bundle_result = submit_data_bundle(s3_client=s3_client, + bucket=DATA_BUNDLE_BUCKET, + key=data_key, + project=project, + institution=institution, + vapp=vapp) + + resolution["validation_report_key"] = validation_report_key = "%s/validation-report.txt" % uuid + resolution["submission_key"] = submission_key = "%s/submission.json" % uuid + resolution["submission_response_key"] = submission_response_key = "%s/submission-response.txt" % uuid + + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=validation_report_key) as fp: + _show_report_lines(data_bundle_result['validation_output'], fp) + + # here I am only creating submission.json and submission-response.txt if there is something to write to file + if data_bundle_result['final_json']: + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=submission_key) as fp: + print(json.dumps(data_bundle_result['final_json'], indent=2), file=fp) + + if data_bundle_result['post_output']: + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=submission_response_key) as fp: + _show_report_lines(data_bundle_result['post_output'], fp) + + except Exception as e: + + resolution["traceback_key"] = traceback_key = "%s/traceback.json" % uuid + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=traceback_key) as fp: + traceback.print_exc(file=fp) + + resolution["error_type"] = e.__class__.__name__ + resolution["error_message"] = str(e) + + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key="%s/resolution.json" % uuid) as fp: + print(json.dumps(resolution, indent=2), file=fp) From bfb3162868b8dfac8dfe552a7eb5b9f022aa940e Mon Sep 17 00:00:00 2001 From: Sarah Date: Fri, 31 Jul 2020 15:49:34 -0400 Subject: [PATCH 042/125] submission-test script edited for submit.py changes --- src/encoded/commands/submission_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/encoded/commands/submission_test.py b/src/encoded/commands/submission_test.py index 07eb772985..9c02ad7c77 100644 --- a/src/encoded/commands/submission_test.py +++ b/src/encoded/commands/submission_test.py @@ -10,11 +10,11 @@ def main(): virtualapp = VirtualApp(app, environ) proj = virtualapp.get('/projects/12a92962-8265-4fc0-b2f8-cf14f05db58b/').json inst = virtualapp.get('/institutions/hms-dbmi/').json - json_data = xls_to_json('src/encoded/tests/data/documents/cgap_submit_test.xlsx', proj, inst) - final_json, validation_log = validate_all_items(virtualapp, json_data) + json_data, passing = xls_to_json('/src/encoded/tests/data/documents/cgap_submit_test.xlsx', proj, inst) + final_json, validation_log, passing = validate_all_items(virtualapp, json_data) print('\n'.join(validation_log)) print(json.dumps(final_json, indent=4)) - result = post_and_patch_all_items(virtualapp, final_json) + result, 
passing = post_and_patch_all_items(virtualapp, final_json) print('\n'.join(result)) From 40a17448f48407477ffcc312f3f6164cc750be31 Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 2 Aug 2020 14:45:29 -0400 Subject: [PATCH 043/125] in submit.py filenames now passed to s3 output file for later upload --- src/encoded/ingestion_engines.py | 5 +++++ src/encoded/submit.py | 29 ++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/encoded/ingestion_engines.py b/src/encoded/ingestion_engines.py index 9f6c42d778..e286e8035b 100644 --- a/src/encoded/ingestion_engines.py +++ b/src/encoded/ingestion_engines.py @@ -154,6 +154,7 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): resolution["validation_report_key"] = validation_report_key = "%s/validation-report.txt" % uuid resolution["submission_key"] = submission_key = "%s/submission.json" % uuid resolution["submission_response_key"] = submission_response_key = "%s/submission-response.txt" % uuid + resolution["info_for_file_upload_key"] = info_for_file_upload_key = "%s/info_for_file_upload.txt" % uuid with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=validation_report_key) as fp: _show_report_lines(data_bundle_result['validation_output'], fp) @@ -167,6 +168,10 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=submission_response_key) as fp: _show_report_lines(data_bundle_result['post_output'], fp) + if data_bundle_result['file_info']: + with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=info_for_file_upload_key) as fp: + _show_report_lines(data_bundle_result['file_info'], fp) + except Exception as e: resolution["traceback_key"] = traceback_key = "%s/traceback.json" % uuid diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 24c22e3a7d..aa04c3f0c1 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -109,7 +109,8 @@ def submit_data_bundle(*, s3_client, bucket, key, project, institution, vapp): 'success': False, 'validation_output': [], 'final_json': {}, - 'post_output': [] + 'post_output': [], + 'file_info': [] } json_data, json_success = xls_to_json(file, project=project_json, institution=institution_json) if not json_success: @@ -121,9 +122,10 @@ def submit_data_bundle(*, s3_client, bucket, key, project, institution, vapp): if not validate_success: return results results['success'] = validate_success - result_lines, post_success = post_and_patch_all_items(vapp, final_json) + result_lines, post_success, files_to_upload = post_and_patch_all_items(vapp, final_json) results['post_output'] = result_lines results['success'] = post_success + results['file_info'] = files_to_upload return results @@ -386,7 +388,7 @@ def fetch_file_metadata(idx, filenames, proj_name): 'row': idx, 'file_format': '/file-formats/{}/'.format(fmt), 'file_type': valid_extensions[extension[0]][1], - 'filename': filename # causes problems without functional file upload + 'filename': filename.strip() # causes problems without functional file upload } if fmt == 'fastq': files['file_fastq'][file_alias] = file_info @@ -666,6 +668,7 @@ def validate_all_items(virtualapp, json_data): def post_and_patch_all_items(virtualapp, json_data_final): output = [] + files = [] if not json_data_final: return output, 'not run' item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_accession'} @@ -676,11 +679,14 @@ def post_and_patch_all_items(virtualapp, json_data_final): final_status[k] = 
{'posted': 0, 'not posted': 0, 'patched': 0, 'not patched': 0} for item in v: patch_info = {} - row = item['row'] + row = item.get('row') if row: del item['row'] # if 'filename' in item: # until we have functional file upload # del item['filename'] + fname = item.get('filename') + if fname: + del item['filename'] for field in LINKS: if field in item: patch_info[field] = item[field] @@ -695,6 +701,11 @@ def post_and_patch_all_items(virtualapp, json_data_final): json_data_final['patch'][k][atid] = patch_info if k in item_names: output.append('Success - {} {} posted'.format(k, item[item_names[k]])) + if fname: + files.append({ + 'uuid': response.json['@graph'][0]['uuid'], + 'filename': fname + }) else: final_status[k]['not posted'] += 1 no_errors = False @@ -712,12 +723,20 @@ def post_and_patch_all_items(virtualapp, json_data_final): for item_id, patch_data in v.items(): # if 'filename' in patch_data: # until we have functional file upload # del patch_data['filename'] + fname = patch_data.get('filename') + if fname: + del patch_data['filename'] try: response = virtualapp.patch_json('/' + item_id, patch_data, status=200) if response.json['status'] == 'success': # if k in item_names: # output.append('Success - {} {} patched'.format(k, patch_data[item_names[k]])) final_status[k]['patched'] += 1 + if fname: + files.append({ + 'uuid': response.json['@graph'][0]['uuid'], + 'filename': fname + }) else: final_status[k]['not patched'] += 1 no_errors = False @@ -729,7 +748,7 @@ def post_and_patch_all_items(virtualapp, json_data_final): output.append('{}: {} items patched successfully; {} items not patched'.format( k, final_status[k]['patched'], final_status[k]['not patched'] )) - return output, no_errors + return output, no_errors, files def cell_value(cell, datemode): From 9564b2e84cdb47276b8f2f6ab4e85f15b0d82594 Mon Sep 17 00:00:00 2001 From: Sarah Date: Sun, 2 Aug 2020 14:54:30 -0400 Subject: [PATCH 044/125] fixed info_for_file_upload.txt to be json rather than list of lines (data bundle submission) --- src/encoded/ingestion_engines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoded/ingestion_engines.py b/src/encoded/ingestion_engines.py index e286e8035b..174ed452ad 100644 --- a/src/encoded/ingestion_engines.py +++ b/src/encoded/ingestion_engines.py @@ -170,7 +170,7 @@ def handle_data_bundle(*, uuid, ingestion_type, vapp, log): if data_bundle_result['file_info']: with s3_output_stream(s3_client, bucket=DATA_BUNDLE_BUCKET, key=info_for_file_upload_key) as fp: - _show_report_lines(data_bundle_result['file_info'], fp) + print(json.dumps(data_bundle_result['file_info'], indent=2), file=fp) except Exception as e: From 65a36edec36571424aa5069ea0017e81fd6d6f95 Mon Sep 17 00:00:00 2001 From: Sarah Date: Tue, 4 Aug 2020 15:21:09 -0400 Subject: [PATCH 045/125] missing specimen id now stops submission properly --- src/encoded/submit.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index aa04c3f0c1..cf7f16eba6 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -167,7 +167,7 @@ def xls_to_json(xls_data, project, institution): # debuglog("descriptions:", descriptions) # Temporary instrumentation for debugging to go away soon. 
-kmp 25-Jul-2020 rows = [] # keys = [key.lower().strip().rstrip('*').rstrip() for key in keys] - required = ['individual id', 'relation to proband', 'report required', 'analysis id'] + required = ['individual id', 'relation to proband', 'report required', 'analysis id', 'specimen id'] missing = [col for col in required if col not in keys] if missing: msg = 'Column(s) "{}" not found in spreadsheet! Spreadsheet cannot be processed.'.format('", "'.join(missing)) @@ -193,7 +193,7 @@ def xls_to_json(xls_data, project, institution): for i, row in enumerate(rows): debuglog("row:", repr(row)) # Temporary instrumentation for debugging to go away soon. -kmp 25-Jul-2020 row_num = i + counter + 1 - missing_required = [col for col in required if col not in row] + missing_required = [col for col in required if col not in row or not row[col]] if missing_required: items['errors'].append( 'Spreadsheet row {} cannot be processed - missing required field(s) {}' @@ -222,9 +222,9 @@ def xls_to_json(xls_data, project, institution): file_errors.extend(file_items['errors']) items['file_fastq'].update(file_items['file_fastq']) items['file_processed'].update(file_items['file_processed']) - else: - items['errors'].append('WARNING: No specimen id present for patient {},' - ' sample will not be created.'.format(row['individual id'])) + # else: + # items['errors'].append('WARNING: No specimen id present for patient {},' + # ' sample will not be created.'.format(row['individual id'])) # create SampleProcessing item for trio/group if needed # items = create_sample_processing_groups(items, sp_alias) items = add_relations(items) @@ -670,7 +670,7 @@ def post_and_patch_all_items(virtualapp, json_data_final): output = [] files = [] if not json_data_final: - return output, 'not run' + return output, 'not run', [] item_names = {'individual': 'individual_id', 'family': 'family_id', 'sample': 'specimen_accession'} final_status = {} no_errors = True From 31f68c23a9dcb45f53c1be895c28e28ea6e8a15f Mon Sep 17 00:00:00 2001 From: Sarah Date: Tue, 4 Aug 2020 16:23:45 -0400 Subject: [PATCH 046/125] tweaks to error reporting in submit.py --- src/encoded/submit.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/encoded/submit.py b/src/encoded/submit.py index cf7f16eba6..4133f3a25a 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -196,10 +196,15 @@ def xls_to_json(xls_data, project, institution): missing_required = [col for col in required if col not in row or not row[col]] if missing_required: items['errors'].append( - 'Spreadsheet row {} cannot be processed - missing required field(s) {}' + 'Row {} - missing required field(s) {}. This row cannot be processed.' ''.format(row_num, ', '.join(missing_required)) ) indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) + if not family_dict.get(row['analysis id']): + msg = ('Row {} - Proband for this analysis could not be found. 
' + 'This row cannot be processed.'.format(i)) + items['errors'].append(msg) + continue fam_alias = '{}:{}'.format(project['name'], family_dict[row['analysis id']]) # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual @@ -239,6 +244,7 @@ def xls_to_json(xls_data, project, institution): val2['project'] = project['@id'] val2['institution'] = institution['@id'] items['errors'].extend(file_errors) + items['errors'] = list(set(items['errors'])) return items, True # most errors passed to next step in order to combine with validation errors @@ -311,7 +317,7 @@ def fetch_family_metadata(idx, row, items, indiv_alias, fam_alias): relation_found = True break if not relation_found: - msg = 'Row {}: Invalid relation "{}" for individual {} - Relation should be one of: {}'.format( + msg = 'Row {} - Invalid relation "{}" for individual {} - Relation should be one of: {}'.format( idx, row.get('relation to proband'), row.get('individual id'), ', '.join(valid_relations) ) items['errors'].append(msg) @@ -354,6 +360,10 @@ def fetch_sample_metadata(idx, row, items, indiv_alias, samp_alias, analysis_ali } if row.get('analysis id') in analysis_type_dict: new_sp_item['analysis_type'] = analysis_type_dict[row.get('analysis id')] + if not analysis_type_dict[row.get('analysis id')]: + msg = ('Row {} - Samples with analysis ID {} contain mis-matched or invalid workup type values. ' + 'Sample cannot be processed.'.format(idx, row.get('analysis id'))) + items['errors'].append(msg) new_items['sample_processing'].setdefault(analysis_alias, new_sp_item) new_items['sample_processing'][analysis_alias]['samples'].append(samp_alias) if row.get('report required').lower().startswith('y'): @@ -611,7 +621,7 @@ def validate_all_items(virtualapp, json_data): if itemtype not in ['case', 'report']: for e in error: if row: - errors.append('Row {} {} - Error found: {}'.format(row, itemtype, e)) + errors.append('Row {} - Error found: {}'.format(row, e)) else: errors.append('{} {} - Error found: {}'.format(itemtype, alias, e)) validation_results[itemtype]['errors'] += 1 @@ -648,8 +658,15 @@ def validate_all_items(virtualapp, json_data): json_data_final['patch'].setdefault(itemtype, {}) if patch_data: json_data_final['patch'][itemtype][db_results[alias]['@id']] = patch_data - elif itemtype not in ['case', 'report', 'sample_processing']: - output.append('{} {} - Item already in database, no changes needed'.format(itemtype, alias)) + elif itemtype not in ['case', 'report', 'sample_processing', 'file_fastq']: + item_name = alias[alias.index(':')+1:] + if item_name.startswith(itemtype + '-'): + item_name = item_name[item_name.index('-') + 1:] + if itemtype == 'family': + item_name = 'family for ' + item_name + else: + item_name = itemtype + ' ' + item_name + output.append('{} - Item already in database, no changes needed'.format(item_name)) # do something to record response validation_results[itemtype]['validated'] += 1 output.extend([error for error in errors]) @@ -658,7 +675,7 @@ def validate_all_items(virtualapp, json_data): itemtype, validation_results[itemtype]['validated'], validation_results[itemtype]['errors'] )) if errors: - output.append('Validation errors found in items. Please fix spreadsheet before submitting.') + output.append('Errors found in items. 
Please fix spreadsheet before submitting.') return {}, output, False else: json_data_final['aliases'] = alias_dict From 1bd88c4768eeaceb8e40afa660b83e71129a588f Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Tue, 4 Aug 2020 16:34:07 -0400 Subject: [PATCH 047/125] WIP: Checkpointed work. Not ready for release. --- poetry.lock | 14 +- pyproject.toml | 2 +- src/encoded/ingestion/processors.py | 14 +- src/encoded/ingestion_listener.py | 130 +++++++++++++++--- src/encoded/schemas/ingestion_submission.json | 45 +++++- src/encoded/tests/test_util.py | 13 +- src/encoded/types/ingestion.py | 82 ++++++----- src/encoded/util.py | 80 ++++++++++- 8 files changed, 308 insertions(+), 72 deletions(-) diff --git a/poetry.lock b/poetry.lock index a288056228..2f3dcecda9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -311,7 +311,7 @@ description = "Utility package for interacting with the 4DN Data Portal and othe name = "dcicutils" optional = false python-versions = ">=3.4,<3.8" -version = "0.37.0" +version = "0.38.0" [package.dependencies] aws-requests-auth = ">=0.4.2,<1" @@ -1410,7 +1410,7 @@ description = "Fast, Extensible Progress Meter" name = "tqdm" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*" -version = "4.48.0" +version = "4.48.1" [package.extras] dev = ["py-make (>=0.1.0)", "twine", "argopt", "pydoc-markdown"] @@ -1655,7 +1655,7 @@ transaction = ">=1.6.0" test = ["zope.testing"] [metadata] -content-hash = "b245acd5efed1e52abfe3baf811585ca3307212bde8931f0067ec94e41fbf345" +content-hash = "26aaf14a00bb85d74fb32f02edf89778c1e361710661b2c8ab844e09b33d16c3" lock-version = "1.0" python-versions = ">=3.6,<3.7" @@ -1819,8 +1819,8 @@ dcicsnovault = [ {file = "dcicsnovault-3.1.9.tar.gz", hash = "sha256:347ab5ee3053a80273b081803f93fe115ed48ad53e6ce91c65a5d21a8f02d0e2"}, ] dcicutils = [ - {file = "dcicutils-0.37.0-py3-none-any.whl", hash = "sha256:faa5f6c84a70a9b04ad6b5abac0afba67cc7e9b0f2a342d4bfff746c750355d6"}, - {file = "dcicutils-0.37.0.tar.gz", hash = "sha256:eff330adbf34ac4b8e65ecc48c2b1fe91251f8f25691f8c614825669d40ba128"}, + {file = "dcicutils-0.38.0-py3-none-any.whl", hash = "sha256:57636fa6b802881a02375123080d3e121a8f22fbe731d7c2312e1a17fa69e575"}, + {file = "dcicutils-0.38.0.tar.gz", hash = "sha256:fcdd88e6169b0b98393e052859bf09d98a31102d5d55f79339046d3e67b5edf9"}, ] docker = [ {file = "docker-4.2.2-py2.py3-none-any.whl", hash = "sha256:03a46400c4080cb6f7aa997f881ddd84fef855499ece219d75fbdb53289c17ab"}, @@ -2381,8 +2381,8 @@ toml = [ {file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"}, ] tqdm = [ - {file = "tqdm-4.48.0-py2.py3-none-any.whl", hash = "sha256:fcb7cb5b729b60a27f300b15c1ffd4744f080fb483b88f31dc8654b082cc8ea5"}, - {file = "tqdm-4.48.0.tar.gz", hash = "sha256:6baa75a88582b1db6d34ce4690da5501d2a1cb65c34664840a456b2c9f794d29"}, + {file = "tqdm-4.48.1-py2.py3-none-any.whl", hash = "sha256:44b896c38f70f91826a3f83a3195b23c0460322bfc729566ec8e4e89bb5ad713"}, + {file = "tqdm-4.48.1.tar.gz", hash = "sha256:7b7dd59cd9f03b89365ba67eb8515f5d2803fd1eb707abdbb914691a3123d9df"}, ] transaction = [ {file = "transaction-2.4.0-py2.py3-none-any.whl", hash = "sha256:b96a5e9aaa73f905759bc9ccf0021bf4864c01ac36666e0d28395e871f6d584a"}, diff --git a/pyproject.toml b/pyproject.toml index d18ac3316c..5bddccc4d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ chardet = "3.0.4" colorama = "0.3.3" dcicpyvcf = "1.0.0" dcicsnovault = ">=3.1.9,<4" # Fixes build problems in 3.1.8 -dcicutils = ">=0.37.0,<1" # has the 
LockoutManager needed for Snovault +dcicutils = ">=0.38.0,<1" # has the LockoutManager needed for Snovault docutils = "0.12" elasticsearch = "5.5.3" elasticsearch-dsl = "^5.4.0" diff --git a/src/encoded/ingestion/processors.py b/src/encoded/ingestion/processors.py index 4d3173da17..e83846518d 100644 --- a/src/encoded/ingestion/processors.py +++ b/src/encoded/ingestion/processors.py @@ -50,12 +50,12 @@ def handle_data_bundle(submission): response = s3_client.get_object(Bucket=DATA_BUNDLE_BUCKET, Key=manifest_key) manifest = json.load(response['Body']) - data_key = manifest['object_name'] + object_name = manifest['object_name'] parameters = manifest['parameters'] institution = get_parameter(parameters, 'institution') project = get_parameter(parameters, 'project') - debuglog(submission_id, "data_key:", data_key) + debuglog(submission_id, "object_name:", object_name) debuglog(submission_id, "parameters:", parameters) started_key = "%s/started.txt" % submission_id @@ -65,15 +65,17 @@ def handle_data_bundle(submission): # data_stream = s3_client.get_object(Bucket=DATA_BUNDLE_BUCKET, Key="%s/manifest.json" % uuid)['Body'] resolution = { - "data_key": data_key, + "data_key": object_name, "manifest_key": manifest_key, "started_key": started_key, } try: - submission.set_item_detail(object_name=manifest['object_name'], parameters=manifest['parameters'], - institution=institution, project=project) + submission.patch_item(submission_id=submission_id, + object_name=object_name, + parameters=parameters, + processing_status={"state": "processing"}) if isinstance(institution, str): institution = submission.vapp.get(institution).json @@ -82,7 +84,7 @@ def handle_data_bundle(submission): validation_log_lines, final_json, result_lines = submit_data_bundle(s3_client=s3_client, bucket=DATA_BUNDLE_BUCKET, - key=data_key, + key=object_name, project=project, institution=institution, vapp=submission.vapp) diff --git a/src/encoded/ingestion_listener.py b/src/encoded/ingestion_listener.py index 204181aa3e..c49807d6ad 100644 --- a/src/encoded/ingestion_listener.py +++ b/src/encoded/ingestion_listener.py @@ -8,26 +8,34 @@ import json import os import psycopg2 -import requests # XXX: C4-211 should not be needed but is +import pyramid.request +import requests # XXX: C4-211 should not be needed but is // KMP needs this, too, until subrequest posts work import signal import socket import structlog import threading import time +import urllib.parse import uuid import webtest from dcicutils.misc_utils import VirtualApp, ignored from pyramid import paster +from pyramid.request import Request from pyramid.response import Response from pyramid.view import view_config +from requests.auth import HTTPBasicAuth +from snovault import COLLECTIONS, Collection +from snovault.crud_views import collection_add as sno_collection_add +from snovault.embed import make_subrequest +from snovault.schema_utils import validate_request from snovault.util import debug_log from vcf import Reader from .commands.ingest_vcf import VCFParser from .ingestion.common import register_path_content_type, DATA_BUNDLE_BUCKET, SubmissionFailure from .ingestion.processors import get_ingestion_processor from .types.ingestion import SubmissionFolio -from .util import resolve_file_path, gunzip_content, debuglog +from .util import resolve_file_path, gunzip_content, debuglog, subrequest_item_creation log = structlog.getLogger(__name__) @@ -42,10 +50,75 @@ def includeme(config): config.add_route('ingestion_status', '/ingestion_status') 
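For orientation, once a submission reaches the queue, the reworked run() loop later in this patch hands each message to a per-type processor. Stripped of the polling and status bookkeeping, the flow is roughly the sketch below; SubmissionFolio, get_ingestion_processor, log, and delete_messages are the names used in this patch series, while process_one_message is an illustrative wrapper that relies on the module imports shown in the diff above.

# Rough sketch (simplified) of the per-message dispatch in IngestionListener.run().
def process_one_message(self, message, uuid, ingestion_type):
    # Constructing the folio also creates the IngestionSubmission item
    # with processing_status {"state": "submitted"}.
    submission = SubmissionFolio(vapp=self.vapp, ingestion_type=ingestion_type, submission_id=uuid)
    handler = get_ingestion_processor(ingestion_type)  # e.g. handle_data_bundle for 'data_bundle'
    try:
        handler(submission)
    except Exception as e:
        log.error(e)
    # Success or failure, the message is not reprocessed.
    self.delete_messages([message])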
config.add_route('prompt_for_ingestion', '/prompt_for_ingestion') config.add_route('submit_for_ingestion', '/submit_for_ingestion') + + # THESE TWO ARE FOR DEBUGGING ONLY. + config.add_route('prompt_for_subrequest', '/prompt_for_subrequest') + config.add_route('submit_subrequest', '/submit_subrequest') + config.registry[INGESTION_QUEUE] = IngestionQueueManager(config.registry) config.scan(__name__) +# Moved to util.py and modified. +# def subrequest_item_creation(request: pyramid.request.Request, item_type: str, json_body: dict = None) -> dict: +# if json_body is None: +# json_body = {} +# collection_path = '/' + item_type +# method = 'POST' +# # json_utf8 = json.dumps(json_body).encode('utf-8') # Unused, but here just in case +# subrequest = make_subrequest(request=request, path=collection_path, method=method, json_body=json_body) +# subrequest.remote_user = 'EMBED' +# subrequest.registry = request.registry +# # Maybe... +# # validated = json_body.copy() +# # subrequest.validated = validated +# collection: Collection = subrequest.registry[COLLECTIONS][item_type] +# check_true(subrequest.json_body, "subrequest.json_body is not properly initialized.") +# check_true(not subrequest.validated, "subrequest was unexpectedly validated already.") +# check_true(subrequest.remote_user == 'EMBED', "subrequest.remote_user is not 'EMBED'.") +# check_true(not subrequest.errors, "subrequest.errors already has errors before trying to validate.") +# check_true(subrequest.remote_user is None, "subrequest.remote_user should have been None before we set it.") +# check_true(request.remote_user is None, "request.remote_user should have been None before we set it.") +# request.remote_user = 'EMBED' +# validate_request(schema=collection.type_info.schema, request=subrequest, data=json_body) +# if not subrequest.validated: +# return { +# "@type": ["Exception"], +# "errors": subrequest.errors +# } +# else: +# json_result: dict = sno_collection_add(context=collection, request=subrequest, render=False) +# return json_result + + +# FOR DEBUGGING ONLY +@view_config(route_name='prompt_for_subrequest', request_method='GET') +@debug_log +def prompt_for_subrequest(context, request): + ignored(context, request) + return Response(PROMPT_FOR_SUBREQUEST) + + +# FOR DEBUGGING ONLY +register_path_content_type(path='/submit_subrequest', content_type='multipart/form-data') +@view_config(route_name='submit_subrequest', request_method='POST', accept='multipart/form-data') +@debug_log +def submit_subrequest(context, request): + # import pdb; pdb.set_trace() + institution = "/institutions/hms-dbmi/" + project = "/projects/12a92962-8265-4fc0-b2f8-cf14f05db58b/" + # institution = request.invoke_subrequest(make_subrequest(request, institution)).json + print("institution=", institution) + # project = request.invoke_subrequest(make_subrequest(request, project)).json + print("project=", project) + json_body = { + "ingestion_type": 'data_bundle', + "institution": institution, + "project": project, + } + return subrequest_item_creation(request=request, item_type='IngestionSubmission', json_body=json_body) + + @view_config(route_name='prompt_for_ingestion', request_method='GET') @debug_log def prompt_for_ingestion(context, request): @@ -65,6 +138,14 @@ def submit_for_ingestion(context, request): override_name = request.POST.get('override_name', None) parameters = dict(request.POST) parameters['datafile'] = filename + institution = parameters.get('institution', 'institution-missing') + project = parameters.get('project', 'project-missing') + + 
submission_id = SubmissionFolio.create_item(request, + ingestion_type=ingestion_type, + institution=institution, + project=project) + # ``input_file`` contains the actual file data which needs to be # stored somewhere. @@ -75,7 +156,10 @@ def submit_for_ingestion(context, request): # NOTE: Some reference information about uploading files to s3 is here: # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html - submission_id = str(uuid.uuid4()) + # submission.set_item_detail(object_name=manifest['object_name'], parameters=manifest['parameters'], + # institution=institution, project=project) + + # submission_id = str(uuid.uuid4()) _, ext = os.path.splitext(filename) object_name = "{id}/datafile{ext}".format(id=submission_id, ext=ext) manifest_name = "{id}/manifest.json".format(id=submission_id) @@ -525,6 +609,14 @@ def run(self): debuglog("Ingestion listener started.") + messages = [] # This'll get a better value below in each loop iteration. This is just a declaration of intent. + + def discard(msg): + self.delete_messages([msg]) + # Assuming we didn't get an error trying to remove it, + # it should also get removed from our to-do list. + messages.remove(msg) + while self.should_remain_online(): debuglog("About to get messages.") @@ -548,12 +640,14 @@ def run(self): # to make all the parts work the same -kmp submission = SubmissionFolio(vapp=self.vapp, ingestion_type=ingestion_type, submission_id=uuid) handler = get_ingestion_processor(ingestion_type) - handler(submission) - # TODO: If we delete messages at the end of each loop, I think we'll here need to do this, - # since we're bypassing bottom of lop with the 'continue': - # self.delete_messages([message]) - # messages.remove(message) - debuglog("HANDLED", uuid) + try: + debuglog("HANDLING:", uuid) + handler(submission) + debuglog("HANDLED:", uuid) + except Exception as e: + log.error(e) + # If we suceeded, we don't need to do it again, and if we failed we don't need to fail again. + discard(message) continue debuglog("Did NOT process", uuid, "as", ingestion_type) @@ -599,9 +693,10 @@ def run(self): log.error(msg) self.update_status(msg=msg) - # TODO: I worry waiting to delete multiple messages means that if there's an error - # we'll have things that were completed not get deleted. Should delete one per iteration? - # -kmp 26-Jul-2020 + discard(message) + + # This is just fallback cleanup in case messages weren't cleaned up within the loop. + # In normal operation, they will be. self.delete_messages(messages) @@ -716,7 +811,7 @@ def status_app(environ, start_response): # Command Application (for waitress) def main(): """ Entry point for the local deployment. """ - parser = argparse.ArgumentParser( + parser = argparse.ArgumentParser( # noqa - PyCharm wrongly thinks the formatter_class is specified wrong here. description='Listen for VCF File uuids to ingest', epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter @@ -736,13 +831,13 @@ def main(): vapp = VirtualApp(app, config) return run(vapp) -PROMPT_FOR_INGESTION = """ +PROMPT_TEMPLATE = """ Submit for Ingestion