diff --git a/egasub/ega/entities/analysis.py b/egasub/ega/entities/analysis.py index edc739a..ade5219 100644 --- a/egasub/ega/entities/analysis.py +++ b/egasub/ega/entities/analysis.py @@ -3,7 +3,7 @@ class Analysis(object): def __init__(self, alias, title, description, study_id, sample_references, analysis_center, analysis_date, - analysis_type_id, files, attributes, genome_id, chromosome_references, experiment_type_id,platform,status=None, id_=None): + analysis_type_id, files, attributes, genome_id, chromosome_references, experiment_type_id,platform,status=None, id_=None,ega_accession_id=None): self.title = title self.description = description self.study_id = study_id @@ -20,6 +20,7 @@ def __init__(self, alias, title, description, study_id, sample_references, analy self.alias = alias self.status = status self.id = id_ + self.ega_accession_id = ega_accession_id def to_dict(self): return { @@ -37,7 +38,8 @@ def to_dict(self): 'experimentTypeId' : self.experiment_type_id, 'platform' : self.platform, 'alias' : self.alias, - 'status': self.status + 'status': self.status, + 'egaAccessionId': self.ega_accession_id } def to_xml(self): diff --git a/egasub/ega/entities/dataset.py b/egasub/ega/entities/dataset.py index 0c9b7b9..5a36bee 100644 --- a/egasub/ega/entities/dataset.py +++ b/egasub/ega/entities/dataset.py @@ -1,7 +1,7 @@ class Dataset(object): def __init__(self,alias,dataset_type_ids, policy_id, runs_references, analysis_references, title, - dataset_links, attributes, id_=None): + dataset_links, attributes, id_=None, ega_accession_id=None): self.alias = alias self.dataset_type_ids = dataset_type_ids self.policy_id = policy_id @@ -11,6 +11,7 @@ def __init__(self,alias,dataset_type_ids, policy_id, runs_references, analysis_r self.dataset_links = dataset_links self.attributes = attributes self.id = id_ + self.ega_accession_id = ega_accession_id def to_dict(self): return { @@ -21,7 +22,8 @@ def to_dict(self): 'analysisReferences' : self.analysis_references, 'title' : self.title, 'datasetLinks' : map(lambda dataset_link: dataset_link.to_dict(), self.dataset_links), - 'attributes' : map(lambda attribute: attribute.to_dict(), self.attributes) + 'attributes' : map(lambda attribute: attribute.to_dict(), self.attributes), + 'egaAccessionId': self.ega_accession_id } def to_xml(self): diff --git a/egasub/ega/entities/experiment.py b/egasub/ega/entities/experiment.py index 374d3f9..c1c5eeb 100644 --- a/egasub/ega/entities/experiment.py +++ b/egasub/ega/entities/experiment.py @@ -3,7 +3,7 @@ class Experiment(object): def __init__(self, alias, title, instrument_model_id, library_source_id, library_selection_id, library_strategy_id, design_description, library_name, library_construction_protocol, library_layout_id, paired_nomial_length, paired_nominal_sdev, - sample_id, study_id,_id, status=None): + sample_id, study_id,_id, status=None, ega_accession_id=None): self.alias = alias self.title = title self.instrument_model_id = instrument_model_id @@ -20,6 +20,7 @@ def __init__(self, alias, title, instrument_model_id, library_source_id, library self.study_id = study_id self.id = _id self.status = status + self.ega_accession_id = ega_accession_id def to_dict(self): return { @@ -38,7 +39,8 @@ def to_dict(self): 'sampleId' : self.sample_id, 'studyId' : self.study_id, 'id' : self.id, - 'status': self.status + 'status': self.status, + 'egaAccessionId': self.ega_accession_id } diff --git a/egasub/ega/entities/run.py b/egasub/ega/entities/run.py index 95e7239..0b9a2be 100644 --- a/egasub/ega/entities/run.py +++ b/egasub/ega/entities/run.py @@ -1,7 +1,7 @@ from file import File class Run(object): - def __init__(self,alias,sample_id,run_file_type_id,experiment_id,files,_id, status=None): + def __init__(self,alias,sample_id,run_file_type_id,experiment_id,files,_id, status=None, ega_accession_id=None): self.alias = alias self.sample_id = sample_id self.run_file_type_id = run_file_type_id @@ -9,6 +9,7 @@ def __init__(self,alias,sample_id,run_file_type_id,experiment_id,files,_id, stat self.files = files self.id = _id self.status = status + self.ega_accession_id = ega_accession_id def to_dict(self): return { @@ -18,7 +19,8 @@ def to_dict(self): 'experimentId' : self.experiment_id, 'files' : map(lambda file: file.to_dict(), self.files), 'id' : self.id, - 'status': self.status + 'status': self.status, + 'egaAccessionId': self.ega_accession_id } diff --git a/egasub/ega/entities/sample.py b/egasub/ega/entities/sample.py index cec9bc1..53c857d 100644 --- a/egasub/ega/entities/sample.py +++ b/egasub/ega/entities/sample.py @@ -4,7 +4,7 @@ class Sample(object): def __init__(self,alias,title,description,case_or_control_id,gender_id,organism_part, cell_line,region,phenotype, subject_id, anonymized_name, bio_sample_id, - sample_age, sample_detail, attributes,id_,status=None): + sample_age, sample_detail, attributes,id_=None,status=None,ega_accession_id=None): self.alias = alias self.title = title self.description = description @@ -22,6 +22,7 @@ def __init__(self,alias,title,description,case_or_control_id,gender_id,organism_ self.attributes = attributes self.id = id_ self.status = status + self.ega_accession_id = ega_accession_id def to_dict(self): return { @@ -41,7 +42,8 @@ def to_dict(self): 'sampleDetail' : self.sample_detail, 'attributes' : map(lambda attribute: attribute.to_dict(), self.attributes), 'id' : self.id, - 'status': self.status + 'status': self.status, + 'egaAccessionId': self.ega_accession_id } def to_xml(self): diff --git a/egasub/ega/services/__init__.py b/egasub/ega/services/__init__.py index 89fef18..933dfc3 100644 --- a/egasub/ega/services/__init__.py +++ b/egasub/ega/services/__init__.py @@ -98,6 +98,7 @@ def object_submission(ctx, obj, obj_type, dry_run=True): # PARTIALLY_SUBMITTED, SUBMITTED, SUBMITTED_DRAFT, SUBMITTED_VALIDATED and SUBMITTED_VALIDATED_WITH_ERRORS if 'SUBMITTED' in o.get('status'): obj.status = o.get('status') + obj.ega_accession_id = o.get('egaAccessionId') ctx.obj['LOGGER'].info("%s with alias '%s' already exists in '%s' status, not submitting." \ % (obj_type, obj.alias, o.get('status'))) @@ -206,6 +207,11 @@ def _validate_submit_obj(ctx, obj, obj_type, op_type): ctx.obj['LOGGER'].error("Validation exception (note that 'Sample not found' or 'Unknown sample' error, if any, will disappear when perform 'submit' instead of 'dry_run'; 'File not found' error, if any, will disappear if you make sure file is indeed uploaded and give it a bit more time (could be a few hours) for EGA systems to synchronize file information): \n%s" % '\n'.join(errors)) obj.status = r_data.get('response').get('result')[0].get('status') + ega_accession_id = r_data.get('response').get('result')[0].get('egaAccessionId') + if ega_accession_id: + obj.ega_accession_id = str(ega_accession_id) + elif r_data.get('response').get('result')[0].get('egaAccessionIds'): # for same reason experiment object has a single element of egaAccessionIds + obj.ega_accession_id = str(r_data.get('response').get('result')[0].get('egaAccessionIds')[0]) ctx.obj['LOGGER'].info("%s '%s' completed." % (op_type.capitalize(), obj_type)) diff --git a/egasub/submission/submittable/base.py b/egasub/submission/submittable/base.py index e667e6e..40e06d5 100644 --- a/egasub/submission/submittable/base.py +++ b/egasub/submission/submittable/base.py @@ -144,18 +144,20 @@ def restore_latest_object_status(self, obj_type): with open(status_file, 'r') as f: lines = f.readlines() if lines: - line = lines[-1] + line = lines[-1].rstrip('\n') status_values = line.split('\t') - id_, alias, status, timestamp = status_values[0:4] + id_, alias, status, timestamp, op_type, session_id, log_file, ega_accession_id = \ + (lambda a,b,c,d=None,e=None,f=None,g=None,h=None: (a,b,c,d,e,f,g,h))(*status_values[0:8]) # this is mostly for backward compatibility, earlier versions may have fewer columns if obj.alias and not obj.alias == alias: pass # alias has changed, this should never happen, if it does, we simply ignore and do not restore the status else: # never restore object id, which should always be taken from the server side obj.alias = alias obj.status = status # we need to get status at last operation with EGA, it will be used to decide whether it's ready for performing submission + obj.ega_accession_id = ega_accession_id except Exception: pass - def record_object_status(self, obj_type, dry_run, submission_session, log_file): + def record_object_status(self, obj_type, dry_run, submission_session, log_file, ega_accession_id=None): if not obj_type in ('sample', 'analysis', 'experiment', 'run'): return @@ -169,13 +171,20 @@ def record_object_status(self, obj_type, dry_run, submission_session, log_file): obj = getattr(self, obj_type) op_type = 'dry_run' if dry_run else 'submit' + + if ega_accession_id == None: ega_accession_id = "" + with open(status_file, 'a') as f: - f.write("%s\n" % '\t'.join([str(obj.id), str(obj.alias), str(obj.status), str(int(time.time())), op_type, submission_session, log_file])) + f.write("%s\n" % '\t'.join([str(obj.id), str(obj.alias), str(obj.status), str(int(time.time())), op_type, submission_session, log_file, ega_accession_id])) def local_validate(self, ega_enums): # Alias validation sample_alias_in_sub_dir = self.submission_dir.split('.')[0] # first portion is sample alias - if not self.sample.alias == sample_alias_in_sub_dir: + if not self.sample.alias: + self._add_local_validation_error("sample",self.sample.alias,"alias","Invalid value '%s'. Sample alias must be set" % (self.sample.alias)) + return # no need to move on + + if self.sample.alias != sample_alias_in_sub_dir: self._add_local_validation_error("sample",self.sample.alias,"alias","Invalid value '%s'. Sample alias must be set and match the 'alias' portion in the submission directory '%s'." % (self.sample.alias, sample_alias_in_sub_dir)) if not re.match(r'^[a-zA-Z0-9_\-]+$', self.sample.alias): # validate sample alias pattern diff --git a/egasub/submission/submitter.py b/egasub/submission/submitter.py index 29959b9..810424a 100644 --- a/egasub/submission/submitter.py +++ b/egasub/submission/submitter.py @@ -24,7 +24,7 @@ def submit(self, submittable, dry_run=True): return object_submission(self.ctx, submittable.sample, 'sample', dry_run) - submittable.record_object_status('sample', dry_run, self.ctx.obj['SUBMISSION']['id'], self.ctx.obj['log_file']) + submittable.record_object_status('sample', dry_run, self.ctx.obj['SUBMISSION']['id'], self.ctx.obj['log_file'], submittable.sample.ega_accession_id) if self.ctx.obj['CURRENT_DIR_TYPE'] == 'unaligned': try: @@ -38,13 +38,13 @@ def submit(self, submittable, dry_run=True): submittable.experiment.study_id = self.ctx.obj['SETTINGS']['ega_study_id'] object_submission(self.ctx, submittable.experiment, 'experiment', dry_run) - submittable.record_object_status('experiment', dry_run, self.ctx.obj['SUBMISSION']['id'], self.ctx.obj['log_file']) + submittable.record_object_status('experiment', dry_run, self.ctx.obj['SUBMISSION']['id'], self.ctx.obj['log_file'], submittable.experiment.ega_accession_id) submittable.run.sample_id = submittable.sample.id submittable.run.experiment_id = submittable.experiment.id object_submission(self.ctx, submittable.run, 'run', dry_run) - submittable.record_object_status('run', dry_run, self.ctx.obj['SUBMISSION']['id'], self.ctx.obj['log_file']) + submittable.record_object_status('run', dry_run, self.ctx.obj['SUBMISSION']['id'], self.ctx.obj['log_file'], submittable.run.ega_accession_id) self.ctx.obj['LOGGER'].info("Finished processing '%s'" % submittable.submission_dir) except Exception, error: @@ -83,7 +83,7 @@ def submit(self, submittable, dry_run=True): ) ] object_submission(self.ctx, submittable.analysis, 'analysis', dry_run) - submittable.record_object_status('analysis', dry_run, self.ctx.obj['SUBMISSION']['id'], self.ctx.obj['log_file']) + submittable.record_object_status('analysis', dry_run, self.ctx.obj['SUBMISSION']['id'], self.ctx.obj['log_file'], submittable.analysis.ega_accession_id) self.ctx.obj['LOGGER'].info('Finished processing %s' % submittable.submission_dir) except Exception, error: diff --git a/tests/submission/test_alignment.py b/tests/submission/test_alignment.py index 16cbf17..69ab0e0 100644 --- a/tests/submission/test_alignment.py +++ b/tests/submission/test_alignment.py @@ -29,7 +29,8 @@ def test_alignment(): 'attributes': [], 'bioSampleId': None, 'anonymizedName': None, - 'sampleDetail': None + 'sampleDetail': None, + 'egaAccessionId':None } reference_analysis = { @@ -43,6 +44,7 @@ def test_alignment(): 'analysisTypeId': 0, 'experimentTypeId' : [0], 'status': None, + 'egaAccessionId': None, 'files': [ { 'unencryptedChecksum': '5e0024389829a7b131fed6476f7e71c4', diff --git a/tests/submission/test_unaligned.py b/tests/submission/test_unaligned.py index 6a01bb1..f3d7715 100644 --- a/tests/submission/test_unaligned.py +++ b/tests/submission/test_unaligned.py @@ -31,7 +31,8 @@ def test_unaligned(): 'bioSampleId': None, 'anonymizedName': None, 'sampleDetail': None, - 'status': None + 'status': None, + 'egaAccessionId': None } reference_experiment = { @@ -50,7 +51,8 @@ def test_unaligned(): 'studyId': None, 'libraryStrategyId': 5, 'alias': None, - 'status': None + 'status': None, + 'egaAccessionId': None } reference_run = { @@ -68,7 +70,8 @@ def test_unaligned(): 'runFileTypeId': 0, 'alias': None, 'id': None, - 'status': None + 'status': None, + 'egaAccessionId': None } assert cmp(unaligned.sample.to_dict(),reference_sample) == 0 diff --git a/tests/submission/test_variation.py b/tests/submission/test_variation.py index bcc9112..70d48d8 100644 --- a/tests/submission/test_variation.py +++ b/tests/submission/test_variation.py @@ -29,7 +29,8 @@ def test_variation(): 'bioSampleId': None, 'anonymizedName': None, 'status': None, - 'sampleDetail': None + 'sampleDetail': None, + 'egaAccessionId': None } reference_analysis = { @@ -43,6 +44,7 @@ def test_variation(): 'analysisTypeId': 1, 'experimentTypeId' : [0], 'status': None, + 'egaAccessionId': None, 'files': [ { 'unencryptedChecksum': '5e0024389829a7b131fed6476f7e71c4', diff --git a/tests/test_analysis.py b/tests/test_analysis.py index f7cc22b..b8e5cba 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -72,6 +72,7 @@ def test_to_dict(): 'attributes' : map(lambda attribute: attribute.to_dict(), attributes), 'chromosomeReferences' : map(lambda ref: ref.to_dict(), chromosome_references), 'alias' : 'an alias', - 'status': None + 'status': None, + 'egaAccessionId': None }, analysis.to_dict()) == 0 diff --git a/tests/test_dataset.py b/tests/test_dataset.py index d2b9994..b2f7393 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -5,7 +5,7 @@ links = [DatasetLink('label 1','url1'),DatasetLink('label 2','url2')] attributes = [Attribute('The tag 1','The value 1','an unit'),Attribute('The tag 2','The value 2','an unit')] -dataset = Dataset('an alias',[3,4,5],3,[6,1,4],[8,21,4],'a title',links,attributes) +dataset = Dataset('an alias',[3,4,5],3,[6,1,4],[8,21,4],'a title',links,attributes,None,'ega_accession_id') def test_dataset_type_ids(): @@ -36,7 +36,8 @@ def test_to_dict(): 'analysisReferences' : [8,21,4], 'datasetLinks' : map(lambda dataset_link: dataset_link.to_dict(), links), 'attributes' : map(lambda attribute: attribute.to_dict(), attributes), - 'alias' : 'an alias' + 'alias' : 'an alias', + 'egaAccessionId': 'ega_accession_id' }, dataset.to_dict()) == 0 def test_alias(): diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 056e19a..f2e8661 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -17,7 +17,8 @@ 'studyId' : 3000, 'alias' : 'an alias', 'id' : 22, - 'status': None + 'status': None, + 'egaAccessionId': None } def test_title(): diff --git a/tests/test_run.py b/tests/test_run.py index 4c1e936..931319f 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -28,7 +28,8 @@ def test_to_dict(): 'files' : map(lambda file: file.to_dict(), files), 'alias' : 'an alias', 'id' : 22, - 'status': None + 'status': None, + 'egaAccessionId': None }, run.to_dict()) == 0 def test_alias(): diff --git a/tests/test_sample.py b/tests/test_sample.py index 005bbb3..7863c75 100644 --- a/tests/test_sample.py +++ b/tests/test_sample.py @@ -71,7 +71,8 @@ def test_to_dict(): 'sampleDetail' : 'some details', 'attributes' : map(lambda attribute: attribute.to_dict(), attributes), 'id' : 33, - 'status': None + 'status': None, + 'egaAccessionId':None }, sample.to_dict()) == 0 \ No newline at end of file