diff --git a/src/dcd_mapping/vrs_map.py b/src/dcd_mapping/vrs_map.py index f2bf22a..9779f7c 100644 --- a/src/dcd_mapping/vrs_map.py +++ b/src/dcd_mapping/vrs_map.py @@ -829,17 +829,20 @@ def _map_protein_coding( error_message=str(transcript).strip("'"), ) else: - if _hgvs_pro_is_valid(row.hgvs_pro) and protein_align_result is not None: - hgvs_pro_mappings = _map_protein_coding_pro( - row, psequence_id, transcript, protein_align_result - ) - # This should not occur because protein align result is only None if transcript selection failed, which should be caught by the TxSelectError check above. - elif protein_align_result is None: - hgvs_pro_mappings = MappedScore( - accession_id=row.accession, - score=row.score, - error_message="Could not perform mapping for protein variant because transcript sequence is missing or could not be aligned to reference sequence", - ) + if _hgvs_pro_is_valid(row.hgvs_pro): + if protein_align_result is not None: + hgvs_pro_mappings = _map_protein_coding_pro( + row, psequence_id, transcript, protein_align_result + ) + # Only create this error message if there is not a valid hgvs nt mapping, because if there is a valid hgvs nt mapping, + # it indicates we expect protein alignment to fail and we don't want to create redundant error messages about missing + # transcript sequence or alignment failure + elif protein_align_result is None and not hgvs_nt_mappings: + hgvs_pro_mappings = MappedScore( + accession_id=row.accession, + score=row.score, + error_message="Could not perform mapping for protein variant because transcript sequence is missing or could not be aligned to reference sequence", + ) elif ( not hgvs_nt_mappings ): # only create error message if there is not an hgvs nt mapping