From 408f9960bbe7cb84855c20c0b7fe75a71aebdd7c Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Thu, 6 Jun 2024 23:45:15 +0300 Subject: [PATCH] #1912 Support unresolved IDT monomers (no structure, IDT only) (#1988) Merge from master to 1.21 --- .../integration/ref/formats/idt_to_ket.py.out | 17 +- .../integration/ref/formats/ket_to_idt.py.out | 2 + .../integration/tests/formats/idt_to_ket.py | 17 +- .../integration/tests/formats/ket_to_idt.py | 5 + .../integration/tests/formats/ref/chem.ket | 3 + .../tests/formats/ref/conj_no_class.mol | 3 +- .../integration/tests/formats/ref/fmoc.ket | 1 + .../integration/tests/formats/ref/fmoc.mol | 2 +- .../tests/formats/ref/idt_52moera.ket | 541 ++++++++ .../tests/formats/ref/idt_i2moera.ket | 541 ++++++++ .../tests/formats/ref/idt_i2moera_t.ket | 1112 +++++++++++++++++ .../tests/formats/ref/idt_t_i2moera.ket | 1112 +++++++++++++++++ .../tests/formats/ref/idt_unresolved.ket | 121 ++ .../ref/macro/conjugates/pep-chem-rna.mol | 4 +- .../layout/src/sequence_layout.cpp | 9 +- core/indigo-core/molecule/idt_alias.h | 131 ++ core/indigo-core/molecule/molecule_sgroups.h | 2 + core/indigo-core/molecule/molecule_tgroups.h | 3 + core/indigo-core/molecule/monomers_lib.h | 136 +- core/indigo-core/molecule/sequence_loader.h | 3 + core/indigo-core/molecule/sequence_saver.h | 4 + .../molecule/src/base_molecule.cpp | 6 + core/indigo-core/molecule/src/idt_alias.cpp | 79 ++ .../molecule/src/molecule_json_loader.cpp | 26 +- .../molecule/src/molecule_json_saver.cpp | 41 + .../molecule/src/molecule_sgroups.cpp | 2 +- .../molecule/src/molecule_tgroups.cpp | 10 +- .../molecule/src/monomer_commons.cpp | 2 +- .../indigo-core/molecule/src/monomers_lib.cpp | 88 +- .../molecule/src/sequence_loader.cpp | 84 +- .../molecule/src/sequence_saver.cpp | 336 ++--- 31 files changed, 4104 insertions(+), 339 deletions(-) create mode 100644 api/tests/integration/tests/formats/ref/idt_52moera.ket create mode 100644 api/tests/integration/tests/formats/ref/idt_i2moera.ket create mode 100644 api/tests/integration/tests/formats/ref/idt_i2moera_t.ket create mode 100644 api/tests/integration/tests/formats/ref/idt_t_i2moera.ket create mode 100644 api/tests/integration/tests/formats/ref/idt_unresolved.ket create mode 100644 core/indigo-core/molecule/idt_alias.h create mode 100644 core/indigo-core/molecule/src/idt_alias.cpp diff --git a/api/tests/integration/ref/formats/idt_to_ket.py.out b/api/tests/integration/ref/formats/idt_to_ket.py.out index d6d6772aa1..a56603e091 100644 --- a/api/tests/integration/ref/formats/idt_to_ket.py.out +++ b/api/tests/integration/ref/formats/idt_to_ket.py.out @@ -1,6 +1,9 @@ *** IDT to KET *** +idt_52moera.ket:SUCCEED idt_52moera_with_3phos.ket:SUCCEED idt_bases.ket:SUCCEED +idt_i2moera.ket:SUCCEED +idt_i2moera_t.ket:SUCCEED idt_many_molecules.ket:SUCCEED idt_mixed.ket:SUCCEED idt_mod_phosphates.ket:SUCCEED @@ -9,25 +12,23 @@ idt_prefix_suffix.ket:SUCCEED idt_singe_32moera_nucleoside.ket:SUCCEED idt_single_nucleoside.ket:SUCCEED idt_std_phosphates.ket:SUCCEED +idt_t_i2moera.ket:SUCCEED +idt_unresolved.ket:SUCCEED Test '!+A-$#12w12r23e32e33': got expected error 'Invalid symbols in the sequence: !,-,$,#,1,2,w,1,2,2,3,e,3,2,e,3,3' Test '+/5Phos/A': got expected error 'Sugar prefix could not be used with modified monomer.' Test '/': got expected error 'Unexpected end of data' Test '//': got expected error 'Invalid modification: empty string.' -Test '/32MOErA/T': got expected error 'IDT alias 32MOErA not found at five-prime end position.' -Test '/3Phos/T': expected error 'IDT alias 3Phos not found at three-prime end position.' but got 'SEQUENCE loader: IDT alias 3Phos not found at five-prime end position.' -Test '/52MOErA/': got expected error 'IDT alias 52MOErA not found at three-prime end position.' +Test '/32MOErA/T': got expected error 'IDT alias '32MOErA' cannot be used at five prime end.' +Test '/3Phos/T': got expected error 'IDT alias '3Phos' cannot be used at five prime end.' Test '/5Phos/*A': got expected error '/5Phos/ cannot be modified to 'sP'' Test '/a/': got expected error 'Invalid modification: a.' -Test '/i2MOErA/': got expected error 'IDT alias i2MOErA not found at three-prime end position.' -Test '/i2MOErA/T': got expected error 'IDT alias i2MOErA not found at five-prime end position.' Test 'A*': got expected error 'Invalid IDT sequence: '*' couldn't be the last symbol.' Test 'A*/3Phos/': got expected error 'Phosphor /3Phos/ cannod be modified with '*'.' Test 'A+/3Phos/': got expected error 'Sugar prefix could not be used with modified monomer.' Test 'Am/3Phos/': got expected error 'Sugar prefix could not be used with modified monomer.' Test 'Ar/3Phos/': got expected error 'Sugar prefix could not be used with modified monomer.' -Test 'T/52MOErA/': got expected error 'IDT alias 52MOErA not found at three-prime end position.' -Test 'T/5Phos/': got expected error 'IDT alias 5Phos not found at three-prime end position.' -Test 'T/i2MOErA/': got expected error 'IDT alias i2MOErA not found at three-prime end position.' +Test 'T/52MOErA/': got expected error 'IDT alias '52MOErA' cannot be used at three prime end.' +Test 'T/5Phos/': got expected error 'IDT alias '5Phos' cannot be used at three prime end.' Test 'm/5Phos/A': got expected error 'Sugar prefix could not be used with modified monomer.' Test 'r+A': got expected error 'Sugar prefix 'r' whithout base.' Test 'r/5Phos/A': got expected error 'Sugar prefix could not be used with modified monomer.' diff --git a/api/tests/integration/ref/formats/ket_to_idt.py.out b/api/tests/integration/ref/formats/ket_to_idt.py.out index 1a7c5ddadc..32272ca4c9 100644 --- a/api/tests/integration/ref/formats/ket_to_idt.py.out +++ b/api/tests/integration/ref/formats/ket_to_idt.py.out @@ -1,6 +1,7 @@ *** KET to IDT *** idt_52moera_with_3phos.ket:SUCCEED idt_bases.ket:SUCCEED +idt_i2moera_t.ket:SUCCEED idt_many_molecules.ket:SUCCEED idt_mixed.ket:SUCCEED idt_mod_phosphates.ket:SUCCEED @@ -9,6 +10,7 @@ idt_prefix_suffix.ket:SUCCEED idt_singe_32moera_nucleoside.ket:SUCCEED idt_single_nucleoside.ket:SUCCEED idt_std_phosphates.ket:SUCCEED +idt_unresolved.ket:SUCCEED Test ket-to-idt-invalid-last-phosphate: got expected error 'Cannot save molecule in IDT format - phosphate sP cannot be last monomer in sequence.' Test ket-to-idt-invalid-nucleotide: got expected error 'IDT alias for group sugar:m2e2r base:z8c3G phosphate:mepo2 not found.' Test ket-to-idt-invalid-posphates: got expected error 'Cannot save molecule in IDT format - sugar R with too much phosphates connected P and P.' diff --git a/api/tests/integration/tests/formats/idt_to_ket.py b/api/tests/integration/tests/formats/idt_to_ket.py index e5ef9dc761..7b99c6e81e 100644 --- a/api/tests/integration/tests/formats/idt_to_ket.py +++ b/api/tests/integration/tests/formats/idt_to_ket.py @@ -39,6 +39,11 @@ def find_diff(a, b): "idt_mod_phosphates": "/5Phos//i2MOErC//3Phos/", "idt_mixed": "/5Phos/+A*/i2MOErA/*rG/3Phos/", "idt_many_molecules": "ACTG\n/52MOErA/*AU/3Phos/\rAC/i2MOErC//3Phos/\n\rTACG", + "idt_i2moera": "/i2MOErA/", + "idt_52moera": "/52MOErA/", + "idt_i2moera_t": "/i2MOErA/T", + "idt_t_i2moera": "T/i2MOErA/", + "idt_unresolved": "/unr1//unr2/", } lib = indigo.loadMoleculeFromFile( @@ -66,14 +71,10 @@ def find_diff(a, b): "/a/": "Invalid modification: a.", "r+A": "Sugar prefix 'r' whithout base.", "A*": "Invalid IDT sequence: '*' couldn't be the last symbol.", - "/i2MOErA/": "IDT alias i2MOErA not found at three-prime end position.", - "/i2MOErA/T": "IDT alias i2MOErA not found at five-prime end position.", - "T/i2MOErA/": "IDT alias i2MOErA not found at three-prime end position.", - "/32MOErA/T": "IDT alias 32MOErA not found at five-prime end position.", - "/52MOErA/": "IDT alias 52MOErA not found at three-prime end position.", - "T/52MOErA/": "IDT alias 52MOErA not found at three-prime end position.", - "/3Phos/T": "IDT alias 3Phos not found at three-prime end position.", - "T/5Phos/": "IDT alias 5Phos not found at three-prime end position.", + "/32MOErA/T": "IDT alias '32MOErA' cannot be used at five prime end.", + "T/52MOErA/": "IDT alias '52MOErA' cannot be used at three prime end.", + "/3Phos/T": "IDT alias '3Phos' cannot be used at five prime end.", + "T/5Phos/": "IDT alias '5Phos' cannot be used at three prime end.", "/5Phos/*A": "/5Phos/ cannot be modified to 'sP'", "r/5Phos/A": "Sugar prefix could not be used with modified monomer.", "+/5Phos/A": "Sugar prefix could not be used with modified monomer.", diff --git a/api/tests/integration/tests/formats/ket_to_idt.py b/api/tests/integration/tests/formats/ket_to_idt.py index 0d6d858f4f..d41c4fe93f 100644 --- a/api/tests/integration/tests/formats/ket_to_idt.py +++ b/api/tests/integration/tests/formats/ket_to_idt.py @@ -41,6 +41,11 @@ def find_diff(a, b): "idt_mod_phosphates": "/5Phos//i2MOErC//3Phos/", "idt_mixed": "/5Phos/+A*/i2MOErA/*rG/3Phos/", "idt_many_molecules": "ACTG\n/52MOErA/*AU/3Phos/\nAC/i2MOErC//3Phos/\nTACG", + # "idt_i2moera": "/i2MOErA/", + # "idt_52moera": "/52MOErA/", + "idt_i2moera_t": "/52MOErA/T", + # "idt_t_i2moera": "T/i2MOErA/", + "idt_unresolved": "/unr1//unr2/", } for filename in sorted(idt_data.keys()): diff --git a/api/tests/integration/tests/formats/ref/chem.ket b/api/tests/integration/tests/formats/ref/chem.ket index 353cfc4082..866948e540 100644 --- a/api/tests/integration/tests/formats/ref/chem.ket +++ b/api/tests/integration/tests/formats/ref/chem.ket @@ -17,6 +17,7 @@ "name": "cch", "attachmentPoints": [ { + "id": "Br", "type": "right", "label": "R2", "attachmentAtom": 8, @@ -27,6 +28,7 @@ } }, { + "id": "Cx", "type": "side", "label": "R3", "attachmentAtom": 6, @@ -37,6 +39,7 @@ } }, { + "id": "Ex", "type": "side", "label": "R5", "attachmentAtom": 3, diff --git a/api/tests/integration/tests/formats/ref/conj_no_class.mol b/api/tests/integration/tests/formats/ref/conj_no_class.mol index e840a0259c..c9ac3163c1 100644 --- a/api/tests/integration/tests/formats/ref/conj_no_class.mol +++ b/api/tests/integration/tests/formats/ref/conj_no_class.mol @@ -64,7 +64,8 @@ M V30 41 Cys 20.1724 -16.6666 0.0 0 CLASS=AA SEQID=16 ATTCHORD=(4 34 Al 35 B- M V30 r) M V30 42 Cys 17.8843 -16.6666 0.0 0 CLASS=AA SEQID=10 ATTCHORD=(4 29 Al 30 B- M V30 r) -M V30 43 SMCC 14.1885 -2.31085 0.0 0 CLASS=LINKER ATTCHORD=(4 40 Br 39 Al) +M V30 43 SMCC 14.1885 -2.31085 0.0 0 CLASS=LINKER SEQID=1 ATTCHORD=(4 40 Br - +M V30 39 Al) M V30 END ATOM M V30 BEGIN BOND M V30 1 1 2 1 diff --git a/api/tests/integration/tests/formats/ref/fmoc.ket b/api/tests/integration/tests/formats/ref/fmoc.ket index f01a5486d5..de16853a1a 100644 --- a/api/tests/integration/tests/formats/ref/fmoc.ket +++ b/api/tests/integration/tests/formats/ref/fmoc.ket @@ -4091,6 +4091,7 @@ "name": "Fmoc", "attachmentPoints": [ { + "id": "Cx", "type": "side", "label": "R3", "attachmentAtom": 16 diff --git a/api/tests/integration/tests/formats/ref/fmoc.mol b/api/tests/integration/tests/formats/ref/fmoc.mol index 3d6d08101d..92ce27cc3b 100644 --- a/api/tests/integration/tests/formats/ref/fmoc.mol +++ b/api/tests/integration/tests/formats/ref/fmoc.mol @@ -52,7 +52,7 @@ M V30 ) M V30 30 Gly 23.2645 -6.2109 0.0 0 CLASS=AA SEQID=31 ATTCHORD=(2 29 Al) M V30 31 E 16.1313 -5.15906 0.0 0 CLASS=MODAA SEQID=21 ATTCHORD=(6 32 Cx 20 - M V30 Al 21 Br) -M V30 32 Fmoc 15.7704 -3.60196 0.0 0 CLASS=LINKER ATTCHORD=(2 31 Cx) +M V30 32 Fmoc 15.7704 -3.60196 0.0 0 CLASS=LINKER SEQID=1 ATTCHORD=(2 31 Cx) M V30 END ATOM M V30 BEGIN BOND M V30 1 1 1 2 diff --git a/api/tests/integration/tests/formats/ref/idt_52moera.ket b/api/tests/integration/tests/formats/ref/idt_52moera.ket new file mode 100644 index 0000000000..c0274137d9 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/idt_52moera.ket @@ -0,0 +1,541 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-MOE___2'-O-Methoxyethyl ribose" + }, + { + "$ref": "monomerTemplate-A___Adenine" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 0, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "MOE", + "templateId": "MOE___2'-O-Methoxyethyl ribose" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 0, + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "A", + "templateId": "A___Adenine" + }, + "monomerTemplate-MOE___2'-O-Methoxyethyl ribose": { + "type": "monomerTemplate", + "id": "MOE___2'-O-Methoxyethyl ribose", + "class": "Sugar", + "classHELM": "RNA", + "alias": "MOE", + "name": "MOE", + "fullName": "2'-O-Methoxyethyl ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + "attachmentPoints": [ + { + "attachmentAtom": 9, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 11 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -2.2339999675750734, + 0.03280000016093254, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.722000002861023, + 1.4427000284194947, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -1.0513999462127686, + -0.8898000121116638, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -0.22290000319480897, + 1.3914999961853028, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.1915999948978424, + -0.05000000074505806, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "O", + "location": [ + 0.6962000131607056, + 2.5745999813079836, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.5987999439239503, + -0.5644000172615051, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -2.5632998943328859, + 2.682499885559082, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.0923000574111939, + -2.089099884033203, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -4.060200214385986, + 2.5745999813079836, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -4.734099864959717, + 3.5673999786376955, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 1.88510000705719, + 2.411799907684326, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.8583999872207642, + -2.04259991645813, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.2679998874664308, + -2.557800054550171, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.5274999141693117, + -4.036099910736084, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 4.654600143432617, + -4.4481000900268559, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 8 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 4, + 6 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 12 + ] + }, + { + "type": 1, + "atoms": [ + 12, + 13 + ] + }, + { + "type": 1, + "atoms": [ + 13, + 14 + ] + }, + { + "type": 1, + "atoms": [ + 14, + 15 + ] + } + ] + }, + "monomerTemplate-A___Adenine": { + "type": "monomerTemplate", + "id": "A___Adenine", + "class": "Base", + "classHELM": "RNA", + "alias": "A", + "name": "Ade", + "fullName": "Adenine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ade", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 10 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/idt_i2moera.ket b/api/tests/integration/tests/formats/ref/idt_i2moera.ket new file mode 100644 index 0000000000..c0274137d9 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/idt_i2moera.ket @@ -0,0 +1,541 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-MOE___2'-O-Methoxyethyl ribose" + }, + { + "$ref": "monomerTemplate-A___Adenine" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 0, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "MOE", + "templateId": "MOE___2'-O-Methoxyethyl ribose" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 0, + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "A", + "templateId": "A___Adenine" + }, + "monomerTemplate-MOE___2'-O-Methoxyethyl ribose": { + "type": "monomerTemplate", + "id": "MOE___2'-O-Methoxyethyl ribose", + "class": "Sugar", + "classHELM": "RNA", + "alias": "MOE", + "name": "MOE", + "fullName": "2'-O-Methoxyethyl ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + "attachmentPoints": [ + { + "attachmentAtom": 9, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 11 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -2.2339999675750734, + 0.03280000016093254, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.722000002861023, + 1.4427000284194947, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -1.0513999462127686, + -0.8898000121116638, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -0.22290000319480897, + 1.3914999961853028, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.1915999948978424, + -0.05000000074505806, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "O", + "location": [ + 0.6962000131607056, + 2.5745999813079836, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.5987999439239503, + -0.5644000172615051, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -2.5632998943328859, + 2.682499885559082, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.0923000574111939, + -2.089099884033203, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -4.060200214385986, + 2.5745999813079836, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -4.734099864959717, + 3.5673999786376955, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 1.88510000705719, + 2.411799907684326, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.8583999872207642, + -2.04259991645813, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.2679998874664308, + -2.557800054550171, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.5274999141693117, + -4.036099910736084, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 4.654600143432617, + -4.4481000900268559, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 8 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 4, + 6 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 12 + ] + }, + { + "type": 1, + "atoms": [ + 12, + 13 + ] + }, + { + "type": 1, + "atoms": [ + 13, + 14 + ] + }, + { + "type": 1, + "atoms": [ + 14, + 15 + ] + } + ] + }, + "monomerTemplate-A___Adenine": { + "type": "monomerTemplate", + "id": "A___Adenine", + "class": "Base", + "classHELM": "RNA", + "alias": "A", + "name": "Ade", + "fullName": "Adenine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ade", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 10 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/idt_i2moera_t.ket b/api/tests/integration/tests/formats/ref/idt_i2moera_t.ket new file mode 100644 index 0000000000..6177aa3bf4 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/idt_i2moera_t.ket @@ -0,0 +1,1112 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + }, + { + "$ref": "monomer2" + }, + { + "$ref": "monomer3" + }, + { + "$ref": "monomer4" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer2", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer4", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer2", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer3", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-MOE___2'-O-Methoxyethyl ribose" + }, + { + "$ref": "monomerTemplate-P___Phosphate" + }, + { + "$ref": "monomerTemplate-A___Adenine" + }, + { + "$ref": "monomerTemplate-dRib" + }, + { + "$ref": "monomerTemplate-Thy" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 0, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "MOE", + "templateId": "MOE___2'-O-Methoxyethyl ribose" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 0, + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "A", + "templateId": "A___Adenine" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 0, + "position": { + "x": 1.600000023841858, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer3": { + "type": "monomer", + "id": "3", + "seqid": 1, + "position": { + "x": 3.200000047683716, + "y": -0.0 + }, + "alias": "dR", + "templateId": "dRib" + }, + "monomer4": { + "type": "monomer", + "id": "4", + "seqid": 1, + "position": { + "x": 3.200000047683716, + "y": -1.600000023841858 + }, + "alias": "T", + "templateId": "Thy" + }, + "monomerTemplate-MOE___2'-O-Methoxyethyl ribose": { + "type": "monomerTemplate", + "id": "MOE___2'-O-Methoxyethyl ribose", + "class": "Sugar", + "classHELM": "RNA", + "alias": "MOE", + "name": "MOE", + "fullName": "2'-O-Methoxyethyl ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + "attachmentPoints": [ + { + "attachmentAtom": 9, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 11 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -2.2339999675750734, + 0.03280000016093254, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.722000002861023, + 1.4427000284194947, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -1.0513999462127686, + -0.8898000121116638, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -0.22290000319480897, + 1.3914999961853028, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.1915999948978424, + -0.05000000074505806, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "O", + "location": [ + 0.6962000131607056, + 2.5745999813079836, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.5987999439239503, + -0.5644000172615051, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -2.5632998943328859, + 2.682499885559082, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.0923000574111939, + -2.089099884033203, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -4.060200214385986, + 2.5745999813079836, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -4.734099864959717, + 3.5673999786376955, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 1.88510000705719, + 2.411799907684326, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.8583999872207642, + -2.04259991645813, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.2679998874664308, + -2.557800054550171, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.5274999141693117, + -4.036099910736084, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 4.654600143432617, + -4.4481000900268559, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 8 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 4, + 6 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 12 + ] + }, + { + "type": 1, + "atoms": [ + 12, + 13 + ] + }, + { + "type": 1, + "atoms": [ + 13, + 14 + ] + }, + { + "type": 1, + "atoms": [ + 14, + 15 + ] + } + ] + }, + "monomerTemplate-P___Phosphate": { + "type": "monomerTemplate", + "id": "P___Phosphate", + "class": "Phosphate", + "classHELM": "RNA", + "alias": "P", + "name": "P", + "fullName": "Phosphate", + "naturalAnalogShort": "P", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 1 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "P", + "location": [ + -0.23989999294281007, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.4399000406265259, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.3598000109195709, + -1.0393999814987183, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.960099995136261, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.3598000109195709, + 1.0393999814987183, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + } + ] + }, + "monomerTemplate-A___Adenine": { + "type": "monomerTemplate", + "id": "A___Adenine", + "class": "Base", + "classHELM": "RNA", + "alias": "A", + "name": "Ade", + "fullName": "Adenine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ade", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 10 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + }, + "monomerTemplate-dRib": { + "type": "monomerTemplate", + "id": "dRib", + "class": "Sugar", + "classHELM": "RNA", + "alias": "dR", + "name": "dRib", + "fullName": "Deoxy-Ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + "attachmentPoints": [ + { + "attachmentAtom": 8, + "leavingGroup": { + "atoms": [ + 9 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 7 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -0.8787999749183655, + -1.2079999446868897, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3668000102043152, + 0.20190000534057618, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.30379998683929446, + -2.13070011138916, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.1323000192642213, + 0.15060000121593476, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.5468000173568726, + -1.2910000085830689, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.051500082015991, + 1.333799958229065, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.2080999612808228, + 1.4416999816894532, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.262800008058548, + -3.329900026321411, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -2.7049999237060549, + 1.333799958229065, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.3787999153137209, + 2.32669997215271, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 3.240299940109253, + 1.1708999872207642, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 6 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 9 + ] + } + ] + }, + "monomerTemplate-Thy": { + "type": "monomerTemplate", + "id": "Thy", + "class": "Base", + "classHELM": "RNA", + "alias": "T", + "name": "Thy", + "fullName": "Thymine", + "naturalAnalogShort": "T", + "naturalAnalog": "Thy", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9882000088691711, + 3.688199996948242, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.7116999626159669, + -0.9883999824523926, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 9 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/idt_t_i2moera.ket b/api/tests/integration/tests/formats/ref/idt_t_i2moera.ket new file mode 100644 index 0000000000..1ab870956e --- /dev/null +++ b/api/tests/integration/tests/formats/ref/idt_t_i2moera.ket @@ -0,0 +1,1112 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + }, + { + "$ref": "monomer2" + }, + { + "$ref": "monomer3" + }, + { + "$ref": "monomer4" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer2", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer4", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer2", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer3", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-dRib" + }, + { + "$ref": "monomerTemplate-Thy" + }, + { + "$ref": "monomerTemplate-P" + }, + { + "$ref": "monomerTemplate-MOE___2'-O-Methoxyethyl ribose" + }, + { + "$ref": "monomerTemplate-A___Adenine" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 0, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "dR", + "templateId": "dRib" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 0, + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "T", + "templateId": "Thy" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 0, + "position": { + "x": 1.600000023841858, + "y": -0.0 + }, + "alias": "P", + "templateId": "P" + }, + "monomer3": { + "type": "monomer", + "id": "3", + "seqid": 1, + "position": { + "x": 3.200000047683716, + "y": -0.0 + }, + "alias": "MOE", + "templateId": "MOE___2'-O-Methoxyethyl ribose" + }, + "monomer4": { + "type": "monomer", + "id": "4", + "seqid": 1, + "position": { + "x": 3.200000047683716, + "y": -1.600000023841858 + }, + "alias": "A", + "templateId": "A___Adenine" + }, + "monomerTemplate-dRib": { + "type": "monomerTemplate", + "id": "dRib", + "class": "Sugar", + "classHELM": "RNA", + "alias": "dR", + "name": "dRib", + "fullName": "Deoxy-Ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + "attachmentPoints": [ + { + "attachmentAtom": 8, + "leavingGroup": { + "atoms": [ + 9 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 7 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -0.8787999749183655, + -1.2079999446868897, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3668000102043152, + 0.20190000534057618, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.30379998683929446, + -2.13070011138916, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.1323000192642213, + 0.15060000121593476, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.5468000173568726, + -1.2910000085830689, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.051500082015991, + 1.333799958229065, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.2080999612808228, + 1.4416999816894532, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.262800008058548, + -3.329900026321411, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -2.7049999237060549, + 1.333799958229065, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.3787999153137209, + 2.32669997215271, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 3.240299940109253, + 1.1708999872207642, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 6 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 9 + ] + } + ] + }, + "monomerTemplate-Thy": { + "type": "monomerTemplate", + "id": "Thy", + "class": "Base", + "classHELM": "RNA", + "alias": "T", + "name": "Thy", + "fullName": "Thymine", + "naturalAnalogShort": "T", + "naturalAnalog": "Thy", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9882000088691711, + 3.688199996948242, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.7116999626159669, + -0.9883999824523926, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 9 + ] + } + ] + }, + "monomerTemplate-P": { + "type": "monomerTemplate", + "id": "P", + "class": "Phosphate", + "classHELM": "RNA", + "alias": "P", + "name": "P", + "fullName": "Phosphate", + "naturalAnalogShort": "P", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 1 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "P", + "location": [ + -0.19991692900657655, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.199918270111084, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.2998337149620056, + -0.8661677837371826, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.8000843524932861, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.2998337149620056, + 0.8661677837371826, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + } + ] + }, + "monomerTemplate-MOE___2'-O-Methoxyethyl ribose": { + "type": "monomerTemplate", + "id": "MOE___2'-O-Methoxyethyl ribose", + "class": "Sugar", + "classHELM": "RNA", + "alias": "MOE", + "name": "MOE", + "fullName": "2'-O-Methoxyethyl ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + "attachmentPoints": [ + { + "attachmentAtom": 9, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 11 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -2.2339999675750734, + 0.03280000016093254, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.722000002861023, + 1.4427000284194947, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -1.0513999462127686, + -0.8898000121116638, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -0.22290000319480897, + 1.3914999961853028, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.1915999948978424, + -0.05000000074505806, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "O", + "location": [ + 0.6962000131607056, + 2.5745999813079836, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.5987999439239503, + -0.5644000172615051, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -2.5632998943328859, + 2.682499885559082, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.0923000574111939, + -2.089099884033203, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -4.060200214385986, + 2.5745999813079836, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -4.734099864959717, + 3.5673999786376955, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 1.88510000705719, + 2.411799907684326, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.8583999872207642, + -2.04259991645813, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.2679998874664308, + -2.557800054550171, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.5274999141693117, + -4.036099910736084, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 4.654600143432617, + -4.4481000900268559, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 8 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 4, + 6 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 12 + ] + }, + { + "type": 1, + "atoms": [ + 12, + 13 + ] + }, + { + "type": 1, + "atoms": [ + 13, + 14 + ] + }, + { + "type": 1, + "atoms": [ + 14, + 15 + ] + } + ] + }, + "monomerTemplate-A___Adenine": { + "type": "monomerTemplate", + "id": "A___Adenine", + "class": "Base", + "classHELM": "RNA", + "alias": "A", + "name": "Ade", + "fullName": "Adenine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ade", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 10 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/idt_unresolved.ket b/api/tests/integration/tests/formats/ref/idt_unresolved.ket new file mode 100644 index 0000000000..f2579d553d --- /dev/null +++ b/api/tests/integration/tests/formats/ref/idt_unresolved.ket @@ -0,0 +1,121 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-unknown_monomer_with_idt_alias_unr1" + }, + { + "$ref": "monomerTemplate-unknown_monomer_with_idt_alias_unr2" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 0, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "unknown_monomer_with_idt_alias_unr1", + "templateId": "unknown_monomer_with_idt_alias_unr1" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 1, + "position": { + "x": 1.600000023841858, + "y": -0.0 + }, + "alias": "unknown_monomer_with_idt_alias_unr2", + "templateId": "unknown_monomer_with_idt_alias_unr2" + }, + "monomerTemplate-unknown_monomer_with_idt_alias_unr1": { + "type": "monomerTemplate", + "id": "unknown_monomer_with_idt_alias_unr1", + "class": "Chem", + "classHELM": "CHEM", + "alias": "unknown_monomer_with_idt_alias_unr1", + "name": "unknown_monomer_with_idt_alias_unr1", + "unresolved": true, + "idtAliases": { + "base": "unr1", + "modifications": { + "endpoint5": "unr1", + "internal": "unr1", + "endpoint3": "unr1" + } + }, + "attachmentPoints": [ + { + "attachmentAtom": -1 + }, + { + "attachmentAtom": -1 + }, + { + "attachmentAtom": -1 + }, + { + "attachmentAtom": -1 + } + ], + "atoms": [], + "bonds": [] + }, + "monomerTemplate-unknown_monomer_with_idt_alias_unr2": { + "type": "monomerTemplate", + "id": "unknown_monomer_with_idt_alias_unr2", + "class": "Chem", + "classHELM": "CHEM", + "alias": "unknown_monomer_with_idt_alias_unr2", + "name": "unknown_monomer_with_idt_alias_unr2", + "unresolved": true, + "idtAliases": { + "base": "unr2", + "modifications": { + "endpoint5": "unr2", + "internal": "unr2", + "endpoint3": "unr2" + } + }, + "attachmentPoints": [ + { + "attachmentAtom": -1 + }, + { + "attachmentAtom": -1 + }, + { + "attachmentAtom": -1 + }, + { + "attachmentAtom": -1 + } + ], + "atoms": [], + "bonds": [] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/macro/conjugates/pep-chem-rna.mol b/api/tests/integration/tests/formats/ref/macro/conjugates/pep-chem-rna.mol index 3e242d17d2..712db5490e 100644 --- a/api/tests/integration/tests/formats/ref/macro/conjugates/pep-chem-rna.mol +++ b/api/tests/integration/tests/formats/ref/macro/conjugates/pep-chem-rna.mol @@ -19,8 +19,8 @@ M V30 9 T 7.87333 -3.15333 0.0 0 CLASS=BASE SEQID=4 ATTCHORD=(2 10 Al) M V30 10 R 7.87333 -2.47333 0.0 0 CLASS=SUGAR SEQID=4 ATTCHORD=(6 9 Cx 11 Br- M V30 8 Al) M V30 11 P 8.55333 -2.47333 0.0 0 CLASS=PHOSPHATE SEQID=4 ATTCHORD=(2 10 Al) -M V30 12 Test-6-Ch 4.64 -3.55333 0.0 0 CLASS=LINKER ATTCHORD=(6 4 Cx 14 Dx 1- -M V30 7 Br) +M V30 12 Test-6-Ch 4.64 -3.55333 0.0 0 CLASS=LINKER SEQID=1 ATTCHORD=(6 4 Cx- +M V30 14 Dx 17 Br) M V30 13 A 3.37333 -4.52667 0.0 0 CLASS=AA SEQID=1 ATTCHORD=(2 14 Br) M V30 14 C 4.56667 -4.54667 0.0 0 CLASS=AA SEQID=2 ATTCHORD=(6 13 Al 15 Br 1- M V30 2 Cx) diff --git a/core/indigo-core/layout/src/sequence_layout.cpp b/core/indigo-core/layout/src/sequence_layout.cpp index 5e40ec361f..e5a93f4d82 100644 --- a/core/indigo-core/layout/src/sequence_layout.cpp +++ b/core/indigo-core/layout/src/sequence_layout.cpp @@ -62,8 +62,12 @@ void SequenceLayout::addNeigbourDirections(BaseMolecule& mol, DirectionsPriority { std::string to_class = mol.getTemplateAtomClass(nei_dir.second); std::string from_class = mol.getTemplateAtomClass(back_dir.second); + bool isCHEM = (from_class == kMonomerClassCHEM) || (to_class == kMonomerClassCHEM); + bool isBothAminoAcid = isAminoAcidClass(to_class) && isAminoAcidClass(from_class); + bool isBothNucleic = isNucleicClass(to_class) && isNucleicClass(from_class); // if to_class and from_class are different backbone types, treat the connection as branch - if (!((isAminoAcidClass(to_class) && isAminoAcidClass(from_class)) || (isNucleicClass(to_class) && isNucleicClass(from_class)))) + // CHEM is ok in any sequence + if (!(isCHEM || isBothAminoAcid || isBothNucleic)) { pq.emplace(kBranchAttachmentPointIdx, nei_dir.second, kBranchAttachmentPointIdx, back_dir.second); continue; @@ -94,8 +98,9 @@ void SequenceLayout::addSequenceElement(BaseMolecule& mol, PriorityElement& pel, bool isNucleoTo = isNucleicClass(to_class) || isNucleotideClass(to_class); bool isAAFrom = isAminoAcidClass(from_class); bool isAATo = isAminoAcidClass(to_class); + bool isCHEM = (kMonomerClassCHEM == from_class) || (kMonomerClassCHEM == to_class); - if ((isNucleoFrom && isNucleoTo) || (isAAFrom && isAATo)) + if ((isNucleoFrom && isNucleoTo) || (isAAFrom && isAATo) || isCHEM) { if (pel.from_dir.first == kRightAttachmentPointIdx && pel.dir.first == kLeftAttachmentPointIdx) { diff --git a/core/indigo-core/molecule/idt_alias.h b/core/indigo-core/molecule/idt_alias.h new file mode 100644 index 0000000000..0c59cf3474 --- /dev/null +++ b/core/indigo-core/molecule/idt_alias.h @@ -0,0 +1,131 @@ +#ifndef __idt_alias__ +#define __idt_alias__ + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4251) +#endif + +#include "base_cpp/exception.h" +#include + +namespace indigo +{ + enum class IdtModification + { + FIVE_PRIME_END, + INTERNAL, + THREE_PRIME_END + }; + + class DLLEXPORT IdtAlias + { + public: + DECL_ERROR; + IdtAlias(){}; + IdtAlias(const std::string& base) + : _base(base), _five_prime_end("5" + base), _internal("i" + base), _three_prime_end("3" + base), _has_modifications(false){}; + IdtAlias(const std::string& base, const std::string& five_prime_end, const std::string& internal, const std::string& three_prime_end) + : _base(base), _five_prime_end(five_prime_end), _internal(internal), _three_prime_end(three_prime_end), _has_modifications(true){}; + + inline void setModifications(const std::string& five_prime_end, const std::string& internal, const std::string& three_prime_end) + { + _five_prime_end = five_prime_end; + _internal = internal; + _three_prime_end = three_prime_end; + _has_modifications = true; + }; + + inline void setModification(IdtModification modification, const std::string& alias) + { + switch (modification) + { + case IdtModification::FIVE_PRIME_END: + _five_prime_end = alias; + break; + case IdtModification::INTERNAL: + _internal = alias; + break; + case IdtModification::THREE_PRIME_END: + _three_prime_end = alias; + break; + }; + _has_modifications = true; + }; + + inline bool hasModification(IdtModification modification) const + { + switch (modification) + { + case IdtModification::FIVE_PRIME_END: + return hasFivePrimeEnd(); + case IdtModification::INTERNAL: + return hasInternal(); + case IdtModification::THREE_PRIME_END: + return hasThreePrimeEnd(); + }; + return false; + } + + inline bool hasFivePrimeEnd() const + { + return _five_prime_end.size() != 0; + } + + inline bool hasInternal() const + { + return _internal.size() != 0; + } + + inline bool hasThreePrimeEnd() const + { + return _three_prime_end.size() != 0; + } + + const std::string& getModification(IdtModification modification) const; + + const std::string& getFivePrimeEnd() const; + const std::string& getInternal() const; + const std::string& getThreePrimeEnd() const; + + const std::string& getBase() const + { + return _base; + }; + + inline static std::string IdtModificationToString(IdtModification mod) + { + switch (mod) + { + case IdtModification::FIVE_PRIME_END: + return "five-prime end"; + case IdtModification::INTERNAL: + return "internal"; + case IdtModification::THREE_PRIME_END: + return "three-prime end"; + }; + return "unknown modification"; + }; + + static std::string getBaseForMod(const std::string& alias); + + inline bool hasModifications() + { + return _has_modifications; + } + + private: + std::string _base; + std::string _five_prime_end; + std::string _internal; + std::string _three_prime_end; + bool _has_modifications; + }; + +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif diff --git a/core/indigo-core/molecule/molecule_sgroups.h b/core/indigo-core/molecule/molecule_sgroups.h index 8f1e4edc98..e651d40381 100644 --- a/core/indigo-core/molecule/molecule_sgroups.h +++ b/core/indigo-core/molecule/molecule_sgroups.h @@ -171,6 +171,8 @@ namespace indigo int seqid; // SEQID (V3000 - 2017) Array sa_natreplace; // NATREPLACE (V3000 - 2017) + bool unresolved; + struct _AttachmentPoint { _AttachmentPoint() : aidx(-1), lvidx(-1) diff --git a/core/indigo-core/molecule/molecule_tgroups.h b/core/indigo-core/molecule/molecule_tgroups.h index d80f1f0999..e27f374986 100644 --- a/core/indigo-core/molecule/molecule_tgroups.h +++ b/core/indigo-core/molecule/molecule_tgroups.h @@ -23,6 +23,7 @@ #include "base_cpp/ptr_pool.h" #include "base_cpp/red_black.h" #include "base_cpp/tlscont.h" +#include "molecule/idt_alias.h" #ifdef _WIN32 #pragma warning(push) @@ -45,6 +46,8 @@ namespace indigo Array tgroup_natreplace; Array tgroup_text_id; int tgroup_id; + bool unresolved; + IdtAlias idt_alias; TGroup(); ~TGroup(); diff --git a/core/indigo-core/molecule/monomers_lib.h b/core/indigo-core/molecule/monomers_lib.h index 698e3ccade..388d943039 100644 --- a/core/indigo-core/molecule/monomers_lib.h +++ b/core/indigo-core/molecule/monomers_lib.h @@ -7,6 +7,7 @@ #endif #include "base_cpp/exception.h" +#include "molecule/idt_alias.h" #include "molecule/molecule_tgroups.h" #include #include @@ -90,84 +91,6 @@ namespace indigo std::unordered_map> _aminoacids_lib; }; - enum class IdtModification - { - FIVE_PRIME_END, - INTERNAL, - THREE_PRIME_END, - }; - - class DLLEXPORT IdtAlias - { - public: - DECL_ERROR; - IdtAlias(){}; - IdtAlias(const std::string& base) : _base(base), _five_prime_end("5" + base), _internal("i" + base), _three_prime_end("3" + base){}; - IdtAlias(const std::string& base, const std::string& five_prime_end, const std::string& internal, const std::string& three_prime_end) - : _base(base), _five_prime_end(five_prime_end), _internal(internal), _three_prime_end(three_prime_end){}; - - inline void setModifications(const std::string& five_prime_end, const std::string& internal, const std::string& three_prime_end) - { - _five_prime_end = five_prime_end; - _internal = internal; - _three_prime_end = three_prime_end; - }; - - inline bool hasModification(IdtModification modification) const - { - switch (modification) - { - case IdtModification::FIVE_PRIME_END: - return hasFivePrimeEnd(); - case IdtModification::INTERNAL: - return hasInternal(); - case IdtModification::THREE_PRIME_END: - return hasThreePrimeEnd(); - }; - return false; - } - - inline bool hasFivePrimeEnd() const - { - return _five_prime_end.size() != 0; - } - - inline bool hasInternal() const - { - return _internal.size() != 0; - } - - inline bool hasThreePrimeEnd() const - { - return _three_prime_end.size() != 0; - } - - const std::string& getModification(IdtModification modification) const; - const std::string& getFivePrimeEnd() const; - const std::string& getInternal() const; - const std::string& getThreePrimeEnd() const; - - inline static std::string IdtModificationToString(IdtModification mod) - { - switch (mod) - { - case IdtModification::FIVE_PRIME_END: - return "five-prime end"; - case IdtModification::INTERNAL: - return "internal"; - case IdtModification::THREE_PRIME_END: - return "three-prime end"; - }; - return "unknown modification"; - }; - - private: - std::string _base; - std::string _five_prime_end; - std::string _internal; - std::string _three_prime_end; - }; - enum class AttachmentPointType { LEFT, @@ -213,15 +136,15 @@ namespace indigo MonomerTemplate() = delete; MonomerTemplate(const std::string& id, MonomerClass mt_class, const std::string& class_HELM, const std::string& full_name, const std::string& alias, - const std::string& natural_analog, const TGroup& tgroup) - : _id(id), _class(mt_class), _class_HELM(class_HELM), _full_name(full_name), _alias(alias), _natural_analog(natural_analog) + const std::string& natural_analog, bool unresolved, const TGroup& tgroup) + : _id(id), _class(mt_class), _class_HELM(class_HELM), _full_name(full_name), _alias(alias), _natural_analog(natural_analog), _unresolved(unresolved) { _tgroup.copy(tgroup); } MonomerTemplate(const MonomerTemplate& other) : _id(other._id), _class(other._class), _class_HELM(other._class_HELM), _full_name(other._full_name), _alias(other._alias), - _natural_analog(other._natural_analog), _idt_alias(other._idt_alias) + _natural_analog(other._natural_analog), _idt_alias(other._idt_alias), _unresolved(other._unresolved) { _tgroup.copy(other._tgroup); } @@ -298,6 +221,11 @@ namespace indigo return _natural_analog; } + inline const IdtAlias& idtAlias() const + { + return _idt_alias; + } + inline void setIdtAlias(const IdtAlias& idt_alias) { _idt_alias = idt_alias; @@ -305,6 +233,8 @@ namespace indigo bool hasIdtAlias(const std::string& alias, IdtModification mod); + bool hasIdtAliasBase(const std::string& alias_base); + private: std::string _id; MonomerClass _class; @@ -315,6 +245,7 @@ namespace indigo std::map _attachment_points; std::string molecule; IdtAlias _idt_alias; + bool _unresolved; TGroup _tgroup; }; @@ -338,6 +269,8 @@ namespace indigo bool hasIdtAlias(const std::string& alias, IdtModification mod); + bool hasIdtAliasBase(const std::string& alias_base); + inline bool hasTemplateClass(MonomerClass monomer_class) { for (auto& id_template : _monomer_templates) @@ -359,7 +292,7 @@ namespace indigo return _id; } - inline const IdtAlias& idt_alias() const + inline const IdtAlias& idtAlias() const { return _idt_alias; } @@ -386,19 +319,44 @@ namespace indigo inline void addMonomerTemplate(MonomerTemplate& monomer_template) { - _monomer_templates.emplace(monomer_template.id(), monomer_template); - }; + _monomer_templates.erase(monomer_template.id()); + auto res = _monomer_templates.emplace(monomer_template.id(), monomer_template); + if (res.second) + for (auto modification : {IdtModification::FIVE_PRIME_END, IdtModification::INTERNAL, IdtModification::THREE_PRIME_END}) + { + if (monomer_template.idtAlias().hasModification(modification)) + { + const std::string& alias = monomer_template.idtAlias().getModification(modification); + MonomerTemplate& templ_ref = res.first->second; + _id_alias_to_monomer_templates.emplace(alias, std::make_pair(std::ref(templ_ref), modification)); + } + } + } + inline void addMonomerGroupTemplate(const MonomerGroupTemplate& monomer_group_template) { - _monomer_group_templates.emplace(monomer_group_template.id(), (monomer_group_template)); - }; + _monomer_group_templates.erase(monomer_group_template.id()); + auto res = _monomer_group_templates.emplace(monomer_group_template.id(), monomer_group_template); + if (res.second) + for (auto modification : {IdtModification::FIVE_PRIME_END, IdtModification::INTERNAL, IdtModification::THREE_PRIME_END}) + { + if (monomer_group_template.idtAlias().hasModification(modification)) + { + const std::string& alias = monomer_group_template.idtAlias().getModification(modification); + _id_alias_to_monomer_group_templates.emplace(alias, std::make_pair(std::ref(res.first->second), modification)); + } + } + } const MonomerTemplate& getMonomerTemplateById(const std::string& monomer_template_id); const std::string& getMonomerTemplateIdByAlias(MonomerClass monomer_class, const std::string& monomer_template_alias); MonomerGroupTemplate& getMonomerGroupTemplateById(const std::string& monomer_template_id); - const std::string& getMonomerTemplateIdByIdtAliasAndMod(const std::string& alias, IdtModification mod); - const std::string& getMGTidByIdtAliasAndMod(const std::string& alias, IdtModification mod); + const std::string& getMonomerTemplateIdByIdtAliasBase(const std::string& alias_base); + const std::string& getMGTidByIdtAliasBase(const std::string& alias_base); + + const std::string& getMonomerTemplateIdByIdtAlias(const std::string& alias, IdtModification& mod); + const std::string& getMGTidByIdtAlias(const std::string& alias, IdtModification& mod); const std::string& getIdtAliasByModification(IdtModification modification, const std::string sugar_id, const std::string base_id, const std::string phosphate_id); @@ -409,6 +367,8 @@ namespace indigo private: std::map _monomer_templates; std::map _monomer_group_templates; + std::map> _id_alias_to_monomer_templates; + std::map> _id_alias_to_monomer_group_templates; }; } diff --git a/core/indigo-core/molecule/sequence_loader.h b/core/indigo-core/molecule/sequence_loader.h index 21c8b2f7f1..64d4e9668c 100644 --- a/core/indigo-core/molecule/sequence_loader.h +++ b/core/indigo-core/molecule/sequence_loader.h @@ -82,6 +82,9 @@ namespace indigo bool addMonomerTemplate(BaseMolecule& mol, MonomerClass mt, const std::string& alias); bool checkAddTemplate(BaseMolecule& mol, MonomerClass type, const std::string monomer); SequenceLoader(const SequenceLoader&); // no implicit copy + + static void check_monomer_place(std::string& idt_alias, IdtModification mon_mod, IdtModification alias_mod, bool has_prev_mon); + Scanner& _scanner; std::unordered_set, pair_hash> _added_templates; const MonomerTemplates& _mon_lib; diff --git a/core/indigo-core/molecule/sequence_saver.h b/core/indigo-core/molecule/sequence_saver.h index 42a712885b..c3febe9575 100644 --- a/core/indigo-core/molecule/sequence_saver.h +++ b/core/indigo-core/molecule/sequence_saver.h @@ -53,6 +53,10 @@ namespace indigo void saveMolecule(BaseMolecule& mol, SeqFormat sf = SeqFormat::Sequence); + protected: + TGroup& getTGroup(); + std::string saveIdt(BaseMolecule& mol, std::deque& sequence); + private: SequenceSaver(const SequenceSaver&); // no implicit copy Output& _output; diff --git a/core/indigo-core/molecule/src/base_molecule.cpp b/core/indigo-core/molecule/src/base_molecule.cpp index d5a469f789..390dd7addb 100644 --- a/core/indigo-core/molecule/src/base_molecule.cpp +++ b/core/indigo-core/molecule/src/base_molecule.cpp @@ -3667,6 +3667,12 @@ bool BaseMolecule::_mergeSGroupWithSubmolecule(SGroup& sgroup, SGroup& super, Ba merged = true; } + if (super.sgroup_type == SGroup::SG_TYPE_SUP && static_cast(super).unresolved) + { + static_cast(sgroup).unresolved = true; + merged = true; + } + if (merged) updateEditRevision(); return merged; diff --git a/core/indigo-core/molecule/src/idt_alias.cpp b/core/indigo-core/molecule/src/idt_alias.cpp new file mode 100644 index 0000000000..422e75a6b5 --- /dev/null +++ b/core/indigo-core/molecule/src/idt_alias.cpp @@ -0,0 +1,79 @@ +/**************************************************************************** + * Copyright (C) from 2024 to Present EPAM Systems. + * + * This file is part of Indigo toolkit. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ***************************************************************************/ + +#include "molecule/idt_alias.h" + +namespace indigo +{ + IMPL_ERROR(IdtAlias, "IDT alias"); + + const std::string& IdtAlias::getModification(IdtModification modification) const + { + static std::string empty; + switch (modification) + { + case IdtModification::FIVE_PRIME_END: + return getFivePrimeEnd(); + case IdtModification::INTERNAL: + return getInternal(); + case IdtModification::THREE_PRIME_END: + return getThreePrimeEnd(); + }; + throw Error("Unknown IDT modification: %s.", modification); + return empty; + } + + const std::string& IdtAlias::getFivePrimeEnd() const + { + if (_five_prime_end != "") + return _five_prime_end; + else + throw Error("IDT alias %s has no five-prime end modification.", _base.c_str()); + } + + const std::string& IdtAlias::getInternal() const + { + if (_internal != "") + return _internal; + else + throw Error("IDT alias %s has no internal modification.", _base.c_str()); + } + + const std::string& IdtAlias::getThreePrimeEnd() const + { + if (_three_prime_end != "") + return _three_prime_end; + else + throw Error("IDT alias %s has no three-prime end modification.", _base.c_str()); + } + + std::string IdtAlias::getBaseForMod(const std::string& alias) + { + if (alias.size() < 2) + return alias; + switch (alias[0]) + { + case '3': + case 'i': + case '5': + return alias.substr(1, alias.size() - 1); + } + return alias; + } + +} diff --git a/core/indigo-core/molecule/src/molecule_json_loader.cpp b/core/indigo-core/molecule/src/molecule_json_loader.cpp index 2617ecfbf7..30ee008f4d 100644 --- a/core/indigo-core/molecule/src/molecule_json_loader.cpp +++ b/core/indigo-core/molecule/src/molecule_json_loader.cpp @@ -374,13 +374,12 @@ void MoleculeJsonLoader::parseAtoms(const rapidjson::Value& atoms, BaseMolecule& _pqmol->resetAtom(atom_idx, QueryMolecule::Atom::und(_pqmol->releaseAtom(atom_idx), atomlist.release())); } - if (a.HasMember("selected")) + if (a.HasMember("selected") && a["selected"].GetBool()) { - if (a["selected"].GetBool()) - if (_pmol) - _pmol->selectAtom(atom_idx); - else - _pqmol->selectAtom(atom_idx); + if (_pmol) + _pmol->selectAtom(atom_idx); + else + _pqmol->selectAtom(atom_idx); } if (a.HasMember("ringBondCount")) @@ -1147,8 +1146,11 @@ void MoleculeJsonLoader::addToLibMonomerTemplate(const rapidjson::Value& mt_json alias = mt_json["alias"].GetString(); if (mt_json.HasMember("naturalAnalog")) natural_analog = mt_json["naturalAnalog"].GetString(); + bool unresolved = false; + if (mt_json.HasMember("unresolved")) + unresolved = mt_json["unresolved"].GetBool(); - MonomerTemplate mon_template(id, MonomerTemplate::StrToMonomerClass(monomer_class), class_HELM, full_name, alias, natural_analog, + MonomerTemplate mon_template(id, MonomerTemplate::StrToMonomerClass(monomer_class), class_HELM, full_name, alias, natural_analog, unresolved, mol.tgroups.getTGroup(tgroup_id)); if (mt_json.HasMember("idtAliases")) @@ -1162,6 +1164,11 @@ void MoleculeJsonLoader::addToLibMonomerTemplate(const rapidjson::Value& mt_json mon_template.setIdtAlias(IdtAlias(idt_alias_base, idt_five_prime_end, idt_internal, idt_three_prime_end)); else mon_template.setIdtAlias(IdtAlias(idt_alias_base)); + mol.tgroups.getTGroup(tgroup_id).idt_alias = mon_template.idtAlias(); + } + else if (unresolved) + { + throw Error("Unresoved monomer '%s' without IDT alias.", id.c_str()); } if (mt_json.HasMember("attachmentPoints")) @@ -1323,6 +1330,9 @@ int MoleculeJsonLoader::parseMonomerTemplate(const rapidjson::Value& monomer_tem } tg.tgroup_name.readString(tg_name.c_str(), true); } + bool unresolved = false; + if (monomer_template.HasMember("unresolved")) + tg.unresolved = monomer_template["unresolved"].GetBool(); } if (monomer_template.HasMember("fullName")) @@ -1799,7 +1809,7 @@ void MoleculeJsonLoader::loadMetaObjects(rapidjson::Value& meta_objects, MetaDat p2.y = pos[1]["y"].GetFloat(); } else - throw("Bad pos array size %d. Most be equal to 2.", pos.Size()); + throw Error("Bad pos array size %d. Most be equal to 2.", pos.Size()); } meta_interface.addMetaObject(new KETSimpleObject(mode, std::make_pair(p1, p2))); } diff --git a/core/indigo-core/molecule/src/molecule_json_saver.cpp b/core/indigo-core/molecule/src/molecule_json_saver.cpp index ef31016d93..49a7a670d5 100644 --- a/core/indigo-core/molecule/src/molecule_json_saver.cpp +++ b/core/indigo-core/molecule/src/molecule_json_saver.cpp @@ -1175,6 +1175,42 @@ void MoleculeJsonSaver::saveMonomerTemplate(TGroup& tg, JsonWriter& writer) writer.String(tg.tgroup_comment.ptr()); } + if (tg.unresolved) + { + writer.Key("unresolved"); + writer.Bool(tg.unresolved); + + if (tg.idt_alias.getBase().size()) // Save IDT alias only for unresolved + { + writer.Key("idtAliases"); + writer.StartObject(); + writer.Key("base"); + writer.String(tg.idt_alias.getBase().c_str()); + if (tg.idt_alias.hasModifications()) + { + writer.Key("modifications"); + writer.StartObject(); + writer.Key("endpoint5"); + if (tg.idt_alias.hasModification(IdtModification::FIVE_PRIME_END)) + writer.String(tg.idt_alias.getModification(IdtModification::FIVE_PRIME_END).c_str()); + else + writer.String(""); + writer.Key("internal"); + if (tg.idt_alias.hasModification(IdtModification::INTERNAL)) + writer.String(tg.idt_alias.getModification(IdtModification::INTERNAL).c_str()); + else + writer.String(""); + writer.Key("endpoint3"); + if (tg.idt_alias.hasModification(IdtModification::THREE_PRIME_END)) + writer.String(tg.idt_alias.getModification(IdtModification::THREE_PRIME_END).c_str()); + else + writer.String(""); + writer.EndObject(); + } + writer.EndObject(); + } + } + saveMonomerAttachmentPoints(tg, writer); saveFragment(*tg.fragment, writer); writer.EndObject(); @@ -1205,6 +1241,11 @@ void MoleculeJsonSaver::saveSuperatomAttachmentPoints(Superatom& sa, JsonWriter& std::string atp_id_str(atp.apid.ptr()); if (!isAttachmentPointsInOrder(order++, atp_id_str)) { + if (atp_id_str.size()) + { + writer.Key("id"); + writer.String(atp_id_str.c_str()); + } writer.Key("type"); if (atp_id_str == kLeftAttachmentPoint || atp_id_str == kAttachmentPointR1) writer.String("left"); diff --git a/core/indigo-core/molecule/src/molecule_sgroups.cpp b/core/indigo-core/molecule/src/molecule_sgroups.cpp index 50b12db1af..e06d4dbb91 100644 --- a/core/indigo-core/molecule/src/molecule_sgroups.cpp +++ b/core/indigo-core/molecule/src/molecule_sgroups.cpp @@ -99,7 +99,7 @@ void DataSGroup::setMrv_implicit(int atom_idx, int hydrogens_count) detached = true; } -Superatom::Superatom() +Superatom::Superatom() : unresolved(false) { sgroup_type = SGroup::SG_TYPE_SUP; seqid = -1; diff --git a/core/indigo-core/molecule/src/molecule_tgroups.cpp b/core/indigo-core/molecule/src/molecule_tgroups.cpp index b5a9538ef2..0fab55f5c7 100644 --- a/core/indigo-core/molecule/src/molecule_tgroups.cpp +++ b/core/indigo-core/molecule/src/molecule_tgroups.cpp @@ -24,7 +24,7 @@ using namespace indigo; -TGroup::TGroup() +TGroup::TGroup() : unresolved(false) { } @@ -34,6 +34,7 @@ TGroup::~TGroup() void TGroup::clear() { + unresolved = false; } int TGroup::cmp(TGroup& tg1, TGroup& tg2, void* /*context*/) @@ -46,6 +47,11 @@ int TGroup::cmp(TGroup& tg1, TGroup& tg2, void* /*context*/) if (tg2.fragment.get() == 0) return 1; + if (tg1.unresolved && !tg2.unresolved) + return 1; + else if (!tg1.unresolved && tg2.unresolved) + return -1; + lgrps.clear(); bgrps.clear(); @@ -111,6 +117,8 @@ void TGroup::copy(const TGroup& other) tgroup_comment.copy(other.tgroup_comment); tgroup_natreplace.copy(other.tgroup_natreplace); tgroup_id = other.tgroup_id; + unresolved = other.unresolved; + idt_alias = other.idt_alias; fragment.reset(other.fragment->neu()); fragment->clone(*other.fragment.get(), 0, 0); } diff --git a/core/indigo-core/molecule/src/monomer_commons.cpp b/core/indigo-core/molecule/src/monomer_commons.cpp index e3dbd5f141..efd3af0f49 100644 --- a/core/indigo-core/molecule/src/monomer_commons.cpp +++ b/core/indigo-core/molecule/src/monomer_commons.cpp @@ -83,7 +83,7 @@ namespace indigo bool isBackboneClass(const std::string& monomer_class) { return isAminoAcidClass(monomer_class) || monomer_class == kMonomerClassSUGAR || monomer_class == kMonomerClassPHOSPHATE || - isNucleotideClass(monomer_class); + monomer_class == kMonomerClassCHEM || isNucleotideClass(monomer_class); } bool isBasicAminoAcid(const std::string& monomer_class, const std::string& alias) diff --git a/core/indigo-core/molecule/src/monomers_lib.cpp b/core/indigo-core/molecule/src/monomers_lib.cpp index 11173af75a..7cb3c35bae 100644 --- a/core/indigo-core/molecule/src/monomers_lib.cpp +++ b/core/indigo-core/molecule/src/monomers_lib.cpp @@ -172,48 +172,6 @@ namespace indigo } } - IMPL_ERROR(IdtAlias, "IDT alias"); - - const std::string& IdtAlias::getModification(IdtModification modification) const - { - static std::string empty; - switch (modification) - { - case IdtModification::FIVE_PRIME_END: - return getFivePrimeEnd(); - case IdtModification::INTERNAL: - return getInternal(); - case IdtModification::THREE_PRIME_END: - return getThreePrimeEnd(); - }; - throw Error("Unknown IDT modification: %s.", modification); - return empty; - } - - const std::string& IdtAlias::getFivePrimeEnd() const - { - if (_five_prime_end != "") - return _five_prime_end; - else - throw Error("IDT alias %s has no five-prime end modification.", _base.c_str()); - } - - const std::string& IdtAlias::getInternal() const - { - if (_internal != "") - return _internal; - else - throw Error("IDT alias %s has no internal modification.", _base.c_str()); - } - - const std::string& IdtAlias::getThreePrimeEnd() const - { - if (_three_prime_end != "") - return _three_prime_end; - else - throw Error("IDT alias %s has no three-prime end modification.", _base.c_str()); - } - IMPL_ERROR(MonomerTemplate, "MonomerTemplate"); void MonomerTemplate::AddAttachmentPoint(const std::string& id, const std::string& ap_type, int att_atom, std::vector& leaving_group) @@ -254,6 +212,13 @@ namespace indigo return false; } + bool MonomerTemplate::hasIdtAliasBase(const std::string& alias_base) + { + if (_idt_alias.getBase() == alias_base) + return true; + return false; + } + IMPL_ERROR(MonomerGroupTemplate, "MonomerGroupTemplate"); void MonomerGroupTemplate::addTemplate(const std::string& template_id) @@ -299,6 +264,13 @@ namespace indigo return false; } + bool MonomerGroupTemplate::hasIdtAliasBase(const std::string& alias_base) + { + if (_idt_alias.getBase() == alias_base) + return true; + return false; + } + IMPL_ERROR(MonomerTemplateLibrary, "MonomerTemplateLibrary"); MonomerTemplateLibrary& MonomerTemplateLibrary::instance() @@ -331,32 +303,52 @@ namespace indigo return _monomer_group_templates.at(monomer_group_template_id); } - const std::string& MonomerTemplateLibrary::getMonomerTemplateIdByIdtAliasAndMod(const std::string& alias, IdtModification mod) + const std::string& MonomerTemplateLibrary::getMonomerTemplateIdByIdtAliasBase(const std::string& alias_base) { for (auto& monomer_template : _monomer_templates) { - if (monomer_template.second.hasIdtAlias(alias, mod)) + if (monomer_template.second.hasIdtAliasBase(alias_base)) return monomer_template.first; }; return EMPTY_STRING; }; - const std::string& MonomerTemplateLibrary::getMGTidByIdtAliasAndMod(const std::string& alias, IdtModification mod) + const std::string& MonomerTemplateLibrary::getMGTidByIdtAliasBase(const std::string& alias_base) { for (auto& mgt : _monomer_group_templates) { - if (mgt.second.hasIdtAlias(alias, mod)) + if (mgt.second.hasIdtAliasBase(alias_base)) return mgt.first; }; return EMPTY_STRING; }; + const std::string& MonomerTemplateLibrary::getMonomerTemplateIdByIdtAlias(const std::string& alias, IdtModification& mod) + { + if (auto it = _id_alias_to_monomer_templates.find(alias); it != _id_alias_to_monomer_templates.end()) + { + mod = it->second.second; + return it->second.first.id(); + } + return EMPTY_STRING; + }; + + const std::string& MonomerTemplateLibrary::getMGTidByIdtAlias(const std::string& alias, IdtModification& mod) + { + if (auto it = _id_alias_to_monomer_group_templates.find(alias); it != _id_alias_to_monomer_group_templates.end()) + { + mod = it->second.second; + return it->second.first.id(); + } + return EMPTY_STRING; + }; + const std::string& MonomerTemplateLibrary::getIdtAliasByModification(IdtModification modification, const std::string sugar_id, const std::string base_id, const std::string phosphate_id) { for (auto& mgt : _monomer_group_templates) { - if (mgt.second.idt_alias().hasModification(modification)) + if (mgt.second.idtAlias().hasModification(modification)) { if (!mgt.second.hasTemplate(MonomerClass::Sugar, sugar_id)) continue; @@ -375,7 +367,7 @@ namespace indigo if (mgt.second.hasTemplate(MonomerClass::Base)) continue; } - return mgt.second.idt_alias().getModification(modification); + return mgt.second.idtAlias().getModification(modification); } } return EMPTY_STRING; diff --git a/core/indigo-core/molecule/src/sequence_loader.cpp b/core/indigo-core/molecule/src/sequence_loader.cpp index 0ddc6ebd44..4461555358 100644 --- a/core/indigo-core/molecule/src/sequence_loader.cpp +++ b/core/indigo-core/molecule/src/sequence_loader.cpp @@ -382,10 +382,22 @@ void SequenceLoader::checkAddTemplate(BaseMolecule& mol, const MonomerTemplate& auto& tg = mol.tgroups.getTGroup(tg_idx); tg.copy(monomer_template.getTGroup()); tg.tgroup_id = tg_idx; + tg.idt_alias = monomer_template.idtAlias(); _added_templates.emplace(monomer_template.monomerClass(), monomer_template.alias()); } } +void SequenceLoader::check_monomer_place(std::string& idt_alias, IdtModification mon_mod, IdtModification alias_mod, bool has_prev_mon) +{ + if (mon_mod == IdtModification::FIVE_PRIME_END && alias_mod == IdtModification::THREE_PRIME_END) + throw Error("IDT alias '%s' cannot be used at five prime end.", idt_alias.c_str()); + else if (mon_mod == IdtModification::INTERNAL && alias_mod != IdtModification::INTERNAL) // only internal modifications can be used in internal position + throw Error("IDT alias '%s' cannot be used at internal position.", idt_alias.c_str()); + else if (mon_mod == IdtModification::THREE_PRIME_END && alias_mod == IdtModification::FIVE_PRIME_END && has_prev_mon) + throw Error("IDT alias '%s' cannot be used at three prime end.", idt_alias.c_str()); // 5' monomers not allowed at 3' + // If this is only one monomer(no prev) - it could be any mod +} + void SequenceLoader::loadIdt(BaseMolecule& mol) { const auto IDT_DEF_SUGAR = "dR"; @@ -490,6 +502,7 @@ void SequenceLoader::loadIdt(BaseMolecule& mol) std::string phosphate = IDT_DEF_PHOSPHATE; std::string sugar = IDT_DEF_SUGAR; + std::string idt_alias = ""; std::string base = ""; std::string single_monomer = ""; std::string single_monomer_class; @@ -497,13 +510,13 @@ void SequenceLoader::loadIdt(BaseMolecule& mol) if (token.first.back() == '/') { token.first.pop_back(); - base = token.first; + idt_alias = token.first; } else { if (token.first.size() > 2) throw Error("Wrong IDT syntax: '%s'", token.first.c_str()); - base = token.first.back(); + idt_alias = token.first.back(); if (token.first.size() > 1) { switch (token.first[0]) @@ -526,7 +539,7 @@ void SequenceLoader::loadIdt(BaseMolecule& mol) if (tokens.size() == 0) { modification = IdtModification::THREE_PRIME_END; - if (base == "3Phos") + if (idt_alias == "3Phos") { if (prev_token.second) throw Error("Phosphor /3Phos/ cannod be modified with '*'."); @@ -537,25 +550,26 @@ void SequenceLoader::loadIdt(BaseMolecule& mol) if (token.second) { - if (base == "5Phos") + if (idt_alias == "5Phos") throw Error("/5Phos/ cannot be modified to 'sP'"); phosphate = IDT_MODIFIED_PHOSPHATE; } - if (base.size() == 1) + if (idt_alias.size() == 1) { - if (IDT_STANDARD_BASES.count(base[0]) == 0) + if (IDT_STANDARD_BASES.count(idt_alias[0]) == 0) { if (invalid_symbols.size()) invalid_symbols += ','; - invalid_symbols += base[0]; + invalid_symbols += idt_alias[0]; } else { + base = idt_alias; if (!checkAddTemplate(mol, MonomerClass::Sugar, sugar)) throw Error("Unknown sugar '%s'", sugar.c_str()); - if (base.size() > 0 && !checkAddTemplate(mol, MonomerClass::Base, base)) - throw Error("Unknown base '%s'", base.c_str()); + if (idt_alias.size() > 0 && !checkAddTemplate(mol, MonomerClass::Base, base)) + throw Error("Unknown base '%s'", idt_alias.c_str()); if (phosphate.size() > 0 && !checkAddTemplate(mol, MonomerClass::Phosphate, phosphate)) throw Error("Unknown phosphate '%s'", phosphate.c_str()); } @@ -563,10 +577,12 @@ void SequenceLoader::loadIdt(BaseMolecule& mol) else { sugar = ""; - - const std::string& mgt_id = MonomerTemplateLibrary::instance().getMGTidByIdtAliasAndMod(base, modification); + IdtModification alias_mod; + const std::string& mgt_id = MonomerTemplateLibrary::instance().getMGTidByIdtAlias(idt_alias, alias_mod); if (mgt_id.size()) { + // Check that alias modification can be used in current position + check_monomer_place(idt_alias, modification, alias_mod, prev_token.first.size() > 0); MonomerGroupTemplate& mgt = MonomerTemplateLibrary::instance().getMonomerGroupTemplateById(mgt_id); const MonomerTemplate& sugar_template = mgt.getTemplateByClass(MonomerClass::Sugar); sugar = sugar_template.alias(); @@ -588,7 +604,6 @@ void SequenceLoader::loadIdt(BaseMolecule& mol) { phosphate = ""; } - base = ""; if (mgt.hasTemplateClass(MonomerClass::Base)) { const MonomerTemplate& base_template = mgt.getTemplateByClass(MonomerClass::Base); @@ -598,13 +613,44 @@ void SequenceLoader::loadIdt(BaseMolecule& mol) } else { - const std::string& monomer_template_id = MonomerTemplateLibrary::instance().getMonomerTemplateIdByIdtAliasAndMod(base, modification); - if (!monomer_template_id.size()) - throw Error("IDT alias %s not found at %s position.", base.c_str(), IdtAlias::IdtModificationToString(modification).c_str()); - const MonomerTemplate& monomer_template = MonomerTemplateLibrary::instance().getMonomerTemplateById(monomer_template_id); - checkAddTemplate(mol, monomer_template); - single_monomer = monomer_template.alias(); - single_monomer_class = MonomerTemplates::classToStr(monomer_template.monomerClass()); + IdtModification alias_mod; + auto monomer_template_id = MonomerTemplateLibrary::instance().getMonomerTemplateIdByIdtAlias(idt_alias, alias_mod); + if (monomer_template_id.size()) + { + check_monomer_place(idt_alias, modification, alias_mod, prev_token.first.size() > 0); + const MonomerTemplate& monomer_template = MonomerTemplateLibrary::instance().getMonomerTemplateById(monomer_template_id); + checkAddTemplate(mol, monomer_template); + single_monomer = monomer_template.alias(); + single_monomer_class = MonomerTemplates::classToStr(monomer_template.monomerClass()); + } + else // IDT alias not found + { + TGroup t_group; + single_monomer = "unknown_monomer_with_idt_alias_" + idt_alias; + auto monomer_class = MonomerClass::CHEM; + single_monomer_class = MonomerTemplates::classToStr(monomer_class); + t_group.tgroup_name.readString(single_monomer.c_str(), true); + t_group.tgroup_alias.readString(single_monomer.c_str(), true); + t_group.tgroup_text_id.readString(single_monomer.c_str(), true); + // t_group.tgroup_natreplace.readString(single_monomer.c_str(), true); + t_group.tgroup_class.readString(single_monomer_class.c_str(), true); + t_group.fragment.reset(mol.neu()); + t_group.unresolved = true; + auto& monomer_mol = *t_group.fragment; + int grp_idx = monomer_mol.sgroups.addSGroup(SGroup::SG_TYPE_SUP); + Superatom& sa = static_cast(monomer_mol.sgroups.getSGroup(grp_idx)); + for (auto ap : {"R1", "R2", "R3", "R4"}) + { + int atp_index = sa.attachment_points.add(); + auto& atp = sa.attachment_points[atp_index]; + atp.aidx = -1; + atp.apid.readString(ap, true); + } + sa.unresolved = true; + MonomerTemplate monomer_template(single_monomer, monomer_class, "", single_monomer, single_monomer, single_monomer, true, t_group); + monomer_template.setIdtAlias(IdtAlias(idt_alias, idt_alias, idt_alias, idt_alias)); // Unresoved monomer could be in any position + checkAddTemplate(mol, monomer_template); + } } } diff --git a/core/indigo-core/molecule/src/sequence_saver.cpp b/core/indigo-core/molecule/src/sequence_saver.cpp index 904cb31b04..7b195415da 100644 --- a/core/indigo-core/molecule/src/sequence_saver.cpp +++ b/core/indigo-core/molecule/src/sequence_saver.cpp @@ -37,184 +37,218 @@ SequenceSaver::~SequenceSaver() { } -void SequenceSaver::saveMolecule(BaseMolecule& mol, SeqFormat sf) +std::string SequenceSaver::saveIdt(BaseMolecule& mol, std::deque& sequence) { - if (!mol.isQueryMolecule()) - mol.getTemplatesMap(_templates); - - std::string seq_text; - auto& mol_properties = mol.properties(); - std::vector> sequences; - SequenceLayout sl(mol); - sl.sequenceExtract(sequences); - auto prop_it = mol_properties.begin(); - int seq_idx = 0; static const std::unordered_set IDT_STANDARD_BASES = {"A", "T", "C", "G", "U", "I"}; static const std::map IDT_STANDARD_SUGARS{{"R", "r"}, {"LR", "+"}, {"mR", "m"}, {"dR", ""}}; - for (auto& sequence : sequences) + std::string seq_string; + std::unordered_set used_atoms; + IdtModification modification = IdtModification::FIVE_PRIME_END; + while (sequence.size() > 0) { - std::string seq_string; - if (sf == SeqFormat::IDT) + int atom_idx = sequence.front(); + used_atoms.emplace(atom_idx); + sequence.pop_front(); + if (!mol.isTemplateAtom(atom_idx)) + throw Error("Cannot save regular atom %s in IDT format.", mol.getAtomDescription(atom_idx).c_str()); + std::string monomer_class = mol.getTemplateAtomClass(atom_idx); + std::string monomer = mol.getTemplateAtom(atom_idx); + bool standard_sugar = true; + bool standard_base = true; + bool standard_phosphate = true; + std::string sugar; + std::string base; + std::string phosphate; + if (monomer_class != kMonomerClassSUGAR) { - std::unordered_set used_atoms; - IdtModification modification = IdtModification::FIVE_PRIME_END; - while (sequence.size() > 0) + // Check for unresoved monomer + std::optional> tg_ref = std::nullopt; + int temp_idx = mol.getTemplateAtomTemplateIndex(atom_idx); + if (temp_idx > -1) + tg_ref = std::optional>(std::ref(mol.tgroups.getTGroup(temp_idx))); + else + auto tg_ref = findTemplateInMap(monomer, monomer_class, _templates); + if (tg_ref.has_value()) { - int atom_idx = sequence.front(); - used_atoms.emplace(atom_idx); - sequence.pop_front(); - if (!mol.isTemplateAtom(atom_idx)) - throw Error("Cannot save regular atom %s in IDT format.", mol.getAtomDescription(atom_idx).c_str()); - std::string monomer_class = mol.getTemplateAtomClass(atom_idx); - std::string monomer = mol.getTemplateAtom(atom_idx); - bool standard_sugar = true; - bool standard_base = true; - bool standard_phosphate = true; - std::string sugar; - std::string base; - std::string phosphate; - if (monomer_class != kMonomerClassSUGAR) - { - if (used_atoms.size() > 1) - throw Error("Cannot save molecule in IDT format - expected sugar but found %s monomer %s.", monomer_class.c_str(), monomer.c_str()); - if (monomer_class != kMonomerClassPHOSPHATE || monomer != "P") // first monomer can be phosphate "P" - throw Error("Cannot save molecule in IDT format - %s monomer %s cannot be first.", monomer_class.c_str(), monomer.c_str()); - seq_string += "/5Phos/"; - modification = IdtModification::INTERNAL; - continue; - } - sugar = monomer; - if (IDT_STANDARD_SUGARS.count(monomer) == 0) - standard_sugar = false; - auto& v = mol.getVertex(atom_idx); - for (auto nei_idx = v.neiBegin(); nei_idx < v.neiEnd(); nei_idx = v.neiNext(nei_idx)) + auto& tg = tg_ref.value().get(); + if (tg.unresolved) { - int nei_atom_idx = v.neiVertex(nei_idx); - if (used_atoms.count(nei_atom_idx) > 0) - continue; - used_atoms.emplace(nei_atom_idx); - if (mol.isTemplateAtom(nei_atom_idx)) + const std::string& idt_alias = tg.idt_alias.getBase(); + if (idt_alias.size()) { - monomer_class = std::string(mol.getTemplateAtomClass(nei_atom_idx)); - if (monomer_class == kMonomerClassBASE) - { - if (base.size() > 0) - throw Error("Cannot save molecule in IDT format - sugar %s with two base connected %s and %s.", monomer.c_str(), base.c_str(), - mol.getTemplateAtom(nei_atom_idx)); - base = mol.getTemplateAtom(nei_atom_idx); - if (IDT_STANDARD_BASES.count(base) == 0) - standard_base = false; - } - else if (monomer_class == kMonomerClassPHOSPHATE) - { - if (phosphate.size() > 0) // left phosphate should be in used_atoms and skiped - throw Error("Cannot save molecule in IDT format - sugar %s with too much phosphates connected %s and %s.", monomer.c_str(), - phosphate.c_str(), mol.getTemplateAtom(nei_atom_idx)); - phosphate = mol.getTemplateAtom(nei_atom_idx); - } - else - { - throw Error( - "Cannot save molecule in IDT format - sugar %s connected to monomer %s with class %s (only base or phosphate expected).", - monomer.c_str(), mol.getTemplateAtom(nei_atom_idx), monomer_class.c_str()); - } + seq_string.push_back('/'); + seq_string.append(idt_alias); + seq_string.push_back('/'); + modification = IdtModification::INTERNAL; + continue; } else { - throw Error("Cannot save regular atom %s in IDT format.", mol.getAtomDescription(atom_idx).c_str()); + throw Error("Unresolved monomer '%s' has no IDT alias.", tg.tgroup_text_id.ptr()); } } + } - if (sequence.size() > 0) - { // process phosphate - atom_idx = sequence.front(); - sequence.pop_front(); - if (!mol.isTemplateAtom(atom_idx)) - throw Error("Cannot save regular atom %s in IDT format.", mol.getAtomDescription(atom_idx).c_str()); - monomer_class = mol.getTemplateAtomClass(atom_idx); - monomer = mol.getTemplateAtom(atom_idx); - if (monomer_class != kMonomerClassPHOSPHATE) - throw Error("Cannot save molecule in IDT format - phosphate expected between sugars but %s monomer %s found.", monomer_class.c_str(), - monomer.c_str()); - if (used_atoms.count(atom_idx) == 0) // phosphate should be already processed at sugar neighbours check - throw Error("Cannot save molecule in IDT format - phosphate %s not connected to previous sugar.", phosphate.c_str()); - if (phosphate != "P" && phosphate != "sP") - standard_phosphate = false; - } - else + if (used_atoms.size() > 1) + throw Error("Cannot save molecule in IDT format - expected sugar but found %s monomer %s.", monomer_class.c_str(), monomer.c_str()); + if (monomer_class != kMonomerClassPHOSPHATE || monomer != "P") // first monomer can be phosphate "P" + throw Error("Cannot save molecule in IDT format - %s monomer %s cannot be first.", monomer_class.c_str(), monomer.c_str()); + seq_string += "/5Phos/"; + modification = IdtModification::INTERNAL; + continue; + } + sugar = monomer; + if (IDT_STANDARD_SUGARS.count(monomer) == 0) + standard_sugar = false; + auto& v = mol.getVertex(atom_idx); + for (auto nei_idx = v.neiBegin(); nei_idx < v.neiEnd(); nei_idx = v.neiNext(nei_idx)) + { + int nei_atom_idx = v.neiVertex(nei_idx); + if (used_atoms.count(nei_atom_idx) > 0) + continue; + used_atoms.emplace(nei_atom_idx); + if (mol.isTemplateAtom(nei_atom_idx)) + { + monomer_class = std::string(mol.getTemplateAtomClass(nei_atom_idx)); + if (monomer_class == kMonomerClassBASE) { - modification = IdtModification::THREE_PRIME_END; + if (base.size() > 0) + throw Error("Cannot save molecule in IDT format - sugar %s with two base connected %s and %s.", monomer.c_str(), base.c_str(), + mol.getTemplateAtom(nei_atom_idx)); + base = mol.getTemplateAtom(nei_atom_idx); + if (IDT_STANDARD_BASES.count(base) == 0) + standard_base = false; } - - bool add_asterisk = false; - if (phosphate == "sP") + else if (monomer_class == kMonomerClassPHOSPHATE) { - phosphate = "P"; - add_asterisk = true; + if (phosphate.size() > 0) // left phosphate should be in used_atoms and skiped + throw Error("Cannot save molecule in IDT format - sugar %s with too much phosphates connected %s and %s.", monomer.c_str(), + phosphate.c_str(), mol.getTemplateAtom(nei_atom_idx)); + phosphate = mol.getTemplateAtom(nei_atom_idx); } - if (standard_base && standard_phosphate && standard_sugar) + else { - sugar = IDT_STANDARD_SUGARS.at(sugar); - if (sugar.size()) - seq_string += sugar; - seq_string += base; + throw Error("Cannot save molecule in IDT format - sugar %s connected to monomer %s with class %s (only base or phosphate expected).", + monomer.c_str(), mol.getTemplateAtom(nei_atom_idx), monomer_class.c_str()); } - else + } + else + { + throw Error("Cannot save regular atom %s in IDT format.", mol.getAtomDescription(atom_idx).c_str()); + } + } + + if (sequence.size() > 0) + { // process phosphate + atom_idx = sequence.front(); + sequence.pop_front(); + if (!mol.isTemplateAtom(atom_idx)) + throw Error("Cannot save regular atom %s in IDT format.", mol.getAtomDescription(atom_idx).c_str()); + monomer_class = mol.getTemplateAtomClass(atom_idx); + monomer = mol.getTemplateAtom(atom_idx); + if (monomer_class != kMonomerClassPHOSPHATE) + throw Error("Cannot save molecule in IDT format - phosphate expected between sugars but %s monomer %s found.", monomer_class.c_str(), + monomer.c_str()); + if (used_atoms.count(atom_idx) == 0) // phosphate should be already processed at sugar neighbours check + throw Error("Cannot save molecule in IDT format - phosphate %s not connected to previous sugar.", phosphate.c_str()); + if (phosphate != "P" && phosphate != "sP") + standard_phosphate = false; + } + else + { + modification = IdtModification::THREE_PRIME_END; + } + + bool add_asterisk = false; + if (phosphate == "sP") + { + phosphate = "P"; + add_asterisk = true; + } + if (standard_base && standard_phosphate && standard_sugar) + { + sugar = IDT_STANDARD_SUGARS.at(sugar); + if (sugar.size()) + seq_string += sugar; + seq_string += base; + } + else + { + // Try to find sugar,base,phosphate group template + MonomerTemplateLibrary& lib = MonomerTemplateLibrary::instance(); + const std::string& sugar_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Sugar, sugar); + const std::string& phosphate_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Phosphate, phosphate); + std::string base_id; + if (base.size()) + base_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Base, base); + const std::string& idt_alias = lib.getIdtAliasByModification(modification, sugar_id, base_id, phosphate_id); + if (idt_alias.size()) + { + seq_string += '/'; + seq_string += idt_alias; + seq_string += '/'; + } + else + { + if (base.size()) { - // Try to find sugar,base,phosphate group template - MonomerTemplateLibrary& lib = MonomerTemplateLibrary::instance(); - const std::string& sugar_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Sugar, sugar); - const std::string& phosphate_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Phosphate, phosphate); - std::string base_id; - if (base.size()) - base_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Base, base); - const std::string& idt_alias = lib.getIdtAliasByModification(modification, sugar_id, base_id, phosphate_id); - if (idt_alias.size()) - { - seq_string += '/'; - seq_string += idt_alias; - seq_string += '/'; - } + if (phosphate.size()) + throw Error("IDT alias for group sugar:%s base:%s phosphate:%s not found.", sugar.c_str(), base.c_str(), phosphate.c_str()); else - { - if (base.size()) - { - if (phosphate.size()) - throw Error("IDT alias for group sugar:%s base:%s phosphate:%s not found.", sugar.c_str(), base.c_str(), phosphate.c_str()); - else - throw Error("IDT alias for group sugar:%s base:%s not found.", sugar.c_str(), base.c_str()); - } - else - { - if (phosphate.size()) - - throw Error("IDT alias for group sugar:%s phosphate:%s not found.", sugar.c_str(), phosphate.c_str()); - else - throw Error("IDT alias for sugar:%s not found.", sugar.c_str()); - } - } + throw Error("IDT alias for group sugar:%s base:%s not found.", sugar.c_str(), base.c_str()); } - - if (phosphate.size()) + else { - if (add_asterisk) - { - seq_string += "*"; - phosphate = "sP"; - } - if (sequence.size() == 0) - { - modification = IdtModification::THREE_PRIME_END; - if (phosphate == "P") - seq_string += "/3Phos/"; - else - throw Error("Cannot save molecule in IDT format - phosphate %s cannot be last monomer in sequence.", phosphate.c_str()); - } + if (phosphate.size()) + + throw Error("IDT alias for group sugar:%s phosphate:%s not found.", sugar.c_str(), phosphate.c_str()); + else + throw Error("IDT alias for sugar:%s not found.", sugar.c_str()); } + } + } - if (modification == IdtModification::FIVE_PRIME_END) - modification = IdtModification::INTERNAL; + if (phosphate.size()) + { + if (add_asterisk) + { + seq_string += "*"; + phosphate = "sP"; } + if (sequence.size() == 0) + { + modification = IdtModification::THREE_PRIME_END; + if (phosphate == "P") + seq_string += "/3Phos/"; + else + throw Error("Cannot save molecule in IDT format - phosphate %s cannot be last monomer in sequence.", phosphate.c_str()); + } + } + + if (modification == IdtModification::FIVE_PRIME_END) + modification = IdtModification::INTERNAL; + } + return seq_string; +} + +void SequenceSaver::saveMolecule(BaseMolecule& mol, SeqFormat sf) +{ + if (!mol.isQueryMolecule()) + mol.getTemplatesMap(_templates); + + std::string seq_text; + auto& mol_properties = mol.properties(); + std::vector> sequences; + SequenceLayout sl(mol); + sl.sequenceExtract(sequences); + auto prop_it = mol_properties.begin(); + int seq_idx = 0; + for (auto& sequence : sequences) + { + std::string seq_string; + if (sf == SeqFormat::IDT) + { + seq_string.append(saveIdt(mol, sequence)); } else {