# Figure 3B Code Example; K48 reaction followed by Smac deprotection

Below is an example of the code that is shown in Figure 3B - Transformation operators for reaction pathway simulation - in the manuscript; S. Majima, E. Kummelstedt, T. Mikami, L. Seidl, J. Han & J. W. Bode. Comprehensive synthesis of K48/K63 ubiquitin pentamers by graph-empowered automation. ChemRxiv 2025, preprint (Version 1, 11 Sept 2025).

In [63]:
import sys
import os

# Add the backend src directory to Python path
# Get the current working directory (where the notebook is located)
project_root = os.getcwd()
backend_src_path = os.path.join(project_root, 'back_end', 'src')
sys.path.append(backend_src_path)

# Import the ubiquitin_simulation function
from main import ubiquitin_simulation, iterate_through_ubiquitin
from simulation import assign_correct_E2_enzyme

### (1) Initiate starting acceptor graph represented as a JSON; Gi(V,E)

In [64]:
# Sample deeply nested ubiquitin dictionary for testing
Gi = {
    "protein": "1ubq-histag",
    "chain_number": 1,
    "FASTA_sequence": "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGDHHHHHH",
    "chain_length": 83,
    "branching_sites": [
        {"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": {
            "protein": "1ubq",
            "chain_number": 2,
            "FASTA_sequence": "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG",
            "chain_length": 76,
            "branching_sites": [{"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": "ABOC"},
                                {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": ""},
                                {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
                                {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
                                {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
                                {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
                                {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
                                {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""}]}
        },
        {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": "ABOC"},
        {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
        {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
        {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
        {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
        {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
        {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""}
        ]}

### (2) Inititate donor graph represented as a JSON; Gii(V,E)

In [65]:
Gii = {
    "protein": "1ubq",
    "chain_number": 1,
    "FASTA_sequence": "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG",
    "chain_length": 76,
    "branching_sites": [{"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": "SMAC"},
                        {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": "ABOC"},
                        {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
                        {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
                        {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
                        {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
                        {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
                        {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""}]}


### (3) Ensure that the two graphs are correctly formatted and pull their context. The context contains all the properties of the graph. This is down with iterate through ubiquitin.

In [66]:
Gi_json, Gi_context = iterate_through_ubiquitin(Gi)
Gii_json, Gii_context = iterate_through_ubiquitin(Gii)

### (3a-i) Inspect the Gi json

In [67]:
Gi_json

{'protein': '1ubq-histag',
 'chain_number': 1,
 'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGDHHHHHH',
 'chain_length': 83,
 'branching_sites': [{'site_name': 'K63',
   'sequence_id': 'NIQ(K)EST',
   'children': {'protein': '1ubq',
    'chain_number': 2,
    'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
    'chain_length': 76,
    'branching_sites': [{'site_name': 'K63',
      'sequence_id': 'NIQ(K)EST',
      'children': 'ABOC'},
     {'site_name': 'K48', 'sequence_id': 'FAG(K)QLE', 'children': ''},
     {'site_name': 'K33', 'sequence_id': 'IQD(K)EGI', 'children': ''},
     {'site_name': 'K29', 'sequence_id': 'VKA(K)IQD', 'children': ''},
     {'site_name': 'K27', 'sequence_id': 'ENV(K)AKI', 'children': ''},
     {'site_name': 'K11', 'sequence_id': 'LTG(K)TIT', 'children': ''},
     {'site_name': 'K6', 'sequence_id': 'IFV(K)TLT', 'children': ''},
     {'site_name': 'M1', 'sequence_id'

### (3a-ii) Inspect the Gi_context

In [68]:
Gi_context

{'chain_number_list': [1, 2, 3],
 'chain_length_list': [83, 76],
 'multimer_string_name': 'his-GG-1ubq-histag-1-(<K63_1ubq-2-(<K63_ABOC>)><K48_ABOC>)',
 'nomenclature_w_preorder': 'Ub1,63(Ub2,63aboc),48aboc',
 'nomenclature_wo_preorder': 'his-Ub,63(Ub,63aboc),48aboc',
 'max_chain_number': 2,
 'ABOC_lysines': [[2, 'K63'], [1, 'K48']],
 'SMAC_lysines': [],
 'free_lysines': [[2, 'K48']],
 'conjugated_lysines': [[1, 'K63', 2]]}

### (3b-i) Inspect the Gii_json

In [69]:
Gii_json

{'protein': '1ubq',
 'chain_number': 1,
 'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
 'chain_length': 76,
 'branching_sites': [{'site_name': 'K63',
   'sequence_id': 'NIQ(K)EST',
   'children': 'SMAC'},
  {'site_name': 'K48', 'sequence_id': 'FAG(K)QLE', 'children': 'ABOC'},
  {'site_name': 'K33', 'sequence_id': 'IQD(K)EGI', 'children': ''},
  {'site_name': 'K29', 'sequence_id': 'VKA(K)IQD', 'children': ''},
  {'site_name': 'K27', 'sequence_id': 'ENV(K)AKI', 'children': ''},
  {'site_name': 'K11', 'sequence_id': 'LTG(K)TIT', 'children': ''},
  {'site_name': 'K6', 'sequence_id': 'IFV(K)TLT', 'children': ''},
  {'site_name': 'M1', 'sequence_id': '(M)QIF', 'children': ''}]}

### (3b-ii) Inspect the Gii_context

In [70]:
Gii_json

{'protein': '1ubq',
 'chain_number': 1,
 'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
 'chain_length': 76,
 'branching_sites': [{'site_name': 'K63',
   'sequence_id': 'NIQ(K)EST',
   'children': 'SMAC'},
  {'site_name': 'K48', 'sequence_id': 'FAG(K)QLE', 'children': 'ABOC'},
  {'site_name': 'K33', 'sequence_id': 'IQD(K)EGI', 'children': ''},
  {'site_name': 'K29', 'sequence_id': 'VKA(K)IQD', 'children': ''},
  {'site_name': 'K27', 'sequence_id': 'ENV(K)AKI', 'children': ''},
  {'site_name': 'K11', 'sequence_id': 'LTG(K)TIT', 'children': ''},
  {'site_name': 'K6', 'sequence_id': 'IFV(K)TLT', 'children': ''},
  {'site_name': 'M1', 'sequence_id': '(M)QIF', 'children': ''}]}

### (4) React the accpetor and donor graphs with ubiquitin_simulation

In [71]:
Giii_json, Giii_context = ubiquitin_simulation(
    parent_dictionary = Gi_json, 
    ubi_molecule_to_add = Gii_json, 
    type_of_reaction = "K48"
    )

### (4-i) Inspect the Giii_json

In [72]:
Giii_json

{'protein': '1ubq-histag',
 'chain_number': 1,
 'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGDHHHHHH',
 'chain_length': 83,
 'branching_sites': [{'site_name': 'K63',
   'sequence_id': 'NIQ(K)EST',
   'children': {'protein': '1ubq',
    'chain_number': 2,
    'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
    'chain_length': 76,
    'branching_sites': [{'site_name': 'K63',
      'sequence_id': 'NIQ(K)EST',
      'children': 'ABOC'},
     {'site_name': 'K48',
      'sequence_id': 'FAG(K)QLE',
      'children': {'protein': '1ubq',
       'chain_number': 3,
       'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
       'chain_length': 76,
       'branching_sites': [{'site_name': 'K63',
         'sequence_id': 'NIQ(K)EST',
         'children': 'SMAC'},
        {'site_name': 'K48', 'sequence_id': 'FAG(K)QLE', 'children': 'ABOC'},
        {'site

### (4-ii) Inspect the Giii_context

In [73]:
Giii_context

{'chain_number_list': [1, 2, 3, 4],
 'chain_length_list': [83, 76, 76],
 'multimer_string_name': 'his-GG-1ubq-histag-1-(<K63_1ubq-2-(<K63_ABOC><K48_1ubq-3-(<K63_SMAC><K48_ABOC>)>)><K48_ABOC>)',
 'nomenclature_w_preorder': 'Ub1,63(Ub2,63aboc,48(Ub3,63smac,48aboc)),48aboc',
 'nomenclature_wo_preorder': 'his-Ub,63(Ub,63aboc,48(Ub,63smac,48aboc)),48aboc',
 'max_chain_number': 3,
 'ABOC_lysines': [[2, 'K63'], [3, 'K48'], [1, 'K48']],
 'SMAC_lysines': [[3, 'K63']],
 'free_lysines': [],
 'conjugated_lysines': [[1, 'K63', 2], [2, 'K48', 3]]}

### (5) Run assign_correct_E2_enzyme

In [74]:
assign_correct_E2_enzyme(Gi_context, Giii_context)

'Ube2K'

### (6) React the product graph with a smac deprotection reaction through ubiquitin simulation

In [75]:
Giv_json, Giv_context = ubiquitin_simulation(
    parent_dictionary = Giii_json, 
    ubi_molecule_to_add = '', 
    type_of_reaction = "SMAC_deprot"
    )

### (6-i) Inspect the Giv_json

In [76]:
Giv_json

{'protein': '1ubq-histag',
 'chain_number': 1,
 'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGDHHHHHH',
 'chain_length': 83,
 'branching_sites': [{'site_name': 'K63',
   'sequence_id': 'NIQ(K)EST',
   'children': {'protein': '1ubq',
    'chain_number': 2,
    'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
    'chain_length': 76,
    'branching_sites': [{'site_name': 'K63',
      'sequence_id': 'NIQ(K)EST',
      'children': 'ABOC'},
     {'site_name': 'K48',
      'sequence_id': 'FAG(K)QLE',
      'children': {'protein': '1ubq',
       'chain_number': 3,
       'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
       'chain_length': 76,
       'branching_sites': [{'site_name': 'K63',
         'sequence_id': 'NIQ(K)EST',
         'children': ''},
        {'site_name': 'K48', 'sequence_id': 'FAG(K)QLE', 'children': 'ABOC'},
        {'site_nam

### (6-i) Inspect the Giv_context

In [77]:
Giv_context

{'chain_number_list': [1, 2, 3, 4],
 'chain_length_list': [83, 76, 76],
 'multimer_string_name': 'his-GG-1ubq-histag-1-(<K63_1ubq-2-(<K63_ABOC><K48_1ubq-3-(<K48_ABOC>)>)><K48_ABOC>)',
 'nomenclature_w_preorder': 'Ub1,63(Ub2,63aboc,48(Ub3,48aboc)),48aboc',
 'nomenclature_wo_preorder': 'his-Ub,63(Ub,63aboc,48(Ub,48aboc)),48aboc',
 'max_chain_number': 3,
 'ABOC_lysines': [[2, 'K63'], [3, 'K48'], [1, 'K48']],
 'SMAC_lysines': [],
 'free_lysines': [[3, 'K63']],
 'conjugated_lysines': [[1, 'K63', 2], [2, 'K48', 3]]}