In [5]:
import json 
import logging
import copy
import sys

# figure out the path issues
# home_dir = os.path.expanduser('~')
# local_path = '/home/erickummelstedt/lecodebase/ubiquitinformatics/src/main.py'
local_path = '/Users/ekummelstedt/le_code_base/ubiquitinformatics/back_end'
sys.path.insert(0, local_path)

from src.utils import convert_json_to_dict
from src.logging_utils import log_protein_details, log_branching_details, log_end_of_branching, log_end_of_protein


In [6]:
# Import the functions from the original code
# from src.main_testing import relabelling_ubiquitin_numbers, inner_wrapper_relabelling_ubiquitin_numbers
from src.main import \
    iterate_through_ubiquitin, \
    inner_wrapper_iterate_through_ubiquitin, \
    find_branching_site, \
    validate_protein_keys, \
    check_branching_sites, \
    check_branching_sequences,\
    validate_branching_sites,\
    check_branching_site_sequence_match, \
    check_children_format,\
    process_current_protein, \
    process_branch, \
    add_max_chain_number, \
    process_ubiquitin_reaction, \
    ubiquitin_simulation, \
    inner_wrapper_ubiquitin_simulation, \
    handle_lysine_modification, \
    ubiquitin_building, \
    inner_wrapper_ubiquitin_building

from src.simulation import simulate_E2_step, simulate_deprot_step

from src.utils import \
    match_assertion_error_contains,\
    all_strings_exist, \
    all_strings_exist_in_list, \
    inject_fasta_sequence_at_chain,\
    inject_protein_key,\
    inject_branching_sites,\
    convert_json_to_dict

from tests.test_data import \
    five_level_nested_ubiquitin_,\
    k48_dimer_ubiquitin,\
    string_k48_dimer_ubiquitin,\
    ubiquitin_monomer, \
    histag_ubiquitin_monomer,\
    BASE_WORKING_DICT, \
    BASE_CONTEXT, \
    ubi_ubq_1_K48_SMAC,\
    ubi_ubq_1_K63_SMAC,\
    ubi_ubq_1_K48_SMAC_K63_ABOC,\
    ubi_ubq_1_K48_ABOC_K63_SMAC,\
    ubi_ubq_1_K48_ABOC_K63_ABOC,\
    histag_ubi_ubq_1,\
    histag_ubi_ubq_1_K48_aboc,\
    histag_ubi_ubq_1_K63_aboc

# ------------------- Test Data Setup -------------------
# Build two multi-step reaction sequences by chaining ubiquitin_simulation calls.
# Every call returns a (ubiquitin dictionary, context dictionary) pair; the
# "a" suffix marks the product of a conjugation step and the "b" suffix the
# product of the deprotection step that follows it.
#
# `acceptor`, `context`, `donor` and `reaction` are scratch variables that are
# rebound before each step. `context` is only a snapshot of the previous step's
# context; it is not passed to ubiquitin_simulation.
# `branching_or_elgonation` (sic) is a hand-written annotation of the expected
# topology of the step; nothing in this cell consumes it.

# TEST 1 Reaction Sequence
# 1. dimer formation
acceptor = histag_ubi_ubq_1_K63_aboc
donor = ubi_ubq_1_K48_SMAC
reaction = "K48"
_acceptor_1a, _context_1a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"

# 2. dimer deprotection
# NOTE(review): "FAKE_deprot" is presumably a placeholder deprotection that
# leaves the protecting groups untouched — confirm against ubiquitin_simulation.
acceptor = _acceptor_1a.copy()
context = _context_1a.copy()
donor = ''
reaction = "FAKE_deprot"
_acceptor_1b, _context_1b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 3. trimer formation
acceptor = _acceptor_1b.copy()
context = _context_1b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K63"
_acceptor_2a, _context_2a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"

# 4. trimer deprotection
acceptor = _acceptor_2a.copy()
context = _context_2a.copy()
donor = ''
reaction = "SMAC_deprot"
_acceptor_2b, _context_2b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 5. tetramer formation
acceptor = _acceptor_2b.copy()
context = _context_2b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K48"
_acceptor_3a, _context_3a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "branching"

# 6. tetramer deprotection
acceptor = _acceptor_3a.copy()
context = _context_3a.copy()
donor = ''
reaction = "FAKE_deprot"
_acceptor_3b, _context_3b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )


# TEST 2 Reaction Sequence
# 1. dimer formation
acceptor = histag_ubi_ubq_1_K63_aboc
donor = ubi_ubq_1_K48_ABOC_K63_SMAC
reaction = "K48"
acceptor_1a, context_1a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"

# 2. dimer deprotection
acceptor = acceptor_1a.copy()
context = context_1a.copy()
donor = ''
reaction = "SMAC_deprot"
acceptor_1b, context_1b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 3. trimer formation
# The only lysine deprotected by step 2 is K63 on chain 2 (the donor of step 1
# carried SMAC on K63 and ABOC on K48), so this step must be a K63 reaction.
acceptor = acceptor_1b.copy()
context = context_1b.copy()
donor = ubi_ubq_1_K63_SMAC
reaction = "K63"
acceptor_2a, context_2a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"

# 4. trimer deprotection
acceptor = acceptor_2a.copy()
context = context_2a.copy()
donor = ''
reaction = "FAKE_deprot"
acceptor_2b, context_2b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 5. tetramer formation
# NOTE(review): after this step chain 3 appears to carry a single conjugation,
# which a per-chain count would classify as 'elongation' — confirm the label.
acceptor = acceptor_2b.copy()
context = context_2b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K48"
acceptor_3a, context_3a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "branching"

# 6. tetramer deprotection
acceptor = acceptor_3a.copy()
context = context_3a.copy()
donor = ''
reaction = "SMAC_deprot"
acceptor_3b, context_3b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 7. pentamer formation
acceptor = acceptor_3b.copy()
context = context_3b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K63"
acceptor_4a, context_4a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "branching"

# 8. pentamer deprotection
acceptor = acceptor_4a.copy()
context = context_4a.copy()
donor = ''
reaction = "FAKE_deprot"
acceptor_4b, context_4b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# Display the final TEST 2 product (pentamer after the last deprotection step).
acceptor_4b



























































































































































































































{'protein': '1ubq',
 'chain_number': 1,
 'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGHHHHHH',
 'chain_length': 82,
 'branching_sites': [{'site_name': 'K48',
   'sequence_id': 'FAG(K)QLE',
   'children': {'protein': '1ubq',
    'chain_number': 2,
    'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
    'chain_length': 76,
    'branching_sites': [{'site_name': 'K48',
      'sequence_id': 'FAG(K)QLE',
      'children': 'ABOC'},
     {'site_name': 'K11', 'sequence_id': 'LTG(K)TIT', 'children': ''},
     {'site_name': 'K63',
      'sequence_id': 'NIQ(K)EST',
      'children': {'protein': '1ubq',
       'chain_number': 3,
       'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
       'chain_length': 76,
       'branching_sites': [{'site_name': 'K48',
         'sequence_id': 'FAG(K)QLE',
         'children': 'ABOC'},
        {'site_name': 'K11', 

In [None]:
# Build two multi-step reaction sequences by chaining ubiquitin_simulation calls.
# Every call returns a (ubiquitin dictionary, context dictionary) pair; the
# "a" suffix marks the product of a conjugation step and the "b" suffix the
# product of the deprotection step that follows it.
# `acceptor`, `context`, `donor` and `reaction` are scratch variables rebound
# before each step; `context` is a snapshot only and is not passed to
# ubiquitin_simulation. `branching_or_elgonation` (sic) is a hand-written
# annotation of the expected topology and is not consumed in this cell.

# TEST 1 Reaction Sequence
# 1. dimer formation
acceptor = histag_ubi_ubq_1_K63_aboc
donor = ubi_ubq_1_K48_SMAC
reaction = "K48"
_acceptor_1a, _context_1a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"

# 2. dimer deprotection
# NOTE(review): "FAKE_deprot" is presumably a placeholder deprotection that
# leaves the protecting groups untouched — confirm against ubiquitin_simulation.
acceptor = _acceptor_1a.copy()
context = _context_1a.copy()
donor = ''
reaction = "FAKE_deprot"
_acceptor_1b, _context_1b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 3. trimer formation
acceptor = _acceptor_1b.copy()
context = _context_1b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K63"
_acceptor_2a, _context_2a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"

# 4. trimer deprotection
acceptor = _acceptor_2a.copy()
context = _context_2a.copy()
donor = ''
reaction = "SMAC_deprot"
_acceptor_2b, _context_2b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 5. tetramer formation
acceptor = _acceptor_2b.copy()
context = _context_2b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K48"
_acceptor_3a, _context_3a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "branching"

# 6. tetramer deprotection
acceptor = _acceptor_3a.copy()
context = _context_3a.copy()
donor = ''
reaction = "FAKE_deprot"
_acceptor_3b, _context_3b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# TEST 2 Reaction Sequence
# 1. dimer formation
acceptor = histag_ubi_ubq_1_K63_aboc
donor = ubi_ubq_1_K48_ABOC_K63_SMAC
reaction = "K48"
acceptor_1a, context_1a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"

# 2. dimer deprotection
acceptor = acceptor_1a.copy()
context = context_1a.copy()
donor = ''
reaction = "SMAC_deprot"
acceptor_1b, context_1b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 3. trimer formation
acceptor = acceptor_1b.copy()
context = context_1b.copy()
donor = ubi_ubq_1_K63_SMAC
reaction = "K63"
acceptor_2a, context_2a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"

# 4. trimer deprotection
acceptor = acceptor_2a.copy()
context = context_2a.copy()
donor = ''
reaction = "FAKE_deprot"
acceptor_2b, context_2b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 5. tetramer formation
# NOTE(review): after this step chain 3 appears to carry a single conjugation,
# which a per-chain count would classify as 'elongation' — confirm the label.
acceptor = acceptor_2b.copy()
context = context_2b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K48"
acceptor_3a, context_3a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "branching"

# 6. tetramer deprotection
acceptor = acceptor_3a.copy()
context = context_3a.copy()
donor = ''
reaction = "SMAC_deprot"
acceptor_3b, context_3b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 7. pentamer formation
acceptor = acceptor_3b.copy()
context = context_3b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K63"
acceptor_4a, context_4a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "branching"

# 8. pentamer deprotection
acceptor = acceptor_4a.copy()
context = context_4a.copy()
donor = ''
reaction = "FAKE_deprot"
acceptor_4b, context_4b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )















































































































{'chain_number_list': [1, 2, 3, 4, 5],
 'chain_length_list': [82, 76, 76, 76],
 'multimer_string_name': '1ubq-1-(<K48_1ubq-2-(<K48_1ubq-3-(<K48_ABOC><K63_ABOC>)><K63_1ubq-4-(<K48_ABOC><K63_ABOC>)>)><K63_ABOC>)',
 'max_chain_number': 4,
 'ABOC_lysines': [[3, 'K48'], [3, 'K63'], [4, 'K48'], [4, 'K63'], [1, 'K63']],
 'SMAC_lysines': [],
 'free_lysines': [],
 'conjugated_lysines': [[1, 'K48'], [2, 'K48'], [2, 'K63']]}

In [None]:


# Display the final TEST 2 product dictionary (result of the last deprotection step).
acceptor_4b











































































































































































{'protein': '1ubq',
 'chain_number': 1,
 'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGHHHHHH',
 'chain_length': 82,
 'branching_sites': [{'site_name': 'K48',
   'sequence_id': 'FAG(K)QLE',
   'children': {'protein': '1ubq',
    'chain_number': 2,
    'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
    'chain_length': 76,
    'branching_sites': [{'site_name': 'K48',
      'sequence_id': 'FAG(K)QLE',
      'children': 'ABOC'},
     {'site_name': 'K11', 'sequence_id': 'LTG(K)TIT', 'children': ''},
     {'site_name': 'K63',
      'sequence_id': 'NIQ(K)EST',
      'children': {'protein': '1ubq',
       'chain_number': 3,
       'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
       'chain_length': 76,
       'branching_sites': [{'site_name': 'K48',
         'sequence_id': 'FAG(K)QLE',
         'children': {'protein': '1ubq',
          'chain_n

In [12]:
# Display the context returned by the TEST 2 pentamer-formation step.
context_4a

{'chain_number_list': [1, 2, 3, 4, 5, 6],
 'chain_length_list': [82, 76, 76, 76, 76],
 'multimer_string_name': '1ubq-1-(<K48_1ubq-2-(<K48_ABOC><K63_1ubq-3-(<K48_1ubq-4-(<K48_ABOC><K63_ABOC>)><K63_1ubq-5-(<K48_ABOC><K63_ABOC>)>)>)><K63_ABOC>)',
 'max_chain_number': 5,
 'ABOC_lysines': [[2, 'K48'],
  [4, 'K48'],
  [4, 'K63'],
  [5, 'K48'],
  [5, 'K63'],
  [1, 'K63']],
 'SMAC_lysines': [],
 'free_lysines': [],
 'conjugated_lysines': [[1, 'K48'], [2, 'K63'], [3, 'K48'], [3, 'K63']]}

In [None]:
def find_correct_E2_enzyme(
        reactant_context: str|dict,
        product_context: str|dict
        ):
    """
    Find the E2 enzyme that converts a reactant multimer into a product multimer.

    The newly conjugated lysine is located by comparing the two contexts'
    ``conjugated_lysines`` lists position by position; its lysine name gives the
    reaction type and the number of conjugated lysines on the same chain in the
    product gives the topology (1 -> elongation, 2 -> branching).

    Args:
        reactant_context (str|dict): Reactant context dictionary or JSON string.
            Must provide ``max_chain_number`` and ``conjugated_lysines``.
        product_context (str|dict): Product context dictionary or JSON string.
            Must provide ``max_chain_number`` and ``conjugated_lysines``.
    Returns:
        str: Enzyme name: 'gp78/Ube2g2' (K48 elongation), 'Ube2K' (K48 branching)
        or 'Ubc13/Mms2' (K63 elongation or branching).
    Raises:
        TypeError: If the product does not contain exactly one more chain than
            the reactant, if the linkage validation fails, if no new conjugation
            site can be found, if the new site is neither K48 nor K63, or if the
            acceptor chain does not carry exactly one or two conjugations.
    """

    # Convert JSON strings to dictionaries if necessary
    reactant_context = convert_json_to_dict(reactant_context)
    product_context = convert_json_to_dict(product_context)

    reactant_max_chain_number = int(reactant_context["max_chain_number"])
    product_max_chain_number = int(product_context["max_chain_number"])

    # Work on copies so the callers' context lists are never modified.
    reactant_conjugated_lysine_list = list(reactant_context["conjugated_lysines"])
    product_conjugated_lysine_list = list(product_context["conjugated_lysines"])

    # Only a single ubiquitin addition can be classified: the product must
    # contain exactly one more chain than the reactant.
    if product_max_chain_number != reactant_max_chain_number + 1:
        raise TypeError(f"product_max_chain_number: {product_max_chain_number} != reactant_max_chain_number + 1: {reactant_max_chain_number + 1}")

    # Only K48 and K63 linkages are supported.
    # NOTE(review): the exact semantics of all_strings_exist_in_list for a list
    # of [chain_number, lysine] pairs are not visible here — confirm it accepts
    # contexts that contain only one of the two linkage types.
    if not all_strings_exist_in_list(reactant_conjugated_lysine_list, ["K48", "K63"]):
        raise TypeError(f"reactant_conjugated_lysine_list: {reactant_conjugated_lysine_list} does not contain K48 or K63")
    if not all_strings_exist_in_list(product_conjugated_lysine_list, ["K48", "K63"]):
        raise TypeError(f"product_conjugated_lysine_list: {product_conjugated_lysine_list} does not contain K48 or K63")

    # The product has one more conjugated lysine than the reactant, so pad the
    # reactant list with one empty entry to let zip reach the last product site.
    padded_reactant_lysines = reactant_conjugated_lysine_list + [[]]

    # The first position at which the lists disagree is the newly bound site.
    last_bound_site__before_new_ubi = None
    for reactant_site, product_site in zip(padded_reactant_lysines, product_conjugated_lysine_list):
        if reactant_site != product_site:
            last_bound_site__before_new_ubi = product_site
            break
    if last_bound_site__before_new_ubi is None:
        raise TypeError("No new conjugation site detected between reactant and product contexts.")

    # The lysine name of the new site determines the reaction type.
    if "K48" in last_bound_site__before_new_ubi:
        reaction = "K48_reaction"
    elif "K63" in last_bound_site__before_new_ubi:
        reaction = "K63_reaction"
    else:
        raise TypeError(f"last_bound_site__before_new_ubi: {last_bound_site__before_new_ubi} does not contain K48 or K63")

    # Count the conjugated lysines that sit on the same chain as the new site:
    # one means the chain was extended, two means it now branches.
    target_chain = last_bound_site__before_new_ubi[0]
    conjugations_on_chain = sum(
        1 for site in product_conjugated_lysine_list
        if site and site[0] == target_chain
    )

    if conjugations_on_chain == 1:
        elongation_or_branching = 'elongation'
    elif conjugations_on_chain == 2:
        elongation_or_branching = 'branching'
    else:
        raise TypeError(f"count_second_element: {conjugations_on_chain} is not 1 or 2")

    # K48 elongation -> gp78/Ube2g2, K48 branching -> Ube2K,
    # K63 elongation or branching -> Ubc13/Mms2
    if reaction == 'K48_reaction' and elongation_or_branching == 'elongation':
        enzyme = 'gp78/Ube2g2'
    elif reaction == 'K48_reaction' and elongation_or_branching == 'branching':
        enzyme = 'Ube2K'
    elif reaction == 'K63_reaction' and elongation_or_branching in ['elongation', 'branching']:
        enzyme = 'Ubc13/Mms2'
    else:
        raise TypeError(f"Invalid combination of reaction: {reaction} and elongation_or_branching: {elongation_or_branching}")

    return enzyme

In [3]:
# Example input: every conjugated lysine of a product, as [chain_number, lysine_site],
# and the site that received the most recent ubiquitin.
product_conjugated_lysines = [[1, "K48"], [2, "K63"], [3, "K48"]]
new_bound_lysine = [3, "K48"]

def determine_elongation_or_branching(product_conjugated_lysines, new_bound_lysine):
    """
    Classify a ubiquitin addition as chain elongation or branching.

    Only the chain that received the new ubiquitin is inspected: the entries of
    ``product_conjugated_lysines`` whose chain number equals the chain number of
    ``new_bound_lysine`` are counted.

    Args:
        product_conjugated_lysines (list): All conjugated lysines of the product,
            each given as [chain_number, lysine_site].
        new_bound_lysine (list): The site of the most recent addition,
            given as [chain_number, lysine_site].

    Returns:
        str: 'elongation' for exactly one matching entry, 'branching' for exactly two.

    Raises:
        TypeError: If the number of matching entries is neither 1 nor 2.
    """
    acceptor_chain = new_bound_lysine[0]
    sites_on_acceptor = [
        site for site in product_conjugated_lysines if site[0] == acceptor_chain
    ]
    occupancy = len(sites_on_acceptor)

    outcome_by_occupancy = {1: "elongation", 2: "branching"}
    if occupancy not in outcome_by_occupancy:
        raise TypeError(
            f"Count of chain number '{acceptor_chain}' is {occupancy}, expected 1 or 2"
        )
    return outcome_by_occupancy[occupancy]

determine_elongation_or_branching(product_conjugated_lysines, new_bound_lysine)

'elongation'

In [8]:
import json 
import logging
import copy
import sys

# figure out the path issues
# home_dir = os.path.expanduser('~')
# local_path = '/home/erickummelstedt/lecodebase/ubiquitinformatics/src/main.py'
local_path = '/Users/ekummelstedt/le_code_base/ubiquitinformatics/back_end'
sys.path.insert(0, local_path)

from src.utils import convert_json_to_dict
from src.logging_utils import log_protein_details, log_branching_details, log_end_of_branching, log_end_of_protein

from src.main import iterate_through_ubiquitin
from src.simulation import ubiquitin_simulation


from tests.test_data import \
    five_level_nested_ubiquitin_,\
    k48_dimer_ubiquitin,\
    string_k48_dimer_ubiquitin,\
    ubiquitin_monomer, \
    histag_ubiquitin_monomer,\
    BASE_WORKING_DICT, \
    BASE_CONTEXT, \
    ubi_ubq_1_K48_SMAC,\
    ubi_ubq_1_K63_SMAC,\
    ubi_ubq_1_K48_SMAC_K63_ABOC,\
    ubi_ubq_1_K48_ABOC_K63_SMAC,\
    ubi_ubq_1_K48_ABOC_K63_ABOC,\
    histag_ubi_ubq_1,\
    histag_ubi_ubq_1_K48_aboc,\
    histag_ubi_ubq_1_K63_aboc

# Build the TEST 1 reaction sequence by chaining ubiquitin_simulation calls and
# record, for each conjugation step, the topology and enzyme that are expected.
# Every call returns a (ubiquitin dictionary, context dictionary) pair; the
# "a" suffix marks the product of a conjugation step and the "b" suffix the
# product of the deprotection step that follows it.
# `branching_or_elgonation` (sic) and `enyzme` (sic) are hand-written
# annotations only; nothing in this cell consumes them.

# TEST 1 Reaction Sequence
# 0. starting monomer: obtain its (dictionary, context) pair
_acceptor_0, _context_0 = iterate_through_ubiquitin(histag_ubi_ubq_1_K63_aboc)
# 1. dimer formation
acceptor = _acceptor_0.copy()
context = _context_0.copy()
donor = ubi_ubq_1_K48_SMAC
reaction = "K48"
_acceptor_1a, _context_1a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"
enyzme = "gp78/Ube2g2"

# 2. dimer deprotection
# NOTE(review): "FAKE_deprot" is presumably a placeholder deprotection that
# leaves the protecting groups untouched — confirm against ubiquitin_simulation.
acceptor = _acceptor_1a.copy()
context = _context_1a.copy()
donor = ''
reaction = "FAKE_deprot"
_acceptor_1b, _context_1b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )

# 3. trimer formation
acceptor = _acceptor_1b.copy()
context = _context_1b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K63"
_acceptor_2a, _context_2a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "elongation"
enyzme = "Ubc13/Mms2"


# 4. trimer deprotection
acceptor = _acceptor_2a.copy()
context = _context_2a.copy()
donor = ''
reaction = "SMAC_deprot"
_acceptor_2b, _context_2b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )


# 5. tetramer formation
acceptor = _acceptor_2b.copy()
context = _context_2b.copy()
donor = ubi_ubq_1_K48_ABOC_K63_ABOC
reaction = "K48"
_acceptor_3a, _context_3a = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )
branching_or_elgonation = "branching"
enyzme = "Ube2K"


# 6. tetramer deprotection
acceptor = _acceptor_3a.copy()
context = _context_3a.copy()
donor = ''
reaction = "FAKE_deprot"
_acceptor_3b, _context_3b = ubiquitin_simulation(
    parent_dictionary=acceptor,
    ubi_molecule_to_add=donor,
    type_of_reaction=reaction
    )





















































































































In [None]:
def assign_enzyme(reaction, elongation_or_branching):
    """
    Map a reaction type and chain topology to the name of the E2 enzyme.

    K48 reactions use a different enzyme for elongation and for branching.
    K63 reactions always map to the same enzyme; ``elongation_or_branching``
    is not inspected for them.

    Args:
        reaction (str): 'K48_reaction' or 'K63_reaction'.
        elongation_or_branching (str): 'elongation' or 'branching'.

    Returns:
        str: 'gp78/Ube2g2', 'Ube2K' or 'Ubc13/Mms2'.

    Raises:
        TypeError: If the reaction is unknown, or a K48 reaction is paired with
            a topology other than 'elongation' or 'branching'.
    """
    if reaction == "K48_reaction":
        # The K48 enzyme depends on the topology of the addition.
        k48_enzymes = (("elongation", "gp78/Ube2g2"), ("branching", "Ube2K"))
        for topology, enzyme in k48_enzymes:
            if elongation_or_branching == topology:
                return enzyme

    if reaction == "K63_reaction":
        return "Ubc13/Mms2"

    raise TypeError(
        f"Invalid combination of reaction: {reaction} and "
        f"elongation_or_branching: {elongation_or_branching}"
    )

def determine_elongation_or_branching(product_conjugated_lysines, new_bound_lysine):
    """
    Classify a ubiquitin addition as chain elongation or branching.

    Only the chain that received the new ubiquitin is inspected: the entries of
    ``product_conjugated_lysines`` whose chain number equals the chain number of
    ``new_bound_lysine`` are counted.

    Args:
        product_conjugated_lysines (list): All conjugated lysines of the product,
            each given as [chain_number, lysine_site].
        new_bound_lysine (list): The site of the most recent addition,
            given as [chain_number, lysine_site].

    Returns:
        str: 'elongation' for exactly one matching entry, 'branching' for exactly two.

    Raises:
        TypeError: If the number of matching entries is neither 1 nor 2.
    """
    acceptor_chain = new_bound_lysine[0]
    sites_on_acceptor = [
        site for site in product_conjugated_lysines if site[0] == acceptor_chain
    ]
    occupancy = len(sites_on_acceptor)

    outcome_by_occupancy = {1: "elongation", 2: "branching"}
    if occupancy not in outcome_by_occupancy:
        raise TypeError(
            f"Count of chain number '{acceptor_chain}' is {occupancy}, expected 1 or 2"
        )
    return outcome_by_occupancy[occupancy]

def determine_reaction_type(new_bound_lysine):
    """
    Derive the reaction type from the lysine site of the newly bound ubiquitin.

    Args:
        new_bound_lysine (list): [chain_number, lysine_site], e.g. [3, "K48"].
            Only the second element is inspected.

    Returns:
        str: 'K48_reaction' or 'K63_reaction'.

    Raises:
        TypeError: If the lysine site is neither "K48" nor "K63".
    """
    site = new_bound_lysine[1]

    # Supported lysine sites paired with the reaction label each one produces.
    supported_sites = (("K48", "K48_reaction"), ("K63", "K63_reaction"))
    for supported_site, reaction_label in supported_sites:
        if site == supported_site:
            return reaction_label

    raise TypeError(
        f"new_bound_lysine: {new_bound_lysine} "
        f"does not contain K48 or K63"
    )
    
def assign_correct_E2_enzyme(
        reactant_context: str | dict,
        product_context: str | dict
        ):
    """
    Determine the enzyme used in a reaction based on structural context changes
    between a reactant and product ubiquitin chain.

    The newly conjugated lysine is found by comparing the two contexts'
    ``conjugated_lysines`` lists position by position. Its lysine site selects
    the reaction type (determine_reaction_type), the number of conjugations on
    its chain selects the topology (determine_elongation_or_branching), and the
    pair is mapped to an enzyme name (assign_enzyme).

    Args:
        reactant_context (str | dict): Reactant context dictionary or JSON string.
            Must provide ``max_chain_number`` and ``conjugated_lysines``.
        product_context (str | dict): Product context dictionary or JSON string.
            Must provide ``max_chain_number`` and ``conjugated_lysines``.

    Returns:
        str: Enzyme type ('Ube2K', 'gp78/Ube2g2', or 'Ubc13/Mms2').

    Raises:
        TypeError: If the product does not contain exactly one more chain than
            the reactant, if either context contains a conjugated lysine other
            than K48 or K63, if no new conjugation site is found, or if one of
            the helper classifications rejects the new site.
    """
    reactant_context = convert_json_to_dict(reactant_context)
    product_context = convert_json_to_dict(product_context)

    # Only a single ubiquitin addition can be classified.
    if int(product_context["max_chain_number"]) != int(reactant_context["max_chain_number"]) + 1:
        raise TypeError(
            f"product_max_chain_number: {product_context['max_chain_number']} != "
            f"reactant_max_chain_number + 1: {int(reactant_context['max_chain_number']) + 1}"
        )

    # Collect the lysine sites used in each context. Entries that are not
    # two-element lists are skipped here rather than reported.
    reactant_lysine_types = {
        lys[1] for lys in reactant_context["conjugated_lysines"] if isinstance(lys, list) and len(lys) == 2
    }
    product_lysine_types = {
        lys[1] for lys in product_context["conjugated_lysines"] if isinstance(lys, list) and len(lys) == 2
    }

    unsupported_lysines_reactant = reactant_lysine_types - {"K48", "K63"}
    unsupported_lysines_product = product_lysine_types - {"K48", "K63"}

    if unsupported_lysines_reactant:
        raise TypeError(
            f"Reactant context contains unsupported conjugated lysines: {unsupported_lysines_reactant}. "
            "This function only supports K48 and K63 conjugated lysines."
        )
    if unsupported_lysines_product:
        raise TypeError(
            f"Product context contains unsupported conjugated lysines: {unsupported_lysines_product}. "
            "This function only supports K48 and K63 conjugated lysines."
        )

    # Pad the reactant list with one empty entry so zip also reaches the case
    # where the new site is the last entry of the product list.
    reactant_conjugated_lysines = reactant_context["conjugated_lysines"].copy() + [[]]
    product_conjugated_lysines = product_context["conjugated_lysines"].copy()

    # The first position at which the two lists disagree is the new site.
    for reactant_lysine, product_lysine in zip(
        reactant_conjugated_lysines, product_conjugated_lysines
    ):
        if reactant_lysine != product_lysine:
            new_bound_lysine = product_lysine
            break
    else:
        raise TypeError("No new conjugation site detected between reactant and product contexts.")

    reaction = determine_reaction_type(new_bound_lysine)

    elongation_or_branching = determine_elongation_or_branching(
        product_conjugated_lysines, new_bound_lysine
    )

    enzyme = assign_enzyme(reaction, elongation_or_branching)

    return enzyme


# Backward-compatible alias: this function was previously defined under the
# name `v`, while the notebook calls it as assign_correct_E2_enzyme.
v = assign_correct_E2_enzyme

In [13]:
# Classify the TEST 1 tetramer-formation step: the reactant is the context after
# trimer deprotection and the product is the context after tetramer formation.
assign_correct_E2_enzyme(_context_2b, _context_3a)

'Ube2K'

In [9]:
# Display the context returned by the TEST 1 tetramer-formation step.
_context_3a

{'chain_number_list': [1, 2, 3, 4, 5],
 'chain_length_list': [82, 76, 76, 76],
 'multimer_string_name': '1ubq-1-(<K48_1ubq-2-(<K48_1ubq-3-(<K48_ABOC><K63_ABOC>)><K63_1ubq-4-(<K48_ABOC><K63_ABOC>)>)><K63_ABOC>)',
 'max_chain_number': 4,
 'ABOC_lysines': [[3, 'K48'], [3, 'K63'], [4, 'K48'], [4, 'K63'], [1, 'K63']],
 'SMAC_lysines': [],
 'free_lysines': [],
 'conjugated_lysines': [[1, 'K48'], [2, 'K48'], [2, 'K63']]}

In [11]:
# Display the context returned by the TEST 1 trimer-deprotection step.
_context_2b

{'chain_number_list': [1, 2, 3, 4],
 'chain_length_list': [82, 76, 76],
 'multimer_string_name': '1ubq-1-(<K48_1ubq-2-(<K63_1ubq-3-(<K48_ABOC><K63_ABOC>)>)><K63_ABOC>)',
 'max_chain_number': 3,
 'ABOC_lysines': [[3, 'K48'], [3, 'K63'], [1, 'K63']],
 'SMAC_lysines': [],
 'free_lysines': [[2, 'K48']],
 'conjugated_lysines': [[1, 'K48'], [2, 'K63']]}