In [None]:
# make pep8 compliant

# type of reaction can be either K48 or K63, enzyme is not defined here
# Ube2K = K48
# Ube13/Mms2 = K63

# instead of find_max_chain_number, find_ABOC_lysines, find_SMAC_lysines, find_free_lysines, find_conjugated_lysines, this information is pulled out of iterate_through_ubiquitin and the context dictionary it produces

# instead of relabelling_ubiquitin_numbers, use iterate_through_ubiquitin and the context dictionary it produces

## loop simulate_next_steps with p
def simulate_reactions_step(
    acceptor_history: list,
    reaction_history: list,
    monomer_history: list,
    context_history: list,
    monomer_list: list
):
    """
    Branch one simulation path into every (reaction, monomer) continuation.

    For each reaction type ('K48' first, then 'K63') and each monomer in
    ``monomer_list``, ``ubiquitin_simulation`` is called with the last entry
    of ``acceptor_history``, the monomer and the reaction type. The pair it
    returns (new multimer, new context) extends fresh copies of the acceptor
    and context histories; the reaction type and the monomer extend fresh
    copies of the reaction and monomer histories. Nothing is appended to
    the four history lists passed in.

    Args:
        acceptor_history (list): Acceptors on this path so far; only the
            last entry is handed to ``ubiquitin_simulation``.
        reaction_history (list): Reaction types applied so far.
        monomer_history (list): Monomers used so far.
        context_history (list): Contexts collected so far.
        monomer_list (list): Monomers to try in this step.

    Returns:
        tuple: Four parallel lists, in the order (acceptor histories,
        reaction histories, monomer histories, context histories). Each
        holds one extended history per (reaction, monomer) pair, ordered
        by reaction type first and monomer second. All four are empty
        when ``monomer_list`` is empty.
    """
    branched_acceptors = []
    branched_reactions = []
    branched_monomers = []
    branched_contexts = []

    for linkage in ('K48', 'K63'):
        for donor in monomer_list:
            # React the most recent acceptor with this donor at this linkage
            product, product_context = ubiquitin_simulation(
                acceptor_history[-1], donor, linkage
            )

            # List concatenation creates a new list, so every branch owns
            # its own history and the caller's lists stay as they were.
            branched_acceptors.append(acceptor_history + [product])
            branched_reactions.append(reaction_history + [linkage])
            branched_monomers.append(monomer_history + [donor])
            branched_contexts.append(context_history + [product_context])

    return (
        branched_acceptors,
        branched_reactions,
        branched_monomers,
        branched_contexts
    )

def _monomer_record(his_tagged=False, k48='', k63=''):
    """Return the stringified dict describing one chain-1 '1ubq' monomer.

    ``his_tagged`` appends six histidines to the 76-residue sequence.
    ``k48`` and ``k63`` become the 'children' value of those two sites;
    every other site gets an empty string. ``str`` applied to a dict gives
    the single-quoted text form that the library stores as its values.
    """
    sequence = (
        'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQ'
        'RLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG'
    )
    if his_tagged:
        sequence += 'HHHHHH'
    occupied = {'K48': k48, 'K63': k63}
    site_motifs = (
        ('K48', 'FAG(K)QLE'),
        ('K11', 'LTG(K)TIT'),
        ('K63', 'NIQ(K)EST'),
        ('K6', 'IFV(K)TLT'),
        ('K27', 'ENV(K)AKI'),
        ('K29', 'VKA(K)IQD'),
        ('K33', 'IQD(K)EGI'),
        ('M1', '(M)QIF'),
    )
    return str({
        'protein': '1ubq',
        'chain_number': 1,
        'FASTA_sequence': sequence,
        'chain_length': len(sequence),
        'branching_sites': [
            {
                'site_name': site,
                'sequence_id': motif,
                'children': occupied.get(site, ''),
            }
            for site, motif in site_motifs
        ],
    })


# Monomer library: name -> stringified monomer dict. Names spell out which
# of K48 / K63 carries an 'SMAC' or 'ABOC' entry; "histag" entries use the
# sequence with the six-histidine tail.
ubiquitin_library = {
    "histag_ubi_ubq_1": _monomer_record(his_tagged=True),
    "histag_ubi_ubq_1_K48_aboc": _monomer_record(
        his_tagged=True, k48='ABOC'
    ),
    "histag_ubi_ubq_1_K63_aboc": _monomer_record(
        his_tagged=True, k63='ABOC'
    ),
    "ubi_ubq_1": _monomer_record(),
    "ubi_ubq_1_K48_SMAC": _monomer_record(k48='SMAC'),
    "ubi_ubq_1_K63_SMAC": _monomer_record(k63='SMAC'),
    "ubi_ubq_1_K48_SMAC_K63_ABOC": _monomer_record(k48='SMAC', k63='ABOC'),
    "ubi_ubq_1_K48_ABOC_K63_SMAC": _monomer_record(k48='ABOC', k63='SMAC'),
    "ubi_ubq_1_K48_ABOC_K63_ABOC": _monomer_record(k48='ABOC', k63='ABOC'),
}

## pull the donor and acceptor entries out of the ubiquitin library
ubi_donor_list = [
    ubiquitin_library[name]
    for name in (
        'ubi_ubq_1_K48_SMAC',
        'ubi_ubq_1_K63_SMAC',
        'ubi_ubq_1_K48_SMAC_K63_ABOC',
        'ubi_ubq_1_K48_ABOC_K63_SMAC',
        'ubi_ubq_1_K48_ABOC_K63_ABOC',
    )
]
(
    ubi_ubq_1_K48_SMAC,
    ubi_ubq_1_K63_SMAC,
    ubi_ubq_1_K48_SMAC_K63_ABOC,
    ubi_ubq_1_K48_ABOC_K63_SMAC,
    ubi_ubq_1_K48_ABOC_K63_ABOC,
) = ubi_donor_list

ubi_acceptor_list = [
    ubiquitin_library[name]
    for name in (
        'histag_ubi_ubq_1',
        'histag_ubi_ubq_1_K48_aboc',
        'histag_ubi_ubq_1_K63_aboc',
    )
]
(
    histag_ubi_ubq_1,
    histag_ubi_ubq_1_K48_aboc,
    histag_ubi_ubq_1_K63_aboc,
) = ubi_acceptor_list


Write 6 comprehensive tests for simulate_reactions_step using a variation of 
    the ubi_acceptor_list as initial acceptor_history
    ubi_donor_list as initial monomer_list
    empty lists with the same number of components for reaction_history, monomer_history, and context_history
	•	Ensure the tests cover all key logic branches and edge cases within the function.
	•	Verify every test has one note.
	•	Make sure the tests are PEP 8 compliant.


SyntaxError: invalid character '•' (U+2022) (1783760712.py, line 96)

## instead of json.loads(x) code use convert_json_to_dict 

## use log_protein_details(working_dictionary, context) instead of 
# logging.info(f"Protein: {working_dictionary['protein']}")
# logging.info(f"Sequence: {working_dictionary['FASTA_sequence']}")
# logging.info(f"Chain Number List: {context['chain_number_list']}")
# logging.info(f"Chain Length: {working_dictionary['chain_length']}")
# logging.info(f"Chain Number: {working_dictionary['chain_number']}")
# logging.info(f"Branching Sites: {working_dictionary['branching_sites']}")


## use log_branching_details(branch, working_dictionary, context): instead of 
#  logging.info(' ===== START OF LYSINE SITE =====  ')
#  logging.info(f"Chain Number: {working_dictionary['chain_number']}")
#  logging.info(f"Lysine Site: {bra['site_name']}")

# use log_end_of_branching(): instead of logging.info(' ===== END OF LYSINE SITE =====  ')

# use log_end_of_protein(working_dictionary) instead of logging.info(f' ===== END OF PROTEIN - CHAIN NUMBER: {working_dictionary["chain_number"]} =====  ')

# type of reaction can be either K48 or K63, enzyme is not defined here
# Ube2K = K48
# Ube13/Mms2 = K63

## instead of find_max_chain_number, find_ABOC_lysines, find_SMAC_lysines, find_free_lysines, find_conjugated_lysines, this information is pulled out of iterate_through_ubiquitin and the context dictionary it produces

def _ubq_fixture_chain(chain_number, **children):
    """Return one '1ubq' chain dict for the nested fixture.

    Keyword names are site names (for example ``K48=...``) and their values
    are the child chain dicts attached at those sites. Sites that are not
    named get an empty string as their 'children' value.
    """
    site_motifs = (
        ("M1", "(M)QIF"),
        ("K6", "IFV(K)TLT"),
        ("K11", "LTG(K)TIT"),
        ("K27", "ENV(K)AKI"),
        ("K29", "VKA(K)IQD"),
        ("K33", "IQD(K)EGI"),
        ("K48", "FAG(K)QLE"),
        ("K63", "NIQ(K)EST"),
    )
    return {
        "protein": "1ubq",
        "chain_number": chain_number,
        "FASTA_sequence": (
            "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQ"
            "RLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
        ),
        "chain_length": 76,
        "branching_sites": [
            {
                "site_name": site,
                "sequence_id": motif,
                "children": children.get(site, ""),
            }
            for site, motif in site_motifs
        ],
    }


# Five chains in total: chain 1 carries chain 2 on K48, chain 2 carries
# chain 3 on K63, and chain 3 carries chain 4 on K6 and chain 5 on K11.
five_level_nested_ubiquitin_ = _ubq_fixture_chain(
    1,
    K48=_ubq_fixture_chain(
        2,
        K63=_ubq_fixture_chain(
            3,
            K6=_ubq_fixture_chain(4),
            K11=_ubq_fixture_chain(5),
        ),
    ),
)



Write 12 comprehensive tests for handle_lysine_modification using the five_level_nested_ubiquitin_ fixture from test_data.py.
	•	Ensure the tests cover all key logic branches and edge cases within the function.
	•	Verify every test has one note.
	•	Make sure the tests are PEP 8 compliant.


In [1]:
def _nested_chain(protein, chain_number, **children):
    """Return one chain dict of the nested structure bound to ``hi``.

    Keyword names are site names (for example ``K48=...``) and their values
    are the child chain dicts attached at those sites. Sites that are not
    named get an empty string as their 'children' value.
    """
    site_motifs = (
        ('M1', '(M)QIF'),
        ('K6', 'IFV(K)TLT'),
        ('K11', 'LTG(K)TIT'),
        ('K27', 'ENV(K)AKI'),
        ('K29', 'VKA(K)IQD'),
        ('K33', 'IQD(K)EGI'),
        ('K48', 'FAG(K)QLE'),
        ('K63', 'NIQ(K)EST'),
    )
    return {
        'protein': protein,
        'chain_number': chain_number,
        'FASTA_sequence': (
            'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQ'
            'RLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG'
        ),
        'chain_length': 76,
        'branching_sites': [
            {
                'site_name': site,
                'sequence_id': motif,
                'children': children.get(site, ''),
            }
            for site, motif in site_motifs
        ],
    }


# 17 chains. The '1ubq' chains are 1, 2, 4, 5 and 9; every other chain is
# a 'dummy_protein'. Each call below names the sites that carry a child.
hi = _nested_chain(
    '1ubq', 1,
    K48=_nested_chain(
        '1ubq', 2,
        K48=_nested_chain('dummy_protein', 3),
        K63=_nested_chain(
            '1ubq', 4,
            K6=_nested_chain(
                '1ubq', 5,
                K48=_nested_chain('dummy_protein', 6),
                K63=_nested_chain(
                    'dummy_protein', 7,
                    K48=_nested_chain('dummy_protein', 8),
                ),
            ),
            K11=_nested_chain(
                '1ubq', 9,
                K48=_nested_chain('dummy_protein', 10),
                K63=_nested_chain(
                    'dummy_protein', 11,
                    K48=_nested_chain('dummy_protein', 12),
                ),
            ),
            K48=_nested_chain('dummy_protein', 13),
            K63=_nested_chain(
                'dummy_protein', 14,
                K48=_nested_chain('dummy_protein', 15),
            ),
        ),
    ),
    K63=_nested_chain(
        'dummy_protein', 16,
        K48=_nested_chain('dummy_protein', 17),
    ),
)


In [2]:
hi

{'protein': '1ubq',
 'chain_number': 1,
 'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
 'chain_length': 76,
 'branching_sites': [{'site_name': 'M1',
   'sequence_id': '(M)QIF',
   'children': ''},
  {'site_name': 'K6', 'sequence_id': 'IFV(K)TLT', 'children': ''},
  {'site_name': 'K11', 'sequence_id': 'LTG(K)TIT', 'children': ''},
  {'site_name': 'K27', 'sequence_id': 'ENV(K)AKI', 'children': ''},
  {'site_name': 'K29', 'sequence_id': 'VKA(K)IQD', 'children': ''},
  {'site_name': 'K33', 'sequence_id': 'IQD(K)EGI', 'children': ''},
  {'site_name': 'K48',
   'sequence_id': 'FAG(K)QLE',
   'children': {'protein': '1ubq',
    'chain_number': 2,
    'FASTA_sequence': 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG',
    'chain_length': 76,
    'branching_sites': [{'site_name': 'M1',
      'sequence_id': '(M)QIF',
      'children': ''},
     {'site_name': 'K6', 'sequence_id': 'IFV(K)TLT', 'children': ''},
     {