DONE
- find_branching_site
- validate_protein_keys

Do the following:
- check_branching_sites
- check_branching_sequences
- validate_branching_sites
- check_branching_site_sequence_match

TESTING_FUNCTION (function to build recursive tests for) = validate_protein_keys

Create a function called inject_protein_key.

Within the ubiquitinformatics codebase in VS Code, the existing tests for TESTING_FUNCTION, etc., primarily test this function in isolation or at the top level of the input dictionary.

@pytest.mark.parametrize(
    "valid_dict",
    [
        # ✅ Test Case 1: Valid ubiquitin dictionary
        {
            "protein": "1ubq",
            "chain_number": 1,
            "FASTA_sequence": "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG",
            "chain_length": 76,
            "branching_sites": [
                {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""},
                {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
                {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
                {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
                {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
                {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
                {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": ""},
                {"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": ""},
            ],
        },
        # ✅ Test Case 2: Another valid ubiquitin with different sequence and length
        # NOTE(review): chain_length is 82, but the sequence looks like the
        # 76-residue one above plus "DHHHHHH" (7 residues) — confirm the
        # intended length.
        {
            "protein": "2ubq",
            "chain_number": 2,
            "FASTA_sequence": "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGDHHHHHH",
            "chain_length": 82,
            "branching_sites": [
                {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""},
                {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
                {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
                {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
                {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
                {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
                {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": ""},
                {"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": ""},
            ],
        },
    ],
)
def test_validate_protein_keys_valid(valid_dict):
    """
    Check that `validate_protein_keys` accepts a dictionary holding exactly the
    five keys used by the fixtures above.

    Args:
        valid_dict: Parametrized protein dictionary with the keys "protein",
            "chain_number", "FASTA_sequence", "chain_length" and
            "branching_sites".

    The test fails (via `pytest.fail`) if the call raises `KeyError`; any other
    exception propagates unchanged.
    """
    try:
        validate_protein_keys(valid_dict)
    except KeyError:
        # Turn the unexpected KeyError into an explicit, readable test failure.
        pytest.fail("validate_protein_keys raised KeyError unexpectedly for a valid dictionary.")

@pytest.mark.parametrize("invalid_dict, missing_keys", [
    # ❌ Test Case 3: Missing "chain_number"
    (
        {
            "protein": "1ubq",
            "FASTA_sequence": "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG",
            "chain_length": 76,
            "branching_sites": [
                {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""},
                {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
                {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
                {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
                {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
                {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
                {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": ""},
                {"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": ""},
            ],
        },
        {"chain_number"},
    ),
    # ❌ Test Case 4: Missing multiple required keys
    (
        {
            "protein": "1ubq",
            "branching_sites": [
                {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""},
                {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
                {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
                {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
                {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
                {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
                {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": ""},
                {"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": ""},
            ],
        },
        {"chain_length", "chain_number", "FASTA_sequence"},
    ),
])
def test_validate_protein_keys_missing_keys(invalid_dict, missing_keys):
    """
    Test that `validate_protein_keys` raises a KeyError when required keys are
    missing, and that the error names every key that was left out.

    Args:
        invalid_dict: Protein dictionary with one or more required keys removed.
        missing_keys: Set of the key names removed from `invalid_dict`.
    """
    with pytest.raises(KeyError, match=r"Missing required keys: .*Allowed keys: .*") as exc_info:
        validate_protein_keys(invalid_dict)

    # The regex above already guarantees that "Allowed keys:" follows
    # "Missing required keys:". Only the text before "Allowed keys:" is
    # inspected, so a key that merely shows up in the allowed-keys listing
    # does not count as reported.
    # NOTE(review): assumes the message lists the missing key names right after
    # "Missing required keys:" — confirm against validate_protein_keys.
    error_msg = str(exc_info.value)
    missing_section = error_msg.partition("Allowed keys:")[0]
    unreported = sorted(key for key in missing_keys if key not in missing_section)
    assert not unreported, \
        f"Expected missing keys {unreported} not reported in error: {error_msg}"

@pytest.mark.parametrize(
    "invalid_dict, invalid_keys",
    [
        # ❌ Test Case 5: Dictionary with an invalid key
        (
            {
                "protein": "1ubq",
                "chain_number": 1,
                "FASTA_sequence": "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG",
                "chain_length": 76,
                "branching_sites": [
                    {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""},
                    {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
                    {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
                    {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
                    {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
                    {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
                    {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": ""},
                    {"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": ""},
                ],
                "extra_key": "invalid_value",
            },
            ["extra_key"],
        ),
        # ❌ Test Case 6: Dictionary with multiple invalid keys
        (
            {
                "protein": "1ubq",
                "chain_number": 1,
                "FASTA_sequence": "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG",
                "chain_length": 76,
                "branching_sites": [
                    {"site_name": "M1", "sequence_id": "(M)QIF", "children": ""},
                    {"site_name": "K6", "sequence_id": "IFV(K)TLT", "children": ""},
                    {"site_name": "K11", "sequence_id": "LTG(K)TIT", "children": ""},
                    {"site_name": "K27", "sequence_id": "ENV(K)AKI", "children": ""},
                    {"site_name": "K29", "sequence_id": "VKA(K)IQD", "children": ""},
                    {"site_name": "K33", "sequence_id": "IQD(K)EGI", "children": ""},
                    {"site_name": "K48", "sequence_id": "FAG(K)QLE", "children": ""},
                    {"site_name": "K63", "sequence_id": "NIQ(K)EST", "children": ""},
                ],
                "extra_key_1": "value1",
                "extra_key_2": "value2",
            },
            ["extra_key_1", "extra_key_2"],
        ),
    ],
)
def test_validate_protein_keys_invalid_keys(invalid_dict, invalid_keys):
    """
    Check that `validate_protein_keys` raises a KeyError for dictionaries that
    carry unexpected keys.

    Args:
        invalid_dict: Protein dictionary containing one or more extra keys.
        invalid_keys: List of the extra key names; it is handed, together with
            the error text, to `match_assertion_error_contains`.
    """
    with pytest.raises(KeyError) as raised:
        validate_protein_keys(invalid_dict)

    error_msg = str(raised.value)
    # The helper decides whether the expected parts occur in the message.
    parts_found = match_assertion_error_contains(error_msg, invalid_keys)
    assert parts_found, \
        f"Expected parts {invalid_keys} not found in error: {error_msg}"


def test_validate_protein_keys_missing_and_invalid_keys():
    """
    Check `validate_protein_keys` on a dictionary that both lacks required keys
    and carries an unexpected one.

    The call must raise a KeyError whose message contains, in this order,
    "Missing required keys: ", "Invalid keys found: " and "Allowed keys: ".
    """
    expected_message = r"Missing required keys: .*Invalid keys found: .*Allowed keys: .*"

    incomplete_entry = dict(
        protein="1ubq",  # Valid key
        FASTA_sequence="MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG",  # Valid key
        extra_key="unexpected_value",  # Invalid key
    )

    with pytest.raises(KeyError, match=expected_message):
        validate_protein_keys(incomplete_entry)


The goal now is to ensure these validations are triggered correctly for nested dictionaries (with 4 or more ubiquitins bound together) during the recursive traversal performed by iterate_through_ubiquitin and its helper inner_wrapper_iterate_through_ubiquitin. For each test above, please write a new test that triggers the same validation within these nested dictionaries. Use the base nested dictionary found in test_data.py. Make sure the tests use iterate_through_ubiquitin as the function under test. Do not apply the code within VS Code.