In [21]:
from src.utils import get_project_root

# The input file is expected to hold the whole protein sequence on its first line.
with open(get_project_root() / "inputs/ryr1_rabit.txt", "r") as f:
    # readline() returns the line *including* its terminator; strip it so a
    # trailing newline is never counted as a residue or written downstream.
    line = f.readline().strip()


# Number of residues in the full-length sequence.
len(line)

5037

In [30]:
from typing import NamedTuple


class SubseqResult(NamedTuple):
    """Return value of ``extract_and_remove_disordered``.

    Holds the extracted subsequence and the pocket positions re-indexed so
    that they refer to that subsequence instead of the original sequence.
    """

    final_sequence: str
    new_pocket_location: dict[str, list[int]]  # 1-based index in final_sequence


# Pocket residue positions, keyed by ligand name. Positions are 1-based
# indices into the full-length sequence (they are looked up as ``line[p - 1]``).
pockets = {
    "cff": [
        4716,  # main pocket
        3753,
        4996,
    ],
    "atp": [
        4979,
        4954
    ]
}


def extract_and_remove_disordered(
    sequence: str,
    aa_start: int,
    aa_end: int,
    pocket_locations: dict[str, list[int]],
) -> SubseqResult:
    """Extract residues ``aa_start..aa_end`` and re-index pocket positions.

    Note: despite the name, this function does not remove any disordered
    regions; it only cuts one contiguous window out of ``sequence``.

    Args:
        sequence: Full-length sequence, one character per residue.
        aa_start: 1-based position of the first residue to keep. Values
            below 1 are clamped to the start of the sequence.
        aa_end: 1-based position of the last residue to keep (inclusive).
            Values beyond the sequence length are clamped to its end.
        pocket_locations: Mapping of pocket name to 1-based residue
            positions in ``sequence``.

    Returns:
        A ``SubseqResult`` with the extracted subsequence and the pocket
        positions shifted so they are 1-based indices into that subsequence.
        Positions are shifted but not validated: a pocket outside the
        extracted window yields an index outside ``1..len(final_sequence)``.
    """
    # Convert the 1-based inclusive window to a clamped 0-based half-open slice.
    start_idx = max(aa_start - 1, 0)
    end_idx = min(aa_end, len(sequence))
    sub_seq = sequence[start_idx:end_idx]

    # Shift by the number of residues actually dropped from the front. Using
    # the clamped index (rather than ``aa_start - 1``) keeps pocket positions
    # aligned with the slice even when ``aa_start`` is below 1.
    new_pockets = {
        key: [p - start_idx for p in locs]
        for key, locs in pocket_locations.items()
    }

    return SubseqResult(sub_seq, new_pockets)


# disordered_regions = [
#     (4255, 4539),
#     (4588, 4625)
# ]

# Cut residues 3750..5037 out of the full-length sequence and re-index the pockets.
subseq_result = extract_and_remove_disordered(
    sequence=line, aa_start=3750, aa_end=5037, pocket_locations=pockets
)

# Residue letters at every pocket position, read from the full-length sequence ...
pocket_aas_old = {
    name: [line[pos - 1] for pos in positions]
    for name, positions in pockets.items()
}

# ... and from the extracted subsequence using the shifted positions.
pocket_aas_new = {
    name: [subseq_result.final_sequence[pos - 1] for pos in positions]
    for name, positions in subseq_result.new_pocket_location.items()
}


# The shift is correct only if both lookups give the same residues per pocket.
for key in pockets:
    pocket_for_old, pocket_for_new = pocket_aas_old[key], pocket_aas_new[key]

    print(
        f"Pocket {key} old: {pocket_for_old} new: {pocket_for_new}. "
        f"Match: {pocket_for_old == pocket_for_new}. "
        f"Locations old: {pockets[key]} new: {subseq_result.new_pocket_location[key]}"
    )

Pocket cff old: ['W', 'F', 'I'] new: ['W', 'F', 'I']. Match: True. Locations old: [4716, 3753, 4996] new: [967, 4, 1247]
Pocket atp old: ['T', 'M'] new: ['T', 'M']. Match: True. Locations old: [4979, 4954] new: [1230, 1205]


In [23]:
# Sanity check: residues 3750..5037 inclusive should give 5037 - 3750 + 1 = 1288.
len(subseq_result.final_sequence)

1288

In [24]:
# Write the extracted subsequence to disk, replacing any copy from an earlier run.
import os

output_file = get_project_root() / "inputs/ryr1_rabit_section.txt"


# Report and delete a pre-existing output file before writing the new one.
if output_file.exists():
    print(f"{output_file.name} already exists")
    os.remove(output_file)

with open(output_file, mode="w") as f:
    f.write(subseq_result.final_sequence)

ryr1_rabit_section.txt already exists


In [31]:
# Prediction input description: one protein chain (A) holding the extracted
# subsequence, two ligands (B = CFF, C = ATP), one pocket constraint per ligand
# built from the re-indexed pocket positions, and an affinity request for B.
# NOTE(review): the layout looks like a Boltz-style input YAML — confirm against the consumer.
ryr1_rabit_base = {
    "sequences": [
        {"protein": {"id": "A", "sequence": subseq_result.final_sequence}},
        {"ligand": {"id": "B", "ccd": "CFF"}},
        {"ligand": {"id": "C", "ccd": "ATP"}},
    ],
    "constraints": [
        {
            "pocket": {
                "binder": "B",
                "contacts": [
                    ["A", pos] for pos in subseq_result.new_pocket_location["cff"]
                ],
                "max_distance": 3.2,  # Angstroms
            },
        },
        {
            "pocket": {
                "binder": "C",
                "contacts": [
                    ["A", pos] for pos in subseq_result.new_pocket_location["atp"]
                ],
                "max_distance": 5,
            },
        },
    ],
    "properties": [
        {"affinity": {"binder": "B"}},
    ],
}

import yaml

# Serialize in block style (no inline flow collections) next to the other inputs.
with open(get_project_root() / "inputs/ryr1_rabit_base.yaml", mode="w") as f:
    yaml.dump(ryr1_rabit_base, f, default_flow_style=False)