In [21]:
from src.utils import get_project_root

with open(get_project_root() / "inputs/ryr1_rabit.txt", "r") as f:
    # Only the first line of the file is read. readline() keeps the trailing
    # newline when one is present, so strip it; otherwise it would be counted
    # as an extra residue and shift the sequence length.
    line = f.readline().strip()

assert line, "inputs/ryr1_rabit.txt is empty - expected a single-line sequence"

# Number of residues in the full-length sequence.
len(line)

5037

In [22]:
from typing import NamedTuple


class SubseqResult(NamedTuple):
    """Extracted sub-sequence together with the re-indexed pocket positions."""

    final_sequence: str
    new_pocket_location: list[int]  # 1-based index in final_sequence


pockets = [
    4716, # main pocket
    3753,
    4996,
]

def extract_and_remove_disordered(
    sequence: str,
    aa_start: int,
    aa_end: int,
    pocket_locations: list[int],
) -> SubseqResult:
    """Cut a residue window out of ``sequence`` and re-index pocket positions.

    Despite the name, this function does not remove any disordered regions;
    it only extracts the window and shifts the pocket coordinates.

    Args:
        sequence: Full sequence, one character per residue.
        aa_start: First residue to keep (1-based, inclusive). Values below 1
            are clamped to the start of the sequence.
        aa_end: Last residue to keep (1-based, inclusive). Values beyond the
            end of the sequence are clamped to its length; values before
            ``aa_start`` yield an empty window.
        pocket_locations: Pocket residue positions, 1-based in ``sequence``.

    Returns:
        A ``SubseqResult`` holding the extracted window and the pocket
        positions expressed as 1-based indices into that window (same order
        as ``pocket_locations``).

    Raises:
        ValueError: If any pocket position falls outside the extracted
            window, since it would have no valid index in the result.
    """
    seq_len = len(sequence)
    aa_start_idx = max(aa_start - 1, 0)
    # Never let the end drop below the start: a negative end index would be
    # interpreted by Python slicing as "count from the back".
    aa_end_idx = max(min(aa_end, seq_len), aa_start_idx)

    # Get the initial substring
    sub_seq = sequence[aa_start_idx:aa_end_idx]

    # Shift by the start that was actually used for slicing (the clamped one),
    # so the pocket indices stay aligned with sub_seq even when aa_start < 1.
    offset = aa_start_idx

    new_pockets = [p - offset for p in pocket_locations]

    # A shifted position outside 1..len(sub_seq) would either wrap around via
    # negative indexing or overflow in callers doing sub_seq[p - 1].
    outside = [
        original
        for original, shifted in zip(pocket_locations, new_pockets)
        if not 1 <= shifted <= len(sub_seq)
    ]
    if outside:
        raise ValueError(
            f"pocket locations {outside} fall outside the extracted window "
            f"{aa_start_idx + 1}-{aa_end_idx}"
        )

    return SubseqResult(sub_seq, new_pockets)


# disordered_regions = [
#     (4255, 4539),
#     (4588, 4625)
# ]

# Keep residues 3750-5037 (1-based, inclusive) and re-index the pocket
# positions relative to that window.
subseq_result = extract_and_remove_disordered(
    sequence=line,
    aa_start=3750,
    aa_end=5037,
    pocket_locations=pockets,
)

# Residues found at the pocket positions in the extracted section ...
pocket_aas_new = [
    subseq_result.final_sequence[p - 1] for p in subseq_result.new_pocket_location
]

# ... and at the original positions in the full-length sequence.
pocket_aas_old = [
    line[p - 1] for p in pockets
]

# The re-indexed positions must point at the same residues as the originals;
# fail loudly instead of relying on a visual comparison of the printout.
assert pocket_aas_new == pocket_aas_old, (
    f"pocket residues changed after re-indexing: {pocket_aas_old} -> {pocket_aas_new}"
)

print(
    f"AA pocket: ({pockets}) {pocket_aas_old}. New pocket ({subseq_result.new_pocket_location}): {pocket_aas_new}"
)

AA pocket: ([4716, 3753, 4996]) ['W', 'F', 'I']. New pocket ([967, 4, 1247]): ['W', 'F', 'I']


In [23]:
# Length of the extracted section; for the 3750-5037 window used above this
# is 5037 - 3750 + 1 = 1288 residues.
len(subseq_result.final_sequence)

1288

In [24]:
# Write the extracted section to disk, replacing any previous output.
import os

output_file = get_project_root() / "inputs/ryr1_rabit_section.txt"


# Opening with mode "w" truncates an existing file, so no explicit delete is
# needed; the message is kept so a re-run still reports the overwrite.
if output_file.exists():
    print(f"{output_file.name} already exists")

with open(output_file, "w") as f:
    f.write(subseq_result.final_sequence)

ryr1_rabit_section.txt already exists


In [25]:
# Chain "A": the extracted protein section.
protein_chain = {
    "protein": {"id": "A", "sequence": subseq_result.final_sequence},
}

# Chain "B": the ligand, identified through its "ccd" entry.
ligand_chain = {
    "ligand": {"id": "B", "ccd": "CFF"},
}

# One [chain, residue] pair per re-indexed pocket position on chain "A".
pocket_contacts = [
    ["A", residue] for residue in subseq_result.new_pocket_location
]

# NOTE(review): the layout looks like a Boltz-style input schema - confirm that
# 3.2 lies within the max_distance range the target tool supports.
pocket_constraint = {
    "pocket": {
        "binder": "B",
        "contacts": pocket_contacts,
        "max_distance": 3.2,  # Angstroms
    },
}

# Base configuration: both chains, the pocket constraint, and an affinity
# property for the ligand chain.
ryr1_rabit_base = {
    "sequences": [protein_chain, ligand_chain],
    "constraints": [pocket_constraint],
    "properties": [{"affinity": {"binder": "B"}}],
}

import yaml

# Serialise the base configuration as block-style YAML under the inputs folder.
base_config_path = get_project_root() / "inputs/ryr1_rabit_base.yaml"
with open(base_config_path, "w") as yaml_file:
    yaml.dump(ryr1_rabit_base, yaml_file, default_flow_style=False)