In [1]:
from src.substring import substring_and_remap
from src.utils import get_project_root

# Load the input sequence; the file is expected to hold it on its first line.
with open(get_project_root() / "inputs/ryr1_rabit.txt", "r") as f:
    # Strip the line terminator (and stray surrounding whitespace) so it is
    # neither counted in the length below nor carried into the sequence that
    # later cells slice and write out.
    line = f.readline().strip()

# Sequence length, displayed as the cell output.
len(line)

5037

In [7]:
from typing import NamedTuple, Optional


class SubseqResult(NamedTuple):
    """Result pair returned by ``extract_and_remove_disordered``."""

    # The sequence left after extraction.
    final_sequence: str
    # Pocket positions relative to ``final_sequence``; intended to be 1-based
    # -- TODO confirm against what ``substring_and_remap`` actually returns.
    new_pocket_location: list[int]


# Pocket positions in the full input sequence, grouped by ligand key.
pockets = {
    # The first entry is the main pocket.
    "cff": [4716, 3753, 4996],
    # "atp": [4979, 4954],
}


def extract_and_remove_disordered(
        sequence: str,
        aa_start: int,
        aa_end: int,
        pocket_locations: list[int],
        remove_sequences: Optional[list[tuple[int, int]]] = None,
) -> SubseqResult:
    """Extract a window of ``sequence``, drop regions, and remap pocket positions.

    Every position argument is decremented by one (1-based -> 0-based) and the
    actual slicing and remapping is delegated to ``substring_and_remap``.

    Args:
        sequence: Full input sequence.
        aa_start: 1-based position where the kept window starts.
        aa_end: 1-based position where the kept window ends.
            NOTE(review): presumably inclusive -- confirm against
            ``substring_and_remap``.
        pocket_locations: 1-based positions in ``sequence`` to carry through
            the extraction.
        remove_sequences: Optional ``(start, end)`` pairs, 1-based, of regions
            to remove. ``None`` (the default) removes nothing.

    Returns:
        A ``SubseqResult`` built from the two values returned by
        ``substring_and_remap`` (new sequence, remapped pockets).
        NOTE(review): the index base of the remapped pockets is whatever
        ``substring_and_remap`` produces -- confirm before treating them as
        1-based positions.
    """
    # Convert the 1-based window bounds to standard 0-based indices.
    window = (aa_start - 1, aa_end - 1)

    # A missing list means "remove nothing"; the caller's list is not mutated.
    regions_to_remove = [
        (start - 1, end - 1) for start, end in (remove_sequences or [])
    ]

    # Pocket positions get the same 1-based -> 0-based shift.
    zero_based_pockets = [location - 1 for location in pocket_locations]

    new_seq, new_pockets = substring_and_remap(
        s=sequence,
        s_window=window,
        remove_regions=regions_to_remove,
        pointers=zero_based_pockets,
    )

    return SubseqResult(new_seq, new_pockets)


# disordered_regions = [
#     (4255, 4539),
#     (4588, 4625),
# ]


# (start, end) ranges that are passed to extract_and_remove_disordered as the
# regions to remove.
atp_binding_sites = [(4960, 4980), (4250, 4540)]

# Keep positions 3750-5037 of the loaded sequence, remove the ranges listed in
# atp_binding_sites, and carry the "cff" pocket positions through.
subseq_result = extract_and_remove_disordered(
    line,
    aa_start=3750,
    aa_end=5037,
    remove_sequences=atp_binding_sites,
    pocket_locations=pockets["cff"],
)

# Display the result as the cell output.
subseq_result

SubseqResult(final_sequence='EVSFEEKEMEKQRLLYQQSRLHTRGAAEMVLQMISACKGETGAMVSSTLKLGISILNGGNAEVQQKMLDYLKDKKEVGFFQSIQALMQTCSVLDLNAFERQNKAEGLGMVNEDGTVINRQNGEKVMADDEFTQDLFRFLQLLCEGHNNDFQNYLRTQTGNTTTINIIICTVDYLLRLQESISDFYWYYSGKDVIEEQGKRNFSKAMSVAKQVFNSLTEYIQGPCTGNQQSLAHSRLWDAVVGFLHVFAHMMMKLAQDSSQIELLKELLDLQKDMVVMLLSLLEGNVVNGMIARQMVDMLVESSSNVEMILKFFDMFLKLKDIVGSEAFQDYVTDPRGLISKKDFQKAMDSQKQFTGPEIQFLLSCSEADENEMINFEEFANRFQEPARDIGFNVAVLLTNLSEHVPHDPRLRNFLELAESILEYFRPYLGRIEIMGASRRIERIYFEISETNRAQWEMPQVKESKRQFIFDVVNEGGEAEKMELFVSFCEDTIFEMQIAAWGELEVQRVKFLNYLSRNFYTLRFLALFLAFAINFILLFYKVSDSPPGEDDMEGSAAGDLAGAGSGGGSGWGSGAGEEAEGDEDENMVYYFLEESTGYMEPALWCLSLLHTLVAFLCIIGYNCLKVPLVIFKREKELARKLEFDGLYITEQPGDDDVKGQWDRLVLNTPSFPSNYWDKFVKRKVLDKHGDIFGRERIAELLGMDLASLEITAHNERKPDPPPGLLTWLMSIDVKYQIWKFGVIFTDNSFLYLGWYMVMSLLGHYNNFFFAAHLLDIAMGVKTLRTILSSVTHNGKQLVMTVGLLAVVVYLYTVVAFNFFRKFYNKSEDEDEPDMKCDDMMTCYLFHMYVGVRAGGGIGDEIEDPAGDEYELYRVVFDITFFFFVIVILLAIIQGLIIDAFGELRDQQEQVKEDMETKCFEEHNLANYMFFLMYLINKDETEHTGQESYVWKMYQERCWDFFPAGDCFRKQY

In [8]:
# Length of the extracted sequence, shown as the cell output.
len(subseq_result.final_sequence)

976

In [9]:
# Write the extracted sequence to disk, replacing any copy from an earlier run.
import os

output_file = get_project_root() / "inputs/ryr1_rabit_atp_pocket.txt"

# Report a pre-existing file and delete it before it is rewritten below.
if output_file.exists():
    print(f"{output_file.name} already exists")
    os.remove(output_file)

with output_file.open("w") as f:
    f.write(subseq_result.final_sequence)

In [11]:
# Input description: protein chain "A" (the extracted sequence), ligand "B"
# (CFF), a pocket constraint tying B to the remapped pocket positions on A,
# and an affinity property for B.
ryr1_rabit_base = dict(
    sequences=[
        dict(
            protein=dict(
                id="A",
                sequence=subseq_result.final_sequence,
                msa="inputs/msa/ryr1_rabit.fasta",
            ),
        ),
        dict(ligand=dict(id="B", ccd="CFF")),
        # Disabled second ligand:
        # dict(ligand=dict(id="C", ccd="ATP")),
    ],
    constraints=[
        dict(
            pocket=dict(
                binder="B",
                # One ["A", position] pair per remapped pocket position.
                contacts=[
                    ["A", position]
                    for position in subseq_result.new_pocket_location
                ],
                max_distance=5,  # Angstroms
            ),
        ),
        # Disabled pocket constraint for the second ligand:
        # dict(
        #     pocket=dict(
        #         binder="C",
        #         contacts=[
        #             ["A", p] for p in subseq_result.new_pocket_location["atp"]
        #         ],
        #         max_distance=5,
        #     ),
        # ),
    ],
    properties=[dict(affinity=dict(binder="B"))],
)

import yaml

# Serialise the input description to YAML, using block style rather than
# inline flow style for nested collections.
with open(get_project_root() / "inputs/ryr1_rabit_atp_pocket.yaml", "w") as f:
    yaml.dump(ryr1_rabit_base, stream=f, default_flow_style=False)