In [1]:
import pandas as pd
from typing import List, Optional
from rna_motif_library.basepair import get_cached_basepairs
from rna_motif_library.chain import get_cached_chains, Chains, write_chain_to_cif
from rna_motif_library.motif import get_cached_motifs, MotifFactory, Motif
from rna_motif_library.residue import Residue
from rna_motif_library.logger import setup_logging
from rna_motif_library.util import wc_basepairs_w_gu, get_cached_path
from rna_motif_library.hbond import get_cached_hbonds
from rna_motif_library.x3dna import get_cached_dssr_output, X3DNAResidueFactory
from rna_motif_library.motif_factory import (
    HelixFinder,
    get_pdb_structure_data,
    get_pdb_structure_data_for_residues,
    get_cww_basepairs,
)

In [2]:
def setup_motif_factory(pdb_id: str) -> MotifFactory:
    """Build a MotifFactory from the cached data of a single PDB entry.

    The cached basepairs, chains and hbonds for ``pdb_id`` are looked up, the
    chains are wrapped in a ``Chains`` container, and everything is handed to
    the ``MotifFactory`` constructor.

    Args:
        pdb_id: PDB identifier whose cached data should be loaded

    Returns:
        MotifFactory built from that entry's chains, basepairs and hbonds
    """
    cached_basepairs = get_cached_basepairs(pdb_id)
    raw_chains = get_cached_chains(pdb_id)
    cached_hbonds = get_cached_hbonds(pdb_id)
    return MotifFactory(pdb_id, Chains(raw_chains), cached_basepairs, cached_hbonds)

In [3]:
# PDB entry analysed in this notebook; reused below when loading structure data.
pdb_id = "7MSH"
# Factory built from the cached basepairs, chains and hbonds of that entry.
mf = setup_motif_factory(pdb_id)

Residue a-U-241- has 2 basepairs. Only taking the one with the highest hbond score.
Basepair: a-G-283- a-U-241- with hbond score 1.5137164611552927
Basepair: a-G-284- a-U-241- with hbond score 1.6356722520030003
Residue A-G-172- has 2 basepairs. Only taking the one with the highest hbond score.
Basepair: A-G-172- A-U-158- with hbond score 1.300902863719972
Basepair: A-C-159- A-G-172- with hbond score 2.1476725479831975
Residue A-C-1303- has 2 basepairs. Only taking the one with the highest hbond score.
Basepair: A-C-1303- A-G-1309- with hbond score 2.0138593377227174
Basepair: A-C-1303- A-G-1308- with hbond score 2.480799231110086
Residue a-G-284- has 2 basepairs. Only taking the one with the highest hbond score.
Basepair: a-G-284- a-U-241- with hbond score 1.6356722520030003
Basepair: a-C-240- a-G-284- with hbond score 2.2751105136787375
Residue A-G-2579- has 2 basepairs. Only taking the one with the highest hbond score.
Basepair: A-G-2579- A-U-2530- with hbond score 1.527691503782223

In [4]:
# Looped strands reported by the factory; later cells refer to them as hairpins
# and pass them to mf.get_helices().
hairpins = mf.get_looped_strands()

In [5]:
# sorted() returns a new list and leaves its argument untouched, so the result
# has to be kept for the [-1] lookup to point at the strand with the most
# residues. `hairpins` itself keeps its original order for the cells below.
hairpins_by_size = sorted(hairpins, key=lambda x: len(x.get_residues()))
print(len(hairpins_by_size[-1].get_residues()))

74


In [6]:
# Helices returned by the factory when given the looped strands.
# NOTE(review): the name `helices` is rebound by the HelixFinder cell near the
# end of the notebook, so its meaning depends on which cell ran last.
helices = mf.get_helices(hairpins)

In [7]:
# Strands the factory reports as non-helical, given the helices found above
# (exact semantics live in MotifFactory.get_non_helical_strands).
non_helical_strands = mf.get_non_helical_strands(helices)

In [8]:
# Keep only the strands whose length exceeds 50, printing each kept length
# in the order the strands are encountered.
long_non_helical_strands = []
for s in non_helical_strands:
    if len(s) <= 50:
        continue
    print(len(s))
    long_non_helical_strands.append(s)

58
84
78
131
60
74
75
58
71
54


In [9]:
# How many strands passed the len(s) > 50 filter in the previous cell.
print(len(long_non_helical_strands))

10


In [10]:
# Select one long strand for closer inspection and write it to a CIF file.
# NOTE(review): index 3 is hard-coded; in the lengths printed above it is the
# entry of length 131, but that depends on the order of non_helical_strands.
s = long_non_helical_strands[3]
write_chain_to_cif(s, "large_motif.cif")

In [11]:
# Structure data for the whole PDB entry (presumably read from the cache that
# the other helpers use; confirm in motif_factory).
org_pdb_data = get_pdb_structure_data(pdb_id)

In [12]:
# Structure data narrowed to the selected strand `s`; the full-entry data in
# org_pdb_data is kept under its own name.
pdb_data = get_pdb_structure_data_for_residues(org_pdb_data, s)

In [29]:
# cWW basepairs within the strand-restricted structure data. Both hbond-score
# thresholds are set to 0.50 here.
# NOTE(review): whether 0.50 differs from the library defaults is not visible
# in this notebook; confirm against get_cww_basepairs.
cww_basepairs = get_cww_basepairs(
    pdb_data, min_two_hbond_score=0.50, min_three_hbond_score=0.50
)

In [None]:
# Run helix detection on the extracted strand only, using the cWW basepairs
# computed above; the third HelixFinder argument is passed as an empty list.
# NOTE(review): this rebinds `helices`, which an earlier cell assigned from
# mf.get_helices(hairpins). Re-running mf.get_non_helical_strands(helices)
# after this cell would pick up these helices instead.
hf = HelixFinder(pdb_data, cww_basepairs, [])
helices = hf.get_helices()
print(len(helices))
# Write each helix to its own file: helix_0.cif, helix_1.cif, ...
for i, h in enumerate(helices):
    h.to_cif(f"helix_{i}.cif")

8
