# Uni-Fold Folding

In [1]:
%load_ext autoreload
%autoreload 2

In [29]:
import numpy as np
from collections import namedtuple
from typing import NamedTuple
from prtm.constants import residue_constants as rc
from prtm.models.unifold.data import residue_constants as rc2

In [31]:
failed = []
success = []
for k in dir(rc):
    v = getattr(rc, k, None)
    v2 = getattr(rc2, k, None)
    if v is None or v2 is None:
        continue  
    elif isinstance(v, np.ndarray):
        if not np.allclose(v, v2):
            failed.append(k)
        else:
            success.append(k)
    elif (
        isinstance(v, list) 
        or isinstance(v, dict) 
        or isinstance(v, str) 
        or isinstance(v, tuple) 
        or isinstance(v, int)
        or isinstance(v, float)
    ):
        if v != v2:
            failed.append(k)
        else:
            success.append(k)
    else:
        print(k, type(k))

Bond <class 'str'>
BondAngle <class 'str'>
List <class 'str'>
Mapping <class 'str'>
Tuple <class 'str'>
__loader__ <class 'str'>
__spec__ <class 'str'>
_make_atom14_is_ambiguous <class 'str'>
_make_rigid_group_constants <class 'str'>
_make_rigid_transformation_4x4 <class 'str'>
_make_standard_atom_mask <class 'str'>
chi_angle_atom <class 'str'>
collections <class 'str'>
functools <class 'str'>
get_chi_atom_indices <class 'str'>
load_stereo_chemical_props <class 'str'>
make_atom14_dists_bounds <class 'str'>
np <class 'str'>
sequence_to_onehot <class 'str'>


In [34]:
for k in failed:
    v = getattr(rc, k, None)
    print(k, type(v))

STANDARD_ATOM_MASK <class 'numpy.ndarray'>
__cached__ <class 'str'>
__file__ <class 'str'>
__name__ <class 'str'>
__package__ <class 'str'>
atom_order <class 'dict'>
atom_types <class 'list'>
chi_angles_atom_indices <class 'numpy.ndarray'>
chi_angles_mask <class 'list'>
chi_atom_1_one_hot <class 'numpy.ndarray'>
chi_atom_2_one_hot <class 'numpy.ndarray'>
chi_atom_indices <class 'list'>
restype_1to3 <class 'dict'>
restype_3to1 <class 'dict'>
restype_atom14_to_atom37 <class 'numpy.ndarray'>
restype_atom37_mask <class 'numpy.ndarray'>
restype_atom37_rigid_group_positions <class 'numpy.ndarray'>
restype_atom37_to_atom14 <class 'numpy.ndarray'>
restype_atom37_to_rigid_group <class 'numpy.ndarray'>


In [2]:
from prtm import protein
from prtm.models.unifold.modeling import UniFoldForFolding
from prtm.visual import view_superimposed_structures





[2024-02-17 17:43:45,077] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
PyRosetta-4 2023 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python310.Release 2023.47+release.5fe66cd241adb376f3a0af661ea0dcd77ea0dbbe 2023-11-21T10:47:25] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.


## Fold Monomer

In [None]:
monomer_sequence = (
    "LILNLRGGAFVSNTQITMADKQKKFINEIQEGDLVRSYSITDETFQQNAVTSIV"
    "KHEADQLCQINFGKQHVVCTVNHRFYDPESKLWKSVCPHPGSGISFLKKYDYLLS"
    "EEGEKLQITEIKTFTTKQPVFIYHIQVENNHNFFANGVLAHAMQVSI"
)
monomer_sequence_dict = {"A": monomer_sequence}

In [4]:
uf_folder = UniFoldForFolding(model_name="model_2_ft", use_templates=True, random_seed=0)
#af_folder = UniFoldForFolding(
#    model_name="model_1_af2", use_templates=True, random_seed=0
#)

In [5]:
uf_monomer_structure, uf_aux_output = uf_folder(monomer_sequence_dict, max_recycling_iters=3, num_ensembles=2)
#af_monomer_structure, af_aux_output = af_folder(monomer_sequence_dict, max_recycling_iters=3, num_ensembles=2)

Loaded result from cache.
Loaded result from cache.




In [10]:
uf_monomer_structure.show()

<py3Dmol.view at 0x7f9b8076ae00>

In [7]:
with open("/home/ubuntu/repos/prtm/test/unifold/reference_model_2_ft.pdb", mode="r") as f:
    pdb_str = f.read()

In [8]:
s2 = protein.Protein37.from_pdb_string(pdb_str)
s2.show()

<py3Dmol.view at 0x7f9ce03a0d60>

In [9]:
view_superimposed_structures(uf_monomer_structure, s2, color1="green")

<py3Dmol.view at 0x7f9b547cd390>

## Fold Multimer

In [None]:
complex_sequence_a = (
    "TTPLVHVASVEKGRSYEDFQKVYNAIALKLREDDEYDNYIGYGPVLVRLAWHTSGTW"
    "DKHDNTGGSYGGTYRFKKEFNDPSNAGLQNGFKFLEPIHKEFPWISSGDLFSLGGVTA"
    "VQEMQGPKIPWRCGRVDTPEDTTPDNGRLPDADKDADYVRTFFQRLNMNDREVVALMGAH"
    "ALGKTHLKNSGYEGPWGAANNVFTNEFYLNLLNEDWKLEKNDANNEQWDSKSGYMMLPTDY"
    "SLIQDPKYLSIVKEYANDQDKFFKDFSKAFEKLLENGITFPKDAPSPFIFKTLEEQGL"
)
complex_sequence_b = (
    "TEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHSGQAEGYSYTDA"
    "NIKKNVLWDENNMSEYLTNPKKYIPGTKMAIGGLKKEKDRNDLITYLKKACE"
)
complex_sequence_dict = {"A": complex_sequence_a, "B": complex_sequence_b}

In [None]:
uf_folder = UniFoldForFolding(model_name="multimer_ft", use_templates=True, random_seed=0)
af_folder = UniFoldForFolding(
    model_name="multimer_4_af2_v3", use_templates=True, random_seed=0
)

In [None]:
uf_complex_structure, uf_comp_aux_output = uf_folder(
    complex_sequence_dict, max_recycling_iters=3, num_ensembles=2
)
af_complex_structure, af_compp_aux_output = af_folder(
    complex_sequence_dict, max_recycling_iters=3, num_ensembles=2
)

In [None]:
view_superimposed_structures(
    uf_complex_structure.get_chain("A"), af_complex_structure.get_chain("A"), color1="green"
)

In [None]:
view_superimposed_structures(
    uf_complex_structure.get_chain("B"), af_complex_structure.get_chain("B"), color1="green"
)

## Fold Symmetric

In [None]:
symmetric_sequence = (
    "PPYTVVYFPVRGRCAALRMLLADQGQSWKEEVVTVETWQEGSLKASCLYGQLPKFQDGD"
    "LTLYQSNTILRHLGRTLGLYGKDQQEAALVDMVNDGVEDLRCKYISLIYTNYEAGKDDYV"
    "KALPGQLKPFETLLSQNQGGKTFIVGDQISFADYNLLDLLLIHEVLAPGCLDAFPLLSAY"
    "VGRLSARPKLKAFLASPEYVNLPINGNGKQ"
)
symmetric_sequence_dict = {"A": symmetric_sequence}

In [None]:
sym_folder = UniFoldForFolding(
    model_name="uf_symmetry", use_templates=True, random_seed=0, symmetry_group="C2"
)

In [None]:
sym_structure, sym_aux_output = sym_folder(symmetric_sequence_dict, max_recycling_iters=3, num_ensembles=2)

In [None]:
sym_structure.show()