# Uni-Fold Folding

In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
from dataclasses import asdict
from prtm.models.unifold.config import (
    Model2FT, MultimerAF2V3, Model1AF2, make_data_config_dataclass, make_data_config, model_config, SHAPE_SCHEMA
)

In [None]:
# Build the config two ways so they can be diffed below: via the
# `model_config(...)` lookup by name (old) and via the `MultimerAF2V3` class (new).
old_config = model_config("multimer_af2_v3")
new_config = MultimerAF2V3()

In [None]:
# Show the `globals` attribute of the new config as the cell output.
new_config.globals

In [None]:
# Derive the data config and feature-name list from both the old and the new
# model config, using the same residue count and template setting for each.
# NOTE(review): `is_multimer=False` is passed even though both configs above
# are the "multimer_af2_v3" ones — confirm this is intentional.
num_res = 300
old_data_cfg, old_feature_names = make_data_config(
    old_config.data,
    mode="predict",
    num_res=num_res,
    is_multimer=False,
    use_templates=True,
)
# The new helper is called without a `mode` argument.
new_data_cfg, new_feature_names = make_data_config_dataclass(
    new_config.data,
    num_res=num_res,
    is_multimer=False,
    use_templates=True,
)

In [None]:
def compare_dicts(dict1, dict2, path=""):
    """Recursively compare two (possibly nested) dictionaries.

    Args:
        dict1: First dictionary, or a leaf value when called recursively.
        dict2: Second dictionary, or a leaf value when called recursively.
        path: Dot-separated key path of the current position; only used to
            label the difference messages.

    Returns:
        A tuple ``(are_equal, differences)``. ``differences`` is a list of
        messages: those starting with ``"Key"`` report a key that exists in
        only one dictionary, those starting with ``"Values"`` report unequal
        leaves. The order is deterministic: one-sided keys first (``dict1``
        order, then ``dict2`` order), followed by mismatches found under the
        shared keys, visited in ``dict1`` insertion order.
    """
    # Anything that is not a dict on *both* sides is compared as a leaf value.
    if not (isinstance(dict1, dict) and isinstance(dict2, dict)):
        equal = dict1 == dict2  # evaluate the comparison only once
        if equal:
            return equal, []
        return equal, [f"Values at path '{path}' are not equal: {dict1} != {dict2}"]

    prefix = path + ('.' if path else '')

    # Keys present on only one side. Iterating the dicts (rather than a set)
    # keeps the report order stable from run to run.
    one_sided = [key for key in dict1 if key not in dict2]
    one_sided += [key for key in dict2 if key not in dict1]
    differences = [
        f"Key '{prefix + str(key)}' is not present in both dictionaries"
        for key in one_sided
    ]

    # Recurse into the values of the shared keys.
    for key in dict1:
        if key not in dict2:
            continue
        _, diffs = compare_dicts(dict1[key], dict2[key], prefix + str(key))
        differences.extend(diffs)

    return len(differences) == 0, differences

In [None]:
# Diff the two full model configs after converting both to plain dicts;
# `mismatch` receives the list of difference messages.
is_same, mismatch = compare_dicts(
    old_config.to_dict(),
    asdict(new_config),
)

In [None]:
# Missing keys: print the messages that start with "Key", i.e. the ones
# `compare_dicts` emits for keys found in only one of the two dicts.
for k in mismatch:
    if k.startswith("Key"):
        print(k)

In [None]:
# Mismatched values: print the messages that start with "Value", i.e. the
# ones `compare_dicts` emits for leaves that differ between the two dicts.
for k in mismatch:
    if k.startswith("Value"):
        print(k)

In [None]:
# Names that appear in only one of: the new config's common feature fields
# and SHAPE_SCHEMA. An empty set means the two key sets agree.
set(asdict(new_config.data.common.features).keys()).symmetric_difference(set(SHAPE_SCHEMA.keys()))

In [None]:
# Same diff as for the model configs, now on the derived data configs.
# This overwrites `is_same` and `mismatch` from the earlier comparison.
is_same, mismatch = compare_dicts(
    old_data_cfg.to_dict(),
    asdict(new_data_cfg),
)

In [None]:
# Missing keys: print the "Key ..." messages from the most recent
# comparison stored in `mismatch` (keys found in only one of the two dicts).
for k in mismatch:
    if k.startswith("Key"):
        print(k)

In [2]:
from prtm.models.unifold.modeling import UniFoldForFolding
from prtm.visual import view_superimposed_structures





[2024-02-19 21:14:00,097] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
PyRosetta-4 2023 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python310.Release 2023.47+release.5fe66cd241adb376f3a0af661ea0dcd77ea0dbbe 2023-11-21T10:47:25] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.


## Fold Monomer

In [3]:
# Single-chain input. The sequence is one string written as adjacent
# literals for readability, then wrapped in a dict under the key "A"
# (presumably the chain ID — confirm against UniFoldForFolding).
monomer_sequence = (
    "LILNLRGGAFVSNTQITMADKQKKFINEIQEGDLVRSYSITDETFQQNAVTSIV"
    "KHEADQLCQINFGKQHVVCTVNHRFYDPESKLWKSVCPHPGSGISFLKKYDYLLS"
    "EEGEKLQITEIKTFTTKQPVFIYHIQVENNHNFFANGVLAHAMQVSI"
)
monomer_sequence_dict = {"A": monomer_sequence}

In [4]:
# Two folders that differ only in `model_name`; templates and the random
# seed are identical so the predictions can be compared side by side.
uf_folder = UniFoldForFolding(model_name="model_2_ft", use_templates=True, random_seed=0)
af_folder = UniFoldForFolding(model_name="model_1_af2", use_templates=True, random_seed=0)

In [5]:
# Fold the same monomer input with both models using identical recycling and
# ensemble settings. Each call returns a structure plus an auxiliary output.
uf_monomer_structure, uf_aux_output = uf_folder(monomer_sequence_dict, max_recycling_iters=3, num_ensembles=2)
af_monomer_structure, af_aux_output = af_folder(monomer_sequence_dict, max_recycling_iters=3, num_ensembles=2)

Loaded result from cache.
Loaded result from cache.




Loaded result from cache.
Loaded result from cache.




In [6]:
# Overlay the two monomer predictions in one viewer.
# (`color1` presumably colors the first structure — confirm in prtm.visual.)
view_superimposed_structures(uf_monomer_structure, af_monomer_structure, color1="green")

<py3Dmol.view at 0x7f730c075690>

## Fold Multimer

In [7]:
# Two-chain input: each sequence is a single string written as adjacent
# literals, and the dict maps the keys "A" and "B" to the two sequences.
complex_sequence_a = (
    "TTPLVHVASVEKGRSYEDFQKVYNAIALKLREDDEYDNYIGYGPVLVRLAWHTSGTW"
    "DKHDNTGGSYGGTYRFKKEFNDPSNAGLQNGFKFLEPIHKEFPWISSGDLFSLGGVTA"
    "VQEMQGPKIPWRCGRVDTPEDTTPDNGRLPDADKDADYVRTFFQRLNMNDREVVALMGAH"
    "ALGKTHLKNSGYEGPWGAANNVFTNEFYLNLLNEDWKLEKNDANNEQWDSKSGYMMLPTDY"
    "SLIQDPKYLSIVKEYANDQDKFFKDFSKAFEKLLENGITFPKDAPSPFIFKTLEEQGL"
)
complex_sequence_b = (
    "TEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHSGQAEGYSYTDA"
    "NIKKNVLWDENNMSEYLTNPKKYIPGTKMAIGGLKKEKDRNDLITYLKKACE"
)
complex_sequence_dict = {"A": complex_sequence_a, "B": complex_sequence_b}

In [8]:
# Multimer folders, again differing only in `model_name`.
# NOTE: this rebinds `uf_folder` / `af_folder` from the monomer section, so
# re-running the monomer folding cell after this one would use these models.
uf_folder = UniFoldForFolding(model_name="multimer_ft", use_templates=True, random_seed=0)
af_folder = UniFoldForFolding(model_name="multimer_4_af2_v3", use_templates=True, random_seed=0)

In [9]:
# Fold the two-chain complex with both multimer models using identical
# recycling and ensemble settings. Each call returns a structure plus an
# auxiliary output; the auxiliary names follow the same `<model>_comp_aux_output`
# pattern for both models.
uf_complex_structure, uf_comp_aux_output = uf_folder(
    complex_sequence_dict, max_recycling_iters=3, num_ensembles=2
)
af_complex_structure, af_comp_aux_output = af_folder(
    complex_sequence_dict, max_recycling_iters=3, num_ensembles=2
)

Loaded result from cache.
Loaded result from cache.
Loaded result from cache.
Loaded result from cache.
Loaded result from cache.
Loaded result from cache.


In [10]:
# Overlay chain "A" of the two predicted complexes.
view_superimposed_structures(
    uf_complex_structure.get_chain("A"), af_complex_structure.get_chain("A"), color1="green"
)

<py3Dmol.view at 0x7f72b12fe140>

In [11]:
# Overlay chain "B" of the two predicted complexes.
view_superimposed_structures(
    uf_complex_structure.get_chain("B"), af_complex_structure.get_chain("B"), color1="green"
)

<py3Dmol.view at 0x7f72f9739900>

## Fold Symmetric

In [12]:
# Input for the symmetric model: only one sequence is supplied, under the
# key "A"; the symmetry group is given separately when the folder is created.
symmetric_sequence = (
    "PPYTVVYFPVRGRCAALRMLLADQGQSWKEEVVTVETWQEGSLKASCLYGQLPKFQDGD"
    "LTLYQSNTILRHLGRTLGLYGKDQQEAALVDMVNDGVEDLRCKYISLIYTNYEAGKDDYV"
    "KALPGQLKPFETLLSQNQGGKTFIVGDQISFADYNLLDLLLIHEVLAPGCLDAFPLLSAY"
    "VGRLSARPKLKAFLASPEYVNLPINGNGKQ"
)
symmetric_sequence_dict = {"A": symmetric_sequence}

In [13]:
# Folder for the "uf_symmetry" model; unlike the folders above it also
# takes a `symmetry_group` argument (here "C2").
sym_folder = UniFoldForFolding(
    model_name="uf_symmetry", use_templates=True, random_seed=0, symmetry_group="C2"
)

In [14]:
# Fold the single-sequence input with the symmetric folder, using the same
# recycling and ensemble settings as the other sections.
sym_structure, sym_aux_output = sym_folder(
    symmetric_sequence_dict, max_recycling_iters=3, num_ensembles=2
)

Loaded result from cache.
Loaded result from cache.




In [15]:
# Render the predicted symmetric structure as the cell output.
sym_structure.show()

<py3Dmol.view at 0x7f7309e33f10>