TODO:

1. Fix config types that are list, bool, optional, etc.
2. Finish modeling!

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from dataclasses import asdict

import py3Dmol
import torch
import numpy as np
from tqdm import tqdm

from proteome import protein
from proteome.models.design.rfdiffusion import config
from proteome.models.design.rfdiffusion.modeling import RFDiffusionForStructureDesign
from proteome.models.design.rfdiffusion.samplers import UnconditionalSampler, SelfConditioningSampler, ScaffoldedSampler

PyRosetta-4 2023 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python310.Release 2023.27+release.e3ce6ea9faf661ae8fa769511e2a9b8596417e58 2023-07-07T12:00:46] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.
core.init: Checking for fconfig files in pwd and ./rosetta/flags
core.init: Rosetta version: PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python310.Release r353 2023.27+release.e3ce6ea e3ce6ea9faf661ae8fa769511e2a9b8596417e58 http://www.pyrosetta.org 2023-07-07T12:00:46
core.init: command: PyRosetta -ex1 -ex2aro -database /home/conradry71/micromamba/envs/proteome/lib/python3.10/site-packages/pyrosetta/database
basic.random.init_random_generator: 'RNG device' seed mode, using '/dev/urandom', seed=-164735246 seed_offset=0 real_seed=-164735246 thread_index=0
basic.random.init_random_generator: RandomGenerator:init: Normal mode, seed=-164735246 RG_type=mt19937


In [3]:
def show_pdb_string(pdb_str):
    PLDDT_BANDS = [
      (0, 50, '#FF7D45'),
      (50, 70, '#FFDB13'),
      (70, 90, '#65CBF3'),
      (90, 100, '#0053D6')
    ]
    view = py3Dmol.view(width=800, height=600)
    view.addModelsAsFrames(pdb_str)
    
    color_map = {i: bands[2] for i, bands in enumerate(PLDDT_BANDS)}
    style = {'cartoon': {'colorscheme': {'prop': 'b', 'map': color_map}}}
    
    style['stick'] = {}
    
    view.setStyle({'model': -1}, style)
    view.zoomTo()
    return view

In [5]:
designer = RFDiffusionForStructureDesign(model_name="auto")

## Unconditional Design

### Random Length

In [6]:
sampler_config = config.UnconditionalSamplerConfig(
    contigmap_params=config.ContigMap(contigs=["100-200"]),
)
designed_structure = designer.design_structure(sampler_config)

  assert input.numel() == input.storage().size(), (
100%|██████████████████████████████████████| 50/50 [01:20<00:00,  1.60s/it]


In [7]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fae2efe38b0>

### Monomer ROG Potential

In [11]:
sampler_config = config.UnconditionalSamplerConfig(
    contigmap_params=config.ContigMap(contigs=["100-200"]),
    potentials_params=config.PotentialsParams(
        guiding_potentials=["type:monomer_ROG,weight:1,min_dist:5"],
        guide_scale=2,
        guide_decay="quadratic",
    ),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [00:50<00:00,  1.01s/it]


In [12]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fae2efe0dc0>

### Contact Potential

In [14]:
sampler_config = config.UnconditionalSamplerConfig(
    contigmap_params=config.ContigMap(contigs=["100-200"]),
    potentials_params=config.PotentialsParams(guiding_potentials=["type:monomer_contacts,weight:0.05"]),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [01:22<00:00,  1.65s/it]


In [15]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7faf68743730>

### Tetrahedral Oligos

In [18]:
sampler_config = config.UnconditionalSamplerConfig(
    contigmap_params=config.ContigMap(contigs=["240-240"]),
    symmetry_params=config.SymmetryParams(symmetry="tetrahedral"),
    potentials_params=config.PotentialsParams(
        guiding_potentials=["type:olig_contacts,weight_intra:1,weight_inter:0.1"],
        olig_inter_all=True,
        olig_intra_all=True,
        guide_scale=2,
        guide_decay="quadratic",
    ),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [02:23<00:00,  2.87s/it]


In [19]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fae22b9be20>

### Cyclic Oligos

In [21]:
sampler_config = config.UnconditionalSamplerConfig(
    contigmap_params=config.ContigMap(contigs=["90-90"]),
    symmetry_params=config.SymmetryParams(symmetry="C6"),
    potentials_params=config.PotentialsParams(
        guiding_potentials=["type:olig_contacts,weight_intra:1,weight_inter:0.1"], 
        olig_intra_all=True, 
        olig_inter_all=True, 
        guide_scale=2.0, 
        guide_decay="quadratic",
    ),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [00:34<00:00,  1.44it/s]


In [22]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fae22b9b910>

### Dihedral Oligos

In [23]:
sampler_config = config.UnconditionalSamplerConfig(
    contigmap_params=config.ContigMap(contigs=["120-120"]),
    symmetry_params=config.SymmetryParams(symmetry="D2"),
    potentials_params=config.PotentialsParams(
        guiding_potentials=["type:olig_contacts,weight_intra:1,weight_inter:0.1"], 
        olig_intra_all=True, 
        olig_inter_all=True, 
        guide_scale=2.0, 
        guide_decay="quadratic",
    ),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [00:42<00:00,  1.18it/s]


In [24]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fae22d92140>

## Self Conditioning Design

### Motifscaffolding

In [38]:
with open('5tpn.pdb', mode="r") as f:
    pdb_str = f.read()

reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [39]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["10-40/A163-181/10-40"]),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [00:29<00:00,  1.67it/s]


In [40]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7faddf731f90>

### Motifscaffolding with target

In [44]:
designer.set_model("complex_base")

In [45]:
with open('1ycr.pdb', mode="r") as f:
    pdb_str = f.read()

reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [46]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["A25-109/0 0-70/B17-29/0-70"], length="70-120"),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [01:12<00:00,  1.46s/it]


In [47]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fae213bf9a0>

### Enzyme

In [29]:
designer.set_model("active_site")

In [30]:
with open("5an7.pdb", mode="r") as f:
    pdb_str = f.read()
    
reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [32]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["10-100/A1083-1083/10-100/A1051-1051/10-100/A1180-1180/10-100"]),
    potentials_params=config.PotentialsParams(
        guiding_potentials=["type:substrate_contacts,s:1,r_0:8,rep_r_0:5.0,rep_s:2,rep_r_min:1"], 
        guide_scale=1, 
        guide_decay="quadratic",
        substrate="LLK",
    ),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [03:01<00:00,  3.63s/it]


In [33]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fae876c4490>

### Nickel Motif

In [34]:
designer.set_model("base_epoch8")

In [35]:
with open('nickel_motif.pdb', mode="r") as f:
    pdb_str = f.read()
    
reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [36]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["50/A2-4/50/0 50/A7-9/50/0 50/A12-14/50/0 50/A17-19/50/0"]),
    symmetry_params=config.SymmetryParams(symmetry="C4"),
    potentials_params=config.PotentialsParams(
        guiding_potentials=["type:olig_contacts,weight_intra:1,weight_inter:0.06"],
        olig_inter_all=True,
        olig_intra_all=True,
        guide_scale=2,
        guide_decay="quadratic",
    ),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [07:19<00:00,  8.80s/it]


In [37]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7faddf733490>

### Insulin PPI

In [48]:
with open('insulin_target.pdb', mode="r") as f:
    pdb_str = f.read()

reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [49]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["A1-150/0 70-100"]),
    ppi_params=config.PPIParams(hotspot_res=["A59", "A83", "A91"]),
    denoiser_params=config.DenoiserParams(noise_scale_ca=0, noise_scale_frame=0),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [02:09<00:00,  2.58s/it]


In [50]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fadc1e708e0>

### Insulin PPI Beta Model

In [51]:
designer.set_model("complex_beta")

In [52]:
with open('insulin_target.pdb', mode="r") as f:
    pdb_str = f.read()

reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [53]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["A1-150/0 70-100"]),
    ppi_params=config.PPIParams(hotspot_res=["A59", "A83", "A91"]),
    denoiser_params=config.DenoiserParams(noise_scale_ca=0, noise_scale_frame=0),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [02:14<00:00,  2.70s/it]


In [54]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7faddd158cd0>

## Sequence Inpainting

In [55]:
with open("5tpn.pdb", mode="r") as f:
    pdb_str = f.read()
    
reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [58]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["10-40/A163-181/10-40"], inpaint_seq=["A163-168/A170-171/A179"]),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [00:30<00:00,  1.65it/s]


In [59]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7faddda97bb0>

## Partial Diffusion

### Without Sequence

In [83]:
with open('2kl8.pdb', mode="r") as f:
    pdb_str = f.read()

reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [84]:
diffuser_config_override = config.DiffuserConfig(partial_T=10)

In [85]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["79-79"]),
)
designed_structure = designer.design_structure(sampler_config)

100%|██████████████████████████████████████| 50/50 [00:30<00:00,  1.65it/s]


In [86]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7faddd50f2b0>

### With Sequence

In [78]:
with open('peptide_complex.pdb', mode="r") as f:
    pdb_str = f.read()

reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [79]:
diffuser_config_override = config.DiffuserConfig(partial_T=10)

In [81]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["172-172/0 34-34"], provide_seq=["172-205"]),
)
designed_structure = designer.design_structure(sampler_config, diffuser_config_override=diffuser_config_override)

100%|██████████████████████████████████████| 10/10 [00:20<00:00,  2.02s/it]


In [82]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fae21464f70>

### With Multisequence

In [87]:
with open('peptide_complex.pdb', mode="r") as f:
    pdb_str = f.read()

reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

In [88]:
diffuser_config_override = config.DiffuserConfig(partial_T=10)

In [89]:
sampler_config = config.SelfConditioningSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    contigmap_params=config.ContigMap(contigs=["172-172/0 34-34"], provide_seq=["172-177,200-205"]),
)
designed_structure = designer.design_structure(sampler_config, diffuser_config_override=diffuser_config_override)

100%|██████████████████████████████████████| 10/10 [00:20<00:00,  2.03s/it]


In [90]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fadfc0a29b0>

## Scaffold Guided

### TIM Barrel

In [100]:
with open('1qys.pdb', mode="r") as f:
    pdb_str = f.read()
    
reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

with open('tim10.pdb', mode="r") as f:
    scaffold_pdb_str = f.read()

scaffold_structure = protein.from_pdb_string(scaffold_pdb_str, atom14_format=True, parse_hetatom=True)

In [101]:
sampler_config = config.ScaffoldedSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    denoiser_params=config.DenoiserParams(noise_scale_ca=0.5, noise_scale_frame=0.5),
    scaffoldguided_params=config.ScaffoldGuidedParams(
        target_structure=None,
        target_adj=False,
        target_ss=False,
        scaffold_structure_list=[scaffold_structure],
        sampled_insertion="0-5",
        sampled_N="0-5",
        sampled_C="0-5",
    )
)
designed_structure = designer.design_structure(sampler_config)

core.pack.pack_missing_sidechains: packing residue number 1 because of missing atom number 7 atom name  OD1
core.pack.pack_missing_sidechains: packing residue number 2 because of missing atom number 8 atom name  CD1
core.pack.pack_missing_sidechains: packing residue number 3 because of missing atom number 7 atom name  CD1
core.pack.pack_missing_sidechains: packing residue number 4 because of missing atom number 8 atom name  CD1
core.pack.pack_missing_sidechains: packing residue number 5 because of missing atom number 7 atom name  CG2
core.pack.pack_missing_sidechains: packing residue number 6 because of missing atom number 7 atom name  OD1
core.pack.pack_missing_sidechains: packing residue number 8 because of missing atom number 6 atom name  OG1
core.pack.pack_missing_sidechains: packing residue number 9 because of missing atom number 7 atom name  OD1
core.pack.pack_missing_sidechains: packing residue number 10 because of missing atom number 7 atom name  CD
core.pack.pack_missing_sidec

100%|██████████████████████████████████████| 50/50 [02:04<00:00,  2.49s/it]


In [102]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)

<py3Dmol.view at 0x7fadd8229b40>

### PPI Scaffolded

In [104]:
with open('1qys.pdb', mode="r") as f:
    pdb_str = f.read()
reference_structure = protein.from_pdb_string(pdb_str, atom14_format=True, parse_hetatom=True)

with open('insulin_target.pdb', mode="r") as f:
    target_pdb_str = f.read()
target_structure = protein.from_pdb_string(target_pdb_str, atom14_format=True, parse_hetatom=True)

with open('5L33.pdb', mode="r") as f:
    scaffold_pdb_str = f.read()
scaffold_structure = protein.from_pdb_string(scaffold_pdb_str, atom14_format=True, parse_hetatom=True)

In [None]:
sampler_config = config.ScaffoldedSamplerConfig(
    inference_params=config.InferenceParams(reference_structure=reference_structure),
    denoiser_params=config.DenoiserParams(noise_scale_ca=0, noise_scale_frame=0),
    ppi_params=config.PPIParams(hotspot_res=["A59", "A83", "A91"]),
    scaffoldguided_params=config.ScaffoldGuidedParams(
        target_structure=target_structure,
        target_adj=True,
        target_ss=True,
        scaffold_structure_list=[scaffold_structure],
        sampled_insertion="0-5",
        sampled_N="0-5",
        sampled_C="0-5",
    )
)
designed_structure = designer.design_structure(sampler_config)

core.pack.pack_missing_sidechains: packing residue number 1 because of missing atom number 7 atom name  CD
core.pack.pack_missing_sidechains: packing residue number 2 because of missing atom number 7 atom name  CG2
core.pack.pack_missing_sidechains: packing residue number 3 because of missing atom number 6 atom name  SG
core.pack.pack_missing_sidechains: packing residue number 4 because of missing atom number 7 atom name  CD
core.pack.pack_missing_sidechains: packing residue number 6 because of missing atom number 7 atom name  SD
core.pack.pack_missing_sidechains: packing residue number 7 because of missing atom number 7 atom name  OD1
core.pack.pack_missing_sidechains: packing residue number 8 because of missing atom number 8 atom name  CD1
core.pack.pack_missing_sidechains: packing residue number 9 because of missing atom number 7 atom name  CD
core.pack.pack_missing_sidechains: packing residue number 10 because of missing atom number 7 atom name  OD1
core.pack.pack_missing_sidechain

 54%|████████████████████▌                 | 27/50 [01:45<01:29,  3.89s/it]

In [None]:
designed_pdb_str = protein.to_pdb(designed_structure)
show_pdb_string(designed_pdb_str)