# Protein Generator

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os, sys, json

from dataclasses import asdict

import py3Dmol
import torch
import numpy as np
from tqdm import tqdm

from proteome import protein
from proteome.models.design.protein_generator import config
from proteome.models.folding.omegafold.modeling import OmegaFoldForFolding
from proteome.models.design.protein_generator.modeling import ProteinGeneratorForJointDesign
from proteome.models.design.protein_generator.sampler import SeqDiffSampler

In [None]:
def show_structure(pdb_str):
    """Return a py3Dmol viewer showing ``pdb_str`` as cartoon plus sticks.

    The cartoon is coloured through a B-factor (``prop: 'b'``) colour map
    built from the four pLDDT band colours below.

    NOTE(review): the map keys are the band positions 0-3 produced by
    ``enumerate``, not pLDDT values, so this presumably expects B-factors
    that were already replaced by band indices -- confirm.

    Args:
        pdb_str: PDB-formatted text to load into the viewer.

    Returns:
        The configured ``py3Dmol.view`` (800x600), zoomed to the model.
    """
    # (lower bound, upper bound, hex colour) for each pLDDT band.
    plddt_bands = (
        (0, 50, '#FF7D45'),
        (50, 70, '#FFDB13'),
        (70, 90, '#65CBF3'),
        (90, 100, '#0053D6'),
    )
    band_colors = dict(
        enumerate(color for _low, _high, color in plddt_bands)
    )
    render_style = {
        'cartoon': {'colorscheme': {'prop': 'b', 'map': band_colors}},
        'stick': {},
    }

    viewer = py3Dmol.view(width=800, height=600)
    viewer.addModelsAsFrames(pdb_str)
    # Model index -1 is used as the selector, as in the original styling call.
    viewer.setStyle({'model': -1}, render_style)
    viewer.zoomTo()
    return viewer

In [None]:
# Build both models once; the design cells below all call `designer`, and the
# "Folding" section at the end calls `folder`.
# The designer is created with a fixed random_seed (1227).
# NOTE(review): model_name="auto" presumably picks a checkpoint automatically -- confirm.
designer = ProteinGeneratorForJointDesign(model_name="auto", random_seed=1227)
folder = OmegaFoldForFolding()

## Binder Design

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/binder_design \
    --pdb examples/pdbs/cd86.pdb \
    --T 25 --save_best_plddt \
    --contigs B1-110,0 25-75 \
    --hotspots B40,B32,B87,B96,B30
```

In [None]:
# Read the CD86 PDB from the working directory and parse it into the
# structure object that the binder-design cell passes as `reference_structure`.
reference_pdb_path = "cd86.pdb"
with open(reference_pdb_path, mode="r") as f:
    reference_pdb_str = f.read()

reference_structure = protein.from_pdb_string(reference_pdb_str)

In [None]:
# Python counterpart of the binder-design CLI call above: same contigs
# (with "/" instead of ",") and the same five hotspot residues.
# The CLI flags --num_designs, --save_best_plddt and --T are not set here.
# NOTE(review): T presumably falls back to the config default -- confirm it is 25.
hotspot_residues = ["B40", "B32", "B87", "B96", "B30"]
binder_config = config.InferenceConfig(
    reference_structure=reference_structure,
    contigmap_params=config.ContigMap(contigs=["B1-110/0 25-75"]),
    hotspot_params=config.HotspotParams(hotspot_res=hotspot_residues),
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    binder_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Motif Scaffolding

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --pdb examples/pdbs/rsv5_5tpn.pdb \
    --contigs 0-25,A163-181,25-30 --T 25 --save_best_plddt
```

In [None]:
# Read the RSV (5TPN) PDB from the working directory and parse it into the
# reference structure used by the motif-scaffolding cell.
reference_pdb_path = "rsv5_5tpn.pdb"
with open(reference_pdb_path, mode="r") as f:
    reference_pdb_str = f.read()

reference_structure = protein.from_pdb_string(reference_pdb_str)

In [None]:
# Python counterpart of the motif-scaffolding CLI call above: the contig
# string keeps residues A163-181 of the reference between two designed
# segments (0-25 and 25-30).
# The CLI flags --num_designs, --save_best_plddt and --T are not set here.
scaffold_config = config.InferenceConfig(
    reference_structure=reference_structure,
    contigmap_params=config.ContigMap(contigs=["0-25/A163-181/25-30"]),
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    scaffold_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Partial Diffusion

```bash
python ./inference.py \
    --num_designs 10 \
    --pdb examples/out/design_000000.pdb \
    --out examples/out/partial_diffusion_design \
    --contigs 38 --sampling_temp 0.3 --T 50 --save_best_plddt
```

In [None]:
# Load a previously generated design as the starting point for partial
# diffusion.
with open("design_000000.pdb", mode="r") as f:
    reference_pdb_str = f.read()

# Parse, then crop in one step.
# NOTE(review): crop_protein_37_to_27 presumably reduces the per-residue atom
# layout from 37 to 27 slots -- confirm against proteome.protein.
reference_structure = protein.crop_protein_37_to_27(
    protein.from_pdb_string(reference_pdb_str)
)

In [None]:
# Python counterpart of the partial-diffusion CLI call above: contigs "38",
# T=50 and sampling_temp=0.3, starting from the reference loaded in the
# previous cell.
# The CLI flags --num_designs and --save_best_plddt are not set here.
partial_diffusion_config = config.InferenceConfig(
    reference_structure=reference_structure,
    diffuser_params=config.DiffuserParams(T=50),
    contigmap_params=config.ContigMap(contigs=["38"]),
    sampling_temp=0.3,
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    partial_diffusion_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Secondary Structure

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 100 \
    --T 25 --save_best_plddt \
    --secondary_structure XXXXXHHHHXXXLLLXXXXXXXXXXHHHHXXXLLLXXXXXXXXXXHHHHXXXLLLXXXXXXXXXXHHHHXXXLLLXXXXXXXXXXHHHHXXXLLLXXXXX
```

In [None]:
# Secondary-structure string passed to the design cell below: one
# 20-character unit repeated five times (100 characters, matching contigs "100").
# NOTE(review): presumably H = helix, L = loop, X = unconstrained -- confirm.
secondary_structure_str = "XXXXXHHHHXXXLLLXXXXX" * 5

In [None]:
# Python counterpart of the secondary-structure CLI call above: a
# 100-residue design conditioned on `secondary_structure_str`.
# The CLI flags --num_designs, --save_best_plddt and --T are not set here.
ss_params = config.SecondaryStructureParams(
    secondary_structure=secondary_structure_str
)
ss_config = config.InferenceConfig(
    contigmap_params=config.ContigMap(contigs=["100"]),
    secondary_structure_params=ss_params,
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    ss_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Secondary Structure Bias

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 100 \
    --T 25 --save_best_plddt \
    --helix_bias 0.01 --strand_bias 0.01 --loop_bias 0.0 
```

In [None]:
# Python counterpart of the secondary-structure-bias CLI call above:
# helix and strand biases of 0.01 on a 100-residue design.
# The CLI also passes --loop_bias 0.0, which is not set here.
# NOTE(review): presumably 0.0 is the StructureBiasParams default -- confirm.
bias_params = config.StructureBiasParams(helix_bias=0.01, strand_bias=0.01)
bias_config = config.InferenceConfig(
    contigmap_params=config.ContigMap(contigs=["100"]),
    structure_bias_params=bias_params,
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    bias_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Secondary Structure from PDB

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 110 \
    --T 25 --save_best_plddt \
    --dssp_pdb examples/pdbs/cd86.pdb
```

In [None]:
# Read the CD86 PDB again, this time as the structure passed to
# SecondaryStructureParams(dssp_structure=...) in the next cell.
dssp_pdb_path = "cd86.pdb"
with open(dssp_pdb_path, mode="r") as f:
    dssp_pdb_str = f.read()

dssp_structure = protein.from_pdb_string(dssp_pdb_str)

In [None]:
# Python counterpart of the --dssp_pdb CLI call above: a 110-residue
# design conditioned on `dssp_structure` loaded in the previous cell.
# The CLI flags --num_designs, --save_best_plddt and --T are not set here.
dssp_params = config.SecondaryStructureParams(dssp_structure=dssp_structure)
dssp_config = config.InferenceConfig(
    contigmap_params=config.ContigMap(contigs=["110"]),
    secondary_structure_params=dssp_params,
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    dssp_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Sequence Conditioning

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --sequence XXXXXXXXXXXXXXXXPEPSEQXXXXXXXXXXXXXXXX \
    --T 25 --save_best_plddt
```

In [None]:
# Python counterpart of the sequence-conditioning CLI call above: the
# sequence string fixes "PEPSEQ" in the middle of X placeholders.
# An empty ContigMap is passed because no contigs are given.
# The CLI flags --num_designs, --save_best_plddt and --T are not set here.
conditioning_sequence = "XXXXXXXXXXXXXXXXPEPSEQXXXXXXXXXXXXXXXX"
sequence_config = config.InferenceConfig(
    sequence=conditioning_sequence,
    contigmap_params=config.ContigMap(),
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    sequence_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Sequence Partial Diffusion

```bash
python ./inference.py \
    --num_designs 10 \
    --sequence SAKVEELLETAKALGISEEEVREILELLEAGFIVIEVVSLGDAVILILENKKLGKYYILKNGEIERIKKPENARELKRKIAEILNISVEEIEAIIEKLRAK \
    --out examples/out/partial_diffusion_design \
    --sampling_temp 0.3 --T 50 --save_best_plddt
```

In [None]:
# Python counterpart of the sequence partial-diffusion CLI call above:
# start from a full input sequence with T=50 and sampling_temp=0.3.
# The CLI flags --num_designs and --save_best_plddt are not set here.
starting_sequence = "SAKVEELLETAKALGISEEEVREILELLEAGFIVIEVVSLGDAVILILENKKLGKYYILKNGEIERIKKPENARELKRKIAEILNISVEEIEAIIEKLRAK"
sequence_diffusion_config = config.InferenceConfig(
    sequence=starting_sequence,
    diffuser_params=config.DiffuserParams(T=50),
    contigmap_params=config.ContigMap(),
    sampling_temp=0.3,
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    sequence_diffusion_config
)
# Serialise to PDB text for the viewer cell further down.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
designed_sequence

In [None]:
show_structure(designed_pdb)

## Symmetric Design

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/symmetric_design \
    --contigs 25,0 25,0 25,0 \
    --T 50 \
    --save_best_plddt \
    --symmetry 3
```

In [None]:
# Python counterpart of the symmetric-design CLI call above: three
# 25-residue chains ("25/0" repeated three times), T=50, symmetry=3.
# The CLI flags --num_designs and --save_best_plddt are not set here.
symmetric_config = config.InferenceConfig(
    diffuser_params=config.DiffuserParams(T=50),
    contigmap_params=config.ContigMap(contigs=["25/0 25/0 25/0"]),
    symmetry_params=config.SymmetryParams(symmetry=3),
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    symmetric_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Unconditional Design

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 100 \
    --T 25 --save_best_plddt
```

In [None]:
# Python counterpart of the unconditional CLI call above: only the contig
# length ("100") is specified.
# The CLI flags --num_designs, --save_best_plddt and --T are not set here.
unconditional_config = config.InferenceConfig(
    contigmap_params=config.ContigMap(contigs=["100"]),
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    unconditional_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Weighted Sequence

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 100 \
    --T 25 --save_best_plddt \
    --potentials aa_bias \
    --aa_composition W0.2 --potential_scale 1.75 
```

In [None]:
# Python counterpart of the weighted-sequence CLI call above: one
# amino-acid composition potential ("W0.2") with a scale of 1.75 on a
# 100-residue design.
# The CLI flags --num_designs, --save_best_plddt and --T are not set here.
aa_bias = config.AACompositionalBiasParams(aa_composition="W0.2")
weighted_config = config.InferenceConfig(
    contigmap_params=config.ContigMap(contigs=["100"]),
    potentials_params=config.PotentialsParams(
        potentials=[aa_bias],
        potential_scales=[1.75],
    ),
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(
    weighted_config
)
# Serialise to PDB text for the viewer cell that follows.
designed_pdb = protein.to_pdb(designed_structure)

In [None]:
show_structure(designed_pdb)

## Folding

In [None]:
# Fold the sequence from whichever design cell was executed last
# (`designed_sequence` is rebound by every design cell above).
# `confidence` is returned by the folder but is not used in this notebook.
predicted_protein, confidence = folder.fold(designed_sequence)
# Serialise the prediction to PDB text for the viewer cell below.
folded_pdb = protein.to_pdb(predicted_protein)

In [None]:
# Render the folded model with the same styling as the designs above.
# This cell used to repeat the body of show_structure line for line; calling
# the helper keeps the two views in sync if the styling ever changes.
view = show_structure(folded_pdb)
# Leave the viewer as the cell's last expression so the notebook displays it.
view