# Protein Generator

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys, json

from dataclasses import asdict

import py3Dmol
import torch
import numpy as np
from tqdm import tqdm

from proteome import protein
from proteome.models.design.protein_generator import config
from proteome.models.folding.omegafold.modeling import OmegaFoldForFolding
from proteome.models.design.protein_generator.modeling import ProteinGeneratorForJointDesign
from proteome.models.design.protein_generator.sampler import SeqDiffSampler

In [18]:
def show_structure(pdb_str, show_sticks=True, width=800, height=600):
    """Build an interactive py3Dmol view of a PDB-format structure.

    The structure is drawn as a cartoon coloured through a discrete map on
    the B-factor property (``prop='b'``), using the four colours listed in
    ``PLDDT_BANDS``. A stick representation can be overlaid on top.

    Args:
        pdb_str: PDB file contents as a string; passed to
            ``view.addModelsAsFrames``.
        show_sticks: When True (the default, which matches the previous
            hard-coded behaviour) an empty ``'stick'`` style is added
            alongside the cartoon.
        width: Viewer width handed to ``py3Dmol.view`` (default 800).
        height: Viewer height handed to ``py3Dmol.view`` (default 600).

    Returns:
        The configured ``py3Dmol.view``. Leave it as the last expression of
        a notebook cell to render it.
    """
    # Each entry is a 3-tuple whose last element is a hex colour; only that
    # colour is used below. The first two numbers look like lower/upper
    # pLDDT bounds of the band, but nothing in this function reads them.
    PLDDT_BANDS = [
        (0, 50, '#FF7D45'),
        (50, 70, '#FFDB13'),
        (70, 90, '#65CBF3'),
        (90, 100, '#0053D6'),
    ]

    view = py3Dmol.view(width=width, height=height)
    view.addModelsAsFrames(pdb_str)

    # NOTE(review): the map is keyed by the band's position (0, 1, 2, 3), not
    # by a pLDDT value. That only colours residues whose B-factor equals one
    # of those indices -- confirm what `protein.to_pdb` writes into the
    # B-factor column (band index vs. raw confidence).
    color_map = {band_index: band[2] for band_index, band in enumerate(PLDDT_BANDS)}
    style = {'cartoon': {'colorscheme': {'prop': 'b', 'map': color_map}}}

    if show_sticks:
        style['stick'] = {}

    # {'model': -1} is the selection the style is applied to.
    view.setStyle({'model': -1}, style)
    view.zoomTo()
    return view

In [4]:
# Instantiate the two models used throughout the notebook: the joint
# sequence/structure designer and the OmegaFold folder used in "Folding".
# NOTE(review): what model_name="auto" selects is not visible from this
# notebook -- confirm against ProteinGeneratorForJointDesign.
designer = ProteinGeneratorForJointDesign(model_name="auto")
folder = OmegaFoldForFolding()

## Binder Design

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/binder_design \
    --pdb examples/pdbs/cd86.pdb \
    --T 25 --save_best_plddt \
    --contigs B1-110,0 25-75 \
    --hotspots B40,B32,B87,B96,B30
```

In [15]:
# Run one joint structure + sequence design and serialise the structure to
# PDB text for the viewer cell that follows.
# NOTE(review): this call only sets a sequence length of 50; it passes none
# of the PDB / contigs / hotspots inputs shown in the CLI example above, so
# confirm whether binder conditioning was intended here.
inference_config = config.InferenceConfig(
    sequence_params=config.SequenceParams(length=50),
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(inference_config)
designed_pdb = protein.to_pdb(designed_structure)

100%|█████████████████████████████████████████████████████████████| 25/25 [01:10<00:00,  2.81s/it]


In [None]:
# Display the PDB text produced by the design cell above in a py3Dmol viewer.
show_structure(designed_pdb)

## Motif Scaffolding

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --pdb examples/pdbs/rsv5_5tpn.pdb \
    --contigs 0-25,A163-181,25-30 --T 25 --save_best_plddt
```

## Partial Diffusion

```bash
python ./inference.py \
    --num_designs 10 \
    --pdb examples/out/design_000000.pdb \
    --out examples/out/partial_diffusion_design \
    --contigs 38 --sampling_temp 0.3 --T 50 --save_best_plddt
```

## Secondary Structure

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 100 \
    --T 25 --save_best_plddt \
    --secondary_structure XXXXXHHHHXXXLLLXXXXXXXXXXHHHHXXXLLLXXXXXXXXXXHHHHXXXLLLXXXXXXXXXXHHHHXXXLLLXXXXXXXXXXHHHHXXXLLLXXXXX
```

## Secondary Structure Bias

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 100 \
    --T 25 --save_best_plddt \
    --helix_bias 0.01 --strand_bias 0.01 --loop_bias 0.0 
```

## Secondary Structure from PDB

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 110 \
    --T 25 --save_best_plddt \
    --dssp_pdb examples/pdbs/cd86.pdb
```

## Sequence Conditioning

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --sequence XXXXXXXXXXXXXXXXPEPSEQXXXXXXXXXXXXXXXX \
    --T 25 --save_best_plddt
```

## Sequence Partial Diffusion

```bash
python ./inference.py \
    --num_designs 10 \
    --sequence SAKVEELLETAKALGISEEEVREILELLEAGFIVIEVVSLGDAVILILENKKLGKYYILKNGEIERIKKPENARELKRKIAEILNISVEEIEAIIEKLRAK \
    --out examples/out/partial_diffusion_design \
    --sampling_temp 0.3 --T 50 --save_best_plddt
```

## Symmetric Design

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/symmetric_design \
    --contigs 25,0 25,0 25,0 \
    --T 50 \
    --save_best_plddt \
    --symmetry 3
```

## Unconditional Design

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 100 \
    --T 25 --save_best_plddt
```

In [24]:
# Design around a partially specified sequence and serialise the resulting
# structure to PDB text. The sequence string is the same one used in the
# "Sequence Conditioning" CLI example earlier in the notebook.
# NOTE(review): this cell sits under "Unconditional Design" even though it
# passes a sequence -- confirm the intended section.
# This overwrites designed_structure / designed_sequence / designed_pdb from
# the earlier design cell.
seq_conditioned_config = config.InferenceConfig(
    contigmap_params=config.ContigMap(),
    sequence="XXXXXXXXXXXXXXXXPEPSEQXXXXXXXXXXXXXXXX",
)
designed_structure, designed_sequence = designer.design_structure_and_sequence(seq_conditioned_config)
designed_pdb = protein.to_pdb(designed_structure)

100%|█████████████████████████████████████| 25/25 [01:07<00:00,  2.68s/it]


In [25]:
# Display the most recent design (designed_pdb from the cell above).
show_structure(designed_pdb)

<py3Dmol.view at 0x7f0a32e4e770>

## Weighted Sequence

```bash
python ./inference.py \
    --num_designs 10 \
    --out examples/out/design \
    --contigs 100 \
    --T 25 --save_best_plddt \
    --potentials aa_bias \
    --aa_composition W0.2 --potential_scale 1.75 
```

## Folding

In [26]:
# Fold `designed_sequence` with the OmegaFold model created earlier and
# serialise the prediction to PDB text. `designed_sequence` is whatever the
# most recently executed design cell produced, so run order matters.
# `confidence` is returned alongside the structure but is not used in the
# cells shown in this notebook.
predicted_protein, confidence = folder.fold(designed_sequence)
folded_pdb = protein.to_pdb(predicted_protein)

In [27]:
# Render the folded prediction with the shared `show_structure` helper
# defined near the top of the notebook, instead of repeating its body here.
# `view` stays bound at notebook level, and leaving it as the last expression
# displays the viewer exactly as the inlined code did.
view = show_structure(folded_pdb)
view

<py3Dmol.view at 0x7f0a32e4f1c0>