## PROTEIN GENERATOR to test runs and play around

In [1]:
# Optional: uncomment to download the model checkpoint (.pt) files with wget.
#!wget http://files.ipd.uw.edu/pub/sequence_diffusion/checkpoints/SEQDIFF_230205_dssp_hotspots_25mask_EQtasks_mod30.pt
#!wget http://files.ipd.uw.edu/pub/sequence_diffusion/checkpoints/SEQDIFF_221219_equalTASKS_nostrSELFCOND_mod30.pt

In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so source edits apply without a kernel restart.
%load_ext autoreload
%autoreload 2

In [3]:
import os, sys, json 
from proteome.models.design.protein_generator.sampler import SEQDIFF_sampler

In [4]:
# Read the baseline argument dictionary; individual entries are overridden further down.
with open("args.json", mode="r") as args_file:
    args = json.load(args_file)

In [5]:
# Create the sampler object; it receives its arguments and is initialised in later cells.
S = SEQDIFF_sampler()

### Specify parameters here

In [6]:
# Overrides applied on top of the values loaded into `args`.
# Keys are written in the same order as they would be assigned one by one.
args.update({
    # ------------------------------------------------------------------
    # ARGUMENTS
    # ------------------------------------------------------------------
    # Starting sequence (or None); X is the mask token,
    # for example: XXXXXXXXXXXPEPSEQXXXXXXXXXXXX
    'sequence': 'XXXXXXXXXXXXXXXXPEPSEQXXXXXXXXXXXXXXXX',
    # Path to a pdb to diffuse, e.g. '/path/to/input.pdb' (None = no pdb input)
    'pdb': None,
    # Contigs for design use. To sample a length range use ['70-80'] aa; to add
    # structural and sequence conditioning from the pdb, reference a chain.
    'contigs': ["0"],
    # Path with prefix to where you want to save outputs
    'out': './',
    # If True, save the best-pLDDT example in the trajectory
    'save_best_plddt': True,
    # Residues for the model to target in binder design problems, given as
    # chain + pdb index, example 'B23,B45,B66'
    'hotspots': None,
    # Save the args as json
    'save_args': True,
    # Save a trb with metadata from the run
    'dump_trb': True,

    # ------------------------------------------------------------------
    # SECONDARY STRUCTURE BIAS (options a, b, c)
    # ------------------------------------------------------------------
    # option a: dssp string with one character per residue,
    # e.g. 'XXXXXHHHHHHHXXXXLLLLXXXXEEEEEEEEEEEEEXXX'
    'secondary_structure': None,
    # option b: path to a pdb whose dssp features should be copied
    'dssp_pdb': None,
    # option c: fraction of the sequence to randomly bias toward each
    # secondary-structure type, recommended range [0, 0.05]
    'helix_bias': 0.00,   # bias toward helix
    'strand_bias': 0.00,  # bias toward strand
    'loop_bias': 0.00,    # bias toward loop

    # ------------------------------------------------------------------
    # GUIDING POTENTIALS (can be used in combination)
    # ------------------------------------------------------------------
    # aa_bias: one-letter amino acid code followed by the fraction of the
    # sequence to bias, comma separated, e.g. 'W0.2,Y0.1'
    'aa_composition': 'W0.2',
    # charge: target charge to bias toward, and the pH to design in
    'target_charge': -10,
    'target_pH': 7.4,
    # hydrophobic: GRAVY score to bias toward (use a negative score to bias
    # away from hydrophobic)
    'hydrophobic_score': -10,
    # For any potential used you must also specify the following two entries.
    # Comma separated list of potentials used, e.g. 'aa_bias,charge,hydrophobic,'
    'potentials': '',
    # Comma separated list of weights, one per potential, in the same order
    'potential_scale': '',

    # ------------------------------------------------------------------
    # DIFFUSION PARAMS
    # ------------------------------------------------------------------
    # Number of steps (10 or 15 is enough to test); 25 is generally good, but
    # use 50 or 100 for harder problems and better AF2 results
    'T': 25,

    # ------------------------------------------------------------------
    # PARTIAL DIFFUSION
    # ------------------------------------------------------------------
    # 1.0 means full diffusion; any other value enters partial diffusion mode,
    # diffusing the input to the specified temperature
    'sampling_temp': 1.0,
    # Path to a trb to partially diffuse with the same contigs; otherwise
    # supply a pdb or sequence to partially diffuse from
    'trb': None,
})

In [7]:
# Hand the edited argument dictionary to the sampler.
S.set_args(args)

In [8]:
# Initialise the model; the log output of this cell reports the checkpoint being loaded.
S.model_init()

Loading model checkpoint...
Successfully loaded model checkpoint


In [9]:
# Initialise the diffuser on the sampler.
# NOTE(review): presumably uses the args passed via set_args (e.g. T) — confirm in the sampler source.
S.diffuser_init()

### Generate Samples
Once you have generated samples you like, use the saved args JSON file (written when `save_args` is true) to launch a production run.

For example:
```
python ./inference.py -input_json ./examples/out/design_000000_args.json
```

In [10]:
# How many times to call the sampler; raise this to draw more samples.
n_samples = 1
for i in range(n_samples):
    S.generate_sample()

Preparing sequence input
Generating sample 000000 ...


  assert input.numel() == input.storage().size(), (


EGLEEDAKLLELELNLPEPSEQELLILEELLEELIELG
    TIMESTEP [25/25]   |   current PLDDT: 0.7695   <<  >>   best PLDDT: 0.7695
MALLELLLLLLRLLPAPEPSEQLLEDLEELLELAELLA
    TIMESTEP [24/25]   |   current PLDDT: 0.7329   <<  >>   best PLDDT: 0.7695
GVPELLLELLLPELELPEPSEQELALLELAEQERRLRG
    TIMESTEP [23/25]   |   current PLDDT: 0.7412   <<  >>   best PLDDT: 0.7695
TDPLLTEIDKAYELQHPEPSEQERLRAELLRALRRRAE
    TIMESTEP [22/25]   |   current PLDDT: 0.7349   <<  >>   best PLDDT: 0.7695
KTLERLELVERLLRILPEPSEQQVIEAESRAAQRLAEE
    TIMESTEP [21/25]   |   current PLDDT: 0.8286   <<  >>   best PLDDT: 0.8286
SVEAAEELLEKVLSHSPEPSEQEVVRLLDAALDIHEAG
    TIMESTEP [20/25]   |   current PLDDT: 0.8047   <<  >>   best PLDDT: 0.8286
SPPILSEKLRLLLANAPEPSEQEVEELVKILEELAGLL
    TIMESTEP [19/25]   |   current PLDDT: 0.7925   <<  >>   best PLDDT: 0.8286
RKGKLRIAAPLGLENVPEPSEQEVAAFDKQLEELALEE
    TIMESTEP [18/25]   |   current PLDDT: 0.7812   <<  >>   best PLDDT: 0.8286
SPGEKRLVAELRELGMPEPSEQELQEIKKQLQKAEREA
    TIMESTEP [17/

In [11]:
import py3Dmol

In [12]:
# Load the generated structure as text so it can be passed to the viewer.
with open('_000000.pdb', 'r') as pdb_file:
    result_pdb = pdb_file.read()

In [13]:
# pLDDT confidence bands as (lower bound, upper bound, hex colour), on a 0-100 scale.
PLDDT_BANDS = [
  (0, 50, '#FF7D45'),
  (50, 70, '#FFDB13'),
  (70, 90, '#65CBF3'),
  (90, 100, '#0053D6')
]


def _plddt_band_index(plddt):
    """Return the index of the PLDDT_BANDS entry that contains ``plddt``.

    ``plddt`` is expected on the 0-100 scale. Values below the first band map
    to index 0 and values at or above the last upper bound map to the last band.
    """
    for band_idx, (lower, upper, _colour) in enumerate(PLDDT_BANDS):
        if lower <= plddt < upper:
            return band_idx
    return 0 if plddt < PLDDT_BANDS[0][0] else len(PLDDT_BANDS) - 1


def _band_b_factors(pdb_text):
    """Return ``pdb_text`` with each atom's B-factor replaced by its band index.

    The colour scheme below looks colours up by the exact value of the ``b``
    property, and ``color_map`` is keyed by band index (0, 1, 2, ...). The
    B-factor column (PDB columns 61-66) therefore has to hold the band index
    rather than the raw confidence value for the colours to be applied.
    Records without a parseable B-factor are left unchanged.
    """
    lines = pdb_text.splitlines()
    b_factors = {}
    for line_no, line in enumerate(lines):
        if line.startswith(('ATOM', 'HETATM')):
            try:
                b_factors[line_no] = float(line[60:66])
            except ValueError:
                continue  # short or malformed record: keep as-is

    # NOTE(review): the scale of the stored confidence is not visible here. If no
    # value exceeds 1.0 it is assumed to be a 0-1 fraction and rescaled to 0-100
    # to match PLDDT_BANDS — confirm against how the sampler writes the PDB.
    scale = 100.0 if b_factors and max(b_factors.values()) <= 1.0 else 1.0

    for line_no, b_factor in b_factors.items():
        line = lines[line_no]
        band_idx = _plddt_band_index(b_factor * scale)
        lines[line_no] = f"{line[:60]}{band_idx:6.2f}{line[66:]}"
    return '\n'.join(lines) + '\n'


view = py3Dmol.view(width=800, height=600)
view.addModelsAsFrames(_band_b_factors(result_pdb))

# Band index -> colour; matches the band indices written into the B-factor column above.
color_map = {i: bands[2] for i, bands in enumerate(PLDDT_BANDS)}
style = {'cartoon': {'colorscheme': {'prop': 'b', 'map': color_map}}}

# Also draw sticks with default settings.
style['stick'] = {}

view.setStyle({'model': -1}, style)
view.zoomTo()

<py3Dmol.view at 0x7f2791455b10>