In [0]:
%pip install ../dbboltz[gpu]
# py3Dmol is used by the structure-viewer cells further down.
%pip install py3Dmol
# Restart the Python process so the packages installed above are picked up.
dbutils.library.restartPython()

In [0]:
import mlflow
# Turn MLflow autologging off for this session.
# NOTE(review): this runs before dbboltz is imported — presumably deliberate; confirm before reordering.
mlflow.autolog(disable=True)
from dbboltz.boltz import run_boltz
import yaml

In [0]:
%sh

# Install Miniconda under /miniconda3, but only when that directory does not exist yet.
if [ ! -d "/miniconda3" ]; then
  mkdir -p /miniconda3

  wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /miniconda3/miniconda.sh
  # Installer flags: -b non-interactive (batch) mode, -u update an existing install, -p install prefix.
  bash /miniconda3/miniconda.sh -b -u -p /miniconda3
  # The installer script is no longer needed once Miniconda is in place.
  rm -rf /miniconda3/miniconda.sh
fi

# Make the conda command available for the rest of this cell.
source /miniconda3/bin/activate
# NOTE(review): removes the 'defaults' channel from the conda config. This presumably
# errors when 'defaults' is not configured, and nothing in this cell stops on failure — confirm intended.
conda config --remove channels defaults

# Dedicated environment for HMMER; the notebook later points at
# /miniconda3/envs/jackhmmer_env/bin/jackhmmer from this environment.
conda create -n jackhmmer_env python=3.8 --yes
conda activate jackhmmer_env
# Install HMMER from the bioconda channel.
conda install -y bioconda::hmmer

In [0]:
%sh
# Copy the boltz directory from the Unity Catalog volume to local disk.
# The result, /local_disk0/cache/boltz, is the path the notebook passes as the 'cache' setting.
mkdir -p /local_disk0/cache/
cp -r /Volumes/peter_hawkins/testing/boltz /local_disk0/cache/

In [0]:
def get_model_config():
    """Return the settings shared by every run in this notebook.

    Returns:
        dict: the path to the jackhmmer binary and the compute type ('gpu').
        A new dict is built on every call.
    """
    return {
        'jackhmmer_binary_path': "/miniconda3/envs/jackhmmer_env/bin/jackhmmer",
        'compute_type': 'gpu',
    }


model_config = get_model_config()

## Protein/RNA example

In [0]:
# Per-run settings; merged with model_config when calling run_boltz.
# NOTE(review): the sampling-related keys presumably map onto the Boltz options
# of the same name — confirm against dbboltz.
params = {
    'msa': 'no_msa',                      # start without any MSA
    'msa_depth': 20,
    'diffusion_samples': 1,
    'recycling_steps': 3,
    'sampling_steps': 200,
    'cache': '/local_disk0/cache/boltz',  # local copy of the boltz directory
}

In [0]:
# PDB 2Y9H: protein chain A plus RNA chain B.
# Each entry is a (chain_id, sequence) pair; the chain id is a plain string.
# NOTE(review): if run_boltz expects a tuple of chain ids here, it needs ("A",) — confirm.
inputs = {
    "protein": [
        (
            "A",
            "GTGAMWLTKLVLNPASRAARRDLANPYEMHRTLSKAVSRALEEGRERLLWRLEPARGLEPPVVLVQTLTEPDWSVLDEGYAQVFPPKPFHPALKPGQRLRFRLRANPAKRLAATGKRVALKTPAEKVAWLERRLEEGGFRLLEGERGPWVQILQDTFLEVRRKKDGEEAGKLLQVQAVLFEGRLEVVDPERALATLRRGVGPGKALGLGLLSVAP",
        ),
    ],
    "rna": [
        ("B", "UCCCCACGCGUGUGGGGAU"),
    ],
}

In [0]:
# Merge the shared settings with the per-run settings; on a key clash params wins.
results = run_boltz(inputs, config={**model_config, **params})

In [0]:
import py3Dmol

# Show the 'pdb' text of the first entry in results as a cartoon, one colour per chain.
view = py3Dmol.view(width=800, height=300)
view.addModel(results[0]['pdb'], 'pdb')
view.setStyle({'chain': 'A'}, {'cartoon': {'color': 'blue'}})  # protein chain
view.setStyle({'chain': 'B'}, {'cartoon': {'color': 'red'}})   # RNA chain
view.zoomTo()

# Export the viewer as HTML and hand it to the notebook's HTML display helper.
html = view._make_html()
displayHTML(html)

#### Now use the MMseqs2 server to build the MSA instead of running without one

In [0]:
# Switch the MSA source to MMseqs2 and allow the MSA server to be used.
# params is modified in place, so these keys stay set for later cells.
params['msa'] = 'mmseqs'
params['use_msa_server'] = True

results = run_boltz(inputs, config={**model_config, **params})

In [0]:
import py3Dmol

# Show the 'pdb' text of the first entry in results as a cartoon, one colour per chain.
view = py3Dmol.view(width=800, height=300)
view.addModel(results[0]['pdb'], 'pdb')
view.setStyle({'chain': 'A'}, {'cartoon': {'color': 'blue'}})  # protein chain
view.setStyle({'chain': 'B'}, {'cartoon': {'color': 'red'}})   # RNA chain
view.zoomTo()

# Export the viewer as HTML and hand it to the notebook's HTML display helper.
html = view._make_html()
displayHTML(html)

## Now use Jackhmmer on uniref50 (could choose any fasta)
 - This no longer hits any public server, but still produces an MSA
 - Effective at improving accuracy in my testing
 - For large workloads, I strongly suggest setting up an MMseqs2 server on your own compute
    - Since the MMseqs2 server requires ~1TB RAM, only spin up the server for the duration of large runs
    - Fall back to no_msa or Jackhmmer for ad-hoc and smaller workloads

In [0]:
# Build the MSA locally with Jackhmmer against a UniRef50 FASTA file.
params.update({
    'msa': 'jh',
    'index_name':'/Volumes/protein_folding/boltz/datasets/uniref50/uniref50.fasta',
    # params is mutated in place across cells, so the True set for the
    # MMseqs2 run would otherwise carry over into this local-only run.
    'use_msa_server': False,
})

results = run_boltz(
    inputs,
    config={**model_config, **params},
)

In [0]:
import py3Dmol

# Show the 'pdb' text of the first entry in results as a cartoon, one colour per chain.
view = py3Dmol.view(width=800, height=300)
view.addModel(results[0]['pdb'], 'pdb')
view.setStyle({'chain': 'A'}, {'cartoon': {'color': 'blue'}})  # protein chain
view.setStyle({'chain': 'B'}, {'cartoon': {'color': 'red'}})   # RNA chain
view.zoomTo()

# Export the viewer as HTML and hand it to the notebook's HTML display helper.
html = view._make_html()
displayHTML(html)

## Note that using an MSA (MMseqs2 or even Jackhmmer on UniRef50) improves prediction accuracy