# Template preparation

In [1]:
import pickle
from pathlib import Path

from alphafold.data.prepare_templates import *
from alphafold.data.mmseqs_2_uniprot import *
from Bio import Align, SeqIO
from Bio.PDB.mmcifio import MMCIFIO

Variables that need to be set by the user (e.g. through a form)

In [2]:
# --- Job identity -----------------------------------------------------------
# The job name doubles as the output directory.
jobname = "jupyter_test"
out_dir = jobname
# Passed to fix_mmcif() when the template mmCIF file is written.
revision_date = "2100-01-01"

# --- Inputs -----------------------------------------------------------------
targets = ["examples/H1142/H1142.fasta"]
template = "examples/H1142/H1142.pdb"
target_chains = ["A", "B"]
# Whether the first target is a fasta file, as reported by is_fasta().
fasta_target = is_fasta(targets[0])

# --- Alignment --------------------------------------------------------------
# When `align` is False no pdb_hits.sto alignments are written by this notebook.
align_tool = "blast"
align = True

# --- Template processing switches -------------------------------------------
append = True
inpaint_clashes = True
superimpose = False

Load template

In [3]:
# Load the template structure from the path set in `template` (configuration
# cell) instead of repeating the literal path, so that changing the
# configuration actually changes which template is loaded.
template_model = load_PDB(template, is_mmcif=False)

# Every chain found in the template is kept as a template chain.
template_chains = [c.id for c in template_model]
remove_extra_chains(template_model, template_chains)
remove_hetatms(template_model)

# One sequence per template chain, in the same order as `template_chains`.
template_sequences = [
    get_fastaseq(template_model, chain) for chain in template_chains
]

Load target

In [4]:
# Resolve chains, sequences and models for the target(s).
# NOTE: `target_chains` is rebound here to the value returned by
# get_target_data(), replacing the list set in the configuration cell.
target_chains, target_sequences, target_models = get_target_data(
    targets, target_chains, is_fasta=fasta_target, is_mmcif=False
)

Write template cif files

In [5]:
# Directory holding the template mmCIF files; created on demand.
mmcif_path = Path(out_dir, "template_data", "mmcif_files")
mmcif_path.mkdir(parents=True, exist_ok=True)

# With `append` the next id is taken from get_next_id(); otherwise start at "0000".
next_id = get_next_id(mmcif_path) if append else "0000"

# Derive the file path from `mmcif_path` (no dependency on `os`, no duplicated
# path components). It is kept as a str because Bio.PDB's MMCIFIO.save()
# opens str paths itself and treats any other object as an open file handle.
template_mmcif_path = str(mmcif_path / f"{next_id}.cif")

if inpaint_clashes:
    template_model = detect_and_remove_clashes(template_model)
    # Sequences are recomputed from the model returned by the clash removal.
    template_sequences = [
        get_fastaseq(template_model, chain) for chain in template_chains
    ]

io = MMCIFIO()
io.set_structure(template_model)
io.save(template_mmcif_path)

fix_mmcif(
    template_mmcif_path, template_chains, template_sequences, revision_date
)

pdb_seqres_path = Path(out_dir, "template_data", "pdb_seqres.txt").resolve()
write_seqres(
    pdb_seqres_path,
    template_sequences,
    template_chains,
    seq_id=next_id,
    append=append,
)

# extra flagfile for AF usage
af_flagfile_path = Path(out_dir, "template_data", "templates.flag")
if not af_flagfile_path.is_file():  # don't overwrite file if already there
    with open(af_flagfile_path, "w") as flagfile:
        flagfile.write(f"--template_mmcif_dir={mmcif_path.resolve()}\n")
        flagfile.write(f"--pdb_seqres_database_path={pdb_seqres_path}\n")
        if align:  # means we are not going to let AF overwrite pdb_hits.sto
            flagfile.write("--use_precomputed_msas\n")

Write alignments

In [7]:
# Handling targets: here alignments are performed against the template.
# The target can either be one or more PDB files, or a fasta file containing
# sequences.
#
# If a fasta file is submitted, then sequence alignments will be performed.
# If one or more PDBs are submitted, then either sequence or structural
# alignments can be performed.
#
# If multiple model PDBs are submitted, then we are superimposing several
# unbound chains to the same template.
if align:  # only if an alignment tool is selected, otherwise leave it to AlphaFold's template search
    assert len(target_chains) == len(
        template_chains
    ), f"The number of chains to align from target ({target_chains}) doesn't match the number of chains in the template ({template_chains}). Make sure that the files contain the same number of chains or select the chains that should be paired with --target_chains, --template_chains"

    # Template and target chains are paired by position.
    chain_pairs = zip(
        template_chains,
        template_sequences,
        target_chains,
        target_sequences,
        target_models,
    )
    for i, (
        template_chain,
        template_sequence,
        target_chain,
        target_sequence,
        target_model,
    ) in enumerate(chain_pairs):
        msa_chain = ascii_upperlower[i]

        # Work on copies (pickle round-trip) so that removing chains below
        # does not modify the shared template/target models.
        this_template_model = pickle.loads(pickle.dumps(template_model, -1))
        this_target_model = pickle.loads(pickle.dumps(target_model, -1))
        if not fasta_target:
            remove_extra_chains(this_template_model, [template_chain])
            remove_extra_chains(this_target_model, [target_chain])

        alignment = do_align(
            template_sequence,
            this_template_model,
            target_sequence,
            this_target_model,
            alignment_type=align_tool,
        )
        sto_alignment = format_alignment_stockholm(
            alignment, hit_id=next_id, hit_chain=template_chain
        )

        # write alignment to <out_dir>/msas/<msa_chain>/pdb_hits.sto
        msa_path = f"msas/{msa_chain}"
        msa_dir = Path(out_dir, msa_path)
        msa_dir.mkdir(parents=True, exist_ok=True)
        with open(
            msa_dir / "pdb_hits.sto",
            mode="a" if append else "w",
        ) as pdb_hits:
            pdb_hits.writelines(sto_alignment)

# `.parent` is used instead of `.parents[0]`: both give the same directory,
# but `.parents[0]` raises IndexError when out_dir has no parent component
# (e.g. "."), whereas `.parent` returns the path itself in that case.
af_output_dir = Path(out_dir).parent

# The suggested command only differs in the fasta path between the two cases.
fasta_arg = targets[0] if fasta_target else "target.fasta"
print(
    f"Run AlphaFold with, e.g.:\npython run_alphafold.py --fasta_paths {fasta_arg} --flagfile databases.flag --flagfile {af_flagfile_path} --output_dir {af_output_dir} --cross_chain_templates --dropout --model_preset='multimer_v2' --separate_homomer_msas"
)
if not fasta_target:
    print(
        "*** NB: the name of the fasta target should be the same as the name of the folder containing the output msas: (e.g.  if the fasta target file is 'target.fasta', then --output_dir='somedir/target' ***"
    )

DO ALIGN blast
GLEKDFLPLYFGWFLTKKSSETLRKAGQVFLEELGNHKAFKKELRHFISGDEPKEKLELVSYFGKRPPGVLHCTTKFCDYKAAGAEEYAQQEVVKRSYGKAFKLSISALFVTPKTAGAQVVLTDQELQLWPSDLDKPSASEGLPPGSRAHVTLGCAADVQPVQTGLDLLDILQQVKGGSQGEAVGELPRGKLYSLGKGRWMLSLTKKMEVKAIFTGYYG
GLEKDFLPLYFGWFLTKKSSETLRKAGQVFLEELGNHKAFKKELRHFISGDEPKEKLELVSYFGKRPPGVLHCTTKFCDYKAAGAEEYAQQEVVKRSYGKAFKLSISALFVTPKTAGAQVVLTDQELQLWPSDLDKPSASEGLPPGSRAHVTLGCAADVQPVQTGLDLLDILQQVKGGSQGEAVGELPRGKLYSLGKGRWMLSLTKKMEVKAIFTGYYG
DO ALIGN blast
EVQLEESGGGLVQAGGSLTLSCAASGFTFDDYAMGWYRQAPGKERVGVSCISRTDGYTYYLDSVKGRFTISTDHAKHTVYLQMNNLKPDDTGLYYCAADADPEYGSRCPDPYYGMDYWGKGILVTVSS
EVQLEESGGGLVQAGGSLTLSCAASGFTFDDYAMGWYRQAPGKERVGVSCISRTDGYTYYLDSVKGRFTISTDHAKHTVYLQMNNLKPDDTGLYYCAADADPEYGSRCPDPYYGMDYWGKGILVTVSS
Run AlphaFold with, e.g.:
python run_alphafold.py --fasta_paths examples/H1142/H1142.fasta --flagfile databases.flag --flagfile jupyter_test/template_data/templates.flag --output_dir . --cross_chain_templates --dropout --model_preset='multimer_v2' --separate_homomer_msas


In [None]:
# Disabled cell: the superimpose step below is wrapped in a string literal, so
# none of it is executed.
# NOTE(review): before re-enabling, two names need attention:
#   * `args` is not defined anywhere in the visible notebook; the alignment
#     tool is held in `align_tool` in the configuration cell.
#   * the configuration cell assigns `superimpose = False`, so the call
#     `superimpose(...)` below would be applied to that boolean unless the
#     flag or the function is renamed.
"""
if superimpose:  # modify template
    # superimpose target chains to template, then save those as template mmcif, and realign to itself
    target_model = superimpose(
        template_model, template_chains, target_models, target_chains, alignment_type=args.align_tool
    )
    template_model = target_model
    template_sequences = target_sequences
    template_chains = target_chains
"""

Run mmseqs

In [68]:
#@title Install dependencies
import os
from sys import version_info

# Interpreter version as "major.minor"; pinned in the mamba installs below.
python_version = f"{version_info.major}.{version_info.minor}"
PYTHON_VERSION = python_version

USE_AMBER = False
USE_TEMPLATES = True

# Every stage below touches an empty marker file (COLABFOLD_READY, CONDA_READY,
# HH_READY, AMBER_READY) in the working directory once its commands have been
# issued, and is skipped on later runs while that marker exists.
# Exit codes of the shell commands are not checked.

if not os.path.isfile("COLABFOLD_READY"):
    print("installing colabfold...")
    os.system("pip install -q --no-warn-conflicts 'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold'")
    # os.environ values are always strings, so this is true exactly when
    # TPU_NAME is set.
    if "TPU_NAME" in os.environ:
        for cmd in (
            "pip uninstall -y jax jaxlib",
            "pip install --no-warn-conflicts --upgrade dm-haiku==0.0.10 'jax[cuda12_pip]'==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html",
        ):
            os.system(cmd)
    for cmd in (
        "ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold",
        "ln -s /usr/local/lib/python3.*/dist-packages/alphafold alphafold",
        "touch COLABFOLD_READY",
    ):
        os.system(cmd)

if (USE_AMBER or USE_TEMPLATES) and not os.path.isfile("CONDA_READY"):
    print("installing conda...")
    for cmd in (
        "wget -qnc https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh",
        "bash Mambaforge-Linux-x86_64.sh -bfp /usr/local",
        "mamba config --set auto_update_conda false",
        "touch CONDA_READY",
    ):
        os.system(cmd)

# Which optional tool sets are requested and not yet marked as installed.
need_hh = USE_TEMPLATES and not os.path.isfile("HH_READY")
need_amber = USE_AMBER and not os.path.isfile("AMBER_READY")

if need_hh and need_amber:
    # Both are missing: install them with a single mamba call.
    print("installing hhsuite and amber...")
    os.system(f"mamba install -y -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 openmm=7.7.0 python='{PYTHON_VERSION}' pdbfixer")
    os.system("touch HH_READY")
    os.system("touch AMBER_READY")
elif need_hh:
    print("installing hhsuite...")
    os.system(f"mamba install -y -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 python='{PYTHON_VERSION}'")
    os.system("touch HH_READY")
elif need_amber:
    print("installing amber...")
    os.system(f"mamba install -y -c conda-forge openmm=7.7.0 python='{PYTHON_VERSION}' pdbfixer")
    os.system("touch AMBER_READY")

installing colabfold...
installing conda...
PREFIX=/usr/local


Mambaforge-Linux-x86_64.sh: line 313: /usr/local/_conda: Permission denied



                  __    __    __    __
                 /  \  /  \  /  \  /  \
                /    \/    \/    \/    \
███████████████/  /██/  /██/  /██/  /████████████████████████
              /  / \   / \   / \   / \  \____
             /  /   \_/   \_/   \_/   \    o \__,
            / _/                       \_____/  `
            |/
        ███╗   ███╗ █████╗ ███╗   ███╗██████╗  █████╗
        ████╗ ████║██╔══██╗████╗ ████║██╔══██╗██╔══██╗
        ██╔████╔██║███████║██╔████╔██║██████╔╝███████║
        ██║╚██╔╝██║██╔══██║██║╚██╔╝██║██╔══██╗██╔══██║
        ██║ ╚═╝ ██║██║  ██║██║ ╚═╝ ██║██████╔╝██║  ██║
        ╚═╝     ╚═╝╚═╝  ╚═╝╚═╝     ╚═╝╚═════╝ ╚═╝  ╚═╝

        mamba (0.20.0) supported by @QuantStack

        GitHub:  https://github.com/mamba-org/mamba
        Twitter: https://twitter.com/QuantStack

█████████████████████████████████████████████████████████████

Currently, only install, create, list, search, run, info and clean are supported through mamba.
installing hhsuit

In [83]:
from colabfold.batch import get_msa_and_templates
from colabfold.utils import DEFAULT_API_SERVER
import importlib_metadata

In [84]:

# `get_commit` is called below but was never imported; it lives in
# colabfold.utils next to DEFAULT_API_SERVER.
from colabfold.utils import get_commit

# Query and search settings handed to get_msa_and_templates().
query_sequence = target_sequences[0]
msa_lines = None
msa_mode = "mmseqs2_uniref"
use_templates = False
custom_template_path = None
pair_mode = "unpaired"
pairing_strategy = "greedy"
# The server URL only. The previous `DEFAULT_API_SERVER, get_commit` built a
# (url, function) tuple instead of a URL.
host_url = DEFAULT_API_SERVER

# User-agent string: "colabfold/<version>", with the commit appended when
# get_commit() returns one.
version = importlib_metadata.version("colabfold")
commit = get_commit()
if commit:
    version += f" ({commit})"
user_agent = f"colabfold/{version}"

In [85]:
# Query the MSA server for the target sequence.
# `out_dir` is a plain string, so it is wrapped in Path: ColabFold declares the
# result directory parameter as a Path — TODO confirm against the installed
# ColabFold version.
# The result is bound to a name so it can be reused and so the cell does not
# print the full return value as its output.
msa_and_templates = get_msa_and_templates(
    jobname,
    query_sequence,
    msa_lines,
    Path(out_dir),
    msa_mode,
    use_templates,
    custom_template_path,
    pair_mode,
    pairing_strategy,
    host_url,
    user_agent,
)


TypeError: 'type' object is not subscriptable