<a href="https://colab.research.google.com/github/grandrea/AlphaCrop/blob/main/AlphaCrop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AlphaCrop

Web server to split and crop AlphaFold models by confidence according to the metrics provided by DeepMind:

Crop your PDB into multiple files made up of:
- only very high confidence regions (pLDDT > 90)
- only confident regions and above (pLDDT > 70)
- only low confidence regions and above, discarding very low confidence (pLDDT > 50)

Upload files generated by AlphaFold 2.x or 3.x.

Essentially splits the file by b-factor column, generating 3 files.


In [1]:
#@title Provide the arguments here and hit `Run` -> `Run All Cells`
# jobname is used as the prefix of every output file and of the downloaded zip.
jobname = 'AlphaCrop_colab' #@param {type:"string"}
# Rendered as a dropdown so that only the two supported formats can be picked.
structure_format = "mmcif" #@param ["pdb", "mmcif"]
#@markdown can only be "pdb" or "mmcif"


# The value can still be edited by hand in the code view, so validate it here
# and fail early rather than in the processing cell.
if structure_format not in ("pdb", "mmcif"):
  raise ValueError("can only be pdb or mmcif")


In [None]:
#@title Load structure file
#@markdown Please execute this cell by pressing the _Play_ button
#@markdown on the left to upload the structure file and specify the format
#@markdown via the dropdown menu. AlphaFold2.x and ColabFold will generate
#@markdown pdb files, AlphaFold 3.x will generate mmcif
import os
import contextlib
import glob

# clean up previous run
# "*cif" also matches files ending in "mmcif", so these two patterns cover
# every extension the cleanup handled before.
for pattern in ("*cif", "*pdb"):
  for item in glob.glob(pattern):
    # Suppress per file: a file that vanished between glob and remove must not
    # abort the rest of the cleanup.
    with contextlib.suppress(FileNotFoundError):
      os.remove(item)

from google.colab import files
# Opens the Colab upload widget; the result is used later as a mapping from
# uploaded file name to file content.
structure_file = files.upload()

# Exactly one structure is processed per run.
if len(structure_file) != 1:
    raise ValueError('Please only upload a single pdb or mmcif file.')

In [None]:
#@title install packages
import sys
!wget -qO ac.sh https://repo.anaconda.com/archive/Anaconda3-2020.11-Linux-x86_64.sh && bash ac.sh -bfp /usr/local
sys.path.append('/usr/local/lib/python3.8/site-packages/')
!conda install --yes -c conda-forge biopython

In [9]:
#@title process file
from Bio.PDB import *
# Pick the Biopython parser that matches the declared input format.
if structure_format=="pdb":
  parser = PDBParser()
elif structure_format=="mmcif":
  parser = MMCIFParser()
else:
  # Stop here: continuing would leave `parser` undefined and the cell would
  # fail further down with an unrelated NameError.
  raise ValueError("file format missing")


class VeryHighSelect(Select):
    """Atom filter keeping only atoms whose B-factor is above 90.

    The notebook uses the B-factor column as the confidence score, so this
    corresponds to the "very high confidence" output file.
    """

    def accept_atom(self, atom):
        """Return 1 to keep `atom` (B-factor > 90), otherwise 0."""
        return 1 if atom.get_bfactor() > 90 else 0

class HighSelect(Select):
    """Atom filter keeping only atoms whose B-factor is above 70.

    The notebook uses the B-factor column as the confidence score, so this
    corresponds to the "confident and above" output file.
    """

    def accept_atom(self, atom):
        """Return 1 to keep `atom` (B-factor > 70), otherwise 0."""
        return 1 if atom.get_bfactor() > 70 else 0

class LowSelect(Select):
    """Atom filter keeping only atoms whose B-factor is above 50.

    The notebook uses the B-factor column as the confidence score, so this
    corresponds to the "low confidence and above" output file.
    """

    def accept_atom(self, atom):
        """Return 1 to keep `atom` (B-factor > 50), otherwise 0."""
        return 1 if atom.get_bfactor() > 50 else 0


# The upload result is keyed by file name; the first key is the file to process.
uploaded_names = list(structure_file)
pdb_filename = uploaded_names[0]

# Text form of the uploaded bytes (not used by the parsing below, which reads
# the file from disk by name).
pdb_contents = structure_file[pdb_filename].decode('utf-8')

structure = parser.get_structure('structure_id', pdb_filename)

# Output names: one PDB file per confidence threshold, prefixed with the job name.
file_name_very_confident = jobname + "_very_confident.pdb"
file_name_confident = jobname + "_confident.pdb"
file_name_low_confidence = jobname + "_low_confidence.pdb"

io = PDBIO()
io.set_structure(structure)

# Write the same structure three times, each time through a stricter or
# looser atom filter.
for output_name, atom_filter in (
    (file_name_very_confident, VeryHighSelect()),
    (file_name_confident, HighSelect()),
    (file_name_low_confidence, LowSelect()),
):
    io.save(output_name, atom_filter)




In [None]:
#@title Download result
from google.colab import files
import glob
import os
import zipfile

# Every PDB file in the working directory goes into the archive, matching the
# previous `*.pdb` shell glob.
file_lists = sorted(glob.glob("*.pdb"))
if not file_lists:
  raise FileNotFoundError("no .pdb files found to download; run the 'process file' cell first")

archive_name = f'{jobname}.zip'

# Build the archive in Python instead of interpolating `jobname` into a shell
# command: a job name with spaces or shell metacharacters is then just part of
# a file name. Mode "w" starts a fresh archive, so entries from an earlier run
# cannot linger in the download.
with zipfile.ZipFile(archive_name, "w", compression=zipfile.ZIP_DEFLATED) as archive:
  for pdb_path in file_lists:
    archive.write(pdb_path)

files.download(archive_name)
