# Metal 3D CNN 

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lcbc-epfl/metal-site-prediction/blob/main/Metal3D/ColabMetal.ipynb)

Quickly find out where potential metal-binding sites are located in a protein.

*If you use this notebook, please cite:*
>Accurate prediction of transition metal ion location via deep learning
>S.L. Dürr, A. Levy, U. Rothlisberger
>bioRxiv 2022.08.22.504853; doi: https://doi.org/10.1101/2022.08.22.504853

In [None]:
# @title Install Conda
# @markdown Note that you will have to run this cell, then the session will crash and then you can run all other cells.
import os

if not os.path.exists("CONDA_READY"):
    print("CONDA not installed, installing it now")
    !pip install -U https://github.com/conda-incubator/condacolab/archive/cuda-version-12.tar.gz > /dev/null &
    !pip install -U ipywidgets==7.7.1   > /dev/null &
    import condacolab

    condacolab.install()
    !touch CONDA_READY

In [None]:
%%shell
#@title Install dependencies
# Stop at the first failing command so that PACKAGES_READY is only written
# when every package was installed; otherwise a re-run would skip the repair.
set -e
if [ ! -f PACKAGES_READY ]; then

  pip install py3Dmol > /dev/null
  # -y answers conda's confirmation prompt, which would otherwise be invisible
  # because stdout is discarded.
  conda install -y moleculekit=1.6.9 -c acellera -c conda-forge > /dev/null
  conda install -y rdkit -c conda-forge > /dev/null
  conda install -y pdb2pqr openbabel -c conda-forge > /dev/null
  conda install -y scipy=1.7.3 > /dev/null
  touch PACKAGES_READY
fi

In [2]:
%%shell
#@title Get helpers
# Stop at the first failing download so that READY is only written once all
# helper modules and the model weights are in place.
set -e
if [ ! -f READY ]; then
  mkdir -p utils weights

  # Download each helper module straight into utils/.
  base_url="https://raw.githubusercontent.com/lcbc-epfl/metal-site-prediction/main/Metal3D/utils"
  for module in helpers.py model.py voxelization.py; do
    wget -q -O "utils/${module}" "${base_url}/${module}"
  done

  # The URL is quoted because `?` is a shell glob character; -O stores the file
  # under its final name instead of "<name>?raw=true".
  wget -q -O weights/metal_0.5A_v3_d0.2_16Abox.pth \
    "https://github.com/lcbc-epfl/metal-site-prediction/blob/main/Metal3D/weights/metal_0.5A_v3_d0.2_16Abox.pth?raw=true"

  touch READY
fi



In [3]:
# @title Imports
import os
import re
import shutil
import sys
import urllib
import urllib.request
import warnings

import ipywidgets as widgets
import numpy as np
import torch
import torch.nn as nn
from ipywidgets import interact, fixed

from utils.helpers import *
from utils.voxelization import processStructures
from utils.model import Model


# Override the interpreter path reported by `sys.executable`.
# NOTE(review): presumably so that code which spawns subprocesses via
# sys.executable uses the Python installed by condacolab — confirm.
sys.executable = "/usr/local/bin/python"

In [4]:
# @title Input PDB or upload file, then run remaining cells.
from google.colab import files
import os.path

# @markdown Use a 4-letter code for entries in the RCSB Protein Data Bank (e.g. `2cba`), or a UniProt accession code for AlphaFold structures (e.g. `Q5VSL9`).
pdbfile = "6F5N"  # @param {type:"string"}
use_local_file = False  # @param {type:"boolean"}
# @markdown If using a local file, upload it via the left toolbar and provide the path in the `pdbfile` field.

# @markdown ## Grid resolution
# @markdown A grid is created to aggregate the per-residue predictions. The default resolution on Colab is 1 Å because large cube files cannot be visualized in the browser. Use high resolution (0.5 Å) if you want maximum precision.
use_high_resolution = False  # @param {type:"boolean"}

# @markdown ### Mode
mode = "Only metalbinding"  # @param ["All residues", "Only metalbinding","custom"]
custom_residues = ""  # @param  {type:"string"}
# @markdown Custom residues should be provided as a comma-separated list, e.g. `1,4,6,10,11`. Use the same residue ids as in the provided PDB file.

save_to_google_drive = False  # @param {type:"boolean"}

# @markdown If enabled, the result files are copied to `Metal3D/` in your Google Drive (`MyDrive`).

# Mount Google Drive now so that the predictions cell can copy its results there.
if save_to_google_drive:
    from google.colab import drive

    drive.mount("/content/drive")
    print("Google Drive mounted")

In [None]:
# @title Predictions

# --- 1. Resolve the input structure -----------------------------------------
# Afterwards `pdb_file` is the path of the structure file and `pdb` is the
# identifier used in the names of the output files.
if use_local_file:
    # check that pdbfile exists and is readable
    if not os.path.isfile(pdbfile):
        raise Exception(f"File {pdbfile} does not exist")
    if not os.access(pdbfile, os.R_OK):
        raise Exception(f"File {pdbfile} is not readable")
    pdb = os.path.basename(pdbfile)
    pdb_file = pdbfile
else:
    if len(pdbfile) == 4:
        # 4-character codes are fetched from the RCSB (the `.pdb1` file), over HTTPS.
        urllib.request.urlretrieve(
            f"https://files.rcsb.org/download/{pdbfile.lower()}.pdb1", f"{pdbfile}.pdb"
        )
    elif re.fullmatch(
        "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}",
        pdbfile,
    ):
        # fullmatch requires the *whole* input to be a UniProt accession; a
        # prefix-only match is rejected below instead of being silently ignored.
        print("using alphafold structure")
        # NOTE(review): the AlphaFold DB model version is hard-coded in this URL
        # (`model_v2`) — confirm it is still served.
        urllib.request.urlretrieve(
            f"https://alphafold.ebi.ac.uk/files/AF-{pdbfile}-F1-model_v2.pdb",
            f"{pdbfile}.pdb",
        )
    else:
        raise ValueError(
            "pdb code must be 4 letters or Uniprot code does not match"
        )
    pdb = os.path.basename(pdbfile)
    pdb_file = pdbfile + ".pdb"


# --- 2. Select the residues to run the model on ------------------------------
print(pdbfile)
if mode == "All residues":
    print("No resid passed, using whole protein")
    ids = get_all_protein_resids(pdb_file)
elif mode == "Only metalbinding":
    ids = get_all_metalbinding_resids(pdb_file)
else:
    # Parse into a new variable (the form value stays a string, so the cell can
    # be re-run) and drop empty entries: "".split(",") yields [""], not [].
    ids = [resid.strip() for resid in custom_residues.split(",") if resid.strip()]
    if not ids:
        raise ValueError("No custom residues provided")

# --- 3. Voxelize and run the model -------------------------------------------
voxels, prot_centers, prot_N, prots = processStructures(pdb_file, ids)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Tensor.to() is not in-place: keep the returned object so that the input is on
# the same device as the model.
voxels = voxels.to(device)

model = Model()
model.to(device)

# map_location lets weights saved on a GPU be loaded on a CPU-only runtime.
model.load_state_dict(
    torch.load("weights/metal_0.5A_v3_d0.2_16Abox.pth", map_location=device)
)

model.eval()

# Inference only: no_grad avoids building the autograd graph.
with warnings.catch_warnings(), torch.no_grad():
    warnings.filterwarnings("ignore")
    output = model(voxels)

# --- 4. Aggregate the predicted probabilities on a grid ----------------------
# process all predicted probabilities
prot_v = np.vstack(prot_centers)
output_v = output.flatten().cpu().detach().numpy()

bb = get_bb(prot_v)

# Determine if high resolution grid should be used. Can be prohibitive in terms
# of memory in the browser due to the size of the cube file.
gridres = 0.5 if use_high_resolution else 1

grid, box_N = create_grid_fromBB(bb, voxelSize=gridres)

probability_values = get_probability_mean(grid, prot_v, output_v)

# --- 5. Write the results ----------------------------------------------------
write_cubefile(
    bb, probability_values, box_N, outname=f"metal_{pdb}.cube", gridres=gridres
)
find_unique_sites(
    probability_values,
    grid,
    writeprobes=True,
    probefile=f"probes_{pdb}.pdb",
    threshold=7,
    p=0.1,
)

if save_to_google_drive:
    # Copy with the standard library instead of shell commands: the paths are
    # used exactly as the files were written above, so local input files given
    # as an absolute path or in a subdirectory are copied correctly, and no
    # shell quoting is involved.
    drive_dir = os.path.join("/content/drive/MyDrive/Metal3D", pdb)
    os.makedirs(drive_dir, exist_ok=True)
    for result_file in (f"metal_{pdb}.cube", f"probes_{pdb}.pdb", pdb_file):
        shutil.copy(result_file, drive_dir)

In [6]:
# @title Visualization
# @markdown Drag the slider to adjust the probability isovalue
# Slider for the isovalue `p`; the structure path and the id are passed to
# show_map as fixed (non-interactive) arguments.
isovalue_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.05)
interact(show_map, pdb=fixed(pdb_file), id=fixed(pdb), p=isovalue_slider);

interactive(children=(FloatSlider(value=0.5, description='p', max=1.0, step=0.05), Checkbox(value=False, descr…