<a href="https://colab.research.google.com/github/huhlim/cg2all/blob/main/cg2all.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# List of tasks
- Task 1: Conversion of an all-atom structure to a CG model using __convert_all2cg__
- Task 2: Conversion of a CG model to an all-atom structure using __convert_cg2all__
- Task 3: Conversion of a CG simulation trajectory to an atomistic simulation trajectory using __convert_cg2all__
- Task 4: Local optimization of a protein model structure against a cryo-EM density map using __cryo_em_minimizer.py__

# Supported coarse-grained models
- CalphaBasedModel: C$\alpha$-trace (atom names should be "CA")
- ResidueBasedModel: Residue center-of-mass (atom names should be "CA")
- CalphaCMModel: C$\alpha$-trace + Residue center-of-mass (atom names should be "CA" and "CM")
- BackboneModel: Model only with backbone atoms (N, CA, C)
- MainchainModel: Model only with mainchain atoms (N, CA, C, O)
- Martini: [Martini](http://cgmartini.nl/) model
- PRIMO: [PRIMO](http://dx.doi.org/10.1002/prot.22645) model
---

In [None]:
#@title Install cg2all package (takes ~3 minutes)
%%bash

# Detect a GPU runtime by looking for the "CUDA Version" banner printed by nvidia-smi.
# stderr is discarded so that a missing nvidia-smi binary does not print an error.
gpu_available=$(nvidia-smi 2> /dev/null | grep -c "CUDA Version")
if [[ $gpu_available == 1 ]]; then
    echo "This notebook is running on a GPU runtime."
    # Best effort: keep going if this wheel cannot be installed, but say so.
    pip install dgl -f https://data.dgl.ai/wheels/cu116/repo.html &> /dev/null \
        || echo "WARNING: failed to install dgl from the cu116 wheel index." >&2
else
    echo "This notebook is running on a CPU runtime."
fi

# Fetch the package over HTTPS rather than plain HTTP.
# pip output stays hidden, but a failed install is reported and stops the cell,
# because every later cell imports cg2all.
pip install -q git+https://github.com/huhlim/cg2all &> /dev/null \
    || { echo "ERROR: failed to install cg2all." >&2; exit 1; }
pip install -q nglview gdown mrcfile &> /dev/null \
    || { echo "ERROR: failed to install nglview, gdown, or mrcfile." >&2; exit 1; }

In [None]:
#@title Download model checkpoint files (optional, takes <1 minute)
#@markdown This step downloads all PyTorch model checkpoint files. If you did not run this step and a necessary checkpoint file is missing, then the script will download it automatically.

import cg2all.lib.libmodel
from cg2all.lib.libconfig import MODEL_HOME

# Pre-fetch the checkpoint of every supported CG model, skipping the ones that
# already exist under MODEL_HOME.
for model_type in (
    "CalphaBasedModel",
    "ResidueBasedModel",
    "CalphaCMModel",
    "BackboneModel",
    "MainchainModel",
    "Martini",
    "PRIMO",
):
    ckpt_fn = MODEL_HOME / f"{model_type}.ckpt"
    if ckpt_fn.exists():
        continue
    cg2all.lib.libmodel.download_ckpt_file(model_type, ckpt_fn)


In [None]:
#@title Set up NGLview for structure display (TODO)

# Colab-specific setup: the custom widget manager is switched on before nglview
# is imported. Presumably this is what lets the nglview widget render in Colab
# -- TODO confirm.
import google.colab
from google.colab import output as colab_output
colab_output.enable_custom_widget_manager()

# mdtraj is used by the task cells to load structures and trajectories;
# nglview is used by display() defined in this cell.
import mdtraj
import nglview

def display(traj, representation="cartoon"):
    """Build a 600px x 600px NGLview widget for an mdtraj trajectory.

    The structure is drawn with ``representation`` colored by residue index.
    For the "cartoon" representation, the atoms matched by the mdtraj selection
    below (protein CA and side-chain atoms, excluding hydrogens and OXT) are
    additionally drawn as licorice.

    NOTE: in a notebook this name takes the place of IPython's own ``display``
    helper for the cells that follow.

    Args:
        traj: trajectory object accepted by ``nglview.show_mdtraj``.
        representation: NGL representation name used for the main drawing.

    Returns:
        The centered NGLview widget.
    """
    view = nglview.show_mdtraj(traj, default=False)
    for dimension in ("height", "width"):
        setattr(view.layout, dimension, "600px")
    view.add_representation(representation, color="residueindex")
    if representation == "cartoon":
        heavy_atom_query = "protein and (name CA or sidechain) and element != H and name != OXT"
        view.add_representation("licorice", traj.top.select(heavy_atom_query))
    view.center()
    return view

---

In [None]:
#@title Task 1: Conversion of an all-atom structure to a CG model using __convert_all2cg__

# upload a PDB file
import requests
from google.colab import files

coarse_grained_model_type = "CalphaBasedModel" #@param ["CalphaBasedModel", "ResidueBasedModel", "CalphaCMModel", "BackboneModel", "MainchainModel", "Martini", "PRIMO"]

use_example = True #@param {type:"boolean"}
#@markdown - An example input PDB file will be downloaded from [our repository](https://github.com/huhlim/cg2all/tree/main/tests).

if use_example:
    url = "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.pdb"
    input_pdb = url.split("/")[-1]
    with open(input_pdb, "wt") as fout:
        fout.write(requests.get(url).text)
else:
    input_pdb = files.upload()
    input_pdb = list(input_pdb)[0]

# convert
import pathlib
output_pdb = pathlib.Path(input_pdb).stem + f".{coarse_grained_model_type}.pdb"

!convert_all2cg -p $input_pdb -o $output_pdb --cg $coarse_grained_model_type
print(f"Converted {input_pdb} to {output_pdb} in {coarse_grained_model_type}")

pdb = mdtraj.load(output_pdb)
display(pdb, representation="ball+stick")

In [None]:
#@title Download the converted file
# Sends the converted PDB file to the browser. This cell uses whatever `files`
# and `output_pdb` are currently in the kernel namespace, i.e. the values set by
# the most recently executed task cell.
files.download(output_pdb)

---

In [None]:
#@title Task 2: Conversion of a CG model to an all-atom structure using __convert_cg2all__

# upload a PDB file
import requests
from google.colab import files

coarse_grained_model_type = "CalphaBasedModel" #@param ["CalphaBasedModel", "ResidueBasedModel", "CalphaCMModel", "BackboneModel", "MainchainModel", "Martini", "PRIMO"]

use_example = True #@param {type:"boolean"}
#@markdown - An example input PDB file will be downloaded from [our repository](https://github.com/huhlim/cg2all/tree/main/tests).

if use_example:
    url = {"CalphaBasedModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.calpha.pdb", \
           "ResidueBasedModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.residue.pdb", \
           "CalphaCMModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.cacm.pdb", \
           "BackboneModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.bb.pdb", \
           "MainchainModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.mc.pdb", \
           "Martini": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.martini.pdb", \
           "PRIMO": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.primo.pdb"}
    url = url[coarse_grained_model_type]
    input_pdb = url.split("/")[-1]
    with open(input_pdb, "wt") as fout:
        fout.write(requests.get(url).text)
else:
    input_pdb = files.upload()
    input_pdb = list(input_pdb)[0]

# convert
import pathlib
output_pdb = pathlib.Path(input_pdb).stem + ".all.pdb"

!convert_cg2all -p $input_pdb -o $output_pdb --cg $coarse_grained_model_type
print(f"Converted {input_pdb} in {coarse_grained_model_type} to {output_pdb}")

pdb = mdtraj.load(output_pdb)
display(pdb, representation="cartoon")

In [None]:
#@title Download the converted file
# Sends the converted PDB file to the browser. This cell uses whatever `files`
# and `output_pdb` are currently in the kernel namespace, i.e. the values set by
# the most recently executed task cell.
files.download(output_pdb)

---

In [None]:
#@title Task 3: Conversion of a CG simulation trajectory to an atomistic simulation trajectory using __convert_cg2all__
#@markdown Input trajectory file should be in the DCD format.

# upload a PDB file
import requests
from google.colab import files

coarse_grained_model_type = "CalphaBasedModel" #@param ["CalphaBasedModel", "ResidueBasedModel", "CalphaCMModel", "BackboneModel", "MainchainModel", "Martini", "PRIMO"]

use_example = True #@param {type:"boolean"}
#@markdown - An example input PDB and DCD files will be downloaded from [our repository](https://github.com/huhlim/cg2all/tree/main/tests).

if use_example:
    if coarse_grained_model_type != "CalphaBasedModel":
        print("The example is based on CalphaBasedModel.")
        coarse_grained_model_type = "CalphaBasedModel"
    #
    url = "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1jni.calpha.pdb"
    input_pdb = url.split("/")[-1]
    with open(input_pdb, "wt") as fout:
        fout.write(requests.get(url).text)
    #
    url = "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1jni.calpha.dcd"
    input_dcd = url.split("/")[-1]
    with open(input_dcd, "wb") as fout:
        fout.write(requests.get(url).content)

else:
    input_pdb = files.upload()
    input_pdb = list(input_pdb)[0]
    input_dcd = files.upload()
    input_dcd = list(input_dcd)[0]

batch_size = 1 #@param {type: "number"}
#@markdown - Batch size should be a divisor of the total number of frames. The example trajectory file has five frames.

# convert
import pathlib
output_dcd = pathlib.Path(input_dcd).stem + ".all.dcd"
output_pdb = pathlib.Path(input_pdb).stem + ".all.pdb"

!convert_cg2all -p $input_pdb --dcd $input_dcd -o $output_dcd -opdb $output_pdb --cg $coarse_grained_model_type --batch $batch_size
print(f"Converted {input_dcd} in {coarse_grained_model_type} to {output_dcd}")

traj = mdtraj.load(output_dcd, top=output_pdb)
traj = traj.superpose(traj)
display(traj)

In [None]:
#@title Download the converted file
# Sends the converted PDB and DCD files to the browser. This cell uses whatever
# `files`, `output_pdb` and `output_dcd` are currently in the kernel namespace;
# `output_dcd` is only set by the Task 3 cell.
files.download(output_pdb)
files.download(output_dcd)

---

In [None]:
#@title Task 4: Local optimization of a protein model structure against a cryo-EM density map using __cryo_em_minimizer.py__
#@markdown This task is running very slow on the Google Colab __CPU__ runtime. Thus, it is highly recommended to use a __GPU__ runtime with enough VRAM. 

#@markdown Input structure should be roughly fitted to the input electron density map.

# upload a PDB file
import requests
from google.colab import files

coarse_grained_model_type = "CalphaBasedModel" 
use_example = True #@param {type:"boolean"}
#@markdown - An example input PDB and electron density map files will be downloaded from [our repository](https://github.com/huhlim/cg2all/tree/main/tests).

if use_example:
    url = "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/3isr.af2.pdb"
    input_pdb = url.split("/")[-1]
    with open(input_pdb, "wt") as fout:
        fout.write(requests.get(url).text)
    #
    url = "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/3isr_5.mrc"
    input_map = url.split("/")[-1]
    with open(input_map, "wb") as fout:
        fout.write(requests.get(url).content)

else:
    input_pdb = files.upload()
    input_pdb = list(input_pdb)[0]
    input_map = files.upload()
    input_map = list(input_map)[0]

number_of_snapshots = 10 #@param {type: "slider", min:1, max:20}
#@markdown - The number of minimization steps is 10 x number_of_snapshots. It was intentionally limited to 200 steps in this notebook.
n_step = number_of_snapshots * 10

import pathlib
output_prefix = pathlib.Path(input_map).stem + "+" + pathlib.Path(input_pdb).stem

from cg2all.lib.libconfig import BASE
EXEC = BASE / "script/cryo_em_minimizer.py"
!python $EXEC --pdb $input_pdb --map $input_map --output $output_prefix -n $n_step --output_freq 10

pdb_fn_s = list(pathlib.Path(output_prefix).glob("min.*.pdb"))
pdb_fn_s.sort(key=lambda fn: int(fn.name.split(".")[-2]))
pdb_s = mdtraj.load(pdb_fn_s)
display(pdb_s)

In [None]:
#@title Download the optimized files

import os
import shutil

# Bundle the output_prefix directory into "<output_prefix>.zip" and send it to
# the browser. Uses the `files` and `output_prefix` set by the Task 4 cell.
if not os.path.isdir(output_prefix):
    raise FileNotFoundError(
        f"Output directory '{output_prefix}' does not exist; run the Task 4 cell first."
    )

# shutil.make_archive appends ".zip" to the base name and returns the archive path.
# It replaces the shell call to `zip`, so no shell quoting is needed and failures
# raise. The result is no longer bound to the name `zip`, which hid the builtin
# for the rest of the session.
archive_fn = shutil.make_archive(output_prefix, "zip", root_dir=".", base_dir=output_prefix)
files.download(archive_fn)