# **IMPORTANT**: Set the runtime to CPU-only

This notebook is designed to run on a CPU-only runtime. Using a GPU will not provide any performance benefits and may lead to unexpected errors.

**How to set the runtime to CPU-only:**
1. Go to `Runtime` in the top menu.
2. Select `Change runtime type`.
3. In the `Hardware accelerator` dropdown, select `CPU`.
4. Click `Save`.

# **Quilt**: identification of hydrophobic patches on the PDB structure surface of the target protein of interest. Based on the content of [this](https://github.com/plijnzaad/quilt) GitHub repo

## Quilt Installation

In [1]:
# Start from a clean slate: remove any checkout left behind by a previous run
# of this cell so that the clone below does not fail on an existing directory.
!rm -rf quilt
!git clone https://github.com/plijnzaad/quilt.git
# %cd changes the notebook's own working directory, so the following cells
# (which call ./src/quilt) run from inside the checkout.
%cd quilt
# The setup script, build, tests and install are chained with && on a single
# line so they share one shell and the chain stops at the first failing step.
!source setup.sh.example && make && make test && make install

Cloning into 'quilt'...
remote: Enumerating objects: 596, done.[K
remote: Counting objects: 100% (596/596), done.[K
remote: Compressing objects: 100% (222/222), done.[K
remote: Total 596 (delta 354), reused 591 (delta 349), pack-reused 0 (from 0)[K
Receiving objects: 100% (596/596), 288.77 KiB | 7.04 MiB/s, done.
Resolving deltas: 100% (354/354), done.
/content/quilt
(cd src; make)
make[1]: Entering directory '/content/quilt/src'
gcc -c -g -Wall -DLinux -DVERSION='"version: branch master; commit v1.3-1-g2606e6b"' -I../utils					    -c -o main.o main.c
In file included from [01m[K../utils/extra-math.h:25[m[K,
                 from [01m[Kmain.c:27[m[K:
   59 | #define AFREEA                          /* @@@debugging */
      | 
[01m[K../utils/utils.h:56:[m[K [01;36m[Knote: [m[Kthis is the location of the previous definition
   56 | #  define AFREEA free
      | 
[01m[Kmain.c:[m[K In function ‘[01m[Kpre_process_atoms[m[K’:
   73 |   int i, j, [01;35m[Ksize[m

## Execute Quilt (when asked to upload, please upload the target PDB)

In [None]:
# @title
from google.colab import files
import os

uploaded = files.upload()

if not uploaded:
    print('No file uploaded.')
else:
    input_pdb = list(uploaded.keys())[0]
    pdb_content = uploaded[input_pdb].decode('utf-8')
    print(f'Using {input_pdb} as input.')
    os.environ['PDBPATH'] = os.getcwd()
    quilt_output = !./src/quilt -n 252 -ep 1.4 -R -p {input_pdb} -a {input_pdb}.area

## 3. Visualize Patches

In [None]:
# @title
!pip install py3Dmol
import py3Dmol
import re
from ipywidgets import interact, Dropdown

def parse_quilt_output(output, n_patches=3):
    """Collect the residues listed for the first ``n_patches`` Quilt patches.

    A line starting with ``#`` is treated as a section header. A header of the
    form ``# <number>`` whose number is below ``n_patches`` opens that patch;
    any other ``#`` line closes the current one. Lines inside an open patch
    are scanned for ``<letter> <letter><digits> @`` entries, read as a
    one-letter residue name, a chain letter and a residue number.

    Args:
        output: Iterable of text lines produced by Quilt.
        n_patches: How many patches (numbered from 0) to collect. Defaults to
            3, the number of patches the rest of the notebook displays.

    Returns:
        A ``(patches, patch_residues_text)`` tuple. ``patches`` maps each patch
        index to the list of residue numbers (ints, first-seen order, no
        duplicates). ``patch_residues_text`` maps each patch index to a string
        of ``"<name><number>, "`` entries in the same order. The chain letter
        is not part of either result, so equal residue numbers on different
        chains are reported once.
    """
    patches = {idx: [] for idx in range(n_patches)}
    patch_residues_text = {idx: '' for idx in range(n_patches)}
    # Per-patch sets give constant-time duplicate checks while the lists above
    # preserve first-seen order.
    seen_numbers = {idx: set() for idx in range(n_patches)}
    current_patch = None  # None means "not inside a patch we collect"

    for line in output:
        if line.startswith('#'):
            header = re.match(r'# (\d+)', line)
            patch_idx = int(header.group(1)) if header else None
            current_patch = patch_idx if patch_idx in patches else None
            continue
        if current_patch is None:
            continue
        for res_name, _chain, res_id in re.findall(r'([A-Z]) ([A-Z])(\d+) @', line):
            number = int(res_id)
            if number in seen_numbers[current_patch]:
                continue
            seen_numbers[current_patch].add(number)
            patches[current_patch].append(number)
            patch_residues_text[current_patch] += f'{res_name}{res_id}, '
    return patches, patch_residues_text

if 'quilt_output' not in locals():
    print('Please run the previous cell to generate quilt output first.')
else:
    patches, patch_residues_text = parse_quilt_output(quilt_output)

    # Echo one summary line per patch and keep a copy in Patches.txt.
    with open('Patches.txt', 'w') as summary_file:
        for patch_idx in range(3):
            residues = patch_residues_text[patch_idx].strip()
            # A non-empty listing ends with a comma once stripped; drop it.
            listing = residues[:-1] if residues else 'None'
            summary = f'Patch {patch_idx} residues: {listing}'
            print(summary)
            summary_file.write(summary + '\n')

    # Viewer showing the uploaded structure as a spectrum-coloured cartoon.
    view = py3Dmol.view(width=800, height=600)
    view.addModel(pdb_content, 'pdb')
    view.setStyle({'cartoon': {'color': 'spectrum'}})

    def highlight_patch(patch_num):
        """Redraw the surface, painting the chosen patch yellow.

        ``patch_num`` is the dropdown value: ``'None'`` for no highlight, or
        the patch index as a string.
        """
        # Rebuild from scratch so highlights from earlier selections vanish.
        view.removeAllSurfaces()
        view.addSurface(py3Dmol.SAS, {'opacity': 0.7, 'color': 'green'})
        patch_chosen = patch_num != 'None'
        if patch_chosen:
            view.addSurface(
                py3Dmol.SAS,
                {'opacity': 1.0, 'color': 'yellow'},
                {'resi': patches[int(patch_num)]},
            )
        view.zoomTo()
        view.show()

    # Initial, un-highlighted surface.
    view.addSurface(py3Dmol.SAS, {'opacity': 0.7, 'color': 'green'})
    view.zoomTo()

    patch_selector = Dropdown(options=['None', '0', '1', '2'], description='Select Patch:')
    interact(highlight_patch, patch_num=patch_selector)

--------------------
# **Nanocdr-x**: identification of CDRs on the PDB structure of the nanobody scaffold. Based on the content of [this](https://github.com/lescailab/nanocdr-x) GitHub repo

## 4. Map Nanobody CDRs (when asked to upload, please upload the nanobody scaffold)

In [None]:
# @title
# Bootstrap a conda installation inside Colab so that nanocdr-x can be
# installed from its conda channel.
# NOTE(review): condacolab.install() is documented to restart the Colab kernel
# the first time it runs. If so, the rest of this cell is interrupted and must
# be re-run, and variables created by the Quilt cells (e.g. quilt_output,
# patch_residues_text) are lost - confirm, and consider moving this install to
# the very first cell of the notebook.
!pip install -q condacolab
import condacolab
condacolab.install()

# Install the CDR predictor; it provides the predict_cdrs command used below.
!conda install -c lescailab nanocdr-x -y

import pandas as pd
from google.colab import files
import os

# Dictionary to convert three-letter amino acid code to one-letter
three_to_one = {'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
                'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
                'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
                'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y'}

def get_sequence_from_pdb(pdb_string):
    """Extract the one-letter amino-acid sequence from PDB text.

    Only ``ATOM`` records of alpha carbons are read, one per residue. A
    residue is identified by chain ID, residue number and insertion code, so
    inserted residues (e.g. 52 and 52A) and equally numbered residues on
    different chains are all kept, while repeated records of the same residue
    (such as alternate locations) are counted once. Residues whose name is
    not in ``three_to_one`` are skipped.

    Args:
        pdb_string: Full text of a PDB file.

    Returns:
        A ``(sequence, residue_info)`` tuple: the sequence as a string, and a
        list of ``(residue_number, one_letter_code)`` tuples aligned
        position-by-position with that string. The residue number is the
        integer from the record; the insertion code is not included.
    """
    sequence = []
    residue_info = []
    seen_residues = set()
    for line in pdb_string.splitlines():
        # Only alpha carbons
        if not (line.startswith('ATOM') and line[13:15].strip() == 'CA'):
            continue
        res_id = int(line[22:26])
        res_name = line[17:20]
        # Chain ID and insertion code sit immediately before and after the
        # residue number; keying on the number alone would drop residues such
        # as 52A that share a number with their neighbour.
        residue_key = (line[21:22], res_id, line[26:27].strip())
        if residue_key in seen_residues:
            continue
        seen_residues.add(residue_key)
        one_letter = three_to_one.get(res_name)
        if one_letter is not None:
            sequence.append(one_letter)
            residue_info.append((res_id, one_letter))
    return "".join(sequence), residue_info

print('Please upload the nanobody PDB file.')
uploaded_nb = files.upload()

if not uploaded_nb:
    print('No nanobody file uploaded.')
else:
    nanobody_pdb_name = list(uploaded_nb.keys())[0]
    nanobody_pdb_content = uploaded_nb[nanobody_pdb_name].decode('utf-8')
    full_sequence, residue_map = get_sequence_from_pdb(nanobody_pdb_content)

    # Create input CSV for nanocdr-x
    input_df = pd.DataFrame([{'identifier': nanobody_pdb_name, 'input': full_sequence}])
    input_csv_path = 'nanobody_input.csv'
    output_csv_path = 'nanobody_cdrs.csv'
    input_df.to_csv(input_csv_path, index=False)

    # Run nanocdr-x
    !predict_cdrs -i {input_csv_path} -o {output_csv_path}

    # Parse the output and map CDRs
    if os.path.exists(output_csv_path):
        results_df = pd.read_csv(output_csv_path)
        all_cdr_res_ids = {}
        for i in range(1, 4):
            cdr_seq = results_df[f'predicted_cdr{i}'][0]
            if pd.notna(cdr_seq):
                start_index = full_sequence.find(cdr_seq)
                if start_index != -1:
                    cdr_residues = []
                    cdr_res_ids = []
                    for j in range(len(cdr_seq)):
                        res_id, res_name = residue_map[start_index + j]
                        cdr_residues.append(f'{res_name}{res_id}')
                        cdr_res_ids.append(res_id)
                    all_cdr_res_ids[i] = cdr_res_ids
                    print(f'CDR{i}: {",".join(cdr_residues)}')
                else:
                    print(f'CDR{i}: Could not map sequence {cdr_seq} to the original structure.')
            else:
                print(f'CDR{i}: Not found.')

        # Generate REMARKs and append to PDB
        remark_lines = []
        for cdr_num, res_ids in all_cdr_res_ids.items():
            for res_id in res_ids:
                remark_lines.append(f"REMARK PDBinfo-LABEL: {res_id:>4} H{cdr_num}\n")

        modified_pdb_content = nanobody_pdb_content
        if not modified_pdb_content.endswith('\n'):
            modified_pdb_content += '\n'
        modified_pdb_content += "".join(remark_lines)
        modified_pdb_content += "END\n"

        modified_pdb_filename = f"{os.path.splitext(nanobody_pdb_name)[0]}_with_CDRs.pdb"
        with open(modified_pdb_filename, 'w') as f:
            f.write(modified_pdb_content)
        print(f"Saved PDB with CDR remarks to {modified_pdb_filename}")
        files.download(modified_pdb_filename)
    else:
        print('CDR prediction output file not found.')

## 5. Download annotations

In [None]:
# @title
from google.colab import files
import pandas as pd

# The report is collected piece by piece and joined once at the end.
report_parts = []

# Patches from Quilt
if 'patch_residues_text' not in locals():
    report_parts.append('Quilt patch information not available.\n')
else:
    report_parts.append('Quilt Patches:\n')
    for patch_idx in range(3):
        residues = patch_residues_text[patch_idx].strip()
        # A non-empty listing ends with a comma once stripped; drop it.
        listing = residues[:-1] if residues else 'None'
        report_parts.append(f'Patch {patch_idx} residues: {listing}\n')

report_parts.append('\n')

# CDRs from Nanocdr-X
have_cdr_data = 'all_cdr_res_ids' in locals() and 'residue_map' in locals()
if not have_cdr_data:
    report_parts.append('Nanocdr-X CDR information not available.\n')
else:
    report_parts.append('Nanocdr-X CDRs:\n')
    one_letter_by_id = {entry[0]: entry[1] for entry in residue_map}
    for cdr_idx in range(1, 4):
        if cdr_idx in all_cdr_res_ids:
            labelled = [
                f'{one_letter_by_id[number]}{number}'
                for number in all_cdr_res_ids[cdr_idx]
                if number in one_letter_by_id
            ]
            report_parts.append(f'CDR{cdr_idx}: {",".join(labelled)}\n')
            continue
        # The CDR was not mapped: consult the prediction file to tell
        # "not predicted" apart from "predicted but not found in structure".
        try:
            predictions = pd.read_csv('nanobody_cdrs.csv')
            predicted_seq = predictions[f'predicted_cdr{cdr_idx}'][0]
            if pd.isna(predicted_seq):
                report_parts.append(f'CDR{cdr_idx}: Not found.\n')
            else:
                report_parts.append(
                    f'CDR{cdr_idx}: Could not map sequence {predicted_seq} to the original structure.\n'
                )
        except (FileNotFoundError, KeyError):
            report_parts.append(f'CDR{cdr_idx}: Information not available.\n')

annotations_content = ''.join(report_parts)

# Write to file and download
with open('Step_1_annotations.txt', 'w') as report_file:
    report_file.write(annotations_content)

print('Downloading annotations file...')
files.download('Step_1_annotations.txt')