Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
SPDX-License-Identifier: Apache-2.0

# Protein Complex Analysis with AlphaFold-Multimer

## Table of Contents
0. [Install Dependencies](#0.-Install-Dependencies)
1. [Create Target](#1.-Create-Target)
2. [Submit Sequence Alignment and Folding Jobs](#2.-Submit-Sequence-Alignment-and-Folding-Jobs) 
3. [Download and Visualize Results](#3.-Download-and-Visualize-Results)

## 0. Install Dependencies

In [1]:
%pip install -U -r -q notebook-requirements.txt

Processing /Users/bloyal/batch-protein-folding
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: batchfold
  Attempting uninstall: batchfold
    Found existing installation: batchfold 1.0.0
    Uninstalling batchfold-1.0.0:
      Successfully uninstalled batchfold-1.0.0
[33m  DEPRECATION: batchfold is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559[0m[33m
[0m  Running setup.py install for batchfold ... [?25ldone
[?25hSuccessfully installed batchfold-1.0.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import required Python packages

import boto3
from datetime import datetime
import matplotlib.pyplot as plt
from batchfold.batchfold_environment import BatchFoldEnvironment
from batchfold.batchfold_target import BatchFoldTarget
from batchfold.jackhmmer_job import JackhmmerJob
from batchfold.openfold_job import OpenFoldJob
from batchfold.alphafold2_job import AlphaFold2Job
from batchfold.omegafold_job import OmegaFoldJob
from batchfold.utils import utils
from IPython import display
import numpy as np

# Create AWS clients
# Named AWS profile used to build the session; change this to match your own AWS config.
AWS_PROFILE_NAME = 'bloyal+proteinfolding-Admin'
boto_session = boto3.Session(profile_name=AWS_PROFILE_NAME)

# The environment object supplies the default S3 bucket used by the rest of the notebook.
batch_environment = BatchFoldEnvironment(boto_session=boto_session)
S3_BUCKET = batch_environment.default_bucket

print(f" S3 bucket name is {S3_BUCKET}")

 S3 bucket name is batchfold-221102-batchfolds3bucket-1byh6n52qfaov


## 1. Create Target

In [3]:
# Target 4ZQK: a two-chain complex (programmed cell death 1 ligand 1 and
# programmed cell death protein 1), registered as a single folding target.
target_id = "4ZQK"
target = BatchFoldTarget(
    target_id=target_id, s3_bucket=S3_BUCKET, boto_session=boto_session
)

# One (seq_id, sequence, description) entry per chain, added in this order.
chains = [
    (
        "4ZQK_1",
        "AFTVTVPKDLYVVEYGSNMTIECKFPVEKQLDLAALIVYWEMEDKNIIQFVHGEEDLKVQHSSYRQRARLLKDQLSLGNAALQITDVKLQDAGVYRCMISYGGADYKRITVKVNA",
        "Chain A|Programmed cell death 1 ligand 1|Homo sapiens (9606)",
    ),
    (
        "4ZQK_2",
        "NPPTFSPALLVVTEGDNATFTCSFSNTSESFVLNWYRMSPSNQTDKLAAFPEDRSQPGQDSRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAE",
        "Chain B|Programmed cell death protein 1|Homo sapiens (9606)",
    ),
]
for chain_id, chain_seq, chain_description in chains:
    add_sequence_result = target.add_sequence(
        seq_id=chain_id,
        seq=chain_seq,
        description=chain_description,
    )

# Show the value returned by the final add_sequence call as the cell output.
add_sequence_result

's3://batchfold-221102-batchfolds3bucket-1byh6n52qfaov/4ZQK/fastas/4ZQK.fasta'

## 2. Submit Sequence Alignment and Folding Jobs

In [4]:
# Build one suffix (YYYYMMDD followed by epoch seconds) and share it between both
# job names so the pair is easy to match up. The epoch part is derived from
# timestamp() rather than the "%s" strftime directive, which is a platform-specific
# extension that Python does not document and that is unavailable on some platforms.
submission_time = datetime.now()
job_suffix = submission_time.strftime("%Y%m%d") + str(int(submission_time.timestamp()))

# Sequence alignment job: reads the target FASTA and writes to the target's MSA location.
jackhmmer_job_name = target.target_id + "_JackhmmerJob_" + job_suffix
jackhmmer_job = JackhmmerJob(
    job_name=jackhmmer_job_name,
    target_id=target.target_id,
    fasta_s3_uri=target.get_fasta_s3_uri(),
    output_s3_uri=target.get_msas_s3_uri(),
    boto_session=boto_session,
    cpu=16,
    memory=31,
    model_preset="multimer",
)

# Folding job: reuses the MSAs written by the alignment job (use_precomputed_msas=True)
# and writes its predictions under a prefix named after the job.
alphafold2_job_name = target.target_id + "_AlphaFold2Job_" + job_suffix
alphafold2_job = AlphaFold2Job(
    job_name=alphafold2_job_name,
    boto_session=boto_session,
    target_id=target.target_id,
    fasta_s3_uri=target.get_fasta_s3_uri(),
    msa_s3_uri=target.get_msas_s3_uri(),
    output_s3_uri=target.get_predictions_s3_uri() + "/" + alphafold2_job_name,
    use_precomputed_msas=True,
    model_preset="multimer",
    cpu=4,
    memory=15,  # Why not 16? ECS needs about 1 GB for container services
    gpu=1,
)

# Submit the alignment first; the folding job is submitted with a dependency on it.
jackhmmer_submission = batch_environment.submit_job(
    jackhmmer_job, job_queue_name="GravitonOnDemandJobQueue"
)
alphafold2_submission = batch_environment.submit_job(
    alphafold2_job, job_queue_name="G4dnJobQueue", depends_on=[jackhmmer_submission]
)


Check the status of the submitted jobs. Re-run this cell to refresh the statuses.

In [5]:
# Report the current name and status of each submitted job.
for job in [jackhmmer_job, alphafold2_job]:
    # Describe each job once so the name and status come from the same response.
    job_description = job.describe_job()[0]
    print(
        f"Job {job_description['jobName']} is in status {job_description['status']}"
    )

Job 4ZQK_JackhmmerJob_202212211671632393 is in status RUNNABLE
Job 4ZQK_AlphaFold2Job_202212211671632393 is in status SUBMITTED


## 3. Download and Visualize Results

Once both jobs have finished, download the results and plot them.

### Plot Alignment Data

In [None]:
# Re-create the target object for the result-handling cells below.
target = BatchFoldTarget(target_id=target_id, s3_bucket=S3_BUCKET, boto_session=boto_session)

In [None]:
# Download the target's MSA files under the local "data" folder.
msa_download_dir = "data"
target.download_msas(local_path=msa_download_dir)

# Plot the jackhmmer alignment output for this target.
jackhmmer_msa_dir = f"{msa_download_dir}/{target_id}/msas/jackhmmer"
utils.plot_msa_output_folder(path=jackhmmer_msa_dir, id=target_id)

### Plot Predicted Structure

In [None]:
# Look up the name of the last AlphaFold2 job for this target
# (presumably the most recently submitted one — confirm against get_last_job_name).
last_job_name = target.get_last_job_name(job_type="AlphaFold2")

# Local folder that holds this job's downloaded prediction files.
predictions_root = "data"
prediction_dir = f"{predictions_root}/{target_id}/predictions/{last_job_name}"

print(f"Downloading results for job {last_job_name}")
target.download_predictions(local_path=predictions_root, job=last_job_name)

print("Identifying best model")
ranking_file = f"{prediction_dir}/ranking_debug.json"
best_model_name = utils.get_best_alphafold_model(ranking_file)

print(f"Displaying predicted structure for model {best_model_name}")
# The plotted structure is always ranked_0.pdb from the job's prediction folder.
pdb = f"{prediction_dir}/ranked_0.pdb"
utils.plot_banded_pdb(pdb)