Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
SPDX-License-Identifier: Apache-2.0

# Prepare the AWS Batch Architecture for Protein Folding File System

This notebook submits AWS Batch jobs that download and unpack the necessary data onto the Amazon FSx for Lustre file system.

In [1]:
import os

import boto3
from batchfold.batchfold_environment import BatchFoldEnvironment
from batchfold.download_job import DownloadJob

# Let the AWS_PROFILE environment variable choose the credentials profile so
# the notebook is not tied to a single developer's profile; the original
# profile name is kept as the fallback so existing setups behave as before.
aws_profile_name = os.environ.get("AWS_PROFILE") or "bloyal+proteinfolding-Admin"
boto_session = boto3.session.Session(profile_name=aws_profile_name)
batch_environment = BatchFoldEnvironment(boto_session=boto_session)

# Build a download job around the test script, using the
# "DownloadJobDefinition" job definition.
test_download = DownloadJob(
    job_definition_name="DownloadJobDefinition",
    script="./download_test.sh",
)

# Submit the test job to the "GravitonSpotJobQueue" job queue.
test_submission = batch_environment.submit_job(
    test_download,
    job_queue_name="GravitonSpotJobQueue",
)

In [None]:
# Every job in this cell is submitted to the same job queue.
_DOWNLOAD_JOB_QUEUE = "GravitonSpotJobQueue"


def _submit_script(script_path, job_options=None, **submit_options):
    """Wrap *script_path* in a DownloadJob and submit it to the download queue.

    Args:
        script_path: Path of the shell script handed to ``DownloadJob``.
        job_options: Optional extra keyword arguments for ``DownloadJob``
            (for example ``memory`` or ``cpu``).
        **submit_options: Extra keyword arguments forwarded unchanged to
            ``batch_environment.submit_job`` (for example ``depends_on``).

    Returns:
        Whatever ``batch_environment.submit_job`` returns for the job.
    """
    job = DownloadJob(script=script_path, **(job_options or {}))
    return batch_environment.submit_job(
        job,
        job_queue_name=_DOWNLOAD_JOB_QUEUE,
        **submit_options,
    )


# One submission per download script, in the same order as before. Each
# result is bound to its own name so it can be referenced individually.
download_alphafold_params_submission = _submit_script(
    "./scripts/download_alphafold_params.sh"
)
download_bfd_submission = _submit_script("./scripts/download_bfd.sh")
download_mgnify_submission = _submit_script("./scripts/download_mgnify.sh")
download_openfold_params_submission = _submit_script(
    "./scripts/download_openfold_params.sh"
)
download_pdb70_submission = _submit_script("./scripts/download_pdb70.sh")
download_pdb_mmcif_submission = _submit_script("./scripts/download_pdb_mmcif.sh")
download_pdb_seqres_submission = _submit_script("./scripts/download_pdb_seqres.sh")
download_small_bfd_submission = _submit_script("./scripts/download_small_bfd.sh")
download_uniclust30_submission = _submit_script("./scripts/download_uniclust30.sh")
download_uniprot_submission = _submit_script("./scripts/download_uniprot.sh")
download_uniref30_submission = _submit_script("./scripts/download_uniref30.sh")
download_uniref90_submission = _submit_script("./scripts/download_uniref90.sh")
download_colabfold_envdb_submission = _submit_script(
    "./scripts/download_colabfold_envdb.sh"
)

# The MMseqs database preparation job is submitted with explicit memory/cpu
# settings and declares the UniRef30 and ColabFold envdb download submissions
# as its dependencies.
prep_mmseqs_dbs_submission = _submit_script(
    "./scripts/prep_mmseqs_dbs.sh",
    job_options={"memory": 500, "cpu": 64},
    depends_on=[download_uniref30_submission, download_colabfold_envdb_submission],
)
