In [None]:
# Sandbox

In [1]:
# Environment settings for this notebook.
PROJECT_ID = "jk-mlops-dev"
REGION = "us-central1"
STAGING_BUCKET = "gs://jk-vertex-staging"

# Service account e-mail derived from the project id; it is the identity
# handed to job.run() when the pipeline is submitted.
VERTEX_SA = "vertex-sa@" + PROJECT_ID + ".iam.gserviceaccount.com"

In [2]:
import os
import sys

import google.cloud.aiplatform as aip

from datetime import datetime
import kfp

from kfp.v2 import components
from kfp.v2 import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component



In [3]:
# Initialise the Vertex AI SDK for this session. The region is passed
# explicitly so that the REGION setting from the configuration cell actually
# takes effect instead of the SDK silently falling back to its default location.
aip.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=f'{STAGING_BUCKET}/alphafold_sandbox',
)

In [4]:
# Container image to run and the root directory under which every dataset
# path below is located.
IMAGE_NAME = 'gcr.io/jk-mlops-dev/alphafold'
ROOT_DIR = '/gcs/jk-alphafold-datasets-archive/nov-2021'

# Input sequences, database locations and output location, all under ROOT_DIR.
data_dir = f'{ROOT_DIR}'
target_fasta_paths = [f'{ROOT_DIR}/fasta/T1031.fasta']
uniref90_database_path = f'{ROOT_DIR}/uniref90/uniref90.fasta'
# NOTE: uniprot_database_path is defined but not passed to the container below.
uniprot_database_path = f'{ROOT_DIR}/uniprot/uniprot.fasta'
mgnify_database_path = f'{ROOT_DIR}/mgnify/mgy_clusters.fa'
bfd_database_path = f'{ROOT_DIR}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt'
uniclust30_database_path = f'{ROOT_DIR}/uniclust30/uniclust30_2018_08/uniclust30_2018_08'
pdb70_database_path = f'{ROOT_DIR}/pdb70/pdb70'
template_mmcif_dir = f'{ROOT_DIR}/pdb_mmcif/mmcif_files'
obsolete_pdbs_path = f'{ROOT_DIR}/pdb_mmcif/obsolete.dat'
output_target_path = f'{ROOT_DIR}/output'

# Run options forwarded verbatim as command-line flags.
max_template_date = '2020-05-14'
db_preset = 'full_dbs'
model_preset = 'monomer'
benchmark = False
use_precomputed_msas = False
run_relax = True
use_gpu_relax = True

# Single source of truth for the container's command-line flags. The component
# definition below is rendered from this list, so the two can no longer drift
# apart (previously this list was missing --data_dir).
ARGUMENTS = [
    f'--data_dir={data_dir}',
    f'--fasta_paths={",".join(target_fasta_paths)}',
    f'--uniref90_database_path={uniref90_database_path}',
    f'--mgnify_database_path={mgnify_database_path}',
    f'--template_mmcif_dir={template_mmcif_dir}',
    f'--obsolete_pdbs_path={obsolete_pdbs_path}',
    f'--pdb70_database_path={pdb70_database_path}',
    f'--uniclust30_database_path={uniclust30_database_path}',
    f'--bfd_database_path={bfd_database_path}',
    f'--output_dir={output_target_path}',
    f'--max_template_date={max_template_date}',
    f'--db_preset={db_preset}',
    f'--model_preset={model_preset}',
    f'--benchmark={benchmark}',
    f'--use_precomputed_msas={use_precomputed_msas}',
    f'--run_relax={run_relax}',
    f'--use_gpu_relax={use_gpu_relax}',
    '--logtostderr',
]

# Render every flag as one single-quoted YAML scalar per line. A literal single
# quote inside a value is escaped by doubling it, which is YAML's rule for
# single-quoted scalars.
_yaml_args = '\n'.join(
    "        '{}',".format(arg.replace("'", "''")) for arg in ARGUMENTS
)

# Component definition consumed by kfp's load_component_from_text().
component_spec = f"""
name: Run alphafold
description: Runs alphafold

implementation:
  container:
    image: {IMAGE_NAME}
    args: [
{_yaml_args}
    ]

"""

component_spec

"\nname: Run alphafold\ndescription: Runs alphafold\n\nimplementation:\n  container:\n    image: gcr.io/jk-mlops-dev/alphafold\n    args: [\n        '--data_dir=/gcs/jk-alphafold-datasets-archive/nov-2021',\n        '--fasta_paths=/gcs/jk-alphafold-datasets-archive/nov-2021/fasta_paths/T1050.fasta',\n        '--uniref90_database_path=/gcs/jk-alphafold-datasets-archive/nov-2021/uniref90/uniref90.fasta',\n        '--mgnify_database_path=/gcs/jk-alphafold-datasets-archive/nov-2021/mgnify/mgy_clusters.fa',\n        '--template_mmcif_dir=/gcs/jk-alphafold-datasets-archive/nov-2021/pdb_mmcif/mmcif_files',\n        '--obsolete_pdbs_path=/gcs/jk-alphafold-datasets-archive/nov-2021/pdb_mmcif/obsolete.dat',\n        '--pdb70_database_path=/gcs/jk-alphafold-datasets-archive/nov-2021/pdb70/pdb70',\n        '--uniclust30_database_path=/gcs/jk-alphafold-datasets-archive/nov-2021/uniclust30/uniclust30_2018_08/uniclust30_2018_08',\n        '--bfd_database_path=/gcs/jk-alphafold-datasets-archive/nov-20

In [5]:
# Build a component factory from the YAML component definition.
run_alphafold_op = kfp.components.load_component_from_text(component_spec)


@dsl.pipeline(name="alphafold-test")
def pipeline():
    """Single-step pipeline that runs the AlphaFold container once."""
    run_alphafold_step = run_alphafold_op()

    # Compute resources for the step.
    run_alphafold_step.set_cpu_limit('24')
    run_alphafold_step.set_memory_limit('80G')

    # The container is launched with --use_gpu_relax=True and GPU-related
    # environment variables, so an accelerator has to be requested explicitly;
    # without these two calls the step is scheduled without any GPU attached.
    # NOTE(review): the accelerator type is a deployment choice - change it to
    # whatever the project has quota for.
    run_alphafold_step.add_node_selector_constraint(
        'cloud.google.com/gke-accelerator', 'NVIDIA_TESLA_T4')
    run_alphafold_step.set_gpu_limit('1')

    # Environment passed to the container process.
    run_alphafold_step.set_env_variable(
        name='NVIDIA_VISIBLE_DEVICES', value='all')
    run_alphafold_step.set_env_variable(
        name='TF_FORCE_UNIFIED_MEMORY', value='1')
    run_alphafold_step.set_env_variable(
        name='XLA_PYTHON_CLIENT_MEM_FRACTION', value='4.0')





In [6]:
# Compile the pipeline function into a job spec file in the working directory;
# the submission cell loads the same file name as its template.
compiler.Compiler().compile(
    package_path="custom_model_training_spec.json",
    pipeline_func=pipeline,
)





In [7]:
# Timestamp suffix (YYYYmmddHHMMSS) gives every submission its own display name.
DISPLAY_NAME = f"alphafold_test_{datetime.now():%Y%m%d%H%M%S}"
PIPELINE_ROOT = STAGING_BUCKET + '/alphafold/pipeline_root'

# Create the pipeline job from the compiled spec file.
job = aip.PipelineJob(
    template_path="custom_model_training_spec.json",
    pipeline_root=PIPELINE_ROOT,
    display_name=DISPLAY_NAME,
)

# Submit the job under the configured service account.
job.run(service_account=VERTEX_SA)

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/895222332033/locations/us-central1/pipelineJobs/alphafold-test-20220203142017
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/895222332033/locations/us-central1/pipelineJobs/alphafold-test-20220203142017')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/alphafold-test-20220203142017?project=895222332033
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/895222332033/locations/us-central1/pipelineJobs/alphafold-test-20220203142017 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/895222332033/locations/us-central1/pipelineJobs/alphafold

RuntimeError: Job failed with:
code: 9
message: "The DAG failed because some tasks failed. The failed tasks are: [run-alphafold].; Job (project_id = jk-mlops-dev, job_id = 6242433286233128960) is failed due to the above error.; Failed to handle the job: {project_number = 895222332033, job_id = 6242433286233128960}"
