# Define variables

In [1]:
import time
import os

import workflow_executor

### Project, GCS and job ID

In [2]:
# Project, bucket and region identifiers used to build the job settings below.
project_id = "rl-alphafold-dev"
bucket_name = "rl-alphafold-dbs"
region = "us-central1"

# Local FASTA file; a later cell reads it to obtain the input sequence.
local_sequence_path = "sequences/seq1.fasta"

In [3]:
# Read the whole input file into memory; the text is passed on to the workflow
# executor as `sequence_str`.
with open(local_sequence_path, 'r') as fp:
    sequence_str = fp.read()

# A millisecond wall-clock timestamp gives each run of this cell a new job ID.
job_id = f'job-alphafold-{int(time.time()*1000)}'

# Storage paths are joined with an explicit '/': os.path.join uses the host OS
# separator, which would produce backslashes (and wrong object names) on Windows.
blob_sequence_path = f'{job_id}/sequence.fasta'
gcs_job_path = f'{bucket_name}/{job_id}'

# Container image location, derived from the project ID.
image_uri = f'gcr.io/{project_id}/alphafold'

### Network

In [4]:
# Short network / subnetwork names; a later cell expands them into
# project-qualified resource paths.
network_name = "alphafold"
subnetwork_name = "alphafold-nw"

# NFS server address and path.
# NOTE(review): presumably the share that holds the datasets - confirm.
nfs_ip_address = "10.130.0.2"
nfs_path = "datasets"

In [5]:
# Both resource paths share the same project-scoped prefix, so build it once.
project_prefix = f'projects/{project_id}'

# Project-qualified network (global) and subnetwork (regional) resource paths.
network = f'{project_prefix}/global/networks/{network_name}'
subnet = f'{project_prefix}/regions/{region}/subnetworks/{subnetwork_name}'

### Data Pipeline

In [6]:
# Data pipeline settings forwarded to the workflow executor.
max_template_date = '2020-05-14'
model_preset = 'monomer'
db_preset = 'reduced_dbs'

# Names of the data pipeline artifacts.
data_metadata_filename = 'data_pipeline.json'
msas_folder = 'msas'
data_features_filename = 'features.pkl'

# Derive the blob paths from the filename variables above instead of repeating
# the literals, so the two can never drift apart. The paths are joined with an
# explicit '/' because os.path.join would use the host OS separator
# (a backslash on Windows).
blob_data_metadata_path = f'{gcs_job_path}/{data_metadata_filename}'
blob_data_features_path = f'{gcs_job_path}/{data_features_filename}'

### Predict and Relax

In [7]:
# Prediction / relaxation settings forwarded to the workflow executor.
num_models = 5
num_predictions_per_model = 1

# Left empty in this notebook.
# NOTE(review): confirm how the executor treats an empty parameters path.
model_params_path = ""

run_relax = True
random_seed = 1000
parallelism = 5

# Execute Workflow

In [8]:
# The executor receives every setting positionally, so the order inside and
# across the groups below must stay exactly as written. Each group mirrors the
# notebook section in which its variables are defined.
job_settings = (
    project_id,
    bucket_name,
    region,
    sequence_str,
    job_id,
    blob_sequence_path,
    gcs_job_path,
    image_uri,
)
network_settings = (
    network_name,
    subnetwork_name,
    nfs_ip_address,
    nfs_path,
    network,
    subnet,
)
data_pipeline_settings = (
    max_template_date,
    model_preset,
    db_preset,
    data_metadata_filename,
    msas_folder,
    data_features_filename,
    blob_data_metadata_path,
    blob_data_features_path,
)
predict_relax_settings = (
    num_predictions_per_model,
    model_params_path,
    num_models,
    run_relax,
    random_seed,
    parallelism,
)

# Unpacking the groups in sequence reproduces the original 28-argument call.
args = workflow_executor.prepare_args_for_experiments(
    *job_settings,
    *network_settings,
    *data_pipeline_settings,
    *predict_relax_settings,
)

In [9]:
# Echo the value returned by prepare_args_for_experiments as this cell's output.
args

{'project_id': 'rl-alphafold-dev',
 'bucket_name': 'rl-alphafold-dbs',
 'region': 'us-central1',
 'sequence_str': '>test sequence\nMAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEHAAQAAKHDAEHHAPKPH',
 'job_id': 'job-alphafold-1669682221567',
 'blob_sequence_path': 'job-alphafold-1669682221567/sequence.fasta',
 'gcs_job_path': 'rl-alphafold-dbs/job-alphafold-1669682221567',
 'image_uri': 'gcr.io/rl-alphafold-dev/alphafold',
 'network_name': 'alphafold',
 'subnetwork_name': 'alphafold-nw',
 'nfs_ip_address': '10.130.0.2',
 'nfs_path': 'datasets',
 'network': 'projects/rl-alphafold-dev/global/networks/alphafold',
 'subnet': 'projects/rl-alphafold-dev/regions/us-central1/subnetworks/alphafold-nw',
 'max_template_date': '2020-05-14',
 'model_preset': 'monomer',
 'db_preset': 'reduced_dbs',
 'data_metadata_filename': 'data_pipeline.json',
 'msas_folder': 'msas',
 'data_features_filename': 'features.pkl',
 'blob_data_metadata_path': 'rl-alphafold-dbs/job-alphafold-1669682221567/data_pi