In [1]:
import os
import azureml.core
from azureml.core import Workspace, Experiment, Datastore,Environment
from azureml.widgets import RunDetails
 
from azureml.core import Dataset
 
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.core import PipelineRun, StepRun, PortDataReference
from azureml.pipeline.steps import PythonScriptStep
 
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
 
from azureml.core.runconfig import RunConfiguration, DockerConfiguration
from azureml.core.conda_dependencies import CondaDependencies
 
from azureml.core.model import Model

from azureml.data import OutputFileDatasetConfig
 
# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.37.0


In [2]:
ws = Workspace.from_config()

In [3]:
def_blob_store = ws.get_default_datastore()

In [4]:
aml_compute_target = "RLModel"
try:
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("found existing compute target.")
except ComputeTargetException:
    print("creating new compute target")
    
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_ND6S",
                                                                min_nodes = 0, 
                                                                max_nodes = 3)    
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=60)
    
print("Azure Machine Learning Compute attached")

found existing compute target.
Azure Machine Learning Compute attached


In [5]:
aml_run_config = RunConfiguration()

aml_run_config.docker = DockerConfiguration(use_docker=True)
#DOCKER_ARGUMENTS = ["--shm_size","128g"]  # increase shared memory
#aml_run_config.environment.docker.arguments = DOCKER_ARGUMENTS

aml_run_config.target = aml_compute
#aml_run_config.environment.docker.enabled = True
aml_run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu18.04:20220113.v1"
 
aml_run_config.environment.python.user_managed_dependencies = False
 
aml_run_config.environment.python.conda_dependencies = CondaDependencies(conda_dependencies_file_path='./environment.yml')



AttributeError: <class 'azureml.core.runconfig.DockerConfiguration'> has no attribute base_image

In [None]:
def scriptNode(name,script,arguments = None, inputs=None,outputs=None,source=None):   
    nodestep = PythonScriptStep(name=name, script_name=script, arguments = arguments,
                         inputs = inputs, outputs = outputs, compute_target=aml_compute,
                         runconfig=aml_run_config, source_directory=source,
                         allow_reuse=True)
    return nodestep

In [None]:
#file_dataset = Dataset.File.upload_directory(src_dir="./pred_data",target=def_blob_store)
#file_dataset.register(workspace=ws,name='pred')

#davis_data = Dataset.File.upload_directory(src_dir="./davis_data",target=def_blob_store)
#davis_data.register(workspace=ws,name='davis')

#qsar_data = Dataset.File.upload_directory(src_dir="./qsar",target=def_blob_store)
#qsar_data.register(workspace=ws,name='qsar')

In [None]:
moses = Dataset.get_by_name(ws, name='moses')
pred_dataset = Dataset.get_by_name(ws, name='pred')
davis_dataset = Dataset.get_by_name(ws, name='davis')
qsar_dataset = Dataset.get_by_name(ws,"qsar")

In [None]:
raw_data = moses.as_named_input('raw_data')
in_data = PipelineData("Input_Data", datastore=def_blob_store)

train = PipelineData("training_processed", datastore=def_blob_store)
test = PipelineData("test_processed", datastore=def_blob_store)
valid = PipelineData("valid_processed", datastore=def_blob_store)
train_job_dir = PipelineData("train_job_dir", datastore=def_blob_store)

finetune_job_dir = PipelineData("finetune", datastore=def_blob_store)

qsar = qsar_dataset.as_named_input('qsar')

generation = PipelineData("generation", datastore=def_blob_store)

modified_csv = PipelineData("csv", datastore=def_blob_store)

pred1 = pred_dataset.as_named_input('pred')

davis1 = davis_dataset.as_named_input('davis')

model_output = PipelineData("model", datastore=def_blob_store)

predict = PipelineData("predict", datastore=def_blob_store)

In [None]:
step1 = scriptNode("dataSplit","./datasplit/dataSplit.py",["--split_data", in_data],
                  [raw_data],[in_data],"./preprocess_data")
                  
step2 = scriptNode("preprocess_training_data","train.py",
                    ["--input-data",in_data,"--output-data", train],
                    [in_data],[train],"./preprocess_data")

step3 = scriptNode("preprocess_test_data","test.py",
                    ["--input-data",in_data,"--output-data", test],
                    [in_data],[test],"./preprocess_data")

step4 = scriptNode("preprocess_valid_data","valid.py",
                    ["--input-data",in_data,"--output-data", valid],
                    [in_data],[valid],"./preprocess_data")
                    
step5 = scriptNode("Train GEFA","train_test.py",
                    ["--data-path",davis1.as_mount(), "--model_path",model_output,
                    "--data_type",0],
                    None,[model_output],"./GEFA")

step6 = scriptNode("Train GGNN","main.py",
                    ["--input-data",in_data,"--train_dir",train,"--test_dir",test,
                     "--valid_dir",valid,"--job-dir",train_job_dir],
                    [in_data,train,test,valid],[train_job_dir],"./train_model")

step7 = scriptNode("R-Learn","main.py",
                    ["--input-data",in_data,"--train_dir",train,"--test_dir",test,
                     "--valid_dir",valid,"--job-dir",finetune_job_dir,"--data_path",qsar.as_mount(),
                     "--trained",train_job_dir],
                    [in_data,train,test,valid,train_job_dir],[finetune_job_dir],"./finetune_model")


step8 = scriptNode("Generate molecules","main.py",
                    ["--input-data",in_data, "--job-dir",finetune_job_dir,"--train_dir",train,
                    "--generation",generation,"--trained",train_job_dir,"--data_path",qsar.as_mount()],
                    [in_data,finetune_job_dir,train,train_job_dir],[generation],"./generate_molecule")


step9 = scriptNode("Prepare data for GEFA","prepare_data.py",
                    ["--data",modified_csv, "--pred",pred1.as_mount(),
                    "--generation",generation],
                    [generation],[modified_csv],"./generate_molecule")



step10 = scriptNode("Predict Binding score","predict.py",
                    ["--data-path",pred1.as_mount(), "--model_path",model_output,
                    "--data_type",1,"--out_path",predict,
                    "--generated",modified_csv],
                    [model_output,modified_csv],[predict],"./GEFA")



In [None]:
steps = [step1,step2,step3,step4,step5,step6,step7,step8,step9,step10]

In [None]:
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline_run1 = Experiment(ws, 'RL-Model').submit(pipeline, regenerate_outputs=False)

In [None]:
envs = Environment.list(workspace=ws)

for env in envs:
    if env.startswith("AzureML"):
        print("Name",env)