# Docetaxel Sensitivity: Automated Machine Learning using Gene Expression
---------------------------

In [3]:
import numpy as np
import pandas as pd
from azureml.core import Workspace, Dataset, Run

# Workspace handle used by every later cell (dataset lookup, compute target, experiment).
# Built from the workspace configuration available to this notebook environment.
ws = Workspace.from_config()

In [4]:
## Get Sensitivity Data
# Look up the registered dataset by name in the workspace; `doce_data` is the
# dataset handle that is later passed to AutoML as the training data.
doce_data = Dataset.get_by_name(workspace=ws, name='docetaxel_sensitivity')

# Materialise the dataset as a pandas DataFrame for inspection in the notebook.
doce_data_df = doce_data.to_pandas_dataframe()
doce_data_df

Unnamed: 0,L10_IC_50,TSPAN6,TNMD,DPM1,SCYL3,C1orf112,FGR,CFH,FUCA2,GCLC,...,ARHGAP11B,AC004593.2,AC090517.4,AL160269.1,ABCF2-H2BE1,POLR2J3,H2BE1,AL445238.1,GET1-SH3BGR,AC113348.1
0,-1.6643720768426125,2.643856189774725,0.0,6.219555769166913,3.418189947945766,4.659924558402379,0.01435529297707,0.1110313123887439,5.846242565109514,7.057883448899626,...,2.111031312388744,0.0565835283663675,0.0426443374084937,0.0565835283663675,1.1243281350022014,5.069959890459774,0.0,0.0,1.803227036434928,0.0
1,-2.2657961042166934,2.985500430304885,0.0,6.778734244117836,4.130930869826448,3.778208576398088,0.0,0.2986583155645151,7.43379405875492,6.689299160535892,...,1.7355221772965377,0.3448284969974411,0.3673710656485294,0.1243281350022017,2.192194165283345,4.358958825832329,0.0,0.0426443374084937,0.0976107966264223,0.0
2,-2.194771295116583,4.574707046415546,0.0,6.632413641137546,1.9373443921502325,3.4019034716079584,0.0285691521967709,0.5753123306874368,5.7755773609306535,3.320484678017693,...,2.4776773275653072,1.2203299548795556,0.1110313123887439,0.0,2.84197311892718,3.6158870739160567,0.0,0.0,1.1890338243900171,0.0
3,-2.8168508023444905,5.868637384170314,0.0,6.636044526028037,2.0461417816447205,4.9963887464476215,0.1763227726404629,1.655351828612554,6.200457271428572,3.4982508675278257,...,0.7739963251111731,0.0285691521967709,0.137503523749935,0.070389327891398,1.1763227726404628,6.022811650915631,0.0,0.0,0.0565835283663675,0.0
4,-2.370915780677939,5.026800059343715,0.0,6.96613048992893,1.8991756304805127,3.5310694927259543,0.0,3.910732661902913,6.371384613557957,4.693208148910016,...,1.9523335663696857,0.0426443374084937,0.0426443374084937,0.0426443374084937,3.104336659814736,5.9342805936400245,0.0,0.0,0.3785116232537298,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
676,-2.4501423858336175,0.0,0.0,6.23112515787905,2.6848187375532224,4.898208352508718,3.0268000593437154,0.0285691521967709,3.3476656563009706,3.7729413378313352,...,2.533563348214512,0.0,0.0565835283663675,0.0,1.9068905956085185,5.387500406480984,0.0,0.0565835283663675,0.4005379295837288,0.0
677,-0.24498841253817064,5.649328040984784,0.0,6.6748278321868,3.085764553778314,3.790772037862,0.2265085298086797,3.878725341480105,6.167518503735217,5.101818134227409,...,2.0840642647884744,0.2016338611696504,0.0,0.0,2.3757345385831563,4.361768359419154,0.0,0.0,0.3334237337251918,0.0
678,-2.6648063309377252,4.863938450423972,0.0,5.977966757619478,2.553360503335328,3.7729413378313352,0.2750070474998698,7.17881421635862,6.867278739709662,3.959770155211468,...,3.3103401206121505,0.0,0.0,0.0,3.748461233004035,6.263973354803409,0.0,0.0,0.1110313123887439,0.0
679,-3.0343703784319263,5.9140860970127225,0.0,6.749668340903956,2.8094144442358986,4.175524601089875,0.1763227726404629,5.859224161978204,6.535275376620803,4.598126959919604,...,2.263034405833794,0.1634987322828795,0.0976107966264223,0.4956951626240688,3.3798981635246923,6.094869432718697,0.0,0.0,1.0285691521967708,0.0


## Provision Compute Target

In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "ml-cluster1"

try:
    # Reuse the compute target if one with this name already exists in the workspace.
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster.')
except ComputeTargetException:
    # The lookup failed, so provision a new cluster.
    # Provisioning errors are intentionally NOT caught here: if creation fails,
    # `training_cluster` would be left undefined and the notebook would only break
    # later with a confusing NameError. Let the real error surface in this cell.
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2',
                                                           max_nodes=4)
    training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    # Block until provisioning finishes so later cells can submit to the cluster.
    training_cluster.wait_for_completion(show_output=True)

Found existing cluster.


## Configure AutoML

In [10]:
from azureml.train.automl import AutoMLConfig

# AutoML regression configuration for predicting the 'L10_IC_50' column of the
# docetaxel sensitivity dataset on the remote training cluster.
automl_config = AutoMLConfig(name='docetaxel_sensitivity_automl_config',
                             task='regression',
                             compute_target=training_cluster,
                             training_data=doce_data,
                             label_column_name='L10_IC_50',
                             primary_metric='r2_score',
                             n_cross_validations=5,
                             iterations=25,
                             # One iteration runs per cluster node, so keep this at or
                             # below the cluster's node count (this notebook provisions
                             # max_nodes=4); a larger value only queues the extra runs.
                             # NOTE(review): if a pre-existing cluster with a different
                             # size is reused, adjust this to match it.
                             max_concurrent_iterations=4,
                             experiment_timeout_minutes=240,
                             # Inputs are used as-is; no automatic featurization.
                             featurization='off',
                             enable_voting_ensemble=True,
                             enable_stack_ensemble=True,
                             model_explainability=True)

## Run the Experiment

In [11]:
from azureml.core.experiment import Experiment

# Create (or attach to) the named experiment in the workspace and submit the
# AutoML configuration to it; `automl_run` is the handle to the submitted run.
automl_experiment = Experiment(workspace=ws,
                               name='docetaxel_sensitivity_training_automl')
automl_run = automl_experiment.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
docetaxel_sensitivity_training_automl,AutoML_fa62a263-797a-468f-bebd-97218ef50d7a,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


In [12]:
## Add Tags
# Record the provenance of this run. The dataset name and version are read from
# the dataset object that was actually used for training rather than hard-coded,
# so the tags stay correct when a newer dataset version is registered.
# Tag values are stringified up front because run tags are stored as strings.
run_tags = {
    'input_dataset': str(doce_data.name),
    'input_dataset_version': str(doce_data.version),
    'dependent_variable': 'L10_IC_50',
}
for tag_key, tag_value in run_tags.items():
    automl_run.tag(tag_key, tag_value)

Converting non-string tag to string: (input_dataset_version: 1)


In [13]:
from azureml.widgets import RunDetails

## Show the status in the notebook as the experiment runs
# Render the live monitoring widget for the submitted run.
run_widget = RunDetails(automl_run)
run_widget.show()

# Block on the run, streaming its output into the cell; the value returned by
# the call is displayed as the cell result.
automl_run.wait_for_completion(show_output=True)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
docetaxel_sensitivity_training_automl,AutoML_fa62a263-797a-468f-bebd-97218ef50d7a,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Received interrupt. Returning now.

{'runId': 'AutoML_fa62a263-797a-468f-bebd-97218ef50d7a',
 'target': 'ml-cluster1',
 'status': 'Running',
 'startTimeUtc': '2022-04-03T01:17:42.077163Z',
 'services': {},
 'properties': {'num_iterations': '25',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'r2_score',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'ml-cluster1',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"542e6c51-8026-42f8-863d-8a55b709a88f\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': 'False',
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'regression',
  'dependencies_versions': '{"azureml-widgets": "1.37.0", "azureml-train": "1.37.0", "azureml-train-restclients-hyperdrive": "1.37.0", "azureml-train-core": "1.37.0", "azureml-train-automl": "1.37.0", "azureml-train-automl-runtime": "1.37.0", "azureml-train-automl-client": "1.37.0", "azureml-tensorboard": "1.37.0", "azur