In [1]:
!pip install --upgrade azureml-sdk[notebooks,automl,explain]

Requirement already up-to-date: azureml-sdk[automl,explain,notebooks] in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (1.3.0)


In [2]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.3.0 to work with myaml


## 1- Prepare Data for an Experiment
> Since we don't actually have a fully staffed clinic with patients from whom to get new data for this course, you'll generate a random sample from our diabetes CSV file and use those to test the pipeline. Then you'll upload that data to a datastore in the Azure Machine Learning workspace and register a dataset for it.

In [3]:
from azureml.core import Datastore, Dataset
import pandas as pd
import os

# Load the diabetes data
diabetes = pd.read_csv('data/diabetes2.csv')
# Get a 100-item sample of the feature columns (not the diabetic label)
sample = diabetes[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].sample(n=100).values

# Create a folder
batch_folder = './batch-data'
os.makedirs(batch_folder, exist_ok=True)
print("Folder created!")

# Save each sample as a separate file
print("Saving files...")
for i in range(100):
    fname = str(i+1) + '.csv'
    sample[i].tofile(os.path.join(batch_folder, fname), sep=",")
print("files saved!")

# Upload the files to the default datastore
print("Uploading files to datastore...")
default_ds = ws.get_default_datastore()


if 'diabetes dataset' not in ws.datasets:
    default_ds.upload(src_dir="batch-data", target_path="batch-data", overwrite=True, show_progress=True)
    # Register a dataset for the input data
    batch_data_set = Dataset.File.from_files(path=(default_ds, 'batch-data/'), validate=False)
    
    try:
        batch_data_set = batch_data_set.register(workspace=ws, 
                                             name='batch-data',
                                             description='batch data',
                                             create_new_version=True)
        print('Dataset registered.')
    except Exception as ex:
        print(ex)

else:
    print('Dataset already registered.')

Folder created!
Saving files...
files saved!
Uploading files to datastore...
Dataset already registered.


> Training Data : In this lab, you'll use a dataset containing details of diabetes patients. Run the cell below to create this dataset (if you created it in the previous lab, the code will find the existing version)

In [4]:
from azureml.core import Dataset

default_ds = ws.get_default_datastore()

if 'diabetes dataset sho' not in ws.datasets:
    default_ds.upload_files(files=['./data/diabetes.csv'], # Upload the diabetes csv files in /data
                        target_path='diabetes-data-sho/', # Put it in a folder path in the datastore
                        overwrite=True, # Replace existing files of the same name
                        show_progress=True)

## 2- Create Scripts for training

In [5]:
import os
# Create a folder for the experiment files
experiment_folder = 'batch_pipeline-shoresh'
os.makedirs(experiment_folder, exist_ok=True)

print(experiment_folder)

batch_pipeline-shoresh


In [6]:
%%writefile $experiment_folder/data_prep.py

from azureml.core import Dataset, Run
run = Run.get_context()
ws = run.experiment.workspace
default_ds = ws.get_default_datastore()

tab_data_set = Dataset.Tabular.from_delimited_files(path=(default_ds,'diabetes-data-sho/*.csv'))

    # Register the tabular dataset
try:
    tab_data_set = tab_data_set.register(workspace=ws, 
                                name='diabetes dataset sho',
                                description='diabetes data sho',
                                tags = {'format':'CSV'},
                                create_new_version=True)
    print('Dataset registered.')
except Exception as ex:
    print(ex)
    
run.complete()

Overwriting batch_pipeline-shoresh/data_prep.py


In [7]:
%%writefile $experiment_folder/train_diabetes.py
# Import libraries
from azureml.core import Run
import argparse
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--output_folder', type=str, dest='output_folder', default="diabetes_model", help='output folder')
args = parser.parse_args()
output_folder = args.output_folder

# Get the experiment run context
run = Run.get_context()

# load the diabetes data (passed as an input dataset)
print("Loading Data...")
diabetes = run.input_datasets['diabetes_train'].to_pandas_dataframe()

# Separate features and labels
X, y = diabetes[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, diabetes['Diabetic'].values

# Split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# Train adecision tree model
print('Training a decision tree model')
model = DecisionTreeClassifier().fit(X_train, y_train)

# calculate accuracy
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)
run.log('Accuracy', np.float(acc))

# calculate AUC
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test,y_scores[:,1])
print('AUC: ' + str(auc))
run.log('AUC', np.float(auc))

# plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])
fig = plt.figure(figsize=(6, 4))
# Plot the diagonal 50% line
plt.plot([0, 1], [0, 1], 'k--')
# Plot the FPR and TPR achieved by our model
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
run.log_image(name = "ROC", plot = fig)
plt.show()

# Save the trained model
os.makedirs(output_folder, exist_ok=True)
output_path = output_folder + "/model.pkl"
joblib.dump(value=model, filename=output_path)

run.complete()

Overwriting batch_pipeline-shoresh/train_diabetes.py


## 3-  Create Scripts for Model Registering

In [8]:
%%writefile $experiment_folder/register_diabetes.py
# Import libraries
import argparse
import joblib
from azureml.core import Workspace, Model, Run

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--model_folder', type=str, dest='model_folder', default="diabetes_model", help='model location')
args = parser.parse_args()
model_folder = args.model_folder

# Get the experiment run context
run = Run.get_context()

# load the model
print("Loading model from " + model_folder)
model_file = model_folder + "/model.pkl"
model = joblib.load(model_file)

Model.register(workspace=run.experiment.workspace,
               model_path = model_file,
               model_name = 'diabetes_model_shoresh',
               tags={'Training context':'Pipeline-shoresh'})

run.complete()

Overwriting batch_pipeline-shoresh/register_diabetes.py


## 4- Create Compute

In [9]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "aml-cluster"

# Verify that cluster exists
try:
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If not, create it
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS1_V2', 
                                                           max_nodes=2)
    pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

pipeline_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## 5- Create Environment for Training

In [10]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# Create a Python environment for the experiment
diabetes_env = Environment("diabetes-experiment-env")
diabetes_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
diabetes_env.docker.enabled = True # Use a docker container

# Create a set of package dependencies
diabetes_packages = CondaDependencies.create(conda_packages=['scikit-learn','ipykernel','matplotlib', 'pandas'],
                                             pip_packages=['azureml-sdk','pyarrow'])

# Add the dependencies to the environment
diabetes_env.python.conda_dependencies = diabetes_packages

# Register the environment (just in case previous lab wasn't completed)
diabetes_env.register(workspace=ws)
registered_env = Environment.get(ws, 'diabetes-experiment-env')

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above. 
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
pipeline_run_config.environment = registered_env

print ("Run configuration created.")

Run configuration created.


## 6- Create Training Pipeline

In [11]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.train.estimator import Estimator

# Get the training dataset
diabetes_ds = ws.datasets.get("diabetes dataset sho")

# Create a PipelineData (temporary Data Reference) for the model folder
model_folder = PipelineData("model_folder", datastore=ws.get_default_datastore())

register_dataset_step = PythonScriptStep(name = "Register DataSet",
                                source_directory = experiment_folder,
                                script_name = "data_prep.py",
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)

estimator = Estimator(source_directory=experiment_folder,
                        compute_target = pipeline_cluster,
                        environment_definition=pipeline_run_config.environment,
                        entry_script='train_diabetes.py')

train_step = EstimatorStep(name = "Train Model",
                           estimator=estimator, 
                           estimator_entry_script_arguments=['--output_folder', model_folder],
                           inputs=[diabetes_ds.as_named_input('diabetes_train')],
                           outputs=[model_folder],
                           compute_target = pipeline_cluster,
                           allow_reuse = True)

# Step 2, run the model registration script
register_step = PythonScriptStep(name = "Register Model",
                                source_directory = experiment_folder,
                                script_name = "register_diabetes.py",
                                arguments = ['--model_folder', model_folder],
                                inputs=[model_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)

print("Pipeline steps defined")

Pipeline steps defined


In [12]:
train_step.run_after(register_dataset_step)
register_step.run_after(train_step)

from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Construct the pipeline
#pipeline_steps = [train_step, register_step]
pipeline = Pipeline(workspace = ws, steps=[register_step])
print("Pipeline is built.")

# Create an experiment and run the pipeline
experiment = Experiment(workspace = ws, name = 'diabetes-training-pipeline-shoresh')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")

RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion()

Pipeline is built.
Created step Register Model [7155e1b3][aa26f832-3343-407a-a31d-ccca91d414e7], (This step will run and generate new outputs)
Created step Train Model [972fada1][c1a28530-ed28-4274-808e-3d822d8c37a2], (This step will run and generate new outputs)
Created step Register DataSet [1661368d][85ac0b80-93da-4a7d-a2d8-50a162f726e0], (This step will run and generate new outputs)
Submitted PipelineRun 8c1d89bc-e04b-4779-9336-4c8c859ce7a6
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/diabetes-training-pipeline-shoresh/runs/8c1d89bc-e04b-4779-9336-4c8c859ce7a6?wsid=/subscriptions/46926bff-fe7d-4284-bc62-eafdda8d8f2c/resourcegroups/DataSienceSolutionAzure/workspaces/myaml
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 8c1d89bc-e04b-4779-9336-4c8c859ce7a6
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/diabetes-training-pipeline-shoresh/runs/8c1d89bc-e04b-4779-9336-4c8c859ce7a6?wsid=/subscriptions/46926bff-fe7d-4284-bc62-eafdda8d8f2c/resourcegroups/DataSienceSolutionAzure/workspaces/myaml
PipelineRun Status: Running


StepRunId: 55c4d430-e570-4dc8-b22a-f9585637ae49
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/diabetes-training-pipeline-shoresh/runs/55c4d430-e570-4dc8-b22a-f9585637ae49?wsid=/subscriptions/46926bff-fe7d-4284-bc62-eafdda8d8f2c/resourcegroups/DataSienceSolutionAzure/workspaces/myaml
StepRun( Register DataSet ) Status: NotStarted
StepRun( Register DataSet ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_4fd027a638b71238782383b17cb8a15e791d6d6505c48b0b1282537517e0cecc_d.txt
2020-04-17T11:40:12Z Starting output-watcher...
2020-04-17T11:40:12Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
Lo

'Finished'

## 7- Publish Training Pipeline

In [15]:
# Get the most recent run of the pipeline
experiment_name = 'diabetes-training-pipeline-shoresh'
pipeline_experiment = ws.experiments.get(experiment_name)
pipeline_run = list(pipeline_experiment.get_runs())[0]

# Publish the pipeline from the run
published_pipeline = pipeline_run.publish_pipeline(
    name="Diabetes_Training_Pipeline_shoresh", description="Trains diabetes model", version="1.0")

published_pipeline

Name,Id,Status,Endpoint
Diabetes_Training_Pipeline_shoresh,c7f3771f-81d3-4113-895b-2f96ac139677,Active,REST Endpoint


In [16]:
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

https://northeurope.api.azureml.ms/pipelines/v1.0/subscriptions/46926bff-fe7d-4284-bc62-eafdda8d8f2c/resourceGroups/DataSienceSolutionAzure/providers/Microsoft.MachineLearningServices/workspaces/myaml/PipelineRuns/PipelineSubmit/c7f3771f-81d3-4113-895b-2f96ac139677


In [17]:
from azureml.core.authentication import InteractiveLoginAuthentication

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print("Authentication header ready.")

Authentication header ready.


In [18]:
import requests

rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint, 
                         headers=auth_header, 
                         json={"ExperimentName": experiment_name})
run_id = response.json()["Id"]
run_id

'99065334-d7b3-40fe-bc38-34118338de60'

In [19]:
from azureml.pipeline.core.run import PipelineRun
from azureml.widgets import RunDetails

published_pipeline_run = PipelineRun(ws.experiments[experiment_name], run_id)
RunDetails(published_pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

# Creating a Batch Inferencing Servic

In [1]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.3.0 to work with myaml


## 1- script for Batch Inferencing

In [2]:
import os
# Create a folder for the experiment files
experiment_folder = 'batch_pipeline-shoresh'
os.makedirs(experiment_folder, exist_ok=True)

print(experiment_folder)

batch_pipeline-shoresh


In [3]:
%%writefile $experiment_folder/batch_diabetes.py
import os
import numpy as np
from azureml.core import Model
import joblib


def init():
    # Runs when the pipeline step is initialized
    global model

    # load the model
    model_path = Model.get_model_path('diabetes_model_shoresh')
    model = joblib.load(model_path)


def run(mini_batch):
    # This runs for each batch
    resultList = []

    # process each file in the batch
    for f in mini_batch:
        # Read the comma-delimited data into an array
        data = np.genfromtxt(f, delimiter=',')
        # Reshape into a 2-dimensional array for prediction (model expects multiple items)
        prediction = model.predict(data.reshape(1, -1))
        # Append prediction to results
        resultList.append("{}: {}".format(os.path.basename(f), prediction[0]))
    return resultList

Writing batch_pipeline-shoresh/batch_diabetes.py


## 2- Batch Environemnet

In [4]:
from azureml.core import Environment
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.core.runconfig import CondaDependencies

# Add dependencies required by the model
# For scikit-learn models, you need scikit-learn
cd = CondaDependencies.create(pip_packages=['scikit-learn'])

batch_env = Environment(name='batch_environment')
batch_env.python.conda_dependencies = cd
batch_env.docker.enabled = True
batch_env.docker.base_image = DEFAULT_CPU_IMAGE
print('Configuration ready.')

Configuration ready.


## 3- Create Batch Pipeleine

>You're going to use a pipeline to run the batch prediction script, generate predictions from the input data, and save the results as a text file in the output folder. To do this, you can use a ParallelRunStep, which enables the batch data to be processed in parallel and the results collated in a single output file named parallel_run_step.txt.

So you'll need to install the library containing the ParallelRunStep class.

In [5]:
!pip install --upgrade azureml-contrib-pipeline-steps

Requirement already up-to-date: azureml-contrib-pipeline-steps in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (1.3.0)


In [8]:
from azureml.contrib.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.pipeline.core import PipelineData
from azureml.core.compute import ComputeTarget
from azureml.core import Dataset

inference_cluster = ComputeTarget(workspace=ws, name="aml-cluster")
default_ds = ws.get_default_datastore()
batch_data_set = Dataset.get_by_name(ws, 'batch-data')
model = ws.models['diabetes_model_shoresh']

output_dir = PipelineData(name='inferences', 
                          datastore=default_ds, 
                          output_path_on_compute='diabetes/results')

parallel_run_config = ParallelRunConfig(
    source_directory=experiment_folder,
    entry_script="batch_diabetes.py",
    mini_batch_size="5",
    error_threshold=10,
    output_action="append_row",
    environment=batch_env,
    compute_target=inference_cluster,
    node_count=2)

parallelrun_step = ParallelRunStep(
    name='batch-score-diabetes',
    models=[model],
    parallel_run_config=parallel_run_config,
    inputs=[batch_data_set.as_named_input('diabetes_batch')],
    output=output_dir,
    arguments=[],
    allow_reuse=True
)

print('Steps defined')

Steps defined


In [9]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

pipeline = Pipeline(workspace=ws, steps=[parallelrun_step])
pipeline_run = Experiment(ws, 'batch_prediction_pipeline_shoresh').submit(pipeline)
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)

Created step batch-score-diabetes [ef70fad3][9b31f6e6-dd36-453b-ab44-3a291259c325], (This step will run and generate new outputs)
Using data reference diabetes_batch_0 for StepId [371a2377][7d00d637-b0a8-45e9-923f-afef9993efe2], (Consumers of this data are eligible to reuse prior runs.)
Submitted PipelineRun f44431c1-bafc-42b7-ac97-0463d847837f
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/batch_prediction_pipeline_shoresh/runs/f44431c1-bafc-42b7-ac97-0463d847837f?wsid=/subscriptions/46926bff-fe7d-4284-bc62-eafdda8d8f2c/resourcegroups/DataSienceSolutionAzure/workspaces/myaml


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: f44431c1-bafc-42b7-ac97-0463d847837f
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/batch_prediction_pipeline_shoresh/runs/f44431c1-bafc-42b7-ac97-0463d847837f?wsid=/subscriptions/46926bff-fe7d-4284-bc62-eafdda8d8f2c/resourcegroups/DataSienceSolutionAzure/workspaces/myaml
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 12a9f0b3-5a6b-4391-bf4f-c94b3c122283
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/batch_prediction_pipeline_shoresh/runs/12a9f0b3-5a6b-4391-bf4f-c94b3c122283?wsid=/subscriptions/46926bff-fe7d-4284-bc62-eafdda8d8f2c/resourcegroups/DataSienceSolutionAzure/workspaces/myaml
StepRun( batch-score-diabetes ) Status: NotStarted
StepRun( batch-score-diabetes ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_4a206a7a698cfd651a071c119f6e66ef4cb81e6b614b2e284374954184961da0_d.txt
2020-04-20T07:32:58Z Starting output-watcher...
2020-04-20T07:32:58Z IsDedicatedCompute == True,

'Finished'

In [10]:
import pandas as pd
import shutil

shutil.rmtree('diabetes-results', ignore_errors=True)

prediction_run = next(pipeline_run.get_children())
prediction_output = prediction_run.get_output_data('inferences')
prediction_output.download(local_path='diabetes-results')


for root, dirs, files in os.walk('diabetes-results'):
    for file in files:
        if file.endswith('parallel_run_step.txt'):
            result_file = os.path.join(root,file)

# cleanup output format
df = pd.read_csv(result_file, delimiter=":", header=None)
df.columns = ["File", "Prediction"]

# Display the first 20 results
df.head(20)

Unnamed: 0,File,Prediction
0,1.csv,1
1,10.csv,0
2,100.csv,1
3,11.csv,1
4,12.csv,0
5,13.csv,1
6,14.csv,0
7,15.csv,0
8,16.csv,0
9,17.csv,0


## 4- Publish the Pipeline and use its REST Interface

In [11]:
published_pipeline = pipeline_run.publish_pipeline(
    name='Diabetes_Parallel_Batch_Pipeline_shoresh', description='Batch scoring of diabetes data', version='1.0')

published_pipeline

Name,Id,Status,Endpoint
Diabetes_Parallel_Batch_Pipeline_shoresh,ec46d97c-f16d-4a4f-841f-e7eeb2d2165d,Active,REST Endpoint


In [12]:
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

https://northeurope.api.azureml.ms/pipelines/v1.0/subscriptions/46926bff-fe7d-4284-bc62-eafdda8d8f2c/resourceGroups/DataSienceSolutionAzure/providers/Microsoft.MachineLearningServices/workspaces/myaml/PipelineRuns/PipelineSubmit/ec46d97c-f16d-4a4f-841f-e7eeb2d2165d


## 5- Test Publish batch pipeline via rest

In [13]:
from azureml.core.authentication import InteractiveLoginAuthentication

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print('Authentication header ready.')

Authentication header ready.


In [14]:

import requests

rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint, 
                         headers=auth_header, 
                         json={"ExperimentName": "Batch_Pipeline_via_REST"})
run_id = response.json()["Id"]
run_id

'd1109a4e-2297-4af6-8f2b-d4a225b842b1'

In [17]:
from azureml.pipeline.core.run import PipelineRun
from azureml.widgets import RunDetails

published_pipeline_run = PipelineRun(ws.experiments["Batch_Pipeline_via_REST"], run_id)
RunDetails(published_pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [18]:
import pandas as pd
import shutil

shutil.rmtree("diabetes-results1", ignore_errors=True)

prediction_run = next(published_pipeline_run.get_children())
prediction_output = prediction_run.get_output_data("inferences")
prediction_output.download(local_path="diabetes-results1")


for root, dirs, files in os.walk("diabetes-results1"):
    for file in files:
        if file.endswith('parallel_run_step.txt'):
            result_file = os.path.join(root,file)

# cleanup output format
df = pd.read_csv(result_file, delimiter=":", header=None)
df.columns = ["File", "Prediction"]

# Display the first 20 results
df.head(20)

Unnamed: 0,File,Prediction
0,1.csv,1
1,10.csv,0
2,100.csv,1
3,11.csv,1
4,12.csv,0
5,13.csv,1
6,14.csv,0
7,15.csv,0
8,16.csv,0
9,17.csv,0
