In [47]:
import os
import dotenv

from azure.ai.ml import MLClient
from azure.identity import AzureCliCredential

# Load workspace coordinates from a local .env file (if present) into the
# process environment.
dotenv.load_dotenv()

# Fail fast when a setting is missing: os.getenv() would otherwise return None,
# and both the client and any URI formatted from these values would silently
# carry the literal "None".
_required_settings = ("subscription_id", "resource_group", "workspace_name")
_missing_settings = [key for key in _required_settings if not os.getenv(key)]
if _missing_settings:
    raise EnvironmentError(
        "Missing Azure ML settings in environment/.env: " + ", ".join(_missing_settings)
    )

subscription = os.getenv("subscription_id")
resource_group = os.getenv("resource_group")
workspace = os.getenv("workspace_name")

# Workspace client authenticated with the Azure CLI credential.
ml_client = MLClient(
    credential=AzureCliCredential(),
    subscription_id=subscription,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

In [49]:
from azure.ai.ml import command, Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes


# Workspace coordinates (read from the environment) and datastore locations.
subscription, resource_group, workspace = (
    os.getenv(key) for key in ("subscription_id", "resource_group", "workspace_name")
)
datastore_name = "workspaceblobstore"
path_on_datastore = "stroppel_videos/_test/"
output_path_on_datastore = "stroppel_tracking_output/_test/"

# Long-form datastore URI prefix; the input and output folders are appended to it.
uri = (
    "azureml://"
    f"subscriptions/{subscription}/"
    f"resourcegroups/{resource_group}/"
    f"workspaces/{workspace}/"
    f"datastores/{datastore_name}/"
    "paths/"
)
uri_input = uri + path_on_datastore
uri_output = uri + output_path_on_datastore

# Folder at `uri_input`, mounted read-only for the job.
_job_data_input = Input(
    type="uri_folder",
    path=uri_input,
    mode=InputOutputModes.RO_MOUNT,
)
# Folder at `uri_output`, mounted read-write for the job.
_job_output = Output(
    type="uri_folder",
    path=uri_output,
    mode=InputOutputModes.RW_MOUNT,
    # name="stroppel_tracking_output"
)

# Keys are the names referenced by the `${{inputs.*}}` / `${{outputs.*}}`
# expressions in the job arguments.
inputs = {"job_data_path": _job_data_input}
outputs = {"job_output_path": _job_output}

# Command-line options for the tracking script as (flag, value) pairs; the
# `${{...}}` values are binding expressions referring to the job inputs/outputs.
_argument_pairs = (
    ("--tracking_config", "kalman_tracking_settings.yaml"),
    ("--job_inputs_path", "${{inputs.job_data_path}}"),
    ("--job_output_path", "${{outputs.job_output_path}}"),
    ("--log_level", "INFO"),
)
# Flatten the pairs into the flat token list used by the job definitions.
arguments = [token for pair in _argument_pairs for token in pair]

# Full command line: the tracking script followed by the space-joined arguments.
_job_command = f"python kalman_tracking_azure.py {' '.join(arguments)}"

# Command job that runs the tracking script from the current directory.
job = command(
    experiment_name="batch_tracking",
    environment="fishy_environment@latest",
    compute="Standard-A1-v2",
    code=".",
    command=_job_command,
    inputs=inputs,
    outputs=outputs,
)

In [50]:
import shutil
# Copy the library files into the job source directory temporarily so they
# are available next to the code submitted with the job.
pth = './algorithm/'
shutil.copytree('../algorithm/', pth)
try:
    returned_job = ml_client.create_or_update(job)
finally:
    # Always remove the temporary copy, even when submission fails; a leftover
    # directory would make the next copytree() call raise FileExistsError.
    shutil.rmtree(pth)

[32mUploading run_on_azure (4.66 MBs): 100%|##########| 4662837/4662837 [00:01<00:00, 3822332.10it/s]
[39m



# Large Scale Parallel Job

In [127]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient, Input, Output, load_component
from azure.ai.ml.dsl import pipeline
from azure.ai.ml.entities import Environment
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.parallel import parallel_run_function, RunFunction

In [128]:
# Retry policy applied by the parallel component.
_retry_settings = {"max_retries": 2, "timeout": 60}

# Task definition: entry script, its arguments, and the environment to run in.
_tracking_task = RunFunction(
    code=".",
    entry_script="kalman_tracking_azure.py",
    program_arguments=" ".join(arguments),
    environment="fishy_environment@latest",
)

# Parallel component that processes the data bound to `job_data_path`
# on two instances, one mini-batch of size "1" at a time per instance.
file_batch_inference = parallel_run_function(
    name="batch_run_kalman_tracking",
    display_name="Batch Run Kalman Tracking",
    description="parallel component for batch tracking on videos",
    inputs=inputs,
    outputs=outputs,
    input_data="${{inputs.job_data_path}}",
    instance_count=2,
    max_concurrency_per_instance=1,
    mini_batch_size="1",
    mini_batch_error_threshold=1,
    retry_settings=_retry_settings,
    logging_level="DEBUG",
    task=_tracking_task,
)

In [135]:
# Pipeline definition: runs the parallel component as its only step and exposes
# the component's `job_output_path` output as the pipeline output
# `pipeline_job_out_file`.
# NOTE(review): documented with comments rather than a docstring, and the local
# variable name left untouched, because the azure.ai.ml DSL presumably derives
# the pipeline description / node name from them — confirm before changing either.
@pipeline()
def parallel_in_pipeline(input_data_path: Input):

    # Instantiate the parallel component, binding its `job_data_path` input to
    # the pipeline-level input.
    batch_inference_with_file_data = file_batch_inference(job_data_path=input_data_path)

    return {
        "pipeline_job_out_file": batch_inference_with_file_data.outputs.job_output_path,
    }

# Create the pipeline job, feeding it the folder input defined for the jobs above.
pipeline_job = parallel_in_pipeline(
    input_data_path=inputs['job_data_path'],
)
# The component output bound to this pipeline output (`job_output_path`) is
# declared as a "uri_folder", so the pipeline-level output must use the folder
# type as well (it was previously set to URI_FILE, which does not match).
pipeline_job.outputs.pipeline_job_out_file.type = AssetTypes.URI_FOLDER

# set pipeline level compute
pipeline_job.settings.default_compute = "Standard-A1-v2"

In [136]:
import shutil

# Copy the library files into the job source directory temporarily so they
# are available next to the code submitted with the pipeline.
pth = './algorithm/'
shutil.copytree('../algorithm/', pth)
try:
    pipeline_job = ml_client.jobs.create_or_update(
        pipeline_job, experiment_name="parallel_pipeline_kalman_tracking"
    )
finally:
    # Always remove the temporary copy, even when submission fails; a leftover
    # directory would make the next copytree() call raise FileExistsError.
    shutil.rmtree(pth)

[32mUploading run_on_azure (3.49 MBs): 100%|##########| 3486555/3486555 [00:01<00:00, 2032259.73it/s]
[39m



# SDK V1

In [110]:
from azureml.core.workspace import Workspace, Dataset

# Workspace handle for the SDK v1 API, built from the same coordinates
# (subscription, resource group, workspace name) used earlier in the notebook.
_workspace_coordinates = {
    "subscription_id": subscription,
    "resource_group": resource_group,
    "workspace_name": workspace,
}
ws = Workspace(**_workspace_coordinates)

In [119]:
from azureml.pipeline.steps import ParallelRunConfig
from azureml.core import Environment, ComputeTarget, Experiment


# Look up the environment (label 'latest') and the compute target by name
# in the workspace before building the run configuration.
_tracking_environment = Environment.get(workspace=ws, name='fishy_environment', label='latest')
_tracking_compute = ComputeTarget(workspace=ws, name='Standard-A1-v2')

# Parallel run configuration: entry script from the current directory,
# mini-batch size '1', two nodes, summary-only output.
parallel_run_config = ParallelRunConfig(
    source_directory='.',
    entry_script='kalman_tracking_azure.py',
    mini_batch_size='1',
    error_threshold=10,
    output_action='summary_only',
    environment=_tracking_environment,
    compute_target=_tracking_compute,
    node_count=2,
)

In [120]:
from azureml.pipeline.steps import ParallelRunStep
from azureml.pipeline.core import PipelineData
from azureml.data import OutputFileDatasetConfig

# Step input: version 1 of the dataset named 'test_videos' in the workspace.
input_data = Dataset.get_by_name(ws, name='test_videos', version=1)

# Step output, set up to be registered under the given name on completion.
_output_config = OutputFileDatasetConfig('output_data')
output_data = _output_config.register_on_complete('azure blob storage output directory')

_named_input = input_data.as_named_input('input_data')

# NOTE(review): `arguments` is the list built for the SDK v2 jobs and contains
# `${{inputs...}}` / `${{outputs...}}` binding expressions — confirm the v1
# step (and the entry script) handle them as intended.
parallel_run_step = ParallelRunStep(
    name='parallel_pipeline_kalman_tracking',
    parallel_run_config=parallel_run_config,
    inputs=[_named_input],
    output=output_data,
    arguments=arguments,
    allow_reuse=False,
)

# Experiment under which the pipeline run is submitted.
exp = Experiment(workspace=ws, name="test_parallel_pipeline")

In [121]:
from azureml.pipeline.core import Pipeline
import shutil

# Copy the library files into the source directory temporarily so they are
# available next to the entry script when the pipeline is submitted.
pth = './algorithm/'
shutil.copytree('../algorithm/', pth)
try:
    # Named `tracking_pipeline` rather than `pipeline` so this cell does not
    # rebind the `pipeline` decorator imported from azure.ai.ml.dsl; with the
    # old name, re-running the `@pipeline()` cell afterwards would fail.
    tracking_pipeline = Pipeline(workspace=ws, steps=[parallel_run_step])
    pipeline_run = exp.submit(tracking_pipeline)
finally:
    # Always remove the temporary copy, even when submission fails; a leftover
    # directory would make the next copytree() call raise FileExistsError.
    shutil.rmtree(pth)

Created step parallel_pipeline_kalman_tracking [9d55c335][ba9a33c9-a5fd-4356-8eaf-b0f449d4c736], (This step will run and generate new outputs)
Submitted PipelineRun ec069bf1-8eb6-4268-9421-0b79cec8bb5f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/ec069bf1-8eb6-4268-9421-0b79cec8bb5f?wsid=/subscriptions/e9eedc4b-b891-4bf4-a924-06b9c6e0f02f/resourcegroups/axsa-lab-appl-fishsonar-rg/workspaces/axsa-lab-appl-fishsonar-ml&tid=8619c67c-945a-48ae-8e77-35b1b71c9b98


In [None]:
import cv2

def save_first_frame(video_path, output_path):
    """Save the first frame of a video as an image file.

    Args:
        video_path: Path of the video file to read.
        output_path: Path of the image file to write; the image format
            follows from the file extension given here.

    Returns:
        True when the frame was read and written, False otherwise. A message
        is printed on failure instead of raising.
    """
    vidcap = cv2.VideoCapture(video_path)
    try:
        success, image = vidcap.read()
    finally:
        # Release the capture handle regardless of whether reading worked.
        vidcap.release()

    if not success:
        print(f"Failed to open video file at {video_path}")
        return False

    # imwrite reports failure through its return value rather than raising,
    # so it has to be checked explicitly.
    if not cv2.imwrite(output_path, image):
        print(f"Failed to write first frame to {output_path}")
        return False
    return True
        
# Write the first frame of the raw recording to the masks directory.
save_first_frame(
    video_path="../data/raw/videos/Passe3_Mar11_12-37-58.mp4",
    output_path="./masks/lavey_flow_area_mask.png",
)