In [1]:
import os
import dotenv

from azure.ai.ml import MLClient, command, Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.identity import AzureCliCredential

# Read the workspace coordinates from a local .env file / the process environment.
dotenv.load_dotenv()
subscription = os.getenv("subscription_id")
resource_group = os.getenv("resource_group")
workspace = os.getenv("workspace_name")

# Fail fast when a setting is absent: otherwise the value None is handed to MLClient
# and is later formatted into the datastore URIs as the literal text "None".
_missing_settings = [
    env_name
    for env_name, env_value in (
        ("subscription_id", subscription),
        ("resource_group", resource_group),
        ("workspace_name", workspace),
    )
    if not env_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing environment variable(s): "
        + ", ".join(_missing_settings)
        + " (set them in the environment or in a .env file)"
    )

# Client used by the later cells to submit jobs; authenticates with the Azure CLI login.
ml_client = MLClient(
    credential=AzureCliCredential(),
    subscription_id=subscription,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

In [2]:
# Which site / sonar / datastore this run targets.
plant = 'lavey'
sonar_location = 'tourelle'
datastore_name = 'workspaceblobstore'

# Day to process, as the `YYYY/MM/DD` folder segment. Defined once so that the
# input, intermediate and output locations cannot drift apart.
date_subdir = '2024/04/02'
path_on_datastore = f'{plant}_videos/{sonar_location}/{date_subdir}/'
intermediate_path_on_datastore = f'{plant}_tracking_intermediate_data/{sonar_location}/{date_subdir}/'
output_path_on_datastore = f'{plant}_tracking_output/{sonar_location}/{date_subdir}/'
classification_settings_file = 'classification_settings_lavey_tourelle.yaml'

# long-form Datastore uri format:
uri = f'azureml://subscriptions/{subscription}/resourcegroups/{resource_group}/workspaces/{workspace}/datastores/{datastore_name}/paths/'
uri_input = f'{uri}{path_on_datastore}'
uri_intermediate_data = f'{uri}{intermediate_path_on_datastore}'
uri_output = f'{uri}{output_path_on_datastore}'
uri_train_val_gt_data = f'{uri}{plant}_classification/train_data/{sonar_location}/'


# Run all steps in pipeline

In [5]:
from azure.ai.ml import load_component
from azure.ai.ml.dsl import pipeline

# Component factories built from the YAML specs next to this notebook.
# Calling one of them inside a @pipeline function adds a step to that pipeline.
run_tracking = load_component(
    source="./components/kalman_tracking/tracking.yml",
)
run_classification = load_component(
    source="./components/classification/classification.yml",
)

# Full pipeline: run_tracking -> run_classification -> run_tracking (annotation settings).
#
# Parameters
#   input_videos_dir              folder passed as `data` to both run_tracking steps
#   train_val_gt_data_dir         folder forwarded to the classification step
#   classification_settings_file  forwarded to the classification step
#   output_data_uri               path the classification step's output is bound to
#   intermediate_data_uri         path the first tracking step's output is bound to
#   log_level                     forwarded to the classification step
#
# Pipeline output
#   detections                    the `detections` output of the final run_tracking step
#
# NOTE(review): documented with comments instead of a docstring, and the step variable
# names are left untouched, because the SDK may use both (pipeline description / node
# names) — confirm before changing either.
@pipeline(
    compute="Standard-D1-v2",  # default compute for every step that does not override it
)
def tracking_all_steps(
    input_videos_dir: Input(type=AssetTypes.URI_FOLDER), 
    train_val_gt_data_dir: Input(type=AssetTypes.URI_FOLDER),
    classification_settings_file: str,
    output_data_uri: str = None,
    intermediate_data_uri: str = None,
    log_level: str = "INFO",
):
    
    # Step 1: tracking on the input folder.
    tracking_results = run_tracking(
        data=input_videos_dir,
        tracking_config="kalman_tracking_settings.yaml",
    )
    # Bind the step's `detections` output to an explicit datastore location.
    tracking_results.outputs.detections = Output(type="uri_folder", path=intermediate_data_uri, mode=InputOutputModes.RW_MOUNT)
    
    # Step 2: classification, fed by the detections of step 1.
    classification_run_results = run_classification(
        classification_settings_file=classification_settings_file,
        train_val_gt_data_dir=train_val_gt_data_dir,
        files_to_classify_dir=tracking_results.outputs.detections,
        log_level=log_level,
    )
    classification_run_results.outputs.classified_detections_dir = Output(
        type="uri_folder",
        path=output_data_uri,
        mode=InputOutputModes.RW_MOUNT,
    )
    # This step overrides the pipeline-level default compute.
    classification_run_results.compute = "Standard-A2m-v2"
    
    # Step 3: the tracking component again, this time with the annotation settings and
    # the classification output supplied as `labels_dir`.
    labeling_results = run_tracking(
        data=input_videos_dir,
        labels_dir=classification_run_results.outputs.classified_detections_dir,
        tracking_config="annotate_video_settings.yaml",
    )

    # Expose the last step's output as the pipeline-level output `detections`.
    return {"detections": labeling_results.outputs.detections}
    

# Reduced pipeline: run_tracking -> run_classification (no final annotation step).
#
# Parameters
#   input_data                    folder passed as `data` to run_tracking
#   train_val_data                folder forwarded to the classification step
#   train_val_gt_data             folder forwarded to the classification step
#   classification_settings_file  accepted but not used in the body (see note below)
#   indermediate_data_uri         (sic) path the tracking step's output is bound to; the
#                                 spelling is part of the interface callers use
#   log_level                     forwarded to the classification step
#
# Pipeline output
#   detections                    the classification step's `classified_detections_dir`
#
# NOTE(review): run_classification is called here with `train_val_data`,
# `train_val_gt_data` and `files_to_classify`, while tracking_all_steps calls the same
# component with `classification_settings_file`, `train_val_gt_data_dir` and
# `files_to_classify_dir`. One of the two sets is presumably stale — verify against
# classification.yml before using this pipeline.
# NOTE(review): step variable names are left untouched and no docstring is added because
# the SDK may consume both — confirm before changing either.
@pipeline(
    compute="Standard-D1-v2",  # default compute for every step that does not override it
)
def tracking_base_steps(
    input_data: Input(type=AssetTypes.URI_FOLDER), 
    train_val_data: Input(type=AssetTypes.URI_FOLDER), 
    train_val_gt_data: Input(type=AssetTypes.URI_FOLDER),
    classification_settings_file: str,
    indermediate_data_uri: str = None,
    log_level: str = "INFO",
):
    
    # Step 1: tracking on the input folder.
    tracking_results = run_tracking(
        data=input_data,
        tracking_config="kalman_tracking_settings.yaml",
    )
    # Bind the step's `detections` output to an explicit datastore location.
    tracking_results.outputs.detections = Output(type="uri_folder", path=indermediate_data_uri, mode=InputOutputModes.RW_MOUNT)
    
    # Step 2: classification, fed by the detections of step 1. Unlike in
    # tracking_all_steps, its output is not bound to an explicit path here.
    classification_run_results = run_classification(
        train_val_data=train_val_data,
        train_val_gt_data=train_val_gt_data,
        files_to_classify=tracking_results.outputs.detections,
        log_level=log_level,
    )
    # This step overrides the pipeline-level default compute.
    classification_run_results.compute = "Standard-A2m-v2"
    
    # Expose the classification output as the pipeline-level output `detections`.
    return {"detections": classification_run_results.outputs.classified_detections_dir}

# TODO: adapt the other two pipelines
# Pipeline that skips the first tracking step: run_classification on already existing
# intermediate data, then run_tracking with the annotation settings.
#
# Parameters
#   input_data         folder passed as `data` to run_tracking
#   train_val_data     folder forwarded to the classification step
#   train_val_gt_data  folder forwarded to the classification step
#   intermediate_data  folder passed to the classification step as `files_to_classify`
#   output_data_uri    path the classification step's output is bound to
#   log_level          forwarded to the classification step
#
# Pipeline output
#   detections         the `detections` output of the run_tracking step
#
# NOTE(review): run_classification is called here with `train_val_data`,
# `train_val_gt_data` and `files_to_classify`, while tracking_all_steps calls the same
# component with `classification_settings_file`, `train_val_gt_data_dir` and
# `files_to_classify_dir` — verify against classification.yml which set is current.
# NOTE(review): step variable names are left untouched and no docstring is added because
# the SDK may consume both — confirm before changing either.
@pipeline(
    compute="Standard-D1-v2",  # default compute for every step that does not override it
)
def classification_and_labeling_videos(
    input_data: Input(type=AssetTypes.URI_FOLDER),
    train_val_data: Input(type=AssetTypes.URI_FOLDER), 
    train_val_gt_data: Input(type=AssetTypes.URI_FOLDER),
    intermediate_data: Input(type=AssetTypes.URI_FOLDER),
    output_data_uri: str = None,
    log_level: str = "INFO",
):
    
    # Step 1: classification on the supplied intermediate data.
    classification_run_results = run_classification(
        train_val_data=train_val_data,
        train_val_gt_data=train_val_gt_data,
        files_to_classify=intermediate_data,
        log_level=log_level,
    )
    # Bind the classification output to an explicit datastore location.
    classification_run_results.outputs.classified_detections_dir = Output(
        type="uri_folder",
        path=output_data_uri,
        mode=InputOutputModes.RW_MOUNT,
    )
    # This step overrides the pipeline-level default compute.
    classification_run_results.compute = "Standard-A2m-v2"
    
    # Step 2: the tracking component with the annotation settings and the
    # classification output supplied as `labels_dir`.
    labeling_results = run_tracking(
        data=input_data,
        labels_dir=classification_run_results.outputs.classified_detections_dir,
        tracking_config="annotate_video_settings.yaml",
    )

    # Expose the last step's output as the pipeline-level output `detections`.
    return {"detections": labeling_results.outputs.detections}

# Single-step pipeline: run_tracking with the annotation settings on existing labels.
#
# Parameters
#   input_data  folder passed as `data` to run_tracking
#   labels_dir  folder passed as `labels_dir` to run_tracking
#
# Pipeline output
#   detections  the `detections` output of the run_tracking step
#
# NOTE(review): the step variable name is left untouched and no docstring is added
# because the SDK may consume both — confirm before changing either.
@pipeline(
    compute="Standard-D1-v2",  # compute used by the single step
)
def labeling_videos(
    input_data: Input(type=AssetTypes.URI_FOLDER),
    labels_dir: Input(type=AssetTypes.URI_FOLDER),
):
    labeling_results = run_tracking(
        data=input_data,
        labels_dir=labels_dir,
        tracking_config="annotate_video_settings.yaml",
    )

    # Expose the step's output as the pipeline-level output `detections`.
    return {"detections": labeling_results.outputs.detections}

In [6]:
import shutil
from azure.ai.ml import load_component

# True  -> full pipeline (tracking, classification, annotation step)
# False -> reduced pipeline (tracking and classification only)
generate_videos_with_detections = True


if generate_videos_with_detections:
    pipeline_job = tracking_all_steps(
        classification_settings_file=classification_settings_file,
        input_videos_dir=Input(type=AssetTypes.URI_FOLDER, path=uri_input, mode=InputOutputModes.RO_MOUNT),
        train_val_gt_data_dir=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_gt_data, mode=InputOutputModes.RO_MOUNT),
        intermediate_data_uri=uri_intermediate_data,
        output_data_uri=uri_output,
    )
    pipeline_job.outputs.detections = Output(type="uri_folder", path=uri_output, mode=InputOutputModes.RW_MOUNT)
    pipeline_job.tags = {"date": "2024-04-02"}
else:
    # FIXME(review): this branch does not match tracking_base_steps as defined in this
    # notebook. That function takes `input_data`, `train_val_data`, `train_val_gt_data`
    # and a required `classification_settings_file`, and its only pipeline output is
    # `detections`. Left unchanged because no URI for `train_val_data` is defined here.
    pipeline_job = tracking_base_steps(
        input_videos_dir=Input(type=AssetTypes.URI_FOLDER, path=uri_input, mode=InputOutputModes.DOWNLOAD),
        train_val_gt_data_dir=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_gt_data, mode=InputOutputModes.DOWNLOAD),
        indermediate_data_uri=uri_intermediate_data,
    )
    pipeline_job.outputs.classified_detection_videos_dir = Output(type="uri_folder", path=uri_output, mode=InputOutputModes.RW_MOUNT)
    pipeline_job.tags = {"date": "2023-03-28"}

# copy library files to job source directory temporarily
pth_cls = './components/classification/src/analysis/classification_utils/'
pth_masks = './components/classification/src/analysis/demo/'
pth = './components/kalman_tracking/src/algorithm/'
try:
    shutil.copytree('../analysis/classification_utils/', pth_cls, dirs_exist_ok=True)
    shutil.copytree('../analysis/demo/', pth_masks, dirs_exist_ok=True)
    shutil.copytree('../algorithm/', pth, dirs_exist_ok=True)

    pipeline_job_run = ml_client.jobs.create_or_update(
        pipeline_job, experiment_name=f"track-and-classify-{plant}-{sonar_location}"
    )
finally:
    # Always remove the staged copies, even when copying or submission fails, so that no
    # stale library code is left inside the component source folders. Cleanup errors are
    # ignored so they cannot mask the original exception.
    for staged_dir in (pth, pth_masks, pth_cls):
        shutil.rmtree(staged_dir, ignore_errors=True)

pipeline_job_run

[32mUploading src (4.5 MBs): 100%|██████████| 4502851/4502851 [00:00<00:00, 11922590.96it/s]
[39m



Experiment,Name,Type,Status,Details Page
track-and-classify-lavey-tourelle,musing_animal_0s799zxh4y,pipeline,NotStarted,Link to Azure Machine Learning studio


# Run Jobs for every day of the year

In [167]:
import shutil
from typing import Iterator, Optional

import pandas as pd


def generate_paths(
    datastore_uri: str,
    day_list: list,
    base_path_on_datastore: str = 'stroppel_videos/',
    base_intermediate_path_on_datastore: str = 'stroppel_tracking_intermediate_data_short_burn_in/',
    base_output_path_on_datastore: str = 'stroppel_tracking_output_short_burn_in/',
) -> Iterator[tuple[str, str, str, str]]:
    """Lazily build the input, intermediate and output locations for each day.

    Each location is the plain concatenation
    ``datastore_uri + base_path + day + '/'``; no separators are inserted, so
    ``datastore_uri`` and the base paths must already end with ``/``.

    Args:
        datastore_uri: Prefix put in front of every generated location.
        day_list: Day identifiers (e.g. ``'2023-03-11'``), used verbatim as the
            last path segment.
        base_path_on_datastore: Base folder of the input location.
        base_intermediate_path_on_datastore: Base folder of the intermediate location.
        base_output_path_on_datastore: Base folder of the output location.

    Yields:
        ``(input_location, intermediate_location, output_location, day)`` for
        every entry of ``day_list``, in order.

    Note:
        This is a generator: the result can be iterated only once.
    """
    for date_str in day_list:
        # Create the paths for the current date
        path_on_datastore = f'{datastore_uri}{base_path_on_datastore}{date_str}/'
        intermediate_path_on_datastore = f'{datastore_uri}{base_intermediate_path_on_datastore}{date_str}/'
        output_path_on_datastore = f'{datastore_uri}{base_output_path_on_datastore}{date_str}/'

        yield path_on_datastore, intermediate_path_on_datastore, output_path_on_datastore, date_str


def generate_paths_for_range(
        datastore_uri: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        base_path_on_datastore: str = 'stroppel_videos/',
        base_intermediate_path_on_datastore: str = 'stroppel_tracking_intermediate_data_short_burn_in/',
        base_output_path_on_datastore: str = 'stroppel_tracking_output_short_burn_in/',
    ) -> Iterator[tuple[str, str, str, str]]:
    """Build per-day locations for every day between two dates (both inclusive).

    Args:
        datastore_uri: Prefix put in front of every generated location.
        start_date: First day, in a format ``pandas.date_range`` accepts. When
            omitted or empty, a single placeholder day ``'_test'`` is used.
        end_date: Last day. When omitted, only ``start_date`` itself is used.
        base_path_on_datastore: Base folder of the input location.
        base_intermediate_path_on_datastore: Base folder of the intermediate location.
        base_output_path_on_datastore: Base folder of the output location.

    Returns:
        The generator produced by ``generate_paths`` for the resulting days
        (formatted as ``YYYY-MM-DD``).
    """
    if start_date:
        # date_range needs both ends; fall back to a one-day range when no end is given.
        day_range = pd.date_range(start=start_date, end=end_date or start_date)
        dates = [day.strftime('%Y-%m-%d') for day in day_range]
    else:
        # The placeholder is already a string and must not go through strftime.
        dates = ['_test']

    return generate_paths(datastore_uri, dates, base_path_on_datastore, base_intermediate_path_on_datastore, base_output_path_on_datastore)

In [None]:
# Select the days to submit.
#
# Alternative, for a contiguous range of days:
#     start_date = '2023-06-19'
#     end_date = '2023-06-19'
#     date_generator = generate_paths_for_range(uri, start_date, end_date)
#
# Active choice: an explicit list of days.
# generate_paths returns a one-shot generator, so re-run this cell before re-running
# the submission loop.
# NOTE(review): the default base paths of generate_paths start with 'stroppel_' while
# `plant` is 'lavey' — confirm these are the intended locations.
dates = [
    '2023-03-11',
    '2023-03-20',
    '2023-03-26',
    '2024-04-02',
]
date_generator = generate_paths(uri, dates)

In [172]:
# copy library files to job source directory temporarily
# (same three folders as the single-run cell stages before submitting)
pth_cls = './components/classification/src/analysis/classification_utils/'
pth_masks = './components/classification/src/analysis/demo/'
pth = './components/kalman_tracking/src/algorithm/'

try:
    shutil.copytree('../analysis/classification_utils/', pth_cls, dirs_exist_ok=True)
    shutil.copytree('../analysis/demo/', pth_masks, dirs_exist_ok=True)
    shutil.copytree('../algorithm/', pth, dirs_exist_ok=True)

    # One pipeline job per day. The loop variables use their own names so that the
    # single-run configuration variables of the same meaning are not overwritten.
    for raw_videos_dir_path, day_intermediate_uri, day_output_uri, date_str in date_generator:
        # Keyword names follow the signature of tracking_all_steps.
        pipeline_job = tracking_all_steps(
            classification_settings_file=classification_settings_file,
            input_videos_dir=Input(type=AssetTypes.URI_FOLDER, path=raw_videos_dir_path, mode=InputOutputModes.RO_MOUNT),
            train_val_gt_data_dir=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_gt_data, mode=InputOutputModes.DOWNLOAD),
            intermediate_data_uri=day_intermediate_uri,
            output_data_uri=day_output_uri,
        )
        pipeline_job.outputs.detections = Output(type="uri_folder", path=day_output_uri, mode=InputOutputModes.RW_MOUNT)
        pipeline_job.tags = {"date": date_str, "location": f"{plant}-{sonar_location}"}
        pipeline_job.display_name = f"{plant}-{sonar_location}-{date_str}"

        pipeline_job_run = ml_client.jobs.create_or_update(
            pipeline_job,
            experiment_name=f"{plant}-{sonar_location}-short-burn-in",
        )
        print(f'submitted job with tags: {pipeline_job_run.tags}')
finally:
    # Remove the staged copies exactly once, also when a submission fails. Cleanup
    # errors are ignored so they cannot mask the original exception.
    for staged_dir in (pth, pth_masks, pth_cls):
        shutil.rmtree(staged_dir, ignore_errors=True)

submitted job with tags: {'date': '2023-03-29'}
submitted job with tags: {'date': '2023-03-30'}
submitted job with tags: {'date': '2023-03-31'}
submitted job with tags: {'date': '2023-04-01'}
submitted job with tags: {'date': '2023-04-02'}
submitted job with tags: {'date': '2023-04-03'}
submitted job with tags: {'date': '2023-04-04'}
submitted job with tags: {'date': '2023-04-05'}
submitted job with tags: {'date': '2023-04-06'}
submitted job with tags: {'date': '2023-04-07'}
submitted job with tags: {'date': '2023-04-08'}
submitted job with tags: {'date': '2023-04-09'}
submitted job with tags: {'date': '2023-04-10'}
submitted job with tags: {'date': '2023-04-11'}
submitted job with tags: {'date': '2023-04-12'}
submitted job with tags: {'date': '2023-04-13'}
submitted job with tags: {'date': '2023-04-14'}
submitted job with tags: {'date': '2023-04-15'}
submitted job with tags: {'date': '2023-04-16'}
submitted job with tags: {'date': '2023-04-17'}
submitted job with tags: {'date': '2023-