In [1]:
import os
import dotenv

from azure.ai.ml import MLClient
from azure.identity import AzureCliCredential

# Read the workspace coordinates from a local .env file / the process environment.
dotenv.load_dotenv()
subscription = os.getenv("subscription_id")
resource_group = os.getenv("resource_group")
workspace = os.getenv("workspace_name")

# Fail fast: later cells interpolate these values into azureml:// URIs, where a
# missing setting would otherwise show up as the literal text "None".
_missing_settings = [
    env_name
    for env_name, env_value in (
        ("subscription_id", subscription),
        ("resource_group", resource_group),
        ("workspace_name", workspace),
    )
    if not env_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing Azure ML settings in environment/.env: " + ", ".join(_missing_settings)
    )

# Workspace client authenticated through the Azure CLI login.
ml_client = MLClient(
    AzureCliCredential(),
    subscription_id=subscription,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

In [4]:
from azure.ai.ml import command, Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.dsl import pipeline


# Datastore name and the datastore-relative folders used for the test run.
datastore_name = 'workspaceblobstore'
path_on_datastore = 'stroppel_videos/_test/'
output_path_on_datastore = 'stroppel_tracking_output/_test_with_videos_2/'

# Long-form datastore URI prefix; datastore-relative paths are appended to it.
uri = (
    f'azureml://subscriptions/{subscription}'
    f'/resourcegroups/{resource_group}'
    f'/workspaces/{workspace}'
    f'/datastores/{datastore_name}/paths/'
)
uri_input = uri + path_on_datastore
uri_output = uri + output_path_on_datastore

# A single video file, consumed in DOWNLOAD mode.
inputs = dict(
    job_data_path=Input(
        type="uri_file",
        path=uri_input + 'start_2023-03-28T00-25-10.036+00-00.mp4',
        mode=InputOutputModes.DOWNLOAD,
    ),
)
# The results folder, attached in RW_MOUNT mode.
outputs = dict(
    job_output_path=Output(
        type="uri_folder",
        path=uri_output,
        mode=InputOutputModes.RW_MOUNT,
    ),
)

In [3]:
# Command-line flags for the tracking script. The ${{...}} values refer, by key,
# to the entries of the `inputs` / `outputs` dicts handed to command() below.
_flag_values = {
    "--tracking_config": "kalman_tracking_settings.yaml",
    "--job_inputs_path": "${{inputs.job_data_path}}",
    "--job_output_path": "${{outputs.job_output_path}}",
    "--log_level": "INFO",
}
# Flatten to [flag, value, flag, value, ...] in declaration order.
arguments = [token for flag_and_value in _flag_values.items() for token in flag_and_value]

# Stand-alone command job that runs the tracking script from the component sources.
job = command(
    code="./components/kalman_tracking/src/",
    command=" ".join(["python kalman_tracking_azure.py", *arguments]),
    environment="fishy_environment:6",
    compute="Standard-A1-v2",
    experiment_name="batch_tracking",
    inputs=inputs,
    outputs=outputs,
)

In [4]:
import shutil

# Copy the library files into the job source directory temporarily so that they
# are uploaded together with the job's code.
pth = './components/kalman_tracking/src/algorithm/'
shutil.copytree('../algorithm/', pth)
try:
    returned_job = ml_client.create_or_update(job)
finally:
    # Remove the staged copy even when submission fails; a leftover directory
    # would make the next copytree() call raise FileExistsError.
    shutil.rmtree(pth)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


# Run tracking as a normal pipeline

In [3]:
from azure.ai.ml import load_component

# Build the tracking component from its YAML spec; it is called as a step
# inside the pipeline functions defined in later cells.
run_tracking = load_component(source="./components/kalman_tracking/tracking.yml")

In [58]:
from azure.ai.ml.dsl import pipeline


# Single-step pipeline: runs the `run_tracking` component on `input_data` and
# exposes the step's `detections` output as the pipeline output "detections".
# `output_data` is accepted but not referenced in the body.
# NOTE(review): documented with comments rather than a docstring — confirm
# whether the @pipeline decorator reads the docstring before adding one.
@pipeline(
    compute="Standard-A1-v2",
)
def kalman_tracking_pipeline(
    input_data: Input(type=AssetTypes.URI_FOLDER), 
    output_data: Output(type=AssetTypes.URI_FOLDER) = None,
):
    tracking_results = run_tracking(
        data=input_data,
    )
    
    # Disabled: would pin the detections output to an explicit path.
    # NOTE(review): `output_uri` is not defined anywhere in the visible notebook.
    # tracking_results.outputs.detections = Output(
    #     type="uri_folder",
    #     path=output_uri,
    #     mode=InputOutputModes.RW_MOUNT,
    # )

    return {"detections": tracking_results.outputs.detections}

In [10]:
# Folder input for the tracking pipeline: the whole `uri_input` directory,
# consumed in DOWNLOAD mode.
pipeline_input = Input(
    type="uri_folder",
    mode=InputOutputModes.DOWNLOAD,
    path=uri_input,
)

In [None]:
import shutil

pipeline_job = kalman_tracking_pipeline(
    input_data=pipeline_input,
)

# Copy the library files into the component source directory temporarily so
# that they are uploaded together with the component code.
pth = './components/kalman_tracking/src/algorithm/'
shutil.copytree('../algorithm/', pth)
try:
    pipeline_job_run = ml_client.jobs.create_or_update(
        pipeline_job, experiment_name="pipeline-kalman-tracking"
    )
finally:
    # Remove the staged copy even when submission fails; a leftover directory
    # would make the next copytree() call raise FileExistsError.
    shutil.rmtree(pth)
pipeline_job_run

# Classify the Trajectories

In [4]:
from azure.ai.ml import load_component

# Build the classification component from its YAML spec; it is called as a
# step inside the pipeline functions defined in later cells.
run_classification = load_component(source="./components/classification/classification.yml")

In [5]:
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import command, Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes


# Single-step pipeline: forwards the three folder inputs and `log_level` to the
# `run_classification` component and exposes the step's
# `classified_detections_dir` output under the same name.
# `classified_detections_dir` (the parameter) is accepted but not referenced in
# the body.
# NOTE(review): documented with comments rather than a docstring — confirm
# whether the @pipeline decorator reads the docstring before adding one.
@pipeline(
    compute="Standard-A1-v2",
)
def track_classification_pipeline(
    train_val_data: Input(type=AssetTypes.URI_FOLDER), 
    train_val_gt_data: Input(type=AssetTypes.URI_FOLDER),
    files_to_classify: Input(type=AssetTypes.URI_FOLDER),
    classified_detections_dir: Output(type=AssetTypes.URI_FOLDER) = None,
    log_level: str = "INFO",
):
    
    # The only step of this pipeline.
    classification_run_results = run_classification(
        train_val_data=train_val_data,
        train_val_gt_data=train_val_gt_data,
        files_to_classify=files_to_classify,
        log_level=log_level
    )
    


    return {"classified_detections_dir": classification_run_results.outputs.classified_detections_dir}

In [71]:
# Long-form datastore URI prefix (same layout as used for the tracking job).
uri = (
    f'azureml://subscriptions/{subscription}'
    f'/resourcegroups/{resource_group}'
    f'/workspaces/{workspace}'
    f'/datastores/{datastore_name}/paths/'
)
# Folders passed as train_val_data / train_val_gt_data to the classification pipeline.
uri_train_val_data = uri + 'stroppel_classification/train_data/cached_features/'
uri_train_val_gt_data = uri + 'stroppel_classification/train_data/ground_truth_tracks/'

# Folder passed as files_to_classify.
uri_files_to_classify = uri + 'stroppel_tracking_output/_test_with_videos/'

In [72]:
import shutil

pipeline_job = track_classification_pipeline(
    train_val_data=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_data, mode=InputOutputModes.DOWNLOAD),
    train_val_gt_data=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_gt_data, mode=InputOutputModes.DOWNLOAD),
    files_to_classify=Input(type=AssetTypes.URI_FOLDER, path=uri_files_to_classify, mode=InputOutputModes.DOWNLOAD),
)

# Copy the library files into the component source directory temporarily so
# that they are uploaded together with the component code.
pth = './components/classification/src/analysis/classification_utils/'
shutil.copytree('../analysis/classification_utils/', pth)
try:
    pipeline_job_run = ml_client.jobs.create_or_update(
        pipeline_job, experiment_name="pipeline-track-classification"
    )
finally:
    # Remove the staged copy even when submission fails; a leftover directory
    # would make the next copytree() call raise FileExistsError.
    shutil.rmtree(pth)
pipeline_job_run

Uploading src (4.49 MBs): 100%|##########| 4488338/4488338 [00:00<00:00, 4972237.76it/s]




Experiment,Name,Type,Status,Details Page
pipeline-track-classification,jolly_yuca_kd7ftj93hl,pipeline,NotStarted,Link to Azure Machine Learning studio


# Generate Videos with Classifications

In [61]:
from azure.ai.ml import load_component

# (Re)build the tracking component from its YAML spec so this section can be
# run without the earlier tracking cells.
run_tracking = load_component(source="./components/kalman_tracking/tracking.yml")

In [34]:
# Datastore-relative folder holding the label files.
# NOTE(review): presumably the output folder of an earlier classification run —
# the id in the path is hard-coded and must be updated for other runs.
label_files_path_on_datastore = 'azureml/578c4768-4789-4046-98a5-057188bc2662/classified_detections_dir/'
uri_label_files = uri + label_files_path_on_datastore

# Folder input carrying the labels, attached in RO_MOUNT mode.
labels_dir_input = Input(
    path=uri_label_files,
    type=AssetTypes.URI_FOLDER,
    mode=InputOutputModes.RO_MOUNT,
)

In [35]:
from azure.ai.ml.dsl import pipeline


# Single-step pipeline: runs the `run_tracking` component with an additional
# `labels_dir` input and the `save_output_video` flag, and exposes the step's
# `detections` output as the pipeline output "detections".
# `output_data` is accepted but not referenced in the body.
# NOTE(review): documented with comments rather than a docstring — confirm
# whether the @pipeline decorator reads the docstring before adding one.
@pipeline(
    compute="Standard-D1-v2",
)
def track_labeling_pipeline(
    input_data: Input(type=AssetTypes.URI_FOLDER), 
    labels_dir: Input(type=AssetTypes.URI_FOLDER),
    output_data: Output(type=AssetTypes.URI_FOLDER) = None,
    save_output_video: bool = True,
):
    # The only step of this pipeline.
    tracking_results = run_tracking(
        data=input_data,
        labels_dir=labels_dir,
        save_output_video=save_output_video,
    )

    return {"detections": tracking_results.outputs.detections}

In [36]:
import shutil

pipeline_job = track_labeling_pipeline(
    input_data=pipeline_input,
    labels_dir=labels_dir_input,
)

# Copy the library files into the component source directory temporarily so
# that they are uploaded together with the component code.
pth = './components/kalman_tracking/src/algorithm/'
shutil.copytree('../algorithm/', pth)
try:
    pipeline_job_run = ml_client.jobs.create_or_update(
        pipeline_job, experiment_name="pipeline-kalman-labeling"
    )
finally:
    # Remove the staged copy even when submission fails; a leftover directory
    # would make the next copytree() call raise FileExistsError.
    shutil.rmtree(pth)
pipeline_job_run

[32mUploading src (4.56 MBs): 100%|##########| 4560902/4560902 [00:01<00:00, 2743127.63it/s]
[39m



Experiment,Name,Type,Status,Details Page
pipeline-kalman-labeling,sweet_tail_g4ydknpfcg,pipeline,NotStarted,Link to Azure Machine Learning studio


# Run all steps in pipeline

In [6]:
from azure.ai.ml import load_component

# Build both components from their YAML specs; the two pipeline functions
# defined below call them as steps.
run_tracking = load_component(source="./components/kalman_tracking/tracking.yml")
run_classification = load_component(source="./components/classification/classification.yml")

# Three-step pipeline:
#   1. run_tracking on `input_data` with save_output_video=False; its
#      `detections` output is written to `indermediate_data_uri`.
#   2. run_classification on those detections; its `classified_detections_dir`
#      output is written to `output_data_uri`, and the step's compute is set to
#      "Standard-A2m-v2" instead of the decorator's "Standard-D1-v2".
#   3. run_tracking on `input_data` again, now with the classification output as
#      `labels_dir` and save_output_video=True.
# The pipeline output "detections" is the `detections` output of step 3.
# NOTE(review): `indermediate_data_uri` looks like a misspelling of
# "intermediate", but callers pass it by keyword — rename only together with them.
# NOTE(review): documented with comments rather than a docstring — confirm
# whether the @pipeline decorator reads the docstring before adding one.
@pipeline(
    compute="Standard-D1-v2",
)
def tracking_all_steps(
    input_data: Input(type=AssetTypes.URI_FOLDER), 
    train_val_data: Input(type=AssetTypes.URI_FOLDER), 
    train_val_gt_data: Input(type=AssetTypes.URI_FOLDER),
    output_data_uri: str = None,
    indermediate_data_uri: str = None,
    log_level: str = "INFO",
):
    
    # Step 1: tracking without video output.
    tracking_results = run_tracking(
        data=input_data,
        save_output_video=False,
    )
    tracking_results.outputs.detections = Output(type="uri_folder", path=indermediate_data_uri, mode=InputOutputModes.RW_MOUNT)
    
    # Step 2: classify the detections produced by step 1.
    classification_run_results = run_classification(
        train_val_data=train_val_data,
        train_val_gt_data=train_val_gt_data,
        files_to_classify=tracking_results.outputs.detections,
        log_level=log_level,
    )
    classification_run_results.outputs.classified_detections_dir = Output(
        type="uri_folder",
        path=output_data_uri,
        mode=InputOutputModes.RW_MOUNT,
    )
    # Per-step compute override for the classification step.
    classification_run_results.compute = "Standard-A2m-v2"
    
    # Step 3: tracking again, with labels and video output enabled.
    labeling_results = run_tracking(
        data=input_data,
        labels_dir=classification_run_results.outputs.classified_detections_dir,
        save_output_video=True,
    )

    return {"detections": labeling_results.outputs.detections}
    

# Two-step pipeline (tracking + classification, no labeled-video step):
#   1. run_tracking on `input_data` with save_output_video=False; its
#      `detections` output is written to `indermediate_data_uri`.
#   2. run_classification on those detections, with the step's compute set to
#      "Standard-A2m-v2" instead of the decorator's "Standard-D1-v2".
# The pipeline output "classified_detection_videos_dir" is the classification
# step's `classified_detections_dir` output.
# NOTE(review): `indermediate_data_uri` looks like a misspelling of
# "intermediate", but callers pass it by keyword — rename only together with them.
# NOTE(review): documented with comments rather than a docstring — confirm
# whether the @pipeline decorator reads the docstring before adding one.
@pipeline(
    compute="Standard-D1-v2",
)
def tracking_base_steps(
    input_data: Input(type=AssetTypes.URI_FOLDER), 
    train_val_data: Input(type=AssetTypes.URI_FOLDER), 
    train_val_gt_data: Input(type=AssetTypes.URI_FOLDER),
    indermediate_data_uri: str = None,
    log_level: str = "INFO",
):
    
    # Step 1: tracking without video output.
    tracking_results = run_tracking(
        data=input_data,
        save_output_video=False,
    )
    tracking_results.outputs.detections = Output(type="uri_folder", path=indermediate_data_uri, mode=InputOutputModes.RW_MOUNT)
    
    # Step 2: classify the detections produced by step 1.
    classification_run_results = run_classification(
        train_val_data=train_val_data,
        train_val_gt_data=train_val_gt_data,
        files_to_classify=tracking_results.outputs.detections,
        log_level=log_level,
    )
    # Per-step compute override for the classification step.
    classification_run_results.compute = "Standard-A2m-v2"
    
    return {"classified_detection_videos_dir": classification_run_results.outputs.classified_detections_dir}

In [8]:
import shutil
from azure.ai.ml import load_component

# Datastore-relative folders for the one-file sanity-check run.
path_on_datastore = 'stroppel_videos/_sanity_check_one_file/'
intermediate_path_on_datastore = 'stroppel_tracking_intermediate_data/_sanity_check_one_file/'
output_path_on_datastore = 'stroppel_tracking_output/_sanity_check_one_file/'

# Alternative configuration, kept for reference:
# path_on_datastore = 'stroppel_videos/_sanity_check_one_file/'
# intermediate_path_on_datastore = 'stroppel_tracking_intermediate_data/_sanity_check_one_file/'
# output_path_on_datastore = 'stroppel_tracking_output/_sanity_check_second_file/'

# Full datastore URIs, built on the `uri` prefix defined in an earlier cell.
uri_input = uri + path_on_datastore
uri_intermediate_data = uri + intermediate_path_on_datastore
uri_output = uri + output_path_on_datastore
uri_train_val_data = uri + 'stroppel_classification/train_data/cached_features/'
uri_train_val_gt_data = uri + 'stroppel_classification/train_data/ground_truth_tracks/'

# True  -> tracking_all_steps (adds the step that saves output videos)
# False -> tracking_base_steps (tracking + classification only)
generate_videos_with_detections = True

# Arguments that both pipeline variants take.
shared_pipeline_args = dict(
    input_data=Input(type=AssetTypes.URI_FOLDER, path=uri_input, mode=InputOutputModes.DOWNLOAD),
    train_val_data=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_data, mode=InputOutputModes.DOWNLOAD),
    train_val_gt_data=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_gt_data, mode=InputOutputModes.DOWNLOAD),
    indermediate_data_uri=uri_intermediate_data,
)

if generate_videos_with_detections:
    pipeline_job = tracking_all_steps(output_data_uri=uri_output, **shared_pipeline_args)
    pipeline_job.outputs.detections = Output(type="uri_folder", path=uri_output, mode=InputOutputModes.RW_MOUNT)
else:
    pipeline_job = tracking_base_steps(**shared_pipeline_args)
    pipeline_job.outputs.classified_detection_videos_dir = Output(type="uri_folder", path=uri_output, mode=InputOutputModes.RW_MOUNT)

# Both variants carry the same tag.
pipeline_job.tags = {"date": "2023-03-28"}

# Copy the library files into each component's source directory temporarily so
# that they are uploaded together with the component code.
pth_cls = './components/classification/src/analysis/classification_utils/'
pth = './components/kalman_tracking/src/algorithm/'

staged_dirs = []  # directories this cell created and therefore has to remove
try:
    for library_dir, staged_dir in (
        ('../analysis/classification_utils/', pth_cls),
        ('../algorithm/', pth),
    ):
        shutil.copytree(library_dir, staged_dir)
        staged_dirs.append(staged_dir)

    pipeline_job_run = ml_client.jobs.create_or_update(
        pipeline_job, experiment_name="pipeline-track-all-steps"
    )
finally:
    # Clean up even when staging or submission fails; a leftover directory
    # would make the next copytree() call raise FileExistsError. Only
    # directories that were fully staged here are removed.
    for staged_dir in reversed(staged_dirs):
        shutil.rmtree(staged_dir)
pipeline_job_run

Experiment,Name,Type,Status,Details Page
pipeline-track-all-steps,honest_sun_4rgvjnvnxz,pipeline,NotStarted,Link to Azure Machine Learning studio


# Run jobs for every day in a date range

In [167]:
from typing import Optional
import pandas as pd


def generate_paths(
        datastore_uri: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        base_path_on_datastore: str = 'stroppel_videos/',
        base_intermediate_path_on_datastore: str = 'stroppel_tracking_intermediate_data/',
        base_output_path_on_datastore: str = 'stroppel_tracking_output/',
    ):
    """Yield the per-day input, intermediate and output paths on the datastore.

    Args:
        datastore_uri: Prefix put in front of every generated path.
        start_date: First day of the range (anything ``pd.date_range`` accepts).
            When falsy, a single entry for the ``'_test'`` folder is produced
            and ``end_date`` is ignored.
        end_date: Last day of the range, inclusive. When omitted, only
            ``start_date`` itself is produced.
        base_path_on_datastore: Folder holding the per-day input folders.
        base_intermediate_path_on_datastore: Folder holding the per-day
            intermediate folders.
        base_output_path_on_datastore: Folder holding the per-day output folders.

    Yields:
        Tuples ``(input_path, intermediate_path, output_path, date_str)`` where
        ``date_str`` is the day formatted as ``YYYY-MM-DD`` (or ``'_test'``).
    """
    if start_date:
        # pd.date_range() needs both bounds here; without an explicit end the
        # range collapses to the single start day instead of raising.
        last_date = end_date if end_date else start_date
        date_labels = [
            day.strftime('%Y-%m-%d')
            for day in pd.date_range(start=start_date, end=last_date)
        ]
    else:
        date_labels = ['_test']

    for date_str in date_labels:
        # All three paths share the same per-day sub-folder name.
        path_on_datastore = f'{datastore_uri}{base_path_on_datastore}{date_str}/'
        intermediate_path_on_datastore = f'{datastore_uri}{base_intermediate_path_on_datastore}{date_str}/'
        output_path_on_datastore = f'{datastore_uri}{base_output_path_on_datastore}{date_str}/'

        yield path_on_datastore, intermediate_path_on_datastore, output_path_on_datastore, date_str

In [172]:
import shutil

# Inclusive range of days to submit one pipeline job for.
start_date = '2023-03-29'
end_date = '2023-04-30'

# Copy the library files into each component's source directory temporarily so
# that they are uploaded together with the component code.
pth_cls = './components/classification/src/analysis/classification_utils/'
pth = './components/kalman_tracking/src/algorithm/'

staged_dirs = []  # directories this cell created and therefore has to remove
try:
    for library_dir, staged_dir in (
        ('../analysis/classification_utils/', pth_cls),
        ('../algorithm/', pth),
    ):
        shutil.copytree(library_dir, staged_dir)
        staged_dirs.append(staged_dir)

    for raw_videos_dir_path, intermediate_path_on_datastore, output_path_on_datastore, date_str in generate_paths(uri, start_date, end_date):
        pipeline_job = tracking_base_steps(
            input_data=Input(type=AssetTypes.URI_FOLDER, path=raw_videos_dir_path, mode=InputOutputModes.DOWNLOAD),
            train_val_data=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_data, mode=InputOutputModes.DOWNLOAD),
            train_val_gt_data=Input(type=AssetTypes.URI_FOLDER, path=uri_train_val_gt_data, mode=InputOutputModes.DOWNLOAD),
            indermediate_data_uri=intermediate_path_on_datastore,
        )
        pipeline_job.outputs.classified_detection_videos_dir = Output(type="uri_folder", path=output_path_on_datastore, mode=InputOutputModes.RW_MOUNT)
        # Tag each job with its day so runs can be told apart.
        pipeline_job.tags = {"date": date_str}

        pipeline_job_run = ml_client.jobs.create_or_update(
            pipeline_job,
            experiment_name="pipeline-track-all-steps",
        )
        print(f'submitted job with tags: {pipeline_job_run.tags}')
finally:
    # Clean up even when one of the submissions fails part-way through the
    # range; a leftover directory would make the next copytree() call raise
    # FileExistsError. Only directories fully staged here are removed.
    for staged_dir in reversed(staged_dirs):
        shutil.rmtree(staged_dir)

submitted job with tags: {'date': '2023-03-29'}
submitted job with tags: {'date': '2023-03-30'}
submitted job with tags: {'date': '2023-03-31'}
submitted job with tags: {'date': '2023-04-01'}
submitted job with tags: {'date': '2023-04-02'}
submitted job with tags: {'date': '2023-04-03'}
submitted job with tags: {'date': '2023-04-04'}
submitted job with tags: {'date': '2023-04-05'}
submitted job with tags: {'date': '2023-04-06'}
submitted job with tags: {'date': '2023-04-07'}
submitted job with tags: {'date': '2023-04-08'}
submitted job with tags: {'date': '2023-04-09'}
submitted job with tags: {'date': '2023-04-10'}
submitted job with tags: {'date': '2023-04-11'}
submitted job with tags: {'date': '2023-04-12'}
submitted job with tags: {'date': '2023-04-13'}
submitted job with tags: {'date': '2023-04-14'}
submitted job with tags: {'date': '2023-04-15'}
submitted job with tags: {'date': '2023-04-16'}
submitted job with tags: {'date': '2023-04-17'}
submitted job with tags: {'date': '2023-