In [1]:
import wandb
import pandas as pd

from pathlib import Path

In [2]:
# ! wandb login

In [3]:
# ! wandb status

In [4]:
# Path to working directory
# NOTE(review): this is a hard-coded absolute path on a local Windows drive; it must be
# edited by hand when the notebook is run on another machine.
working_dir = "D:/SLEAP/20250102_generalizability_experiment/primary/sorghum" # This should be the same as the previous notebook

In [5]:
# Wrap the working directory string in a Path object.
# Note: this does NOT change the process's current directory; `cwd` is only used
# to build paths (e.g. the CSV path) relative to the experiment folder.
cwd = Path(working_dir)
print(f"Current working directory: {cwd}")

Current working directory: D:\SLEAP\20250102_generalizability_experiment\primary\sorghum


In [6]:
# Constants for W&B initialization
ENTITY_NAME = "eberrigan-salk-institute-for-biological-studies"
PROJECT_NAME = "sleap-roots"
EXPERIMENT_NAME = "sorghum-primary-pilot03-2025-01-04"  # Unique name for the experiment; used as the prefix of each artifact name
CSV_PATH = cwd / "train_test_splits.csv"  # Path to the CSV file with the train/test splits
REGISTRY = "model"  # Inserted into the registry path as "wandb-registry-{REGISTRY}"

# Tags for the model artifact
# https://docs.wandb.ai/guides/registry/organize-with-tags/
# NOTE(review): the tag below says "pilot02" while EXPERIMENT_NAME says "pilot03" --
# confirm which one is intended. MODEL_TAGS is also not referenced by any of the
# cells shown in this notebook, so the tags are presumably never applied -- verify.
MODEL_TAGS = ["sorghum", "primary", "5-12DAG", "pilot02", "2025-01-04"]

In [7]:
def load_training_data(csv_path):
    """Reads the train/test split table from disk.

    Args:
        csv_path (Path): Location of the CSV file (anything accepted by
            ``pandas.read_csv`` works).

    Returns:
        pandas.DataFrame: The parsed contents of the CSV file.
    """
    splits_frame = pd.read_csv(csv_path)
    return splits_frame

def get_training_groups(df):
    """Splits the training table into one group per ``version`` value.

    Args:
        df (pandas.DataFrame): Table with a ``version`` column.

    Returns:
        pandas.core.groupby.DataFrameGroupBy: Lazy grouping keyed by version;
        iterating it yields ``(version, sub_frame)`` pairs.
    """
    group_key = "version"
    by_version = df.groupby(group_key)
    return by_version

def fetch_model_artifact_from_experiment(project_name, entity_name, artifact_name, wandb_version=None):
    """Fetches and downloads a specific version of a model artifact from a W&B experiment.

    A short-lived W&B run with job type ``fetch_artifact`` is started, the
    artifact is declared as an input of that run, and its files are downloaded.
    The run is used as a context manager so that it is always finished, even
    when the artifact lookup or the download raises.

    Args:
        project_name (str): Name of the W&B project.
        entity_name (str): Name of the W&B entity.
        artifact_name (str): Name of the artifact to fetch.
        wandb_version (str, optional): Version or alias to fetch. Any falsy
            value (``None``, ``""``) selects ``"latest"``.

    Returns:
        wandb.Artifact: The fetched artifact.
    """
    artifact_version = f"{wandb_version}" if wandb_version else "latest"
    full_artifact_name = f"{artifact_name}:{artifact_version}"

    # Context manager guarantees the run is closed on both success and failure,
    # so a failed fetch does not leave a dangling active run in the kernel.
    with wandb.init(project=project_name, entity=entity_name, job_type="fetch_artifact") as run:
        print(f"Fetching artifact '{full_artifact_name}' from project '{project_name}'.")
        artifact = run.use_artifact(full_artifact_name)
        print(f"Fetched artifact '{full_artifact_name}'.")
        artifact_dir = artifact.download()
        print(f"Fetched artifact '{artifact_name}:{artifact_version}' to directory '{artifact_dir}'.")
    return artifact


def fetch_model_artifact_and_link_to_registry(project_name, entity_name, artifact_name, registry_name, collection_name, wandb_version=None):
    """Fetches a specific version of a model artifact from a W&B experiment and links it to the registry.

    A short-lived W&B run with job type ``fetch_artifact`` is started, the
    artifact is declared as an input of that run, and it is then linked to
    ``{entity_name}-org/wandb-registry-{registry_name}/{collection_name}``.
    The run is used as a context manager so that it is always finished, even
    when the artifact lookup or the linking step raises.

    Args:
        project_name (str): Name of the W&B project.
        entity_name (str): Name of the W&B entity.
        artifact_name (str): Name of the artifact to fetch.
        registry_name (str): Name of the registry to link the artifact to.
        collection_name (str): Name of the collection to store the model artifact.
        wandb_version (str, optional): Version or alias to fetch. Any falsy
            value (``None``, ``""``) selects ``"latest"``.

    Returns:
        None
    """
    artifact_version = f"{wandb_version}" if wandb_version else "latest"
    full_artifact_name = f"{artifact_name}:{artifact_version}"
    # NOTE(review): assumes the organization is named "<entity>-org" -- confirm
    # against the W&B organization settings.
    full_registry_name = f"{entity_name}-org/wandb-registry-{registry_name}/{collection_name}"

    # Context manager guarantees the run is closed on both success and failure,
    # so one failing version in a loop does not leave an active run behind.
    with wandb.init(project=project_name, entity=entity_name, job_type="fetch_artifact") as run:
        print(f"Fetching artifact '{full_artifact_name}' from project '{project_name}'.")
        artifact = run.use_artifact(full_artifact_name)
        print(f"Fetched artifact '{full_artifact_name}'.")

        # Link the artifact to the registry
        print(f"Linking artifact '{full_artifact_name}' to registry '{full_registry_name}'.")
        run.link_artifact(artifact, full_registry_name)
        print(f"Linked artifact '{artifact_name}:{artifact_version}' to registry '{full_registry_name}'.")


def promote_model_in_registry(project_name, registry_name, artifact_name, stage):
    """Promotes a specific artifact in the W&B model registry to a given stage.

    The ``latest`` version of ``{registry_name}/{artifact_name}`` is looked up
    inside a short-lived run with job type ``promote_registry_artifact`` and
    ``stage`` is added to its aliases. The run is used as a context manager so
    that it is always finished, even when the lookup or the save raises.

    Args:
        project_name (str): Name of the W&B project.
        registry_name (str): Name of the model registry.
        artifact_name (str): Name of the artifact to promote.
        stage (str): Stage to promote the artifact to (e.g., 'production', 'staging').

    Returns:
        None
    """
    with wandb.init(project=project_name, job_type="promote_registry_artifact") as run:
        artifact = run.use_artifact(f"{registry_name}/{artifact_name}:latest")
        # Guard against adding the same alias twice when the cell is re-run.
        if stage not in artifact.aliases:
            artifact.aliases.append(stage)
        artifact.save()
        print(f"Promoted artifact '{artifact_name}' in registry '{registry_name}' to stage '{stage}'.")


In [8]:
def main(csv_path, wandb_version=None):
    """Links one model artifact per training version to the W&B registry.

    The split table is read from ``csv_path`` and grouped by ``version``; for
    every version an artifact named ``{EXPERIMENT_NAME}_v00{version}`` is
    fetched and linked to a registry collection of the same name.

    Args:
        csv_path (Path): Path to the CSV file containing train-test splits paths.
        wandb_version (str, optional): Artifact version or alias to fetch for
            every training version. Defaults to None, which selects "latest".
    """
    splits_table = load_training_data(csv_path)

    for version, group in get_training_groups(splits_table):
        print(f"Processing version {version}...")
        print(f"Group: {group}")

        # The registry collection shares its name with the artifact.
        # NOTE(review): the "v00" prefix is literal, so version 10 would give
        # "_v0010" -- confirm this matches how the artifacts were named when logged.
        artifact_name = f"{EXPERIMENT_NAME}_v00{version}"
        collection_name = artifact_name

        fetch_model_artifact_and_link_to_registry(
            project_name=PROJECT_NAME,
            entity_name=ENTITY_NAME,
            artifact_name=artifact_name,
            registry_name=REGISTRY,
            collection_name=collection_name,
            wandb_version=wandb_version,
        )

In [9]:
# Link every version's model artifact to the registry; wandb_version=None
# makes the fetch use the "latest" alias of each artifact.
main(CSV_PATH, wandb_version=None)

Processing version 0...
Group:                                                 path  version  labeled_frames  \
0  D:\SLEAP\20250102_generalizability_experiment\...        0             210   
1  D:\SLEAP\20250102_generalizability_experiment\...        0              45   
2  D:\SLEAP\20250102_generalizability_experiment\...        0              45   

  split_type  
0      train  
1        val  
2       test  


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33meberrigan[0m ([33meberrigan-salk-institute-for-biological-studies[0m). Use [1m`wandb login --relogin`[0m to force relogin


Fetching artifact 'sorghum-primary-pilot03-2025-01-04_v000:latest' from project 'sleap-roots'.
Fetched artifact 'sorghum-primary-pilot03-2025-01-04_v000:latest'.
Linking artifact 'sorghum-primary-pilot03-2025-01-04_v000:latest' to registry 'eberrigan-salk-institute-for-biological-studies-org/wandb-registry-model/sorghum-primary-pilot03-2025-01-04_v000'.
Linked artifact 'sorghum-primary-pilot03-2025-01-04_v000:latest' to registry 'eberrigan-salk-institute-for-biological-studies-org/wandb-registry-model/sorghum-primary-pilot03-2025-01-04_v000'.


Processing version 1...
Group:                                                 path  version  labeled_frames  \
3  D:\SLEAP\20250102_generalizability_experiment\...        1             210   
4  D:\SLEAP\20250102_generalizability_experiment\...        1              45   
5  D:\SLEAP\20250102_generalizability_experiment\...        1              45   

  split_type  
3      train  
4        val  
5       test  


Fetching artifact 'sorghum-primary-pilot03-2025-01-04_v001:latest' from project 'sleap-roots'.
Fetched artifact 'sorghum-primary-pilot03-2025-01-04_v001:latest'.
Linking artifact 'sorghum-primary-pilot03-2025-01-04_v001:latest' to registry 'eberrigan-salk-institute-for-biological-studies-org/wandb-registry-model/sorghum-primary-pilot03-2025-01-04_v001'.
Linked artifact 'sorghum-primary-pilot03-2025-01-04_v001:latest' to registry 'eberrigan-salk-institute-for-biological-studies-org/wandb-registry-model/sorghum-primary-pilot03-2025-01-04_v001'.


Processing version 2...
Group:                                                 path  version  labeled_frames  \
6  D:\SLEAP\20250102_generalizability_experiment\...        2             210   
7  D:\SLEAP\20250102_generalizability_experiment\...        2              45   
8  D:\SLEAP\20250102_generalizability_experiment\...        2              45   

  split_type  
6      train  
7        val  
8       test  


Fetching artifact 'sorghum-primary-pilot03-2025-01-04_v002:latest' from project 'sleap-roots'.
Fetched artifact 'sorghum-primary-pilot03-2025-01-04_v002:latest'.
Linking artifact 'sorghum-primary-pilot03-2025-01-04_v002:latest' to registry 'eberrigan-salk-institute-for-biological-studies-org/wandb-registry-model/sorghum-primary-pilot03-2025-01-04_v002'.
Linked artifact 'sorghum-primary-pilot03-2025-01-04_v002:latest' to registry 'eberrigan-salk-institute-for-biological-studies-org/wandb-registry-model/sorghum-primary-pilot03-2025-01-04_v002'.
