In [71]:
# Register the component described by components/prompt_extractor.yml.
# NOTE(review): no resource group / workspace flags are passed, so this presumably relies on configured CLI defaults — confirm.
!az ml component create -f components/prompt_extractor.yml

{
  "$schema": "https://azuremlschemas.azureedge.net/latest/commandJob.schema.json",
  "code": "azureml:/subscriptions/781b03e7-6eb7-4506-bab8-cf3a0d89b1d4/resourceGroups/antonslutsky-rg/providers/Microsoft.MachineLearningServices/workspaces/gpu-workspace/codes/cc8ac56c-780b-404a-b752-bdab79a580fc/versions/1",
  "command": "python prompt_extractor.py  --prompts ${{inputs.prompts}} --output ${{outputs.output}} ",
  "creation_context": {
    "created_at": "2023-11-23T14:29:10.931785+00:00",
    "created_by": "Anton Slutsky",
    "created_by_type": "User",
    "last_modified_at": "2023-11-23T14:29:10.998522+00:00",
    "last_modified_by": "Anton Slutsky",
    "last_modified_by_type": "User"
  },
  "description": "Extract prompts from the input data",
  "display_name": "prompt_extractor",
  "environment": "azureml:/subscriptions/781b03e7-6eb7-4506-bab8-cf3a0d89b1d4/resourceGroups/antonslutsky-rg/providers/Microsoft.MachineLearningServices/workspaces/gpu-workspace/environments/text-to-image

In [175]:
%%writefile components/conda.yml
# Conda specification referenced as `conda_file` by components/text-to-image-pipeline-env.yml.
name: text-to-image-pipeline-env
channels:
  - conda-forge
dependencies:
  - python=3.10
  - pip
  # Packages below are installed with pip inside the conda environment.
  - pip:
    - azureml-evaluate-mlflow==0.0.35
    - inference-schema[numpy-support]==1.3.0
    # NOTE(review): pyarrow and torch carry no version pin, so a rebuild may resolve
    # different versions than the ones this notebook was run with — consider pinning.
    - pyarrow
    - torch


Overwriting components/conda.yml


In [1]:
%%writefile components/text-to-image-pipeline-env.yml
# Azure ML environment definition: a base Docker image plus the Conda spec in conda.yml.
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
name: text-to-image-pipeline-cuda-env
# NOTE(review): the image reference has no explicit tag, so it presumably resolves to the
# registry's default tag — confirm, and consider pinning a tag for reproducible builds.
image: mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
# Conda spec written by the earlier %%writefile cell; presumably resolved relative to this file — confirm.
conda_file: conda.yml
description: Environment created from a Docker image plus Conda environment.

Overwriting components/text-to-image-pipeline-env.yml


In [2]:
# Create the environment defined in components/text-to-image-pipeline-env.yml.
# NOTE(review): no resource group / workspace flags are passed, so this presumably relies on configured CLI defaults — confirm.
!az ml environment create --file components/text-to-image-pipeline-env.yml

{
  "conda_file": {
    "channels": [
      "conda-forge"
    ],
    "dependencies": [
      "python=3.10",
      "pip",
      {
        "pip": [
          "azureml-evaluate-mlflow==0.0.35",
          "inference-schema[numpy-support]==1.3.0",
          "pyarrow",
          "torch"
        ]
      }
    ],
    "name": "text-to-image-pipeline-env"
  },
  "creation_context": {
    "created_at": "2023-11-25T05:31:39.421519+00:00",
    "created_by": "Anton Slutsky",
    "created_by_type": "User",
    "last_modified_at": "2023-11-25T05:31:39.421519+00:00",
    "last_modified_by": "Anton Slutsky",
    "last_modified_by_type": "User"
  },
  "description": "Environment created from a Docker image plus Conda environment.",
  "id": "azureml:/subscriptions/781b03e7-6eb7-4506-bab8-cf3a0d89b1d4/resourceGroups/antonslutsky-rg/providers/Microsoft.MachineLearningServices/workspaces/gpu-workspace/environments/text-to-image-pipeline-cuda-env/versions/1",
  "image": "mcr.microsoft.com/azureml/openmpi4.1.0

In [65]:
%%writefile components/model_scorer.yml
# Definition of the model_scorer step: runs model_scorer.py on an MLflow model,
# a prompts file and a task-definition file, and emits a predictions file.
# NOTE(review): this file is registered with `az ml component create`, yet $schema points at
# the commandJob schema and job-level keys (compute, experiment_name) are present — confirm
# whether the command *component* schema was intended.
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
name: model_scorer
# The directory containing this YAML (components/) is uploaded as the code snapshot.
code: .
# Folded scalar (>-): the lines below are joined with spaces into a single command line,
# so no comments may be placed inside it.
command: >-
  python model_scorer.py 
  --mlflow_model ${{inputs.mlflow_model}}
  --prompts ${{inputs.prompts}}
  --task_def ${{inputs.task_def}}
  --predictions ${{outputs.predictions}}
# Each input/output is handed to the script as a path through the matching CLI flag above.
inputs:
  mlflow_model: 
    type: uri_folder
  prompts: 
    type: uri_file
  task_def:
    type: uri_file
outputs:
  predictions:
    type: uri_file
# NOTE(review): this references custom-env-1, not the text-to-image-pipeline-cuda-env
# created elsewhere in this notebook — confirm which environment is intended.
environment: azureml:custom-env-1@latest
compute: azureml:inference-compute-4a
display_name: model_scorer
experiment_name: text-to-image-pipeline
description: Score an MLFlow model

Overwriting components/model_scorer.yml


In [66]:
%%writefile components/model_scorer.py

# Score prompts from a parquet file with an MLflow pyfunc model and write the predictions to a CSV file
import glob
import os
import pandas as pd
import argparse
import mlflow
from pathlib import Path
import torch

def main():
    """Score a small batch of prompts with an MLflow model and save the output.

    Command-line arguments:
        --mlflow_model: path to the MLflow model directory to load.
        --prompts: path to a parquet file that contains a ``prompt`` column.
        --task_def: path to a text file whose contents are prepended to every prompt.
        --predictions: path of the header-less CSV file the predictions are written to.

    Raises:
        KeyError: if the prompts file has no ``prompt`` column.
    """

    # input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--mlflow_model", type=str, help="mlflow model")
    parser.add_argument("--prompts", type=str, help="path to prompts input")
    parser.add_argument("--task_def", type=str, help="path to task instructions")
    parser.add_argument("--predictions", type=str, help="path to predictions output")
    args = parser.parse_args()

    # Start Logging. Using the run as a context manager guarantees it is closed
    # even when loading or scoring raises, instead of being left dangling.
    with mlflow.start_run():
        # Read the task instructions and release the file handle right away.
        with open(args.task_def) as task_file:
            task_def = task_file.read()

        print(f"Task def: {task_def}")

        print(f"Got prompts: {args.prompts}")

        prompts_df = pd.read_parquet(args.prompts, engine='pyarrow')

        print("Loaded columns: ", prompts_df.columns)

        # Only the first rows (DataFrame.head() default) are scored, as before.
        # .copy() yields an independent frame so the assignment below never
        # writes into a slice of the loaded data.
        batch_df = prompts_df[["prompt"]].head().copy()
        batch_df['prompt'] = task_def + batch_df['prompt'].astype(str)

        print("Head:")

        print(batch_df.head())

        # Iterate by position: the parquet index is not guaranteed to be 0..n-1,
        # so label-based lookups such as batch_df['prompt'][i] can raise KeyError.
        for i, prompt in enumerate(batch_df['prompt']):
            print(f"Prompt {i}: {prompt}")

        print(f"{args.mlflow_model}")

        model_contents = os.listdir(args.mlflow_model)
        print(f"{model_contents}")

        if "MLmodel" in model_contents:
            print("MLmodel found")

            with open(os.path.join(args.mlflow_model, "MLmodel")) as mlmodel_file:
                print(f"{mlmodel_file.read()}")

        model = mlflow.pyfunc.load_model(args.mlflow_model)

        print(f"Loaded model object: {model}")

        predictions = model.predict(batch_df)

        print(f"Predictions DF: {predictions.columns}")

        print(f"Predictions: {predictions.head()}")

        # The column labelled 0 is the one that gets persisted.
        # NOTE(review): assumes the model returns a DataFrame with a column 0 — confirm.
        first_column = predictions[0]

        print(f"Predictions[0]: {first_column}")

        # Guard the single-item debug print so an empty batch does not crash here.
        if len(first_column) > 0:
            print(f"Predictions[0][0]:\n----------------------------------------\n{first_column.iloc[0]}\n----------------------------------------\n")

        for i, prediction in enumerate(first_column):
            print("********************************************************************")
            print(f"Prediction {i}: {prediction}")
            print("********************************************************************")

        predictions_df = pd.DataFrame(first_column)

        predictions_df.to_csv(args.predictions, index=False, header=False)

# Run the scorer only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

Overwriting components/model_scorer.py


In [67]:
# Register the component described by components/model_scorer.yml; its `code: .` entry uploads the components/ directory.
# NOTE(review): no resource group / workspace flags are passed, so this presumably relies on configured CLI defaults — confirm.
!az ml component create -f components/model_scorer.yml

{


Uploading components (0.01 MBs):   0%|          | 0/5150 [00:00<?, ?it/s]
Uploading components (0.01 MBs):  60%|######    | 3094/5150 [00:00<00:00, 7788.21it/s]
Uploading components (0.01 MBs): 100%|##########| 5150/5150 [00:00<00:00, 12554.91it/s]





  "$schema": "https://azuremlschemas.azureedge.net/latest/commandJob.schema.json",
  "code": "azureml:/subscriptions/781b03e7-6eb7-4506-bab8-cf3a0d89b1d4/resourceGroups/antonslutsky-rg/providers/Microsoft.MachineLearningServices/workspaces/gpu-workspace/codes/f4b1eafa-4784-47d9-be57-80a06573c5e4/versions/1",
  "command": "python model_scorer.py  --mlflow_model ${{inputs.mlflow_model}} --prompts ${{inputs.prompts}} --task_def ${{inputs.task_def}} --predictions ${{outputs.predictions}}",
  "creation_context": {
    "created_at": "2023-11-27T13:28:51.326628+00:00",
    "created_by": "Anton Slutsky",
    "created_by_type": "User",
    "last_modified_at": "2023-11-27T13:28:51.416359+00:00",
    "last_modified_by": "Anton Slutsky",
    "last_modified_by_type": "User"
  },
  "description": "Score an MLFlow model",
  "display_name": "model_scorer",
  "environment": "azureml:/subscriptions/781b03e7-6eb7-4506-bab8-cf3a0d89b1d4/resourceGroups/antonslutsky-rg/providers/Microsoft.MachineLearningSe