In [4]:

from azure.ai.ml import MLClient, command, Output
from azure.ai.ml.entities import Environment
from azure.identity import DefaultAzureCredential


# Connect to AML workspace
ml_client = MLClient.from_config(DefaultAzureCredential())


# Define environment
donut_env = Environment(
    name="donut-lora-env",
    image="mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:latest",
    conda_file="environment.yaml"
)

# Register environment
ml_client.environments.create_or_update(donut_env)


job = command(
    code="./src",
    command="python train.py --data_dir ./data --output_dir ${{outputs.model_output}}",
    environment=donut_env,
    compute="anishswiss1",
    display_name="donut-lora-train",
    experiment_name="donut-lora-exp",
    outputs={
        "model_output": Output(type="uri_folder", mode="upload")
    },
)


# Submit
returned_job = ml_client.jobs.create_or_update(job)
print(f"Submitted job: {returned_job.name}")




Found the config file in: /config.json
Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored


Submitted job: patient_zebra_hlpm8t7gt2


In [1]:
%pip install torch torchvision torchaudio


from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
import os

download_path = "./donut_qa_model"


model_folder = os.path.join(download_path, "donutQA/outputs/donut-lora")  # may need adjustment

files_only = [f for f in os.listdir(model_folder) if os.path.isfile(os.path.join(model_folder, f))]
print("FILES .... ")
print(files_only)

processor = DonutProcessor.from_pretrained(model_folder)
model = VisionEncoderDecoderModel.from_pretrained(model_folder)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/anaconda/envs/azureml_py310_sdkv2/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
FILES .... 
['.amlignore', '.amlignore.amltmp', 'added_tokens.json', 'config.json', 'generation_config.json', 'model.safetensors', 'preprocessor_config.json', 'sentencepiece.bpe.model', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json']


  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
image = Image.open("test_pay_stub.jpg").convert("RGB")
question = "What is the net pay?"

prompt = f"<s_docvqa><s_question>{question}</s_question><s_answer>"


In [3]:
# Prepare inputs
pixel_values = processor(image, return_tensors="pt").pixel_values
decoder_input_ids = processor.tokenizer(prompt, add_special_tokens=False, return_tensors="pt").input_ids

# Generate prediction
outputs = model.generate(
    pixel_values,
    decoder_input_ids=decoder_input_ids,
    max_length=model.decoder.config.max_position_embeddings,
    early_stopping=True,
    pad_token_id=processor.tokenizer.pad_token_id,
    eos_token_id=processor.tokenizer.eos_token_id,
    use_cache=True,
    bad_words_ids=[[processor.tokenizer.unk_token_id]],
    return_dict_in_generate=True,
)

# Decode answer
sequence = processor.batch_decode(outputs.sequences)[0]
answer = sequence.split("<s_answer>")[1].split("</s_answer>")[0]

print(f"Predicted answer: {answer}")


The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Predicted answer:  $853.30


In [1]:
#conda env update -f environment.yaml

import sys
print(sys.executable)

/anaconda/envs/azureml_py38/bin/python


In [1]:
# Deploy the model to a managed online endpoint
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, Environment
from azure.identity import DefaultAzureCredential
from datetime import datetime

# Create ml_client if it doesn't exist (from Cell 1)
if 'ml_client' not in globals():
    print("Creating MLClient connection...")
    ml_client = MLClient.from_config(DefaultAzureCredential())

# Get or create donut_env if it doesn't exist
if 'donut_env' not in globals():
    print("Creating environment definition...")
    donut_env = Environment(
        name="donut-lora-env",
        image="mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:latest",
        conda_file="environment.yaml"
    )
    # Register it
    ml_client.environments.create_or_update(donut_env)

# Get the registered environment with specific version
# Use version 23 (or get the latest if you prefer)
env_version = 23  # Specify the version number
try:
    registered_env = ml_client.environments.get(donut_env.name, version=str(env_version))
    env_ref = registered_env
    print(f"Using registered environment: {registered_env.name}:{registered_env.version}")
except Exception as e:
    print(f"Warning: Could not get environment version {env_version}. Error: {e}")
    # Fallback: use string format "name:version"
    env_ref = f"{donut_env.name}:{env_version}"
    print(f"Using environment reference: {env_ref}")

# Create endpoint
endpoint_name = f"donutqa-endpoint-{datetime.now().strftime('%m%d%H%M')}"
endpoint = ManagedOnlineEndpoint(
    name=endpoint_name,
    auth_mode="key"
)
ml_client.online_endpoints.begin_create_or_update(endpoint).result()
print(f"Created endpoint: {endpoint_name}")

# Deploy model
deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=endpoint_name,
    model="donutQA:1",  # String reference to registered model
    environment=env_ref,  # Use registered environment object
    code_path="src",
    scoring_script="score.py",  # Use scoring_script instead of entry_script
    instance_type="Standard_E4s_v3",
    instance_count=1
)
ml_client.online_deployments.begin_create_or_update(deployment).result()
print("Deployment created successfully")

# Route traffic
endpoint.traffic = {"blue": 100}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()
print("Traffic routed to deployment")

# Get endpoint details
endpoint = ml_client.online_endpoints.get(endpoint_name)
print(f"\nâœ… Endpoint deployed successfully!")
print(f"Endpoint name: {endpoint_name}")
print(f"Scoring URI: {endpoint.scoring_uri}")
print(f"Status: {endpoint.provisioning_state}")


Creating MLClient connection...
Creating environment definition...
Using registered environment: donut-lora-env:23
Created endpoint: donutqa-endpoint-11271214
.......................................................................................................................................................

  mlflow.mismatch._check_version_mismatch()
Check: endpoint donutqa-endpoint-11271214 exists


HttpResponseError: (BadArgument) User container has crashed or terminated. Please see troubleshooting guide, available here: https://aka.ms/oe-tsg#error-resourcenotready
Code: BadArgument
Message: User container has crashed or terminated. Please see troubleshooting guide, available here: https://aka.ms/oe-tsg#error-resourcenotready