# ONNX - DirectML GPU Acceleration

In [27]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from optimum.exporters.onnx import main_export
import onnx
from pathlib import Path
import onnxruntime as ort

## Verify connection to GPU

In [28]:
import onnxruntime as ort

def available_gpu_onnx():

    print("Available Providers:", ort.get_available_providers())

In [29]:
available_gpu_onnx()

Available Providers: ['DmlExecutionProvider', 'CPUExecutionProvider']


## Inference with ONNX

In [4]:
import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

def run_inference_dml(onnx_model_path, text_input):
    """
    Run inference on an ONNX model using DirectML on AMD GPU.
    
    Args:
        onnx_model_path: Path to the ONNX model
        text_input: Text input for inference
    """
    # Check available providers
    providers = ort.get_available_providers()
    print(f"Available providers: {providers}")
    
    # Ensure DirectML is available
    if 'DmlExecutionProvider' not in providers:
        raise RuntimeError("DirectML provider not found. Please ensure onnxruntime-directml is installed.")
    
    # Create inference session with DirectML provider
    session_options = ort.SessionOptions()
    session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    session = ort.InferenceSession(
        onnx_model_path,
        sess_options=session_options,
        providers=['DmlExecutionProvider']
    )
    
    # Get the model's input name
    input_name = session.get_inputs()[0].name
    
    # Load tokenizer (should match the model you converted)
    model_name = "distilbert-base-uncased"  # Replace with your model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    # Tokenize input
    tokens = tokenizer(text_input, return_tensors="np")
    
    # Run inference
    outputs = session.run(None, {input_name: tokens['input_ids']})
    
    # Process output based on your model type
    # This example assumes classification output
    probabilities = outputs[0]
    prediction = np.argmax(probabilities, axis=1)
    
    return {
        "raw_output": outputs,
        "prediction": prediction,
        "probabilities": probabilities
    }

# Example usage
result = run_inference_dml("onnx_model\model.onnx", "I really enjoyed this movie!")
print(f"Prediction: {result['prediction']}")
print(f"Probabilities: {result['probabilities']}")

  result = run_inference_dml("onnx_model\model.onnx", "I really enjoyed this movie!")


Available providers: ['DmlExecutionProvider', 'CPUExecutionProvider']


  result = run_inference_dml("onnx_model\model.onnx", "I really enjoyed this movie!")


ValueError: Required inputs (['attention_mask', 'token_type_ids']) are missing from input feed (['input_ids']).