# PyTorch - DirectML GPU Acceleration

In [1]:
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
import torch_directml
import torch
import numpy as np

## Verify connection to GPU

In [2]:
import torch_directml
import torch

def available_gpu_torch():
    """Print which PyTorch compute backends can be reached from this kernel.

    Reports, in order: CUDA (with the name of device 0 when it is available),
    DirectML (the device object returned by ``torch_directml.device()``), and
    the CPU device.

    Returns:
        None. All results are reported through ``print``.
    """
    if torch.cuda.is_available():
        print("CUDA is available.")
        print("CUDA Device:", torch.cuda.get_device_name(0))
    else:
        print("CUDA not available.")

    # DirectML check
    try:
        dml_device = torch_directml.device()
    except Exception as exc:
        # Catch Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate, and show why the probe failed instead of
        # hiding the cause.
        print("DirectML not available.")
        print(f"Reason: {type(exc).__name__}: {exc}")
    else:
        print("DirectML Device available:", dml_device)

    # Always available
    print("CPU Device:", torch.device("cpu"))


In [3]:
def manual_test_gpu():
    """Create a random 5x5 tensor, move it to the DirectML device, and print it.

    Returns:
        None. The tensor is written to stdout via ``print``.
    """
    device = torch_directml.device()
    sample = torch.randn(5, 5)
    # The tensor is created first and then transferred with ``.to``.
    sample_on_device = sample.to(device)
    print(sample_on_device)


In [4]:
# Report the reachable backends first, then push a small random tensor
# through the DirectML device as a smoke test.
available_gpu_torch()
manual_test_gpu()

CUDA not available.
DirectML Device available: privateuseone:0
CPU Device: cpu
tensor([[ 0.7117,  0.0386,  1.0696, -0.7149, -2.0709],
        [-1.1531,  1.9733,  0.4399, -0.9863, -2.2153],
        [ 1.4441,  0.2210, -0.4448,  1.6996,  0.3110],
        [ 0.6144,  1.9952,  2.0291, -0.0976, -0.6362],
        [ 0.8082, -1.0828, -0.2688,  2.7290, -1.0088]],
       device='privateuseone:0')


## Inference using DirectML

In [5]:
# NOTE(review): `dml` is not referenced by the inference code visible below,
# which uses `dml_device` instead — confirm whether this cell is still needed.
dml = torch_directml.device()

In [6]:
# DirectML device handle; the inference cell moves the model and its input
# tensors onto this device.
dml_device = torch_directml.device()

In [7]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Checkpoint identifier passed to both `from_pretrained` calls below.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"

# Tokenizer and classifier are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Put the model on the DirectML device and switch it to evaluation mode.
model.to(dml_device)
model.eval()

# Class-index -> label-name mapping taken from the model config.
id2label = model.config.id2label

# Tokenize the single example sentence into PyTorch tensors.
text = "The movie was absolutely wonderful and engaging!"
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

# Every input tensor has to be moved to the same device as the model.
inputs_on_dml = {name: tensor.to(dml_device) for name, tensor in inputs.items()}

# Forward pass without gradient tracking.
with torch.no_grad():
    outputs = model(**inputs_on_dml)

# Bring the logits back to the CPU before converting to NumPy.
logits = outputs.logits.detach().cpu()
probs = torch.softmax(logits, dim=-1).squeeze().numpy()

# Most likely class, its label, and its probability.
predicted_class_id = int(probs.argmax())
predicted_label = id2label[predicted_class_id]
confidence = float(probs[predicted_class_id])

# Assemble the summary first, then print it.
probabilities = {
    id2label[idx]: round(float(prob), 3) for idx, prob in enumerate(probs)
}
response = {
    "text": text,
    "label": predicted_label,
    "confidence": round(confidence, 3),
    "probabilities": probabilities,
}

print("Full response:")
print(response)


Full response:
{'text': 'The movie was absolutely wonderful and engaging!', 'label': 'POSITIVE', 'confidence': 1.0, 'probabilities': {'NEGATIVE': 0.0, 'POSITIVE': 1.0}}
