In [2]:
import torch
from transformers import pipeline

# --- CHECK 1: Pure PyTorch Math ---
# Confirms that PyTorch can see a CUDA device and can actually execute a
# kernel on it, before anything heavier is attempted further down.
print("üîç Checking CUDA availability...")
if not torch.cuda.is_available():
    print("‚ùå CUDA not available. Check your pytorch installation.")
    # Raise SystemExit directly instead of calling exit(): exit() is the
    # interactive helper installed by `site`/IPython and is not a reliable way
    # to stop the remaining statements of a notebook cell. Raising aborts the
    # run right here, and the non-zero code reports the failure when this is
    # executed as a plain script.
    raise SystemExit(1)

device = torch.device("cuda")
print(f"‚úÖ GPU Detected: {torch.cuda.get_device_name(0)}")
print(f"   Memory: {round(torch.cuda.get_device_properties(0).total_memory / 1024**3, 1)} GB")

# Perform a simple calculation on GPU to make sure drivers are actually working
x = torch.rand(1000, 1000).to(device)
y = torch.matmul(x, x)
# CUDA kernels are launched asynchronously; wait for the matmul to finish so a
# runtime failure is raised here rather than after the success message.
torch.cuda.synchronize()
print(f"‚úÖ Matrix multiplication on GPU successful! (Result shape: {y.shape})")

print("-" * 30)

# --- CHECK 2: Actual Model Inference ---
# End-to-end smoke test: build a Hugging Face pipeline on the GPU, confirm
# where its weights live, and push one sentence through it.
print("üöÄ Running test inference with a tiny model...")

# device=0 asks the pipeline for the first CUDA device; the DistilBERT SST-2
# checkpoint is chosen only because it is small.
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=0,
)

# The device of the first parameter tensor tells us where the model was placed.
model_device = next(iter(classifier.model.parameters())).device
print(f"   Model loaded on device: {model_device}")

# One forward pass on a single example sentence.
result = classifier("I love having a GPU that works!")
print(f"‚úÖ Inference Output: {result}")

# Final verdict is based on where the weights actually ended up.
print(
    "\nüéâ SUCCESS: Your GPU inference stack is fully operational."
    if model_device.type == "cuda"
    else "\n‚ö†Ô∏è WARNING: Model ran, but it looks like it stayed on CPU."
)

üîç Checking CUDA availability...
‚úÖ GPU Detected: NVIDIA GeForce RTX 2070 SUPER
   Memory: 8.0 GB
‚úÖ Matrix multiplication on GPU successful! (Result shape: torch.Size([1000, 1000]))
------------------------------
üöÄ Running test inference with a tiny model...


Device set to use cuda:0


   Model loaded on device: cuda:0
‚úÖ Inference Output: [{'label': 'POSITIVE', 'score': 0.9997900128364563}]

üéâ SUCCESS: Your GPU inference stack is fully operational.
