In [1]:
# Emit a fixed greeting to the cell output.
greeting = "Hello World!"
print(greeting)

Hello World!


In [2]:
import ray

# Address the head service by its cluster-internal FQDN
# (<service>.<namespace>.svc.cluster.local); this name targets the 'ray' namespace.
RAY_HEAD_ADDRESS = "ray://raycluster-latest-head-svc.ray.svc.cluster.local:10001"

# Ray Client raises if ray.init("ray://...") is called while a session is
# already open, so without this guard re-running the cell would print a
# misleading "Connection still failed" message for a healthy connection.
if ray.is_initialized():
    print("Ray is already connected; reusing the existing session.")
else:
    try:
        # Run this code inside the Python environment of your remote kernel
        ray.init(RAY_HEAD_ADDRESS)
        print("SUCCESS! Ray is connected using the FQDN.")
        # You can now proceed to run Ray tasks
    except Exception as e:
        # Connectivity probe only: report the failure instead of raising.
        print(f"Connection still failed: {e}")

2025-10-13 03:59:58,378	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-10-13 03:59:58,388	INFO client_builder.py:241 -- Passing the following kwargs to ray.init() on the server: log_to_driver
SIGTERM handler is not set because current thread is not the main thread.


SUCCESS! Ray is connected using the FQDN.


In [3]:
import ray
import time
import torch # We'll use torch to prove the environment is GPU-ready

# 1. Ensure Ray is connected.
# The head service is addressed by its FQDN (<service>.<namespace>.svc.cluster.local);
# this name targets the 'ray' namespace.
RAY_HEAD_ADDRESS = "ray://raycluster-latest-head-svc.ray.svc.cluster.local:10001"

if ray.is_initialized():
    # ray.init() is skipped here, so the runtime_env below is NOT passed to the
    # already-open session. Call ray.shutdown() first if the pip packages are needed.
    print("Ray already connected; reusing the existing session (runtime_env not re-applied).")
else:
    try:
        ray.init(RAY_HEAD_ADDRESS, runtime_env={"pip": ["torch", "torchvision", "torchaudio"]})
        print("Ray connection established.")
    except Exception as e:
        print(f"Connection Failed: {e}")
        # If connection fails, check Kubernetes service name or firewall.
        # Re-raise so the rest of the cell does not run without a cluster
        # connection: Ray may implicitly start a local instance on the first
        # .remote() call, where a num_gpus=1 task could wait indefinitely.
        raise

# 2. Define a remote function that explicitly requests one GPU
@ray.remote(num_gpus=1)
def check_gpu_status():
    """Report the executing host's name and PyTorch's view of CUDA.

    The task is declared with ``num_gpus=1``, so it requests one GPU from Ray.

    Returns:
        dict: ``"hostname"`` (result of ``socket.gethostname()`` in the process
        that ran the task), ``"cuda_available"`` (``torch.cuda.is_available()``)
        and ``"device_count"`` (``torch.cuda.device_count()``).
    """
    # Import inside the task, like socket, so both names are resolved in the
    # process that executes the task instead of relying on the driver's
    # module-level `import torch`.
    import socket
    import torch

    return {
        # Hostname of the machine/pod that executed the task
        "hostname": socket.gethostname(),
        # Whether PyTorch can see a CUDA device from this process
        "cuda_available": torch.cuda.is_available(),
        "device_count": torch.cuda.device_count(),
    }

# 3. Execute the task and retrieve the result
print("Submitting GPU task...")

try:
    # Submit the task to the cluster (it will wait for a GPU worker to be available)
    future = check_gpu_status.remote()

    # Retrieve the result (blocks until the task has finished)
    result = ray.get(future)

    # 4. Display results and verify GPU usage
    print("\n--- GPU Task Result ---")
    print(f"Task executed on node: {result['hostname']}")
    print(f"CUDA Available (GPU Found by PyTorch): {result['cuda_available']}")
    print(f"CUDA Devices Found: {result['device_count']}")
finally:
    # 5. Clean up the Ray connection. Done in `finally` so the connection is
    # released even when submitting or fetching the task raises.
    ray.shutdown()

2025-10-13 04:17:58,285	INFO client_builder.py:241 -- Passing the following kwargs to ray.init() on the server: log_to_driver


SIGTERM handler is not set because current thread is not the main thread.


Ray connection established.
Submitting GPU task...

--- GPU Task Result ---
Task executed on node: raycluster-latest-gpu-worker-group-worker-ll57f
CUDA Available (GPU Found by PyTorch): True
CUDA Devices Found: 1


In [2]:
# Clean up the Ray connection.
# NOTE(review): this cell's execution count (2) is lower than the preceding
# cell's (3), so the cells were not run top-to-bottom in one session —
# confirm the notebook still works with Restart Kernel -> Run All.
ray.shutdown()