# Dell AI SDK - Getting Started 

This notebook demonstrates the major functionality of the Dell AI SDK, including:
- Authentication and user management
- Model listing and details
- Platform listing and details
- Deployment snippet generation

In [None]:
%load_ext autoreload
%autoreload 2

from dell_ai import DellAIClient
from dell_ai.exceptions import DellAIError

## 1. Initialize the Client

The Dell AI client can be initialized with or without a token. If no token is provided, it will attempt to load from the Hugging Face token cache.

In [2]:
# No token is passed here, so (per the text above) the client will try to load
# one from the Hugging Face token cache.
client = DellAIClient()  # optionally pass a token

## 2. Check Authentication

Verify that the client is properly authenticated and get user information.

In [None]:
try:
    # Ask the client for its authentication status and report it.
    is_auth = client.is_authenticated()
    print(f"Is authenticated: {is_auth}")

    # The user profile is only requested when the status check came back truthy.
    if is_auth:
        user_info = client.get_user_info()
        print("\nUser Information:")
        for field, field_value in user_info.items():
            print(f"{field}: {field_value}")
except DellAIError as err:
    # Show SDK failures as a one-line message instead of a traceback.
    print(f"Error: {err}")

## 3. List Available Models

Get a list of all available models, then fetch the details of the first one as an example.

In [32]:
# Start from an empty list so that `models` is always defined after this cell.
# Later cells test `if models:`; without this default, a failed request below
# would leave the name undefined (NameError) or pointing at a stale list from
# a previous run.
models = []

try:
    # Get list of available models
    models = client.list_models()
    print(f"Found {len(models)} models")

    # Get details for the first model as an example
    # (entries of `models` are passed straight to get_model as the model ID)
    if models:
        first_model = models[0]
        model_details = client.get_model(first_model)
        print("\nExample Model Details:")
        print(f"Model ID: {first_model}")
        print(f"Description: {model_details.description}")
        print(f"License: {model_details.license}")
        print(f"Status: {model_details.status}")
except DellAIError as e:
    # SDK errors are reported as a message; `models` stays an empty list.
    print(f"Error: {e}")

Found 30 models

Example Model Details:
Model ID: meta-llama/Llama-4-Maverick-17B-128E-Instruct
Description: The Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences. These models leverage a mixture-of-experts architecture to offer industry-leading performance in text and image understanding.
License: llama4
Status: new


## 4. List Available Platforms

Get a list of all available platforms, then fetch the details of the first one as an example.

In [33]:
try:
    # Fetch the platform list from the client and report how many came back.
    platforms = client.list_platforms()
    print(f"Found {len(platforms)} platforms")

    # Use the first entry as a worked example.
    if platforms:
        first_platform = platforms[0]
        platform_details = client.get_platform(first_platform)
        print("\nExample Platform Details:")
        print(f"SKU ID: {first_platform}")

        # (label, attribute name) pairs; each attribute is read only when its
        # line is printed, in the order listed here.
        detail_fields = (
            ("Name", "name"),
            ("Server", "server"),
            ("Vendor", "vendor"),
            ("GPU Type", "gputype"),
            ("GPU RAM", "gpuram"),
            ("Total GPU Count", "totalgpucount"),
        )
        for label, attr_name in detail_fields:
            print(f"{label}: {getattr(platform_details, attr_name)}")
except DellAIError as err:
    # Show SDK failures as a one-line message instead of a traceback.
    print(f"Error: {err}")

Found 7 platforms

Example Platform Details:
SKU ID: xe9680-nvidia-h200
Name: XE9680 Nvidia H200
Server: xe9680
Vendor: Nvidia
GPU Type: H200
GPU RAM: 141G
Total GPU Count: 8


## 5. Check Model Platform Support

Get information about which platforms and deployment configurations are supported for a specific model.

In [34]:
try:
    # `models` is the list produced by the model-listing cell earlier in the
    # notebook. Say so explicitly when it is empty instead of printing nothing.
    if not models:
        print(
            "No models available - run the model listing cell above "
            "and check that it succeeded."
        )
    else:
        # Get details for a specific model
        model_id = models[0]  # Using first model
        model_details = client.get_model(model_id)

        print(f"\nPlatform Support for Model: {model_id}")
        print("=" * 50)

        # Check if the model has any deployment configurations
        if not model_details.configs_deploy:
            print("No deployment configurations available for this model.")
        else:
            # configs_deploy is iterated as platform ID -> list of configurations
            for platform_id, configs in model_details.configs_deploy.items():
                print(f"\nPlatform: {platform_id}")
                for config in configs:
                    print("  Configuration:")
                    print(
                        f"    - Max Batch Prefill Tokens: {config.max_batch_prefill_tokens}"
                    )
                    print(f"    - Max Input Tokens: {config.max_input_tokens}")
                    print(f"    - Max Total Tokens: {config.max_total_tokens}")
                    print(f"    - Number of GPUs: {config.num_gpus}")
except DellAIError as e:
    # SDK errors are reported as a message rather than a traceback.
    print(f"Error: {e}")


Platform Support for Model: meta-llama/Llama-4-Maverick-17B-128E-Instruct

Platform: xe9680-amd-mi300x
  Configuration:
    - Max Batch Prefill Tokens: 16484
    - Max Input Tokens: 16383
    - Max Total Tokens: 16384
    - Number of GPUs: 8

Platform: xe9680-nvidia-h200
  Configuration:
    - Max Batch Prefill Tokens: 8484
    - Max Input Tokens: 8383
    - Max Total Tokens: 8384
    - Number of GPUs: 8


## 6. Get Deployment Snippet (for Docker or Kubernetes)

Get a deployment snippet for a specific model and platform configuration. We'll use the supported configuration values from the model details.

In [35]:
def _show_deployment_snippet(
    label, container_type, model_id, sku_id, num_gpus, num_replicas=1
):
    """Request one deployment snippet from the client, print it, and return it.

    Args:
        label: Human-readable name used in the printed heading
            ("<label> Deployment Snippet:").
        container_type: Value forwarded as ``container_type`` to
            ``client.get_deployment_snippet`` (this notebook uses "docker"
            and "kubernetes").
        model_id: Model identifier forwarded to the client.
        sku_id: Platform identifier forwarded to the client as ``sku_id``.
        num_gpus: GPU count forwarded to the client.
        num_replicas: Replica count forwarded to the client (default 1).

    Returns:
        Whatever ``client.get_deployment_snippet`` returned.

    Raises:
        DellAIError: Not caught here, so an SDK error from the request
            propagates to the caller.
    """
    snippet = client.get_deployment_snippet(
        model_id=model_id,
        sku_id=sku_id,
        container_type=container_type,
        num_gpus=num_gpus,
        num_replicas=num_replicas,
    )

    # Two positional args: prints a blank line, then a space and 100 dashes.
    print("\n", "--" * 50)
    print(f"{label} Deployment Snippet:")
    print(snippet)
    return snippet


try:
    # `models` is the list produced by the model-listing cell earlier in the
    # notebook. Say so explicitly when it is empty instead of printing nothing.
    if not models:
        print(
            "No models available - run the model listing cell above "
            "and check that it succeeded."
        )
    else:
        # Get model details to find supported configurations
        model_id = models[0]  # Using first model
        model_details = client.get_model(model_id)

        # Get the first supported platform and its configuration
        if model_details.configs_deploy:
            platform_id = next(iter(model_details.configs_deploy))
            config = model_details.configs_deploy[platform_id][0]  # Get first config

            print(f"Using configuration for {model_id} on {platform_id}:")
            print(f"- Max Batch Prefill Tokens: {config.max_batch_prefill_tokens}")
            print(f"- Max Input Tokens: {config.max_input_tokens}")
            print(f"- Max Total Tokens: {config.max_total_tokens}")
            print(f"- Number of GPUs: {config.num_gpus}")

            # Same request for both container types; only the label and
            # container_type differ.
            docker_snippet = _show_deployment_snippet(
                "Docker", "docker", model_id, platform_id, config.num_gpus
            )
            k8s_snippet = _show_deployment_snippet(
                "Kubernetes", "kubernetes", model_id, platform_id, config.num_gpus
            )
        else:
            print("No deployment configurations available for this model.")
except DellAIError as e:
    # SDK errors are reported as a message rather than a traceback.
    print(f"Error: {e}")

Using configuration for meta-llama/Llama-4-Maverick-17B-128E-Instruct on xe9680-amd-mi300x:
- Max Batch Prefill Tokens: 16484
- Max Input Tokens: 16383
- Max Total Tokens: 16384
- Number of GPUs: 8

 ----------------------------------------------------------------------------------------------------
Docker Deployment Snippet:
docker run \
    -it \
    -p 80:80 \
    --security-opt seccomp=unconfined \
    --device=/dev/kfd \
    --device=/dev/dri \
    --group-add video \
    --ipc=host \
    --shm-size 256g \
    -e NUM_SHARD=8 \
    -e MAX_BATCH_PREFILL_TOKENS=16484 \
    -e MAX_TOTAL_TOKENS=16384 \
    -e MAX_INPUT_TOKENS=16383 \
    registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-llama-4-maverick-17b-128e-instruct-amd

 ----------------------------------------------------------------------------------------------------
Kubernetes Deployment Snippet:
# Write the Kubernetes manifest below in a deployment.yaml file,
# and then run the following kubectl command on th