In [5]:
import IPython

# Shut the current kernel down with restart=True; every variable defined by
# earlier cells is gone afterwards, so later cells must re-import what they need.
# NOTE(review): presumably here so freshly installed packages get picked up -
# the install step is not visible in this part of the notebook; confirm.
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

In [6]:
import google.cloud.aiplatform_v1beta1 as aip_beta

from google.cloud.aiplatform import Endpoint, Model
from google.api_core.exceptions import InvalidArgument

In [28]:
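# Only needed when running in Google Colab: uncomment the two lines below to
# authenticate the notebook user.
# from google.colab import auth
# auth.authenticate_user()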

In [56]:
# Get account name
import requests
gcloud_token = !gcloud auth print-access-token
gcloud_tokeninfo = requests.get('https://www.googleapis.com/oauth2/v3/tokeninfo?access_token=' + gcloud_token[0]).json()
account_email = gcloud_tokeninfo['email']
#account_name = gcloud_tokeninfo['email'].split('@')[0]
account_name = 'cbtham'
#print(account_email)
print(account_name)

cbtham


In [36]:
# Deployment settings - edit these three values before running the cells below.
region = 'asia-southeast1' # Google Cloud region to deploy into, e.g. us-central1
project_id = 'astral-root-443419-b8' # Google Cloud project ID
public_repository = 'cbthamdev' # any name; becomes the public Artifact Registry repository

In [37]:
# The private Artifact Registry repository and the bucket are both named after
# the account.
private_repository = account_name
# NOTE(review): Cloud Storage bucket names are global; the recorded output of
# the `gsutil mb` cell shows a 409 "already exists" for this name - confirm the
# bucket really belongs to this project or pick a more specific name.
bucket_url = f"gs://{account_name}"

# Image name and tag (`nim` : `llama3-8b-instruct-1.0.0`) used inside the
# Artifact Registry repositories.
nim_model = "nim:llama3-8b-instruct-1.0.0"
# NIM in NGC
ngc_nim_image = "nvcr.io/nim/meta/llama3-8b-instruct:1.0.0"
# NIM in Artifact Registry
public_nim_image = f"{region}-docker.pkg.dev/{project_id}/{public_repository}/{nim_model}"
private_nim_image = f"{region}-docker.pkg.dev/{project_id}/{private_repository}/{nim_model}"

# presumably the Vertex AI model display name - it is not used in the visible cells; confirm
va_model_name = "nim-llama3-8b-instruct"

# Passed to the container as NIM_MODEL_PROFILE by the foreground `docker run` cell.
selected_profile = "vllm-fp16-tp2"
# NOTE(review): machine and accelerator settings are not consumed in the visible
# cells - presumably they size the Vertex AI deployment; confirm.
machine_type = "g2-standard-24"
accelerator_type = "NVIDIA_L4"
accelerator_count = 2

endpoint_name = va_model_name+"_endpoint"
# presumably the `model` field of the inference request payload - confirm against the request cell
payload_model = "meta/llama3-8b-instruct"

In [39]:
! gsutil mb -l {region} -p {project_id} {bucket_url}
! gcloud artifacts repositories create {public_repository} --repository-format=docker --location={region}
! gcloud artifacts repositories add-iam-policy-binding {public_repository} --location={region} --member=allUsers --role=roles/artifactregistry.repoAdmin
! gcloud artifacts repositories create {private_repository} --repository-format=docker --location={region}

Creating gs://cbtham/...
ServiceException: 409 A Cloud Storage bucket named 'cbtham' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.
Create request issued for: [cbthamdev]
Waiting for operation [projects/astral-root-443419-b8/locations/asia-southeast1
/operations/47eb6970-5a8d-436b-86a7-2aec3eade69d] to complete...done.          
Created repository [cbthamdev].
Updated IAM policy for repository [cbthamdev].
bindings:
- members:
  - allUsers
  role: roles/artifactregistry.repoAdmin
etag: BwYoQJr-8MI=
version: 1
Create request issued for: [cbtham]
Waiting for operation [projects/astral-root-443419-b8/locations/asia-southeast1
/operations/da5a9ab2-5203-43b7-bf1c-64aac4b4ed0e] to complete...done.          
Created repository [cbtham].


In [41]:
from google.cloud import aiplatform

# Initialise the Vertex AI SDK with the project, region and staging bucket
# configured in the cells above.
aiplatform.init(project=project_id, location=region, staging_bucket=bucket_url)

In [42]:
def run_bash_cmd(cmd, check=True):
    """Run a command, print its combined stdout/stderr, and fail loudly on error.

    Args:
        cmd: Either a string, which is executed through the shell (and may be
            a multi-line script - its exit status is then the status of the
            last command), or a list of arguments, which is executed directly
            without a shell.
        check: When true (the default), a non-zero exit status raises.

    Returns:
        None. The captured output is printed, not returned.

    Raises:
        TypeError: If ``cmd`` is neither a string nor a list.
        subprocess.CalledProcessError: If the command exits with a non-zero
            status and ``check`` is true. The captured output is attached to
            the exception as ``output``.
    """
    import subprocess

    if isinstance(cmd, str):
        use_shell = True
    elif isinstance(cmd, list):
        use_shell = False
    else:
        raise TypeError(f"cmd must be a str or a list, got {type(cmd).__name__}")

    # stderr is merged into stdout, so there is never a separate error stream
    # to inspect: the exit status is the only reliable failure signal.
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        shell=use_shell,
        text=True,
    )

    # Always show what the command printed, including on failure.
    print(completed.stdout)

    if check and completed.returncode != 0:
        raise subprocess.CalledProcessError(
            completed.returncode, cmd, output=completed.stdout
        )

In [43]:
# Point gcloud at the target region and project, then register the regional
# Artifact Registry host as a Docker credential helper.
# `--quiet` suppresses the "Do you want to continue (Y/n)?" prompt, which cannot
# be answered when the command runs inside a captured subprocess.
bash_cmd = f"""
    export region={region}
    gcloud config set ai_platform/region {region}
    gcloud config set project {project_id}
    gcloud auth configure-docker {region}-docker.pkg.dev --quiet
    """
run_bash_cmd(bash_cmd)

Updated property [ai_platform/region].
Updated property [core/project].

{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud"
  }
}
Adding credentials for: asia-southeast1-docker.pkg.dev
After update, the following will be written to your Docker config file located 
at [/home/jupyter/.docker/config.json]:
 {
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "asia-southeast1-docker.pkg.dev": "gcloud"
  }
}

Do you want to continue (Y/n)?  
Docker configuration file updated.



In [45]:
# Never store the NGC API key in the notebook: read it from the NGC_API_KEY
# environment variable, and fall back to a hidden prompt when it is not set.
import os
from getpass import getpass

NGC_API_KEY = os.environ.get("NGC_API_KEY") or getpass("NGC API key: ")

In [46]:
# Login to NGC
import shlex
from pathlib import Path
container_name="llama3-8B-Instruct"
local_nim_cache=str(Path(".cache/nim").absolute())

# The API key and the cache path are shell-quoted before being spliced into the
# script so that special characters cannot break or extend the commands. The
# line appended to ~/.bashrc expands the already-exported variable instead of
# interpolating the key a second time.
# NOTE(review): this appends the key to ~/.bashrc in plain text on every run -
# confirm that persisting it there is really wanted.
bash_cmd = f"""
    sudo apt-get install -y nvidia-docker2
    export NGC_API_KEY={shlex.quote(NGC_API_KEY)}
    echo "export NGC_API_KEY=$NGC_API_KEY" >> ~/.bashrc
    echo "$NGC_API_KEY" | docker login nvcr.io --username '$oauthtoken' --password-stdin

    export LOCAL_NIM_CACHE={shlex.quote(local_nim_cache)}
    mkdir -p "$LOCAL_NIM_CACHE"
    echo "Local NIM cache created"
    """

run_bash_cmd(bash_cmd)

# Pull NIM image from NGC and run container
# `-e` needs NAME=value: passing only the key made Docker treat the secret as a
# variable *name*, so the container never received NGC_API_KEY.
docker_cmd = [
    "docker", "run", "-d", "--rm",
    f"--name={container_name}",
    "--gpus", "all",
    "-e", f"NGC_API_KEY={NGC_API_KEY}",
    "-v", f"{local_nim_cache}:/opt/nim/.cache",
    "-p", "8000:8000",
    ngc_nim_image
]

# Printed first on purpose: `docker run -d` prints the new container id, which
# completes this sentence in the cell output.
print(f"NIM image {ngc_nim_image} pulled from NGC successfully, running container is")
run_bash_cmd(docker_cmd)

# Push NIM image to public AR repository
bash_cmd = f"""
    docker tag {ngc_nim_image} {public_nim_image}

    docker push {public_nim_image}
    """

run_bash_cmd(bash_cmd)
print(f"NIM image {ngc_nim_image} pushed to Artifact Registry {public_nim_image} successfully")

# Optional
# Push NIM image to private AR repository
bash_cmd = f"""
    docker tag {public_nim_image} {private_nim_image}

    docker push {private_nim_image}
    """

run_bash_cmd(bash_cmd)
print(f"NIM image {public_nim_image} pushed to Artifact Registry {private_nim_image} successfully")

# The detached container now receives a valid key and therefore keeps running.
# Stop it (it was started with --rm, so stopping also removes it) to release the
# container name, host port 8000 and the GPUs for the next `docker run`.
# `|| true` keeps the cell green when the container has already exited.
run_bash_cmd(f"docker stop {container_name} || true")

Reading package lists...
Building dependency tree...
Reading state information...
nvidia-docker2 is already the newest version (2.13.0-1).
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Local NIM cache created

NIM image nvcr.io/nim/meta/llama3-8b-instruct:1.0.0 pulled from NGC successfully, running container is
Unable to find image 'nvcr.io/nim/meta/llama3-8b-instruct:1.0.0' locally
1.0.0: Pulling from nim/meta/llama3-8b-instruct
5e8117c0bd28: Pulling fs layer
d67fcc6ef577: Pulling fs layer
47ee674c5713: Pulling fs layer
63daa0e64b30: Pulling fs layer
d9d9aecefab5: Pulling fs layer
d71f46a15657: Pulling fs layer
054e2ffff644: Pulling fs layer
7d3cd81654d5: Pulling fs layer
dca613dca886: Pulling fs layer
0fdcdcda3b2e: Pulling fs layer
af7b4f7dc15a: Pulling fs layer
6d101782f66c: Pulling fs layer
e8427cb13897: Pulling fs layer
d71f46a15657: Waiting
054e2ffff644: Waiting
7d3cd81

In [48]:
# Run NIM container
# Starts the image from the private Artifact Registry repository in the
# foreground (no `-d`), publishing port 8000 and mounting the local NIM cache at
# /opt/nim/.cache. NIM_MODEL_PROFILE is set to `selected_profile`, the process
# runs under the current user id (`-u $(id -u)`), and the container is removed
# when it exits (`--rm`).
# NOTE(review): this reuses `container_name` and host port 8000 from the
# detached container started in the previous cell - make sure that one is no
# longer running before executing this cell.
! docker run -it --rm --name={container_name} \
  --runtime=nvidia \
  --gpus all \
  --shm-size=16GB \
  -e NGC_API_KEY={NGC_API_KEY} \
  -e NIM_MODEL_PROFILE={selected_profile} \
  -v {local_nim_cache}":/opt/nim/.cache" \
  -u $(id -u) \
  -p 8000:8000 \
  {private_nim_image}


== NVIDIA Inference Microservice LLM NIM ==

NVIDIA Inference Microservice LLM NIM Version 1.0.0
Model: nim/meta/llama3-8b-instruct

Container image Copyright (c) 2016-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

This NIM container is governed by the NVIDIA AI Product Agreement here:
https://www.nvidia.com/en-us/data-center/products/nvidia-ai-enterprise/eula/.
A copy of this license can be found under /opt/nim/LICENSE.

The use of this model is governed by the AI Foundation Models Community License
here: https://docs.nvidia.com/ai-foundation-models-community-license.pdf.

ADDITIONAL INFORMATION: Meta Llama 3 Community License, Built with Meta Llama 3. 
A copy of the Llama 3 license can be found under /opt/nim/MODEL_LICENSE.

2024-12-02 03:00:39,550 [INFO] PyTorch version 2.2.2 available.
2024-12-02 03:00:40,129 [INFO] [TRT-LLM] [I] Starting TensorRT-LLM init.
2024-12-02 03:00:40,237 [INFO] [TRT-LLM] [I] TensorRT-LLM inited.
[TensorRT-LLM] TensorRT-LLM version: 0.10.1.d

In [49]:
# List the local images: the NGC image and both Artifact Registry tags are
# expected to show the same IMAGE ID, since the latter are tags of the former.
! docker images

REPOSITORY                                                           TAG                        IMAGE ID       CREATED        SIZE
asia-southeast1-docker.pkg.dev/astral-root-443419-b8/cbtham/nim      llama3-8b-instruct-1.0.0   3cb29b0d79e6   6 months ago   12.5GB
asia-southeast1-docker.pkg.dev/astral-root-443419-b8/cbthamdev/nim   llama3-8b-instruct-1.0.0   3cb29b0d79e6   6 months ago   12.5GB
nvcr.io/nim/meta/llama3-8b-instruct                                  1.0.0                      3cb29b0d79e6   6 months ago   12.5GB
