In [62]:
import os
import cmlapi
import random
import string
import json

#### Set Pinecone variables

In [63]:
## Export the Pinecone settings as process environment variables in a single call.
os.environ.update(
    PINECONE_API_KEY="YOUR KEY HERE",  ## Replace with yours
    PINECONE_ENVIRONMENT="gcp-starter",  ## Default
    PINECONE_INDEX="cml-default",  ## Default
)

#### Get CML API Client and list the available Runtimes

In [64]:
## Build the CML API v2 client. The "/api/v1" segment is removed from CDSW_API_URL to get the URL passed to the client.
## os.environ[...] is used so a missing CDSW_API_URL fails with a KeyError naming the variable
## instead of an AttributeError on None.
client = cmlapi.default_client(
    url=os.environ["CDSW_API_URL"].replace("/api/v1", ""),
    cml_api_key=os.getenv("CDSW_APIV2_KEY"),
)

## List the runtimes that match the kernel / edition / editor this workshop needs
available_runtimes = client.list_runtimes(search_filter=json.dumps({
    "kernel": "Python 3.10",
    "edition": "Nvidia GPU",
    "editor": "JupyterLab"
}))
print(available_runtimes)

## Fail early with a readable message rather than an IndexError/ValueError further down
if not available_runtimes.runtimes:
    raise RuntimeError(
        "No ML Runtime matches the requested filter (Python 3.10 / Nvidia GPU / JupyterLab)"
    )


def _runtime_version_key(runtime):
    """Return the numeric parts of ``runtime.full_version`` as a tuple of ints.

    For example '2023.08.2-b8' becomes (2023, 8, 2, 8), so versions compare
    numerically rather than as strings. A missing version sorts lowest.
    """
    version_text = runtime.full_version or ""
    digits_only = "".join(ch if ch.isdigit() else " " for ch in version_text)
    return tuple(int(part) for part in digits_only.split())


## Pick the latest runtime by version instead of relying on a fixed position in the list
## (a hard-coded index breaks when the number or order of matching runtimes differs).
## The JOB_IMAGE_ML_RUNTIME variable stores the ML Runtime which will be used to launch the job
latest_runtime = max(available_runtimes.runtimes, key=_runtime_version_key)
print(latest_runtime)
print(latest_runtime.image_identifier)
JOB_IMAGE_ML_RUNTIME = latest_runtime.image_identifier

## Store the ML Runtime for any future jobs in an environment variable so we don't have to do this step again
os.environ['JOB_IMAGE_ML_RUNTIME'] = JOB_IMAGE_ML_RUNTIME

{'next_page_token': '',
 'runtimes': [{'description': 'Python runtime with CUDA libraries provided by '
                              'Cloudera',
               'edition': 'Nvidia GPU',
               'editor': 'JupyterLab',
               'full_version': '2023.08.1-b6',
               'image_identifier': 'docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-jupyterlab-python3.10-cuda:2023.08.1-b6',
               'kernel': 'Python 3.10',
               'status': 'ENABLED'},
              {'description': 'Python runtime with CUDA libraries provided by '
                              'Cloudera',
               'edition': 'Nvidia GPU',
               'editor': 'JupyterLab',
               'full_version': '2023.08.2-b8',
               'image_identifier': 'docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-jupyterlab-python3.10-cuda:2023.08.2-b8',
               'kernel': 'Python 3.10',
               'status': 'ENABLED'}]}
{'description': 'Python runtime with CUDA libraries provi

#### Get the current working project

In [65]:
## Look up the project this notebook is running in; its id is read from CDSW_PROJECT_ID.
current_project_id = os.getenv("CDSW_PROJECT_ID")
project = client.get_project(project_id=current_project_id)
print(project)

{'created_at': datetime.datetime(2023, 12, 11, 17, 36, 11, 602406, tzinfo=tzlocal()),
 'creation_status': 'success',
 'creator': {'email': 'ktalbert@cloudera.com',
             'name': 'Kevin Talbert',
             'username': 'ktalbert'},
 'default_engine_type': 'ml_runtime',
 'description': '',
 'environment': '{"CDSW_APP_POLLING_ENDPOINT":"/","PROJECT_OWNER":"ktalbert"}',
 'ephemeral_storage_limit': 10,
 'ephemeral_storage_request': 0,
 'id': 'i7c6-5g66-06pw-iu5i',
 'name': 'CML-LLM-HOL-Workshop',
 'owner': {'email': 'ktalbert@cloudera.com',
           'name': 'Kevin Talbert',
           'username': 'ktalbert'},
 'permissions': {'admin': True,
                 'business_user': True,
                 'inherit': False,
                 'operator': True,
                 'read': True,
                 'write': True},
 'shared_memory_limit': 0,
 'updated_at': datetime.datetime(2023, 12, 11, 18, 27, 54, 4162, tzinfo=tzlocal()),
 'visibility': 'private'}


#### Create and Run Job to Populate Pinecone Vector DB

In [66]:
## A random 10-letter suffix keeps the job name distinct when this cell is run more than once
random_id = ''.join(random.choices(string.ascii_lowercase, k=10))

## Forward the Pinecone settings to the job explicitly.
## NOTE(review): this assumes the job does not inherit variables set via os.environ in this
## notebook kernel, and that the job script reads these names - confirm against the script.
## Unset values and the unedited "YOUR KEY HERE" placeholder are skipped so they cannot
## override a value configured elsewhere (e.g. at project level).
job_environment = {
    var_name: os.environ[var_name]
    for var_name in ("PINECONE_API_KEY", "PINECONE_ENVIRONMENT", "PINECONE_INDEX")
    if os.environ.get(var_name) and os.environ[var_name] != "YOUR KEY HERE"
}

## Describe the job: which script to run, with what resources, on which runtime image
job_body = cmlapi.CreateJobRequest(
    project_id = project.id,
    name = "Populate Pinecone Vector DB " + random_id,
    kernel = "python3",
    script = "3a_load_to_pinecone_vector_db/pinecone_vectordb_insert.py",
    cpu = 1,
    memory = 4,
    runtime_identifier = os.getenv('JOB_IMAGE_ML_RUNTIME'),
    environment = job_environment
)

## Register the job in the current project
job_result = client.create_job(
    body = job_body,
    project_id = project.id
)

## Start a run of the newly created job right away
job_run = client.create_job_run(
    cmlapi.CreateJobRunRequest(),
    project_id = project.id,
    job_id = job_result.id
)