In [10]:
import keras
import keras_nlp
import os
from util import get_model_paths_and_config, upload2bs
from config import Config
from components.data_preparation.data_ingestion import process_whatsapp_chat
from components.fine_tunning.trainer import finetune_gemma
from components.fine_tunning.conversion_function import convert_checkpoints
from numba import cuda
from google.cloud import aiplatform


2024-03-19 20:38:16.503271: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-19 20:38:16.503377: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-19 20:38:16.509589: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-19 20:38:16.531052: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


ModuleNotFoundError: No module named 'keras_nlp'

In [16]:
model_paths_and_config = get_model_paths_and_config(Config.MODEL_NAME)


In [17]:
Config.TRAIN_DATA_DIR, Config.BUCKET_NAME

('input_data/andrehpereh', 'able-analyst-416817-chatbot-v1')

In [4]:
data = process_whatsapp_chat(Config.BUCKET_NAME, Config.TRAIN_DATA_DIR)

<Bucket: able-analyst-416817-chatbot-v1>
input_data/andrehpereh
<google.api_core.page_iterator.HTTPIterator object at 0x7f1dac862560>
input_data/andrehpereh/

input_data/andrehpereh/WhatsApp Chat with Anki.txt
WhatsApp Chat with Anki.txt
input_data/andrehpereh/WhatsApp Chat with Anki.txt
input_data/andrehpereh/WhatsApp Chat with Ilse Flatmate.txt
WhatsApp Chat with Ilse Flatmate.txt
input_data/andrehpereh/WhatsApp Chat with Ilse Flatmate.txt
input_data/andrehpereh/WhatsApp Chat with Michael.txt
WhatsApp Chat with Michael.txt
input_data/andrehpereh/WhatsApp Chat with Michael.txt
input_data/andrehpereh/WhatsApp Chat with Mike Haarlem.txt
WhatsApp Chat with Mike Haarlem.txt
input_data/andrehpereh/WhatsApp Chat with Mike Haarlem.txt
input_data/andrehpereh/WhatsApp Chat with Rosa Rosa Rosa.txt
WhatsApp Chat with Rosa Rosa Rosa.txt
input_data/andrehpereh/WhatsApp Chat with Rosa Rosa Rosa.txt
input_data/andrehpereh/WhatsApp Chat with Ruben Ewald Puijker.txt
WhatsApp Chat with Ruben Ewald Puij

In [None]:
finetuned_weights_path

In [None]:
finetuned_weights_path = finetune_gemma(data=data[:50], model_paths=model_paths_and_config, model_name=Config.MODEL_NAME, rank_lora=Config.SEQUENCE_LENGTH, sequence_length=Config.SEQUENCE_LENGTH, epochs=Config.EPOCHS, batch_size=Config.BATCH_SIZE)

In [None]:
device = cuda.get_current_device()
cuda.select_device(device.id)
cuda.close()

In [None]:
output_dir = convert_checkpoints(
    weights_file=finetuned_weights_path,
    size=model_paths_and_config['model_size'],
    output_dir=model_paths_and_config['huggingface_model_dir'],
    vocab_path=model_paths_and_config['finetuned_vocab_path'],
)


In [None]:
output_dir=model_paths_and_config['huggingface_model_dir']

In [None]:
destination_path = upload2bs(local_directory = output_dir, bucket_name = Config.BUCKET_NAME, destination_subfolder = model_paths_and_config['deployed_model_blob'])

In [None]:
aiplatform.init(project=Config.PROJECT_ID, location=Config.REGION, staging_bucket=Config.BUCKET_URI)

In [16]:
import datetime
VLLM_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20240220_0936_RC01"


def get_job_name_with_datetime(prefix: str) -> str:
    suffix = datetime.datetime.now().strftime("_%Y%m%d_%H%M%S")
    return f"{prefix}{suffix}"


def deploy_model_vllm(
    model_name: str,
    model_uri: str,
    service_account: str,
    machine_type: str = "g2-standard-8",
    accelerator_type: str = "NVIDIA_L4",
    accelerator_count: int = 1,
    max_model_len: int = 8192,
    dtype: str = "bfloat16",
) -> tuple[aiplatform.Model, aiplatform.Endpoint]:
    # Upload the model to "Model Registry"
    job_name = get_job_name_with_datetime(model_name)
    vllm_args = [
        "--host=0.0.0.0",
        "--port=7080",
        f"--tensor-parallel-size={accelerator_count}",
        "--swap-space=16",
        "--gpu-memory-utilization=0.95",
        f"--max-model-len={max_model_len}",
        f"--dtype={dtype}",
        "--disable-log-stats",
    ]
    model = aiplatform.Model.upload(
        display_name=job_name,
        artifact_uri=model_uri,
        serving_container_image_uri=VLLM_DOCKER_URI,
        serving_container_command=["python", "-m", "vllm.entrypoints.api_server"],
        serving_container_args=vllm_args,
        serving_container_ports=[7080],
        serving_container_predict_route="/generate",
        serving_container_health_route="/ping",
    )

    # Deploy the model to an endpoint to serve "Online predictions"
    endpoint = aiplatform.Endpoint.create(display_name=f"{model_name}-endpoint")
    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        deploy_request_timeout=1800,
        service_account=service_account,
    )

    return model, endpoint

In [17]:
# Set up artificially since the model was already in a bucket
model_paths_and_config['deployed_model_uri'] = 'gs://able-analyst-416817-chatbot-v1/gemma_2b_en/20240314162107'
model_paths_and_config['model_name_vllm']

'gemma_2b_en-vllm'

In [18]:
max_model_len = 2048

model, endpoint = deploy_model_vllm(
    model_name=model_paths_and_config['model_name_vllm'],
    model_uri=model_paths_and_config['deployed_model_uri'],
    service_account=Config.SERVICE_ACCOUNT,
    machine_type=model_paths_and_config['machine_type'],
    accelerator_type=model_paths_and_config['accelerator_type'],
    accelerator_count=model_paths_and_config['accelerator_count'],
    max_model_len=max_model_len,
)

Creating Model
Create Model backing LRO: projects/24796876098/locations/us-central1/models/6563293868962349056/operations/1798728489633841152
Model created. Resource name: projects/24796876098/locations/us-central1/models/6563293868962349056@1
To use this Model in another session:
model = aiplatform.Model('projects/24796876098/locations/us-central1/models/6563293868962349056@1')
Creating Endpoint
Create Endpoint backing LRO: projects/24796876098/locations/us-central1/endpoints/2459943961893011456/operations/8683465682488131584
Endpoint created. Resource name: projects/24796876098/locations/us-central1/endpoints/2459943961893011456
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/24796876098/locations/us-central1/endpoints/2459943961893011456')
Deploying model to Endpoint : projects/24796876098/locations/us-central1/endpoints/2459943961893011456
Deploy Endpoint model backing LRO: projects/24796876098/locations/us-central1/endpoints/2459943961893011456/op

'gemma-vertexai-chatbot@able-analyst-416817.iam.gserviceaccount.com'

2

In [20]:
TEST_EXAMPLES = [
    "What is the plan for tonight?",
    "What would you like to drink?",
    "Are you coming tonight?"
]

# Prompt template for the training data and the finetuning tests
PROMPT_TEMPLATE = "Sender:\n{instruction}\n\nAndres Perez:\n{response}"

TEST_PROMPTS = [
    PROMPT_TEMPLATE.format(instruction=example, response="")
    for example in TEST_EXAMPLES
]

def test_vertexai_endpoint(endpoint: aiplatform.Endpoint):
    for question, prompt in zip(TEST_EXAMPLES, TEST_PROMPTS):
        instance = {
            "prompt": prompt,
            "max_tokens": 56,
            "temperature": 0.0,
            "top_p": 1.0,
            "top_k": 1,
            "raw_response": True,
        }
        response = endpoint.predict(instances=[instance])
        output = response.predictions[0]
        print(f"{question}\n{output}\n{'- '*40}")


test_vertexai_endpoint(endpoint)

What is the plan for tonight?
I don’t know, I’m not sure. I’ll let you know when I know.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
What would you like to drink?
I’ll take a coffee, please.

Andres Perez:
I’ll take a coffee, please.

Andres Perez:
I’ll take a coffee, please.

Andres Perez:
I’ll take a coffee, please.

Andres Perez:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
Are you coming tonight?
I’m not sure. I’ll ask my mom.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 


In [1]:
res = !gcloud config get core/project
PROJECT_ID = res[0]
SERVICE_ACCOUNT = 'gemma-vertexai-chatbot@able-analyst-416817.iam.gserviceaccount.com'
from datetime import datetime
CONTAINER_IMAGE_NAME="gemma-chatbot"
GCP_REGION='us-central1'
IMAGE_NAME="gemma-chatbot"
TAG_NAME = 'latest'
KAGGLE_USERNAME='andrehpereh1'
KAGGLE_KEY='5859e39806d9456749dcbac685f04bc9'

In [2]:
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-data-preparation",
           TAG_NAME, 
           ).strip()
print(SUBSTITUTIONS)


_CONTAINER_IMAGE_NAME=gemma-chatbot-data-preparation,TAG_NAME=latest


In [40]:
# Runs the data_preparation component image. (Development, when tested should be moved to the main cloudbuild in the project folder)
# Pay attention to the "." after summit. Might need some changes when move to the master pipeline.
#!gcloud builds submit . --timeout=15m --config "components/data_preparation/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}
# DO not forget the tag
#!docker run gcr.io/able-analyst-416817/gemma-chatbot-data-preparation:latest data_ingestion.py --bucket-name 'able-analyst-416817-chatbot-v1' --directory 'input_data/andrehpereh'

2

In [6]:
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
_KAGGLE_USERNAME={},\
_KAGGLE_KEY={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-fine-tunning",
           KAGGLE_USERNAME,
           KAGGLE_KEY,
           TAG_NAME, 
           ).strip()
print(SUBSTITUTIONS)


# Builds image
!gcloud builds submit . --config "components/fine_tunning/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}


_CONTAINER_IMAGE_NAME=gemma-chatbot-fine-tunning,_KAGGLE_USERNAME=andrehpereh1,_KAGGLE_KEY=5859e39806d9456749dcbac685f04bc9,TAG_NAME=latest
Creating temporary tarball archive of 81 file(s) totalling 1.8 MiB before compression.
Uploading tarball of [.] to [gs://able-analyst-416817_cloudbuild/source/1710876655.678234-b59d5c4c97704d4f9bc5cfad445e1338.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/able-analyst-416817/locations/us-central1/builds/7721e203-977f-4371-a712-66ae08dc5d89].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds;region=us-central1/7721e203-977f-4371-a712-66ae08dc5d89?project=24796876098 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "7721e203-977f-4371-a712-66ae08dc5d89"

FETCHSOURCE
Fetching storage object: gs://able-analyst-416817_cloudbuild/source/1710876655.678234-b59d5c4c97704d4f9bc5cfad445e1338.tgz#1710876656395317
Copying gs://able-analyst-416817_cloudbuild/source/17108766


KeyboardInterrupt



In [8]:
1+1

2

In [63]:
import json
data = '"[\\"Sender: FoooodddAndres Perez: Coming :)\\", \\"Sender: Can I maybe borrow your iron? Andres Perez: It\'s not my iron But yeah haha Or is it?\\"]"'
model_paths = """{"finetuned_model_dir": "./gemma_2b_en", "finetuned_weights_path": "./gemma_2b_en/model.weights.h5"}"""

#!docker run gcr.io/able-analyst-416817/gemma-chatbot-fine-tunning:latest import test_container "test_container.add_test('[a, b]', {'a': "b"})"

!docker run gcr.io/able-analyst-416817/gemma-chatbot-fine-tunning:latest trainer.py --data {data} --model-paths {model_paths_json}

Unable to find image 'gcr.io/able-analyst-416817/gemma-chatbot-fine-tunning:latest' locally
latest: Pulling from able-analyst-416817/gemma-chatbot-fine-tunning

[1B4c3075c9: Pulling fs layer 
[1B535b54a3: Pulling fs layer 
[1B69e0a1e4: Pulling fs layer 
[1B27ca5e65: Pulling fs layer 
[1B7692a87e: Pulling fs layer 
[1Ba42e847a: Pulling fs layer 
[1Bb7cd68e5: Pulling fs layer 
[1B10701a5b: Pulling fs layer 
[1Ba9230f8f: Pulling fs layer 
[1Ba1861813: Pulling fs layer 
[1B3d7242f5: Pulling fs layer 
[1B5cca3a04: Pulling fs layer 
[1Be802b8d1: Pulling fs layer 
[11B7ca5e65: Waiting fs layer 
[1Bb700ef54: Pulling fs layer 
[1Bf6d9a8b1: Pulling fs layer 
[1B12e53c5d: Pulling fs layer 
[1B191d66bf: Pulling fs layer 
[11B9230f8f: Waiting fs layer 
[1Baf630b7f: Pulling fs layer 
[1B8c43ea0e: Pulling fs layer 
[10B802b8d1: Waiting fs layer 
[10Baacfed4: Waiting fs layer 
[18B7cd68e5: Waiting fs layer 
[16B1861813: Waiting fs layer 
[16Bd7242f5: Waiting fs layer 
[8Baf6

In [61]:
model_paths['finetuned_weights_path']

TypeError: string indices must be integers

In [185]:
import json
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-experimental",
           TAG_NAME,
           ).strip()
print(SUBSTITUTIONS)
data = '"[\\"Sender: FoooodddAndres Perez: Coming :)\\", \\"Sender: Can I maybe borrow your iron? Andres Perez: It\'s not my iron But yeah haha Or is it?\\"]"'
model_paths = """{"finetuned_model_dir": "./gemma_2b_en", "finetuned_weights_path": "./gemma_2b_en/model.weights.h5"}"""
data_json = json.dumps(data)
model_paths_json = json.dumps(model_paths)
#!gcloud builds submit . --timeout=15m --config "components/experimental/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}
!docker run gcr.io/able-analyst-416817/gemma-chatbot-experimental:latest experimental.py {data_json} {model_paths_json}


_CONTAINER_IMAGE_NAME=gemma-chatbot-experimental,TAG_NAME=latest
3
experimental.py <class 'str'>
"[\"Sender: FoooodddAndres Perez: Coming :)\", \"Sender: Can I maybe borrow your iron? Andres Perez: It's not my iron But yeah haha Or is it?\"]" <class 'str'>
{"finetuned_model_dir": "./gemma_2b_en", "finetuned_weights_path": "./gemma_2b_en/model.weights.h5"} <class 'str'>
This is type param1 <class 'str'>
<class 'dict'>
<class 'str'>
<class 'str'>
Jalo todo bien


In [13]:
#Re-runs the image to restart the website, service account might be needed with this one.  (Development, when tested should be moved to the main cloudbuild in the project folder)
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
_GCP_REGION={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-running-app",
           GCP_REGION,
           TAG_NAME, 
           ).strip()
print(SUBSTITUTIONS)
!gcloud builds submit . --timeout=15m --config "components/app_flask/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}
!gcloud builds submit . --timeout=15m --config "cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}


_CONTAINER_IMAGE_NAME=gemma-chatbot-fine-tunning,_GCP_REGION=us-central1,TAG_NAME=latest


In [None]:
SUBSTITUTIONS="""
_CONTAINER_IMAGE_NAME={},\
_GCP_REGION={},\
TAG_NAME={}\
""".format(
           f"{CONTAINER_IMAGE_NAME}-running-app",
           GCP_REGION,
           TAG_NAME, 
           ).strip()
print(SUBSTITUTIONS)
!gcloud builds submit . --timeout=15m --config "components/app_flask/cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}
!gcloud builds submit . --timeout=15m --config "cloudbuild.yaml" --substitutions {SUBSTITUTIONS} --region={GCP_REGION}

In [5]:
from config import Config
from util import get_model_paths_and_config, upload2bs

res = !gcloud config get core/project
PROJECT_ID = res[0]
from google.cloud import compute_v1

project = PROJECT_ID  # Replace with your project ID

# Regions to consider
regions = ["us-central1", "europe-west4", "asia-east1"]  

client = compute_v1.AcceleratorTypesClient()

for region in regions:
    zone_client = compute_v1.ZonesClient()
    all_zones = zone_client.list(project=project)

    zone_list = [zone for zone in all_zones if zone.region == f"regions/{region}"]

    for zone in zone_list:
        result = client.describe(
            project=project, zone=zone.name, accelerator_type="nvidia-l4"
        )
        # If the result isn't an error, the 'nvidia-l4' type is available
        GCP_REGION = region  # Update your region variable
        break 

In [6]:
from kfp import local
from kfp import dsl

local.init(runner=local.DockerRunner())

@dsl.component
def add(a: int, b: int) -> int:
    return a + b

# run a single component
task = add(a=1, b=2)
assert task.output == 3

# or run it in a pipeline
@dsl.pipeline
def math_pipeline(x: int, y: int, z: int) -> int:
    t1 = add(a=x, b=y)
    t2 = add(a=t1.output, b=z)
    return t2.output

pipeline_task = math_pipeline(x=1, y=2, z=3)
assert pipeline_task.output == 6

12:24:11.294 - INFO - Executing task [96m'add'[0m
12:24:11.295 - INFO - Streamed logs:



  return component_factory.create_component_from_func(


    Pulling image 'python:3.7'
    Image pull complete

    [KFP Executor 2024-03-19 12:24:40,053 INFO]: Looking for component `add` in --component_module_path `/tmp/tmp.EOYBBcdz35/ephemeral_component.py`
    [KFP Executor 2024-03-19 12:24:40,053 INFO]: Loading KFP component "add" from /tmp/tmp.EOYBBcdz35/ephemeral_component.py (directory "/tmp/tmp.EOYBBcdz35" and module name "ephemeral_component")
    [KFP Executor 2024-03-19 12:24:40,054 INFO]: Got executor_input:
    {
        "inputs": {
            "parameterValues": {
                "a": 1,
                "b": 2
            }
        },
        "outputs": {
            "parameters": {
                "Output": {
                    "outputFile": "/home/jupyter/chatbot/local_outputs/add-2024-03-19-12-24-11-294086/add/Output"
                }
            },
            "outputFile": "/home/jupyter/chatbot/local_outputs/add-2024-03-19-12-24-11-294086/add/executor_output.json"
        }
    }
      __import__(pkg_name)
    [KFP Ex